{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.102509883239864, "eval_steps": 5000, "global_step": 55500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999540314424934e-05, "loss": 0.9745, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.999080628849867e-05, "loss": 0.9978, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.9986209432748005e-05, "loss": 0.92, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.998161257699734e-05, "loss": 0.9636, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.9977015721246664e-05, "loss": 0.9684, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.9972418865496e-05, "loss": 0.9837, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.996782200974534e-05, "loss": 0.9911, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.996322515399467e-05, "loss": 0.9517, "step": 80 }, { "epoch": 0.01, "learning_rate": 4.9958628298244004e-05, "loss": 0.9236, "step": 90 }, { "epoch": 0.01, "learning_rate": 4.995403144249334e-05, "loss": 0.98, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.994943458674267e-05, "loss": 0.937, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.9944837730992006e-05, "loss": 0.8949, "step": 120 }, { "epoch": 0.01, "learning_rate": 4.994024087524134e-05, "loss": 0.9701, "step": 130 }, { "epoch": 0.01, "learning_rate": 4.9935644019490666e-05, "loss": 0.9134, "step": 140 }, { "epoch": 0.01, "learning_rate": 4.993104716374e-05, "loss": 1.0187, "step": 150 }, { "epoch": 0.01, "learning_rate": 4.992645030798934e-05, "loss": 0.941, "step": 160 }, { "epoch": 0.02, "learning_rate": 4.992185345223867e-05, "loss": 0.9512, "step": 170 }, { "epoch": 0.02, "learning_rate": 4.9917256596488005e-05, "loss": 0.9193, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.991265974073734e-05, "loss": 0.8975, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.990806288498667e-05, "loss": 1.0146, "step": 200 }, { "epoch": 0.02, "learning_rate": 4.990346602923601e-05, "loss": 0.8947, "step": 210 }, { "epoch": 0.02, "learning_rate": 4.9898869173485344e-05, "loss": 0.9947, "step": 220 }, { "epoch": 0.02, "learning_rate": 4.989427231773467e-05, "loss": 0.9054, "step": 230 }, { "epoch": 0.02, "learning_rate": 4.9889675461984e-05, "loss": 0.8698, "step": 240 }, { "epoch": 0.02, "learning_rate": 4.988507860623334e-05, "loss": 0.947, "step": 250 }, { "epoch": 0.02, "learning_rate": 4.988048175048267e-05, "loss": 0.9899, "step": 260 }, { "epoch": 0.02, "learning_rate": 4.9875884894732006e-05, "loss": 1.0354, "step": 270 }, { "epoch": 0.03, "learning_rate": 4.987128803898134e-05, "loss": 1.0453, "step": 280 }, { "epoch": 0.03, "learning_rate": 4.986669118323067e-05, "loss": 0.9188, "step": 290 }, { "epoch": 0.03, "learning_rate": 4.986209432748001e-05, "loss": 0.93, "step": 300 }, { "epoch": 0.03, "learning_rate": 4.9857497471729345e-05, "loss": 0.9614, "step": 310 }, { "epoch": 0.03, "learning_rate": 4.985290061597867e-05, "loss": 0.9783, "step": 320 }, { "epoch": 0.03, "learning_rate": 4.9848303760228005e-05, "loss": 0.9796, "step": 330 }, { "epoch": 0.03, "learning_rate": 4.984370690447734e-05, "loss": 1.0028, "step": 340 }, { "epoch": 0.03, "learning_rate": 4.983911004872667e-05, "loss": 0.8967, "step": 350 }, { "epoch": 0.03, "learning_rate": 4.983451319297601e-05, "loss": 0.9753, "step": 360 }, { "epoch": 0.03, "learning_rate": 4.9829916337225344e-05, "loss": 1.0108, "step": 370 }, { "epoch": 0.03, "learning_rate": 4.982531948147467e-05, "loss": 0.9963, "step": 380 }, { "epoch": 0.04, "learning_rate": 4.982072262572401e-05, "loss": 0.9727, "step": 390 }, { "epoch": 0.04, "learning_rate": 4.9816125769973346e-05, "loss": 0.943, "step": 400 }, { "epoch": 0.04, "learning_rate": 4.981152891422267e-05, "loss": 1.006, "step": 410 }, { "epoch": 0.04, "learning_rate": 4.9806932058472006e-05, "loss": 0.9082, "step": 420 }, { "epoch": 0.04, "learning_rate": 4.980233520272134e-05, "loss": 1.0516, "step": 430 }, { "epoch": 0.04, "learning_rate": 4.979773834697067e-05, "loss": 0.9686, "step": 440 }, { "epoch": 0.04, "learning_rate": 4.979314149122001e-05, "loss": 0.9554, "step": 450 }, { "epoch": 0.04, "learning_rate": 4.9788544635469345e-05, "loss": 0.9972, "step": 460 }, { "epoch": 0.04, "learning_rate": 4.9783947779718675e-05, "loss": 1.0398, "step": 470 }, { "epoch": 0.04, "learning_rate": 4.977935092396801e-05, "loss": 1.0127, "step": 480 }, { "epoch": 0.05, "learning_rate": 4.977475406821734e-05, "loss": 0.8921, "step": 490 }, { "epoch": 0.05, "learning_rate": 4.977015721246667e-05, "loss": 1.0424, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.976556035671601e-05, "loss": 1.0228, "step": 510 }, { "epoch": 0.05, "learning_rate": 4.9760963500965343e-05, "loss": 0.978, "step": 520 }, { "epoch": 0.05, "learning_rate": 4.975636664521467e-05, "loss": 0.9348, "step": 530 }, { "epoch": 0.05, "learning_rate": 4.975176978946401e-05, "loss": 0.9722, "step": 540 }, { "epoch": 0.05, "learning_rate": 4.9747172933713346e-05, "loss": 0.8894, "step": 550 }, { "epoch": 0.05, "learning_rate": 4.9742576077962676e-05, "loss": 0.9188, "step": 560 }, { "epoch": 0.05, "learning_rate": 4.973797922221201e-05, "loss": 1.0509, "step": 570 }, { "epoch": 0.05, "learning_rate": 4.973338236646134e-05, "loss": 0.9719, "step": 580 }, { "epoch": 0.05, "learning_rate": 4.972878551071067e-05, "loss": 0.9682, "step": 590 }, { "epoch": 0.06, "learning_rate": 4.972418865496001e-05, "loss": 0.8215, "step": 600 }, { "epoch": 0.06, "learning_rate": 4.9719591799209345e-05, "loss": 0.9522, "step": 610 }, { "epoch": 0.06, "learning_rate": 4.9714994943458674e-05, "loss": 0.9875, "step": 620 }, { "epoch": 0.06, "learning_rate": 4.971039808770801e-05, "loss": 0.8945, "step": 630 }, { "epoch": 0.06, "learning_rate": 4.970580123195735e-05, "loss": 0.9984, "step": 640 }, { "epoch": 0.06, "learning_rate": 4.970120437620668e-05, "loss": 0.9307, "step": 650 }, { "epoch": 0.06, "learning_rate": 4.9696607520456013e-05, "loss": 1.0127, "step": 660 }, { "epoch": 0.06, "learning_rate": 4.969201066470534e-05, "loss": 0.8825, "step": 670 }, { "epoch": 0.06, "learning_rate": 4.968741380895467e-05, "loss": 0.9459, "step": 680 }, { "epoch": 0.06, "learning_rate": 4.968281695320401e-05, "loss": 1.0124, "step": 690 }, { "epoch": 0.06, "learning_rate": 4.9678220097453346e-05, "loss": 0.9543, "step": 700 }, { "epoch": 0.07, "learning_rate": 4.9673623241702675e-05, "loss": 0.9098, "step": 710 }, { "epoch": 0.07, "learning_rate": 4.966902638595201e-05, "loss": 0.8344, "step": 720 }, { "epoch": 0.07, "learning_rate": 4.966442953020135e-05, "loss": 0.8529, "step": 730 }, { "epoch": 0.07, "learning_rate": 4.965983267445068e-05, "loss": 0.9051, "step": 740 }, { "epoch": 0.07, "learning_rate": 4.9655235818700015e-05, "loss": 0.9432, "step": 750 }, { "epoch": 0.07, "learning_rate": 4.9650638962949344e-05, "loss": 1.0073, "step": 760 }, { "epoch": 0.07, "learning_rate": 4.9646042107198674e-05, "loss": 1.0228, "step": 770 }, { "epoch": 0.07, "learning_rate": 4.964144525144801e-05, "loss": 0.8818, "step": 780 }, { "epoch": 0.07, "learning_rate": 4.963684839569735e-05, "loss": 1.0007, "step": 790 }, { "epoch": 0.07, "learning_rate": 4.963225153994668e-05, "loss": 0.926, "step": 800 }, { "epoch": 0.07, "learning_rate": 4.962765468419601e-05, "loss": 0.9359, "step": 810 }, { "epoch": 0.08, "learning_rate": 4.962305782844534e-05, "loss": 0.9373, "step": 820 }, { "epoch": 0.08, "learning_rate": 4.961846097269468e-05, "loss": 1.0229, "step": 830 }, { "epoch": 0.08, "learning_rate": 4.9613864116944016e-05, "loss": 0.9919, "step": 840 }, { "epoch": 0.08, "learning_rate": 4.9609267261193346e-05, "loss": 0.9905, "step": 850 }, { "epoch": 0.08, "learning_rate": 4.9604670405442675e-05, "loss": 0.9602, "step": 860 }, { "epoch": 0.08, "learning_rate": 4.960007354969201e-05, "loss": 1.026, "step": 870 }, { "epoch": 0.08, "learning_rate": 4.959547669394135e-05, "loss": 0.9833, "step": 880 }, { "epoch": 0.08, "learning_rate": 4.959087983819068e-05, "loss": 0.8711, "step": 890 }, { "epoch": 0.08, "learning_rate": 4.9586282982440014e-05, "loss": 0.9478, "step": 900 }, { "epoch": 0.08, "learning_rate": 4.9581686126689344e-05, "loss": 1.0195, "step": 910 }, { "epoch": 0.08, "learning_rate": 4.957708927093868e-05, "loss": 0.8746, "step": 920 }, { "epoch": 0.09, "learning_rate": 4.957249241518802e-05, "loss": 0.8922, "step": 930 }, { "epoch": 0.09, "learning_rate": 4.956789555943735e-05, "loss": 1.0195, "step": 940 }, { "epoch": 0.09, "learning_rate": 4.9563298703686676e-05, "loss": 0.9601, "step": 950 }, { "epoch": 0.09, "learning_rate": 4.955870184793601e-05, "loss": 0.8524, "step": 960 }, { "epoch": 0.09, "learning_rate": 4.955410499218535e-05, "loss": 0.9508, "step": 970 }, { "epoch": 0.09, "learning_rate": 4.954950813643468e-05, "loss": 1.0284, "step": 980 }, { "epoch": 0.09, "learning_rate": 4.9544911280684016e-05, "loss": 0.9629, "step": 990 }, { "epoch": 0.09, "learning_rate": 4.9540314424933345e-05, "loss": 0.9604, "step": 1000 }, { "epoch": 0.09, "learning_rate": 4.953571756918268e-05, "loss": 0.9483, "step": 1010 }, { "epoch": 0.09, "learning_rate": 4.953112071343202e-05, "loss": 0.9298, "step": 1020 }, { "epoch": 0.09, "learning_rate": 4.952652385768135e-05, "loss": 0.9247, "step": 1030 }, { "epoch": 0.1, "learning_rate": 4.952192700193068e-05, "loss": 0.9759, "step": 1040 }, { "epoch": 0.1, "learning_rate": 4.9517330146180014e-05, "loss": 0.9612, "step": 1050 }, { "epoch": 0.1, "learning_rate": 4.951273329042935e-05, "loss": 1.0357, "step": 1060 }, { "epoch": 0.1, "learning_rate": 4.950813643467868e-05, "loss": 0.9234, "step": 1070 }, { "epoch": 0.1, "learning_rate": 4.950353957892802e-05, "loss": 1.004, "step": 1080 }, { "epoch": 0.1, "learning_rate": 4.9498942723177346e-05, "loss": 1.038, "step": 1090 }, { "epoch": 0.1, "learning_rate": 4.949434586742668e-05, "loss": 0.9078, "step": 1100 }, { "epoch": 0.1, "learning_rate": 4.948974901167602e-05, "loss": 0.8739, "step": 1110 }, { "epoch": 0.1, "learning_rate": 4.948515215592535e-05, "loss": 1.0222, "step": 1120 }, { "epoch": 0.1, "learning_rate": 4.948055530017468e-05, "loss": 0.9808, "step": 1130 }, { "epoch": 0.1, "learning_rate": 4.9475958444424015e-05, "loss": 0.9663, "step": 1140 }, { "epoch": 0.11, "learning_rate": 4.9471361588673345e-05, "loss": 1.0217, "step": 1150 }, { "epoch": 0.11, "learning_rate": 4.946676473292268e-05, "loss": 1.0532, "step": 1160 }, { "epoch": 0.11, "learning_rate": 4.946216787717202e-05, "loss": 0.9233, "step": 1170 }, { "epoch": 0.11, "learning_rate": 4.945757102142135e-05, "loss": 0.8988, "step": 1180 }, { "epoch": 0.11, "learning_rate": 4.9452974165670684e-05, "loss": 0.9789, "step": 1190 }, { "epoch": 0.11, "learning_rate": 4.944837730992002e-05, "loss": 0.948, "step": 1200 }, { "epoch": 0.11, "learning_rate": 4.944378045416935e-05, "loss": 1.022, "step": 1210 }, { "epoch": 0.11, "learning_rate": 4.943918359841868e-05, "loss": 1.0613, "step": 1220 }, { "epoch": 0.11, "learning_rate": 4.9434586742668016e-05, "loss": 0.8609, "step": 1230 }, { "epoch": 0.11, "learning_rate": 4.9429989886917346e-05, "loss": 0.9011, "step": 1240 }, { "epoch": 0.11, "learning_rate": 4.942539303116668e-05, "loss": 0.9044, "step": 1250 }, { "epoch": 0.12, "learning_rate": 4.942079617541602e-05, "loss": 0.8883, "step": 1260 }, { "epoch": 0.12, "learning_rate": 4.941619931966535e-05, "loss": 0.9078, "step": 1270 }, { "epoch": 0.12, "learning_rate": 4.9411602463914685e-05, "loss": 1.0061, "step": 1280 }, { "epoch": 0.12, "learning_rate": 4.940700560816402e-05, "loss": 0.9432, "step": 1290 }, { "epoch": 0.12, "learning_rate": 4.940240875241335e-05, "loss": 0.9144, "step": 1300 }, { "epoch": 0.12, "learning_rate": 4.939781189666268e-05, "loss": 0.8251, "step": 1310 }, { "epoch": 0.12, "learning_rate": 4.939321504091202e-05, "loss": 0.9741, "step": 1320 }, { "epoch": 0.12, "learning_rate": 4.938861818516135e-05, "loss": 0.8966, "step": 1330 }, { "epoch": 0.12, "learning_rate": 4.9384021329410684e-05, "loss": 0.8834, "step": 1340 }, { "epoch": 0.12, "learning_rate": 4.937942447366002e-05, "loss": 0.9911, "step": 1350 }, { "epoch": 0.13, "learning_rate": 4.937482761790935e-05, "loss": 0.9333, "step": 1360 }, { "epoch": 0.13, "learning_rate": 4.9370230762158687e-05, "loss": 1.024, "step": 1370 }, { "epoch": 0.13, "learning_rate": 4.936563390640802e-05, "loss": 0.912, "step": 1380 }, { "epoch": 0.13, "learning_rate": 4.936103705065735e-05, "loss": 1.1053, "step": 1390 }, { "epoch": 0.13, "learning_rate": 4.935644019490668e-05, "loss": 0.8265, "step": 1400 }, { "epoch": 0.13, "learning_rate": 4.935184333915602e-05, "loss": 1.0776, "step": 1410 }, { "epoch": 0.13, "learning_rate": 4.934724648340535e-05, "loss": 1.0148, "step": 1420 }, { "epoch": 0.13, "learning_rate": 4.9342649627654685e-05, "loss": 0.9746, "step": 1430 }, { "epoch": 0.13, "learning_rate": 4.933805277190402e-05, "loss": 0.8809, "step": 1440 }, { "epoch": 0.13, "learning_rate": 4.933345591615335e-05, "loss": 0.9155, "step": 1450 }, { "epoch": 0.13, "learning_rate": 4.932885906040269e-05, "loss": 0.9706, "step": 1460 }, { "epoch": 0.14, "learning_rate": 4.9324262204652024e-05, "loss": 0.9858, "step": 1470 }, { "epoch": 0.14, "learning_rate": 4.9319665348901354e-05, "loss": 0.9738, "step": 1480 }, { "epoch": 0.14, "learning_rate": 4.9315068493150684e-05, "loss": 1.0844, "step": 1490 }, { "epoch": 0.14, "learning_rate": 4.931047163740002e-05, "loss": 0.9689, "step": 1500 }, { "epoch": 0.14, "learning_rate": 4.930587478164935e-05, "loss": 0.9703, "step": 1510 }, { "epoch": 0.14, "learning_rate": 4.9301277925898686e-05, "loss": 0.9335, "step": 1520 }, { "epoch": 0.14, "learning_rate": 4.929668107014802e-05, "loss": 1.0215, "step": 1530 }, { "epoch": 0.14, "learning_rate": 4.929208421439735e-05, "loss": 0.9247, "step": 1540 }, { "epoch": 0.14, "learning_rate": 4.928748735864669e-05, "loss": 0.9884, "step": 1550 }, { "epoch": 0.14, "learning_rate": 4.9282890502896025e-05, "loss": 0.9417, "step": 1560 }, { "epoch": 0.14, "learning_rate": 4.9278293647145355e-05, "loss": 1.029, "step": 1570 }, { "epoch": 0.15, "learning_rate": 4.9273696791394685e-05, "loss": 1.0022, "step": 1580 }, { "epoch": 0.15, "learning_rate": 4.926909993564402e-05, "loss": 1.0037, "step": 1590 }, { "epoch": 0.15, "learning_rate": 4.926450307989335e-05, "loss": 0.9434, "step": 1600 }, { "epoch": 0.15, "learning_rate": 4.925990622414269e-05, "loss": 1.0303, "step": 1610 }, { "epoch": 0.15, "learning_rate": 4.9255309368392024e-05, "loss": 1.0032, "step": 1620 }, { "epoch": 0.15, "learning_rate": 4.9250712512641354e-05, "loss": 1.0157, "step": 1630 }, { "epoch": 0.15, "learning_rate": 4.924611565689069e-05, "loss": 0.966, "step": 1640 }, { "epoch": 0.15, "learning_rate": 4.9241518801140027e-05, "loss": 0.928, "step": 1650 }, { "epoch": 0.15, "learning_rate": 4.9236921945389356e-05, "loss": 0.9483, "step": 1660 }, { "epoch": 0.15, "learning_rate": 4.9232325089638686e-05, "loss": 0.885, "step": 1670 }, { "epoch": 0.15, "learning_rate": 4.922772823388802e-05, "loss": 0.9944, "step": 1680 }, { "epoch": 0.16, "learning_rate": 4.922313137813735e-05, "loss": 0.9512, "step": 1690 }, { "epoch": 0.16, "learning_rate": 4.921853452238669e-05, "loss": 0.9031, "step": 1700 }, { "epoch": 0.16, "learning_rate": 4.9213937666636025e-05, "loss": 0.9833, "step": 1710 }, { "epoch": 0.16, "learning_rate": 4.9209340810885355e-05, "loss": 0.8866, "step": 1720 }, { "epoch": 0.16, "learning_rate": 4.920474395513469e-05, "loss": 1.0016, "step": 1730 }, { "epoch": 0.16, "learning_rate": 4.920014709938403e-05, "loss": 0.9396, "step": 1740 }, { "epoch": 0.16, "learning_rate": 4.919555024363336e-05, "loss": 0.8614, "step": 1750 }, { "epoch": 0.16, "learning_rate": 4.9190953387882694e-05, "loss": 0.9154, "step": 1760 }, { "epoch": 0.16, "learning_rate": 4.9186356532132024e-05, "loss": 0.9616, "step": 1770 }, { "epoch": 0.16, "learning_rate": 4.918175967638135e-05, "loss": 0.9843, "step": 1780 }, { "epoch": 0.16, "learning_rate": 4.917716282063069e-05, "loss": 0.9456, "step": 1790 }, { "epoch": 0.17, "learning_rate": 4.9172565964880026e-05, "loss": 0.9458, "step": 1800 }, { "epoch": 0.17, "learning_rate": 4.9167969109129356e-05, "loss": 0.9737, "step": 1810 }, { "epoch": 0.17, "learning_rate": 4.916337225337869e-05, "loss": 0.9982, "step": 1820 }, { "epoch": 0.17, "learning_rate": 4.915877539762803e-05, "loss": 0.881, "step": 1830 }, { "epoch": 0.17, "learning_rate": 4.915417854187736e-05, "loss": 1.0422, "step": 1840 }, { "epoch": 0.17, "learning_rate": 4.9149581686126695e-05, "loss": 0.9156, "step": 1850 }, { "epoch": 0.17, "learning_rate": 4.9144984830376025e-05, "loss": 0.9906, "step": 1860 }, { "epoch": 0.17, "learning_rate": 4.9140387974625355e-05, "loss": 0.8811, "step": 1870 }, { "epoch": 0.17, "learning_rate": 4.913579111887469e-05, "loss": 0.9466, "step": 1880 }, { "epoch": 0.17, "learning_rate": 4.913119426312403e-05, "loss": 0.8843, "step": 1890 }, { "epoch": 0.17, "learning_rate": 4.912659740737336e-05, "loss": 1.043, "step": 1900 }, { "epoch": 0.18, "learning_rate": 4.9122000551622694e-05, "loss": 0.8872, "step": 1910 }, { "epoch": 0.18, "learning_rate": 4.911740369587203e-05, "loss": 0.9973, "step": 1920 }, { "epoch": 0.18, "learning_rate": 4.911280684012136e-05, "loss": 0.9862, "step": 1930 }, { "epoch": 0.18, "learning_rate": 4.9108209984370696e-05, "loss": 0.9889, "step": 1940 }, { "epoch": 0.18, "learning_rate": 4.9103613128620026e-05, "loss": 0.9135, "step": 1950 }, { "epoch": 0.18, "learning_rate": 4.9099016272869356e-05, "loss": 0.9208, "step": 1960 }, { "epoch": 0.18, "learning_rate": 4.909441941711869e-05, "loss": 0.9219, "step": 1970 }, { "epoch": 0.18, "learning_rate": 4.908982256136803e-05, "loss": 0.887, "step": 1980 }, { "epoch": 0.18, "learning_rate": 4.908522570561736e-05, "loss": 0.9606, "step": 1990 }, { "epoch": 0.18, "learning_rate": 4.9080628849866695e-05, "loss": 0.9298, "step": 2000 }, { "epoch": 0.18, "learning_rate": 4.907603199411603e-05, "loss": 0.8867, "step": 2010 }, { "epoch": 0.19, "learning_rate": 4.907143513836536e-05, "loss": 0.8856, "step": 2020 }, { "epoch": 0.19, "learning_rate": 4.90668382826147e-05, "loss": 0.8442, "step": 2030 }, { "epoch": 0.19, "learning_rate": 4.906224142686403e-05, "loss": 0.9634, "step": 2040 }, { "epoch": 0.19, "learning_rate": 4.905764457111336e-05, "loss": 0.965, "step": 2050 }, { "epoch": 0.19, "learning_rate": 4.9053047715362693e-05, "loss": 0.8834, "step": 2060 }, { "epoch": 0.19, "learning_rate": 4.904845085961203e-05, "loss": 1.0231, "step": 2070 }, { "epoch": 0.19, "learning_rate": 4.904385400386136e-05, "loss": 0.9807, "step": 2080 }, { "epoch": 0.19, "learning_rate": 4.9039257148110696e-05, "loss": 0.8146, "step": 2090 }, { "epoch": 0.19, "learning_rate": 4.903466029236003e-05, "loss": 0.9296, "step": 2100 }, { "epoch": 0.19, "learning_rate": 4.903006343660936e-05, "loss": 0.831, "step": 2110 }, { "epoch": 0.19, "learning_rate": 4.90254665808587e-05, "loss": 0.8892, "step": 2120 }, { "epoch": 0.2, "learning_rate": 4.902086972510803e-05, "loss": 0.9259, "step": 2130 }, { "epoch": 0.2, "learning_rate": 4.901627286935736e-05, "loss": 1.1263, "step": 2140 }, { "epoch": 0.2, "learning_rate": 4.9011676013606695e-05, "loss": 0.872, "step": 2150 }, { "epoch": 0.2, "learning_rate": 4.900707915785603e-05, "loss": 0.9134, "step": 2160 }, { "epoch": 0.2, "learning_rate": 4.900248230210536e-05, "loss": 0.9481, "step": 2170 }, { "epoch": 0.2, "learning_rate": 4.89978854463547e-05, "loss": 0.8769, "step": 2180 }, { "epoch": 0.2, "learning_rate": 4.8993288590604034e-05, "loss": 0.9643, "step": 2190 }, { "epoch": 0.2, "learning_rate": 4.8988691734853363e-05, "loss": 1.0846, "step": 2200 }, { "epoch": 0.2, "learning_rate": 4.89840948791027e-05, "loss": 0.9085, "step": 2210 }, { "epoch": 0.2, "learning_rate": 4.897949802335203e-05, "loss": 0.8347, "step": 2220 }, { "epoch": 0.21, "learning_rate": 4.897490116760136e-05, "loss": 0.9126, "step": 2230 }, { "epoch": 0.21, "learning_rate": 4.8970304311850696e-05, "loss": 0.7547, "step": 2240 }, { "epoch": 0.21, "learning_rate": 4.896570745610003e-05, "loss": 0.9314, "step": 2250 }, { "epoch": 0.21, "learning_rate": 4.896111060034936e-05, "loss": 0.9754, "step": 2260 }, { "epoch": 0.21, "learning_rate": 4.89565137445987e-05, "loss": 0.8824, "step": 2270 }, { "epoch": 0.21, "learning_rate": 4.8951916888848035e-05, "loss": 1.0279, "step": 2280 }, { "epoch": 0.21, "learning_rate": 4.8947320033097365e-05, "loss": 1.0157, "step": 2290 }, { "epoch": 0.21, "learning_rate": 4.89427231773467e-05, "loss": 0.9303, "step": 2300 }, { "epoch": 0.21, "learning_rate": 4.893812632159603e-05, "loss": 0.9381, "step": 2310 }, { "epoch": 0.21, "learning_rate": 4.893352946584536e-05, "loss": 0.9546, "step": 2320 }, { "epoch": 0.21, "learning_rate": 4.89289326100947e-05, "loss": 0.9939, "step": 2330 }, { "epoch": 0.22, "learning_rate": 4.8924335754344033e-05, "loss": 0.9354, "step": 2340 }, { "epoch": 0.22, "learning_rate": 4.891973889859336e-05, "loss": 1.0036, "step": 2350 }, { "epoch": 0.22, "learning_rate": 4.89151420428427e-05, "loss": 1.0223, "step": 2360 }, { "epoch": 0.22, "learning_rate": 4.8910545187092036e-05, "loss": 0.8981, "step": 2370 }, { "epoch": 0.22, "learning_rate": 4.8905948331341366e-05, "loss": 0.925, "step": 2380 }, { "epoch": 0.22, "learning_rate": 4.89013514755907e-05, "loss": 0.9115, "step": 2390 }, { "epoch": 0.22, "learning_rate": 4.889675461984003e-05, "loss": 1.0528, "step": 2400 }, { "epoch": 0.22, "learning_rate": 4.889215776408936e-05, "loss": 0.9579, "step": 2410 }, { "epoch": 0.22, "learning_rate": 4.88875609083387e-05, "loss": 0.9576, "step": 2420 }, { "epoch": 0.22, "learning_rate": 4.8882964052588035e-05, "loss": 0.8322, "step": 2430 }, { "epoch": 0.22, "learning_rate": 4.8878367196837364e-05, "loss": 0.9541, "step": 2440 }, { "epoch": 0.23, "learning_rate": 4.88737703410867e-05, "loss": 0.8976, "step": 2450 }, { "epoch": 0.23, "learning_rate": 4.886917348533603e-05, "loss": 0.9819, "step": 2460 }, { "epoch": 0.23, "learning_rate": 4.886457662958537e-05, "loss": 1.0292, "step": 2470 }, { "epoch": 0.23, "learning_rate": 4.8859979773834704e-05, "loss": 0.9267, "step": 2480 }, { "epoch": 0.23, "learning_rate": 4.885538291808403e-05, "loss": 1.0393, "step": 2490 }, { "epoch": 0.23, "learning_rate": 4.885078606233336e-05, "loss": 0.9499, "step": 2500 }, { "epoch": 0.23, "learning_rate": 4.88461892065827e-05, "loss": 0.9265, "step": 2510 }, { "epoch": 0.23, "learning_rate": 4.8841592350832036e-05, "loss": 0.8737, "step": 2520 }, { "epoch": 0.23, "learning_rate": 4.8836995495081366e-05, "loss": 0.8004, "step": 2530 }, { "epoch": 0.23, "learning_rate": 4.88323986393307e-05, "loss": 0.932, "step": 2540 }, { "epoch": 0.23, "learning_rate": 4.882780178358003e-05, "loss": 0.9967, "step": 2550 }, { "epoch": 0.24, "learning_rate": 4.882320492782937e-05, "loss": 0.9678, "step": 2560 }, { "epoch": 0.24, "learning_rate": 4.8818608072078705e-05, "loss": 1.0356, "step": 2570 }, { "epoch": 0.24, "learning_rate": 4.8814011216328034e-05, "loss": 0.9875, "step": 2580 }, { "epoch": 0.24, "learning_rate": 4.8809414360577364e-05, "loss": 1.0758, "step": 2590 }, { "epoch": 0.24, "learning_rate": 4.88048175048267e-05, "loss": 0.9232, "step": 2600 }, { "epoch": 0.24, "learning_rate": 4.880022064907604e-05, "loss": 0.9855, "step": 2610 }, { "epoch": 0.24, "learning_rate": 4.879562379332537e-05, "loss": 0.9618, "step": 2620 }, { "epoch": 0.24, "learning_rate": 4.87910269375747e-05, "loss": 0.8927, "step": 2630 }, { "epoch": 0.24, "learning_rate": 4.878643008182403e-05, "loss": 0.9335, "step": 2640 }, { "epoch": 0.24, "learning_rate": 4.878183322607337e-05, "loss": 1.0304, "step": 2650 }, { "epoch": 0.24, "learning_rate": 4.8777236370322706e-05, "loss": 0.8923, "step": 2660 }, { "epoch": 0.25, "learning_rate": 4.8772639514572036e-05, "loss": 1.0377, "step": 2670 }, { "epoch": 0.25, "learning_rate": 4.8768042658821365e-05, "loss": 0.9609, "step": 2680 }, { "epoch": 0.25, "learning_rate": 4.87634458030707e-05, "loss": 0.8888, "step": 2690 }, { "epoch": 0.25, "learning_rate": 4.875884894732004e-05, "loss": 0.9756, "step": 2700 }, { "epoch": 0.25, "learning_rate": 4.875425209156937e-05, "loss": 0.9398, "step": 2710 }, { "epoch": 0.25, "learning_rate": 4.8749655235818704e-05, "loss": 0.8033, "step": 2720 }, { "epoch": 0.25, "learning_rate": 4.8745058380068034e-05, "loss": 0.8771, "step": 2730 }, { "epoch": 0.25, "learning_rate": 4.874046152431737e-05, "loss": 0.8756, "step": 2740 }, { "epoch": 0.25, "learning_rate": 4.873586466856671e-05, "loss": 1.051, "step": 2750 }, { "epoch": 0.25, "learning_rate": 4.873126781281604e-05, "loss": 0.881, "step": 2760 }, { "epoch": 0.25, "learning_rate": 4.8726670957065366e-05, "loss": 0.949, "step": 2770 }, { "epoch": 0.26, "learning_rate": 4.87220741013147e-05, "loss": 1.0034, "step": 2780 }, { "epoch": 0.26, "learning_rate": 4.871747724556403e-05, "loss": 0.9227, "step": 2790 }, { "epoch": 0.26, "learning_rate": 4.871288038981337e-05, "loss": 0.9569, "step": 2800 }, { "epoch": 0.26, "learning_rate": 4.8708283534062706e-05, "loss": 1.0622, "step": 2810 }, { "epoch": 0.26, "learning_rate": 4.8703686678312035e-05, "loss": 0.8858, "step": 2820 }, { "epoch": 0.26, "learning_rate": 4.869908982256137e-05, "loss": 1.0565, "step": 2830 }, { "epoch": 0.26, "learning_rate": 4.869449296681071e-05, "loss": 0.9979, "step": 2840 }, { "epoch": 0.26, "learning_rate": 4.868989611106004e-05, "loss": 0.9144, "step": 2850 }, { "epoch": 0.26, "learning_rate": 4.868529925530937e-05, "loss": 1.0345, "step": 2860 }, { "epoch": 0.26, "learning_rate": 4.8680702399558704e-05, "loss": 0.954, "step": 2870 }, { "epoch": 0.26, "learning_rate": 4.8676105543808034e-05, "loss": 0.9484, "step": 2880 }, { "epoch": 0.27, "learning_rate": 4.867150868805737e-05, "loss": 0.9399, "step": 2890 }, { "epoch": 0.27, "learning_rate": 4.866691183230671e-05, "loss": 1.0062, "step": 2900 }, { "epoch": 0.27, "learning_rate": 4.8662314976556037e-05, "loss": 1.0299, "step": 2910 }, { "epoch": 0.27, "learning_rate": 4.865771812080537e-05, "loss": 0.9574, "step": 2920 }, { "epoch": 0.27, "learning_rate": 4.865312126505471e-05, "loss": 0.9657, "step": 2930 }, { "epoch": 0.27, "learning_rate": 4.864852440930404e-05, "loss": 0.971, "step": 2940 }, { "epoch": 0.27, "learning_rate": 4.864392755355337e-05, "loss": 0.9247, "step": 2950 }, { "epoch": 0.27, "learning_rate": 4.8639330697802705e-05, "loss": 0.8734, "step": 2960 }, { "epoch": 0.27, "learning_rate": 4.8634733842052035e-05, "loss": 1.0099, "step": 2970 }, { "epoch": 0.27, "learning_rate": 4.863013698630137e-05, "loss": 1.0051, "step": 2980 }, { "epoch": 0.27, "learning_rate": 4.862554013055071e-05, "loss": 0.8267, "step": 2990 }, { "epoch": 0.28, "learning_rate": 4.862094327480004e-05, "loss": 0.8826, "step": 3000 }, { "epoch": 0.28, "learning_rate": 4.8616346419049374e-05, "loss": 0.8881, "step": 3010 }, { "epoch": 0.28, "learning_rate": 4.861174956329871e-05, "loss": 1.1478, "step": 3020 }, { "epoch": 0.28, "learning_rate": 4.860715270754804e-05, "loss": 0.844, "step": 3030 }, { "epoch": 0.28, "learning_rate": 4.860255585179737e-05, "loss": 0.8689, "step": 3040 }, { "epoch": 0.28, "learning_rate": 4.8597958996046707e-05, "loss": 0.8529, "step": 3050 }, { "epoch": 0.28, "learning_rate": 4.8593362140296036e-05, "loss": 0.8957, "step": 3060 }, { "epoch": 0.28, "learning_rate": 4.858876528454537e-05, "loss": 0.897, "step": 3070 }, { "epoch": 0.28, "learning_rate": 4.858416842879471e-05, "loss": 0.9625, "step": 3080 }, { "epoch": 0.28, "learning_rate": 4.857957157304404e-05, "loss": 0.9303, "step": 3090 }, { "epoch": 0.29, "learning_rate": 4.8574974717293375e-05, "loss": 0.9855, "step": 3100 }, { "epoch": 0.29, "learning_rate": 4.857037786154271e-05, "loss": 0.9919, "step": 3110 }, { "epoch": 0.29, "learning_rate": 4.8565781005792035e-05, "loss": 1.0804, "step": 3120 }, { "epoch": 0.29, "learning_rate": 4.856118415004137e-05, "loss": 0.8978, "step": 3130 }, { "epoch": 0.29, "learning_rate": 4.855658729429071e-05, "loss": 1.0202, "step": 3140 }, { "epoch": 0.29, "learning_rate": 4.855199043854004e-05, "loss": 0.941, "step": 3150 }, { "epoch": 0.29, "learning_rate": 4.8547393582789374e-05, "loss": 0.9408, "step": 3160 }, { "epoch": 0.29, "learning_rate": 4.854279672703871e-05, "loss": 0.8859, "step": 3170 }, { "epoch": 0.29, "learning_rate": 4.853819987128804e-05, "loss": 0.9506, "step": 3180 }, { "epoch": 0.29, "learning_rate": 4.8533603015537377e-05, "loss": 1.025, "step": 3190 }, { "epoch": 0.29, "learning_rate": 4.852900615978671e-05, "loss": 0.9741, "step": 3200 }, { "epoch": 0.3, "learning_rate": 4.8524409304036036e-05, "loss": 0.9514, "step": 3210 }, { "epoch": 0.3, "learning_rate": 4.851981244828537e-05, "loss": 0.9614, "step": 3220 }, { "epoch": 0.3, "learning_rate": 4.851521559253471e-05, "loss": 1.0121, "step": 3230 }, { "epoch": 0.3, "learning_rate": 4.851061873678404e-05, "loss": 0.8948, "step": 3240 }, { "epoch": 0.3, "learning_rate": 4.8506021881033375e-05, "loss": 0.985, "step": 3250 }, { "epoch": 0.3, "learning_rate": 4.850142502528271e-05, "loss": 0.9391, "step": 3260 }, { "epoch": 0.3, "learning_rate": 4.849682816953204e-05, "loss": 1.0194, "step": 3270 }, { "epoch": 0.3, "learning_rate": 4.849223131378138e-05, "loss": 0.8655, "step": 3280 }, { "epoch": 0.3, "learning_rate": 4.8487634458030714e-05, "loss": 0.9888, "step": 3290 }, { "epoch": 0.3, "learning_rate": 4.848303760228004e-05, "loss": 0.8615, "step": 3300 }, { "epoch": 0.3, "learning_rate": 4.8478440746529374e-05, "loss": 0.9933, "step": 3310 }, { "epoch": 0.31, "learning_rate": 4.847384389077871e-05, "loss": 0.9066, "step": 3320 }, { "epoch": 0.31, "learning_rate": 4.846924703502804e-05, "loss": 0.9087, "step": 3330 }, { "epoch": 0.31, "learning_rate": 4.8464650179277376e-05, "loss": 1.0999, "step": 3340 }, { "epoch": 0.31, "learning_rate": 4.846005332352671e-05, "loss": 1.0572, "step": 3350 }, { "epoch": 0.31, "learning_rate": 4.845545646777604e-05, "loss": 0.8708, "step": 3360 }, { "epoch": 0.31, "learning_rate": 4.845085961202538e-05, "loss": 0.8502, "step": 3370 }, { "epoch": 0.31, "learning_rate": 4.8446262756274715e-05, "loss": 1.1002, "step": 3380 }, { "epoch": 0.31, "learning_rate": 4.844166590052404e-05, "loss": 0.8142, "step": 3390 }, { "epoch": 0.31, "learning_rate": 4.8437069044773375e-05, "loss": 0.9461, "step": 3400 }, { "epoch": 0.31, "learning_rate": 4.843247218902271e-05, "loss": 0.9266, "step": 3410 }, { "epoch": 0.31, "learning_rate": 4.842787533327204e-05, "loss": 0.9287, "step": 3420 }, { "epoch": 0.32, "learning_rate": 4.842327847752138e-05, "loss": 0.9237, "step": 3430 }, { "epoch": 0.32, "learning_rate": 4.8418681621770714e-05, "loss": 0.9715, "step": 3440 }, { "epoch": 0.32, "learning_rate": 4.8414084766020044e-05, "loss": 0.897, "step": 3450 }, { "epoch": 0.32, "learning_rate": 4.840948791026938e-05, "loss": 0.9284, "step": 3460 }, { "epoch": 0.32, "learning_rate": 4.840489105451872e-05, "loss": 0.8379, "step": 3470 }, { "epoch": 0.32, "learning_rate": 4.840029419876804e-05, "loss": 0.8233, "step": 3480 }, { "epoch": 0.32, "learning_rate": 4.8395697343017376e-05, "loss": 0.976, "step": 3490 }, { "epoch": 0.32, "learning_rate": 4.839110048726671e-05, "loss": 0.9462, "step": 3500 }, { "epoch": 0.32, "learning_rate": 4.838650363151604e-05, "loss": 0.9967, "step": 3510 }, { "epoch": 0.32, "learning_rate": 4.838190677576538e-05, "loss": 0.9009, "step": 3520 }, { "epoch": 0.32, "learning_rate": 4.8377309920014715e-05, "loss": 0.9394, "step": 3530 }, { "epoch": 0.33, "learning_rate": 4.8372713064264045e-05, "loss": 0.9478, "step": 3540 }, { "epoch": 0.33, "learning_rate": 4.836811620851338e-05, "loss": 0.9309, "step": 3550 }, { "epoch": 0.33, "learning_rate": 4.836351935276272e-05, "loss": 0.8633, "step": 3560 }, { "epoch": 0.33, "learning_rate": 4.835892249701204e-05, "loss": 1.0007, "step": 3570 }, { "epoch": 0.33, "learning_rate": 4.835432564126138e-05, "loss": 0.9862, "step": 3580 }, { "epoch": 0.33, "learning_rate": 4.8349728785510714e-05, "loss": 0.9759, "step": 3590 }, { "epoch": 0.33, "learning_rate": 4.8345131929760043e-05, "loss": 1.0573, "step": 3600 }, { "epoch": 0.33, "learning_rate": 4.834053507400938e-05, "loss": 0.969, "step": 3610 }, { "epoch": 0.33, "learning_rate": 4.8335938218258716e-05, "loss": 0.9551, "step": 3620 }, { "epoch": 0.33, "learning_rate": 4.8331341362508046e-05, "loss": 0.967, "step": 3630 }, { "epoch": 0.33, "learning_rate": 4.832674450675738e-05, "loss": 0.9396, "step": 3640 }, { "epoch": 0.34, "learning_rate": 4.832214765100672e-05, "loss": 0.9685, "step": 3650 }, { "epoch": 0.34, "learning_rate": 4.831755079525604e-05, "loss": 0.8825, "step": 3660 }, { "epoch": 0.34, "learning_rate": 4.831295393950538e-05, "loss": 0.9393, "step": 3670 }, { "epoch": 0.34, "learning_rate": 4.8308357083754715e-05, "loss": 1.0037, "step": 3680 }, { "epoch": 0.34, "learning_rate": 4.8303760228004045e-05, "loss": 0.8655, "step": 3690 }, { "epoch": 0.34, "learning_rate": 4.829916337225338e-05, "loss": 0.9361, "step": 3700 }, { "epoch": 0.34, "learning_rate": 4.829456651650272e-05, "loss": 1.0242, "step": 3710 }, { "epoch": 0.34, "learning_rate": 4.828996966075205e-05, "loss": 0.9324, "step": 3720 }, { "epoch": 0.34, "learning_rate": 4.8285372805001384e-05, "loss": 0.9658, "step": 3730 }, { "epoch": 0.34, "learning_rate": 4.828077594925072e-05, "loss": 0.9503, "step": 3740 }, { "epoch": 0.34, "learning_rate": 4.827617909350004e-05, "loss": 0.8828, "step": 3750 }, { "epoch": 0.35, "learning_rate": 4.827158223774938e-05, "loss": 0.9675, "step": 3760 }, { "epoch": 0.35, "learning_rate": 4.8266985381998716e-05, "loss": 0.931, "step": 3770 }, { "epoch": 0.35, "learning_rate": 4.8262388526248046e-05, "loss": 0.9289, "step": 3780 }, { "epoch": 0.35, "learning_rate": 4.825779167049738e-05, "loss": 0.9229, "step": 3790 }, { "epoch": 0.35, "learning_rate": 4.825319481474672e-05, "loss": 0.9178, "step": 3800 }, { "epoch": 0.35, "learning_rate": 4.824859795899605e-05, "loss": 0.8967, "step": 3810 }, { "epoch": 0.35, "learning_rate": 4.8244001103245385e-05, "loss": 0.9358, "step": 3820 }, { "epoch": 0.35, "learning_rate": 4.823940424749472e-05, "loss": 1.0154, "step": 3830 }, { "epoch": 0.35, "learning_rate": 4.8234807391744044e-05, "loss": 1.0108, "step": 3840 }, { "epoch": 0.35, "learning_rate": 4.823021053599338e-05, "loss": 0.8626, "step": 3850 }, { "epoch": 0.35, "learning_rate": 4.822561368024272e-05, "loss": 1.0241, "step": 3860 }, { "epoch": 0.36, "learning_rate": 4.822101682449205e-05, "loss": 0.9192, "step": 3870 }, { "epoch": 0.36, "learning_rate": 4.8216419968741383e-05, "loss": 0.9282, "step": 3880 }, { "epoch": 0.36, "learning_rate": 4.821182311299072e-05, "loss": 1.0221, "step": 3890 }, { "epoch": 0.36, "learning_rate": 4.820722625724005e-05, "loss": 1.1095, "step": 3900 }, { "epoch": 0.36, "learning_rate": 4.8202629401489386e-05, "loss": 0.9597, "step": 3910 }, { "epoch": 0.36, "learning_rate": 4.819803254573872e-05, "loss": 0.9161, "step": 3920 }, { "epoch": 0.36, "learning_rate": 4.8193435689988046e-05, "loss": 0.9358, "step": 3930 }, { "epoch": 0.36, "learning_rate": 4.818883883423738e-05, "loss": 0.8769, "step": 3940 }, { "epoch": 0.36, "learning_rate": 4.818424197848672e-05, "loss": 0.9096, "step": 3950 }, { "epoch": 0.36, "learning_rate": 4.817964512273605e-05, "loss": 1.0096, "step": 3960 }, { "epoch": 0.36, "learning_rate": 4.8175048266985385e-05, "loss": 1.0112, "step": 3970 }, { "epoch": 0.37, "learning_rate": 4.817045141123472e-05, "loss": 1.0786, "step": 3980 }, { "epoch": 0.37, "learning_rate": 4.816585455548405e-05, "loss": 0.9748, "step": 3990 }, { "epoch": 0.37, "learning_rate": 4.816125769973339e-05, "loss": 0.8457, "step": 4000 }, { "epoch": 0.37, "learning_rate": 4.8156660843982724e-05, "loss": 0.921, "step": 4010 }, { "epoch": 0.37, "learning_rate": 4.815206398823205e-05, "loss": 1.0226, "step": 4020 }, { "epoch": 0.37, "learning_rate": 4.814746713248138e-05, "loss": 1.0003, "step": 4030 }, { "epoch": 0.37, "learning_rate": 4.814287027673072e-05, "loss": 0.9738, "step": 4040 }, { "epoch": 0.37, "learning_rate": 4.813827342098005e-05, "loss": 1.0444, "step": 4050 }, { "epoch": 0.37, "learning_rate": 4.8133676565229386e-05, "loss": 0.9197, "step": 4060 }, { "epoch": 0.37, "learning_rate": 4.812907970947872e-05, "loss": 0.9679, "step": 4070 }, { "epoch": 0.38, "learning_rate": 4.812448285372805e-05, "loss": 1.0104, "step": 4080 }, { "epoch": 0.38, "learning_rate": 4.811988599797739e-05, "loss": 0.8813, "step": 4090 }, { "epoch": 0.38, "learning_rate": 4.811528914222672e-05, "loss": 0.8844, "step": 4100 }, { "epoch": 0.38, "learning_rate": 4.811069228647605e-05, "loss": 0.9497, "step": 4110 }, { "epoch": 0.38, "learning_rate": 4.8106095430725384e-05, "loss": 0.9705, "step": 4120 }, { "epoch": 0.38, "learning_rate": 4.810149857497472e-05, "loss": 0.8796, "step": 4130 }, { "epoch": 0.38, "learning_rate": 4.809690171922405e-05, "loss": 0.8341, "step": 4140 }, { "epoch": 0.38, "learning_rate": 4.809230486347339e-05, "loss": 1.0386, "step": 4150 }, { "epoch": 0.38, "learning_rate": 4.8087708007722724e-05, "loss": 0.9074, "step": 4160 }, { "epoch": 0.38, "learning_rate": 4.808311115197205e-05, "loss": 1.0244, "step": 4170 }, { "epoch": 0.38, "learning_rate": 4.807851429622139e-05, "loss": 0.9296, "step": 4180 }, { "epoch": 0.39, "learning_rate": 4.807391744047072e-05, "loss": 0.8878, "step": 4190 }, { "epoch": 0.39, "learning_rate": 4.806932058472005e-05, "loss": 1.0382, "step": 4200 }, { "epoch": 0.39, "learning_rate": 4.8064723728969386e-05, "loss": 0.8711, "step": 4210 }, { "epoch": 0.39, "learning_rate": 4.806012687321872e-05, "loss": 0.9117, "step": 4220 }, { "epoch": 0.39, "learning_rate": 4.805553001746805e-05, "loss": 0.9209, "step": 4230 }, { "epoch": 0.39, "learning_rate": 4.805093316171739e-05, "loss": 0.9701, "step": 4240 }, { "epoch": 0.39, "learning_rate": 4.8046336305966725e-05, "loss": 1.0646, "step": 4250 }, { "epoch": 0.39, "learning_rate": 4.8041739450216054e-05, "loss": 0.9933, "step": 4260 }, { "epoch": 0.39, "learning_rate": 4.803714259446539e-05, "loss": 1.0548, "step": 4270 }, { "epoch": 0.39, "learning_rate": 4.803254573871472e-05, "loss": 1.0018, "step": 4280 }, { "epoch": 0.39, "learning_rate": 4.802794888296405e-05, "loss": 0.943, "step": 4290 }, { "epoch": 0.4, "learning_rate": 4.802335202721339e-05, "loss": 0.9669, "step": 4300 }, { "epoch": 0.4, "learning_rate": 4.801875517146272e-05, "loss": 1.0725, "step": 4310 }, { "epoch": 0.4, "learning_rate": 4.801415831571205e-05, "loss": 0.9105, "step": 4320 }, { "epoch": 0.4, "learning_rate": 4.800956145996139e-05, "loss": 0.9969, "step": 4330 }, { "epoch": 0.4, "learning_rate": 4.8004964604210726e-05, "loss": 1.0874, "step": 4340 }, { "epoch": 0.4, "learning_rate": 4.8000367748460056e-05, "loss": 0.9017, "step": 4350 }, { "epoch": 0.4, "learning_rate": 4.799577089270939e-05, "loss": 0.9318, "step": 4360 }, { "epoch": 0.4, "learning_rate": 4.799117403695872e-05, "loss": 0.8452, "step": 4370 }, { "epoch": 0.4, "learning_rate": 4.798657718120805e-05, "loss": 1.0091, "step": 4380 }, { "epoch": 0.4, "learning_rate": 4.798198032545739e-05, "loss": 0.9686, "step": 4390 }, { "epoch": 0.4, "learning_rate": 4.7977383469706724e-05, "loss": 0.9686, "step": 4400 }, { "epoch": 0.41, "learning_rate": 4.7972786613956054e-05, "loss": 0.9917, "step": 4410 }, { "epoch": 0.41, "learning_rate": 4.796818975820539e-05, "loss": 1.0095, "step": 4420 }, { "epoch": 0.41, "learning_rate": 4.796359290245472e-05, "loss": 0.8846, "step": 4430 }, { "epoch": 0.41, "learning_rate": 4.795899604670406e-05, "loss": 0.8445, "step": 4440 }, { "epoch": 0.41, "learning_rate": 4.795439919095339e-05, "loss": 0.9852, "step": 4450 }, { "epoch": 0.41, "learning_rate": 4.794980233520272e-05, "loss": 1.0499, "step": 4460 }, { "epoch": 0.41, "learning_rate": 4.794520547945205e-05, "loss": 0.9371, "step": 4470 }, { "epoch": 0.41, "learning_rate": 4.794060862370139e-05, "loss": 0.9536, "step": 4480 }, { "epoch": 0.41, "learning_rate": 4.7936011767950726e-05, "loss": 0.878, "step": 4490 }, { "epoch": 0.41, "learning_rate": 4.7931414912200055e-05, "loss": 1.0025, "step": 4500 }, { "epoch": 0.41, "learning_rate": 4.792681805644939e-05, "loss": 0.9968, "step": 4510 }, { "epoch": 0.42, "learning_rate": 4.792222120069872e-05, "loss": 0.972, "step": 4520 }, { "epoch": 0.42, "learning_rate": 4.791762434494806e-05, "loss": 1.0427, "step": 4530 }, { "epoch": 0.42, "learning_rate": 4.7913027489197395e-05, "loss": 1.0409, "step": 4540 }, { "epoch": 0.42, "learning_rate": 4.7908430633446724e-05, "loss": 0.9942, "step": 4550 }, { "epoch": 0.42, "learning_rate": 4.7903833777696054e-05, "loss": 0.989, "step": 4560 }, { "epoch": 0.42, "learning_rate": 4.789923692194539e-05, "loss": 0.9263, "step": 4570 }, { "epoch": 0.42, "learning_rate": 4.789464006619473e-05, "loss": 0.9706, "step": 4580 }, { "epoch": 0.42, "learning_rate": 4.7890043210444057e-05, "loss": 0.9298, "step": 4590 }, { "epoch": 0.42, "learning_rate": 4.788544635469339e-05, "loss": 0.7919, "step": 4600 }, { "epoch": 0.42, "learning_rate": 4.788084949894272e-05, "loss": 0.9605, "step": 4610 }, { "epoch": 0.42, "learning_rate": 4.787625264319206e-05, "loss": 1.0069, "step": 4620 }, { "epoch": 0.43, "learning_rate": 4.7871655787441396e-05, "loss": 0.9832, "step": 4630 }, { "epoch": 0.43, "learning_rate": 4.7867058931690725e-05, "loss": 0.9184, "step": 4640 }, { "epoch": 0.43, "learning_rate": 4.7862462075940055e-05, "loss": 0.9883, "step": 4650 }, { "epoch": 0.43, "learning_rate": 4.785786522018939e-05, "loss": 0.9971, "step": 4660 }, { "epoch": 0.43, "learning_rate": 4.785326836443873e-05, "loss": 0.9714, "step": 4670 }, { "epoch": 0.43, "learning_rate": 4.784867150868806e-05, "loss": 0.9607, "step": 4680 }, { "epoch": 0.43, "learning_rate": 4.7844074652937394e-05, "loss": 1.0062, "step": 4690 }, { "epoch": 0.43, "learning_rate": 4.7839477797186724e-05, "loss": 0.8788, "step": 4700 }, { "epoch": 0.43, "learning_rate": 4.783488094143606e-05, "loss": 0.9645, "step": 4710 }, { "epoch": 0.43, "learning_rate": 4.78302840856854e-05, "loss": 0.9656, "step": 4720 }, { "epoch": 0.43, "learning_rate": 4.782568722993473e-05, "loss": 0.9687, "step": 4730 }, { "epoch": 0.44, "learning_rate": 4.7821090374184056e-05, "loss": 0.9983, "step": 4740 }, { "epoch": 0.44, "learning_rate": 4.781649351843339e-05, "loss": 0.9814, "step": 4750 }, { "epoch": 0.44, "learning_rate": 4.781189666268272e-05, "loss": 0.899, "step": 4760 }, { "epoch": 0.44, "learning_rate": 4.780729980693206e-05, "loss": 0.9326, "step": 4770 }, { "epoch": 0.44, "learning_rate": 4.7802702951181395e-05, "loss": 0.9149, "step": 4780 }, { "epoch": 0.44, "learning_rate": 4.7798106095430725e-05, "loss": 0.9339, "step": 4790 }, { "epoch": 0.44, "learning_rate": 4.779350923968006e-05, "loss": 0.9642, "step": 4800 }, { "epoch": 0.44, "learning_rate": 4.77889123839294e-05, "loss": 0.9197, "step": 4810 }, { "epoch": 0.44, "learning_rate": 4.778431552817873e-05, "loss": 0.9412, "step": 4820 }, { "epoch": 0.44, "learning_rate": 4.777971867242806e-05, "loss": 0.9661, "step": 4830 }, { "epoch": 0.44, "learning_rate": 4.7775121816677394e-05, "loss": 0.972, "step": 4840 }, { "epoch": 0.45, "learning_rate": 4.7770524960926724e-05, "loss": 0.9623, "step": 4850 }, { "epoch": 0.45, "learning_rate": 4.776592810517606e-05, "loss": 1.0311, "step": 4860 }, { "epoch": 0.45, "learning_rate": 4.77613312494254e-05, "loss": 1.0047, "step": 4870 }, { "epoch": 0.45, "learning_rate": 4.7756734393674726e-05, "loss": 0.9579, "step": 4880 }, { "epoch": 0.45, "learning_rate": 4.775213753792406e-05, "loss": 0.9257, "step": 4890 }, { "epoch": 0.45, "learning_rate": 4.77475406821734e-05, "loss": 0.9912, "step": 4900 }, { "epoch": 0.45, "learning_rate": 4.774294382642273e-05, "loss": 0.8673, "step": 4910 }, { "epoch": 0.45, "learning_rate": 4.773834697067206e-05, "loss": 0.8715, "step": 4920 }, { "epoch": 0.45, "learning_rate": 4.7733750114921395e-05, "loss": 0.8165, "step": 4930 }, { "epoch": 0.45, "learning_rate": 4.7729153259170725e-05, "loss": 0.9617, "step": 4940 }, { "epoch": 0.46, "learning_rate": 4.772455640342006e-05, "loss": 0.9032, "step": 4950 }, { "epoch": 0.46, "learning_rate": 4.77199595476694e-05, "loss": 0.9404, "step": 4960 }, { "epoch": 0.46, "learning_rate": 4.771536269191873e-05, "loss": 0.9519, "step": 4970 }, { "epoch": 0.46, "learning_rate": 4.7710765836168064e-05, "loss": 0.922, "step": 4980 }, { "epoch": 0.46, "learning_rate": 4.77061689804174e-05, "loss": 1.0137, "step": 4990 }, { "epoch": 0.46, "learning_rate": 4.770157212466673e-05, "loss": 0.9427, "step": 5000 }, { "epoch": 0.46, "eval_accuracy": 0.5388646288209606, "eval_loss": 0.9456362724304199, "eval_runtime": 159.6722, "eval_samples_per_second": 28.684, "eval_steps_per_second": 3.589, "step": 5000 }, { "epoch": 0.46, "learning_rate": 4.769697526891606e-05, "loss": 0.9568, "step": 5010 }, { "epoch": 0.46, "learning_rate": 4.7692378413165396e-05, "loss": 0.8912, "step": 5020 }, { "epoch": 0.46, "learning_rate": 4.7687781557414726e-05, "loss": 0.9492, "step": 5030 }, { "epoch": 0.46, "learning_rate": 4.768318470166406e-05, "loss": 1.0019, "step": 5040 }, { "epoch": 0.46, "learning_rate": 4.76785878459134e-05, "loss": 0.9071, "step": 5050 }, { "epoch": 0.47, "learning_rate": 4.767399099016273e-05, "loss": 0.9758, "step": 5060 }, { "epoch": 0.47, "learning_rate": 4.7669394134412065e-05, "loss": 0.9423, "step": 5070 }, { "epoch": 0.47, "learning_rate": 4.76647972786614e-05, "loss": 1.0457, "step": 5080 }, { "epoch": 0.47, "learning_rate": 4.766020042291073e-05, "loss": 0.9989, "step": 5090 }, { "epoch": 0.47, "learning_rate": 4.765560356716007e-05, "loss": 0.9192, "step": 5100 }, { "epoch": 0.47, "learning_rate": 4.76510067114094e-05, "loss": 0.907, "step": 5110 }, { "epoch": 0.47, "learning_rate": 4.764640985565873e-05, "loss": 0.9327, "step": 5120 }, { "epoch": 0.47, "learning_rate": 4.7641812999908064e-05, "loss": 0.7998, "step": 5130 }, { "epoch": 0.47, "learning_rate": 4.76372161441574e-05, "loss": 0.8857, "step": 5140 }, { "epoch": 0.47, "learning_rate": 4.763261928840673e-05, "loss": 1.0188, "step": 5150 }, { "epoch": 0.47, "learning_rate": 4.7628022432656066e-05, "loss": 0.9446, "step": 5160 }, { "epoch": 0.48, "learning_rate": 4.76234255769054e-05, "loss": 0.8976, "step": 5170 }, { "epoch": 0.48, "learning_rate": 4.761882872115473e-05, "loss": 1.1064, "step": 5180 }, { "epoch": 0.48, "learning_rate": 4.761423186540407e-05, "loss": 0.9045, "step": 5190 }, { "epoch": 0.48, "learning_rate": 4.76096350096534e-05, "loss": 0.9362, "step": 5200 }, { "epoch": 0.48, "learning_rate": 4.760503815390273e-05, "loss": 0.9063, "step": 5210 }, { "epoch": 0.48, "learning_rate": 4.7600441298152065e-05, "loss": 0.9325, "step": 5220 }, { "epoch": 0.48, "learning_rate": 4.75958444424014e-05, "loss": 0.9443, "step": 5230 }, { "epoch": 0.48, "learning_rate": 4.759124758665073e-05, "loss": 0.9077, "step": 5240 }, { "epoch": 0.48, "learning_rate": 4.758665073090007e-05, "loss": 0.9318, "step": 5250 }, { "epoch": 0.48, "learning_rate": 4.7582053875149404e-05, "loss": 0.9981, "step": 5260 }, { "epoch": 0.48, "learning_rate": 4.7577457019398734e-05, "loss": 1.0895, "step": 5270 }, { "epoch": 0.49, "learning_rate": 4.757286016364807e-05, "loss": 0.9403, "step": 5280 }, { "epoch": 0.49, "learning_rate": 4.75682633078974e-05, "loss": 0.993, "step": 5290 }, { "epoch": 0.49, "learning_rate": 4.756366645214673e-05, "loss": 0.9024, "step": 5300 }, { "epoch": 0.49, "learning_rate": 4.7559069596396066e-05, "loss": 0.8814, "step": 5310 }, { "epoch": 0.49, "learning_rate": 4.75544727406454e-05, "loss": 0.9604, "step": 5320 }, { "epoch": 0.49, "learning_rate": 4.754987588489473e-05, "loss": 0.9268, "step": 5330 }, { "epoch": 0.49, "learning_rate": 4.754527902914407e-05, "loss": 0.9647, "step": 5340 }, { "epoch": 0.49, "learning_rate": 4.7540682173393405e-05, "loss": 0.8258, "step": 5350 }, { "epoch": 0.49, "learning_rate": 4.7536085317642735e-05, "loss": 0.9386, "step": 5360 }, { "epoch": 0.49, "learning_rate": 4.753148846189207e-05, "loss": 0.9623, "step": 5370 }, { "epoch": 0.49, "learning_rate": 4.75268916061414e-05, "loss": 0.9751, "step": 5380 }, { "epoch": 0.5, "learning_rate": 4.752229475039073e-05, "loss": 0.9381, "step": 5390 }, { "epoch": 0.5, "learning_rate": 4.751769789464007e-05, "loss": 1.043, "step": 5400 }, { "epoch": 0.5, "learning_rate": 4.7513101038889404e-05, "loss": 0.888, "step": 5410 }, { "epoch": 0.5, "learning_rate": 4.7508504183138734e-05, "loss": 1.0717, "step": 5420 }, { "epoch": 0.5, "learning_rate": 4.750390732738807e-05, "loss": 0.9471, "step": 5430 }, { "epoch": 0.5, "learning_rate": 4.7499310471637406e-05, "loss": 0.8793, "step": 5440 }, { "epoch": 0.5, "learning_rate": 4.7494713615886736e-05, "loss": 0.9956, "step": 5450 }, { "epoch": 0.5, "learning_rate": 4.749011676013607e-05, "loss": 0.9041, "step": 5460 }, { "epoch": 0.5, "learning_rate": 4.74855199043854e-05, "loss": 0.9561, "step": 5470 }, { "epoch": 0.5, "learning_rate": 4.748092304863473e-05, "loss": 0.9218, "step": 5480 }, { "epoch": 0.5, "learning_rate": 4.747632619288407e-05, "loss": 0.9512, "step": 5490 }, { "epoch": 0.51, "learning_rate": 4.7471729337133405e-05, "loss": 0.9194, "step": 5500 }, { "epoch": 0.51, "learning_rate": 4.7467132481382735e-05, "loss": 0.8455, "step": 5510 }, { "epoch": 0.51, "learning_rate": 4.746253562563207e-05, "loss": 0.8755, "step": 5520 }, { "epoch": 0.51, "learning_rate": 4.745793876988141e-05, "loss": 0.9611, "step": 5530 }, { "epoch": 0.51, "learning_rate": 4.745334191413074e-05, "loss": 0.9257, "step": 5540 }, { "epoch": 0.51, "learning_rate": 4.7448745058380074e-05, "loss": 0.95, "step": 5550 }, { "epoch": 0.51, "learning_rate": 4.7444148202629404e-05, "loss": 0.9126, "step": 5560 }, { "epoch": 0.51, "learning_rate": 4.743955134687873e-05, "loss": 0.96, "step": 5570 }, { "epoch": 0.51, "learning_rate": 4.743495449112807e-05, "loss": 0.981, "step": 5580 }, { "epoch": 0.51, "learning_rate": 4.7430357635377406e-05, "loss": 0.8837, "step": 5590 }, { "epoch": 0.51, "learning_rate": 4.7425760779626736e-05, "loss": 0.8876, "step": 5600 }, { "epoch": 0.52, "learning_rate": 4.742116392387607e-05, "loss": 0.9981, "step": 5610 }, { "epoch": 0.52, "learning_rate": 4.741656706812541e-05, "loss": 0.8476, "step": 5620 }, { "epoch": 0.52, "learning_rate": 4.741197021237474e-05, "loss": 0.9011, "step": 5630 }, { "epoch": 0.52, "learning_rate": 4.7407373356624075e-05, "loss": 1.0251, "step": 5640 }, { "epoch": 0.52, "learning_rate": 4.7402776500873405e-05, "loss": 1.0021, "step": 5650 }, { "epoch": 0.52, "learning_rate": 4.7398179645122734e-05, "loss": 1.0356, "step": 5660 }, { "epoch": 0.52, "learning_rate": 4.739358278937207e-05, "loss": 1.016, "step": 5670 }, { "epoch": 0.52, "learning_rate": 4.738898593362141e-05, "loss": 0.9401, "step": 5680 }, { "epoch": 0.52, "learning_rate": 4.738438907787074e-05, "loss": 0.9111, "step": 5690 }, { "epoch": 0.52, "learning_rate": 4.7379792222120074e-05, "loss": 0.9158, "step": 5700 }, { "epoch": 0.52, "learning_rate": 4.737519536636941e-05, "loss": 0.7603, "step": 5710 }, { "epoch": 0.53, "learning_rate": 4.737059851061874e-05, "loss": 0.8553, "step": 5720 }, { "epoch": 0.53, "learning_rate": 4.7366001654868076e-05, "loss": 0.8371, "step": 5730 }, { "epoch": 0.53, "learning_rate": 4.7361404799117406e-05, "loss": 1.0102, "step": 5740 }, { "epoch": 0.53, "learning_rate": 4.7356807943366736e-05, "loss": 0.9413, "step": 5750 }, { "epoch": 0.53, "learning_rate": 4.735221108761607e-05, "loss": 0.9853, "step": 5760 }, { "epoch": 0.53, "learning_rate": 4.734761423186541e-05, "loss": 0.9161, "step": 5770 }, { "epoch": 0.53, "learning_rate": 4.734301737611474e-05, "loss": 0.9149, "step": 5780 }, { "epoch": 0.53, "learning_rate": 4.7338420520364075e-05, "loss": 0.8176, "step": 5790 }, { "epoch": 0.53, "learning_rate": 4.733382366461341e-05, "loss": 0.9692, "step": 5800 }, { "epoch": 0.53, "learning_rate": 4.732922680886274e-05, "loss": 0.9556, "step": 5810 }, { "epoch": 0.54, "learning_rate": 4.732462995311208e-05, "loss": 0.8998, "step": 5820 }, { "epoch": 0.54, "learning_rate": 4.732003309736141e-05, "loss": 0.9182, "step": 5830 }, { "epoch": 0.54, "learning_rate": 4.731543624161074e-05, "loss": 0.9012, "step": 5840 }, { "epoch": 0.54, "learning_rate": 4.731083938586007e-05, "loss": 0.845, "step": 5850 }, { "epoch": 0.54, "learning_rate": 4.730624253010941e-05, "loss": 0.8313, "step": 5860 }, { "epoch": 0.54, "learning_rate": 4.730164567435874e-05, "loss": 0.8624, "step": 5870 }, { "epoch": 0.54, "learning_rate": 4.7297048818608076e-05, "loss": 1.0908, "step": 5880 }, { "epoch": 0.54, "learning_rate": 4.729245196285741e-05, "loss": 0.9056, "step": 5890 }, { "epoch": 0.54, "learning_rate": 4.728785510710674e-05, "loss": 0.8573, "step": 5900 }, { "epoch": 0.54, "learning_rate": 4.728325825135608e-05, "loss": 0.9547, "step": 5910 }, { "epoch": 0.54, "learning_rate": 4.727866139560541e-05, "loss": 0.8992, "step": 5920 }, { "epoch": 0.55, "learning_rate": 4.727406453985474e-05, "loss": 0.9014, "step": 5930 }, { "epoch": 0.55, "learning_rate": 4.7269467684104075e-05, "loss": 0.896, "step": 5940 }, { "epoch": 0.55, "learning_rate": 4.726487082835341e-05, "loss": 0.9668, "step": 5950 }, { "epoch": 0.55, "learning_rate": 4.726027397260274e-05, "loss": 1.0132, "step": 5960 }, { "epoch": 0.55, "learning_rate": 4.725567711685208e-05, "loss": 0.8343, "step": 5970 }, { "epoch": 0.55, "learning_rate": 4.7251080261101414e-05, "loss": 0.9907, "step": 5980 }, { "epoch": 0.55, "learning_rate": 4.724648340535074e-05, "loss": 0.877, "step": 5990 }, { "epoch": 0.55, "learning_rate": 4.724188654960008e-05, "loss": 0.9711, "step": 6000 }, { "epoch": 0.55, "learning_rate": 4.723728969384941e-05, "loss": 0.9057, "step": 6010 }, { "epoch": 0.55, "learning_rate": 4.723269283809874e-05, "loss": 0.8324, "step": 6020 }, { "epoch": 0.55, "learning_rate": 4.7228095982348076e-05, "loss": 0.9007, "step": 6030 }, { "epoch": 0.56, "learning_rate": 4.722349912659741e-05, "loss": 0.9415, "step": 6040 }, { "epoch": 0.56, "learning_rate": 4.721890227084674e-05, "loss": 0.8663, "step": 6050 }, { "epoch": 0.56, "learning_rate": 4.721430541509608e-05, "loss": 0.936, "step": 6060 }, { "epoch": 0.56, "learning_rate": 4.720970855934541e-05, "loss": 0.9742, "step": 6070 }, { "epoch": 0.56, "learning_rate": 4.7205111703594745e-05, "loss": 1.0575, "step": 6080 }, { "epoch": 0.56, "learning_rate": 4.720051484784408e-05, "loss": 0.9156, "step": 6090 }, { "epoch": 0.56, "learning_rate": 4.719591799209341e-05, "loss": 0.9435, "step": 6100 }, { "epoch": 0.56, "learning_rate": 4.719132113634274e-05, "loss": 0.8532, "step": 6110 }, { "epoch": 0.56, "learning_rate": 4.718672428059208e-05, "loss": 0.9434, "step": 6120 }, { "epoch": 0.56, "learning_rate": 4.718212742484141e-05, "loss": 0.92, "step": 6130 }, { "epoch": 0.56, "learning_rate": 4.717753056909074e-05, "loss": 0.9286, "step": 6140 }, { "epoch": 0.57, "learning_rate": 4.717293371334008e-05, "loss": 0.9871, "step": 6150 }, { "epoch": 0.57, "learning_rate": 4.716833685758941e-05, "loss": 0.8818, "step": 6160 }, { "epoch": 0.57, "learning_rate": 4.7163740001838746e-05, "loss": 0.8258, "step": 6170 }, { "epoch": 0.57, "learning_rate": 4.715914314608808e-05, "loss": 0.9504, "step": 6180 }, { "epoch": 0.57, "learning_rate": 4.715454629033741e-05, "loss": 0.8188, "step": 6190 }, { "epoch": 0.57, "learning_rate": 4.714994943458674e-05, "loss": 1.0046, "step": 6200 }, { "epoch": 0.57, "learning_rate": 4.714535257883608e-05, "loss": 0.9748, "step": 6210 }, { "epoch": 0.57, "learning_rate": 4.7140755723085415e-05, "loss": 1.0456, "step": 6220 }, { "epoch": 0.57, "learning_rate": 4.7136158867334744e-05, "loss": 0.9625, "step": 6230 }, { "epoch": 0.57, "learning_rate": 4.713156201158408e-05, "loss": 0.8917, "step": 6240 }, { "epoch": 0.57, "learning_rate": 4.712696515583341e-05, "loss": 0.9744, "step": 6250 }, { "epoch": 0.58, "learning_rate": 4.712236830008275e-05, "loss": 0.849, "step": 6260 }, { "epoch": 0.58, "learning_rate": 4.7117771444332083e-05, "loss": 0.9506, "step": 6270 }, { "epoch": 0.58, "learning_rate": 4.711317458858141e-05, "loss": 0.8776, "step": 6280 }, { "epoch": 0.58, "learning_rate": 4.710857773283074e-05, "loss": 0.874, "step": 6290 }, { "epoch": 0.58, "learning_rate": 4.710398087708008e-05, "loss": 0.8853, "step": 6300 }, { "epoch": 0.58, "learning_rate": 4.7099384021329416e-05, "loss": 1.0371, "step": 6310 }, { "epoch": 0.58, "learning_rate": 4.7094787165578745e-05, "loss": 0.9104, "step": 6320 }, { "epoch": 0.58, "learning_rate": 4.709019030982808e-05, "loss": 0.8479, "step": 6330 }, { "epoch": 0.58, "learning_rate": 4.708559345407741e-05, "loss": 0.8877, "step": 6340 }, { "epoch": 0.58, "learning_rate": 4.708099659832675e-05, "loss": 0.9463, "step": 6350 }, { "epoch": 0.58, "learning_rate": 4.7076399742576085e-05, "loss": 0.9059, "step": 6360 }, { "epoch": 0.59, "learning_rate": 4.7071802886825414e-05, "loss": 1.0372, "step": 6370 }, { "epoch": 0.59, "learning_rate": 4.7067206031074744e-05, "loss": 0.8248, "step": 6380 }, { "epoch": 0.59, "learning_rate": 4.706260917532408e-05, "loss": 0.8908, "step": 6390 }, { "epoch": 0.59, "learning_rate": 4.705801231957341e-05, "loss": 1.002, "step": 6400 }, { "epoch": 0.59, "learning_rate": 4.705341546382275e-05, "loss": 1.0619, "step": 6410 }, { "epoch": 0.59, "learning_rate": 4.704881860807208e-05, "loss": 0.9716, "step": 6420 }, { "epoch": 0.59, "learning_rate": 4.704422175232141e-05, "loss": 0.9142, "step": 6430 }, { "epoch": 0.59, "learning_rate": 4.703962489657075e-05, "loss": 0.9567, "step": 6440 }, { "epoch": 0.59, "learning_rate": 4.7035028040820086e-05, "loss": 0.8968, "step": 6450 }, { "epoch": 0.59, "learning_rate": 4.7030431185069416e-05, "loss": 0.7701, "step": 6460 }, { "epoch": 0.59, "learning_rate": 4.7025834329318745e-05, "loss": 0.8457, "step": 6470 }, { "epoch": 0.6, "learning_rate": 4.702123747356808e-05, "loss": 0.7709, "step": 6480 }, { "epoch": 0.6, "learning_rate": 4.701664061781741e-05, "loss": 0.9908, "step": 6490 }, { "epoch": 0.6, "learning_rate": 4.701204376206675e-05, "loss": 1.0772, "step": 6500 }, { "epoch": 0.6, "learning_rate": 4.7007446906316084e-05, "loss": 0.8958, "step": 6510 }, { "epoch": 0.6, "learning_rate": 4.7002850050565414e-05, "loss": 0.9754, "step": 6520 }, { "epoch": 0.6, "learning_rate": 4.699825319481475e-05, "loss": 1.0278, "step": 6530 }, { "epoch": 0.6, "learning_rate": 4.699365633906409e-05, "loss": 0.9408, "step": 6540 }, { "epoch": 0.6, "learning_rate": 4.698905948331342e-05, "loss": 0.9297, "step": 6550 }, { "epoch": 0.6, "learning_rate": 4.6984462627562746e-05, "loss": 1.0081, "step": 6560 }, { "epoch": 0.6, "learning_rate": 4.697986577181208e-05, "loss": 0.8354, "step": 6570 }, { "epoch": 0.6, "learning_rate": 4.697526891606141e-05, "loss": 0.8158, "step": 6580 }, { "epoch": 0.61, "learning_rate": 4.697067206031075e-05, "loss": 0.7737, "step": 6590 }, { "epoch": 0.61, "learning_rate": 4.6966075204560086e-05, "loss": 0.8756, "step": 6600 }, { "epoch": 0.61, "learning_rate": 4.6961478348809415e-05, "loss": 1.0093, "step": 6610 }, { "epoch": 0.61, "learning_rate": 4.695688149305875e-05, "loss": 0.9151, "step": 6620 }, { "epoch": 0.61, "learning_rate": 4.695228463730809e-05, "loss": 0.9182, "step": 6630 }, { "epoch": 0.61, "learning_rate": 4.694768778155742e-05, "loss": 0.9575, "step": 6640 }, { "epoch": 0.61, "learning_rate": 4.694309092580675e-05, "loss": 0.8997, "step": 6650 }, { "epoch": 0.61, "learning_rate": 4.6938494070056084e-05, "loss": 1.0191, "step": 6660 }, { "epoch": 0.61, "learning_rate": 4.6933897214305414e-05, "loss": 0.921, "step": 6670 }, { "epoch": 0.61, "learning_rate": 4.692930035855475e-05, "loss": 0.9304, "step": 6680 }, { "epoch": 0.62, "learning_rate": 4.692470350280409e-05, "loss": 0.9434, "step": 6690 }, { "epoch": 0.62, "learning_rate": 4.6920106647053416e-05, "loss": 1.03, "step": 6700 }, { "epoch": 0.62, "learning_rate": 4.691550979130275e-05, "loss": 0.993, "step": 6710 }, { "epoch": 0.62, "learning_rate": 4.691091293555209e-05, "loss": 0.9933, "step": 6720 }, { "epoch": 0.62, "learning_rate": 4.690631607980141e-05, "loss": 0.8856, "step": 6730 }, { "epoch": 0.62, "learning_rate": 4.690171922405075e-05, "loss": 0.9776, "step": 6740 }, { "epoch": 0.62, "learning_rate": 4.6897122368300085e-05, "loss": 0.9305, "step": 6750 }, { "epoch": 0.62, "learning_rate": 4.6892525512549415e-05, "loss": 0.8587, "step": 6760 }, { "epoch": 0.62, "learning_rate": 4.688792865679875e-05, "loss": 0.8586, "step": 6770 }, { "epoch": 0.62, "learning_rate": 4.688333180104809e-05, "loss": 0.8837, "step": 6780 }, { "epoch": 0.62, "learning_rate": 4.687873494529742e-05, "loss": 0.9914, "step": 6790 }, { "epoch": 0.63, "learning_rate": 4.6874138089546754e-05, "loss": 0.8536, "step": 6800 }, { "epoch": 0.63, "learning_rate": 4.686954123379609e-05, "loss": 1.0221, "step": 6810 }, { "epoch": 0.63, "learning_rate": 4.6864944378045414e-05, "loss": 0.8317, "step": 6820 }, { "epoch": 0.63, "learning_rate": 4.686034752229475e-05, "loss": 0.8937, "step": 6830 }, { "epoch": 0.63, "learning_rate": 4.6855750666544086e-05, "loss": 0.9584, "step": 6840 }, { "epoch": 0.63, "learning_rate": 4.6851153810793416e-05, "loss": 0.8973, "step": 6850 }, { "epoch": 0.63, "learning_rate": 4.684655695504275e-05, "loss": 0.8727, "step": 6860 }, { "epoch": 0.63, "learning_rate": 4.684196009929209e-05, "loss": 0.8199, "step": 6870 }, { "epoch": 0.63, "learning_rate": 4.683736324354142e-05, "loss": 0.9241, "step": 6880 }, { "epoch": 0.63, "learning_rate": 4.6832766387790755e-05, "loss": 0.8119, "step": 6890 }, { "epoch": 0.63, "learning_rate": 4.682816953204009e-05, "loss": 0.9161, "step": 6900 }, { "epoch": 0.64, "learning_rate": 4.6823572676289415e-05, "loss": 0.9221, "step": 6910 }, { "epoch": 0.64, "learning_rate": 4.681897582053875e-05, "loss": 0.9031, "step": 6920 }, { "epoch": 0.64, "learning_rate": 4.681437896478809e-05, "loss": 0.9187, "step": 6930 }, { "epoch": 0.64, "learning_rate": 4.680978210903742e-05, "loss": 0.8815, "step": 6940 }, { "epoch": 0.64, "learning_rate": 4.6805185253286754e-05, "loss": 1.0057, "step": 6950 }, { "epoch": 0.64, "learning_rate": 4.680058839753609e-05, "loss": 0.9811, "step": 6960 }, { "epoch": 0.64, "learning_rate": 4.679599154178542e-05, "loss": 0.9313, "step": 6970 }, { "epoch": 0.64, "learning_rate": 4.6791394686034756e-05, "loss": 0.9516, "step": 6980 }, { "epoch": 0.64, "learning_rate": 4.678679783028409e-05, "loss": 0.9074, "step": 6990 }, { "epoch": 0.64, "learning_rate": 4.6782200974533416e-05, "loss": 0.9564, "step": 7000 }, { "epoch": 0.64, "learning_rate": 4.677760411878275e-05, "loss": 0.8665, "step": 7010 }, { "epoch": 0.65, "learning_rate": 4.677300726303209e-05, "loss": 0.9682, "step": 7020 }, { "epoch": 0.65, "learning_rate": 4.676841040728142e-05, "loss": 0.901, "step": 7030 }, { "epoch": 0.65, "learning_rate": 4.6763813551530755e-05, "loss": 0.9427, "step": 7040 }, { "epoch": 0.65, "learning_rate": 4.675921669578009e-05, "loss": 0.9436, "step": 7050 }, { "epoch": 0.65, "learning_rate": 4.675461984002942e-05, "loss": 0.9431, "step": 7060 }, { "epoch": 0.65, "learning_rate": 4.675002298427876e-05, "loss": 0.9188, "step": 7070 }, { "epoch": 0.65, "learning_rate": 4.6745426128528094e-05, "loss": 0.9387, "step": 7080 }, { "epoch": 0.65, "learning_rate": 4.674082927277742e-05, "loss": 0.7898, "step": 7090 }, { "epoch": 0.65, "learning_rate": 4.6736232417026754e-05, "loss": 1.1022, "step": 7100 }, { "epoch": 0.65, "learning_rate": 4.673163556127609e-05, "loss": 1.0154, "step": 7110 }, { "epoch": 0.65, "learning_rate": 4.672703870552542e-05, "loss": 0.9514, "step": 7120 }, { "epoch": 0.66, "learning_rate": 4.6722441849774756e-05, "loss": 0.852, "step": 7130 }, { "epoch": 0.66, "learning_rate": 4.671784499402409e-05, "loss": 0.8602, "step": 7140 }, { "epoch": 0.66, "learning_rate": 4.671324813827342e-05, "loss": 0.9013, "step": 7150 }, { "epoch": 0.66, "learning_rate": 4.670865128252276e-05, "loss": 0.7947, "step": 7160 }, { "epoch": 0.66, "learning_rate": 4.6704054426772095e-05, "loss": 0.9176, "step": 7170 }, { "epoch": 0.66, "learning_rate": 4.669945757102142e-05, "loss": 0.8809, "step": 7180 }, { "epoch": 0.66, "learning_rate": 4.6694860715270755e-05, "loss": 0.8874, "step": 7190 }, { "epoch": 0.66, "learning_rate": 4.669026385952009e-05, "loss": 0.9915, "step": 7200 }, { "epoch": 0.66, "learning_rate": 4.668566700376942e-05, "loss": 0.9328, "step": 7210 }, { "epoch": 0.66, "learning_rate": 4.668107014801876e-05, "loss": 0.8961, "step": 7220 }, { "epoch": 0.66, "learning_rate": 4.6676473292268094e-05, "loss": 1.0482, "step": 7230 }, { "epoch": 0.67, "learning_rate": 4.6671876436517424e-05, "loss": 0.9711, "step": 7240 }, { "epoch": 0.67, "learning_rate": 4.666727958076676e-05, "loss": 1.0289, "step": 7250 }, { "epoch": 0.67, "learning_rate": 4.6662682725016097e-05, "loss": 0.9586, "step": 7260 }, { "epoch": 0.67, "learning_rate": 4.665808586926542e-05, "loss": 0.9178, "step": 7270 }, { "epoch": 0.67, "learning_rate": 4.6653489013514756e-05, "loss": 0.8158, "step": 7280 }, { "epoch": 0.67, "learning_rate": 4.664889215776409e-05, "loss": 0.9513, "step": 7290 }, { "epoch": 0.67, "learning_rate": 4.664429530201342e-05, "loss": 0.8031, "step": 7300 }, { "epoch": 0.67, "learning_rate": 4.663969844626276e-05, "loss": 0.8974, "step": 7310 }, { "epoch": 0.67, "learning_rate": 4.6635101590512095e-05, "loss": 0.9998, "step": 7320 }, { "epoch": 0.67, "learning_rate": 4.6630504734761425e-05, "loss": 0.9037, "step": 7330 }, { "epoch": 0.67, "learning_rate": 4.662590787901076e-05, "loss": 0.9054, "step": 7340 }, { "epoch": 0.68, "learning_rate": 4.66213110232601e-05, "loss": 0.999, "step": 7350 }, { "epoch": 0.68, "learning_rate": 4.661671416750942e-05, "loss": 0.953, "step": 7360 }, { "epoch": 0.68, "learning_rate": 4.661211731175876e-05, "loss": 0.926, "step": 7370 }, { "epoch": 0.68, "learning_rate": 4.6607520456008094e-05, "loss": 0.9692, "step": 7380 }, { "epoch": 0.68, "learning_rate": 4.660292360025742e-05, "loss": 0.9667, "step": 7390 }, { "epoch": 0.68, "learning_rate": 4.659832674450676e-05, "loss": 0.7981, "step": 7400 }, { "epoch": 0.68, "learning_rate": 4.6593729888756096e-05, "loss": 1.0221, "step": 7410 }, { "epoch": 0.68, "learning_rate": 4.6589133033005426e-05, "loss": 1.0226, "step": 7420 }, { "epoch": 0.68, "learning_rate": 4.658453617725476e-05, "loss": 0.9383, "step": 7430 }, { "epoch": 0.68, "learning_rate": 4.65799393215041e-05, "loss": 0.9596, "step": 7440 }, { "epoch": 0.68, "learning_rate": 4.657534246575342e-05, "loss": 0.8783, "step": 7450 }, { "epoch": 0.69, "learning_rate": 4.657074561000276e-05, "loss": 0.908, "step": 7460 }, { "epoch": 0.69, "learning_rate": 4.6566148754252095e-05, "loss": 1.0087, "step": 7470 }, { "epoch": 0.69, "learning_rate": 4.6561551898501425e-05, "loss": 0.887, "step": 7480 }, { "epoch": 0.69, "learning_rate": 4.655695504275076e-05, "loss": 0.9528, "step": 7490 }, { "epoch": 0.69, "learning_rate": 4.65523581870001e-05, "loss": 0.9504, "step": 7500 }, { "epoch": 0.69, "learning_rate": 4.654776133124943e-05, "loss": 1.0215, "step": 7510 }, { "epoch": 0.69, "learning_rate": 4.6543164475498764e-05, "loss": 0.9759, "step": 7520 }, { "epoch": 0.69, "learning_rate": 4.65385676197481e-05, "loss": 0.9513, "step": 7530 }, { "epoch": 0.69, "learning_rate": 4.653397076399742e-05, "loss": 0.9184, "step": 7540 }, { "epoch": 0.69, "learning_rate": 4.652937390824676e-05, "loss": 1.0114, "step": 7550 }, { "epoch": 0.7, "learning_rate": 4.6524777052496096e-05, "loss": 0.8216, "step": 7560 }, { "epoch": 0.7, "learning_rate": 4.6520180196745426e-05, "loss": 0.8299, "step": 7570 }, { "epoch": 0.7, "learning_rate": 4.651558334099476e-05, "loss": 0.8972, "step": 7580 }, { "epoch": 0.7, "learning_rate": 4.65109864852441e-05, "loss": 0.7963, "step": 7590 }, { "epoch": 0.7, "learning_rate": 4.650638962949343e-05, "loss": 1.0112, "step": 7600 }, { "epoch": 0.7, "learning_rate": 4.6501792773742765e-05, "loss": 0.7989, "step": 7610 }, { "epoch": 0.7, "learning_rate": 4.64971959179921e-05, "loss": 0.9572, "step": 7620 }, { "epoch": 0.7, "learning_rate": 4.6492599062241424e-05, "loss": 1.0296, "step": 7630 }, { "epoch": 0.7, "learning_rate": 4.648800220649076e-05, "loss": 0.9615, "step": 7640 }, { "epoch": 0.7, "learning_rate": 4.64834053507401e-05, "loss": 0.9038, "step": 7650 }, { "epoch": 0.7, "learning_rate": 4.647880849498943e-05, "loss": 0.9469, "step": 7660 }, { "epoch": 0.71, "learning_rate": 4.6474211639238763e-05, "loss": 0.9469, "step": 7670 }, { "epoch": 0.71, "learning_rate": 4.64696147834881e-05, "loss": 0.835, "step": 7680 }, { "epoch": 0.71, "learning_rate": 4.646501792773743e-05, "loss": 1.0294, "step": 7690 }, { "epoch": 0.71, "learning_rate": 4.6460421071986766e-05, "loss": 0.9579, "step": 7700 }, { "epoch": 0.71, "learning_rate": 4.6455824216236096e-05, "loss": 0.9462, "step": 7710 }, { "epoch": 0.71, "learning_rate": 4.6451227360485425e-05, "loss": 0.9222, "step": 7720 }, { "epoch": 0.71, "learning_rate": 4.644663050473476e-05, "loss": 0.9099, "step": 7730 }, { "epoch": 0.71, "learning_rate": 4.64420336489841e-05, "loss": 0.8329, "step": 7740 }, { "epoch": 0.71, "learning_rate": 4.643743679323343e-05, "loss": 0.8279, "step": 7750 }, { "epoch": 0.71, "learning_rate": 4.6432839937482765e-05, "loss": 0.9033, "step": 7760 }, { "epoch": 0.71, "learning_rate": 4.64282430817321e-05, "loss": 1.0114, "step": 7770 }, { "epoch": 0.72, "learning_rate": 4.642364622598143e-05, "loss": 0.859, "step": 7780 }, { "epoch": 0.72, "learning_rate": 4.641904937023077e-05, "loss": 0.9119, "step": 7790 }, { "epoch": 0.72, "learning_rate": 4.64144525144801e-05, "loss": 0.956, "step": 7800 }, { "epoch": 0.72, "learning_rate": 4.640985565872943e-05, "loss": 0.8445, "step": 7810 }, { "epoch": 0.72, "learning_rate": 4.640525880297876e-05, "loss": 0.9098, "step": 7820 }, { "epoch": 0.72, "learning_rate": 4.64006619472281e-05, "loss": 0.993, "step": 7830 }, { "epoch": 0.72, "learning_rate": 4.639606509147743e-05, "loss": 0.7811, "step": 7840 }, { "epoch": 0.72, "learning_rate": 4.6391468235726766e-05, "loss": 0.9625, "step": 7850 }, { "epoch": 0.72, "learning_rate": 4.63868713799761e-05, "loss": 0.9788, "step": 7860 }, { "epoch": 0.72, "learning_rate": 4.638227452422543e-05, "loss": 0.9167, "step": 7870 }, { "epoch": 0.72, "learning_rate": 4.637767766847477e-05, "loss": 0.9248, "step": 7880 }, { "epoch": 0.73, "learning_rate": 4.63730808127241e-05, "loss": 0.9081, "step": 7890 }, { "epoch": 0.73, "learning_rate": 4.636848395697343e-05, "loss": 0.8743, "step": 7900 }, { "epoch": 0.73, "learning_rate": 4.6363887101222764e-05, "loss": 0.9266, "step": 7910 }, { "epoch": 0.73, "learning_rate": 4.63592902454721e-05, "loss": 0.9506, "step": 7920 }, { "epoch": 0.73, "learning_rate": 4.635469338972143e-05, "loss": 0.9126, "step": 7930 }, { "epoch": 0.73, "learning_rate": 4.635009653397077e-05, "loss": 0.8768, "step": 7940 }, { "epoch": 0.73, "learning_rate": 4.6345499678220103e-05, "loss": 0.8792, "step": 7950 }, { "epoch": 0.73, "learning_rate": 4.634090282246943e-05, "loss": 0.8548, "step": 7960 }, { "epoch": 0.73, "learning_rate": 4.633630596671877e-05, "loss": 0.8766, "step": 7970 }, { "epoch": 0.73, "learning_rate": 4.63317091109681e-05, "loss": 0.9301, "step": 7980 }, { "epoch": 0.73, "learning_rate": 4.632711225521743e-05, "loss": 0.9481, "step": 7990 }, { "epoch": 0.74, "learning_rate": 4.6322515399466766e-05, "loss": 0.879, "step": 8000 }, { "epoch": 0.74, "learning_rate": 4.63179185437161e-05, "loss": 0.9006, "step": 8010 }, { "epoch": 0.74, "learning_rate": 4.631332168796543e-05, "loss": 0.8532, "step": 8020 }, { "epoch": 0.74, "learning_rate": 4.630872483221477e-05, "loss": 1.0088, "step": 8030 }, { "epoch": 0.74, "learning_rate": 4.63041279764641e-05, "loss": 0.9281, "step": 8040 }, { "epoch": 0.74, "learning_rate": 4.6299531120713434e-05, "loss": 0.9401, "step": 8050 }, { "epoch": 0.74, "learning_rate": 4.629493426496277e-05, "loss": 0.9023, "step": 8060 }, { "epoch": 0.74, "learning_rate": 4.62903374092121e-05, "loss": 0.931, "step": 8070 }, { "epoch": 0.74, "learning_rate": 4.628574055346143e-05, "loss": 0.9297, "step": 8080 }, { "epoch": 0.74, "learning_rate": 4.628114369771077e-05, "loss": 0.9526, "step": 8090 }, { "epoch": 0.74, "learning_rate": 4.62765468419601e-05, "loss": 0.8054, "step": 8100 }, { "epoch": 0.75, "learning_rate": 4.627194998620943e-05, "loss": 0.9754, "step": 8110 }, { "epoch": 0.75, "learning_rate": 4.626735313045877e-05, "loss": 1.0054, "step": 8120 }, { "epoch": 0.75, "learning_rate": 4.62627562747081e-05, "loss": 0.858, "step": 8130 }, { "epoch": 0.75, "learning_rate": 4.6258159418957436e-05, "loss": 0.9574, "step": 8140 }, { "epoch": 0.75, "learning_rate": 4.625356256320677e-05, "loss": 0.7805, "step": 8150 }, { "epoch": 0.75, "learning_rate": 4.62489657074561e-05, "loss": 0.8775, "step": 8160 }, { "epoch": 0.75, "learning_rate": 4.624436885170543e-05, "loss": 0.9306, "step": 8170 }, { "epoch": 0.75, "learning_rate": 4.623977199595477e-05, "loss": 1.0693, "step": 8180 }, { "epoch": 0.75, "learning_rate": 4.6235175140204104e-05, "loss": 0.9722, "step": 8190 }, { "epoch": 0.75, "learning_rate": 4.6230578284453434e-05, "loss": 0.9618, "step": 8200 }, { "epoch": 0.75, "learning_rate": 4.622598142870277e-05, "loss": 0.8457, "step": 8210 }, { "epoch": 0.76, "learning_rate": 4.62213845729521e-05, "loss": 0.9497, "step": 8220 }, { "epoch": 0.76, "learning_rate": 4.621678771720144e-05, "loss": 0.8787, "step": 8230 }, { "epoch": 0.76, "learning_rate": 4.621219086145077e-05, "loss": 0.9033, "step": 8240 }, { "epoch": 0.76, "learning_rate": 4.62075940057001e-05, "loss": 0.8872, "step": 8250 }, { "epoch": 0.76, "learning_rate": 4.620299714994943e-05, "loss": 0.8246, "step": 8260 }, { "epoch": 0.76, "learning_rate": 4.619840029419877e-05, "loss": 0.9612, "step": 8270 }, { "epoch": 0.76, "learning_rate": 4.6193803438448106e-05, "loss": 0.8578, "step": 8280 }, { "epoch": 0.76, "learning_rate": 4.6189206582697435e-05, "loss": 0.9147, "step": 8290 }, { "epoch": 0.76, "learning_rate": 4.618460972694677e-05, "loss": 0.9273, "step": 8300 }, { "epoch": 0.76, "learning_rate": 4.61800128711961e-05, "loss": 0.8135, "step": 8310 }, { "epoch": 0.76, "learning_rate": 4.617541601544544e-05, "loss": 0.8515, "step": 8320 }, { "epoch": 0.77, "learning_rate": 4.6170819159694774e-05, "loss": 0.9319, "step": 8330 }, { "epoch": 0.77, "learning_rate": 4.6166222303944104e-05, "loss": 0.9397, "step": 8340 }, { "epoch": 0.77, "learning_rate": 4.6161625448193434e-05, "loss": 0.8886, "step": 8350 }, { "epoch": 0.77, "learning_rate": 4.615702859244277e-05, "loss": 1.0093, "step": 8360 }, { "epoch": 0.77, "learning_rate": 4.61524317366921e-05, "loss": 0.896, "step": 8370 }, { "epoch": 0.77, "learning_rate": 4.6147834880941436e-05, "loss": 0.9082, "step": 8380 }, { "epoch": 0.77, "learning_rate": 4.614323802519077e-05, "loss": 0.8562, "step": 8390 }, { "epoch": 0.77, "learning_rate": 4.61386411694401e-05, "loss": 0.8869, "step": 8400 }, { "epoch": 0.77, "learning_rate": 4.613404431368944e-05, "loss": 0.9303, "step": 8410 }, { "epoch": 0.77, "learning_rate": 4.6129447457938776e-05, "loss": 0.7779, "step": 8420 }, { "epoch": 0.78, "learning_rate": 4.6124850602188105e-05, "loss": 0.9314, "step": 8430 }, { "epoch": 0.78, "learning_rate": 4.6120253746437435e-05, "loss": 0.9407, "step": 8440 }, { "epoch": 0.78, "learning_rate": 4.611565689068677e-05, "loss": 0.9324, "step": 8450 }, { "epoch": 0.78, "learning_rate": 4.61110600349361e-05, "loss": 0.8839, "step": 8460 }, { "epoch": 0.78, "learning_rate": 4.610646317918544e-05, "loss": 1.0149, "step": 8470 }, { "epoch": 0.78, "learning_rate": 4.6101866323434774e-05, "loss": 0.9564, "step": 8480 }, { "epoch": 0.78, "learning_rate": 4.6097269467684104e-05, "loss": 0.9496, "step": 8490 }, { "epoch": 0.78, "learning_rate": 4.609267261193344e-05, "loss": 0.781, "step": 8500 }, { "epoch": 0.78, "learning_rate": 4.608807575618278e-05, "loss": 0.7956, "step": 8510 }, { "epoch": 0.78, "learning_rate": 4.6083478900432107e-05, "loss": 0.8251, "step": 8520 }, { "epoch": 0.78, "learning_rate": 4.607888204468144e-05, "loss": 0.8574, "step": 8530 }, { "epoch": 0.79, "learning_rate": 4.607428518893077e-05, "loss": 0.9041, "step": 8540 }, { "epoch": 0.79, "learning_rate": 4.60696883331801e-05, "loss": 0.9488, "step": 8550 }, { "epoch": 0.79, "learning_rate": 4.606509147742944e-05, "loss": 0.9011, "step": 8560 }, { "epoch": 0.79, "learning_rate": 4.6060494621678775e-05, "loss": 1.07, "step": 8570 }, { "epoch": 0.79, "learning_rate": 4.6055897765928105e-05, "loss": 0.9035, "step": 8580 }, { "epoch": 0.79, "learning_rate": 4.605130091017744e-05, "loss": 0.9104, "step": 8590 }, { "epoch": 0.79, "learning_rate": 4.604670405442678e-05, "loss": 0.9687, "step": 8600 }, { "epoch": 0.79, "learning_rate": 4.604210719867611e-05, "loss": 0.8945, "step": 8610 }, { "epoch": 0.79, "learning_rate": 4.6037510342925444e-05, "loss": 0.8843, "step": 8620 }, { "epoch": 0.79, "learning_rate": 4.6032913487174774e-05, "loss": 0.9736, "step": 8630 }, { "epoch": 0.79, "learning_rate": 4.6028316631424104e-05, "loss": 0.9156, "step": 8640 }, { "epoch": 0.8, "learning_rate": 4.602371977567344e-05, "loss": 0.8903, "step": 8650 }, { "epoch": 0.8, "learning_rate": 4.6019122919922777e-05, "loss": 1.0464, "step": 8660 }, { "epoch": 0.8, "learning_rate": 4.6014526064172106e-05, "loss": 0.8583, "step": 8670 }, { "epoch": 0.8, "learning_rate": 4.600992920842144e-05, "loss": 0.8893, "step": 8680 }, { "epoch": 0.8, "learning_rate": 4.600533235267078e-05, "loss": 0.9144, "step": 8690 }, { "epoch": 0.8, "learning_rate": 4.600073549692011e-05, "loss": 0.9408, "step": 8700 }, { "epoch": 0.8, "learning_rate": 4.5996138641169445e-05, "loss": 0.9763, "step": 8710 }, { "epoch": 0.8, "learning_rate": 4.5991541785418775e-05, "loss": 0.9499, "step": 8720 }, { "epoch": 0.8, "learning_rate": 4.5986944929668105e-05, "loss": 0.8354, "step": 8730 }, { "epoch": 0.8, "learning_rate": 4.598234807391744e-05, "loss": 0.8823, "step": 8740 }, { "epoch": 0.8, "learning_rate": 4.597775121816678e-05, "loss": 0.942, "step": 8750 }, { "epoch": 0.81, "learning_rate": 4.597315436241611e-05, "loss": 0.8892, "step": 8760 }, { "epoch": 0.81, "learning_rate": 4.5968557506665444e-05, "loss": 0.8544, "step": 8770 }, { "epoch": 0.81, "learning_rate": 4.596396065091478e-05, "loss": 0.9719, "step": 8780 }, { "epoch": 0.81, "learning_rate": 4.595936379516411e-05, "loss": 0.8874, "step": 8790 }, { "epoch": 0.81, "learning_rate": 4.5954766939413447e-05, "loss": 0.9312, "step": 8800 }, { "epoch": 0.81, "learning_rate": 4.5950170083662776e-05, "loss": 0.9053, "step": 8810 }, { "epoch": 0.81, "learning_rate": 4.5945573227912106e-05, "loss": 0.9424, "step": 8820 }, { "epoch": 0.81, "learning_rate": 4.594097637216144e-05, "loss": 0.8635, "step": 8830 }, { "epoch": 0.81, "learning_rate": 4.593637951641078e-05, "loss": 0.9689, "step": 8840 }, { "epoch": 0.81, "learning_rate": 4.593178266066011e-05, "loss": 0.9001, "step": 8850 }, { "epoch": 0.81, "learning_rate": 4.5927185804909445e-05, "loss": 0.9846, "step": 8860 }, { "epoch": 0.82, "learning_rate": 4.592258894915878e-05, "loss": 0.8666, "step": 8870 }, { "epoch": 0.82, "learning_rate": 4.591799209340811e-05, "loss": 0.8585, "step": 8880 }, { "epoch": 0.82, "learning_rate": 4.591339523765745e-05, "loss": 0.8445, "step": 8890 }, { "epoch": 0.82, "learning_rate": 4.590879838190678e-05, "loss": 1.0266, "step": 8900 }, { "epoch": 0.82, "learning_rate": 4.590420152615611e-05, "loss": 1.0117, "step": 8910 }, { "epoch": 0.82, "learning_rate": 4.5899604670405444e-05, "loss": 0.9814, "step": 8920 }, { "epoch": 0.82, "learning_rate": 4.589500781465478e-05, "loss": 0.9063, "step": 8930 }, { "epoch": 0.82, "learning_rate": 4.589041095890411e-05, "loss": 0.8184, "step": 8940 }, { "epoch": 0.82, "learning_rate": 4.5885814103153446e-05, "loss": 0.8957, "step": 8950 }, { "epoch": 0.82, "learning_rate": 4.588121724740278e-05, "loss": 0.9468, "step": 8960 }, { "epoch": 0.82, "learning_rate": 4.587662039165211e-05, "loss": 0.9206, "step": 8970 }, { "epoch": 0.83, "learning_rate": 4.587202353590145e-05, "loss": 1.161, "step": 8980 }, { "epoch": 0.83, "learning_rate": 4.586742668015078e-05, "loss": 0.8617, "step": 8990 }, { "epoch": 0.83, "learning_rate": 4.586282982440011e-05, "loss": 0.9063, "step": 9000 }, { "epoch": 0.83, "learning_rate": 4.5858232968649445e-05, "loss": 0.9072, "step": 9010 }, { "epoch": 0.83, "learning_rate": 4.585363611289878e-05, "loss": 0.885, "step": 9020 }, { "epoch": 0.83, "learning_rate": 4.584903925714811e-05, "loss": 0.8781, "step": 9030 }, { "epoch": 0.83, "learning_rate": 4.584444240139745e-05, "loss": 0.8612, "step": 9040 }, { "epoch": 0.83, "learning_rate": 4.5839845545646784e-05, "loss": 0.8331, "step": 9050 }, { "epoch": 0.83, "learning_rate": 4.5835248689896114e-05, "loss": 0.8512, "step": 9060 }, { "epoch": 0.83, "learning_rate": 4.583065183414545e-05, "loss": 0.8398, "step": 9070 }, { "epoch": 0.83, "learning_rate": 4.582605497839478e-05, "loss": 0.8693, "step": 9080 }, { "epoch": 0.84, "learning_rate": 4.582145812264411e-05, "loss": 0.8372, "step": 9090 }, { "epoch": 0.84, "learning_rate": 4.5816861266893446e-05, "loss": 0.8334, "step": 9100 }, { "epoch": 0.84, "learning_rate": 4.581226441114278e-05, "loss": 0.8157, "step": 9110 }, { "epoch": 0.84, "learning_rate": 4.580766755539211e-05, "loss": 0.9965, "step": 9120 }, { "epoch": 0.84, "learning_rate": 4.580307069964145e-05, "loss": 0.9398, "step": 9130 }, { "epoch": 0.84, "learning_rate": 4.5798473843890785e-05, "loss": 0.9553, "step": 9140 }, { "epoch": 0.84, "learning_rate": 4.5793876988140115e-05, "loss": 0.9531, "step": 9150 }, { "epoch": 0.84, "learning_rate": 4.578928013238945e-05, "loss": 1.0181, "step": 9160 }, { "epoch": 0.84, "learning_rate": 4.578468327663878e-05, "loss": 0.9433, "step": 9170 }, { "epoch": 0.84, "learning_rate": 4.578008642088811e-05, "loss": 0.9153, "step": 9180 }, { "epoch": 0.84, "learning_rate": 4.577548956513745e-05, "loss": 0.9699, "step": 9190 }, { "epoch": 0.85, "learning_rate": 4.5770892709386784e-05, "loss": 0.8301, "step": 9200 }, { "epoch": 0.85, "learning_rate": 4.5766295853636113e-05, "loss": 0.8787, "step": 9210 }, { "epoch": 0.85, "learning_rate": 4.576169899788545e-05, "loss": 0.8864, "step": 9220 }, { "epoch": 0.85, "learning_rate": 4.5757102142134786e-05, "loss": 0.9928, "step": 9230 }, { "epoch": 0.85, "learning_rate": 4.5752505286384116e-05, "loss": 0.9047, "step": 9240 }, { "epoch": 0.85, "learning_rate": 4.574790843063345e-05, "loss": 0.9569, "step": 9250 }, { "epoch": 0.85, "learning_rate": 4.574331157488278e-05, "loss": 0.8903, "step": 9260 }, { "epoch": 0.85, "learning_rate": 4.573871471913211e-05, "loss": 0.9678, "step": 9270 }, { "epoch": 0.85, "learning_rate": 4.573411786338145e-05, "loss": 0.9929, "step": 9280 }, { "epoch": 0.85, "learning_rate": 4.5729521007630785e-05, "loss": 0.9279, "step": 9290 }, { "epoch": 0.86, "learning_rate": 4.5724924151880115e-05, "loss": 0.8225, "step": 9300 }, { "epoch": 0.86, "learning_rate": 4.572032729612945e-05, "loss": 0.8946, "step": 9310 }, { "epoch": 0.86, "learning_rate": 4.571573044037879e-05, "loss": 0.8858, "step": 9320 }, { "epoch": 0.86, "learning_rate": 4.571113358462812e-05, "loss": 0.9494, "step": 9330 }, { "epoch": 0.86, "learning_rate": 4.5706536728877454e-05, "loss": 1.0795, "step": 9340 }, { "epoch": 0.86, "learning_rate": 4.5701939873126783e-05, "loss": 0.9214, "step": 9350 }, { "epoch": 0.86, "learning_rate": 4.569734301737611e-05, "loss": 0.9415, "step": 9360 }, { "epoch": 0.86, "learning_rate": 4.569274616162545e-05, "loss": 0.9509, "step": 9370 }, { "epoch": 0.86, "learning_rate": 4.5688149305874786e-05, "loss": 0.9369, "step": 9380 }, { "epoch": 0.86, "learning_rate": 4.5683552450124116e-05, "loss": 0.8521, "step": 9390 }, { "epoch": 0.86, "learning_rate": 4.567895559437345e-05, "loss": 0.9066, "step": 9400 }, { "epoch": 0.87, "learning_rate": 4.567435873862279e-05, "loss": 0.8537, "step": 9410 }, { "epoch": 0.87, "learning_rate": 4.566976188287212e-05, "loss": 0.9234, "step": 9420 }, { "epoch": 0.87, "learning_rate": 4.5665165027121455e-05, "loss": 0.7931, "step": 9430 }, { "epoch": 0.87, "learning_rate": 4.5660568171370785e-05, "loss": 0.7876, "step": 9440 }, { "epoch": 0.87, "learning_rate": 4.5655971315620114e-05, "loss": 0.9976, "step": 9450 }, { "epoch": 0.87, "learning_rate": 4.565137445986945e-05, "loss": 0.9684, "step": 9460 }, { "epoch": 0.87, "learning_rate": 4.564677760411879e-05, "loss": 0.8602, "step": 9470 }, { "epoch": 0.87, "learning_rate": 4.564218074836812e-05, "loss": 0.9195, "step": 9480 }, { "epoch": 0.87, "learning_rate": 4.5637583892617453e-05, "loss": 0.9705, "step": 9490 }, { "epoch": 0.87, "learning_rate": 4.563298703686679e-05, "loss": 0.8714, "step": 9500 }, { "epoch": 0.87, "learning_rate": 4.562839018111612e-05, "loss": 0.8325, "step": 9510 }, { "epoch": 0.88, "learning_rate": 4.5623793325365456e-05, "loss": 0.8938, "step": 9520 }, { "epoch": 0.88, "learning_rate": 4.5619196469614786e-05, "loss": 0.968, "step": 9530 }, { "epoch": 0.88, "learning_rate": 4.5614599613864116e-05, "loss": 0.8756, "step": 9540 }, { "epoch": 0.88, "learning_rate": 4.561000275811345e-05, "loss": 0.9233, "step": 9550 }, { "epoch": 0.88, "learning_rate": 4.560540590236279e-05, "loss": 0.9545, "step": 9560 }, { "epoch": 0.88, "learning_rate": 4.560080904661212e-05, "loss": 0.8786, "step": 9570 }, { "epoch": 0.88, "learning_rate": 4.5596212190861455e-05, "loss": 0.9069, "step": 9580 }, { "epoch": 0.88, "learning_rate": 4.5591615335110784e-05, "loss": 0.8495, "step": 9590 }, { "epoch": 0.88, "learning_rate": 4.558701847936012e-05, "loss": 0.9046, "step": 9600 }, { "epoch": 0.88, "learning_rate": 4.558242162360946e-05, "loss": 0.9158, "step": 9610 }, { "epoch": 0.88, "learning_rate": 4.557782476785879e-05, "loss": 0.895, "step": 9620 }, { "epoch": 0.89, "learning_rate": 4.557322791210812e-05, "loss": 0.9712, "step": 9630 }, { "epoch": 0.89, "learning_rate": 4.556863105635745e-05, "loss": 0.9486, "step": 9640 }, { "epoch": 0.89, "learning_rate": 4.556403420060679e-05, "loss": 0.8435, "step": 9650 }, { "epoch": 0.89, "learning_rate": 4.555943734485612e-05, "loss": 0.8821, "step": 9660 }, { "epoch": 0.89, "learning_rate": 4.5554840489105456e-05, "loss": 0.9525, "step": 9670 }, { "epoch": 0.89, "learning_rate": 4.5550243633354786e-05, "loss": 0.8794, "step": 9680 }, { "epoch": 0.89, "learning_rate": 4.554564677760412e-05, "loss": 0.9858, "step": 9690 }, { "epoch": 0.89, "learning_rate": 4.554104992185346e-05, "loss": 1.032, "step": 9700 }, { "epoch": 0.89, "learning_rate": 4.553645306610279e-05, "loss": 1.0159, "step": 9710 }, { "epoch": 0.89, "learning_rate": 4.553185621035212e-05, "loss": 0.9821, "step": 9720 }, { "epoch": 0.89, "learning_rate": 4.5527259354601454e-05, "loss": 0.8938, "step": 9730 }, { "epoch": 0.9, "learning_rate": 4.552266249885079e-05, "loss": 0.9275, "step": 9740 }, { "epoch": 0.9, "learning_rate": 4.551806564310012e-05, "loss": 0.9485, "step": 9750 }, { "epoch": 0.9, "learning_rate": 4.551346878734946e-05, "loss": 0.9463, "step": 9760 }, { "epoch": 0.9, "learning_rate": 4.550887193159879e-05, "loss": 1.0139, "step": 9770 }, { "epoch": 0.9, "learning_rate": 4.550427507584812e-05, "loss": 0.8501, "step": 9780 }, { "epoch": 0.9, "learning_rate": 4.549967822009746e-05, "loss": 0.9582, "step": 9790 }, { "epoch": 0.9, "learning_rate": 4.549508136434679e-05, "loss": 0.8919, "step": 9800 }, { "epoch": 0.9, "learning_rate": 4.549048450859612e-05, "loss": 0.9804, "step": 9810 }, { "epoch": 0.9, "learning_rate": 4.5485887652845456e-05, "loss": 0.8906, "step": 9820 }, { "epoch": 0.9, "learning_rate": 4.548129079709479e-05, "loss": 0.844, "step": 9830 }, { "epoch": 0.9, "learning_rate": 4.547669394134412e-05, "loss": 0.974, "step": 9840 }, { "epoch": 0.91, "learning_rate": 4.547209708559346e-05, "loss": 0.8602, "step": 9850 }, { "epoch": 0.91, "learning_rate": 4.546750022984279e-05, "loss": 0.9942, "step": 9860 }, { "epoch": 0.91, "learning_rate": 4.5462903374092124e-05, "loss": 1.0303, "step": 9870 }, { "epoch": 0.91, "learning_rate": 4.545830651834146e-05, "loss": 0.8938, "step": 9880 }, { "epoch": 0.91, "learning_rate": 4.545370966259079e-05, "loss": 1.0339, "step": 9890 }, { "epoch": 0.91, "learning_rate": 4.544911280684012e-05, "loss": 0.9507, "step": 9900 }, { "epoch": 0.91, "learning_rate": 4.544451595108946e-05, "loss": 0.815, "step": 9910 }, { "epoch": 0.91, "learning_rate": 4.5439919095338787e-05, "loss": 0.9829, "step": 9920 }, { "epoch": 0.91, "learning_rate": 4.543532223958812e-05, "loss": 0.9953, "step": 9930 }, { "epoch": 0.91, "learning_rate": 4.543072538383746e-05, "loss": 0.8327, "step": 9940 }, { "epoch": 0.91, "learning_rate": 4.542612852808679e-05, "loss": 0.8617, "step": 9950 }, { "epoch": 0.92, "learning_rate": 4.5421531672336126e-05, "loss": 0.9084, "step": 9960 }, { "epoch": 0.92, "learning_rate": 4.541693481658546e-05, "loss": 0.8555, "step": 9970 }, { "epoch": 0.92, "learning_rate": 4.541233796083479e-05, "loss": 0.9555, "step": 9980 }, { "epoch": 0.92, "learning_rate": 4.540774110508412e-05, "loss": 0.8969, "step": 9990 }, { "epoch": 0.92, "learning_rate": 4.540314424933346e-05, "loss": 0.9159, "step": 10000 }, { "epoch": 0.92, "eval_accuracy": 0.5585152838427948, "eval_loss": 0.9121217131614685, "eval_runtime": 159.7746, "eval_samples_per_second": 28.665, "eval_steps_per_second": 3.586, "step": 10000 }, { "epoch": 0.92, "learning_rate": 4.539854739358279e-05, "loss": 0.9468, "step": 10010 }, { "epoch": 0.92, "learning_rate": 4.5393950537832124e-05, "loss": 0.9851, "step": 10020 }, { "epoch": 0.92, "learning_rate": 4.538935368208146e-05, "loss": 0.8688, "step": 10030 }, { "epoch": 0.92, "learning_rate": 4.538475682633079e-05, "loss": 0.8096, "step": 10040 }, { "epoch": 0.92, "learning_rate": 4.538015997058013e-05, "loss": 0.9783, "step": 10050 }, { "epoch": 0.92, "learning_rate": 4.537556311482946e-05, "loss": 0.8334, "step": 10060 }, { "epoch": 0.93, "learning_rate": 4.537096625907879e-05, "loss": 0.882, "step": 10070 }, { "epoch": 0.93, "learning_rate": 4.536636940332812e-05, "loss": 1.0017, "step": 10080 }, { "epoch": 0.93, "learning_rate": 4.536177254757746e-05, "loss": 0.8854, "step": 10090 }, { "epoch": 0.93, "learning_rate": 4.535717569182679e-05, "loss": 0.9157, "step": 10100 }, { "epoch": 0.93, "learning_rate": 4.5352578836076125e-05, "loss": 0.8932, "step": 10110 }, { "epoch": 0.93, "learning_rate": 4.534798198032546e-05, "loss": 0.926, "step": 10120 }, { "epoch": 0.93, "learning_rate": 4.534338512457479e-05, "loss": 0.8957, "step": 10130 }, { "epoch": 0.93, "learning_rate": 4.533878826882413e-05, "loss": 0.828, "step": 10140 }, { "epoch": 0.93, "learning_rate": 4.5334191413073465e-05, "loss": 0.8201, "step": 10150 }, { "epoch": 0.93, "learning_rate": 4.5329594557322794e-05, "loss": 0.9816, "step": 10160 }, { "epoch": 0.94, "learning_rate": 4.5324997701572124e-05, "loss": 0.9979, "step": 10170 }, { "epoch": 0.94, "learning_rate": 4.532040084582146e-05, "loss": 0.9248, "step": 10180 }, { "epoch": 0.94, "learning_rate": 4.531580399007079e-05, "loss": 0.9423, "step": 10190 }, { "epoch": 0.94, "learning_rate": 4.5311207134320127e-05, "loss": 0.8534, "step": 10200 }, { "epoch": 0.94, "learning_rate": 4.530661027856946e-05, "loss": 0.9352, "step": 10210 }, { "epoch": 0.94, "learning_rate": 4.530201342281879e-05, "loss": 0.9023, "step": 10220 }, { "epoch": 0.94, "learning_rate": 4.529741656706813e-05, "loss": 0.8694, "step": 10230 }, { "epoch": 0.94, "learning_rate": 4.5292819711317466e-05, "loss": 0.888, "step": 10240 }, { "epoch": 0.94, "learning_rate": 4.528822285556679e-05, "loss": 0.9553, "step": 10250 }, { "epoch": 0.94, "learning_rate": 4.5283625999816125e-05, "loss": 0.8964, "step": 10260 }, { "epoch": 0.94, "learning_rate": 4.527902914406546e-05, "loss": 1.1738, "step": 10270 }, { "epoch": 0.95, "learning_rate": 4.527443228831479e-05, "loss": 0.8975, "step": 10280 }, { "epoch": 0.95, "learning_rate": 4.526983543256413e-05, "loss": 0.8494, "step": 10290 }, { "epoch": 0.95, "learning_rate": 4.5265238576813464e-05, "loss": 1.0389, "step": 10300 }, { "epoch": 0.95, "learning_rate": 4.5260641721062794e-05, "loss": 0.9733, "step": 10310 }, { "epoch": 0.95, "learning_rate": 4.525604486531213e-05, "loss": 0.9252, "step": 10320 }, { "epoch": 0.95, "learning_rate": 4.525144800956147e-05, "loss": 0.9168, "step": 10330 }, { "epoch": 0.95, "learning_rate": 4.524685115381079e-05, "loss": 0.9535, "step": 10340 }, { "epoch": 0.95, "learning_rate": 4.5242254298060126e-05, "loss": 0.9596, "step": 10350 }, { "epoch": 0.95, "learning_rate": 4.523765744230946e-05, "loss": 0.952, "step": 10360 }, { "epoch": 0.95, "learning_rate": 4.523306058655879e-05, "loss": 1.026, "step": 10370 }, { "epoch": 0.95, "learning_rate": 4.522846373080813e-05, "loss": 0.8833, "step": 10380 }, { "epoch": 0.96, "learning_rate": 4.5223866875057465e-05, "loss": 0.8967, "step": 10390 }, { "epoch": 0.96, "learning_rate": 4.5219270019306795e-05, "loss": 0.8901, "step": 10400 }, { "epoch": 0.96, "learning_rate": 4.521467316355613e-05, "loss": 0.8578, "step": 10410 }, { "epoch": 0.96, "learning_rate": 4.521007630780547e-05, "loss": 0.8658, "step": 10420 }, { "epoch": 0.96, "learning_rate": 4.520547945205479e-05, "loss": 0.8438, "step": 10430 }, { "epoch": 0.96, "learning_rate": 4.520088259630413e-05, "loss": 0.9124, "step": 10440 }, { "epoch": 0.96, "learning_rate": 4.5196285740553464e-05, "loss": 0.8058, "step": 10450 }, { "epoch": 0.96, "learning_rate": 4.5191688884802794e-05, "loss": 0.9143, "step": 10460 }, { "epoch": 0.96, "learning_rate": 4.518709202905213e-05, "loss": 0.8645, "step": 10470 }, { "epoch": 0.96, "learning_rate": 4.518249517330147e-05, "loss": 0.9521, "step": 10480 }, { "epoch": 0.96, "learning_rate": 4.5177898317550796e-05, "loss": 0.8112, "step": 10490 }, { "epoch": 0.97, "learning_rate": 4.517330146180013e-05, "loss": 0.9012, "step": 10500 }, { "epoch": 0.97, "learning_rate": 4.516870460604947e-05, "loss": 0.9395, "step": 10510 }, { "epoch": 0.97, "learning_rate": 4.516410775029879e-05, "loss": 0.961, "step": 10520 }, { "epoch": 0.97, "learning_rate": 4.515951089454813e-05, "loss": 1.0508, "step": 10530 }, { "epoch": 0.97, "learning_rate": 4.5154914038797465e-05, "loss": 0.931, "step": 10540 }, { "epoch": 0.97, "learning_rate": 4.5150317183046795e-05, "loss": 0.8443, "step": 10550 }, { "epoch": 0.97, "learning_rate": 4.514572032729613e-05, "loss": 0.9688, "step": 10560 }, { "epoch": 0.97, "learning_rate": 4.514112347154547e-05, "loss": 0.8909, "step": 10570 }, { "epoch": 0.97, "learning_rate": 4.51365266157948e-05, "loss": 1.0332, "step": 10580 }, { "epoch": 0.97, "learning_rate": 4.5131929760044134e-05, "loss": 1.0047, "step": 10590 }, { "epoch": 0.97, "learning_rate": 4.512733290429347e-05, "loss": 0.935, "step": 10600 }, { "epoch": 0.98, "learning_rate": 4.5122736048542793e-05, "loss": 0.9114, "step": 10610 }, { "epoch": 0.98, "learning_rate": 4.511813919279213e-05, "loss": 0.9685, "step": 10620 }, { "epoch": 0.98, "learning_rate": 4.5113542337041466e-05, "loss": 1.0234, "step": 10630 }, { "epoch": 0.98, "learning_rate": 4.5108945481290796e-05, "loss": 0.8568, "step": 10640 }, { "epoch": 0.98, "learning_rate": 4.510434862554013e-05, "loss": 0.9171, "step": 10650 }, { "epoch": 0.98, "learning_rate": 4.509975176978947e-05, "loss": 0.9971, "step": 10660 }, { "epoch": 0.98, "learning_rate": 4.50951549140388e-05, "loss": 0.7715, "step": 10670 }, { "epoch": 0.98, "learning_rate": 4.5090558058288135e-05, "loss": 0.9997, "step": 10680 }, { "epoch": 0.98, "learning_rate": 4.508596120253747e-05, "loss": 0.9301, "step": 10690 }, { "epoch": 0.98, "learning_rate": 4.5081364346786795e-05, "loss": 0.9331, "step": 10700 }, { "epoch": 0.98, "learning_rate": 4.507676749103613e-05, "loss": 0.988, "step": 10710 }, { "epoch": 0.99, "learning_rate": 4.507217063528547e-05, "loss": 0.8502, "step": 10720 }, { "epoch": 0.99, "learning_rate": 4.50675737795348e-05, "loss": 0.9107, "step": 10730 }, { "epoch": 0.99, "learning_rate": 4.5062976923784134e-05, "loss": 0.7789, "step": 10740 }, { "epoch": 0.99, "learning_rate": 4.505838006803347e-05, "loss": 0.965, "step": 10750 }, { "epoch": 0.99, "learning_rate": 4.50537832122828e-05, "loss": 0.9429, "step": 10760 }, { "epoch": 0.99, "learning_rate": 4.5049186356532136e-05, "loss": 0.8896, "step": 10770 }, { "epoch": 0.99, "learning_rate": 4.504458950078147e-05, "loss": 0.8761, "step": 10780 }, { "epoch": 0.99, "learning_rate": 4.5039992645030796e-05, "loss": 0.9087, "step": 10790 }, { "epoch": 0.99, "learning_rate": 4.503539578928013e-05, "loss": 0.9369, "step": 10800 }, { "epoch": 0.99, "learning_rate": 4.503079893352947e-05, "loss": 0.9147, "step": 10810 }, { "epoch": 0.99, "learning_rate": 4.50262020777788e-05, "loss": 0.8912, "step": 10820 }, { "epoch": 1.0, "learning_rate": 4.5021605222028135e-05, "loss": 0.8998, "step": 10830 }, { "epoch": 1.0, "learning_rate": 4.501700836627747e-05, "loss": 0.8388, "step": 10840 }, { "epoch": 1.0, "learning_rate": 4.50124115105268e-05, "loss": 0.8597, "step": 10850 }, { "epoch": 1.0, "learning_rate": 4.500781465477614e-05, "loss": 0.8806, "step": 10860 }, { "epoch": 1.0, "learning_rate": 4.5003217799025474e-05, "loss": 0.9591, "step": 10870 }, { "epoch": 1.0, "learning_rate": 4.49986209432748e-05, "loss": 0.9699, "step": 10880 }, { "epoch": 1.0, "learning_rate": 4.4994024087524133e-05, "loss": 0.8775, "step": 10890 }, { "epoch": 1.0, "learning_rate": 4.498942723177347e-05, "loss": 1.0192, "step": 10900 }, { "epoch": 1.0, "learning_rate": 4.49848303760228e-05, "loss": 0.8902, "step": 10910 }, { "epoch": 1.0, "learning_rate": 4.4980233520272136e-05, "loss": 0.8457, "step": 10920 }, { "epoch": 1.0, "learning_rate": 4.497563666452147e-05, "loss": 0.9777, "step": 10930 }, { "epoch": 1.01, "learning_rate": 4.49710398087708e-05, "loss": 0.9088, "step": 10940 }, { "epoch": 1.01, "learning_rate": 4.496644295302014e-05, "loss": 0.8713, "step": 10950 }, { "epoch": 1.01, "learning_rate": 4.4961846097269475e-05, "loss": 0.9141, "step": 10960 }, { "epoch": 1.01, "learning_rate": 4.49572492415188e-05, "loss": 0.8549, "step": 10970 }, { "epoch": 1.01, "learning_rate": 4.4952652385768135e-05, "loss": 0.9023, "step": 10980 }, { "epoch": 1.01, "learning_rate": 4.494805553001747e-05, "loss": 0.9571, "step": 10990 }, { "epoch": 1.01, "learning_rate": 4.49434586742668e-05, "loss": 0.9186, "step": 11000 }, { "epoch": 1.01, "learning_rate": 4.493886181851614e-05, "loss": 0.9233, "step": 11010 }, { "epoch": 1.01, "learning_rate": 4.4934264962765474e-05, "loss": 0.7483, "step": 11020 }, { "epoch": 1.01, "learning_rate": 4.4929668107014804e-05, "loss": 0.8481, "step": 11030 }, { "epoch": 1.01, "learning_rate": 4.492507125126414e-05, "loss": 0.9407, "step": 11040 }, { "epoch": 1.02, "learning_rate": 4.4920474395513476e-05, "loss": 1.0488, "step": 11050 }, { "epoch": 1.02, "learning_rate": 4.49158775397628e-05, "loss": 0.9104, "step": 11060 }, { "epoch": 1.02, "learning_rate": 4.4911280684012136e-05, "loss": 0.8958, "step": 11070 }, { "epoch": 1.02, "learning_rate": 4.490668382826147e-05, "loss": 0.9695, "step": 11080 }, { "epoch": 1.02, "learning_rate": 4.49020869725108e-05, "loss": 0.8812, "step": 11090 }, { "epoch": 1.02, "learning_rate": 4.489749011676014e-05, "loss": 0.877, "step": 11100 }, { "epoch": 1.02, "learning_rate": 4.4892893261009475e-05, "loss": 0.9419, "step": 11110 }, { "epoch": 1.02, "learning_rate": 4.4888296405258805e-05, "loss": 0.9154, "step": 11120 }, { "epoch": 1.02, "learning_rate": 4.488369954950814e-05, "loss": 0.9547, "step": 11130 }, { "epoch": 1.02, "learning_rate": 4.487910269375748e-05, "loss": 0.9033, "step": 11140 }, { "epoch": 1.03, "learning_rate": 4.48745058380068e-05, "loss": 0.8742, "step": 11150 }, { "epoch": 1.03, "learning_rate": 4.486990898225614e-05, "loss": 0.9041, "step": 11160 }, { "epoch": 1.03, "learning_rate": 4.4865312126505474e-05, "loss": 0.9042, "step": 11170 }, { "epoch": 1.03, "learning_rate": 4.48607152707548e-05, "loss": 0.906, "step": 11180 }, { "epoch": 1.03, "learning_rate": 4.485611841500414e-05, "loss": 1.0201, "step": 11190 }, { "epoch": 1.03, "learning_rate": 4.4851521559253476e-05, "loss": 0.8753, "step": 11200 }, { "epoch": 1.03, "learning_rate": 4.4846924703502806e-05, "loss": 0.9337, "step": 11210 }, { "epoch": 1.03, "learning_rate": 4.484232784775214e-05, "loss": 1.0417, "step": 11220 }, { "epoch": 1.03, "learning_rate": 4.483773099200147e-05, "loss": 0.9125, "step": 11230 }, { "epoch": 1.03, "learning_rate": 4.48331341362508e-05, "loss": 0.9053, "step": 11240 }, { "epoch": 1.03, "learning_rate": 4.482853728050014e-05, "loss": 0.8643, "step": 11250 }, { "epoch": 1.04, "learning_rate": 4.4823940424749475e-05, "loss": 0.7548, "step": 11260 }, { "epoch": 1.04, "learning_rate": 4.4819343568998804e-05, "loss": 0.8792, "step": 11270 }, { "epoch": 1.04, "learning_rate": 4.481474671324814e-05, "loss": 0.8676, "step": 11280 }, { "epoch": 1.04, "learning_rate": 4.481014985749748e-05, "loss": 0.9439, "step": 11290 }, { "epoch": 1.04, "learning_rate": 4.480555300174681e-05, "loss": 0.9277, "step": 11300 }, { "epoch": 1.04, "learning_rate": 4.4800956145996144e-05, "loss": 0.8223, "step": 11310 }, { "epoch": 1.04, "learning_rate": 4.479635929024547e-05, "loss": 1.0402, "step": 11320 }, { "epoch": 1.04, "learning_rate": 4.47917624344948e-05, "loss": 1.0117, "step": 11330 }, { "epoch": 1.04, "learning_rate": 4.478716557874414e-05, "loss": 0.9454, "step": 11340 }, { "epoch": 1.04, "learning_rate": 4.4782568722993476e-05, "loss": 0.9359, "step": 11350 }, { "epoch": 1.04, "learning_rate": 4.4777971867242806e-05, "loss": 0.9306, "step": 11360 }, { "epoch": 1.05, "learning_rate": 4.477337501149214e-05, "loss": 0.95, "step": 11370 }, { "epoch": 1.05, "learning_rate": 4.476877815574148e-05, "loss": 0.919, "step": 11380 }, { "epoch": 1.05, "learning_rate": 4.476418129999081e-05, "loss": 0.9745, "step": 11390 }, { "epoch": 1.05, "learning_rate": 4.4759584444240145e-05, "loss": 0.8766, "step": 11400 }, { "epoch": 1.05, "learning_rate": 4.4754987588489474e-05, "loss": 0.9099, "step": 11410 }, { "epoch": 1.05, "learning_rate": 4.4750390732738804e-05, "loss": 0.9029, "step": 11420 }, { "epoch": 1.05, "learning_rate": 4.474579387698814e-05, "loss": 0.9362, "step": 11430 }, { "epoch": 1.05, "learning_rate": 4.474119702123748e-05, "loss": 0.962, "step": 11440 }, { "epoch": 1.05, "learning_rate": 4.473660016548681e-05, "loss": 0.997, "step": 11450 }, { "epoch": 1.05, "learning_rate": 4.473200330973614e-05, "loss": 0.9225, "step": 11460 }, { "epoch": 1.05, "learning_rate": 4.472740645398548e-05, "loss": 0.9444, "step": 11470 }, { "epoch": 1.06, "learning_rate": 4.472280959823481e-05, "loss": 0.8482, "step": 11480 }, { "epoch": 1.06, "learning_rate": 4.4718212742484146e-05, "loss": 0.8891, "step": 11490 }, { "epoch": 1.06, "learning_rate": 4.4713615886733476e-05, "loss": 0.8547, "step": 11500 }, { "epoch": 1.06, "learning_rate": 4.4709019030982805e-05, "loss": 0.8996, "step": 11510 }, { "epoch": 1.06, "learning_rate": 4.470442217523214e-05, "loss": 0.8706, "step": 11520 }, { "epoch": 1.06, "learning_rate": 4.469982531948148e-05, "loss": 0.8515, "step": 11530 }, { "epoch": 1.06, "learning_rate": 4.469522846373081e-05, "loss": 0.9831, "step": 11540 }, { "epoch": 1.06, "learning_rate": 4.4690631607980145e-05, "loss": 0.9286, "step": 11550 }, { "epoch": 1.06, "learning_rate": 4.4686034752229474e-05, "loss": 0.8676, "step": 11560 }, { "epoch": 1.06, "learning_rate": 4.468143789647881e-05, "loss": 0.8777, "step": 11570 }, { "epoch": 1.06, "learning_rate": 4.467684104072815e-05, "loss": 0.8716, "step": 11580 }, { "epoch": 1.07, "learning_rate": 4.467224418497748e-05, "loss": 0.8807, "step": 11590 }, { "epoch": 1.07, "learning_rate": 4.4667647329226807e-05, "loss": 0.9552, "step": 11600 }, { "epoch": 1.07, "learning_rate": 4.466305047347614e-05, "loss": 0.7772, "step": 11610 }, { "epoch": 1.07, "learning_rate": 4.465845361772548e-05, "loss": 0.8932, "step": 11620 }, { "epoch": 1.07, "learning_rate": 4.465385676197481e-05, "loss": 0.907, "step": 11630 }, { "epoch": 1.07, "learning_rate": 4.4649259906224146e-05, "loss": 0.845, "step": 11640 }, { "epoch": 1.07, "learning_rate": 4.4644663050473475e-05, "loss": 1.0008, "step": 11650 }, { "epoch": 1.07, "learning_rate": 4.464006619472281e-05, "loss": 0.8506, "step": 11660 }, { "epoch": 1.07, "learning_rate": 4.463546933897215e-05, "loss": 0.9192, "step": 11670 }, { "epoch": 1.07, "learning_rate": 4.463087248322148e-05, "loss": 0.9594, "step": 11680 }, { "epoch": 1.07, "learning_rate": 4.462627562747081e-05, "loss": 0.9733, "step": 11690 }, { "epoch": 1.08, "learning_rate": 4.4621678771720144e-05, "loss": 0.9314, "step": 11700 }, { "epoch": 1.08, "learning_rate": 4.461708191596948e-05, "loss": 0.98, "step": 11710 }, { "epoch": 1.08, "learning_rate": 4.461248506021881e-05, "loss": 0.84, "step": 11720 }, { "epoch": 1.08, "learning_rate": 4.460788820446815e-05, "loss": 0.8419, "step": 11730 }, { "epoch": 1.08, "learning_rate": 4.4603291348717477e-05, "loss": 0.8898, "step": 11740 }, { "epoch": 1.08, "learning_rate": 4.459869449296681e-05, "loss": 0.8486, "step": 11750 }, { "epoch": 1.08, "learning_rate": 4.459409763721615e-05, "loss": 0.974, "step": 11760 }, { "epoch": 1.08, "learning_rate": 4.458950078146548e-05, "loss": 0.8913, "step": 11770 }, { "epoch": 1.08, "learning_rate": 4.458490392571481e-05, "loss": 0.9076, "step": 11780 }, { "epoch": 1.08, "learning_rate": 4.4580307069964145e-05, "loss": 0.8852, "step": 11790 }, { "epoch": 1.08, "learning_rate": 4.457571021421348e-05, "loss": 0.966, "step": 11800 }, { "epoch": 1.09, "learning_rate": 4.457111335846281e-05, "loss": 0.8525, "step": 11810 }, { "epoch": 1.09, "learning_rate": 4.456651650271215e-05, "loss": 0.9273, "step": 11820 }, { "epoch": 1.09, "learning_rate": 4.456191964696148e-05, "loss": 0.9589, "step": 11830 }, { "epoch": 1.09, "learning_rate": 4.4557322791210814e-05, "loss": 0.9207, "step": 11840 }, { "epoch": 1.09, "learning_rate": 4.455272593546015e-05, "loss": 0.8352, "step": 11850 }, { "epoch": 1.09, "learning_rate": 4.454812907970948e-05, "loss": 0.9615, "step": 11860 }, { "epoch": 1.09, "learning_rate": 4.454353222395881e-05, "loss": 0.9029, "step": 11870 }, { "epoch": 1.09, "learning_rate": 4.453893536820815e-05, "loss": 0.9257, "step": 11880 }, { "epoch": 1.09, "learning_rate": 4.4534338512457476e-05, "loss": 0.9517, "step": 11890 }, { "epoch": 1.09, "learning_rate": 4.452974165670681e-05, "loss": 0.9383, "step": 11900 }, { "epoch": 1.09, "learning_rate": 4.452514480095615e-05, "loss": 0.9106, "step": 11910 }, { "epoch": 1.1, "learning_rate": 4.452054794520548e-05, "loss": 0.9423, "step": 11920 }, { "epoch": 1.1, "learning_rate": 4.4515951089454815e-05, "loss": 0.9619, "step": 11930 }, { "epoch": 1.1, "learning_rate": 4.451135423370415e-05, "loss": 0.9074, "step": 11940 }, { "epoch": 1.1, "learning_rate": 4.450675737795348e-05, "loss": 0.9478, "step": 11950 }, { "epoch": 1.1, "learning_rate": 4.450216052220282e-05, "loss": 0.9828, "step": 11960 }, { "epoch": 1.1, "learning_rate": 4.449756366645215e-05, "loss": 0.9545, "step": 11970 }, { "epoch": 1.1, "learning_rate": 4.449296681070148e-05, "loss": 0.8727, "step": 11980 }, { "epoch": 1.1, "learning_rate": 4.4488369954950814e-05, "loss": 1.004, "step": 11990 }, { "epoch": 1.1, "learning_rate": 4.448377309920015e-05, "loss": 0.8317, "step": 12000 }, { "epoch": 1.1, "learning_rate": 4.447917624344948e-05, "loss": 0.9328, "step": 12010 }, { "epoch": 1.11, "learning_rate": 4.447457938769882e-05, "loss": 0.9026, "step": 12020 }, { "epoch": 1.11, "learning_rate": 4.446998253194815e-05, "loss": 0.8617, "step": 12030 }, { "epoch": 1.11, "learning_rate": 4.446538567619748e-05, "loss": 0.8596, "step": 12040 }, { "epoch": 1.11, "learning_rate": 4.446078882044682e-05, "loss": 0.968, "step": 12050 }, { "epoch": 1.11, "learning_rate": 4.445619196469615e-05, "loss": 1.0175, "step": 12060 }, { "epoch": 1.11, "learning_rate": 4.445159510894548e-05, "loss": 0.9531, "step": 12070 }, { "epoch": 1.11, "learning_rate": 4.4446998253194815e-05, "loss": 0.9737, "step": 12080 }, { "epoch": 1.11, "learning_rate": 4.444240139744415e-05, "loss": 0.8832, "step": 12090 }, { "epoch": 1.11, "learning_rate": 4.443780454169348e-05, "loss": 0.9962, "step": 12100 }, { "epoch": 1.11, "learning_rate": 4.443320768594282e-05, "loss": 0.9209, "step": 12110 }, { "epoch": 1.11, "learning_rate": 4.4428610830192154e-05, "loss": 0.8191, "step": 12120 }, { "epoch": 1.12, "learning_rate": 4.4424013974441484e-05, "loss": 0.9678, "step": 12130 }, { "epoch": 1.12, "learning_rate": 4.441941711869082e-05, "loss": 0.951, "step": 12140 }, { "epoch": 1.12, "learning_rate": 4.441482026294015e-05, "loss": 0.8869, "step": 12150 }, { "epoch": 1.12, "learning_rate": 4.441022340718948e-05, "loss": 0.9489, "step": 12160 }, { "epoch": 1.12, "learning_rate": 4.4405626551438816e-05, "loss": 0.9752, "step": 12170 }, { "epoch": 1.12, "learning_rate": 4.440102969568815e-05, "loss": 0.8537, "step": 12180 }, { "epoch": 1.12, "learning_rate": 4.439643283993748e-05, "loss": 0.9686, "step": 12190 }, { "epoch": 1.12, "learning_rate": 4.439183598418682e-05, "loss": 0.9393, "step": 12200 }, { "epoch": 1.12, "learning_rate": 4.4387239128436156e-05, "loss": 0.9201, "step": 12210 }, { "epoch": 1.12, "learning_rate": 4.4382642272685485e-05, "loss": 0.9115, "step": 12220 }, { "epoch": 1.12, "learning_rate": 4.437804541693482e-05, "loss": 0.9211, "step": 12230 }, { "epoch": 1.13, "learning_rate": 4.437344856118415e-05, "loss": 0.9772, "step": 12240 }, { "epoch": 1.13, "learning_rate": 4.436885170543348e-05, "loss": 0.9121, "step": 12250 }, { "epoch": 1.13, "learning_rate": 4.436425484968282e-05, "loss": 0.9377, "step": 12260 }, { "epoch": 1.13, "learning_rate": 4.4359657993932154e-05, "loss": 0.8728, "step": 12270 }, { "epoch": 1.13, "learning_rate": 4.4355061138181484e-05, "loss": 0.9826, "step": 12280 }, { "epoch": 1.13, "learning_rate": 4.435046428243082e-05, "loss": 0.9139, "step": 12290 }, { "epoch": 1.13, "learning_rate": 4.434586742668016e-05, "loss": 0.9592, "step": 12300 }, { "epoch": 1.13, "learning_rate": 4.4341270570929486e-05, "loss": 0.8585, "step": 12310 }, { "epoch": 1.13, "learning_rate": 4.433667371517882e-05, "loss": 0.9423, "step": 12320 }, { "epoch": 1.13, "learning_rate": 4.433207685942815e-05, "loss": 0.896, "step": 12330 }, { "epoch": 1.13, "learning_rate": 4.432748000367748e-05, "loss": 1.0304, "step": 12340 }, { "epoch": 1.14, "learning_rate": 4.432288314792682e-05, "loss": 1.0224, "step": 12350 }, { "epoch": 1.14, "learning_rate": 4.4318286292176155e-05, "loss": 0.9011, "step": 12360 }, { "epoch": 1.14, "learning_rate": 4.4313689436425485e-05, "loss": 0.9655, "step": 12370 }, { "epoch": 1.14, "learning_rate": 4.430909258067482e-05, "loss": 0.8919, "step": 12380 }, { "epoch": 1.14, "learning_rate": 4.430449572492416e-05, "loss": 0.9221, "step": 12390 }, { "epoch": 1.14, "learning_rate": 4.429989886917349e-05, "loss": 0.8645, "step": 12400 }, { "epoch": 1.14, "learning_rate": 4.4295302013422824e-05, "loss": 1.0265, "step": 12410 }, { "epoch": 1.14, "learning_rate": 4.4290705157672154e-05, "loss": 0.8864, "step": 12420 }, { "epoch": 1.14, "learning_rate": 4.4286108301921484e-05, "loss": 0.9577, "step": 12430 }, { "epoch": 1.14, "learning_rate": 4.428151144617082e-05, "loss": 0.8873, "step": 12440 }, { "epoch": 1.14, "learning_rate": 4.4276914590420156e-05, "loss": 0.9345, "step": 12450 }, { "epoch": 1.15, "learning_rate": 4.4272317734669486e-05, "loss": 0.9043, "step": 12460 }, { "epoch": 1.15, "learning_rate": 4.426772087891882e-05, "loss": 0.902, "step": 12470 }, { "epoch": 1.15, "learning_rate": 4.426312402316816e-05, "loss": 0.9901, "step": 12480 }, { "epoch": 1.15, "learning_rate": 4.425852716741749e-05, "loss": 0.8414, "step": 12490 }, { "epoch": 1.15, "learning_rate": 4.4253930311666825e-05, "loss": 0.9299, "step": 12500 }, { "epoch": 1.15, "learning_rate": 4.4249333455916155e-05, "loss": 0.9569, "step": 12510 }, { "epoch": 1.15, "learning_rate": 4.4244736600165485e-05, "loss": 0.9398, "step": 12520 }, { "epoch": 1.15, "learning_rate": 4.424013974441482e-05, "loss": 0.8658, "step": 12530 }, { "epoch": 1.15, "learning_rate": 4.423554288866416e-05, "loss": 0.9272, "step": 12540 }, { "epoch": 1.15, "learning_rate": 4.423094603291349e-05, "loss": 0.9907, "step": 12550 }, { "epoch": 1.15, "learning_rate": 4.4226349177162824e-05, "loss": 0.9393, "step": 12560 }, { "epoch": 1.16, "learning_rate": 4.422175232141216e-05, "loss": 0.8531, "step": 12570 }, { "epoch": 1.16, "learning_rate": 4.421715546566149e-05, "loss": 0.7897, "step": 12580 }, { "epoch": 1.16, "learning_rate": 4.4212558609910826e-05, "loss": 0.9248, "step": 12590 }, { "epoch": 1.16, "learning_rate": 4.4207961754160156e-05, "loss": 0.9084, "step": 12600 }, { "epoch": 1.16, "learning_rate": 4.4203364898409486e-05, "loss": 0.9582, "step": 12610 }, { "epoch": 1.16, "learning_rate": 4.419876804265882e-05, "loss": 0.9514, "step": 12620 }, { "epoch": 1.16, "learning_rate": 4.419417118690816e-05, "loss": 0.959, "step": 12630 }, { "epoch": 1.16, "learning_rate": 4.418957433115749e-05, "loss": 0.9548, "step": 12640 }, { "epoch": 1.16, "learning_rate": 4.4184977475406825e-05, "loss": 0.8033, "step": 12650 }, { "epoch": 1.16, "learning_rate": 4.418038061965616e-05, "loss": 0.8173, "step": 12660 }, { "epoch": 1.16, "learning_rate": 4.417578376390549e-05, "loss": 0.8038, "step": 12670 }, { "epoch": 1.17, "learning_rate": 4.417118690815483e-05, "loss": 0.8897, "step": 12680 }, { "epoch": 1.17, "learning_rate": 4.416659005240416e-05, "loss": 0.975, "step": 12690 }, { "epoch": 1.17, "learning_rate": 4.416199319665349e-05, "loss": 0.7838, "step": 12700 }, { "epoch": 1.17, "learning_rate": 4.4157396340902824e-05, "loss": 0.97, "step": 12710 }, { "epoch": 1.17, "learning_rate": 4.415279948515216e-05, "loss": 0.8997, "step": 12720 }, { "epoch": 1.17, "learning_rate": 4.414820262940149e-05, "loss": 1.0767, "step": 12730 }, { "epoch": 1.17, "learning_rate": 4.4143605773650826e-05, "loss": 0.8626, "step": 12740 }, { "epoch": 1.17, "learning_rate": 4.413900891790016e-05, "loss": 0.9489, "step": 12750 }, { "epoch": 1.17, "learning_rate": 4.413441206214949e-05, "loss": 1.0657, "step": 12760 }, { "epoch": 1.17, "learning_rate": 4.412981520639883e-05, "loss": 0.8751, "step": 12770 }, { "epoch": 1.17, "learning_rate": 4.412521835064816e-05, "loss": 0.8295, "step": 12780 }, { "epoch": 1.18, "learning_rate": 4.412062149489749e-05, "loss": 0.953, "step": 12790 }, { "epoch": 1.18, "learning_rate": 4.4116024639146825e-05, "loss": 0.9167, "step": 12800 }, { "epoch": 1.18, "learning_rate": 4.411142778339616e-05, "loss": 0.8756, "step": 12810 }, { "epoch": 1.18, "learning_rate": 4.410683092764549e-05, "loss": 0.8434, "step": 12820 }, { "epoch": 1.18, "learning_rate": 4.410223407189483e-05, "loss": 0.9078, "step": 12830 }, { "epoch": 1.18, "learning_rate": 4.4097637216144164e-05, "loss": 1.0064, "step": 12840 }, { "epoch": 1.18, "learning_rate": 4.4093040360393494e-05, "loss": 0.9519, "step": 12850 }, { "epoch": 1.18, "learning_rate": 4.408844350464283e-05, "loss": 0.9043, "step": 12860 }, { "epoch": 1.18, "learning_rate": 4.408384664889216e-05, "loss": 0.921, "step": 12870 }, { "epoch": 1.18, "learning_rate": 4.407924979314149e-05, "loss": 0.9402, "step": 12880 }, { "epoch": 1.19, "learning_rate": 4.4074652937390826e-05, "loss": 0.8604, "step": 12890 }, { "epoch": 1.19, "learning_rate": 4.407005608164016e-05, "loss": 0.8605, "step": 12900 }, { "epoch": 1.19, "learning_rate": 4.406545922588949e-05, "loss": 0.9168, "step": 12910 }, { "epoch": 1.19, "learning_rate": 4.406086237013883e-05, "loss": 0.953, "step": 12920 }, { "epoch": 1.19, "learning_rate": 4.4056265514388165e-05, "loss": 0.9079, "step": 12930 }, { "epoch": 1.19, "learning_rate": 4.4051668658637495e-05, "loss": 0.9988, "step": 12940 }, { "epoch": 1.19, "learning_rate": 4.404707180288683e-05, "loss": 0.9193, "step": 12950 }, { "epoch": 1.19, "learning_rate": 4.404247494713616e-05, "loss": 0.9079, "step": 12960 }, { "epoch": 1.19, "learning_rate": 4.403787809138549e-05, "loss": 0.9074, "step": 12970 }, { "epoch": 1.19, "learning_rate": 4.403328123563483e-05, "loss": 0.9233, "step": 12980 }, { "epoch": 1.19, "learning_rate": 4.4028684379884164e-05, "loss": 0.9063, "step": 12990 }, { "epoch": 1.2, "learning_rate": 4.402408752413349e-05, "loss": 1.0383, "step": 13000 }, { "epoch": 1.2, "learning_rate": 4.401949066838283e-05, "loss": 0.905, "step": 13010 }, { "epoch": 1.2, "learning_rate": 4.4014893812632166e-05, "loss": 0.856, "step": 13020 }, { "epoch": 1.2, "learning_rate": 4.4010296956881496e-05, "loss": 0.8404, "step": 13030 }, { "epoch": 1.2, "learning_rate": 4.400570010113083e-05, "loss": 0.8839, "step": 13040 }, { "epoch": 1.2, "learning_rate": 4.400110324538016e-05, "loss": 0.8266, "step": 13050 }, { "epoch": 1.2, "learning_rate": 4.399650638962949e-05, "loss": 0.9644, "step": 13060 }, { "epoch": 1.2, "learning_rate": 4.399190953387883e-05, "loss": 0.9381, "step": 13070 }, { "epoch": 1.2, "learning_rate": 4.3987312678128165e-05, "loss": 0.8116, "step": 13080 }, { "epoch": 1.2, "learning_rate": 4.3982715822377495e-05, "loss": 0.9633, "step": 13090 }, { "epoch": 1.2, "learning_rate": 4.397811896662683e-05, "loss": 0.917, "step": 13100 }, { "epoch": 1.21, "learning_rate": 4.397352211087617e-05, "loss": 0.9176, "step": 13110 }, { "epoch": 1.21, "learning_rate": 4.39689252551255e-05, "loss": 0.9243, "step": 13120 }, { "epoch": 1.21, "learning_rate": 4.3964328399374834e-05, "loss": 0.8715, "step": 13130 }, { "epoch": 1.21, "learning_rate": 4.395973154362416e-05, "loss": 0.9165, "step": 13140 }, { "epoch": 1.21, "learning_rate": 4.395513468787349e-05, "loss": 1.0029, "step": 13150 }, { "epoch": 1.21, "learning_rate": 4.395053783212283e-05, "loss": 1.018, "step": 13160 }, { "epoch": 1.21, "learning_rate": 4.3945940976372166e-05, "loss": 0.9339, "step": 13170 }, { "epoch": 1.21, "learning_rate": 4.3941344120621496e-05, "loss": 0.9509, "step": 13180 }, { "epoch": 1.21, "learning_rate": 4.393674726487083e-05, "loss": 0.9241, "step": 13190 }, { "epoch": 1.21, "learning_rate": 4.393215040912016e-05, "loss": 0.7836, "step": 13200 }, { "epoch": 1.21, "learning_rate": 4.39275535533695e-05, "loss": 0.8981, "step": 13210 }, { "epoch": 1.22, "learning_rate": 4.3922956697618835e-05, "loss": 0.9004, "step": 13220 }, { "epoch": 1.22, "learning_rate": 4.3918359841868165e-05, "loss": 0.8525, "step": 13230 }, { "epoch": 1.22, "learning_rate": 4.3913762986117494e-05, "loss": 0.9162, "step": 13240 }, { "epoch": 1.22, "learning_rate": 4.390916613036683e-05, "loss": 0.8821, "step": 13250 }, { "epoch": 1.22, "learning_rate": 4.390456927461617e-05, "loss": 0.9015, "step": 13260 }, { "epoch": 1.22, "learning_rate": 4.38999724188655e-05, "loss": 0.9268, "step": 13270 }, { "epoch": 1.22, "learning_rate": 4.3895375563114833e-05, "loss": 0.883, "step": 13280 }, { "epoch": 1.22, "learning_rate": 4.389077870736416e-05, "loss": 0.8715, "step": 13290 }, { "epoch": 1.22, "learning_rate": 4.38861818516135e-05, "loss": 0.8849, "step": 13300 }, { "epoch": 1.22, "learning_rate": 4.3881584995862836e-05, "loss": 0.8913, "step": 13310 }, { "epoch": 1.22, "learning_rate": 4.3876988140112166e-05, "loss": 0.8227, "step": 13320 }, { "epoch": 1.23, "learning_rate": 4.3872391284361495e-05, "loss": 0.9277, "step": 13330 }, { "epoch": 1.23, "learning_rate": 4.386779442861083e-05, "loss": 1.0101, "step": 13340 }, { "epoch": 1.23, "learning_rate": 4.386319757286017e-05, "loss": 0.8516, "step": 13350 }, { "epoch": 1.23, "learning_rate": 4.38586007171095e-05, "loss": 0.8958, "step": 13360 }, { "epoch": 1.23, "learning_rate": 4.3854003861358835e-05, "loss": 0.9194, "step": 13370 }, { "epoch": 1.23, "learning_rate": 4.3849407005608164e-05, "loss": 0.8234, "step": 13380 }, { "epoch": 1.23, "learning_rate": 4.38448101498575e-05, "loss": 0.9105, "step": 13390 }, { "epoch": 1.23, "learning_rate": 4.384021329410684e-05, "loss": 0.908, "step": 13400 }, { "epoch": 1.23, "learning_rate": 4.383561643835617e-05, "loss": 0.8807, "step": 13410 }, { "epoch": 1.23, "learning_rate": 4.38310195826055e-05, "loss": 1.0101, "step": 13420 }, { "epoch": 1.23, "learning_rate": 4.382642272685483e-05, "loss": 0.8633, "step": 13430 }, { "epoch": 1.24, "learning_rate": 4.382182587110417e-05, "loss": 0.9458, "step": 13440 }, { "epoch": 1.24, "learning_rate": 4.38172290153535e-05, "loss": 0.9018, "step": 13450 }, { "epoch": 1.24, "learning_rate": 4.3812632159602836e-05, "loss": 0.9383, "step": 13460 }, { "epoch": 1.24, "learning_rate": 4.3808035303852165e-05, "loss": 0.938, "step": 13470 }, { "epoch": 1.24, "learning_rate": 4.38034384481015e-05, "loss": 0.9473, "step": 13480 }, { "epoch": 1.24, "learning_rate": 4.379884159235084e-05, "loss": 0.8779, "step": 13490 }, { "epoch": 1.24, "learning_rate": 4.379424473660017e-05, "loss": 0.9166, "step": 13500 }, { "epoch": 1.24, "learning_rate": 4.37896478808495e-05, "loss": 0.9089, "step": 13510 }, { "epoch": 1.24, "learning_rate": 4.3785051025098834e-05, "loss": 0.9371, "step": 13520 }, { "epoch": 1.24, "learning_rate": 4.3780454169348164e-05, "loss": 0.9999, "step": 13530 }, { "epoch": 1.24, "learning_rate": 4.37758573135975e-05, "loss": 0.8423, "step": 13540 }, { "epoch": 1.25, "learning_rate": 4.377126045784684e-05, "loss": 0.9486, "step": 13550 }, { "epoch": 1.25, "learning_rate": 4.376666360209617e-05, "loss": 0.8699, "step": 13560 }, { "epoch": 1.25, "learning_rate": 4.37620667463455e-05, "loss": 0.9271, "step": 13570 }, { "epoch": 1.25, "learning_rate": 4.375746989059484e-05, "loss": 0.9672, "step": 13580 }, { "epoch": 1.25, "learning_rate": 4.375287303484417e-05, "loss": 0.9595, "step": 13590 }, { "epoch": 1.25, "learning_rate": 4.37482761790935e-05, "loss": 0.8653, "step": 13600 }, { "epoch": 1.25, "learning_rate": 4.3743679323342836e-05, "loss": 0.8884, "step": 13610 }, { "epoch": 1.25, "learning_rate": 4.3739082467592165e-05, "loss": 0.9457, "step": 13620 }, { "epoch": 1.25, "learning_rate": 4.37344856118415e-05, "loss": 0.92, "step": 13630 }, { "epoch": 1.25, "learning_rate": 4.372988875609084e-05, "loss": 0.9542, "step": 13640 }, { "epoch": 1.25, "learning_rate": 4.372529190034017e-05, "loss": 0.9206, "step": 13650 }, { "epoch": 1.26, "learning_rate": 4.3720695044589504e-05, "loss": 1.0014, "step": 13660 }, { "epoch": 1.26, "learning_rate": 4.371609818883884e-05, "loss": 0.803, "step": 13670 }, { "epoch": 1.26, "learning_rate": 4.371150133308817e-05, "loss": 0.8779, "step": 13680 }, { "epoch": 1.26, "learning_rate": 4.37069044773375e-05, "loss": 0.8641, "step": 13690 }, { "epoch": 1.26, "learning_rate": 4.370230762158684e-05, "loss": 1.034, "step": 13700 }, { "epoch": 1.26, "learning_rate": 4.3697710765836166e-05, "loss": 0.9457, "step": 13710 }, { "epoch": 1.26, "learning_rate": 4.36931139100855e-05, "loss": 0.898, "step": 13720 }, { "epoch": 1.26, "learning_rate": 4.368851705433484e-05, "loss": 0.9683, "step": 13730 }, { "epoch": 1.26, "learning_rate": 4.368392019858417e-05, "loss": 0.9281, "step": 13740 }, { "epoch": 1.26, "learning_rate": 4.3679323342833506e-05, "loss": 0.8917, "step": 13750 }, { "epoch": 1.27, "learning_rate": 4.367472648708284e-05, "loss": 0.9395, "step": 13760 }, { "epoch": 1.27, "learning_rate": 4.367012963133217e-05, "loss": 0.9146, "step": 13770 }, { "epoch": 1.27, "learning_rate": 4.36655327755815e-05, "loss": 0.9294, "step": 13780 }, { "epoch": 1.27, "learning_rate": 4.366093591983084e-05, "loss": 0.8161, "step": 13790 }, { "epoch": 1.27, "learning_rate": 4.365633906408017e-05, "loss": 0.8931, "step": 13800 }, { "epoch": 1.27, "learning_rate": 4.3651742208329504e-05, "loss": 0.9322, "step": 13810 }, { "epoch": 1.27, "learning_rate": 4.364714535257884e-05, "loss": 0.7893, "step": 13820 }, { "epoch": 1.27, "learning_rate": 4.364254849682817e-05, "loss": 1.0173, "step": 13830 }, { "epoch": 1.27, "learning_rate": 4.363795164107751e-05, "loss": 0.7748, "step": 13840 }, { "epoch": 1.27, "learning_rate": 4.363335478532684e-05, "loss": 0.8631, "step": 13850 }, { "epoch": 1.27, "learning_rate": 4.3628757929576166e-05, "loss": 0.9148, "step": 13860 }, { "epoch": 1.28, "learning_rate": 4.36241610738255e-05, "loss": 0.8311, "step": 13870 }, { "epoch": 1.28, "learning_rate": 4.361956421807484e-05, "loss": 0.8963, "step": 13880 }, { "epoch": 1.28, "learning_rate": 4.361496736232417e-05, "loss": 0.8973, "step": 13890 }, { "epoch": 1.28, "learning_rate": 4.3610370506573505e-05, "loss": 0.8653, "step": 13900 }, { "epoch": 1.28, "learning_rate": 4.360577365082284e-05, "loss": 0.9264, "step": 13910 }, { "epoch": 1.28, "learning_rate": 4.360117679507217e-05, "loss": 0.9279, "step": 13920 }, { "epoch": 1.28, "learning_rate": 4.359657993932151e-05, "loss": 0.8864, "step": 13930 }, { "epoch": 1.28, "learning_rate": 4.3591983083570844e-05, "loss": 0.9464, "step": 13940 }, { "epoch": 1.28, "learning_rate": 4.358738622782017e-05, "loss": 0.9862, "step": 13950 }, { "epoch": 1.28, "learning_rate": 4.3582789372069504e-05, "loss": 0.8883, "step": 13960 }, { "epoch": 1.28, "learning_rate": 4.357819251631884e-05, "loss": 0.8961, "step": 13970 }, { "epoch": 1.29, "learning_rate": 4.357359566056817e-05, "loss": 0.9038, "step": 13980 }, { "epoch": 1.29, "learning_rate": 4.3568998804817506e-05, "loss": 0.8216, "step": 13990 }, { "epoch": 1.29, "learning_rate": 4.356440194906684e-05, "loss": 0.9365, "step": 14000 }, { "epoch": 1.29, "learning_rate": 4.355980509331617e-05, "loss": 0.9689, "step": 14010 }, { "epoch": 1.29, "learning_rate": 4.355520823756551e-05, "loss": 0.9553, "step": 14020 }, { "epoch": 1.29, "learning_rate": 4.3550611381814846e-05, "loss": 0.95, "step": 14030 }, { "epoch": 1.29, "learning_rate": 4.354601452606417e-05, "loss": 0.9088, "step": 14040 }, { "epoch": 1.29, "learning_rate": 4.3541417670313505e-05, "loss": 0.9147, "step": 14050 }, { "epoch": 1.29, "learning_rate": 4.353682081456284e-05, "loss": 0.9576, "step": 14060 }, { "epoch": 1.29, "learning_rate": 4.353222395881217e-05, "loss": 0.9446, "step": 14070 }, { "epoch": 1.29, "learning_rate": 4.352762710306151e-05, "loss": 0.9137, "step": 14080 }, { "epoch": 1.3, "learning_rate": 4.3523030247310844e-05, "loss": 0.8809, "step": 14090 }, { "epoch": 1.3, "learning_rate": 4.3518433391560174e-05, "loss": 0.7567, "step": 14100 }, { "epoch": 1.3, "learning_rate": 4.351383653580951e-05, "loss": 0.8678, "step": 14110 }, { "epoch": 1.3, "learning_rate": 4.350923968005885e-05, "loss": 0.9753, "step": 14120 }, { "epoch": 1.3, "learning_rate": 4.350464282430817e-05, "loss": 0.9888, "step": 14130 }, { "epoch": 1.3, "learning_rate": 4.3500045968557506e-05, "loss": 0.944, "step": 14140 }, { "epoch": 1.3, "learning_rate": 4.349544911280684e-05, "loss": 0.9114, "step": 14150 }, { "epoch": 1.3, "learning_rate": 4.349085225705617e-05, "loss": 0.8941, "step": 14160 }, { "epoch": 1.3, "learning_rate": 4.348625540130551e-05, "loss": 0.9293, "step": 14170 }, { "epoch": 1.3, "learning_rate": 4.3481658545554845e-05, "loss": 0.9976, "step": 14180 }, { "epoch": 1.3, "learning_rate": 4.3477061689804175e-05, "loss": 0.8883, "step": 14190 }, { "epoch": 1.31, "learning_rate": 4.347246483405351e-05, "loss": 0.9261, "step": 14200 }, { "epoch": 1.31, "learning_rate": 4.346786797830285e-05, "loss": 0.8913, "step": 14210 }, { "epoch": 1.31, "learning_rate": 4.346327112255217e-05, "loss": 0.8615, "step": 14220 }, { "epoch": 1.31, "learning_rate": 4.345867426680151e-05, "loss": 0.9476, "step": 14230 }, { "epoch": 1.31, "learning_rate": 4.3454077411050844e-05, "loss": 0.7502, "step": 14240 }, { "epoch": 1.31, "learning_rate": 4.3449480555300174e-05, "loss": 0.9278, "step": 14250 }, { "epoch": 1.31, "learning_rate": 4.344488369954951e-05, "loss": 0.9364, "step": 14260 }, { "epoch": 1.31, "learning_rate": 4.3440286843798847e-05, "loss": 0.9017, "step": 14270 }, { "epoch": 1.31, "learning_rate": 4.3435689988048176e-05, "loss": 0.9878, "step": 14280 }, { "epoch": 1.31, "learning_rate": 4.343109313229751e-05, "loss": 0.9065, "step": 14290 }, { "epoch": 1.31, "learning_rate": 4.342649627654685e-05, "loss": 0.9126, "step": 14300 }, { "epoch": 1.32, "learning_rate": 4.342189942079617e-05, "loss": 0.9353, "step": 14310 }, { "epoch": 1.32, "learning_rate": 4.341730256504551e-05, "loss": 0.8782, "step": 14320 }, { "epoch": 1.32, "learning_rate": 4.3412705709294845e-05, "loss": 0.9248, "step": 14330 }, { "epoch": 1.32, "learning_rate": 4.3408108853544175e-05, "loss": 0.8857, "step": 14340 }, { "epoch": 1.32, "learning_rate": 4.340351199779351e-05, "loss": 0.8787, "step": 14350 }, { "epoch": 1.32, "learning_rate": 4.339891514204285e-05, "loss": 0.9467, "step": 14360 }, { "epoch": 1.32, "learning_rate": 4.339431828629218e-05, "loss": 0.9918, "step": 14370 }, { "epoch": 1.32, "learning_rate": 4.3389721430541514e-05, "loss": 0.9271, "step": 14380 }, { "epoch": 1.32, "learning_rate": 4.338512457479085e-05, "loss": 0.9298, "step": 14390 }, { "epoch": 1.32, "learning_rate": 4.338052771904017e-05, "loss": 1.0416, "step": 14400 }, { "epoch": 1.32, "learning_rate": 4.337593086328951e-05, "loss": 0.9614, "step": 14410 }, { "epoch": 1.33, "learning_rate": 4.3371334007538846e-05, "loss": 0.9721, "step": 14420 }, { "epoch": 1.33, "learning_rate": 4.3366737151788176e-05, "loss": 0.8874, "step": 14430 }, { "epoch": 1.33, "learning_rate": 4.336214029603751e-05, "loss": 0.8861, "step": 14440 }, { "epoch": 1.33, "learning_rate": 4.335754344028685e-05, "loss": 0.9264, "step": 14450 }, { "epoch": 1.33, "learning_rate": 4.335294658453618e-05, "loss": 0.9003, "step": 14460 }, { "epoch": 1.33, "learning_rate": 4.3348349728785515e-05, "loss": 0.8595, "step": 14470 }, { "epoch": 1.33, "learning_rate": 4.334375287303485e-05, "loss": 0.8879, "step": 14480 }, { "epoch": 1.33, "learning_rate": 4.3339156017284175e-05, "loss": 0.9585, "step": 14490 }, { "epoch": 1.33, "learning_rate": 4.333455916153351e-05, "loss": 0.8967, "step": 14500 }, { "epoch": 1.33, "learning_rate": 4.332996230578285e-05, "loss": 0.8466, "step": 14510 }, { "epoch": 1.33, "learning_rate": 4.332536545003218e-05, "loss": 0.8826, "step": 14520 }, { "epoch": 1.34, "learning_rate": 4.3320768594281514e-05, "loss": 0.9375, "step": 14530 }, { "epoch": 1.34, "learning_rate": 4.331617173853085e-05, "loss": 0.8456, "step": 14540 }, { "epoch": 1.34, "learning_rate": 4.331157488278018e-05, "loss": 0.8641, "step": 14550 }, { "epoch": 1.34, "learning_rate": 4.3306978027029516e-05, "loss": 0.9631, "step": 14560 }, { "epoch": 1.34, "learning_rate": 4.330238117127885e-05, "loss": 0.8837, "step": 14570 }, { "epoch": 1.34, "learning_rate": 4.3297784315528176e-05, "loss": 0.9343, "step": 14580 }, { "epoch": 1.34, "learning_rate": 4.329318745977751e-05, "loss": 0.9878, "step": 14590 }, { "epoch": 1.34, "learning_rate": 4.328859060402685e-05, "loss": 0.9439, "step": 14600 }, { "epoch": 1.34, "learning_rate": 4.328399374827618e-05, "loss": 0.8315, "step": 14610 }, { "epoch": 1.34, "learning_rate": 4.3279396892525515e-05, "loss": 0.9415, "step": 14620 }, { "epoch": 1.35, "learning_rate": 4.327480003677485e-05, "loss": 0.8391, "step": 14630 }, { "epoch": 1.35, "learning_rate": 4.327020318102418e-05, "loss": 0.875, "step": 14640 }, { "epoch": 1.35, "learning_rate": 4.326560632527352e-05, "loss": 0.9985, "step": 14650 }, { "epoch": 1.35, "learning_rate": 4.3261009469522854e-05, "loss": 0.8753, "step": 14660 }, { "epoch": 1.35, "learning_rate": 4.325641261377218e-05, "loss": 0.9666, "step": 14670 }, { "epoch": 1.35, "learning_rate": 4.325181575802151e-05, "loss": 0.9287, "step": 14680 }, { "epoch": 1.35, "learning_rate": 4.324721890227085e-05, "loss": 0.9222, "step": 14690 }, { "epoch": 1.35, "learning_rate": 4.324262204652018e-05, "loss": 0.9128, "step": 14700 }, { "epoch": 1.35, "learning_rate": 4.3238025190769516e-05, "loss": 0.906, "step": 14710 }, { "epoch": 1.35, "learning_rate": 4.323342833501885e-05, "loss": 0.9607, "step": 14720 }, { "epoch": 1.35, "learning_rate": 4.322883147926818e-05, "loss": 1.0337, "step": 14730 }, { "epoch": 1.36, "learning_rate": 4.322423462351752e-05, "loss": 0.8988, "step": 14740 }, { "epoch": 1.36, "learning_rate": 4.3219637767766855e-05, "loss": 0.9229, "step": 14750 }, { "epoch": 1.36, "learning_rate": 4.321504091201618e-05, "loss": 0.9016, "step": 14760 }, { "epoch": 1.36, "learning_rate": 4.3210444056265515e-05, "loss": 0.9899, "step": 14770 }, { "epoch": 1.36, "learning_rate": 4.320584720051485e-05, "loss": 0.8438, "step": 14780 }, { "epoch": 1.36, "learning_rate": 4.320125034476418e-05, "loss": 0.9742, "step": 14790 }, { "epoch": 1.36, "learning_rate": 4.319665348901352e-05, "loss": 0.8879, "step": 14800 }, { "epoch": 1.36, "learning_rate": 4.3192056633262854e-05, "loss": 0.9296, "step": 14810 }, { "epoch": 1.36, "learning_rate": 4.3187459777512183e-05, "loss": 0.9947, "step": 14820 }, { "epoch": 1.36, "learning_rate": 4.318286292176152e-05, "loss": 0.8589, "step": 14830 }, { "epoch": 1.36, "learning_rate": 4.317826606601085e-05, "loss": 0.9452, "step": 14840 }, { "epoch": 1.37, "learning_rate": 4.317366921026018e-05, "loss": 0.7949, "step": 14850 }, { "epoch": 1.37, "learning_rate": 4.3169072354509516e-05, "loss": 0.8965, "step": 14860 }, { "epoch": 1.37, "learning_rate": 4.316447549875885e-05, "loss": 1.0258, "step": 14870 }, { "epoch": 1.37, "learning_rate": 4.315987864300818e-05, "loss": 0.8049, "step": 14880 }, { "epoch": 1.37, "learning_rate": 4.315528178725752e-05, "loss": 0.8688, "step": 14890 }, { "epoch": 1.37, "learning_rate": 4.3150684931506855e-05, "loss": 0.8445, "step": 14900 }, { "epoch": 1.37, "learning_rate": 4.3146088075756185e-05, "loss": 0.8768, "step": 14910 }, { "epoch": 1.37, "learning_rate": 4.314149122000552e-05, "loss": 0.9154, "step": 14920 }, { "epoch": 1.37, "learning_rate": 4.313689436425485e-05, "loss": 0.7818, "step": 14930 }, { "epoch": 1.37, "learning_rate": 4.313229750850418e-05, "loss": 0.9111, "step": 14940 }, { "epoch": 1.37, "learning_rate": 4.312770065275352e-05, "loss": 0.8537, "step": 14950 }, { "epoch": 1.38, "learning_rate": 4.3123103797002853e-05, "loss": 0.9774, "step": 14960 }, { "epoch": 1.38, "learning_rate": 4.311850694125218e-05, "loss": 1.0547, "step": 14970 }, { "epoch": 1.38, "learning_rate": 4.311391008550152e-05, "loss": 0.9624, "step": 14980 }, { "epoch": 1.38, "learning_rate": 4.3109313229750856e-05, "loss": 0.9212, "step": 14990 }, { "epoch": 1.38, "learning_rate": 4.3104716374000186e-05, "loss": 0.998, "step": 15000 }, { "epoch": 1.38, "eval_accuracy": 0.5524017467248908, "eval_loss": 0.9208794832229614, "eval_runtime": 159.7479, "eval_samples_per_second": 28.67, "eval_steps_per_second": 3.587, "step": 15000 }, { "epoch": 1.38, "learning_rate": 4.310011951824952e-05, "loss": 0.8646, "step": 15010 }, { "epoch": 1.38, "learning_rate": 4.309552266249885e-05, "loss": 1.0139, "step": 15020 }, { "epoch": 1.38, "learning_rate": 4.309092580674818e-05, "loss": 0.9508, "step": 15030 }, { "epoch": 1.38, "learning_rate": 4.308632895099752e-05, "loss": 0.9612, "step": 15040 }, { "epoch": 1.38, "learning_rate": 4.3081732095246855e-05, "loss": 0.8929, "step": 15050 }, { "epoch": 1.38, "learning_rate": 4.3077135239496184e-05, "loss": 0.9945, "step": 15060 }, { "epoch": 1.39, "learning_rate": 4.307253838374552e-05, "loss": 0.8415, "step": 15070 }, { "epoch": 1.39, "learning_rate": 4.306794152799486e-05, "loss": 0.8619, "step": 15080 }, { "epoch": 1.39, "learning_rate": 4.306334467224419e-05, "loss": 0.8453, "step": 15090 }, { "epoch": 1.39, "learning_rate": 4.3058747816493523e-05, "loss": 0.8524, "step": 15100 }, { "epoch": 1.39, "learning_rate": 4.305415096074285e-05, "loss": 0.9199, "step": 15110 }, { "epoch": 1.39, "learning_rate": 4.304955410499218e-05, "loss": 0.9933, "step": 15120 }, { "epoch": 1.39, "learning_rate": 4.304495724924152e-05, "loss": 0.8525, "step": 15130 }, { "epoch": 1.39, "learning_rate": 4.3040360393490856e-05, "loss": 0.8448, "step": 15140 }, { "epoch": 1.39, "learning_rate": 4.3035763537740186e-05, "loss": 0.8921, "step": 15150 }, { "epoch": 1.39, "learning_rate": 4.303116668198952e-05, "loss": 0.9722, "step": 15160 }, { "epoch": 1.39, "learning_rate": 4.302656982623885e-05, "loss": 0.8379, "step": 15170 }, { "epoch": 1.4, "learning_rate": 4.302197297048819e-05, "loss": 0.917, "step": 15180 }, { "epoch": 1.4, "learning_rate": 4.3017376114737525e-05, "loss": 1.0581, "step": 15190 }, { "epoch": 1.4, "learning_rate": 4.3012779258986854e-05, "loss": 0.9983, "step": 15200 }, { "epoch": 1.4, "learning_rate": 4.3008182403236184e-05, "loss": 0.8928, "step": 15210 }, { "epoch": 1.4, "learning_rate": 4.300358554748552e-05, "loss": 0.8413, "step": 15220 }, { "epoch": 1.4, "learning_rate": 4.299898869173486e-05, "loss": 0.8468, "step": 15230 }, { "epoch": 1.4, "learning_rate": 4.299439183598419e-05, "loss": 0.8105, "step": 15240 }, { "epoch": 1.4, "learning_rate": 4.298979498023352e-05, "loss": 0.8733, "step": 15250 }, { "epoch": 1.4, "learning_rate": 4.298519812448285e-05, "loss": 0.8697, "step": 15260 }, { "epoch": 1.4, "learning_rate": 4.298060126873219e-05, "loss": 0.9559, "step": 15270 }, { "epoch": 1.4, "learning_rate": 4.2976004412981526e-05, "loss": 0.8271, "step": 15280 }, { "epoch": 1.41, "learning_rate": 4.2971407557230856e-05, "loss": 0.9252, "step": 15290 }, { "epoch": 1.41, "learning_rate": 4.296681070148019e-05, "loss": 0.9227, "step": 15300 }, { "epoch": 1.41, "learning_rate": 4.296221384572952e-05, "loss": 0.877, "step": 15310 }, { "epoch": 1.41, "learning_rate": 4.295761698997886e-05, "loss": 0.8812, "step": 15320 }, { "epoch": 1.41, "learning_rate": 4.295302013422819e-05, "loss": 0.995, "step": 15330 }, { "epoch": 1.41, "learning_rate": 4.2948423278477524e-05, "loss": 0.8399, "step": 15340 }, { "epoch": 1.41, "learning_rate": 4.2943826422726854e-05, "loss": 0.7986, "step": 15350 }, { "epoch": 1.41, "learning_rate": 4.293922956697619e-05, "loss": 1.0293, "step": 15360 }, { "epoch": 1.41, "learning_rate": 4.293463271122553e-05, "loss": 0.9551, "step": 15370 }, { "epoch": 1.41, "learning_rate": 4.293003585547486e-05, "loss": 0.8944, "step": 15380 }, { "epoch": 1.41, "learning_rate": 4.292543899972419e-05, "loss": 0.9954, "step": 15390 }, { "epoch": 1.42, "learning_rate": 4.292084214397352e-05, "loss": 0.9534, "step": 15400 }, { "epoch": 1.42, "learning_rate": 4.291624528822286e-05, "loss": 0.9199, "step": 15410 }, { "epoch": 1.42, "learning_rate": 4.291164843247219e-05, "loss": 0.9045, "step": 15420 }, { "epoch": 1.42, "learning_rate": 4.2907051576721526e-05, "loss": 0.868, "step": 15430 }, { "epoch": 1.42, "learning_rate": 4.2902454720970855e-05, "loss": 0.8492, "step": 15440 }, { "epoch": 1.42, "learning_rate": 4.289785786522019e-05, "loss": 0.9878, "step": 15450 }, { "epoch": 1.42, "learning_rate": 4.289326100946953e-05, "loss": 0.8968, "step": 15460 }, { "epoch": 1.42, "learning_rate": 4.288866415371886e-05, "loss": 0.9121, "step": 15470 }, { "epoch": 1.42, "learning_rate": 4.2884067297968194e-05, "loss": 0.9234, "step": 15480 }, { "epoch": 1.42, "learning_rate": 4.2879470442217524e-05, "loss": 0.8812, "step": 15490 }, { "epoch": 1.43, "learning_rate": 4.2874873586466854e-05, "loss": 0.9316, "step": 15500 }, { "epoch": 1.43, "learning_rate": 4.287027673071619e-05, "loss": 0.9155, "step": 15510 }, { "epoch": 1.43, "learning_rate": 4.286567987496553e-05, "loss": 0.9605, "step": 15520 }, { "epoch": 1.43, "learning_rate": 4.2861083019214857e-05, "loss": 0.887, "step": 15530 }, { "epoch": 1.43, "learning_rate": 4.285648616346419e-05, "loss": 0.959, "step": 15540 }, { "epoch": 1.43, "learning_rate": 4.285188930771353e-05, "loss": 0.9795, "step": 15550 }, { "epoch": 1.43, "learning_rate": 4.284729245196286e-05, "loss": 0.9638, "step": 15560 }, { "epoch": 1.43, "learning_rate": 4.2842695596212196e-05, "loss": 0.8615, "step": 15570 }, { "epoch": 1.43, "learning_rate": 4.2838098740461525e-05, "loss": 0.9224, "step": 15580 }, { "epoch": 1.43, "learning_rate": 4.2833501884710855e-05, "loss": 0.845, "step": 15590 }, { "epoch": 1.43, "learning_rate": 4.282890502896019e-05, "loss": 0.9, "step": 15600 }, { "epoch": 1.44, "learning_rate": 4.282430817320953e-05, "loss": 0.8797, "step": 15610 }, { "epoch": 1.44, "learning_rate": 4.281971131745886e-05, "loss": 0.841, "step": 15620 }, { "epoch": 1.44, "learning_rate": 4.2815114461708194e-05, "loss": 0.9366, "step": 15630 }, { "epoch": 1.44, "learning_rate": 4.281051760595753e-05, "loss": 0.8984, "step": 15640 }, { "epoch": 1.44, "learning_rate": 4.280592075020686e-05, "loss": 0.8837, "step": 15650 }, { "epoch": 1.44, "learning_rate": 4.28013238944562e-05, "loss": 0.8772, "step": 15660 }, { "epoch": 1.44, "learning_rate": 4.2796727038705527e-05, "loss": 0.8479, "step": 15670 }, { "epoch": 1.44, "learning_rate": 4.2792130182954856e-05, "loss": 0.9712, "step": 15680 }, { "epoch": 1.44, "learning_rate": 4.278753332720419e-05, "loss": 0.8305, "step": 15690 }, { "epoch": 1.44, "learning_rate": 4.278293647145353e-05, "loss": 0.8753, "step": 15700 }, { "epoch": 1.44, "learning_rate": 4.277833961570286e-05, "loss": 0.8624, "step": 15710 }, { "epoch": 1.45, "learning_rate": 4.2773742759952195e-05, "loss": 0.8033, "step": 15720 }, { "epoch": 1.45, "learning_rate": 4.276914590420153e-05, "loss": 0.9228, "step": 15730 }, { "epoch": 1.45, "learning_rate": 4.276454904845086e-05, "loss": 0.902, "step": 15740 }, { "epoch": 1.45, "learning_rate": 4.27599521927002e-05, "loss": 0.9774, "step": 15750 }, { "epoch": 1.45, "learning_rate": 4.275535533694953e-05, "loss": 0.8696, "step": 15760 }, { "epoch": 1.45, "learning_rate": 4.275075848119886e-05, "loss": 0.9166, "step": 15770 }, { "epoch": 1.45, "learning_rate": 4.2746161625448194e-05, "loss": 0.9399, "step": 15780 }, { "epoch": 1.45, "learning_rate": 4.274156476969753e-05, "loss": 0.8976, "step": 15790 }, { "epoch": 1.45, "learning_rate": 4.273696791394686e-05, "loss": 0.8523, "step": 15800 }, { "epoch": 1.45, "learning_rate": 4.2732371058196197e-05, "loss": 0.9551, "step": 15810 }, { "epoch": 1.45, "learning_rate": 4.272777420244553e-05, "loss": 0.9317, "step": 15820 }, { "epoch": 1.46, "learning_rate": 4.272317734669486e-05, "loss": 0.9672, "step": 15830 }, { "epoch": 1.46, "learning_rate": 4.27185804909442e-05, "loss": 0.913, "step": 15840 }, { "epoch": 1.46, "learning_rate": 4.271398363519353e-05, "loss": 0.9159, "step": 15850 }, { "epoch": 1.46, "learning_rate": 4.270938677944286e-05, "loss": 0.9109, "step": 15860 }, { "epoch": 1.46, "learning_rate": 4.2704789923692195e-05, "loss": 0.868, "step": 15870 }, { "epoch": 1.46, "learning_rate": 4.270019306794153e-05, "loss": 0.8959, "step": 15880 }, { "epoch": 1.46, "learning_rate": 4.269559621219086e-05, "loss": 0.8121, "step": 15890 }, { "epoch": 1.46, "learning_rate": 4.26909993564402e-05, "loss": 1.002, "step": 15900 }, { "epoch": 1.46, "learning_rate": 4.2686402500689534e-05, "loss": 0.8285, "step": 15910 }, { "epoch": 1.46, "learning_rate": 4.2681805644938864e-05, "loss": 0.9122, "step": 15920 }, { "epoch": 1.46, "learning_rate": 4.26772087891882e-05, "loss": 0.9703, "step": 15930 }, { "epoch": 1.47, "learning_rate": 4.267261193343753e-05, "loss": 0.9006, "step": 15940 }, { "epoch": 1.47, "learning_rate": 4.266801507768686e-05, "loss": 0.9382, "step": 15950 }, { "epoch": 1.47, "learning_rate": 4.2663418221936196e-05, "loss": 0.828, "step": 15960 }, { "epoch": 1.47, "learning_rate": 4.265882136618553e-05, "loss": 0.8313, "step": 15970 }, { "epoch": 1.47, "learning_rate": 4.265422451043486e-05, "loss": 0.7578, "step": 15980 }, { "epoch": 1.47, "learning_rate": 4.26496276546842e-05, "loss": 0.9251, "step": 15990 }, { "epoch": 1.47, "learning_rate": 4.2645030798933535e-05, "loss": 0.9256, "step": 16000 }, { "epoch": 1.47, "learning_rate": 4.2640433943182865e-05, "loss": 0.9624, "step": 16010 }, { "epoch": 1.47, "learning_rate": 4.26358370874322e-05, "loss": 1.0759, "step": 16020 }, { "epoch": 1.47, "learning_rate": 4.263124023168153e-05, "loss": 0.8425, "step": 16030 }, { "epoch": 1.47, "learning_rate": 4.262664337593086e-05, "loss": 1.0333, "step": 16040 }, { "epoch": 1.48, "learning_rate": 4.26220465201802e-05, "loss": 0.8345, "step": 16050 }, { "epoch": 1.48, "learning_rate": 4.2617449664429534e-05, "loss": 0.919, "step": 16060 }, { "epoch": 1.48, "learning_rate": 4.2612852808678864e-05, "loss": 0.8761, "step": 16070 }, { "epoch": 1.48, "learning_rate": 4.26082559529282e-05, "loss": 0.8714, "step": 16080 }, { "epoch": 1.48, "learning_rate": 4.260365909717754e-05, "loss": 0.9486, "step": 16090 }, { "epoch": 1.48, "learning_rate": 4.2599062241426866e-05, "loss": 0.8847, "step": 16100 }, { "epoch": 1.48, "learning_rate": 4.25944653856762e-05, "loss": 0.9233, "step": 16110 }, { "epoch": 1.48, "learning_rate": 4.258986852992553e-05, "loss": 0.9652, "step": 16120 }, { "epoch": 1.48, "learning_rate": 4.258527167417486e-05, "loss": 0.9564, "step": 16130 }, { "epoch": 1.48, "learning_rate": 4.25806748184242e-05, "loss": 0.9893, "step": 16140 }, { "epoch": 1.48, "learning_rate": 4.2576077962673535e-05, "loss": 0.9195, "step": 16150 }, { "epoch": 1.49, "learning_rate": 4.2571481106922865e-05, "loss": 0.9592, "step": 16160 }, { "epoch": 1.49, "learning_rate": 4.25668842511722e-05, "loss": 1.0199, "step": 16170 }, { "epoch": 1.49, "learning_rate": 4.256228739542154e-05, "loss": 0.8361, "step": 16180 }, { "epoch": 1.49, "learning_rate": 4.255769053967087e-05, "loss": 1.0159, "step": 16190 }, { "epoch": 1.49, "learning_rate": 4.2553093683920204e-05, "loss": 1.0119, "step": 16200 }, { "epoch": 1.49, "learning_rate": 4.2548496828169534e-05, "loss": 0.8967, "step": 16210 }, { "epoch": 1.49, "learning_rate": 4.2543899972418863e-05, "loss": 0.9577, "step": 16220 }, { "epoch": 1.49, "learning_rate": 4.25393031166682e-05, "loss": 1.0021, "step": 16230 }, { "epoch": 1.49, "learning_rate": 4.2534706260917536e-05, "loss": 0.9336, "step": 16240 }, { "epoch": 1.49, "learning_rate": 4.2530109405166866e-05, "loss": 0.8832, "step": 16250 }, { "epoch": 1.49, "learning_rate": 4.25255125494162e-05, "loss": 0.9232, "step": 16260 }, { "epoch": 1.5, "learning_rate": 4.252091569366554e-05, "loss": 0.8358, "step": 16270 }, { "epoch": 1.5, "learning_rate": 4.251631883791487e-05, "loss": 0.8668, "step": 16280 }, { "epoch": 1.5, "learning_rate": 4.2511721982164205e-05, "loss": 1.0168, "step": 16290 }, { "epoch": 1.5, "learning_rate": 4.2507125126413535e-05, "loss": 0.9167, "step": 16300 }, { "epoch": 1.5, "learning_rate": 4.2502528270662865e-05, "loss": 0.7962, "step": 16310 }, { "epoch": 1.5, "learning_rate": 4.24979314149122e-05, "loss": 0.838, "step": 16320 }, { "epoch": 1.5, "learning_rate": 4.249333455916154e-05, "loss": 0.8936, "step": 16330 }, { "epoch": 1.5, "learning_rate": 4.248873770341087e-05, "loss": 0.8534, "step": 16340 }, { "epoch": 1.5, "learning_rate": 4.2484140847660204e-05, "loss": 0.8666, "step": 16350 }, { "epoch": 1.5, "learning_rate": 4.247954399190954e-05, "loss": 0.9402, "step": 16360 }, { "epoch": 1.51, "learning_rate": 4.247494713615887e-05, "loss": 0.9416, "step": 16370 }, { "epoch": 1.51, "learning_rate": 4.2470350280408206e-05, "loss": 0.8823, "step": 16380 }, { "epoch": 1.51, "learning_rate": 4.2465753424657536e-05, "loss": 0.9079, "step": 16390 }, { "epoch": 1.51, "learning_rate": 4.2461156568906866e-05, "loss": 0.8404, "step": 16400 }, { "epoch": 1.51, "learning_rate": 4.24565597131562e-05, "loss": 0.9094, "step": 16410 }, { "epoch": 1.51, "learning_rate": 4.245196285740554e-05, "loss": 0.9025, "step": 16420 }, { "epoch": 1.51, "learning_rate": 4.244736600165487e-05, "loss": 0.8279, "step": 16430 }, { "epoch": 1.51, "learning_rate": 4.2442769145904205e-05, "loss": 1.0834, "step": 16440 }, { "epoch": 1.51, "learning_rate": 4.243817229015354e-05, "loss": 0.8816, "step": 16450 }, { "epoch": 1.51, "learning_rate": 4.243357543440287e-05, "loss": 0.9011, "step": 16460 }, { "epoch": 1.51, "learning_rate": 4.242897857865221e-05, "loss": 0.8143, "step": 16470 }, { "epoch": 1.52, "learning_rate": 4.242438172290154e-05, "loss": 1.018, "step": 16480 }, { "epoch": 1.52, "learning_rate": 4.241978486715087e-05, "loss": 0.8854, "step": 16490 }, { "epoch": 1.52, "learning_rate": 4.2415188011400203e-05, "loss": 0.9413, "step": 16500 }, { "epoch": 1.52, "learning_rate": 4.241059115564954e-05, "loss": 0.902, "step": 16510 }, { "epoch": 1.52, "learning_rate": 4.240599429989887e-05, "loss": 0.8677, "step": 16520 }, { "epoch": 1.52, "learning_rate": 4.2401397444148206e-05, "loss": 0.9347, "step": 16530 }, { "epoch": 1.52, "learning_rate": 4.239680058839754e-05, "loss": 0.8232, "step": 16540 }, { "epoch": 1.52, "learning_rate": 4.239220373264687e-05, "loss": 0.8357, "step": 16550 }, { "epoch": 1.52, "learning_rate": 4.238760687689621e-05, "loss": 0.745, "step": 16560 }, { "epoch": 1.52, "learning_rate": 4.238301002114554e-05, "loss": 0.7559, "step": 16570 }, { "epoch": 1.52, "learning_rate": 4.237841316539487e-05, "loss": 0.8659, "step": 16580 }, { "epoch": 1.53, "learning_rate": 4.2373816309644205e-05, "loss": 0.9684, "step": 16590 }, { "epoch": 1.53, "learning_rate": 4.236921945389354e-05, "loss": 0.8996, "step": 16600 }, { "epoch": 1.53, "learning_rate": 4.236462259814287e-05, "loss": 0.7936, "step": 16610 }, { "epoch": 1.53, "learning_rate": 4.236002574239221e-05, "loss": 0.8113, "step": 16620 }, { "epoch": 1.53, "learning_rate": 4.2355428886641544e-05, "loss": 0.9122, "step": 16630 }, { "epoch": 1.53, "learning_rate": 4.2350832030890874e-05, "loss": 0.9304, "step": 16640 }, { "epoch": 1.53, "learning_rate": 4.234623517514021e-05, "loss": 1.011, "step": 16650 }, { "epoch": 1.53, "learning_rate": 4.234163831938954e-05, "loss": 0.764, "step": 16660 }, { "epoch": 1.53, "learning_rate": 4.233704146363887e-05, "loss": 1.0059, "step": 16670 }, { "epoch": 1.53, "learning_rate": 4.2332444607888206e-05, "loss": 0.8689, "step": 16680 }, { "epoch": 1.53, "learning_rate": 4.232784775213754e-05, "loss": 0.9827, "step": 16690 }, { "epoch": 1.54, "learning_rate": 4.232325089638687e-05, "loss": 0.8285, "step": 16700 }, { "epoch": 1.54, "learning_rate": 4.231865404063621e-05, "loss": 0.9238, "step": 16710 }, { "epoch": 1.54, "learning_rate": 4.231405718488554e-05, "loss": 0.9087, "step": 16720 }, { "epoch": 1.54, "learning_rate": 4.2309460329134875e-05, "loss": 0.9439, "step": 16730 }, { "epoch": 1.54, "learning_rate": 4.230486347338421e-05, "loss": 0.9504, "step": 16740 }, { "epoch": 1.54, "learning_rate": 4.230026661763354e-05, "loss": 0.9417, "step": 16750 }, { "epoch": 1.54, "learning_rate": 4.229566976188287e-05, "loss": 0.9474, "step": 16760 }, { "epoch": 1.54, "learning_rate": 4.229107290613221e-05, "loss": 0.8691, "step": 16770 }, { "epoch": 1.54, "learning_rate": 4.2286476050381544e-05, "loss": 0.8645, "step": 16780 }, { "epoch": 1.54, "learning_rate": 4.228187919463087e-05, "loss": 0.9543, "step": 16790 }, { "epoch": 1.54, "learning_rate": 4.227728233888021e-05, "loss": 0.9068, "step": 16800 }, { "epoch": 1.55, "learning_rate": 4.227268548312954e-05, "loss": 0.8251, "step": 16810 }, { "epoch": 1.55, "learning_rate": 4.2268088627378876e-05, "loss": 0.9839, "step": 16820 }, { "epoch": 1.55, "learning_rate": 4.226349177162821e-05, "loss": 0.9694, "step": 16830 }, { "epoch": 1.55, "learning_rate": 4.225889491587754e-05, "loss": 0.9232, "step": 16840 }, { "epoch": 1.55, "learning_rate": 4.225429806012687e-05, "loss": 0.9777, "step": 16850 }, { "epoch": 1.55, "learning_rate": 4.224970120437621e-05, "loss": 0.8524, "step": 16860 }, { "epoch": 1.55, "learning_rate": 4.2245104348625545e-05, "loss": 0.9519, "step": 16870 }, { "epoch": 1.55, "learning_rate": 4.2240507492874874e-05, "loss": 0.919, "step": 16880 }, { "epoch": 1.55, "learning_rate": 4.223591063712421e-05, "loss": 0.945, "step": 16890 }, { "epoch": 1.55, "learning_rate": 4.223131378137354e-05, "loss": 0.8519, "step": 16900 }, { "epoch": 1.55, "learning_rate": 4.222671692562288e-05, "loss": 0.8355, "step": 16910 }, { "epoch": 1.56, "learning_rate": 4.2222120069872214e-05, "loss": 0.8547, "step": 16920 }, { "epoch": 1.56, "learning_rate": 4.221752321412154e-05, "loss": 0.915, "step": 16930 }, { "epoch": 1.56, "learning_rate": 4.221292635837087e-05, "loss": 1.0217, "step": 16940 }, { "epoch": 1.56, "learning_rate": 4.220832950262021e-05, "loss": 0.9823, "step": 16950 }, { "epoch": 1.56, "learning_rate": 4.2203732646869546e-05, "loss": 0.8066, "step": 16960 }, { "epoch": 1.56, "learning_rate": 4.2199135791118876e-05, "loss": 0.8725, "step": 16970 }, { "epoch": 1.56, "learning_rate": 4.219453893536821e-05, "loss": 0.8298, "step": 16980 }, { "epoch": 1.56, "learning_rate": 4.218994207961754e-05, "loss": 0.8755, "step": 16990 }, { "epoch": 1.56, "learning_rate": 4.218534522386688e-05, "loss": 0.8106, "step": 17000 }, { "epoch": 1.56, "learning_rate": 4.2180748368116215e-05, "loss": 0.9167, "step": 17010 }, { "epoch": 1.56, "learning_rate": 4.2176151512365544e-05, "loss": 0.8081, "step": 17020 }, { "epoch": 1.57, "learning_rate": 4.2171554656614874e-05, "loss": 0.8725, "step": 17030 }, { "epoch": 1.57, "learning_rate": 4.216695780086421e-05, "loss": 0.9977, "step": 17040 }, { "epoch": 1.57, "learning_rate": 4.216236094511354e-05, "loss": 0.9981, "step": 17050 }, { "epoch": 1.57, "learning_rate": 4.215776408936288e-05, "loss": 0.9359, "step": 17060 }, { "epoch": 1.57, "learning_rate": 4.215316723361221e-05, "loss": 0.9377, "step": 17070 }, { "epoch": 1.57, "learning_rate": 4.214857037786154e-05, "loss": 0.8336, "step": 17080 }, { "epoch": 1.57, "learning_rate": 4.214397352211088e-05, "loss": 0.8486, "step": 17090 }, { "epoch": 1.57, "learning_rate": 4.2139376666360216e-05, "loss": 0.9608, "step": 17100 }, { "epoch": 1.57, "learning_rate": 4.2134779810609546e-05, "loss": 0.9246, "step": 17110 }, { "epoch": 1.57, "learning_rate": 4.2130182954858875e-05, "loss": 0.8587, "step": 17120 }, { "epoch": 1.57, "learning_rate": 4.212558609910821e-05, "loss": 0.9502, "step": 17130 }, { "epoch": 1.58, "learning_rate": 4.212098924335754e-05, "loss": 0.8688, "step": 17140 }, { "epoch": 1.58, "learning_rate": 4.211639238760688e-05, "loss": 0.9838, "step": 17150 }, { "epoch": 1.58, "learning_rate": 4.2111795531856214e-05, "loss": 0.8754, "step": 17160 }, { "epoch": 1.58, "learning_rate": 4.2107198676105544e-05, "loss": 0.8662, "step": 17170 }, { "epoch": 1.58, "learning_rate": 4.210260182035488e-05, "loss": 0.9795, "step": 17180 }, { "epoch": 1.58, "learning_rate": 4.209800496460422e-05, "loss": 0.9245, "step": 17190 }, { "epoch": 1.58, "learning_rate": 4.209340810885355e-05, "loss": 0.876, "step": 17200 }, { "epoch": 1.58, "learning_rate": 4.2088811253102877e-05, "loss": 0.8579, "step": 17210 }, { "epoch": 1.58, "learning_rate": 4.208421439735221e-05, "loss": 0.9625, "step": 17220 }, { "epoch": 1.58, "learning_rate": 4.207961754160154e-05, "loss": 0.9723, "step": 17230 }, { "epoch": 1.58, "learning_rate": 4.207502068585088e-05, "loss": 0.8758, "step": 17240 }, { "epoch": 1.59, "learning_rate": 4.2070423830100216e-05, "loss": 0.9691, "step": 17250 }, { "epoch": 1.59, "learning_rate": 4.2065826974349545e-05, "loss": 0.9131, "step": 17260 }, { "epoch": 1.59, "learning_rate": 4.206123011859888e-05, "loss": 0.8931, "step": 17270 }, { "epoch": 1.59, "learning_rate": 4.205663326284822e-05, "loss": 0.8679, "step": 17280 }, { "epoch": 1.59, "learning_rate": 4.205203640709755e-05, "loss": 0.8929, "step": 17290 }, { "epoch": 1.59, "learning_rate": 4.204743955134688e-05, "loss": 0.8302, "step": 17300 }, { "epoch": 1.59, "learning_rate": 4.2042842695596214e-05, "loss": 0.9526, "step": 17310 }, { "epoch": 1.59, "learning_rate": 4.2038245839845544e-05, "loss": 1.0688, "step": 17320 }, { "epoch": 1.59, "learning_rate": 4.203364898409488e-05, "loss": 0.9432, "step": 17330 }, { "epoch": 1.59, "learning_rate": 4.202905212834422e-05, "loss": 0.928, "step": 17340 }, { "epoch": 1.6, "learning_rate": 4.2024455272593547e-05, "loss": 0.954, "step": 17350 }, { "epoch": 1.6, "learning_rate": 4.201985841684288e-05, "loss": 0.8691, "step": 17360 }, { "epoch": 1.6, "learning_rate": 4.201526156109222e-05, "loss": 0.8669, "step": 17370 }, { "epoch": 1.6, "learning_rate": 4.201066470534155e-05, "loss": 0.9199, "step": 17380 }, { "epoch": 1.6, "learning_rate": 4.200606784959088e-05, "loss": 0.7568, "step": 17390 }, { "epoch": 1.6, "learning_rate": 4.2001470993840215e-05, "loss": 0.8344, "step": 17400 }, { "epoch": 1.6, "learning_rate": 4.1996874138089545e-05, "loss": 0.6882, "step": 17410 }, { "epoch": 1.6, "learning_rate": 4.199227728233888e-05, "loss": 0.8338, "step": 17420 }, { "epoch": 1.6, "learning_rate": 4.198768042658822e-05, "loss": 0.9393, "step": 17430 }, { "epoch": 1.6, "learning_rate": 4.198308357083755e-05, "loss": 0.9644, "step": 17440 }, { "epoch": 1.6, "learning_rate": 4.1978486715086884e-05, "loss": 0.8726, "step": 17450 }, { "epoch": 1.61, "learning_rate": 4.197388985933622e-05, "loss": 0.8877, "step": 17460 }, { "epoch": 1.61, "learning_rate": 4.1969293003585544e-05, "loss": 0.9628, "step": 17470 }, { "epoch": 1.61, "learning_rate": 4.196469614783488e-05, "loss": 0.9693, "step": 17480 }, { "epoch": 1.61, "learning_rate": 4.196009929208422e-05, "loss": 0.9652, "step": 17490 }, { "epoch": 1.61, "learning_rate": 4.1955502436333546e-05, "loss": 0.8493, "step": 17500 }, { "epoch": 1.61, "learning_rate": 4.195090558058288e-05, "loss": 0.8823, "step": 17510 }, { "epoch": 1.61, "learning_rate": 4.194630872483222e-05, "loss": 0.8977, "step": 17520 }, { "epoch": 1.61, "learning_rate": 4.194171186908155e-05, "loss": 0.7897, "step": 17530 }, { "epoch": 1.61, "learning_rate": 4.1937115013330885e-05, "loss": 0.8328, "step": 17540 }, { "epoch": 1.61, "learning_rate": 4.193251815758022e-05, "loss": 0.8911, "step": 17550 }, { "epoch": 1.61, "learning_rate": 4.1927921301829545e-05, "loss": 0.9267, "step": 17560 }, { "epoch": 1.62, "learning_rate": 4.192332444607888e-05, "loss": 0.9596, "step": 17570 }, { "epoch": 1.62, "learning_rate": 4.191872759032822e-05, "loss": 1.0027, "step": 17580 }, { "epoch": 1.62, "learning_rate": 4.191413073457755e-05, "loss": 0.8186, "step": 17590 }, { "epoch": 1.62, "learning_rate": 4.1909533878826884e-05, "loss": 0.7928, "step": 17600 }, { "epoch": 1.62, "learning_rate": 4.190493702307622e-05, "loss": 0.7637, "step": 17610 }, { "epoch": 1.62, "learning_rate": 4.190034016732555e-05, "loss": 0.9459, "step": 17620 }, { "epoch": 1.62, "learning_rate": 4.189574331157489e-05, "loss": 0.9556, "step": 17630 }, { "epoch": 1.62, "learning_rate": 4.189114645582422e-05, "loss": 0.9608, "step": 17640 }, { "epoch": 1.62, "learning_rate": 4.1886549600073546e-05, "loss": 0.9274, "step": 17650 }, { "epoch": 1.62, "learning_rate": 4.188195274432288e-05, "loss": 1.0087, "step": 17660 }, { "epoch": 1.62, "learning_rate": 4.187735588857222e-05, "loss": 0.8451, "step": 17670 }, { "epoch": 1.63, "learning_rate": 4.187275903282155e-05, "loss": 1.0028, "step": 17680 }, { "epoch": 1.63, "learning_rate": 4.1868162177070885e-05, "loss": 0.8954, "step": 17690 }, { "epoch": 1.63, "learning_rate": 4.186356532132022e-05, "loss": 0.9926, "step": 17700 }, { "epoch": 1.63, "learning_rate": 4.185896846556955e-05, "loss": 0.8621, "step": 17710 }, { "epoch": 1.63, "learning_rate": 4.185437160981889e-05, "loss": 0.822, "step": 17720 }, { "epoch": 1.63, "learning_rate": 4.1849774754068224e-05, "loss": 0.8925, "step": 17730 }, { "epoch": 1.63, "learning_rate": 4.184517789831755e-05, "loss": 0.9776, "step": 17740 }, { "epoch": 1.63, "learning_rate": 4.1840581042566884e-05, "loss": 0.9626, "step": 17750 }, { "epoch": 1.63, "learning_rate": 4.183598418681622e-05, "loss": 0.8438, "step": 17760 }, { "epoch": 1.63, "learning_rate": 4.183138733106555e-05, "loss": 0.9135, "step": 17770 }, { "epoch": 1.63, "learning_rate": 4.1826790475314886e-05, "loss": 0.8661, "step": 17780 }, { "epoch": 1.64, "learning_rate": 4.182219361956422e-05, "loss": 0.8731, "step": 17790 }, { "epoch": 1.64, "learning_rate": 4.181759676381355e-05, "loss": 0.8615, "step": 17800 }, { "epoch": 1.64, "learning_rate": 4.181299990806289e-05, "loss": 0.8019, "step": 17810 }, { "epoch": 1.64, "learning_rate": 4.1808403052312226e-05, "loss": 0.9371, "step": 17820 }, { "epoch": 1.64, "learning_rate": 4.180380619656155e-05, "loss": 0.8912, "step": 17830 }, { "epoch": 1.64, "learning_rate": 4.1799209340810885e-05, "loss": 0.9482, "step": 17840 }, { "epoch": 1.64, "learning_rate": 4.179461248506022e-05, "loss": 0.9816, "step": 17850 }, { "epoch": 1.64, "learning_rate": 4.179001562930955e-05, "loss": 1.0005, "step": 17860 }, { "epoch": 1.64, "learning_rate": 4.178541877355889e-05, "loss": 0.8723, "step": 17870 }, { "epoch": 1.64, "learning_rate": 4.1780821917808224e-05, "loss": 0.8992, "step": 17880 }, { "epoch": 1.64, "learning_rate": 4.1776225062057554e-05, "loss": 0.9304, "step": 17890 }, { "epoch": 1.65, "learning_rate": 4.177162820630689e-05, "loss": 0.926, "step": 17900 }, { "epoch": 1.65, "learning_rate": 4.176703135055623e-05, "loss": 0.7975, "step": 17910 }, { "epoch": 1.65, "learning_rate": 4.176243449480555e-05, "loss": 0.8328, "step": 17920 }, { "epoch": 1.65, "learning_rate": 4.1757837639054886e-05, "loss": 1.0241, "step": 17930 }, { "epoch": 1.65, "learning_rate": 4.175324078330422e-05, "loss": 0.8924, "step": 17940 }, { "epoch": 1.65, "learning_rate": 4.174864392755355e-05, "loss": 0.997, "step": 17950 }, { "epoch": 1.65, "learning_rate": 4.174404707180289e-05, "loss": 0.8743, "step": 17960 }, { "epoch": 1.65, "learning_rate": 4.1739450216052225e-05, "loss": 0.814, "step": 17970 }, { "epoch": 1.65, "learning_rate": 4.1734853360301555e-05, "loss": 0.8387, "step": 17980 }, { "epoch": 1.65, "learning_rate": 4.173025650455089e-05, "loss": 0.8864, "step": 17990 }, { "epoch": 1.65, "learning_rate": 4.172565964880023e-05, "loss": 0.8429, "step": 18000 }, { "epoch": 1.66, "learning_rate": 4.172106279304955e-05, "loss": 0.8551, "step": 18010 }, { "epoch": 1.66, "learning_rate": 4.171646593729889e-05, "loss": 0.9071, "step": 18020 }, { "epoch": 1.66, "learning_rate": 4.1711869081548224e-05, "loss": 0.8447, "step": 18030 }, { "epoch": 1.66, "learning_rate": 4.1707272225797553e-05, "loss": 1.0384, "step": 18040 }, { "epoch": 1.66, "learning_rate": 4.170267537004689e-05, "loss": 1.0297, "step": 18050 }, { "epoch": 1.66, "learning_rate": 4.1698078514296226e-05, "loss": 0.9204, "step": 18060 }, { "epoch": 1.66, "learning_rate": 4.1693481658545556e-05, "loss": 0.8407, "step": 18070 }, { "epoch": 1.66, "learning_rate": 4.168888480279489e-05, "loss": 1.0036, "step": 18080 }, { "epoch": 1.66, "learning_rate": 4.168428794704423e-05, "loss": 0.9909, "step": 18090 }, { "epoch": 1.66, "learning_rate": 4.167969109129355e-05, "loss": 0.8865, "step": 18100 }, { "epoch": 1.66, "learning_rate": 4.167509423554289e-05, "loss": 0.915, "step": 18110 }, { "epoch": 1.67, "learning_rate": 4.1670497379792225e-05, "loss": 0.9087, "step": 18120 }, { "epoch": 1.67, "learning_rate": 4.1665900524041555e-05, "loss": 0.9202, "step": 18130 }, { "epoch": 1.67, "learning_rate": 4.166130366829089e-05, "loss": 0.9248, "step": 18140 }, { "epoch": 1.67, "learning_rate": 4.165670681254023e-05, "loss": 0.8786, "step": 18150 }, { "epoch": 1.67, "learning_rate": 4.165210995678956e-05, "loss": 0.9091, "step": 18160 }, { "epoch": 1.67, "learning_rate": 4.1647513101038894e-05, "loss": 0.9167, "step": 18170 }, { "epoch": 1.67, "learning_rate": 4.164291624528823e-05, "loss": 0.9434, "step": 18180 }, { "epoch": 1.67, "learning_rate": 4.163831938953755e-05, "loss": 0.8511, "step": 18190 }, { "epoch": 1.67, "learning_rate": 4.163372253378689e-05, "loss": 0.8831, "step": 18200 }, { "epoch": 1.67, "learning_rate": 4.1629125678036226e-05, "loss": 0.9833, "step": 18210 }, { "epoch": 1.68, "learning_rate": 4.1624528822285556e-05, "loss": 0.9534, "step": 18220 }, { "epoch": 1.68, "learning_rate": 4.161993196653489e-05, "loss": 0.8748, "step": 18230 }, { "epoch": 1.68, "learning_rate": 4.161533511078423e-05, "loss": 1.0637, "step": 18240 }, { "epoch": 1.68, "learning_rate": 4.161073825503356e-05, "loss": 0.9373, "step": 18250 }, { "epoch": 1.68, "learning_rate": 4.1606141399282895e-05, "loss": 0.9656, "step": 18260 }, { "epoch": 1.68, "learning_rate": 4.160154454353223e-05, "loss": 0.8556, "step": 18270 }, { "epoch": 1.68, "learning_rate": 4.1596947687781554e-05, "loss": 0.999, "step": 18280 }, { "epoch": 1.68, "learning_rate": 4.159235083203089e-05, "loss": 0.952, "step": 18290 }, { "epoch": 1.68, "learning_rate": 4.158775397628023e-05, "loss": 0.9284, "step": 18300 }, { "epoch": 1.68, "learning_rate": 4.158315712052956e-05, "loss": 0.8475, "step": 18310 }, { "epoch": 1.68, "learning_rate": 4.1578560264778894e-05, "loss": 0.9138, "step": 18320 }, { "epoch": 1.69, "learning_rate": 4.157396340902823e-05, "loss": 0.841, "step": 18330 }, { "epoch": 1.69, "learning_rate": 4.156936655327756e-05, "loss": 0.8829, "step": 18340 }, { "epoch": 1.69, "learning_rate": 4.1564769697526896e-05, "loss": 0.8014, "step": 18350 }, { "epoch": 1.69, "learning_rate": 4.1560172841776226e-05, "loss": 0.9932, "step": 18360 }, { "epoch": 1.69, "learning_rate": 4.1555575986025556e-05, "loss": 0.8739, "step": 18370 }, { "epoch": 1.69, "learning_rate": 4.155097913027489e-05, "loss": 0.8471, "step": 18380 }, { "epoch": 1.69, "learning_rate": 4.154638227452423e-05, "loss": 0.9335, "step": 18390 }, { "epoch": 1.69, "learning_rate": 4.154178541877356e-05, "loss": 0.8346, "step": 18400 }, { "epoch": 1.69, "learning_rate": 4.1537188563022895e-05, "loss": 0.9154, "step": 18410 }, { "epoch": 1.69, "learning_rate": 4.153259170727223e-05, "loss": 0.9265, "step": 18420 }, { "epoch": 1.69, "learning_rate": 4.152799485152156e-05, "loss": 0.9032, "step": 18430 }, { "epoch": 1.7, "learning_rate": 4.15233979957709e-05, "loss": 0.8824, "step": 18440 }, { "epoch": 1.7, "learning_rate": 4.151880114002023e-05, "loss": 0.9684, "step": 18450 }, { "epoch": 1.7, "learning_rate": 4.151420428426956e-05, "loss": 0.8762, "step": 18460 }, { "epoch": 1.7, "learning_rate": 4.150960742851889e-05, "loss": 0.9358, "step": 18470 }, { "epoch": 1.7, "learning_rate": 4.150501057276823e-05, "loss": 0.8929, "step": 18480 }, { "epoch": 1.7, "learning_rate": 4.150041371701756e-05, "loss": 0.9735, "step": 18490 }, { "epoch": 1.7, "learning_rate": 4.1495816861266896e-05, "loss": 0.8822, "step": 18500 }, { "epoch": 1.7, "learning_rate": 4.149122000551623e-05, "loss": 0.9456, "step": 18510 }, { "epoch": 1.7, "learning_rate": 4.148662314976556e-05, "loss": 0.8862, "step": 18520 }, { "epoch": 1.7, "learning_rate": 4.14820262940149e-05, "loss": 0.9781, "step": 18530 }, { "epoch": 1.7, "learning_rate": 4.147742943826423e-05, "loss": 0.8752, "step": 18540 }, { "epoch": 1.71, "learning_rate": 4.147283258251356e-05, "loss": 0.9195, "step": 18550 }, { "epoch": 1.71, "learning_rate": 4.1468235726762894e-05, "loss": 0.9672, "step": 18560 }, { "epoch": 1.71, "learning_rate": 4.146363887101223e-05, "loss": 0.9318, "step": 18570 }, { "epoch": 1.71, "learning_rate": 4.145904201526156e-05, "loss": 0.9066, "step": 18580 }, { "epoch": 1.71, "learning_rate": 4.14544451595109e-05, "loss": 0.9142, "step": 18590 }, { "epoch": 1.71, "learning_rate": 4.1449848303760234e-05, "loss": 0.9599, "step": 18600 }, { "epoch": 1.71, "learning_rate": 4.144525144800956e-05, "loss": 0.9115, "step": 18610 }, { "epoch": 1.71, "learning_rate": 4.14406545922589e-05, "loss": 0.8883, "step": 18620 }, { "epoch": 1.71, "learning_rate": 4.143605773650823e-05, "loss": 0.8734, "step": 18630 }, { "epoch": 1.71, "learning_rate": 4.143146088075756e-05, "loss": 0.8817, "step": 18640 }, { "epoch": 1.71, "learning_rate": 4.1426864025006896e-05, "loss": 0.8972, "step": 18650 }, { "epoch": 1.72, "learning_rate": 4.142226716925623e-05, "loss": 0.9625, "step": 18660 }, { "epoch": 1.72, "learning_rate": 4.141767031350556e-05, "loss": 0.9883, "step": 18670 }, { "epoch": 1.72, "learning_rate": 4.14130734577549e-05, "loss": 0.9027, "step": 18680 }, { "epoch": 1.72, "learning_rate": 4.140847660200423e-05, "loss": 0.935, "step": 18690 }, { "epoch": 1.72, "learning_rate": 4.1403879746253565e-05, "loss": 0.8578, "step": 18700 }, { "epoch": 1.72, "learning_rate": 4.13992828905029e-05, "loss": 0.8715, "step": 18710 }, { "epoch": 1.72, "learning_rate": 4.139468603475223e-05, "loss": 0.9244, "step": 18720 }, { "epoch": 1.72, "learning_rate": 4.139008917900157e-05, "loss": 0.8628, "step": 18730 }, { "epoch": 1.72, "learning_rate": 4.13854923232509e-05, "loss": 0.943, "step": 18740 }, { "epoch": 1.72, "learning_rate": 4.138089546750023e-05, "loss": 0.8576, "step": 18750 }, { "epoch": 1.72, "learning_rate": 4.137629861174956e-05, "loss": 0.8593, "step": 18760 }, { "epoch": 1.73, "learning_rate": 4.13717017559989e-05, "loss": 0.9834, "step": 18770 }, { "epoch": 1.73, "learning_rate": 4.136710490024823e-05, "loss": 0.8679, "step": 18780 }, { "epoch": 1.73, "learning_rate": 4.1362508044497566e-05, "loss": 0.8944, "step": 18790 }, { "epoch": 1.73, "learning_rate": 4.13579111887469e-05, "loss": 0.9049, "step": 18800 }, { "epoch": 1.73, "learning_rate": 4.135331433299623e-05, "loss": 1.0067, "step": 18810 }, { "epoch": 1.73, "learning_rate": 4.134871747724557e-05, "loss": 0.8506, "step": 18820 }, { "epoch": 1.73, "learning_rate": 4.13441206214949e-05, "loss": 0.8525, "step": 18830 }, { "epoch": 1.73, "learning_rate": 4.1339523765744235e-05, "loss": 1.0159, "step": 18840 }, { "epoch": 1.73, "learning_rate": 4.1334926909993564e-05, "loss": 0.9297, "step": 18850 }, { "epoch": 1.73, "learning_rate": 4.13303300542429e-05, "loss": 0.812, "step": 18860 }, { "epoch": 1.73, "learning_rate": 4.132573319849223e-05, "loss": 0.9888, "step": 18870 }, { "epoch": 1.74, "learning_rate": 4.132113634274157e-05, "loss": 0.7637, "step": 18880 }, { "epoch": 1.74, "learning_rate": 4.13165394869909e-05, "loss": 1.0444, "step": 18890 }, { "epoch": 1.74, "learning_rate": 4.131194263124023e-05, "loss": 0.8955, "step": 18900 }, { "epoch": 1.74, "learning_rate": 4.130734577548957e-05, "loss": 0.8954, "step": 18910 }, { "epoch": 1.74, "learning_rate": 4.13027489197389e-05, "loss": 0.8728, "step": 18920 }, { "epoch": 1.74, "learning_rate": 4.1298152063988236e-05, "loss": 0.897, "step": 18930 }, { "epoch": 1.74, "learning_rate": 4.1293555208237565e-05, "loss": 0.8964, "step": 18940 }, { "epoch": 1.74, "learning_rate": 4.12889583524869e-05, "loss": 0.9353, "step": 18950 }, { "epoch": 1.74, "learning_rate": 4.128436149673623e-05, "loss": 0.9095, "step": 18960 }, { "epoch": 1.74, "learning_rate": 4.127976464098557e-05, "loss": 0.9323, "step": 18970 }, { "epoch": 1.74, "learning_rate": 4.1275167785234905e-05, "loss": 0.898, "step": 18980 }, { "epoch": 1.75, "learning_rate": 4.1270570929484234e-05, "loss": 0.8464, "step": 18990 }, { "epoch": 1.75, "learning_rate": 4.126597407373357e-05, "loss": 0.9086, "step": 19000 }, { "epoch": 1.75, "learning_rate": 4.12613772179829e-05, "loss": 0.9422, "step": 19010 }, { "epoch": 1.75, "learning_rate": 4.125678036223223e-05, "loss": 0.9822, "step": 19020 }, { "epoch": 1.75, "learning_rate": 4.125218350648157e-05, "loss": 0.7724, "step": 19030 }, { "epoch": 1.75, "learning_rate": 4.12475866507309e-05, "loss": 0.9371, "step": 19040 }, { "epoch": 1.75, "learning_rate": 4.124298979498023e-05, "loss": 0.9053, "step": 19050 }, { "epoch": 1.75, "learning_rate": 4.123839293922957e-05, "loss": 0.9016, "step": 19060 }, { "epoch": 1.75, "learning_rate": 4.1233796083478906e-05, "loss": 0.9313, "step": 19070 }, { "epoch": 1.75, "learning_rate": 4.1229199227728235e-05, "loss": 0.917, "step": 19080 }, { "epoch": 1.76, "learning_rate": 4.122460237197757e-05, "loss": 1.0667, "step": 19090 }, { "epoch": 1.76, "learning_rate": 4.12200055162269e-05, "loss": 0.8639, "step": 19100 }, { "epoch": 1.76, "learning_rate": 4.121540866047623e-05, "loss": 1.016, "step": 19110 }, { "epoch": 1.76, "learning_rate": 4.121081180472557e-05, "loss": 0.9181, "step": 19120 }, { "epoch": 1.76, "learning_rate": 4.1206214948974904e-05, "loss": 0.9929, "step": 19130 }, { "epoch": 1.76, "learning_rate": 4.1201618093224234e-05, "loss": 0.8994, "step": 19140 }, { "epoch": 1.76, "learning_rate": 4.119702123747357e-05, "loss": 0.7857, "step": 19150 }, { "epoch": 1.76, "learning_rate": 4.119242438172291e-05, "loss": 0.9072, "step": 19160 }, { "epoch": 1.76, "learning_rate": 4.118782752597224e-05, "loss": 0.903, "step": 19170 }, { "epoch": 1.76, "learning_rate": 4.118323067022157e-05, "loss": 0.9243, "step": 19180 }, { "epoch": 1.76, "learning_rate": 4.11786338144709e-05, "loss": 0.9036, "step": 19190 }, { "epoch": 1.77, "learning_rate": 4.117403695872023e-05, "loss": 0.9438, "step": 19200 }, { "epoch": 1.77, "learning_rate": 4.116944010296957e-05, "loss": 0.839, "step": 19210 }, { "epoch": 1.77, "learning_rate": 4.1164843247218906e-05, "loss": 0.8282, "step": 19220 }, { "epoch": 1.77, "learning_rate": 4.1160246391468235e-05, "loss": 0.9501, "step": 19230 }, { "epoch": 1.77, "learning_rate": 4.115564953571757e-05, "loss": 0.8517, "step": 19240 }, { "epoch": 1.77, "learning_rate": 4.115105267996691e-05, "loss": 0.9707, "step": 19250 }, { "epoch": 1.77, "learning_rate": 4.114645582421624e-05, "loss": 0.8103, "step": 19260 }, { "epoch": 1.77, "learning_rate": 4.1141858968465574e-05, "loss": 0.8902, "step": 19270 }, { "epoch": 1.77, "learning_rate": 4.1137262112714904e-05, "loss": 0.9665, "step": 19280 }, { "epoch": 1.77, "learning_rate": 4.1132665256964234e-05, "loss": 0.925, "step": 19290 }, { "epoch": 1.77, "learning_rate": 4.112806840121357e-05, "loss": 0.8789, "step": 19300 }, { "epoch": 1.78, "learning_rate": 4.112347154546291e-05, "loss": 0.9255, "step": 19310 }, { "epoch": 1.78, "learning_rate": 4.1118874689712236e-05, "loss": 0.806, "step": 19320 }, { "epoch": 1.78, "learning_rate": 4.111427783396157e-05, "loss": 0.8685, "step": 19330 }, { "epoch": 1.78, "learning_rate": 4.110968097821091e-05, "loss": 0.9507, "step": 19340 }, { "epoch": 1.78, "learning_rate": 4.110508412246024e-05, "loss": 0.9108, "step": 19350 }, { "epoch": 1.78, "learning_rate": 4.1100487266709576e-05, "loss": 0.8114, "step": 19360 }, { "epoch": 1.78, "learning_rate": 4.1095890410958905e-05, "loss": 1.0522, "step": 19370 }, { "epoch": 1.78, "learning_rate": 4.1091293555208235e-05, "loss": 0.9294, "step": 19380 }, { "epoch": 1.78, "learning_rate": 4.108669669945757e-05, "loss": 0.84, "step": 19390 }, { "epoch": 1.78, "learning_rate": 4.108209984370691e-05, "loss": 0.8939, "step": 19400 }, { "epoch": 1.78, "learning_rate": 4.107750298795624e-05, "loss": 0.8818, "step": 19410 }, { "epoch": 1.79, "learning_rate": 4.1072906132205574e-05, "loss": 0.9076, "step": 19420 }, { "epoch": 1.79, "learning_rate": 4.106830927645491e-05, "loss": 0.976, "step": 19430 }, { "epoch": 1.79, "learning_rate": 4.106371242070424e-05, "loss": 0.9353, "step": 19440 }, { "epoch": 1.79, "learning_rate": 4.105911556495358e-05, "loss": 0.8911, "step": 19450 }, { "epoch": 1.79, "learning_rate": 4.1054518709202906e-05, "loss": 0.8533, "step": 19460 }, { "epoch": 1.79, "learning_rate": 4.1049921853452236e-05, "loss": 0.9433, "step": 19470 }, { "epoch": 1.79, "learning_rate": 4.104532499770157e-05, "loss": 0.9745, "step": 19480 }, { "epoch": 1.79, "learning_rate": 4.104072814195091e-05, "loss": 0.8671, "step": 19490 }, { "epoch": 1.79, "learning_rate": 4.103613128620024e-05, "loss": 0.9078, "step": 19500 }, { "epoch": 1.79, "learning_rate": 4.1031534430449575e-05, "loss": 0.8682, "step": 19510 }, { "epoch": 1.79, "learning_rate": 4.102693757469891e-05, "loss": 1.0162, "step": 19520 }, { "epoch": 1.8, "learning_rate": 4.102234071894824e-05, "loss": 0.8288, "step": 19530 }, { "epoch": 1.8, "learning_rate": 4.101774386319758e-05, "loss": 0.8193, "step": 19540 }, { "epoch": 1.8, "learning_rate": 4.101314700744691e-05, "loss": 0.7833, "step": 19550 }, { "epoch": 1.8, "learning_rate": 4.100855015169624e-05, "loss": 0.8653, "step": 19560 }, { "epoch": 1.8, "learning_rate": 4.1003953295945574e-05, "loss": 0.8341, "step": 19570 }, { "epoch": 1.8, "learning_rate": 4.099935644019491e-05, "loss": 0.8961, "step": 19580 }, { "epoch": 1.8, "learning_rate": 4.099475958444424e-05, "loss": 0.8924, "step": 19590 }, { "epoch": 1.8, "learning_rate": 4.0990162728693576e-05, "loss": 0.9692, "step": 19600 }, { "epoch": 1.8, "learning_rate": 4.098556587294291e-05, "loss": 0.8752, "step": 19610 }, { "epoch": 1.8, "learning_rate": 4.098096901719224e-05, "loss": 0.9402, "step": 19620 }, { "epoch": 1.8, "learning_rate": 4.097637216144158e-05, "loss": 1.0259, "step": 19630 }, { "epoch": 1.81, "learning_rate": 4.097177530569091e-05, "loss": 0.8459, "step": 19640 }, { "epoch": 1.81, "learning_rate": 4.096717844994024e-05, "loss": 0.9409, "step": 19650 }, { "epoch": 1.81, "learning_rate": 4.0962581594189575e-05, "loss": 0.9955, "step": 19660 }, { "epoch": 1.81, "learning_rate": 4.095798473843891e-05, "loss": 0.8967, "step": 19670 }, { "epoch": 1.81, "learning_rate": 4.095338788268824e-05, "loss": 0.9495, "step": 19680 }, { "epoch": 1.81, "learning_rate": 4.094879102693758e-05, "loss": 0.8795, "step": 19690 }, { "epoch": 1.81, "learning_rate": 4.0944194171186914e-05, "loss": 0.9885, "step": 19700 }, { "epoch": 1.81, "learning_rate": 4.0939597315436244e-05, "loss": 0.8808, "step": 19710 }, { "epoch": 1.81, "learning_rate": 4.093500045968558e-05, "loss": 0.9911, "step": 19720 }, { "epoch": 1.81, "learning_rate": 4.093040360393491e-05, "loss": 0.9332, "step": 19730 }, { "epoch": 1.81, "learning_rate": 4.092580674818424e-05, "loss": 0.8918, "step": 19740 }, { "epoch": 1.82, "learning_rate": 4.0921209892433576e-05, "loss": 1.0028, "step": 19750 }, { "epoch": 1.82, "learning_rate": 4.091661303668291e-05, "loss": 0.9068, "step": 19760 }, { "epoch": 1.82, "learning_rate": 4.091201618093224e-05, "loss": 0.9665, "step": 19770 }, { "epoch": 1.82, "learning_rate": 4.090741932518158e-05, "loss": 0.9099, "step": 19780 }, { "epoch": 1.82, "learning_rate": 4.0902822469430915e-05, "loss": 1.0169, "step": 19790 }, { "epoch": 1.82, "learning_rate": 4.0898225613680245e-05, "loss": 0.8978, "step": 19800 }, { "epoch": 1.82, "learning_rate": 4.089362875792958e-05, "loss": 0.8199, "step": 19810 }, { "epoch": 1.82, "learning_rate": 4.088903190217891e-05, "loss": 0.8222, "step": 19820 }, { "epoch": 1.82, "learning_rate": 4.088443504642824e-05, "loss": 0.7734, "step": 19830 }, { "epoch": 1.82, "learning_rate": 4.087983819067758e-05, "loss": 0.8924, "step": 19840 }, { "epoch": 1.82, "learning_rate": 4.0875241334926914e-05, "loss": 0.9393, "step": 19850 }, { "epoch": 1.83, "learning_rate": 4.0870644479176244e-05, "loss": 0.9308, "step": 19860 }, { "epoch": 1.83, "learning_rate": 4.086604762342558e-05, "loss": 0.8848, "step": 19870 }, { "epoch": 1.83, "learning_rate": 4.0861450767674917e-05, "loss": 0.9171, "step": 19880 }, { "epoch": 1.83, "learning_rate": 4.0856853911924246e-05, "loss": 0.838, "step": 19890 }, { "epoch": 1.83, "learning_rate": 4.085225705617358e-05, "loss": 0.907, "step": 19900 }, { "epoch": 1.83, "learning_rate": 4.084766020042291e-05, "loss": 0.8847, "step": 19910 }, { "epoch": 1.83, "learning_rate": 4.084306334467224e-05, "loss": 0.9986, "step": 19920 }, { "epoch": 1.83, "learning_rate": 4.083846648892158e-05, "loss": 0.8814, "step": 19930 }, { "epoch": 1.83, "learning_rate": 4.0833869633170915e-05, "loss": 0.7981, "step": 19940 }, { "epoch": 1.83, "learning_rate": 4.0829272777420245e-05, "loss": 0.9419, "step": 19950 }, { "epoch": 1.84, "learning_rate": 4.082467592166958e-05, "loss": 0.8213, "step": 19960 }, { "epoch": 1.84, "learning_rate": 4.082007906591892e-05, "loss": 0.8623, "step": 19970 }, { "epoch": 1.84, "learning_rate": 4.081548221016825e-05, "loss": 0.9108, "step": 19980 }, { "epoch": 1.84, "learning_rate": 4.0810885354417584e-05, "loss": 0.9245, "step": 19990 }, { "epoch": 1.84, "learning_rate": 4.0806288498666914e-05, "loss": 0.8881, "step": 20000 }, { "epoch": 1.84, "eval_accuracy": 0.5718340611353712, "eval_loss": 0.8948913812637329, "eval_runtime": 159.4029, "eval_samples_per_second": 28.732, "eval_steps_per_second": 3.595, "step": 20000 }, { "epoch": 1.84, "learning_rate": 4.080169164291624e-05, "loss": 0.8465, "step": 20010 }, { "epoch": 1.84, "learning_rate": 4.079709478716558e-05, "loss": 0.9986, "step": 20020 }, { "epoch": 1.84, "learning_rate": 4.0792497931414916e-05, "loss": 0.8382, "step": 20030 }, { "epoch": 1.84, "learning_rate": 4.0787901075664246e-05, "loss": 0.9059, "step": 20040 }, { "epoch": 1.84, "learning_rate": 4.078330421991358e-05, "loss": 0.9915, "step": 20050 }, { "epoch": 1.84, "learning_rate": 4.077870736416292e-05, "loss": 0.8541, "step": 20060 }, { "epoch": 1.85, "learning_rate": 4.077411050841225e-05, "loss": 0.898, "step": 20070 }, { "epoch": 1.85, "learning_rate": 4.0769513652661585e-05, "loss": 0.9078, "step": 20080 }, { "epoch": 1.85, "learning_rate": 4.0764916796910915e-05, "loss": 0.8659, "step": 20090 }, { "epoch": 1.85, "learning_rate": 4.0760319941160245e-05, "loss": 0.8148, "step": 20100 }, { "epoch": 1.85, "learning_rate": 4.075572308540958e-05, "loss": 1.0013, "step": 20110 }, { "epoch": 1.85, "learning_rate": 4.075112622965892e-05, "loss": 0.8939, "step": 20120 }, { "epoch": 1.85, "learning_rate": 4.074652937390825e-05, "loss": 0.9523, "step": 20130 }, { "epoch": 1.85, "learning_rate": 4.0741932518157584e-05, "loss": 0.932, "step": 20140 }, { "epoch": 1.85, "learning_rate": 4.073733566240692e-05, "loss": 0.8874, "step": 20150 }, { "epoch": 1.85, "learning_rate": 4.073273880665625e-05, "loss": 1.0361, "step": 20160 }, { "epoch": 1.85, "learning_rate": 4.0728141950905586e-05, "loss": 0.9891, "step": 20170 }, { "epoch": 1.86, "learning_rate": 4.0723545095154916e-05, "loss": 0.8961, "step": 20180 }, { "epoch": 1.86, "learning_rate": 4.0718948239404246e-05, "loss": 0.866, "step": 20190 }, { "epoch": 1.86, "learning_rate": 4.071435138365358e-05, "loss": 0.8629, "step": 20200 }, { "epoch": 1.86, "learning_rate": 4.070975452790292e-05, "loss": 0.9099, "step": 20210 }, { "epoch": 1.86, "learning_rate": 4.070515767215225e-05, "loss": 0.8307, "step": 20220 }, { "epoch": 1.86, "learning_rate": 4.0700560816401585e-05, "loss": 0.8605, "step": 20230 }, { "epoch": 1.86, "learning_rate": 4.069596396065092e-05, "loss": 0.7921, "step": 20240 }, { "epoch": 1.86, "learning_rate": 4.069136710490025e-05, "loss": 0.9, "step": 20250 }, { "epoch": 1.86, "learning_rate": 4.068677024914959e-05, "loss": 0.9485, "step": 20260 }, { "epoch": 1.86, "learning_rate": 4.068217339339892e-05, "loss": 0.9716, "step": 20270 }, { "epoch": 1.86, "learning_rate": 4.067757653764825e-05, "loss": 0.8101, "step": 20280 }, { "epoch": 1.87, "learning_rate": 4.067297968189758e-05, "loss": 0.9246, "step": 20290 }, { "epoch": 1.87, "learning_rate": 4.066838282614692e-05, "loss": 0.8999, "step": 20300 }, { "epoch": 1.87, "learning_rate": 4.066378597039625e-05, "loss": 0.9992, "step": 20310 }, { "epoch": 1.87, "learning_rate": 4.0659189114645586e-05, "loss": 0.8228, "step": 20320 }, { "epoch": 1.87, "learning_rate": 4.0654592258894916e-05, "loss": 0.929, "step": 20330 }, { "epoch": 1.87, "learning_rate": 4.064999540314425e-05, "loss": 0.9695, "step": 20340 }, { "epoch": 1.87, "learning_rate": 4.064539854739359e-05, "loss": 0.8766, "step": 20350 }, { "epoch": 1.87, "learning_rate": 4.064080169164292e-05, "loss": 0.8782, "step": 20360 }, { "epoch": 1.87, "learning_rate": 4.063620483589225e-05, "loss": 0.8494, "step": 20370 }, { "epoch": 1.87, "learning_rate": 4.0631607980141585e-05, "loss": 0.8463, "step": 20380 }, { "epoch": 1.87, "learning_rate": 4.062701112439092e-05, "loss": 0.9161, "step": 20390 }, { "epoch": 1.88, "learning_rate": 4.062241426864025e-05, "loss": 0.8858, "step": 20400 }, { "epoch": 1.88, "learning_rate": 4.061781741288959e-05, "loss": 1.0159, "step": 20410 }, { "epoch": 1.88, "learning_rate": 4.061322055713892e-05, "loss": 0.8569, "step": 20420 }, { "epoch": 1.88, "learning_rate": 4.0608623701388253e-05, "loss": 0.9, "step": 20430 }, { "epoch": 1.88, "learning_rate": 4.060402684563759e-05, "loss": 0.8478, "step": 20440 }, { "epoch": 1.88, "learning_rate": 4.059942998988692e-05, "loss": 0.8729, "step": 20450 }, { "epoch": 1.88, "learning_rate": 4.059483313413625e-05, "loss": 0.9638, "step": 20460 }, { "epoch": 1.88, "learning_rate": 4.0590236278385586e-05, "loss": 0.84, "step": 20470 }, { "epoch": 1.88, "learning_rate": 4.058563942263492e-05, "loss": 0.8556, "step": 20480 }, { "epoch": 1.88, "learning_rate": 4.058104256688425e-05, "loss": 0.9391, "step": 20490 }, { "epoch": 1.88, "learning_rate": 4.057644571113359e-05, "loss": 0.9144, "step": 20500 }, { "epoch": 1.89, "learning_rate": 4.057184885538292e-05, "loss": 0.8776, "step": 20510 }, { "epoch": 1.89, "learning_rate": 4.0567251999632255e-05, "loss": 0.8684, "step": 20520 }, { "epoch": 1.89, "learning_rate": 4.056265514388159e-05, "loss": 0.9345, "step": 20530 }, { "epoch": 1.89, "learning_rate": 4.055805828813092e-05, "loss": 0.9432, "step": 20540 }, { "epoch": 1.89, "learning_rate": 4.055346143238025e-05, "loss": 1.0035, "step": 20550 }, { "epoch": 1.89, "learning_rate": 4.054886457662959e-05, "loss": 0.8536, "step": 20560 }, { "epoch": 1.89, "learning_rate": 4.0544267720878923e-05, "loss": 0.9423, "step": 20570 }, { "epoch": 1.89, "learning_rate": 4.053967086512825e-05, "loss": 0.8918, "step": 20580 }, { "epoch": 1.89, "learning_rate": 4.053507400937759e-05, "loss": 0.9524, "step": 20590 }, { "epoch": 1.89, "learning_rate": 4.053047715362692e-05, "loss": 0.8665, "step": 20600 }, { "epoch": 1.89, "learning_rate": 4.0525880297876256e-05, "loss": 0.9779, "step": 20610 }, { "epoch": 1.9, "learning_rate": 4.052128344212559e-05, "loss": 0.8143, "step": 20620 }, { "epoch": 1.9, "learning_rate": 4.051668658637492e-05, "loss": 0.8623, "step": 20630 }, { "epoch": 1.9, "learning_rate": 4.051208973062425e-05, "loss": 1.0424, "step": 20640 }, { "epoch": 1.9, "learning_rate": 4.050749287487359e-05, "loss": 0.9438, "step": 20650 }, { "epoch": 1.9, "learning_rate": 4.050289601912292e-05, "loss": 0.927, "step": 20660 }, { "epoch": 1.9, "learning_rate": 4.0498299163372254e-05, "loss": 0.9322, "step": 20670 }, { "epoch": 1.9, "learning_rate": 4.049370230762159e-05, "loss": 0.9185, "step": 20680 }, { "epoch": 1.9, "learning_rate": 4.048910545187092e-05, "loss": 0.9438, "step": 20690 }, { "epoch": 1.9, "learning_rate": 4.048450859612026e-05, "loss": 0.9043, "step": 20700 }, { "epoch": 1.9, "learning_rate": 4.0479911740369593e-05, "loss": 0.8019, "step": 20710 }, { "epoch": 1.9, "learning_rate": 4.047531488461892e-05, "loss": 1.0152, "step": 20720 }, { "epoch": 1.91, "learning_rate": 4.047071802886825e-05, "loss": 0.889, "step": 20730 }, { "epoch": 1.91, "learning_rate": 4.046612117311759e-05, "loss": 0.8733, "step": 20740 }, { "epoch": 1.91, "learning_rate": 4.046152431736692e-05, "loss": 0.9171, "step": 20750 }, { "epoch": 1.91, "learning_rate": 4.0456927461616256e-05, "loss": 0.8776, "step": 20760 }, { "epoch": 1.91, "learning_rate": 4.045233060586559e-05, "loss": 0.8803, "step": 20770 }, { "epoch": 1.91, "learning_rate": 4.044773375011492e-05, "loss": 0.838, "step": 20780 }, { "epoch": 1.91, "learning_rate": 4.044313689436426e-05, "loss": 0.9693, "step": 20790 }, { "epoch": 1.91, "learning_rate": 4.0438540038613595e-05, "loss": 0.8288, "step": 20800 }, { "epoch": 1.91, "learning_rate": 4.0433943182862924e-05, "loss": 0.889, "step": 20810 }, { "epoch": 1.91, "learning_rate": 4.0429346327112254e-05, "loss": 0.9696, "step": 20820 }, { "epoch": 1.92, "learning_rate": 4.042474947136159e-05, "loss": 0.9633, "step": 20830 }, { "epoch": 1.92, "learning_rate": 4.042015261561092e-05, "loss": 0.8446, "step": 20840 }, { "epoch": 1.92, "learning_rate": 4.041555575986026e-05, "loss": 0.902, "step": 20850 }, { "epoch": 1.92, "learning_rate": 4.041095890410959e-05, "loss": 0.8652, "step": 20860 }, { "epoch": 1.92, "learning_rate": 4.040636204835892e-05, "loss": 0.9225, "step": 20870 }, { "epoch": 1.92, "learning_rate": 4.040176519260826e-05, "loss": 0.8898, "step": 20880 }, { "epoch": 1.92, "learning_rate": 4.0397168336857596e-05, "loss": 0.9459, "step": 20890 }, { "epoch": 1.92, "learning_rate": 4.0392571481106926e-05, "loss": 0.834, "step": 20900 }, { "epoch": 1.92, "learning_rate": 4.0387974625356255e-05, "loss": 0.8881, "step": 20910 }, { "epoch": 1.92, "learning_rate": 4.038337776960559e-05, "loss": 0.9006, "step": 20920 }, { "epoch": 1.92, "learning_rate": 4.037878091385492e-05, "loss": 0.7855, "step": 20930 }, { "epoch": 1.93, "learning_rate": 4.037418405810426e-05, "loss": 0.8567, "step": 20940 }, { "epoch": 1.93, "learning_rate": 4.0369587202353594e-05, "loss": 0.9781, "step": 20950 }, { "epoch": 1.93, "learning_rate": 4.0364990346602924e-05, "loss": 0.8617, "step": 20960 }, { "epoch": 1.93, "learning_rate": 4.036039349085226e-05, "loss": 0.8768, "step": 20970 }, { "epoch": 1.93, "learning_rate": 4.03557966351016e-05, "loss": 0.9551, "step": 20980 }, { "epoch": 1.93, "learning_rate": 4.035119977935092e-05, "loss": 0.9332, "step": 20990 }, { "epoch": 1.93, "learning_rate": 4.0346602923600256e-05, "loss": 0.882, "step": 21000 }, { "epoch": 1.93, "learning_rate": 4.034200606784959e-05, "loss": 0.8489, "step": 21010 }, { "epoch": 1.93, "learning_rate": 4.033740921209892e-05, "loss": 0.8556, "step": 21020 }, { "epoch": 1.93, "learning_rate": 4.033281235634826e-05, "loss": 1.012, "step": 21030 }, { "epoch": 1.93, "learning_rate": 4.0328215500597596e-05, "loss": 0.8541, "step": 21040 }, { "epoch": 1.94, "learning_rate": 4.0323618644846925e-05, "loss": 0.9325, "step": 21050 }, { "epoch": 1.94, "learning_rate": 4.031902178909626e-05, "loss": 0.881, "step": 21060 }, { "epoch": 1.94, "learning_rate": 4.03144249333456e-05, "loss": 0.9542, "step": 21070 }, { "epoch": 1.94, "learning_rate": 4.030982807759492e-05, "loss": 0.8439, "step": 21080 }, { "epoch": 1.94, "learning_rate": 4.030523122184426e-05, "loss": 0.9469, "step": 21090 }, { "epoch": 1.94, "learning_rate": 4.0300634366093594e-05, "loss": 0.8524, "step": 21100 }, { "epoch": 1.94, "learning_rate": 4.0296037510342924e-05, "loss": 0.8643, "step": 21110 }, { "epoch": 1.94, "learning_rate": 4.029144065459226e-05, "loss": 0.9643, "step": 21120 }, { "epoch": 1.94, "learning_rate": 4.02868437988416e-05, "loss": 0.9, "step": 21130 }, { "epoch": 1.94, "learning_rate": 4.0282246943090926e-05, "loss": 0.9066, "step": 21140 }, { "epoch": 1.94, "learning_rate": 4.027765008734026e-05, "loss": 0.8098, "step": 21150 }, { "epoch": 1.95, "learning_rate": 4.02730532315896e-05, "loss": 0.8649, "step": 21160 }, { "epoch": 1.95, "learning_rate": 4.026845637583892e-05, "loss": 0.8806, "step": 21170 }, { "epoch": 1.95, "learning_rate": 4.026385952008826e-05, "loss": 0.96, "step": 21180 }, { "epoch": 1.95, "learning_rate": 4.0259262664337595e-05, "loss": 0.8574, "step": 21190 }, { "epoch": 1.95, "learning_rate": 4.0254665808586925e-05, "loss": 0.872, "step": 21200 }, { "epoch": 1.95, "learning_rate": 4.025006895283626e-05, "loss": 0.8602, "step": 21210 }, { "epoch": 1.95, "learning_rate": 4.02454720970856e-05, "loss": 0.7153, "step": 21220 }, { "epoch": 1.95, "learning_rate": 4.024087524133493e-05, "loss": 0.7712, "step": 21230 }, { "epoch": 1.95, "learning_rate": 4.0236278385584264e-05, "loss": 0.8701, "step": 21240 }, { "epoch": 1.95, "learning_rate": 4.02316815298336e-05, "loss": 0.8243, "step": 21250 }, { "epoch": 1.95, "learning_rate": 4.0227084674082924e-05, "loss": 0.7357, "step": 21260 }, { "epoch": 1.96, "learning_rate": 4.022248781833226e-05, "loss": 0.9544, "step": 21270 }, { "epoch": 1.96, "learning_rate": 4.0217890962581597e-05, "loss": 0.7579, "step": 21280 }, { "epoch": 1.96, "learning_rate": 4.0213294106830926e-05, "loss": 0.9087, "step": 21290 }, { "epoch": 1.96, "learning_rate": 4.020869725108026e-05, "loss": 0.8638, "step": 21300 }, { "epoch": 1.96, "learning_rate": 4.02041003953296e-05, "loss": 1.0027, "step": 21310 }, { "epoch": 1.96, "learning_rate": 4.019950353957893e-05, "loss": 0.9756, "step": 21320 }, { "epoch": 1.96, "learning_rate": 4.0194906683828265e-05, "loss": 0.8906, "step": 21330 }, { "epoch": 1.96, "learning_rate": 4.01903098280776e-05, "loss": 0.9661, "step": 21340 }, { "epoch": 1.96, "learning_rate": 4.0185712972326925e-05, "loss": 0.9704, "step": 21350 }, { "epoch": 1.96, "learning_rate": 4.018111611657626e-05, "loss": 0.829, "step": 21360 }, { "epoch": 1.96, "learning_rate": 4.01765192608256e-05, "loss": 0.8957, "step": 21370 }, { "epoch": 1.97, "learning_rate": 4.017192240507493e-05, "loss": 0.9386, "step": 21380 }, { "epoch": 1.97, "learning_rate": 4.0167325549324264e-05, "loss": 0.8152, "step": 21390 }, { "epoch": 1.97, "learning_rate": 4.01627286935736e-05, "loss": 0.8726, "step": 21400 }, { "epoch": 1.97, "learning_rate": 4.015813183782293e-05, "loss": 0.8153, "step": 21410 }, { "epoch": 1.97, "learning_rate": 4.0153534982072267e-05, "loss": 0.9022, "step": 21420 }, { "epoch": 1.97, "learning_rate": 4.01489381263216e-05, "loss": 0.822, "step": 21430 }, { "epoch": 1.97, "learning_rate": 4.0144341270570926e-05, "loss": 0.8858, "step": 21440 }, { "epoch": 1.97, "learning_rate": 4.013974441482026e-05, "loss": 0.8817, "step": 21450 }, { "epoch": 1.97, "learning_rate": 4.01351475590696e-05, "loss": 0.8454, "step": 21460 }, { "epoch": 1.97, "learning_rate": 4.013055070331893e-05, "loss": 0.8995, "step": 21470 }, { "epoch": 1.97, "learning_rate": 4.0125953847568265e-05, "loss": 0.903, "step": 21480 }, { "epoch": 1.98, "learning_rate": 4.01213569918176e-05, "loss": 0.7868, "step": 21490 }, { "epoch": 1.98, "learning_rate": 4.011676013606693e-05, "loss": 0.9856, "step": 21500 }, { "epoch": 1.98, "learning_rate": 4.011216328031627e-05, "loss": 0.9738, "step": 21510 }, { "epoch": 1.98, "learning_rate": 4.0107566424565604e-05, "loss": 0.8339, "step": 21520 }, { "epoch": 1.98, "learning_rate": 4.010296956881493e-05, "loss": 0.9027, "step": 21530 }, { "epoch": 1.98, "learning_rate": 4.0098372713064264e-05, "loss": 0.878, "step": 21540 }, { "epoch": 1.98, "learning_rate": 4.00937758573136e-05, "loss": 1.06, "step": 21550 }, { "epoch": 1.98, "learning_rate": 4.008917900156293e-05, "loss": 0.9259, "step": 21560 }, { "epoch": 1.98, "learning_rate": 4.0084582145812266e-05, "loss": 0.8766, "step": 21570 }, { "epoch": 1.98, "learning_rate": 4.00799852900616e-05, "loss": 0.7959, "step": 21580 }, { "epoch": 1.98, "learning_rate": 4.007538843431093e-05, "loss": 0.9636, "step": 21590 }, { "epoch": 1.99, "learning_rate": 4.007079157856027e-05, "loss": 0.9235, "step": 21600 }, { "epoch": 1.99, "learning_rate": 4.0066194722809605e-05, "loss": 0.8531, "step": 21610 }, { "epoch": 1.99, "learning_rate": 4.006159786705893e-05, "loss": 0.9479, "step": 21620 }, { "epoch": 1.99, "learning_rate": 4.0057001011308265e-05, "loss": 0.9314, "step": 21630 }, { "epoch": 1.99, "learning_rate": 4.00524041555576e-05, "loss": 0.8426, "step": 21640 }, { "epoch": 1.99, "learning_rate": 4.004780729980693e-05, "loss": 0.9161, "step": 21650 }, { "epoch": 1.99, "learning_rate": 4.004321044405627e-05, "loss": 0.9649, "step": 21660 }, { "epoch": 1.99, "learning_rate": 4.0038613588305604e-05, "loss": 0.8403, "step": 21670 }, { "epoch": 1.99, "learning_rate": 4.0034016732554934e-05, "loss": 0.9374, "step": 21680 }, { "epoch": 1.99, "learning_rate": 4.002941987680427e-05, "loss": 0.8815, "step": 21690 }, { "epoch": 2.0, "learning_rate": 4.002482302105361e-05, "loss": 0.9884, "step": 21700 }, { "epoch": 2.0, "learning_rate": 4.002022616530293e-05, "loss": 0.9265, "step": 21710 }, { "epoch": 2.0, "learning_rate": 4.0015629309552266e-05, "loss": 0.9584, "step": 21720 }, { "epoch": 2.0, "learning_rate": 4.00110324538016e-05, "loss": 1.02, "step": 21730 }, { "epoch": 2.0, "learning_rate": 4.000643559805093e-05, "loss": 0.8321, "step": 21740 }, { "epoch": 2.0, "learning_rate": 4.000183874230027e-05, "loss": 0.9104, "step": 21750 }, { "epoch": 2.0, "learning_rate": 3.9997241886549605e-05, "loss": 0.8733, "step": 21760 }, { "epoch": 2.0, "learning_rate": 3.9992645030798935e-05, "loss": 0.9693, "step": 21770 }, { "epoch": 2.0, "learning_rate": 3.998804817504827e-05, "loss": 0.8226, "step": 21780 }, { "epoch": 2.0, "learning_rate": 3.998345131929761e-05, "loss": 0.9296, "step": 21790 }, { "epoch": 2.0, "learning_rate": 3.997885446354693e-05, "loss": 0.9526, "step": 21800 }, { "epoch": 2.01, "learning_rate": 3.997425760779627e-05, "loss": 0.9696, "step": 21810 }, { "epoch": 2.01, "learning_rate": 3.9969660752045604e-05, "loss": 0.8888, "step": 21820 }, { "epoch": 2.01, "learning_rate": 3.9965063896294933e-05, "loss": 0.9406, "step": 21830 }, { "epoch": 2.01, "learning_rate": 3.996046704054427e-05, "loss": 0.9667, "step": 21840 }, { "epoch": 2.01, "learning_rate": 3.9955870184793606e-05, "loss": 0.9684, "step": 21850 }, { "epoch": 2.01, "learning_rate": 3.9951273329042936e-05, "loss": 0.9378, "step": 21860 }, { "epoch": 2.01, "learning_rate": 3.994667647329227e-05, "loss": 0.921, "step": 21870 }, { "epoch": 2.01, "learning_rate": 3.994207961754161e-05, "loss": 0.9858, "step": 21880 }, { "epoch": 2.01, "learning_rate": 3.993748276179093e-05, "loss": 0.8436, "step": 21890 }, { "epoch": 2.01, "learning_rate": 3.993288590604027e-05, "loss": 0.911, "step": 21900 }, { "epoch": 2.01, "learning_rate": 3.9928289050289605e-05, "loss": 0.8914, "step": 21910 }, { "epoch": 2.02, "learning_rate": 3.9923692194538935e-05, "loss": 0.9415, "step": 21920 }, { "epoch": 2.02, "learning_rate": 3.991909533878827e-05, "loss": 0.9371, "step": 21930 }, { "epoch": 2.02, "learning_rate": 3.991449848303761e-05, "loss": 0.8885, "step": 21940 }, { "epoch": 2.02, "learning_rate": 3.990990162728694e-05, "loss": 0.87, "step": 21950 }, { "epoch": 2.02, "learning_rate": 3.9905304771536274e-05, "loss": 0.9054, "step": 21960 }, { "epoch": 2.02, "learning_rate": 3.9900707915785603e-05, "loss": 0.9088, "step": 21970 }, { "epoch": 2.02, "learning_rate": 3.989611106003493e-05, "loss": 0.8822, "step": 21980 }, { "epoch": 2.02, "learning_rate": 3.989151420428427e-05, "loss": 0.8588, "step": 21990 }, { "epoch": 2.02, "learning_rate": 3.9886917348533606e-05, "loss": 0.9012, "step": 22000 }, { "epoch": 2.02, "learning_rate": 3.9882320492782936e-05, "loss": 0.885, "step": 22010 }, { "epoch": 2.02, "learning_rate": 3.987772363703227e-05, "loss": 0.7474, "step": 22020 }, { "epoch": 2.03, "learning_rate": 3.987312678128161e-05, "loss": 0.9507, "step": 22030 }, { "epoch": 2.03, "learning_rate": 3.986852992553094e-05, "loss": 0.9347, "step": 22040 }, { "epoch": 2.03, "learning_rate": 3.9863933069780275e-05, "loss": 0.8955, "step": 22050 }, { "epoch": 2.03, "learning_rate": 3.9859336214029605e-05, "loss": 0.9146, "step": 22060 }, { "epoch": 2.03, "learning_rate": 3.9854739358278934e-05, "loss": 0.8159, "step": 22070 }, { "epoch": 2.03, "learning_rate": 3.985014250252827e-05, "loss": 0.8328, "step": 22080 }, { "epoch": 2.03, "learning_rate": 3.984554564677761e-05, "loss": 0.9327, "step": 22090 }, { "epoch": 2.03, "learning_rate": 3.984094879102694e-05, "loss": 1.0613, "step": 22100 }, { "epoch": 2.03, "learning_rate": 3.9836351935276273e-05, "loss": 0.8588, "step": 22110 }, { "epoch": 2.03, "learning_rate": 3.983175507952561e-05, "loss": 0.8369, "step": 22120 }, { "epoch": 2.03, "learning_rate": 3.982715822377494e-05, "loss": 0.8908, "step": 22130 }, { "epoch": 2.04, "learning_rate": 3.9822561368024276e-05, "loss": 0.9971, "step": 22140 }, { "epoch": 2.04, "learning_rate": 3.9817964512273606e-05, "loss": 1.0831, "step": 22150 }, { "epoch": 2.04, "learning_rate": 3.981336765652294e-05, "loss": 0.9216, "step": 22160 }, { "epoch": 2.04, "learning_rate": 3.980877080077227e-05, "loss": 0.8909, "step": 22170 }, { "epoch": 2.04, "learning_rate": 3.980417394502161e-05, "loss": 0.8622, "step": 22180 }, { "epoch": 2.04, "learning_rate": 3.979957708927094e-05, "loss": 0.8127, "step": 22190 }, { "epoch": 2.04, "learning_rate": 3.9794980233520275e-05, "loss": 0.8663, "step": 22200 }, { "epoch": 2.04, "learning_rate": 3.979038337776961e-05, "loss": 0.955, "step": 22210 }, { "epoch": 2.04, "learning_rate": 3.978578652201894e-05, "loss": 0.9652, "step": 22220 }, { "epoch": 2.04, "learning_rate": 3.978118966626828e-05, "loss": 0.866, "step": 22230 }, { "epoch": 2.04, "learning_rate": 3.977659281051761e-05, "loss": 0.9743, "step": 22240 }, { "epoch": 2.05, "learning_rate": 3.9771995954766943e-05, "loss": 0.9128, "step": 22250 }, { "epoch": 2.05, "learning_rate": 3.976739909901627e-05, "loss": 0.769, "step": 22260 }, { "epoch": 2.05, "learning_rate": 3.976280224326561e-05, "loss": 0.8548, "step": 22270 }, { "epoch": 2.05, "learning_rate": 3.975820538751494e-05, "loss": 0.831, "step": 22280 }, { "epoch": 2.05, "learning_rate": 3.9753608531764276e-05, "loss": 0.9473, "step": 22290 }, { "epoch": 2.05, "learning_rate": 3.9749011676013606e-05, "loss": 0.8991, "step": 22300 }, { "epoch": 2.05, "learning_rate": 3.974441482026294e-05, "loss": 0.9814, "step": 22310 }, { "epoch": 2.05, "learning_rate": 3.973981796451228e-05, "loss": 0.9239, "step": 22320 }, { "epoch": 2.05, "learning_rate": 3.973522110876161e-05, "loss": 0.9418, "step": 22330 }, { "epoch": 2.05, "learning_rate": 3.9730624253010945e-05, "loss": 0.9405, "step": 22340 }, { "epoch": 2.05, "learning_rate": 3.9726027397260274e-05, "loss": 0.8766, "step": 22350 }, { "epoch": 2.06, "learning_rate": 3.972143054150961e-05, "loss": 0.9466, "step": 22360 }, { "epoch": 2.06, "learning_rate": 3.971683368575894e-05, "loss": 0.7956, "step": 22370 }, { "epoch": 2.06, "learning_rate": 3.971223683000828e-05, "loss": 0.8672, "step": 22380 }, { "epoch": 2.06, "learning_rate": 3.970763997425761e-05, "loss": 0.837, "step": 22390 }, { "epoch": 2.06, "learning_rate": 3.970304311850694e-05, "loss": 0.9252, "step": 22400 }, { "epoch": 2.06, "learning_rate": 3.969844626275628e-05, "loss": 0.8791, "step": 22410 }, { "epoch": 2.06, "learning_rate": 3.969384940700561e-05, "loss": 0.886, "step": 22420 }, { "epoch": 2.06, "learning_rate": 3.9689252551254946e-05, "loss": 0.8833, "step": 22430 }, { "epoch": 2.06, "learning_rate": 3.9684655695504276e-05, "loss": 0.8767, "step": 22440 }, { "epoch": 2.06, "learning_rate": 3.968005883975361e-05, "loss": 1.0463, "step": 22450 }, { "epoch": 2.06, "learning_rate": 3.967546198400294e-05, "loss": 0.8214, "step": 22460 }, { "epoch": 2.07, "learning_rate": 3.967086512825228e-05, "loss": 0.9483, "step": 22470 }, { "epoch": 2.07, "learning_rate": 3.966626827250161e-05, "loss": 0.8843, "step": 22480 }, { "epoch": 2.07, "learning_rate": 3.9661671416750944e-05, "loss": 0.9276, "step": 22490 }, { "epoch": 2.07, "learning_rate": 3.965707456100028e-05, "loss": 0.8846, "step": 22500 }, { "epoch": 2.07, "learning_rate": 3.965247770524961e-05, "loss": 0.9452, "step": 22510 }, { "epoch": 2.07, "learning_rate": 3.964788084949895e-05, "loss": 0.862, "step": 22520 }, { "epoch": 2.07, "learning_rate": 3.964328399374828e-05, "loss": 0.7622, "step": 22530 }, { "epoch": 2.07, "learning_rate": 3.963868713799761e-05, "loss": 0.9097, "step": 22540 }, { "epoch": 2.07, "learning_rate": 3.963409028224694e-05, "loss": 1.0355, "step": 22550 }, { "epoch": 2.07, "learning_rate": 3.962949342649628e-05, "loss": 0.8786, "step": 22560 }, { "epoch": 2.08, "learning_rate": 3.962489657074561e-05, "loss": 0.8725, "step": 22570 }, { "epoch": 2.08, "learning_rate": 3.9620299714994946e-05, "loss": 1.0371, "step": 22580 }, { "epoch": 2.08, "learning_rate": 3.961570285924428e-05, "loss": 0.8719, "step": 22590 }, { "epoch": 2.08, "learning_rate": 3.961110600349361e-05, "loss": 0.9711, "step": 22600 }, { "epoch": 2.08, "learning_rate": 3.960650914774295e-05, "loss": 0.7916, "step": 22610 }, { "epoch": 2.08, "learning_rate": 3.960191229199228e-05, "loss": 0.912, "step": 22620 }, { "epoch": 2.08, "learning_rate": 3.959731543624161e-05, "loss": 0.8577, "step": 22630 }, { "epoch": 2.08, "learning_rate": 3.9592718580490944e-05, "loss": 0.9352, "step": 22640 }, { "epoch": 2.08, "learning_rate": 3.958812172474028e-05, "loss": 0.8243, "step": 22650 }, { "epoch": 2.08, "learning_rate": 3.958352486898961e-05, "loss": 0.9307, "step": 22660 }, { "epoch": 2.08, "learning_rate": 3.957892801323895e-05, "loss": 0.8343, "step": 22670 }, { "epoch": 2.09, "learning_rate": 3.957433115748828e-05, "loss": 0.8719, "step": 22680 }, { "epoch": 2.09, "learning_rate": 3.956973430173761e-05, "loss": 0.9309, "step": 22690 }, { "epoch": 2.09, "learning_rate": 3.956513744598695e-05, "loss": 0.879, "step": 22700 }, { "epoch": 2.09, "learning_rate": 3.956054059023628e-05, "loss": 0.9255, "step": 22710 }, { "epoch": 2.09, "learning_rate": 3.955594373448561e-05, "loss": 0.8922, "step": 22720 }, { "epoch": 2.09, "learning_rate": 3.9551346878734945e-05, "loss": 0.8244, "step": 22730 }, { "epoch": 2.09, "learning_rate": 3.954675002298428e-05, "loss": 0.969, "step": 22740 }, { "epoch": 2.09, "learning_rate": 3.954215316723361e-05, "loss": 0.8789, "step": 22750 }, { "epoch": 2.09, "learning_rate": 3.953755631148295e-05, "loss": 0.9168, "step": 22760 }, { "epoch": 2.09, "learning_rate": 3.9532959455732284e-05, "loss": 0.8747, "step": 22770 }, { "epoch": 2.09, "learning_rate": 3.9528362599981614e-05, "loss": 0.8951, "step": 22780 }, { "epoch": 2.1, "learning_rate": 3.952376574423095e-05, "loss": 0.8626, "step": 22790 }, { "epoch": 2.1, "learning_rate": 3.951916888848028e-05, "loss": 0.9785, "step": 22800 }, { "epoch": 2.1, "learning_rate": 3.951457203272961e-05, "loss": 0.8761, "step": 22810 }, { "epoch": 2.1, "learning_rate": 3.9509975176978947e-05, "loss": 0.9182, "step": 22820 }, { "epoch": 2.1, "learning_rate": 3.950537832122828e-05, "loss": 0.9115, "step": 22830 }, { "epoch": 2.1, "learning_rate": 3.950078146547761e-05, "loss": 0.8233, "step": 22840 }, { "epoch": 2.1, "learning_rate": 3.949618460972695e-05, "loss": 0.898, "step": 22850 }, { "epoch": 2.1, "learning_rate": 3.9491587753976286e-05, "loss": 0.8902, "step": 22860 }, { "epoch": 2.1, "learning_rate": 3.9486990898225615e-05, "loss": 0.9901, "step": 22870 }, { "epoch": 2.1, "learning_rate": 3.948239404247495e-05, "loss": 0.8921, "step": 22880 }, { "epoch": 2.1, "learning_rate": 3.947779718672428e-05, "loss": 0.9265, "step": 22890 }, { "epoch": 2.11, "learning_rate": 3.947320033097361e-05, "loss": 0.9178, "step": 22900 }, { "epoch": 2.11, "learning_rate": 3.946860347522295e-05, "loss": 0.87, "step": 22910 }, { "epoch": 2.11, "learning_rate": 3.9464006619472284e-05, "loss": 0.8272, "step": 22920 }, { "epoch": 2.11, "learning_rate": 3.9459409763721614e-05, "loss": 0.9655, "step": 22930 }, { "epoch": 2.11, "learning_rate": 3.945481290797095e-05, "loss": 0.8808, "step": 22940 }, { "epoch": 2.11, "learning_rate": 3.945021605222029e-05, "loss": 0.9131, "step": 22950 }, { "epoch": 2.11, "learning_rate": 3.9445619196469617e-05, "loss": 0.8067, "step": 22960 }, { "epoch": 2.11, "learning_rate": 3.944102234071895e-05, "loss": 0.9955, "step": 22970 }, { "epoch": 2.11, "learning_rate": 3.943642548496828e-05, "loss": 0.89, "step": 22980 }, { "epoch": 2.11, "learning_rate": 3.943182862921761e-05, "loss": 0.87, "step": 22990 }, { "epoch": 2.11, "learning_rate": 3.942723177346695e-05, "loss": 0.8704, "step": 23000 }, { "epoch": 2.12, "learning_rate": 3.9422634917716285e-05, "loss": 0.9078, "step": 23010 }, { "epoch": 2.12, "learning_rate": 3.9418038061965615e-05, "loss": 0.8811, "step": 23020 }, { "epoch": 2.12, "learning_rate": 3.941344120621495e-05, "loss": 1.02, "step": 23030 }, { "epoch": 2.12, "learning_rate": 3.940884435046429e-05, "loss": 0.857, "step": 23040 }, { "epoch": 2.12, "learning_rate": 3.940424749471362e-05, "loss": 0.9148, "step": 23050 }, { "epoch": 2.12, "learning_rate": 3.9399650638962954e-05, "loss": 0.9369, "step": 23060 }, { "epoch": 2.12, "learning_rate": 3.9395053783212284e-05, "loss": 0.8441, "step": 23070 }, { "epoch": 2.12, "learning_rate": 3.9390456927461614e-05, "loss": 0.9388, "step": 23080 }, { "epoch": 2.12, "learning_rate": 3.938586007171095e-05, "loss": 0.9744, "step": 23090 }, { "epoch": 2.12, "learning_rate": 3.9381263215960287e-05, "loss": 0.9177, "step": 23100 }, { "epoch": 2.12, "learning_rate": 3.9376666360209616e-05, "loss": 0.8752, "step": 23110 }, { "epoch": 2.13, "learning_rate": 3.937206950445895e-05, "loss": 0.9393, "step": 23120 }, { "epoch": 2.13, "learning_rate": 3.936747264870829e-05, "loss": 0.9741, "step": 23130 }, { "epoch": 2.13, "learning_rate": 3.936287579295762e-05, "loss": 0.9203, "step": 23140 }, { "epoch": 2.13, "learning_rate": 3.9358278937206955e-05, "loss": 1.1124, "step": 23150 }, { "epoch": 2.13, "learning_rate": 3.9353682081456285e-05, "loss": 0.9222, "step": 23160 }, { "epoch": 2.13, "learning_rate": 3.9349085225705615e-05, "loss": 0.8574, "step": 23170 }, { "epoch": 2.13, "learning_rate": 3.934448836995495e-05, "loss": 0.9508, "step": 23180 }, { "epoch": 2.13, "learning_rate": 3.933989151420429e-05, "loss": 0.8828, "step": 23190 }, { "epoch": 2.13, "learning_rate": 3.933529465845362e-05, "loss": 0.9973, "step": 23200 }, { "epoch": 2.13, "learning_rate": 3.9330697802702954e-05, "loss": 0.8398, "step": 23210 }, { "epoch": 2.13, "learning_rate": 3.932610094695229e-05, "loss": 0.9062, "step": 23220 }, { "epoch": 2.14, "learning_rate": 3.932150409120162e-05, "loss": 0.8724, "step": 23230 }, { "epoch": 2.14, "learning_rate": 3.931690723545096e-05, "loss": 0.9364, "step": 23240 }, { "epoch": 2.14, "learning_rate": 3.9312310379700286e-05, "loss": 0.8984, "step": 23250 }, { "epoch": 2.14, "learning_rate": 3.9307713523949616e-05, "loss": 0.8438, "step": 23260 }, { "epoch": 2.14, "learning_rate": 3.930311666819895e-05, "loss": 0.8541, "step": 23270 }, { "epoch": 2.14, "learning_rate": 3.929851981244829e-05, "loss": 0.944, "step": 23280 }, { "epoch": 2.14, "learning_rate": 3.929392295669762e-05, "loss": 0.8434, "step": 23290 }, { "epoch": 2.14, "learning_rate": 3.9289326100946955e-05, "loss": 0.8106, "step": 23300 }, { "epoch": 2.14, "learning_rate": 3.928472924519629e-05, "loss": 1.0658, "step": 23310 }, { "epoch": 2.14, "learning_rate": 3.928013238944562e-05, "loss": 0.8612, "step": 23320 }, { "epoch": 2.14, "learning_rate": 3.927553553369496e-05, "loss": 0.9346, "step": 23330 }, { "epoch": 2.15, "learning_rate": 3.927093867794429e-05, "loss": 0.8088, "step": 23340 }, { "epoch": 2.15, "learning_rate": 3.926634182219362e-05, "loss": 0.929, "step": 23350 }, { "epoch": 2.15, "learning_rate": 3.9261744966442954e-05, "loss": 1.0178, "step": 23360 }, { "epoch": 2.15, "learning_rate": 3.925714811069229e-05, "loss": 0.7947, "step": 23370 }, { "epoch": 2.15, "learning_rate": 3.925255125494162e-05, "loss": 0.8329, "step": 23380 }, { "epoch": 2.15, "learning_rate": 3.9247954399190956e-05, "loss": 0.8393, "step": 23390 }, { "epoch": 2.15, "learning_rate": 3.924335754344029e-05, "loss": 0.929, "step": 23400 }, { "epoch": 2.15, "learning_rate": 3.923876068768962e-05, "loss": 0.9178, "step": 23410 }, { "epoch": 2.15, "learning_rate": 3.923416383193896e-05, "loss": 0.7708, "step": 23420 }, { "epoch": 2.15, "learning_rate": 3.922956697618829e-05, "loss": 0.8436, "step": 23430 }, { "epoch": 2.16, "learning_rate": 3.922497012043762e-05, "loss": 1.0939, "step": 23440 }, { "epoch": 2.16, "learning_rate": 3.9220373264686955e-05, "loss": 0.9942, "step": 23450 }, { "epoch": 2.16, "learning_rate": 3.921577640893629e-05, "loss": 0.8565, "step": 23460 }, { "epoch": 2.16, "learning_rate": 3.921117955318562e-05, "loss": 0.794, "step": 23470 }, { "epoch": 2.16, "learning_rate": 3.920658269743496e-05, "loss": 0.8765, "step": 23480 }, { "epoch": 2.16, "learning_rate": 3.9201985841684294e-05, "loss": 0.8813, "step": 23490 }, { "epoch": 2.16, "learning_rate": 3.9197388985933624e-05, "loss": 0.8736, "step": 23500 }, { "epoch": 2.16, "learning_rate": 3.919279213018296e-05, "loss": 0.8556, "step": 23510 }, { "epoch": 2.16, "learning_rate": 3.918819527443229e-05, "loss": 0.9074, "step": 23520 }, { "epoch": 2.16, "learning_rate": 3.918359841868162e-05, "loss": 0.9765, "step": 23530 }, { "epoch": 2.16, "learning_rate": 3.9179001562930956e-05, "loss": 0.8347, "step": 23540 }, { "epoch": 2.17, "learning_rate": 3.917440470718029e-05, "loss": 0.7948, "step": 23550 }, { "epoch": 2.17, "learning_rate": 3.916980785142962e-05, "loss": 0.9687, "step": 23560 }, { "epoch": 2.17, "learning_rate": 3.916521099567896e-05, "loss": 0.8389, "step": 23570 }, { "epoch": 2.17, "learning_rate": 3.9160614139928295e-05, "loss": 0.9639, "step": 23580 }, { "epoch": 2.17, "learning_rate": 3.9156017284177625e-05, "loss": 0.9177, "step": 23590 }, { "epoch": 2.17, "learning_rate": 3.915142042842696e-05, "loss": 0.8366, "step": 23600 }, { "epoch": 2.17, "learning_rate": 3.914682357267629e-05, "loss": 0.8971, "step": 23610 }, { "epoch": 2.17, "learning_rate": 3.914222671692562e-05, "loss": 0.8247, "step": 23620 }, { "epoch": 2.17, "learning_rate": 3.913762986117496e-05, "loss": 0.8694, "step": 23630 }, { "epoch": 2.17, "learning_rate": 3.9133033005424294e-05, "loss": 1.0112, "step": 23640 }, { "epoch": 2.17, "learning_rate": 3.9128436149673623e-05, "loss": 0.894, "step": 23650 }, { "epoch": 2.18, "learning_rate": 3.912383929392296e-05, "loss": 0.8291, "step": 23660 }, { "epoch": 2.18, "learning_rate": 3.9119242438172296e-05, "loss": 0.9751, "step": 23670 }, { "epoch": 2.18, "learning_rate": 3.9114645582421626e-05, "loss": 0.9047, "step": 23680 }, { "epoch": 2.18, "learning_rate": 3.911004872667096e-05, "loss": 0.9514, "step": 23690 }, { "epoch": 2.18, "learning_rate": 3.910545187092029e-05, "loss": 1.0161, "step": 23700 }, { "epoch": 2.18, "learning_rate": 3.910085501516962e-05, "loss": 0.8583, "step": 23710 }, { "epoch": 2.18, "learning_rate": 3.909625815941896e-05, "loss": 0.9442, "step": 23720 }, { "epoch": 2.18, "learning_rate": 3.9091661303668295e-05, "loss": 0.9246, "step": 23730 }, { "epoch": 2.18, "learning_rate": 3.9087064447917625e-05, "loss": 0.982, "step": 23740 }, { "epoch": 2.18, "learning_rate": 3.908246759216696e-05, "loss": 0.8849, "step": 23750 }, { "epoch": 2.18, "learning_rate": 3.90778707364163e-05, "loss": 0.8887, "step": 23760 }, { "epoch": 2.19, "learning_rate": 3.907327388066563e-05, "loss": 0.8549, "step": 23770 }, { "epoch": 2.19, "learning_rate": 3.9068677024914964e-05, "loss": 0.9033, "step": 23780 }, { "epoch": 2.19, "learning_rate": 3.9064080169164294e-05, "loss": 0.8888, "step": 23790 }, { "epoch": 2.19, "learning_rate": 3.905948331341362e-05, "loss": 0.901, "step": 23800 }, { "epoch": 2.19, "learning_rate": 3.905488645766296e-05, "loss": 0.8417, "step": 23810 }, { "epoch": 2.19, "learning_rate": 3.9050289601912296e-05, "loss": 0.9891, "step": 23820 }, { "epoch": 2.19, "learning_rate": 3.9045692746161626e-05, "loss": 0.9673, "step": 23830 }, { "epoch": 2.19, "learning_rate": 3.904109589041096e-05, "loss": 0.885, "step": 23840 }, { "epoch": 2.19, "learning_rate": 3.90364990346603e-05, "loss": 0.9451, "step": 23850 }, { "epoch": 2.19, "learning_rate": 3.903190217890963e-05, "loss": 0.9519, "step": 23860 }, { "epoch": 2.19, "learning_rate": 3.9027305323158965e-05, "loss": 0.9843, "step": 23870 }, { "epoch": 2.2, "learning_rate": 3.9022708467408295e-05, "loss": 0.8818, "step": 23880 }, { "epoch": 2.2, "learning_rate": 3.9018111611657624e-05, "loss": 0.7951, "step": 23890 }, { "epoch": 2.2, "learning_rate": 3.901351475590696e-05, "loss": 0.8305, "step": 23900 }, { "epoch": 2.2, "learning_rate": 3.90089179001563e-05, "loss": 0.8784, "step": 23910 }, { "epoch": 2.2, "learning_rate": 3.900432104440563e-05, "loss": 0.9685, "step": 23920 }, { "epoch": 2.2, "learning_rate": 3.8999724188654964e-05, "loss": 0.9353, "step": 23930 }, { "epoch": 2.2, "learning_rate": 3.899512733290429e-05, "loss": 0.8558, "step": 23940 }, { "epoch": 2.2, "learning_rate": 3.899053047715363e-05, "loss": 0.9285, "step": 23950 }, { "epoch": 2.2, "learning_rate": 3.8985933621402966e-05, "loss": 0.8846, "step": 23960 }, { "epoch": 2.2, "learning_rate": 3.8981336765652296e-05, "loss": 0.9073, "step": 23970 }, { "epoch": 2.2, "learning_rate": 3.8976739909901626e-05, "loss": 0.9302, "step": 23980 }, { "epoch": 2.21, "learning_rate": 3.897214305415096e-05, "loss": 0.8722, "step": 23990 }, { "epoch": 2.21, "learning_rate": 3.89675461984003e-05, "loss": 0.8386, "step": 24000 }, { "epoch": 2.21, "learning_rate": 3.896294934264963e-05, "loss": 0.9517, "step": 24010 }, { "epoch": 2.21, "learning_rate": 3.8958352486898965e-05, "loss": 0.899, "step": 24020 }, { "epoch": 2.21, "learning_rate": 3.8953755631148294e-05, "loss": 1.007, "step": 24030 }, { "epoch": 2.21, "learning_rate": 3.894915877539763e-05, "loss": 0.7857, "step": 24040 }, { "epoch": 2.21, "learning_rate": 3.894456191964697e-05, "loss": 0.8662, "step": 24050 }, { "epoch": 2.21, "learning_rate": 3.89399650638963e-05, "loss": 0.8781, "step": 24060 }, { "epoch": 2.21, "learning_rate": 3.893536820814563e-05, "loss": 0.9055, "step": 24070 }, { "epoch": 2.21, "learning_rate": 3.893077135239496e-05, "loss": 0.8759, "step": 24080 }, { "epoch": 2.21, "learning_rate": 3.89261744966443e-05, "loss": 0.9124, "step": 24090 }, { "epoch": 2.22, "learning_rate": 3.892157764089363e-05, "loss": 0.8988, "step": 24100 }, { "epoch": 2.22, "learning_rate": 3.8916980785142966e-05, "loss": 0.9284, "step": 24110 }, { "epoch": 2.22, "learning_rate": 3.8912383929392296e-05, "loss": 0.9278, "step": 24120 }, { "epoch": 2.22, "learning_rate": 3.890778707364163e-05, "loss": 0.8549, "step": 24130 }, { "epoch": 2.22, "learning_rate": 3.890319021789097e-05, "loss": 0.9773, "step": 24140 }, { "epoch": 2.22, "learning_rate": 3.88985933621403e-05, "loss": 0.8032, "step": 24150 }, { "epoch": 2.22, "learning_rate": 3.889399650638963e-05, "loss": 0.7664, "step": 24160 }, { "epoch": 2.22, "learning_rate": 3.8889399650638964e-05, "loss": 0.8388, "step": 24170 }, { "epoch": 2.22, "learning_rate": 3.88848027948883e-05, "loss": 0.9958, "step": 24180 }, { "epoch": 2.22, "learning_rate": 3.888020593913763e-05, "loss": 0.9024, "step": 24190 }, { "epoch": 2.22, "learning_rate": 3.887560908338697e-05, "loss": 0.9909, "step": 24200 }, { "epoch": 2.23, "learning_rate": 3.88710122276363e-05, "loss": 0.9357, "step": 24210 }, { "epoch": 2.23, "learning_rate": 3.886641537188563e-05, "loss": 0.8767, "step": 24220 }, { "epoch": 2.23, "learning_rate": 3.886181851613497e-05, "loss": 0.8585, "step": 24230 }, { "epoch": 2.23, "learning_rate": 3.88572216603843e-05, "loss": 0.918, "step": 24240 }, { "epoch": 2.23, "learning_rate": 3.885262480463363e-05, "loss": 0.9013, "step": 24250 }, { "epoch": 2.23, "learning_rate": 3.8848027948882966e-05, "loss": 0.8776, "step": 24260 }, { "epoch": 2.23, "learning_rate": 3.8843431093132295e-05, "loss": 0.995, "step": 24270 }, { "epoch": 2.23, "learning_rate": 3.883883423738163e-05, "loss": 0.9256, "step": 24280 }, { "epoch": 2.23, "learning_rate": 3.883423738163097e-05, "loss": 0.8704, "step": 24290 }, { "epoch": 2.23, "learning_rate": 3.88296405258803e-05, "loss": 0.7554, "step": 24300 }, { "epoch": 2.23, "learning_rate": 3.8825043670129635e-05, "loss": 0.9684, "step": 24310 }, { "epoch": 2.24, "learning_rate": 3.882044681437897e-05, "loss": 0.9906, "step": 24320 }, { "epoch": 2.24, "learning_rate": 3.88158499586283e-05, "loss": 0.9158, "step": 24330 }, { "epoch": 2.24, "learning_rate": 3.881125310287763e-05, "loss": 0.857, "step": 24340 }, { "epoch": 2.24, "learning_rate": 3.880665624712697e-05, "loss": 0.8544, "step": 24350 }, { "epoch": 2.24, "learning_rate": 3.8802059391376297e-05, "loss": 0.7842, "step": 24360 }, { "epoch": 2.24, "learning_rate": 3.879746253562563e-05, "loss": 0.9272, "step": 24370 }, { "epoch": 2.24, "learning_rate": 3.879286567987497e-05, "loss": 0.8335, "step": 24380 }, { "epoch": 2.24, "learning_rate": 3.87882688241243e-05, "loss": 1.0488, "step": 24390 }, { "epoch": 2.24, "learning_rate": 3.8783671968373636e-05, "loss": 0.8766, "step": 24400 }, { "epoch": 2.24, "learning_rate": 3.877907511262297e-05, "loss": 0.8361, "step": 24410 }, { "epoch": 2.25, "learning_rate": 3.87744782568723e-05, "loss": 0.9173, "step": 24420 }, { "epoch": 2.25, "learning_rate": 3.876988140112163e-05, "loss": 0.9092, "step": 24430 }, { "epoch": 2.25, "learning_rate": 3.876528454537097e-05, "loss": 0.9601, "step": 24440 }, { "epoch": 2.25, "learning_rate": 3.87606876896203e-05, "loss": 0.8937, "step": 24450 }, { "epoch": 2.25, "learning_rate": 3.8756090833869634e-05, "loss": 0.9442, "step": 24460 }, { "epoch": 2.25, "learning_rate": 3.875149397811897e-05, "loss": 0.9871, "step": 24470 }, { "epoch": 2.25, "learning_rate": 3.87468971223683e-05, "loss": 0.8141, "step": 24480 }, { "epoch": 2.25, "learning_rate": 3.874230026661764e-05, "loss": 0.8113, "step": 24490 }, { "epoch": 2.25, "learning_rate": 3.873770341086697e-05, "loss": 0.7601, "step": 24500 }, { "epoch": 2.25, "learning_rate": 3.87331065551163e-05, "loss": 0.9256, "step": 24510 }, { "epoch": 2.25, "learning_rate": 3.872850969936563e-05, "loss": 0.8278, "step": 24520 }, { "epoch": 2.26, "learning_rate": 3.872391284361497e-05, "loss": 0.8762, "step": 24530 }, { "epoch": 2.26, "learning_rate": 3.87193159878643e-05, "loss": 0.9135, "step": 24540 }, { "epoch": 2.26, "learning_rate": 3.8714719132113635e-05, "loss": 0.8955, "step": 24550 }, { "epoch": 2.26, "learning_rate": 3.871012227636297e-05, "loss": 0.885, "step": 24560 }, { "epoch": 2.26, "learning_rate": 3.87055254206123e-05, "loss": 0.8196, "step": 24570 }, { "epoch": 2.26, "learning_rate": 3.870092856486164e-05, "loss": 0.8553, "step": 24580 }, { "epoch": 2.26, "learning_rate": 3.8696331709110975e-05, "loss": 0.9216, "step": 24590 }, { "epoch": 2.26, "learning_rate": 3.86917348533603e-05, "loss": 0.9979, "step": 24600 }, { "epoch": 2.26, "learning_rate": 3.8687137997609634e-05, "loss": 0.8685, "step": 24610 }, { "epoch": 2.26, "learning_rate": 3.868254114185897e-05, "loss": 0.9263, "step": 24620 }, { "epoch": 2.26, "learning_rate": 3.86779442861083e-05, "loss": 0.9687, "step": 24630 }, { "epoch": 2.27, "learning_rate": 3.867334743035764e-05, "loss": 0.7731, "step": 24640 }, { "epoch": 2.27, "learning_rate": 3.866875057460697e-05, "loss": 0.982, "step": 24650 }, { "epoch": 2.27, "learning_rate": 3.86641537188563e-05, "loss": 0.9328, "step": 24660 }, { "epoch": 2.27, "learning_rate": 3.865955686310564e-05, "loss": 0.9946, "step": 24670 }, { "epoch": 2.27, "learning_rate": 3.8654960007354976e-05, "loss": 0.8228, "step": 24680 }, { "epoch": 2.27, "learning_rate": 3.86503631516043e-05, "loss": 0.803, "step": 24690 }, { "epoch": 2.27, "learning_rate": 3.8645766295853635e-05, "loss": 0.8687, "step": 24700 }, { "epoch": 2.27, "learning_rate": 3.864116944010297e-05, "loss": 0.7943, "step": 24710 }, { "epoch": 2.27, "learning_rate": 3.86365725843523e-05, "loss": 0.8824, "step": 24720 }, { "epoch": 2.27, "learning_rate": 3.863197572860164e-05, "loss": 0.9019, "step": 24730 }, { "epoch": 2.27, "learning_rate": 3.8627378872850974e-05, "loss": 0.8322, "step": 24740 }, { "epoch": 2.28, "learning_rate": 3.8622782017100304e-05, "loss": 1.023, "step": 24750 }, { "epoch": 2.28, "learning_rate": 3.861818516134964e-05, "loss": 0.8711, "step": 24760 }, { "epoch": 2.28, "learning_rate": 3.861358830559898e-05, "loss": 0.9005, "step": 24770 }, { "epoch": 2.28, "learning_rate": 3.86089914498483e-05, "loss": 0.8615, "step": 24780 }, { "epoch": 2.28, "learning_rate": 3.8604394594097636e-05, "loss": 0.9689, "step": 24790 }, { "epoch": 2.28, "learning_rate": 3.859979773834697e-05, "loss": 0.773, "step": 24800 }, { "epoch": 2.28, "learning_rate": 3.85952008825963e-05, "loss": 0.8651, "step": 24810 }, { "epoch": 2.28, "learning_rate": 3.859060402684564e-05, "loss": 0.8595, "step": 24820 }, { "epoch": 2.28, "learning_rate": 3.8586007171094976e-05, "loss": 0.9356, "step": 24830 }, { "epoch": 2.28, "learning_rate": 3.8581410315344305e-05, "loss": 0.9088, "step": 24840 }, { "epoch": 2.28, "learning_rate": 3.857681345959364e-05, "loss": 0.8301, "step": 24850 }, { "epoch": 2.29, "learning_rate": 3.857221660384298e-05, "loss": 0.9789, "step": 24860 }, { "epoch": 2.29, "learning_rate": 3.85676197480923e-05, "loss": 0.7929, "step": 24870 }, { "epoch": 2.29, "learning_rate": 3.856302289234164e-05, "loss": 0.8646, "step": 24880 }, { "epoch": 2.29, "learning_rate": 3.8558426036590974e-05, "loss": 0.8542, "step": 24890 }, { "epoch": 2.29, "learning_rate": 3.8553829180840304e-05, "loss": 0.8829, "step": 24900 }, { "epoch": 2.29, "learning_rate": 3.854923232508964e-05, "loss": 0.8957, "step": 24910 }, { "epoch": 2.29, "learning_rate": 3.854463546933898e-05, "loss": 0.7552, "step": 24920 }, { "epoch": 2.29, "learning_rate": 3.8540038613588306e-05, "loss": 0.8169, "step": 24930 }, { "epoch": 2.29, "learning_rate": 3.853544175783764e-05, "loss": 0.8061, "step": 24940 }, { "epoch": 2.29, "learning_rate": 3.853084490208698e-05, "loss": 0.848, "step": 24950 }, { "epoch": 2.29, "learning_rate": 3.85262480463363e-05, "loss": 1.103, "step": 24960 }, { "epoch": 2.3, "learning_rate": 3.852165119058564e-05, "loss": 0.9931, "step": 24970 }, { "epoch": 2.3, "learning_rate": 3.8517054334834975e-05, "loss": 0.8699, "step": 24980 }, { "epoch": 2.3, "learning_rate": 3.8512457479084305e-05, "loss": 0.7692, "step": 24990 }, { "epoch": 2.3, "learning_rate": 3.850786062333364e-05, "loss": 0.8503, "step": 25000 }, { "epoch": 2.3, "eval_accuracy": 0.5792576419213974, "eval_loss": 0.8908177614212036, "eval_runtime": 159.4739, "eval_samples_per_second": 28.719, "eval_steps_per_second": 3.593, "step": 25000 }, { "epoch": 2.3, "learning_rate": 3.850326376758298e-05, "loss": 0.9654, "step": 25010 }, { "epoch": 2.3, "learning_rate": 3.849866691183231e-05, "loss": 0.9587, "step": 25020 }, { "epoch": 2.3, "learning_rate": 3.8494070056081644e-05, "loss": 0.8202, "step": 25030 }, { "epoch": 2.3, "learning_rate": 3.848947320033098e-05, "loss": 0.9115, "step": 25040 }, { "epoch": 2.3, "learning_rate": 3.8484876344580303e-05, "loss": 0.9631, "step": 25050 }, { "epoch": 2.3, "learning_rate": 3.848027948882964e-05, "loss": 0.9608, "step": 25060 }, { "epoch": 2.3, "learning_rate": 3.8475682633078976e-05, "loss": 0.8807, "step": 25070 }, { "epoch": 2.31, "learning_rate": 3.8471085777328306e-05, "loss": 0.8783, "step": 25080 }, { "epoch": 2.31, "learning_rate": 3.846648892157764e-05, "loss": 0.8961, "step": 25090 }, { "epoch": 2.31, "learning_rate": 3.846189206582698e-05, "loss": 0.9109, "step": 25100 }, { "epoch": 2.31, "learning_rate": 3.845729521007631e-05, "loss": 1.0039, "step": 25110 }, { "epoch": 2.31, "learning_rate": 3.8452698354325645e-05, "loss": 0.8841, "step": 25120 }, { "epoch": 2.31, "learning_rate": 3.844810149857498e-05, "loss": 0.7627, "step": 25130 }, { "epoch": 2.31, "learning_rate": 3.8443504642824305e-05, "loss": 0.9236, "step": 25140 }, { "epoch": 2.31, "learning_rate": 3.843890778707364e-05, "loss": 0.8369, "step": 25150 }, { "epoch": 2.31, "learning_rate": 3.843431093132298e-05, "loss": 0.9142, "step": 25160 }, { "epoch": 2.31, "learning_rate": 3.842971407557231e-05, "loss": 0.9696, "step": 25170 }, { "epoch": 2.31, "learning_rate": 3.8425117219821644e-05, "loss": 0.9228, "step": 25180 }, { "epoch": 2.32, "learning_rate": 3.842052036407098e-05, "loss": 0.9158, "step": 25190 }, { "epoch": 2.32, "learning_rate": 3.841592350832031e-05, "loss": 0.9274, "step": 25200 }, { "epoch": 2.32, "learning_rate": 3.8411326652569646e-05, "loss": 0.9855, "step": 25210 }, { "epoch": 2.32, "learning_rate": 3.840672979681898e-05, "loss": 0.8285, "step": 25220 }, { "epoch": 2.32, "learning_rate": 3.8402132941068306e-05, "loss": 0.8533, "step": 25230 }, { "epoch": 2.32, "learning_rate": 3.839753608531764e-05, "loss": 0.9389, "step": 25240 }, { "epoch": 2.32, "learning_rate": 3.839293922956698e-05, "loss": 0.9925, "step": 25250 }, { "epoch": 2.32, "learning_rate": 3.838834237381631e-05, "loss": 0.8445, "step": 25260 }, { "epoch": 2.32, "learning_rate": 3.8383745518065645e-05, "loss": 0.9331, "step": 25270 }, { "epoch": 2.32, "learning_rate": 3.837914866231498e-05, "loss": 0.9052, "step": 25280 }, { "epoch": 2.33, "learning_rate": 3.837455180656431e-05, "loss": 0.942, "step": 25290 }, { "epoch": 2.33, "learning_rate": 3.836995495081365e-05, "loss": 0.9063, "step": 25300 }, { "epoch": 2.33, "learning_rate": 3.8365358095062984e-05, "loss": 0.8782, "step": 25310 }, { "epoch": 2.33, "learning_rate": 3.836076123931231e-05, "loss": 0.9746, "step": 25320 }, { "epoch": 2.33, "learning_rate": 3.8356164383561644e-05, "loss": 0.8441, "step": 25330 }, { "epoch": 2.33, "learning_rate": 3.835156752781098e-05, "loss": 0.8447, "step": 25340 }, { "epoch": 2.33, "learning_rate": 3.834697067206031e-05, "loss": 0.9252, "step": 25350 }, { "epoch": 2.33, "learning_rate": 3.8342373816309646e-05, "loss": 0.9182, "step": 25360 }, { "epoch": 2.33, "learning_rate": 3.833777696055898e-05, "loss": 0.9666, "step": 25370 }, { "epoch": 2.33, "learning_rate": 3.833318010480831e-05, "loss": 0.8598, "step": 25380 }, { "epoch": 2.33, "learning_rate": 3.832858324905765e-05, "loss": 0.9655, "step": 25390 }, { "epoch": 2.34, "learning_rate": 3.8323986393306985e-05, "loss": 0.9336, "step": 25400 }, { "epoch": 2.34, "learning_rate": 3.831938953755631e-05, "loss": 0.9618, "step": 25410 }, { "epoch": 2.34, "learning_rate": 3.8314792681805645e-05, "loss": 0.9459, "step": 25420 }, { "epoch": 2.34, "learning_rate": 3.831019582605498e-05, "loss": 0.8521, "step": 25430 }, { "epoch": 2.34, "learning_rate": 3.830559897030431e-05, "loss": 0.8881, "step": 25440 }, { "epoch": 2.34, "learning_rate": 3.830100211455365e-05, "loss": 0.9562, "step": 25450 }, { "epoch": 2.34, "learning_rate": 3.8296405258802984e-05, "loss": 0.9136, "step": 25460 }, { "epoch": 2.34, "learning_rate": 3.8291808403052314e-05, "loss": 0.9303, "step": 25470 }, { "epoch": 2.34, "learning_rate": 3.828721154730165e-05, "loss": 0.9289, "step": 25480 }, { "epoch": 2.34, "learning_rate": 3.828261469155098e-05, "loss": 0.8456, "step": 25490 }, { "epoch": 2.34, "learning_rate": 3.8278017835800316e-05, "loss": 0.9169, "step": 25500 }, { "epoch": 2.35, "learning_rate": 3.8273420980049646e-05, "loss": 0.8947, "step": 25510 }, { "epoch": 2.35, "learning_rate": 3.826882412429898e-05, "loss": 0.8928, "step": 25520 }, { "epoch": 2.35, "learning_rate": 3.826422726854831e-05, "loss": 0.8957, "step": 25530 }, { "epoch": 2.35, "learning_rate": 3.825963041279765e-05, "loss": 0.8719, "step": 25540 }, { "epoch": 2.35, "learning_rate": 3.8255033557046985e-05, "loss": 0.9087, "step": 25550 }, { "epoch": 2.35, "learning_rate": 3.8250436701296315e-05, "loss": 0.8619, "step": 25560 }, { "epoch": 2.35, "learning_rate": 3.824583984554565e-05, "loss": 0.8988, "step": 25570 }, { "epoch": 2.35, "learning_rate": 3.824124298979498e-05, "loss": 1.0177, "step": 25580 }, { "epoch": 2.35, "learning_rate": 3.823664613404432e-05, "loss": 0.9281, "step": 25590 }, { "epoch": 2.35, "learning_rate": 3.823204927829365e-05, "loss": 0.8278, "step": 25600 }, { "epoch": 2.35, "learning_rate": 3.8227452422542984e-05, "loss": 0.9111, "step": 25610 }, { "epoch": 2.36, "learning_rate": 3.822285556679231e-05, "loss": 1.0114, "step": 25620 }, { "epoch": 2.36, "learning_rate": 3.821825871104165e-05, "loss": 0.8224, "step": 25630 }, { "epoch": 2.36, "learning_rate": 3.8213661855290986e-05, "loss": 0.9483, "step": 25640 }, { "epoch": 2.36, "learning_rate": 3.8209064999540316e-05, "loss": 0.843, "step": 25650 }, { "epoch": 2.36, "learning_rate": 3.820446814378965e-05, "loss": 0.8577, "step": 25660 }, { "epoch": 2.36, "learning_rate": 3.819987128803898e-05, "loss": 0.9312, "step": 25670 }, { "epoch": 2.36, "learning_rate": 3.819527443228832e-05, "loss": 0.9681, "step": 25680 }, { "epoch": 2.36, "learning_rate": 3.819067757653765e-05, "loss": 0.9021, "step": 25690 }, { "epoch": 2.36, "learning_rate": 3.8186080720786985e-05, "loss": 0.8908, "step": 25700 }, { "epoch": 2.36, "learning_rate": 3.8181483865036314e-05, "loss": 0.9238, "step": 25710 }, { "epoch": 2.36, "learning_rate": 3.817688700928565e-05, "loss": 0.9089, "step": 25720 }, { "epoch": 2.37, "learning_rate": 3.817229015353499e-05, "loss": 0.8789, "step": 25730 }, { "epoch": 2.37, "learning_rate": 3.816769329778432e-05, "loss": 0.8758, "step": 25740 }, { "epoch": 2.37, "learning_rate": 3.8163096442033654e-05, "loss": 0.8979, "step": 25750 }, { "epoch": 2.37, "learning_rate": 3.815849958628298e-05, "loss": 0.8727, "step": 25760 }, { "epoch": 2.37, "learning_rate": 3.815390273053232e-05, "loss": 0.8531, "step": 25770 }, { "epoch": 2.37, "learning_rate": 3.814930587478165e-05, "loss": 0.9329, "step": 25780 }, { "epoch": 2.37, "learning_rate": 3.8144709019030986e-05, "loss": 0.9225, "step": 25790 }, { "epoch": 2.37, "learning_rate": 3.8140112163280316e-05, "loss": 0.8935, "step": 25800 }, { "epoch": 2.37, "learning_rate": 3.813551530752965e-05, "loss": 0.8449, "step": 25810 }, { "epoch": 2.37, "learning_rate": 3.813091845177898e-05, "loss": 0.872, "step": 25820 }, { "epoch": 2.37, "learning_rate": 3.812632159602832e-05, "loss": 0.8843, "step": 25830 }, { "epoch": 2.38, "learning_rate": 3.8121724740277655e-05, "loss": 0.8932, "step": 25840 }, { "epoch": 2.38, "learning_rate": 3.8117127884526985e-05, "loss": 0.9675, "step": 25850 }, { "epoch": 2.38, "learning_rate": 3.811253102877632e-05, "loss": 0.9176, "step": 25860 }, { "epoch": 2.38, "learning_rate": 3.810793417302565e-05, "loss": 0.8327, "step": 25870 }, { "epoch": 2.38, "learning_rate": 3.810333731727499e-05, "loss": 0.7184, "step": 25880 }, { "epoch": 2.38, "learning_rate": 3.809874046152432e-05, "loss": 0.8812, "step": 25890 }, { "epoch": 2.38, "learning_rate": 3.809414360577365e-05, "loss": 0.9274, "step": 25900 }, { "epoch": 2.38, "learning_rate": 3.808954675002298e-05, "loss": 0.9736, "step": 25910 }, { "epoch": 2.38, "learning_rate": 3.808494989427232e-05, "loss": 0.7779, "step": 25920 }, { "epoch": 2.38, "learning_rate": 3.8080353038521656e-05, "loss": 0.8476, "step": 25930 }, { "epoch": 2.38, "learning_rate": 3.8075756182770986e-05, "loss": 0.7943, "step": 25940 }, { "epoch": 2.39, "learning_rate": 3.807115932702032e-05, "loss": 0.8394, "step": 25950 }, { "epoch": 2.39, "learning_rate": 3.806656247126965e-05, "loss": 0.929, "step": 25960 }, { "epoch": 2.39, "learning_rate": 3.806196561551899e-05, "loss": 0.9544, "step": 25970 }, { "epoch": 2.39, "learning_rate": 3.805736875976832e-05, "loss": 0.8877, "step": 25980 }, { "epoch": 2.39, "learning_rate": 3.8052771904017655e-05, "loss": 0.8815, "step": 25990 }, { "epoch": 2.39, "learning_rate": 3.8048175048266984e-05, "loss": 0.863, "step": 26000 }, { "epoch": 2.39, "learning_rate": 3.804357819251632e-05, "loss": 0.8947, "step": 26010 }, { "epoch": 2.39, "learning_rate": 3.803898133676566e-05, "loss": 0.8145, "step": 26020 }, { "epoch": 2.39, "learning_rate": 3.803438448101499e-05, "loss": 0.7507, "step": 26030 }, { "epoch": 2.39, "learning_rate": 3.8029787625264323e-05, "loss": 0.9085, "step": 26040 }, { "epoch": 2.39, "learning_rate": 3.802519076951365e-05, "loss": 0.9477, "step": 26050 }, { "epoch": 2.4, "learning_rate": 3.802059391376299e-05, "loss": 0.9355, "step": 26060 }, { "epoch": 2.4, "learning_rate": 3.801599705801232e-05, "loss": 0.9086, "step": 26070 }, { "epoch": 2.4, "learning_rate": 3.8011400202261656e-05, "loss": 0.9342, "step": 26080 }, { "epoch": 2.4, "learning_rate": 3.8006803346510985e-05, "loss": 0.8491, "step": 26090 }, { "epoch": 2.4, "learning_rate": 3.800220649076032e-05, "loss": 0.8328, "step": 26100 }, { "epoch": 2.4, "learning_rate": 3.799760963500966e-05, "loss": 0.9098, "step": 26110 }, { "epoch": 2.4, "learning_rate": 3.799301277925899e-05, "loss": 0.8654, "step": 26120 }, { "epoch": 2.4, "learning_rate": 3.7988415923508325e-05, "loss": 0.963, "step": 26130 }, { "epoch": 2.4, "learning_rate": 3.7983819067757654e-05, "loss": 0.9352, "step": 26140 }, { "epoch": 2.4, "learning_rate": 3.7979222212006984e-05, "loss": 0.8288, "step": 26150 }, { "epoch": 2.41, "learning_rate": 3.797462535625632e-05, "loss": 0.8363, "step": 26160 }, { "epoch": 2.41, "learning_rate": 3.797002850050566e-05, "loss": 0.8919, "step": 26170 }, { "epoch": 2.41, "learning_rate": 3.796543164475499e-05, "loss": 0.8984, "step": 26180 }, { "epoch": 2.41, "learning_rate": 3.796083478900432e-05, "loss": 0.8543, "step": 26190 }, { "epoch": 2.41, "learning_rate": 3.795623793325366e-05, "loss": 0.826, "step": 26200 }, { "epoch": 2.41, "learning_rate": 3.795164107750299e-05, "loss": 0.8601, "step": 26210 }, { "epoch": 2.41, "learning_rate": 3.7947044221752326e-05, "loss": 0.8988, "step": 26220 }, { "epoch": 2.41, "learning_rate": 3.7942447366001655e-05, "loss": 0.872, "step": 26230 }, { "epoch": 2.41, "learning_rate": 3.7937850510250985e-05, "loss": 0.9495, "step": 26240 }, { "epoch": 2.41, "learning_rate": 3.793325365450032e-05, "loss": 0.755, "step": 26250 }, { "epoch": 2.41, "learning_rate": 3.792865679874966e-05, "loss": 0.8854, "step": 26260 }, { "epoch": 2.42, "learning_rate": 3.792405994299899e-05, "loss": 0.9248, "step": 26270 }, { "epoch": 2.42, "learning_rate": 3.7919463087248324e-05, "loss": 0.9085, "step": 26280 }, { "epoch": 2.42, "learning_rate": 3.791486623149766e-05, "loss": 0.857, "step": 26290 }, { "epoch": 2.42, "learning_rate": 3.791026937574699e-05, "loss": 0.8577, "step": 26300 }, { "epoch": 2.42, "learning_rate": 3.790567251999633e-05, "loss": 0.82, "step": 26310 }, { "epoch": 2.42, "learning_rate": 3.790107566424566e-05, "loss": 0.9741, "step": 26320 }, { "epoch": 2.42, "learning_rate": 3.7896478808494986e-05, "loss": 0.9818, "step": 26330 }, { "epoch": 2.42, "learning_rate": 3.789188195274432e-05, "loss": 0.8748, "step": 26340 }, { "epoch": 2.42, "learning_rate": 3.788728509699366e-05, "loss": 0.8005, "step": 26350 }, { "epoch": 2.42, "learning_rate": 3.788268824124299e-05, "loss": 1.0986, "step": 26360 }, { "epoch": 2.42, "learning_rate": 3.7878091385492326e-05, "loss": 0.943, "step": 26370 }, { "epoch": 2.43, "learning_rate": 3.787349452974166e-05, "loss": 0.944, "step": 26380 }, { "epoch": 2.43, "learning_rate": 3.786889767399099e-05, "loss": 0.8353, "step": 26390 }, { "epoch": 2.43, "learning_rate": 3.786430081824033e-05, "loss": 0.926, "step": 26400 }, { "epoch": 2.43, "learning_rate": 3.785970396248966e-05, "loss": 0.8966, "step": 26410 }, { "epoch": 2.43, "learning_rate": 3.785510710673899e-05, "loss": 0.916, "step": 26420 }, { "epoch": 2.43, "learning_rate": 3.7850510250988324e-05, "loss": 0.8482, "step": 26430 }, { "epoch": 2.43, "learning_rate": 3.784591339523766e-05, "loss": 0.9887, "step": 26440 }, { "epoch": 2.43, "learning_rate": 3.784131653948699e-05, "loss": 0.8427, "step": 26450 }, { "epoch": 2.43, "learning_rate": 3.783671968373633e-05, "loss": 0.7894, "step": 26460 }, { "epoch": 2.43, "learning_rate": 3.783212282798566e-05, "loss": 0.8908, "step": 26470 }, { "epoch": 2.43, "learning_rate": 3.782752597223499e-05, "loss": 0.9063, "step": 26480 }, { "epoch": 2.44, "learning_rate": 3.782292911648433e-05, "loss": 0.9582, "step": 26490 }, { "epoch": 2.44, "learning_rate": 3.781833226073366e-05, "loss": 0.8678, "step": 26500 }, { "epoch": 2.44, "learning_rate": 3.781373540498299e-05, "loss": 0.894, "step": 26510 }, { "epoch": 2.44, "learning_rate": 3.7809138549232325e-05, "loss": 0.8668, "step": 26520 }, { "epoch": 2.44, "learning_rate": 3.780454169348166e-05, "loss": 0.8818, "step": 26530 }, { "epoch": 2.44, "learning_rate": 3.779994483773099e-05, "loss": 1.0279, "step": 26540 }, { "epoch": 2.44, "learning_rate": 3.779534798198033e-05, "loss": 0.8412, "step": 26550 }, { "epoch": 2.44, "learning_rate": 3.7790751126229664e-05, "loss": 0.8871, "step": 26560 }, { "epoch": 2.44, "learning_rate": 3.7786154270478994e-05, "loss": 0.8672, "step": 26570 }, { "epoch": 2.44, "learning_rate": 3.778155741472833e-05, "loss": 1.028, "step": 26580 }, { "epoch": 2.44, "learning_rate": 3.777696055897766e-05, "loss": 0.9084, "step": 26590 }, { "epoch": 2.45, "learning_rate": 3.777236370322699e-05, "loss": 0.9213, "step": 26600 }, { "epoch": 2.45, "learning_rate": 3.7767766847476326e-05, "loss": 0.8328, "step": 26610 }, { "epoch": 2.45, "learning_rate": 3.776316999172566e-05, "loss": 0.8486, "step": 26620 }, { "epoch": 2.45, "learning_rate": 3.775857313597499e-05, "loss": 0.9819, "step": 26630 }, { "epoch": 2.45, "learning_rate": 3.775397628022433e-05, "loss": 0.917, "step": 26640 }, { "epoch": 2.45, "learning_rate": 3.7749379424473666e-05, "loss": 0.8081, "step": 26650 }, { "epoch": 2.45, "learning_rate": 3.7744782568722995e-05, "loss": 0.9475, "step": 26660 }, { "epoch": 2.45, "learning_rate": 3.774018571297233e-05, "loss": 0.9775, "step": 26670 }, { "epoch": 2.45, "learning_rate": 3.773558885722166e-05, "loss": 0.951, "step": 26680 }, { "epoch": 2.45, "learning_rate": 3.773099200147099e-05, "loss": 1.0335, "step": 26690 }, { "epoch": 2.45, "learning_rate": 3.772639514572033e-05, "loss": 0.9145, "step": 26700 }, { "epoch": 2.46, "learning_rate": 3.7721798289969664e-05, "loss": 0.8213, "step": 26710 }, { "epoch": 2.46, "learning_rate": 3.7717201434218994e-05, "loss": 0.7802, "step": 26720 }, { "epoch": 2.46, "learning_rate": 3.771260457846833e-05, "loss": 0.804, "step": 26730 }, { "epoch": 2.46, "learning_rate": 3.770800772271767e-05, "loss": 0.8397, "step": 26740 }, { "epoch": 2.46, "learning_rate": 3.7703410866966996e-05, "loss": 1.0635, "step": 26750 }, { "epoch": 2.46, "learning_rate": 3.769881401121633e-05, "loss": 0.8265, "step": 26760 }, { "epoch": 2.46, "learning_rate": 3.769421715546566e-05, "loss": 0.8071, "step": 26770 }, { "epoch": 2.46, "learning_rate": 3.768962029971499e-05, "loss": 0.8567, "step": 26780 }, { "epoch": 2.46, "learning_rate": 3.768502344396433e-05, "loss": 0.855, "step": 26790 }, { "epoch": 2.46, "learning_rate": 3.7680426588213665e-05, "loss": 0.8997, "step": 26800 }, { "epoch": 2.46, "learning_rate": 3.7675829732462995e-05, "loss": 0.9941, "step": 26810 }, { "epoch": 2.47, "learning_rate": 3.767123287671233e-05, "loss": 0.942, "step": 26820 }, { "epoch": 2.47, "learning_rate": 3.766663602096167e-05, "loss": 0.8279, "step": 26830 }, { "epoch": 2.47, "learning_rate": 3.7662039165211e-05, "loss": 0.9287, "step": 26840 }, { "epoch": 2.47, "learning_rate": 3.7657442309460334e-05, "loss": 0.8933, "step": 26850 }, { "epoch": 2.47, "learning_rate": 3.7652845453709664e-05, "loss": 0.9158, "step": 26860 }, { "epoch": 2.47, "learning_rate": 3.7648248597958994e-05, "loss": 0.8783, "step": 26870 }, { "epoch": 2.47, "learning_rate": 3.764365174220833e-05, "loss": 0.8066, "step": 26880 }, { "epoch": 2.47, "learning_rate": 3.7639054886457667e-05, "loss": 0.9138, "step": 26890 }, { "epoch": 2.47, "learning_rate": 3.7634458030706996e-05, "loss": 0.8011, "step": 26900 }, { "epoch": 2.47, "learning_rate": 3.762986117495633e-05, "loss": 0.7589, "step": 26910 }, { "epoch": 2.47, "learning_rate": 3.762526431920567e-05, "loss": 0.8348, "step": 26920 }, { "epoch": 2.48, "learning_rate": 3.7620667463455e-05, "loss": 0.9179, "step": 26930 }, { "epoch": 2.48, "learning_rate": 3.7616070607704335e-05, "loss": 0.8251, "step": 26940 }, { "epoch": 2.48, "learning_rate": 3.7611473751953665e-05, "loss": 0.8711, "step": 26950 }, { "epoch": 2.48, "learning_rate": 3.7606876896202995e-05, "loss": 0.7834, "step": 26960 }, { "epoch": 2.48, "learning_rate": 3.760228004045233e-05, "loss": 0.8196, "step": 26970 }, { "epoch": 2.48, "learning_rate": 3.759768318470167e-05, "loss": 0.8706, "step": 26980 }, { "epoch": 2.48, "learning_rate": 3.7593086328951e-05, "loss": 1.0458, "step": 26990 }, { "epoch": 2.48, "learning_rate": 3.7588489473200334e-05, "loss": 0.8727, "step": 27000 }, { "epoch": 2.48, "learning_rate": 3.758389261744967e-05, "loss": 0.9774, "step": 27010 }, { "epoch": 2.48, "learning_rate": 3.7579295761699e-05, "loss": 0.909, "step": 27020 }, { "epoch": 2.49, "learning_rate": 3.7574698905948337e-05, "loss": 0.8201, "step": 27030 }, { "epoch": 2.49, "learning_rate": 3.7570102050197666e-05, "loss": 0.8686, "step": 27040 }, { "epoch": 2.49, "learning_rate": 3.7565505194446996e-05, "loss": 0.8181, "step": 27050 }, { "epoch": 2.49, "learning_rate": 3.756090833869633e-05, "loss": 0.9345, "step": 27060 }, { "epoch": 2.49, "learning_rate": 3.755631148294567e-05, "loss": 0.831, "step": 27070 }, { "epoch": 2.49, "learning_rate": 3.7551714627195e-05, "loss": 0.8622, "step": 27080 }, { "epoch": 2.49, "learning_rate": 3.7547117771444335e-05, "loss": 0.985, "step": 27090 }, { "epoch": 2.49, "learning_rate": 3.754252091569367e-05, "loss": 0.865, "step": 27100 }, { "epoch": 2.49, "learning_rate": 3.7537924059943e-05, "loss": 0.8906, "step": 27110 }, { "epoch": 2.49, "learning_rate": 3.753332720419234e-05, "loss": 0.8691, "step": 27120 }, { "epoch": 2.49, "learning_rate": 3.752873034844167e-05, "loss": 1.1329, "step": 27130 }, { "epoch": 2.5, "learning_rate": 3.7524133492691e-05, "loss": 0.9287, "step": 27140 }, { "epoch": 2.5, "learning_rate": 3.7519536636940334e-05, "loss": 0.8633, "step": 27150 }, { "epoch": 2.5, "learning_rate": 3.751493978118967e-05, "loss": 0.9947, "step": 27160 }, { "epoch": 2.5, "learning_rate": 3.7510342925439e-05, "loss": 0.8826, "step": 27170 }, { "epoch": 2.5, "learning_rate": 3.7505746069688336e-05, "loss": 0.8685, "step": 27180 }, { "epoch": 2.5, "learning_rate": 3.750114921393767e-05, "loss": 0.8679, "step": 27190 }, { "epoch": 2.5, "learning_rate": 3.7496552358187e-05, "loss": 0.8681, "step": 27200 }, { "epoch": 2.5, "learning_rate": 3.749195550243634e-05, "loss": 0.8128, "step": 27210 }, { "epoch": 2.5, "learning_rate": 3.748735864668567e-05, "loss": 0.8737, "step": 27220 }, { "epoch": 2.5, "learning_rate": 3.7482761790935e-05, "loss": 0.8459, "step": 27230 }, { "epoch": 2.5, "learning_rate": 3.7478164935184335e-05, "loss": 0.8685, "step": 27240 }, { "epoch": 2.51, "learning_rate": 3.747356807943367e-05, "loss": 0.8499, "step": 27250 }, { "epoch": 2.51, "learning_rate": 3.7468971223683e-05, "loss": 0.8303, "step": 27260 }, { "epoch": 2.51, "learning_rate": 3.746437436793234e-05, "loss": 1.0022, "step": 27270 }, { "epoch": 2.51, "learning_rate": 3.7459777512181674e-05, "loss": 0.8221, "step": 27280 }, { "epoch": 2.51, "learning_rate": 3.7455180656431004e-05, "loss": 0.8406, "step": 27290 }, { "epoch": 2.51, "learning_rate": 3.745058380068034e-05, "loss": 0.9893, "step": 27300 }, { "epoch": 2.51, "learning_rate": 3.744598694492967e-05, "loss": 0.8333, "step": 27310 }, { "epoch": 2.51, "learning_rate": 3.7441390089179e-05, "loss": 1.061, "step": 27320 }, { "epoch": 2.51, "learning_rate": 3.7436793233428336e-05, "loss": 0.8943, "step": 27330 }, { "epoch": 2.51, "learning_rate": 3.743219637767767e-05, "loss": 0.9187, "step": 27340 }, { "epoch": 2.51, "learning_rate": 3.7427599521927e-05, "loss": 0.8354, "step": 27350 }, { "epoch": 2.52, "learning_rate": 3.742300266617634e-05, "loss": 0.9001, "step": 27360 }, { "epoch": 2.52, "learning_rate": 3.7418405810425675e-05, "loss": 0.8514, "step": 27370 }, { "epoch": 2.52, "learning_rate": 3.7413808954675005e-05, "loss": 1.0734, "step": 27380 }, { "epoch": 2.52, "learning_rate": 3.740921209892434e-05, "loss": 0.9236, "step": 27390 }, { "epoch": 2.52, "learning_rate": 3.740461524317367e-05, "loss": 0.9317, "step": 27400 }, { "epoch": 2.52, "learning_rate": 3.7400018387423e-05, "loss": 0.9766, "step": 27410 }, { "epoch": 2.52, "learning_rate": 3.739542153167234e-05, "loss": 0.8304, "step": 27420 }, { "epoch": 2.52, "learning_rate": 3.7390824675921674e-05, "loss": 0.9154, "step": 27430 }, { "epoch": 2.52, "learning_rate": 3.7386227820171003e-05, "loss": 0.8369, "step": 27440 }, { "epoch": 2.52, "learning_rate": 3.738163096442034e-05, "loss": 0.8982, "step": 27450 }, { "epoch": 2.52, "learning_rate": 3.737703410866967e-05, "loss": 0.9155, "step": 27460 }, { "epoch": 2.53, "learning_rate": 3.7372437252919006e-05, "loss": 0.7979, "step": 27470 }, { "epoch": 2.53, "learning_rate": 3.736784039716834e-05, "loss": 0.7933, "step": 27480 }, { "epoch": 2.53, "learning_rate": 3.736324354141767e-05, "loss": 0.924, "step": 27490 }, { "epoch": 2.53, "learning_rate": 3.7358646685667e-05, "loss": 0.9322, "step": 27500 }, { "epoch": 2.53, "learning_rate": 3.735404982991634e-05, "loss": 0.9128, "step": 27510 }, { "epoch": 2.53, "learning_rate": 3.7349452974165675e-05, "loss": 0.8975, "step": 27520 }, { "epoch": 2.53, "learning_rate": 3.7344856118415005e-05, "loss": 0.9852, "step": 27530 }, { "epoch": 2.53, "learning_rate": 3.734025926266434e-05, "loss": 0.9012, "step": 27540 }, { "epoch": 2.53, "learning_rate": 3.733566240691367e-05, "loss": 0.8169, "step": 27550 }, { "epoch": 2.53, "learning_rate": 3.733106555116301e-05, "loss": 0.9136, "step": 27560 }, { "epoch": 2.53, "learning_rate": 3.7326468695412344e-05, "loss": 0.8999, "step": 27570 }, { "epoch": 2.54, "learning_rate": 3.7321871839661673e-05, "loss": 0.9199, "step": 27580 }, { "epoch": 2.54, "learning_rate": 3.7317274983911e-05, "loss": 0.9763, "step": 27590 }, { "epoch": 2.54, "learning_rate": 3.731267812816034e-05, "loss": 0.9225, "step": 27600 }, { "epoch": 2.54, "learning_rate": 3.7308081272409676e-05, "loss": 0.8641, "step": 27610 }, { "epoch": 2.54, "learning_rate": 3.7303484416659006e-05, "loss": 0.9877, "step": 27620 }, { "epoch": 2.54, "learning_rate": 3.729888756090834e-05, "loss": 1.0469, "step": 27630 }, { "epoch": 2.54, "learning_rate": 3.729429070515767e-05, "loss": 0.9175, "step": 27640 }, { "epoch": 2.54, "learning_rate": 3.728969384940701e-05, "loss": 0.8855, "step": 27650 }, { "epoch": 2.54, "learning_rate": 3.7285096993656345e-05, "loss": 0.9285, "step": 27660 }, { "epoch": 2.54, "learning_rate": 3.7280500137905675e-05, "loss": 0.9944, "step": 27670 }, { "epoch": 2.54, "learning_rate": 3.7275903282155004e-05, "loss": 0.9489, "step": 27680 }, { "epoch": 2.55, "learning_rate": 3.727130642640434e-05, "loss": 0.94, "step": 27690 }, { "epoch": 2.55, "learning_rate": 3.726670957065368e-05, "loss": 0.8482, "step": 27700 }, { "epoch": 2.55, "learning_rate": 3.726211271490301e-05, "loss": 0.8601, "step": 27710 }, { "epoch": 2.55, "learning_rate": 3.7257515859152343e-05, "loss": 0.8617, "step": 27720 }, { "epoch": 2.55, "learning_rate": 3.725291900340167e-05, "loss": 1.0493, "step": 27730 }, { "epoch": 2.55, "learning_rate": 3.724832214765101e-05, "loss": 0.9584, "step": 27740 }, { "epoch": 2.55, "learning_rate": 3.7243725291900346e-05, "loss": 0.8862, "step": 27750 }, { "epoch": 2.55, "learning_rate": 3.7239128436149676e-05, "loss": 0.756, "step": 27760 }, { "epoch": 2.55, "learning_rate": 3.7234531580399006e-05, "loss": 0.8479, "step": 27770 }, { "epoch": 2.55, "learning_rate": 3.722993472464834e-05, "loss": 0.8608, "step": 27780 }, { "epoch": 2.55, "learning_rate": 3.722533786889767e-05, "loss": 0.796, "step": 27790 }, { "epoch": 2.56, "learning_rate": 3.722074101314701e-05, "loss": 0.9791, "step": 27800 }, { "epoch": 2.56, "learning_rate": 3.7216144157396345e-05, "loss": 0.9124, "step": 27810 }, { "epoch": 2.56, "learning_rate": 3.7211547301645674e-05, "loss": 0.9666, "step": 27820 }, { "epoch": 2.56, "learning_rate": 3.720695044589501e-05, "loss": 0.8415, "step": 27830 }, { "epoch": 2.56, "learning_rate": 3.720235359014435e-05, "loss": 0.8687, "step": 27840 }, { "epoch": 2.56, "learning_rate": 3.719775673439368e-05, "loss": 0.82, "step": 27850 }, { "epoch": 2.56, "learning_rate": 3.719315987864301e-05, "loss": 0.9407, "step": 27860 }, { "epoch": 2.56, "learning_rate": 3.718856302289234e-05, "loss": 0.9482, "step": 27870 }, { "epoch": 2.56, "learning_rate": 3.718396616714167e-05, "loss": 0.8114, "step": 27880 }, { "epoch": 2.56, "learning_rate": 3.717936931139101e-05, "loss": 0.9051, "step": 27890 }, { "epoch": 2.57, "learning_rate": 3.7174772455640346e-05, "loss": 0.8474, "step": 27900 }, { "epoch": 2.57, "learning_rate": 3.7170175599889676e-05, "loss": 0.8228, "step": 27910 }, { "epoch": 2.57, "learning_rate": 3.716557874413901e-05, "loss": 0.9865, "step": 27920 }, { "epoch": 2.57, "learning_rate": 3.716098188838835e-05, "loss": 0.8705, "step": 27930 }, { "epoch": 2.57, "learning_rate": 3.715638503263768e-05, "loss": 0.8464, "step": 27940 }, { "epoch": 2.57, "learning_rate": 3.715178817688701e-05, "loss": 1.0012, "step": 27950 }, { "epoch": 2.57, "learning_rate": 3.7147191321136344e-05, "loss": 0.9874, "step": 27960 }, { "epoch": 2.57, "learning_rate": 3.7142594465385674e-05, "loss": 0.8754, "step": 27970 }, { "epoch": 2.57, "learning_rate": 3.713799760963501e-05, "loss": 0.9317, "step": 27980 }, { "epoch": 2.57, "learning_rate": 3.713340075388435e-05, "loss": 0.8193, "step": 27990 }, { "epoch": 2.57, "learning_rate": 3.712880389813368e-05, "loss": 0.9799, "step": 28000 }, { "epoch": 2.58, "learning_rate": 3.712420704238301e-05, "loss": 0.8785, "step": 28010 }, { "epoch": 2.58, "learning_rate": 3.711961018663235e-05, "loss": 0.9348, "step": 28020 }, { "epoch": 2.58, "learning_rate": 3.711501333088168e-05, "loss": 0.8947, "step": 28030 }, { "epoch": 2.58, "learning_rate": 3.711041647513101e-05, "loss": 0.918, "step": 28040 }, { "epoch": 2.58, "learning_rate": 3.7105819619380346e-05, "loss": 0.877, "step": 28050 }, { "epoch": 2.58, "learning_rate": 3.7101222763629675e-05, "loss": 0.9391, "step": 28060 }, { "epoch": 2.58, "learning_rate": 3.709662590787901e-05, "loss": 0.8204, "step": 28070 }, { "epoch": 2.58, "learning_rate": 3.709202905212835e-05, "loss": 0.8664, "step": 28080 }, { "epoch": 2.58, "learning_rate": 3.708743219637768e-05, "loss": 0.8922, "step": 28090 }, { "epoch": 2.58, "learning_rate": 3.7082835340627014e-05, "loss": 0.9101, "step": 28100 }, { "epoch": 2.58, "learning_rate": 3.707823848487635e-05, "loss": 0.8769, "step": 28110 }, { "epoch": 2.59, "learning_rate": 3.7073641629125674e-05, "loss": 0.8301, "step": 28120 }, { "epoch": 2.59, "learning_rate": 3.706904477337501e-05, "loss": 0.8498, "step": 28130 }, { "epoch": 2.59, "learning_rate": 3.706444791762435e-05, "loss": 0.8452, "step": 28140 }, { "epoch": 2.59, "learning_rate": 3.7059851061873676e-05, "loss": 0.9467, "step": 28150 }, { "epoch": 2.59, "learning_rate": 3.705525420612301e-05, "loss": 0.9138, "step": 28160 }, { "epoch": 2.59, "learning_rate": 3.705065735037235e-05, "loss": 0.8948, "step": 28170 }, { "epoch": 2.59, "learning_rate": 3.704606049462168e-05, "loss": 0.9797, "step": 28180 }, { "epoch": 2.59, "learning_rate": 3.7041463638871016e-05, "loss": 0.8201, "step": 28190 }, { "epoch": 2.59, "learning_rate": 3.703686678312035e-05, "loss": 0.8825, "step": 28200 }, { "epoch": 2.59, "learning_rate": 3.7032269927369675e-05, "loss": 0.9036, "step": 28210 }, { "epoch": 2.59, "learning_rate": 3.702767307161901e-05, "loss": 0.8402, "step": 28220 }, { "epoch": 2.6, "learning_rate": 3.702307621586835e-05, "loss": 0.9476, "step": 28230 }, { "epoch": 2.6, "learning_rate": 3.701847936011768e-05, "loss": 0.9042, "step": 28240 }, { "epoch": 2.6, "learning_rate": 3.7013882504367014e-05, "loss": 0.8665, "step": 28250 }, { "epoch": 2.6, "learning_rate": 3.700928564861635e-05, "loss": 0.886, "step": 28260 }, { "epoch": 2.6, "learning_rate": 3.700468879286568e-05, "loss": 0.8623, "step": 28270 }, { "epoch": 2.6, "learning_rate": 3.700009193711502e-05, "loss": 0.911, "step": 28280 }, { "epoch": 2.6, "learning_rate": 3.699549508136435e-05, "loss": 0.8947, "step": 28290 }, { "epoch": 2.6, "learning_rate": 3.6990898225613676e-05, "loss": 0.9376, "step": 28300 }, { "epoch": 2.6, "learning_rate": 3.698630136986301e-05, "loss": 0.9099, "step": 28310 }, { "epoch": 2.6, "learning_rate": 3.698170451411235e-05, "loss": 0.9286, "step": 28320 }, { "epoch": 2.6, "learning_rate": 3.697710765836168e-05, "loss": 0.9686, "step": 28330 }, { "epoch": 2.61, "learning_rate": 3.6972510802611015e-05, "loss": 0.9859, "step": 28340 }, { "epoch": 2.61, "learning_rate": 3.696791394686035e-05, "loss": 0.7586, "step": 28350 }, { "epoch": 2.61, "learning_rate": 3.696331709110968e-05, "loss": 0.9201, "step": 28360 }, { "epoch": 2.61, "learning_rate": 3.695872023535902e-05, "loss": 0.9343, "step": 28370 }, { "epoch": 2.61, "learning_rate": 3.6954123379608354e-05, "loss": 0.926, "step": 28380 }, { "epoch": 2.61, "learning_rate": 3.694952652385768e-05, "loss": 0.9042, "step": 28390 }, { "epoch": 2.61, "learning_rate": 3.6944929668107014e-05, "loss": 0.7915, "step": 28400 }, { "epoch": 2.61, "learning_rate": 3.694033281235635e-05, "loss": 0.8241, "step": 28410 }, { "epoch": 2.61, "learning_rate": 3.693573595660568e-05, "loss": 0.8791, "step": 28420 }, { "epoch": 2.61, "learning_rate": 3.6931139100855017e-05, "loss": 0.9003, "step": 28430 }, { "epoch": 2.61, "learning_rate": 3.692654224510435e-05, "loss": 0.9086, "step": 28440 }, { "epoch": 2.62, "learning_rate": 3.692194538935368e-05, "loss": 0.8384, "step": 28450 }, { "epoch": 2.62, "learning_rate": 3.691734853360302e-05, "loss": 0.8135, "step": 28460 }, { "epoch": 2.62, "learning_rate": 3.6912751677852356e-05, "loss": 0.8757, "step": 28470 }, { "epoch": 2.62, "learning_rate": 3.690815482210168e-05, "loss": 0.9036, "step": 28480 }, { "epoch": 2.62, "learning_rate": 3.6903557966351015e-05, "loss": 0.8644, "step": 28490 }, { "epoch": 2.62, "learning_rate": 3.689896111060035e-05, "loss": 0.826, "step": 28500 }, { "epoch": 2.62, "learning_rate": 3.689436425484968e-05, "loss": 0.8792, "step": 28510 }, { "epoch": 2.62, "learning_rate": 3.688976739909902e-05, "loss": 0.9287, "step": 28520 }, { "epoch": 2.62, "learning_rate": 3.6885170543348354e-05, "loss": 0.8579, "step": 28530 }, { "epoch": 2.62, "learning_rate": 3.6880573687597684e-05, "loss": 0.8401, "step": 28540 }, { "epoch": 2.62, "learning_rate": 3.687597683184702e-05, "loss": 0.915, "step": 28550 }, { "epoch": 2.63, "learning_rate": 3.687137997609636e-05, "loss": 1.0519, "step": 28560 }, { "epoch": 2.63, "learning_rate": 3.686678312034568e-05, "loss": 0.9501, "step": 28570 }, { "epoch": 2.63, "learning_rate": 3.6862186264595016e-05, "loss": 0.838, "step": 28580 }, { "epoch": 2.63, "learning_rate": 3.685758940884435e-05, "loss": 0.9672, "step": 28590 }, { "epoch": 2.63, "learning_rate": 3.685299255309368e-05, "loss": 0.9126, "step": 28600 }, { "epoch": 2.63, "learning_rate": 3.684839569734302e-05, "loss": 0.8578, "step": 28610 }, { "epoch": 2.63, "learning_rate": 3.6843798841592355e-05, "loss": 0.9925, "step": 28620 }, { "epoch": 2.63, "learning_rate": 3.6839201985841685e-05, "loss": 0.9203, "step": 28630 }, { "epoch": 2.63, "learning_rate": 3.683460513009102e-05, "loss": 0.8203, "step": 28640 }, { "epoch": 2.63, "learning_rate": 3.683000827434036e-05, "loss": 0.7955, "step": 28650 }, { "epoch": 2.63, "learning_rate": 3.682541141858968e-05, "loss": 0.9978, "step": 28660 }, { "epoch": 2.64, "learning_rate": 3.682081456283902e-05, "loss": 0.8145, "step": 28670 }, { "epoch": 2.64, "learning_rate": 3.6816217707088354e-05, "loss": 1.0306, "step": 28680 }, { "epoch": 2.64, "learning_rate": 3.6811620851337684e-05, "loss": 1.0419, "step": 28690 }, { "epoch": 2.64, "learning_rate": 3.680702399558702e-05, "loss": 0.8373, "step": 28700 }, { "epoch": 2.64, "learning_rate": 3.6802427139836357e-05, "loss": 0.941, "step": 28710 }, { "epoch": 2.64, "learning_rate": 3.6797830284085686e-05, "loss": 0.8074, "step": 28720 }, { "epoch": 2.64, "learning_rate": 3.679323342833502e-05, "loss": 0.8985, "step": 28730 }, { "epoch": 2.64, "learning_rate": 3.678863657258436e-05, "loss": 0.9232, "step": 28740 }, { "epoch": 2.64, "learning_rate": 3.678403971683368e-05, "loss": 0.8829, "step": 28750 }, { "epoch": 2.64, "learning_rate": 3.677944286108302e-05, "loss": 0.9296, "step": 28760 }, { "epoch": 2.65, "learning_rate": 3.6774846005332355e-05, "loss": 0.8418, "step": 28770 }, { "epoch": 2.65, "learning_rate": 3.6770249149581685e-05, "loss": 0.9044, "step": 28780 }, { "epoch": 2.65, "learning_rate": 3.676565229383102e-05, "loss": 1.0593, "step": 28790 }, { "epoch": 2.65, "learning_rate": 3.676105543808036e-05, "loss": 0.8866, "step": 28800 }, { "epoch": 2.65, "learning_rate": 3.675645858232969e-05, "loss": 0.9428, "step": 28810 }, { "epoch": 2.65, "learning_rate": 3.6751861726579024e-05, "loss": 0.8657, "step": 28820 }, { "epoch": 2.65, "learning_rate": 3.674726487082836e-05, "loss": 0.889, "step": 28830 }, { "epoch": 2.65, "learning_rate": 3.674266801507768e-05, "loss": 0.9275, "step": 28840 }, { "epoch": 2.65, "learning_rate": 3.673807115932702e-05, "loss": 0.8931, "step": 28850 }, { "epoch": 2.65, "learning_rate": 3.6733474303576356e-05, "loss": 0.9639, "step": 28860 }, { "epoch": 2.65, "learning_rate": 3.6728877447825686e-05, "loss": 0.9047, "step": 28870 }, { "epoch": 2.66, "learning_rate": 3.672428059207502e-05, "loss": 0.9317, "step": 28880 }, { "epoch": 2.66, "learning_rate": 3.671968373632436e-05, "loss": 0.8627, "step": 28890 }, { "epoch": 2.66, "learning_rate": 3.671508688057369e-05, "loss": 0.8862, "step": 28900 }, { "epoch": 2.66, "learning_rate": 3.6710490024823025e-05, "loss": 0.7778, "step": 28910 }, { "epoch": 2.66, "learning_rate": 3.670589316907236e-05, "loss": 0.8307, "step": 28920 }, { "epoch": 2.66, "learning_rate": 3.670129631332169e-05, "loss": 0.8206, "step": 28930 }, { "epoch": 2.66, "learning_rate": 3.669669945757102e-05, "loss": 0.8096, "step": 28940 }, { "epoch": 2.66, "learning_rate": 3.669210260182036e-05, "loss": 0.8577, "step": 28950 }, { "epoch": 2.66, "learning_rate": 3.668750574606969e-05, "loss": 0.9241, "step": 28960 }, { "epoch": 2.66, "learning_rate": 3.6682908890319024e-05, "loss": 0.8435, "step": 28970 }, { "epoch": 2.66, "learning_rate": 3.667831203456836e-05, "loss": 0.9464, "step": 28980 }, { "epoch": 2.67, "learning_rate": 3.667371517881769e-05, "loss": 0.9871, "step": 28990 }, { "epoch": 2.67, "learning_rate": 3.6669118323067026e-05, "loss": 0.9132, "step": 29000 }, { "epoch": 2.67, "learning_rate": 3.666452146731636e-05, "loss": 0.8767, "step": 29010 }, { "epoch": 2.67, "learning_rate": 3.665992461156569e-05, "loss": 0.937, "step": 29020 }, { "epoch": 2.67, "learning_rate": 3.665532775581502e-05, "loss": 0.7686, "step": 29030 }, { "epoch": 2.67, "learning_rate": 3.665073090006436e-05, "loss": 0.8474, "step": 29040 }, { "epoch": 2.67, "learning_rate": 3.664613404431369e-05, "loss": 0.9139, "step": 29050 }, { "epoch": 2.67, "learning_rate": 3.6641537188563025e-05, "loss": 0.8479, "step": 29060 }, { "epoch": 2.67, "learning_rate": 3.663694033281236e-05, "loss": 0.9096, "step": 29070 }, { "epoch": 2.67, "learning_rate": 3.663234347706169e-05, "loss": 0.8688, "step": 29080 }, { "epoch": 2.67, "learning_rate": 3.662774662131103e-05, "loss": 1.0149, "step": 29090 }, { "epoch": 2.68, "learning_rate": 3.662314976556036e-05, "loss": 0.8913, "step": 29100 }, { "epoch": 2.68, "learning_rate": 3.6618552909809694e-05, "loss": 0.8449, "step": 29110 }, { "epoch": 2.68, "learning_rate": 3.6613956054059023e-05, "loss": 0.8414, "step": 29120 }, { "epoch": 2.68, "learning_rate": 3.660935919830836e-05, "loss": 0.9431, "step": 29130 }, { "epoch": 2.68, "learning_rate": 3.660476234255769e-05, "loss": 0.9583, "step": 29140 }, { "epoch": 2.68, "learning_rate": 3.6600165486807026e-05, "loss": 0.8655, "step": 29150 }, { "epoch": 2.68, "learning_rate": 3.659556863105636e-05, "loss": 0.9735, "step": 29160 }, { "epoch": 2.68, "learning_rate": 3.659097177530569e-05, "loss": 0.9008, "step": 29170 }, { "epoch": 2.68, "learning_rate": 3.658637491955503e-05, "loss": 0.8733, "step": 29180 }, { "epoch": 2.68, "learning_rate": 3.658177806380436e-05, "loss": 0.849, "step": 29190 }, { "epoch": 2.68, "learning_rate": 3.6577181208053695e-05, "loss": 0.8625, "step": 29200 }, { "epoch": 2.69, "learning_rate": 3.6572584352303025e-05, "loss": 0.8986, "step": 29210 }, { "epoch": 2.69, "learning_rate": 3.656798749655236e-05, "loss": 1.0063, "step": 29220 }, { "epoch": 2.69, "learning_rate": 3.656339064080169e-05, "loss": 0.8599, "step": 29230 }, { "epoch": 2.69, "learning_rate": 3.655879378505103e-05, "loss": 0.8722, "step": 29240 }, { "epoch": 2.69, "learning_rate": 3.6554196929300364e-05, "loss": 0.8299, "step": 29250 }, { "epoch": 2.69, "learning_rate": 3.6549600073549693e-05, "loss": 0.7848, "step": 29260 }, { "epoch": 2.69, "learning_rate": 3.654500321779903e-05, "loss": 0.9549, "step": 29270 }, { "epoch": 2.69, "learning_rate": 3.654040636204836e-05, "loss": 0.7804, "step": 29280 }, { "epoch": 2.69, "learning_rate": 3.6535809506297696e-05, "loss": 0.9035, "step": 29290 }, { "epoch": 2.69, "learning_rate": 3.6531212650547026e-05, "loss": 0.7337, "step": 29300 }, { "epoch": 2.69, "learning_rate": 3.652661579479636e-05, "loss": 0.8762, "step": 29310 }, { "epoch": 2.7, "learning_rate": 3.652201893904569e-05, "loss": 0.8762, "step": 29320 }, { "epoch": 2.7, "learning_rate": 3.651742208329503e-05, "loss": 0.8018, "step": 29330 }, { "epoch": 2.7, "learning_rate": 3.6512825227544365e-05, "loss": 0.8937, "step": 29340 }, { "epoch": 2.7, "learning_rate": 3.6508228371793695e-05, "loss": 0.8185, "step": 29350 }, { "epoch": 2.7, "learning_rate": 3.650363151604303e-05, "loss": 0.9088, "step": 29360 }, { "epoch": 2.7, "learning_rate": 3.649903466029236e-05, "loss": 1.0045, "step": 29370 }, { "epoch": 2.7, "learning_rate": 3.64944378045417e-05, "loss": 0.9125, "step": 29380 }, { "epoch": 2.7, "learning_rate": 3.648984094879103e-05, "loss": 0.8722, "step": 29390 }, { "epoch": 2.7, "learning_rate": 3.6485244093040364e-05, "loss": 0.9845, "step": 29400 }, { "epoch": 2.7, "learning_rate": 3.648064723728969e-05, "loss": 0.8878, "step": 29410 }, { "epoch": 2.7, "learning_rate": 3.647605038153903e-05, "loss": 0.9384, "step": 29420 }, { "epoch": 2.71, "learning_rate": 3.647145352578836e-05, "loss": 0.9229, "step": 29430 }, { "epoch": 2.71, "learning_rate": 3.6466856670037696e-05, "loss": 0.8151, "step": 29440 }, { "epoch": 2.71, "learning_rate": 3.646225981428703e-05, "loss": 0.7212, "step": 29450 }, { "epoch": 2.71, "learning_rate": 3.645766295853636e-05, "loss": 0.8427, "step": 29460 }, { "epoch": 2.71, "learning_rate": 3.64530661027857e-05, "loss": 0.8722, "step": 29470 }, { "epoch": 2.71, "learning_rate": 3.644846924703503e-05, "loss": 1.0086, "step": 29480 }, { "epoch": 2.71, "learning_rate": 3.6443872391284365e-05, "loss": 0.7967, "step": 29490 }, { "epoch": 2.71, "learning_rate": 3.6439275535533694e-05, "loss": 1.0429, "step": 29500 }, { "epoch": 2.71, "learning_rate": 3.643467867978303e-05, "loss": 0.918, "step": 29510 }, { "epoch": 2.71, "learning_rate": 3.643008182403236e-05, "loss": 0.8492, "step": 29520 }, { "epoch": 2.71, "learning_rate": 3.64254849682817e-05, "loss": 0.9702, "step": 29530 }, { "epoch": 2.72, "learning_rate": 3.6420888112531034e-05, "loss": 0.7587, "step": 29540 }, { "epoch": 2.72, "learning_rate": 3.641629125678036e-05, "loss": 0.8733, "step": 29550 }, { "epoch": 2.72, "learning_rate": 3.64116944010297e-05, "loss": 0.9371, "step": 29560 }, { "epoch": 2.72, "learning_rate": 3.640709754527903e-05, "loss": 0.7424, "step": 29570 }, { "epoch": 2.72, "learning_rate": 3.6402500689528366e-05, "loss": 0.8093, "step": 29580 }, { "epoch": 2.72, "learning_rate": 3.6397903833777696e-05, "loss": 0.8467, "step": 29590 }, { "epoch": 2.72, "learning_rate": 3.639330697802703e-05, "loss": 0.9544, "step": 29600 }, { "epoch": 2.72, "learning_rate": 3.638871012227636e-05, "loss": 0.8509, "step": 29610 }, { "epoch": 2.72, "learning_rate": 3.63841132665257e-05, "loss": 1.1141, "step": 29620 }, { "epoch": 2.72, "learning_rate": 3.6379516410775035e-05, "loss": 0.7867, "step": 29630 }, { "epoch": 2.73, "learning_rate": 3.6374919555024364e-05, "loss": 0.8139, "step": 29640 }, { "epoch": 2.73, "learning_rate": 3.63703226992737e-05, "loss": 0.9693, "step": 29650 }, { "epoch": 2.73, "learning_rate": 3.636572584352303e-05, "loss": 0.8542, "step": 29660 }, { "epoch": 2.73, "learning_rate": 3.636112898777237e-05, "loss": 0.9574, "step": 29670 }, { "epoch": 2.73, "learning_rate": 3.63565321320217e-05, "loss": 0.8562, "step": 29680 }, { "epoch": 2.73, "learning_rate": 3.635193527627103e-05, "loss": 0.931, "step": 29690 }, { "epoch": 2.73, "learning_rate": 3.634733842052036e-05, "loss": 0.7428, "step": 29700 }, { "epoch": 2.73, "learning_rate": 3.63427415647697e-05, "loss": 0.8784, "step": 29710 }, { "epoch": 2.73, "learning_rate": 3.6338144709019036e-05, "loss": 0.7163, "step": 29720 }, { "epoch": 2.73, "learning_rate": 3.6333547853268366e-05, "loss": 0.9485, "step": 29730 }, { "epoch": 2.73, "learning_rate": 3.63289509975177e-05, "loss": 0.8657, "step": 29740 }, { "epoch": 2.74, "learning_rate": 3.632435414176703e-05, "loss": 1.0289, "step": 29750 }, { "epoch": 2.74, "learning_rate": 3.631975728601636e-05, "loss": 0.9391, "step": 29760 }, { "epoch": 2.74, "learning_rate": 3.63151604302657e-05, "loss": 0.876, "step": 29770 }, { "epoch": 2.74, "learning_rate": 3.6310563574515034e-05, "loss": 0.9762, "step": 29780 }, { "epoch": 2.74, "learning_rate": 3.6305966718764364e-05, "loss": 0.8632, "step": 29790 }, { "epoch": 2.74, "learning_rate": 3.63013698630137e-05, "loss": 0.8713, "step": 29800 }, { "epoch": 2.74, "learning_rate": 3.629677300726304e-05, "loss": 0.9934, "step": 29810 }, { "epoch": 2.74, "learning_rate": 3.629217615151237e-05, "loss": 0.9611, "step": 29820 }, { "epoch": 2.74, "learning_rate": 3.62875792957617e-05, "loss": 0.7934, "step": 29830 }, { "epoch": 2.74, "learning_rate": 3.628298244001103e-05, "loss": 0.9022, "step": 29840 }, { "epoch": 2.74, "learning_rate": 3.627838558426036e-05, "loss": 0.8645, "step": 29850 }, { "epoch": 2.75, "learning_rate": 3.62737887285097e-05, "loss": 0.934, "step": 29860 }, { "epoch": 2.75, "learning_rate": 3.6269191872759036e-05, "loss": 0.9354, "step": 29870 }, { "epoch": 2.75, "learning_rate": 3.6264595017008365e-05, "loss": 1.0926, "step": 29880 }, { "epoch": 2.75, "learning_rate": 3.62599981612577e-05, "loss": 0.9418, "step": 29890 }, { "epoch": 2.75, "learning_rate": 3.625540130550704e-05, "loss": 0.8486, "step": 29900 }, { "epoch": 2.75, "learning_rate": 3.625080444975637e-05, "loss": 0.7942, "step": 29910 }, { "epoch": 2.75, "learning_rate": 3.6246207594005705e-05, "loss": 0.9272, "step": 29920 }, { "epoch": 2.75, "learning_rate": 3.6241610738255034e-05, "loss": 0.9225, "step": 29930 }, { "epoch": 2.75, "learning_rate": 3.6237013882504364e-05, "loss": 0.8867, "step": 29940 }, { "epoch": 2.75, "learning_rate": 3.62324170267537e-05, "loss": 0.8152, "step": 29950 }, { "epoch": 2.75, "learning_rate": 3.622782017100304e-05, "loss": 0.9313, "step": 29960 }, { "epoch": 2.76, "learning_rate": 3.6223223315252367e-05, "loss": 0.88, "step": 29970 }, { "epoch": 2.76, "learning_rate": 3.62186264595017e-05, "loss": 0.9225, "step": 29980 }, { "epoch": 2.76, "learning_rate": 3.621402960375104e-05, "loss": 0.9339, "step": 29990 }, { "epoch": 2.76, "learning_rate": 3.620943274800037e-05, "loss": 0.9095, "step": 30000 }, { "epoch": 2.76, "eval_accuracy": 0.5524017467248908, "eval_loss": 0.9138294458389282, "eval_runtime": 159.9782, "eval_samples_per_second": 28.629, "eval_steps_per_second": 3.582, "step": 30000 }, { "epoch": 2.76, "learning_rate": 3.6204835892249706e-05, "loss": 0.9047, "step": 30010 }, { "epoch": 2.76, "learning_rate": 3.6200239036499035e-05, "loss": 1.0454, "step": 30020 }, { "epoch": 2.76, "learning_rate": 3.6195642180748365e-05, "loss": 0.9781, "step": 30030 }, { "epoch": 2.76, "learning_rate": 3.61910453249977e-05, "loss": 0.9568, "step": 30040 }, { "epoch": 2.76, "learning_rate": 3.618644846924704e-05, "loss": 1.0032, "step": 30050 }, { "epoch": 2.76, "learning_rate": 3.618185161349637e-05, "loss": 0.7939, "step": 30060 }, { "epoch": 2.76, "learning_rate": 3.6177254757745704e-05, "loss": 0.9115, "step": 30070 }, { "epoch": 2.77, "learning_rate": 3.617265790199504e-05, "loss": 1.0297, "step": 30080 }, { "epoch": 2.77, "learning_rate": 3.616806104624437e-05, "loss": 0.9112, "step": 30090 }, { "epoch": 2.77, "learning_rate": 3.616346419049371e-05, "loss": 0.8132, "step": 30100 }, { "epoch": 2.77, "learning_rate": 3.6158867334743037e-05, "loss": 0.8641, "step": 30110 }, { "epoch": 2.77, "learning_rate": 3.6154270478992366e-05, "loss": 0.9127, "step": 30120 }, { "epoch": 2.77, "learning_rate": 3.61496736232417e-05, "loss": 0.8912, "step": 30130 }, { "epoch": 2.77, "learning_rate": 3.614507676749104e-05, "loss": 0.792, "step": 30140 }, { "epoch": 2.77, "learning_rate": 3.614047991174037e-05, "loss": 0.9145, "step": 30150 }, { "epoch": 2.77, "learning_rate": 3.6135883055989705e-05, "loss": 0.9977, "step": 30160 }, { "epoch": 2.77, "learning_rate": 3.613128620023904e-05, "loss": 0.9927, "step": 30170 }, { "epoch": 2.77, "learning_rate": 3.612668934448837e-05, "loss": 0.8512, "step": 30180 }, { "epoch": 2.78, "learning_rate": 3.612209248873771e-05, "loss": 0.8856, "step": 30190 }, { "epoch": 2.78, "learning_rate": 3.611749563298704e-05, "loss": 0.9467, "step": 30200 }, { "epoch": 2.78, "learning_rate": 3.611289877723637e-05, "loss": 0.8253, "step": 30210 }, { "epoch": 2.78, "learning_rate": 3.6108301921485704e-05, "loss": 0.7917, "step": 30220 }, { "epoch": 2.78, "learning_rate": 3.610370506573504e-05, "loss": 0.9191, "step": 30230 }, { "epoch": 2.78, "learning_rate": 3.609910820998437e-05, "loss": 0.8509, "step": 30240 }, { "epoch": 2.78, "learning_rate": 3.609451135423371e-05, "loss": 0.8336, "step": 30250 }, { "epoch": 2.78, "learning_rate": 3.608991449848304e-05, "loss": 0.8611, "step": 30260 }, { "epoch": 2.78, "learning_rate": 3.608531764273237e-05, "loss": 1.0097, "step": 30270 }, { "epoch": 2.78, "learning_rate": 3.608072078698171e-05, "loss": 0.9097, "step": 30280 }, { "epoch": 2.78, "learning_rate": 3.607612393123104e-05, "loss": 0.8024, "step": 30290 }, { "epoch": 2.79, "learning_rate": 3.607152707548037e-05, "loss": 0.8922, "step": 30300 }, { "epoch": 2.79, "learning_rate": 3.6066930219729705e-05, "loss": 0.81, "step": 30310 }, { "epoch": 2.79, "learning_rate": 3.606233336397904e-05, "loss": 0.9186, "step": 30320 }, { "epoch": 2.79, "learning_rate": 3.605773650822837e-05, "loss": 0.946, "step": 30330 }, { "epoch": 2.79, "learning_rate": 3.605313965247771e-05, "loss": 0.8909, "step": 30340 }, { "epoch": 2.79, "learning_rate": 3.6048542796727044e-05, "loss": 0.84, "step": 30350 }, { "epoch": 2.79, "learning_rate": 3.6043945940976374e-05, "loss": 0.9662, "step": 30360 }, { "epoch": 2.79, "learning_rate": 3.603934908522571e-05, "loss": 0.9178, "step": 30370 }, { "epoch": 2.79, "learning_rate": 3.603475222947504e-05, "loss": 0.9426, "step": 30380 }, { "epoch": 2.79, "learning_rate": 3.603015537372437e-05, "loss": 0.8586, "step": 30390 }, { "epoch": 2.79, "learning_rate": 3.6025558517973706e-05, "loss": 0.9276, "step": 30400 }, { "epoch": 2.8, "learning_rate": 3.602096166222304e-05, "loss": 0.9734, "step": 30410 }, { "epoch": 2.8, "learning_rate": 3.601636480647237e-05, "loss": 0.9207, "step": 30420 }, { "epoch": 2.8, "learning_rate": 3.601176795072171e-05, "loss": 0.9561, "step": 30430 }, { "epoch": 2.8, "learning_rate": 3.6007171094971045e-05, "loss": 0.8937, "step": 30440 }, { "epoch": 2.8, "learning_rate": 3.6002574239220375e-05, "loss": 0.8601, "step": 30450 }, { "epoch": 2.8, "learning_rate": 3.599797738346971e-05, "loss": 0.9284, "step": 30460 }, { "epoch": 2.8, "learning_rate": 3.599338052771904e-05, "loss": 0.8643, "step": 30470 }, { "epoch": 2.8, "learning_rate": 3.598878367196837e-05, "loss": 0.9427, "step": 30480 }, { "epoch": 2.8, "learning_rate": 3.598418681621771e-05, "loss": 0.8803, "step": 30490 }, { "epoch": 2.8, "learning_rate": 3.5979589960467044e-05, "loss": 0.8936, "step": 30500 }, { "epoch": 2.81, "learning_rate": 3.5974993104716374e-05, "loss": 0.9068, "step": 30510 }, { "epoch": 2.81, "learning_rate": 3.597039624896571e-05, "loss": 0.8888, "step": 30520 }, { "epoch": 2.81, "learning_rate": 3.596579939321505e-05, "loss": 0.9055, "step": 30530 }, { "epoch": 2.81, "learning_rate": 3.5961202537464376e-05, "loss": 0.8384, "step": 30540 }, { "epoch": 2.81, "learning_rate": 3.595660568171371e-05, "loss": 0.8522, "step": 30550 }, { "epoch": 2.81, "learning_rate": 3.595200882596304e-05, "loss": 0.9822, "step": 30560 }, { "epoch": 2.81, "learning_rate": 3.594741197021237e-05, "loss": 1.0557, "step": 30570 }, { "epoch": 2.81, "learning_rate": 3.594281511446171e-05, "loss": 0.8559, "step": 30580 }, { "epoch": 2.81, "learning_rate": 3.5938218258711045e-05, "loss": 0.8977, "step": 30590 }, { "epoch": 2.81, "learning_rate": 3.5933621402960375e-05, "loss": 0.9428, "step": 30600 }, { "epoch": 2.81, "learning_rate": 3.592902454720971e-05, "loss": 0.8189, "step": 30610 }, { "epoch": 2.82, "learning_rate": 3.592442769145905e-05, "loss": 0.8973, "step": 30620 }, { "epoch": 2.82, "learning_rate": 3.591983083570838e-05, "loss": 0.8575, "step": 30630 }, { "epoch": 2.82, "learning_rate": 3.5915233979957714e-05, "loss": 0.8597, "step": 30640 }, { "epoch": 2.82, "learning_rate": 3.5910637124207044e-05, "loss": 0.8427, "step": 30650 }, { "epoch": 2.82, "learning_rate": 3.5906040268456373e-05, "loss": 0.8322, "step": 30660 }, { "epoch": 2.82, "learning_rate": 3.590144341270571e-05, "loss": 0.9015, "step": 30670 }, { "epoch": 2.82, "learning_rate": 3.5896846556955046e-05, "loss": 1.1105, "step": 30680 }, { "epoch": 2.82, "learning_rate": 3.5892249701204376e-05, "loss": 0.9852, "step": 30690 }, { "epoch": 2.82, "learning_rate": 3.588765284545371e-05, "loss": 0.9548, "step": 30700 }, { "epoch": 2.82, "learning_rate": 3.588305598970305e-05, "loss": 0.8499, "step": 30710 }, { "epoch": 2.82, "learning_rate": 3.587845913395238e-05, "loss": 1.0323, "step": 30720 }, { "epoch": 2.83, "learning_rate": 3.5873862278201715e-05, "loss": 1.0364, "step": 30730 }, { "epoch": 2.83, "learning_rate": 3.5869265422451045e-05, "loss": 0.9993, "step": 30740 }, { "epoch": 2.83, "learning_rate": 3.5864668566700375e-05, "loss": 0.798, "step": 30750 }, { "epoch": 2.83, "learning_rate": 3.586007171094971e-05, "loss": 0.8833, "step": 30760 }, { "epoch": 2.83, "learning_rate": 3.585547485519905e-05, "loss": 0.9174, "step": 30770 }, { "epoch": 2.83, "learning_rate": 3.585087799944838e-05, "loss": 0.8387, "step": 30780 }, { "epoch": 2.83, "learning_rate": 3.5846281143697714e-05, "loss": 0.9369, "step": 30790 }, { "epoch": 2.83, "learning_rate": 3.584168428794705e-05, "loss": 0.8246, "step": 30800 }, { "epoch": 2.83, "learning_rate": 3.583708743219638e-05, "loss": 0.7546, "step": 30810 }, { "epoch": 2.83, "learning_rate": 3.5832490576445716e-05, "loss": 0.8403, "step": 30820 }, { "epoch": 2.83, "learning_rate": 3.5827893720695046e-05, "loss": 0.8796, "step": 30830 }, { "epoch": 2.84, "learning_rate": 3.5823296864944376e-05, "loss": 0.9445, "step": 30840 }, { "epoch": 2.84, "learning_rate": 3.581870000919371e-05, "loss": 0.8512, "step": 30850 }, { "epoch": 2.84, "learning_rate": 3.581410315344305e-05, "loss": 0.7977, "step": 30860 }, { "epoch": 2.84, "learning_rate": 3.580950629769238e-05, "loss": 0.9442, "step": 30870 }, { "epoch": 2.84, "learning_rate": 3.5804909441941715e-05, "loss": 0.8771, "step": 30880 }, { "epoch": 2.84, "learning_rate": 3.580031258619105e-05, "loss": 0.9272, "step": 30890 }, { "epoch": 2.84, "learning_rate": 3.579571573044038e-05, "loss": 0.8357, "step": 30900 }, { "epoch": 2.84, "learning_rate": 3.579111887468972e-05, "loss": 0.8801, "step": 30910 }, { "epoch": 2.84, "learning_rate": 3.578652201893905e-05, "loss": 0.9564, "step": 30920 }, { "epoch": 2.84, "learning_rate": 3.578192516318838e-05, "loss": 0.9837, "step": 30930 }, { "epoch": 2.84, "learning_rate": 3.5777328307437714e-05, "loss": 0.885, "step": 30940 }, { "epoch": 2.85, "learning_rate": 3.577273145168705e-05, "loss": 0.9861, "step": 30950 }, { "epoch": 2.85, "learning_rate": 3.576813459593638e-05, "loss": 0.7833, "step": 30960 }, { "epoch": 2.85, "learning_rate": 3.5763537740185716e-05, "loss": 0.8507, "step": 30970 }, { "epoch": 2.85, "learning_rate": 3.575894088443505e-05, "loss": 0.8672, "step": 30980 }, { "epoch": 2.85, "learning_rate": 3.575434402868438e-05, "loss": 0.8665, "step": 30990 }, { "epoch": 2.85, "learning_rate": 3.574974717293372e-05, "loss": 0.7605, "step": 31000 }, { "epoch": 2.85, "learning_rate": 3.574515031718305e-05, "loss": 0.9407, "step": 31010 }, { "epoch": 2.85, "learning_rate": 3.574055346143238e-05, "loss": 1.0566, "step": 31020 }, { "epoch": 2.85, "learning_rate": 3.5735956605681715e-05, "loss": 0.8567, "step": 31030 }, { "epoch": 2.85, "learning_rate": 3.573135974993105e-05, "loss": 0.8425, "step": 31040 }, { "epoch": 2.85, "learning_rate": 3.572676289418038e-05, "loss": 0.9141, "step": 31050 }, { "epoch": 2.86, "learning_rate": 3.572216603842972e-05, "loss": 0.9592, "step": 31060 }, { "epoch": 2.86, "learning_rate": 3.571756918267905e-05, "loss": 0.857, "step": 31070 }, { "epoch": 2.86, "learning_rate": 3.5712972326928384e-05, "loss": 0.9337, "step": 31080 }, { "epoch": 2.86, "learning_rate": 3.570837547117772e-05, "loss": 0.8753, "step": 31090 }, { "epoch": 2.86, "learning_rate": 3.570377861542705e-05, "loss": 0.8457, "step": 31100 }, { "epoch": 2.86, "learning_rate": 3.569918175967638e-05, "loss": 0.8675, "step": 31110 }, { "epoch": 2.86, "learning_rate": 3.5694584903925716e-05, "loss": 0.9133, "step": 31120 }, { "epoch": 2.86, "learning_rate": 3.568998804817505e-05, "loss": 0.9223, "step": 31130 }, { "epoch": 2.86, "learning_rate": 3.568539119242438e-05, "loss": 0.917, "step": 31140 }, { "epoch": 2.86, "learning_rate": 3.568079433667372e-05, "loss": 0.9247, "step": 31150 }, { "epoch": 2.86, "learning_rate": 3.567619748092305e-05, "loss": 0.8154, "step": 31160 }, { "epoch": 2.87, "learning_rate": 3.5671600625172385e-05, "loss": 0.8565, "step": 31170 }, { "epoch": 2.87, "learning_rate": 3.566700376942172e-05, "loss": 0.8969, "step": 31180 }, { "epoch": 2.87, "learning_rate": 3.566240691367105e-05, "loss": 1.0131, "step": 31190 }, { "epoch": 2.87, "learning_rate": 3.565781005792038e-05, "loss": 0.8677, "step": 31200 }, { "epoch": 2.87, "learning_rate": 3.565321320216972e-05, "loss": 0.9481, "step": 31210 }, { "epoch": 2.87, "learning_rate": 3.5648616346419054e-05, "loss": 0.898, "step": 31220 }, { "epoch": 2.87, "learning_rate": 3.564401949066838e-05, "loss": 0.8948, "step": 31230 }, { "epoch": 2.87, "learning_rate": 3.563942263491772e-05, "loss": 0.9554, "step": 31240 }, { "epoch": 2.87, "learning_rate": 3.563482577916705e-05, "loss": 0.8506, "step": 31250 }, { "epoch": 2.87, "learning_rate": 3.5630228923416386e-05, "loss": 0.851, "step": 31260 }, { "epoch": 2.87, "learning_rate": 3.562563206766572e-05, "loss": 0.9866, "step": 31270 }, { "epoch": 2.88, "learning_rate": 3.562103521191505e-05, "loss": 0.9141, "step": 31280 }, { "epoch": 2.88, "learning_rate": 3.561643835616438e-05, "loss": 0.8915, "step": 31290 }, { "epoch": 2.88, "learning_rate": 3.561184150041372e-05, "loss": 0.8771, "step": 31300 }, { "epoch": 2.88, "learning_rate": 3.5607244644663055e-05, "loss": 0.8912, "step": 31310 }, { "epoch": 2.88, "learning_rate": 3.5602647788912384e-05, "loss": 0.9788, "step": 31320 }, { "epoch": 2.88, "learning_rate": 3.559805093316172e-05, "loss": 0.9649, "step": 31330 }, { "epoch": 2.88, "learning_rate": 3.559345407741105e-05, "loss": 1.0151, "step": 31340 }, { "epoch": 2.88, "learning_rate": 3.558885722166039e-05, "loss": 0.9154, "step": 31350 }, { "epoch": 2.88, "learning_rate": 3.5584260365909724e-05, "loss": 0.863, "step": 31360 }, { "epoch": 2.88, "learning_rate": 3.557966351015905e-05, "loss": 0.9154, "step": 31370 }, { "epoch": 2.88, "learning_rate": 3.557506665440838e-05, "loss": 0.8416, "step": 31380 }, { "epoch": 2.89, "learning_rate": 3.557046979865772e-05, "loss": 0.8957, "step": 31390 }, { "epoch": 2.89, "learning_rate": 3.556587294290705e-05, "loss": 0.8719, "step": 31400 }, { "epoch": 2.89, "learning_rate": 3.5561276087156386e-05, "loss": 0.88, "step": 31410 }, { "epoch": 2.89, "learning_rate": 3.555667923140572e-05, "loss": 0.9562, "step": 31420 }, { "epoch": 2.89, "learning_rate": 3.555208237565505e-05, "loss": 0.8366, "step": 31430 }, { "epoch": 2.89, "learning_rate": 3.554748551990439e-05, "loss": 0.8943, "step": 31440 }, { "epoch": 2.89, "learning_rate": 3.5542888664153725e-05, "loss": 0.9081, "step": 31450 }, { "epoch": 2.89, "learning_rate": 3.5538291808403055e-05, "loss": 0.8791, "step": 31460 }, { "epoch": 2.89, "learning_rate": 3.5533694952652384e-05, "loss": 0.944, "step": 31470 }, { "epoch": 2.89, "learning_rate": 3.552909809690172e-05, "loss": 0.952, "step": 31480 }, { "epoch": 2.9, "learning_rate": 3.552450124115105e-05, "loss": 0.964, "step": 31490 }, { "epoch": 2.9, "learning_rate": 3.551990438540039e-05, "loss": 1.0657, "step": 31500 }, { "epoch": 2.9, "learning_rate": 3.551530752964972e-05, "loss": 0.8545, "step": 31510 }, { "epoch": 2.9, "learning_rate": 3.551071067389905e-05, "loss": 0.9135, "step": 31520 }, { "epoch": 2.9, "learning_rate": 3.550611381814839e-05, "loss": 0.7681, "step": 31530 }, { "epoch": 2.9, "learning_rate": 3.5501516962397726e-05, "loss": 0.8097, "step": 31540 }, { "epoch": 2.9, "learning_rate": 3.5496920106647056e-05, "loss": 0.7565, "step": 31550 }, { "epoch": 2.9, "learning_rate": 3.5492323250896385e-05, "loss": 0.8217, "step": 31560 }, { "epoch": 2.9, "learning_rate": 3.548772639514572e-05, "loss": 0.9366, "step": 31570 }, { "epoch": 2.9, "learning_rate": 3.548312953939505e-05, "loss": 0.8259, "step": 31580 }, { "epoch": 2.9, "learning_rate": 3.547853268364439e-05, "loss": 0.8972, "step": 31590 }, { "epoch": 2.91, "learning_rate": 3.5473935827893725e-05, "loss": 0.8894, "step": 31600 }, { "epoch": 2.91, "learning_rate": 3.5469338972143054e-05, "loss": 0.8256, "step": 31610 }, { "epoch": 2.91, "learning_rate": 3.546474211639239e-05, "loss": 0.9246, "step": 31620 }, { "epoch": 2.91, "learning_rate": 3.546014526064173e-05, "loss": 0.8964, "step": 31630 }, { "epoch": 2.91, "learning_rate": 3.545554840489106e-05, "loss": 0.8336, "step": 31640 }, { "epoch": 2.91, "learning_rate": 3.545095154914039e-05, "loss": 0.9342, "step": 31650 }, { "epoch": 2.91, "learning_rate": 3.544635469338972e-05, "loss": 0.9682, "step": 31660 }, { "epoch": 2.91, "learning_rate": 3.544175783763905e-05, "loss": 0.9029, "step": 31670 }, { "epoch": 2.91, "learning_rate": 3.543716098188839e-05, "loss": 0.9227, "step": 31680 }, { "epoch": 2.91, "learning_rate": 3.5432564126137726e-05, "loss": 0.7535, "step": 31690 }, { "epoch": 2.91, "learning_rate": 3.5427967270387055e-05, "loss": 0.9752, "step": 31700 }, { "epoch": 2.92, "learning_rate": 3.542337041463639e-05, "loss": 0.9554, "step": 31710 }, { "epoch": 2.92, "learning_rate": 3.541877355888573e-05, "loss": 0.8541, "step": 31720 }, { "epoch": 2.92, "learning_rate": 3.541417670313505e-05, "loss": 0.8784, "step": 31730 }, { "epoch": 2.92, "learning_rate": 3.540957984738439e-05, "loss": 0.9193, "step": 31740 }, { "epoch": 2.92, "learning_rate": 3.5404982991633724e-05, "loss": 0.9018, "step": 31750 }, { "epoch": 2.92, "learning_rate": 3.5400386135883054e-05, "loss": 1.0052, "step": 31760 }, { "epoch": 2.92, "learning_rate": 3.539578928013239e-05, "loss": 0.9537, "step": 31770 }, { "epoch": 2.92, "learning_rate": 3.539119242438173e-05, "loss": 0.8938, "step": 31780 }, { "epoch": 2.92, "learning_rate": 3.538659556863106e-05, "loss": 0.9044, "step": 31790 }, { "epoch": 2.92, "learning_rate": 3.538199871288039e-05, "loss": 0.8573, "step": 31800 }, { "epoch": 2.92, "learning_rate": 3.537740185712973e-05, "loss": 0.8894, "step": 31810 }, { "epoch": 2.93, "learning_rate": 3.537280500137905e-05, "loss": 0.7973, "step": 31820 }, { "epoch": 2.93, "learning_rate": 3.536820814562839e-05, "loss": 0.9955, "step": 31830 }, { "epoch": 2.93, "learning_rate": 3.5363611289877725e-05, "loss": 0.8496, "step": 31840 }, { "epoch": 2.93, "learning_rate": 3.5359014434127055e-05, "loss": 0.8532, "step": 31850 }, { "epoch": 2.93, "learning_rate": 3.535441757837639e-05, "loss": 0.869, "step": 31860 }, { "epoch": 2.93, "learning_rate": 3.534982072262573e-05, "loss": 0.8285, "step": 31870 }, { "epoch": 2.93, "learning_rate": 3.534522386687506e-05, "loss": 0.9447, "step": 31880 }, { "epoch": 2.93, "learning_rate": 3.5340627011124394e-05, "loss": 0.8467, "step": 31890 }, { "epoch": 2.93, "learning_rate": 3.533603015537373e-05, "loss": 0.8278, "step": 31900 }, { "epoch": 2.93, "learning_rate": 3.5331433299623054e-05, "loss": 0.8292, "step": 31910 }, { "epoch": 2.93, "learning_rate": 3.532683644387239e-05, "loss": 0.8126, "step": 31920 }, { "epoch": 2.94, "learning_rate": 3.532223958812173e-05, "loss": 0.9435, "step": 31930 }, { "epoch": 2.94, "learning_rate": 3.5317642732371056e-05, "loss": 0.8329, "step": 31940 }, { "epoch": 2.94, "learning_rate": 3.531304587662039e-05, "loss": 0.8652, "step": 31950 }, { "epoch": 2.94, "learning_rate": 3.530844902086973e-05, "loss": 0.9273, "step": 31960 }, { "epoch": 2.94, "learning_rate": 3.530385216511906e-05, "loss": 0.8626, "step": 31970 }, { "epoch": 2.94, "learning_rate": 3.5299255309368396e-05, "loss": 0.773, "step": 31980 }, { "epoch": 2.94, "learning_rate": 3.529465845361773e-05, "loss": 0.9094, "step": 31990 }, { "epoch": 2.94, "learning_rate": 3.5290061597867055e-05, "loss": 0.8314, "step": 32000 }, { "epoch": 2.94, "learning_rate": 3.528546474211639e-05, "loss": 0.7143, "step": 32010 }, { "epoch": 2.94, "learning_rate": 3.528086788636573e-05, "loss": 0.858, "step": 32020 }, { "epoch": 2.94, "learning_rate": 3.527627103061506e-05, "loss": 0.8345, "step": 32030 }, { "epoch": 2.95, "learning_rate": 3.5271674174864394e-05, "loss": 0.8045, "step": 32040 }, { "epoch": 2.95, "learning_rate": 3.526707731911373e-05, "loss": 0.86, "step": 32050 }, { "epoch": 2.95, "learning_rate": 3.526248046336306e-05, "loss": 0.8732, "step": 32060 }, { "epoch": 2.95, "learning_rate": 3.52578836076124e-05, "loss": 0.941, "step": 32070 }, { "epoch": 2.95, "learning_rate": 3.525328675186173e-05, "loss": 0.8274, "step": 32080 }, { "epoch": 2.95, "learning_rate": 3.5248689896111056e-05, "loss": 0.9423, "step": 32090 }, { "epoch": 2.95, "learning_rate": 3.524409304036039e-05, "loss": 0.9156, "step": 32100 }, { "epoch": 2.95, "learning_rate": 3.523949618460973e-05, "loss": 0.9606, "step": 32110 }, { "epoch": 2.95, "learning_rate": 3.523489932885906e-05, "loss": 0.7461, "step": 32120 }, { "epoch": 2.95, "learning_rate": 3.5230302473108395e-05, "loss": 0.9809, "step": 32130 }, { "epoch": 2.95, "learning_rate": 3.522570561735773e-05, "loss": 0.9367, "step": 32140 }, { "epoch": 2.96, "learning_rate": 3.522110876160706e-05, "loss": 0.9909, "step": 32150 }, { "epoch": 2.96, "learning_rate": 3.52165119058564e-05, "loss": 0.8856, "step": 32160 }, { "epoch": 2.96, "learning_rate": 3.5211915050105734e-05, "loss": 0.8986, "step": 32170 }, { "epoch": 2.96, "learning_rate": 3.520731819435506e-05, "loss": 1.0041, "step": 32180 }, { "epoch": 2.96, "learning_rate": 3.5202721338604394e-05, "loss": 0.7575, "step": 32190 }, { "epoch": 2.96, "learning_rate": 3.519812448285373e-05, "loss": 0.8732, "step": 32200 }, { "epoch": 2.96, "learning_rate": 3.519352762710306e-05, "loss": 0.8166, "step": 32210 }, { "epoch": 2.96, "learning_rate": 3.5188930771352396e-05, "loss": 0.7986, "step": 32220 }, { "epoch": 2.96, "learning_rate": 3.518433391560173e-05, "loss": 1.0318, "step": 32230 }, { "epoch": 2.96, "learning_rate": 3.517973705985106e-05, "loss": 0.7335, "step": 32240 }, { "epoch": 2.96, "learning_rate": 3.51751402041004e-05, "loss": 0.856, "step": 32250 }, { "epoch": 2.97, "learning_rate": 3.5170543348349736e-05, "loss": 1.0326, "step": 32260 }, { "epoch": 2.97, "learning_rate": 3.516594649259906e-05, "loss": 1.0132, "step": 32270 }, { "epoch": 2.97, "learning_rate": 3.5161349636848395e-05, "loss": 0.7998, "step": 32280 }, { "epoch": 2.97, "learning_rate": 3.515675278109773e-05, "loss": 0.894, "step": 32290 }, { "epoch": 2.97, "learning_rate": 3.515215592534706e-05, "loss": 0.8154, "step": 32300 }, { "epoch": 2.97, "learning_rate": 3.51475590695964e-05, "loss": 1.1191, "step": 32310 }, { "epoch": 2.97, "learning_rate": 3.5142962213845734e-05, "loss": 0.8067, "step": 32320 }, { "epoch": 2.97, "learning_rate": 3.5138365358095064e-05, "loss": 0.7815, "step": 32330 }, { "epoch": 2.97, "learning_rate": 3.51337685023444e-05, "loss": 0.9119, "step": 32340 }, { "epoch": 2.97, "learning_rate": 3.512917164659374e-05, "loss": 0.8206, "step": 32350 }, { "epoch": 2.98, "learning_rate": 3.5124574790843066e-05, "loss": 0.9211, "step": 32360 }, { "epoch": 2.98, "learning_rate": 3.5119977935092396e-05, "loss": 0.8894, "step": 32370 }, { "epoch": 2.98, "learning_rate": 3.511538107934173e-05, "loss": 0.8662, "step": 32380 }, { "epoch": 2.98, "learning_rate": 3.511078422359106e-05, "loss": 0.9926, "step": 32390 }, { "epoch": 2.98, "learning_rate": 3.51061873678404e-05, "loss": 0.8154, "step": 32400 }, { "epoch": 2.98, "learning_rate": 3.5101590512089735e-05, "loss": 0.8794, "step": 32410 }, { "epoch": 2.98, "learning_rate": 3.5096993656339065e-05, "loss": 0.9837, "step": 32420 }, { "epoch": 2.98, "learning_rate": 3.50923968005884e-05, "loss": 0.9557, "step": 32430 }, { "epoch": 2.98, "learning_rate": 3.508779994483774e-05, "loss": 0.967, "step": 32440 }, { "epoch": 2.98, "learning_rate": 3.508320308908707e-05, "loss": 0.8409, "step": 32450 }, { "epoch": 2.98, "learning_rate": 3.50786062333364e-05, "loss": 0.8271, "step": 32460 }, { "epoch": 2.99, "learning_rate": 3.5074009377585734e-05, "loss": 0.8614, "step": 32470 }, { "epoch": 2.99, "learning_rate": 3.5069412521835064e-05, "loss": 0.8372, "step": 32480 }, { "epoch": 2.99, "learning_rate": 3.50648156660844e-05, "loss": 0.8546, "step": 32490 }, { "epoch": 2.99, "learning_rate": 3.5060218810333737e-05, "loss": 0.9386, "step": 32500 }, { "epoch": 2.99, "learning_rate": 3.5055621954583066e-05, "loss": 0.8898, "step": 32510 }, { "epoch": 2.99, "learning_rate": 3.50510250988324e-05, "loss": 0.7773, "step": 32520 }, { "epoch": 2.99, "learning_rate": 3.504642824308174e-05, "loss": 0.844, "step": 32530 }, { "epoch": 2.99, "learning_rate": 3.504183138733107e-05, "loss": 0.958, "step": 32540 }, { "epoch": 2.99, "learning_rate": 3.50372345315804e-05, "loss": 0.9427, "step": 32550 }, { "epoch": 2.99, "learning_rate": 3.5032637675829735e-05, "loss": 0.9484, "step": 32560 }, { "epoch": 2.99, "learning_rate": 3.5028040820079065e-05, "loss": 0.9066, "step": 32570 }, { "epoch": 3.0, "learning_rate": 3.50234439643284e-05, "loss": 0.806, "step": 32580 }, { "epoch": 3.0, "learning_rate": 3.501884710857774e-05, "loss": 0.9167, "step": 32590 }, { "epoch": 3.0, "learning_rate": 3.501425025282707e-05, "loss": 0.9701, "step": 32600 }, { "epoch": 3.0, "learning_rate": 3.5009653397076404e-05, "loss": 0.8778, "step": 32610 }, { "epoch": 3.0, "learning_rate": 3.500505654132574e-05, "loss": 0.917, "step": 32620 }, { "epoch": 3.0, "learning_rate": 3.500045968557507e-05, "loss": 0.8526, "step": 32630 }, { "epoch": 3.0, "learning_rate": 3.49958628298244e-05, "loss": 0.811, "step": 32640 }, { "epoch": 3.0, "learning_rate": 3.4991265974073736e-05, "loss": 0.8757, "step": 32650 }, { "epoch": 3.0, "learning_rate": 3.4986669118323066e-05, "loss": 0.9319, "step": 32660 }, { "epoch": 3.0, "learning_rate": 3.49820722625724e-05, "loss": 0.8513, "step": 32670 }, { "epoch": 3.0, "learning_rate": 3.497747540682174e-05, "loss": 0.9146, "step": 32680 }, { "epoch": 3.01, "learning_rate": 3.497287855107107e-05, "loss": 0.8735, "step": 32690 }, { "epoch": 3.01, "learning_rate": 3.4968281695320405e-05, "loss": 0.8673, "step": 32700 }, { "epoch": 3.01, "learning_rate": 3.4963684839569735e-05, "loss": 0.8298, "step": 32710 }, { "epoch": 3.01, "learning_rate": 3.495908798381907e-05, "loss": 0.9395, "step": 32720 }, { "epoch": 3.01, "learning_rate": 3.49544911280684e-05, "loss": 0.9158, "step": 32730 }, { "epoch": 3.01, "learning_rate": 3.494989427231774e-05, "loss": 0.9218, "step": 32740 }, { "epoch": 3.01, "learning_rate": 3.494529741656707e-05, "loss": 0.8711, "step": 32750 }, { "epoch": 3.01, "learning_rate": 3.4940700560816404e-05, "loss": 0.9014, "step": 32760 }, { "epoch": 3.01, "learning_rate": 3.493610370506574e-05, "loss": 0.8895, "step": 32770 }, { "epoch": 3.01, "learning_rate": 3.493150684931507e-05, "loss": 0.8424, "step": 32780 }, { "epoch": 3.01, "learning_rate": 3.4926909993564406e-05, "loss": 0.8303, "step": 32790 }, { "epoch": 3.02, "learning_rate": 3.4922313137813736e-05, "loss": 0.8968, "step": 32800 }, { "epoch": 3.02, "learning_rate": 3.491771628206307e-05, "loss": 0.8644, "step": 32810 }, { "epoch": 3.02, "learning_rate": 3.49131194263124e-05, "loss": 0.8902, "step": 32820 }, { "epoch": 3.02, "learning_rate": 3.490852257056174e-05, "loss": 0.9506, "step": 32830 }, { "epoch": 3.02, "learning_rate": 3.490392571481107e-05, "loss": 0.869, "step": 32840 }, { "epoch": 3.02, "learning_rate": 3.4899328859060405e-05, "loss": 0.8403, "step": 32850 }, { "epoch": 3.02, "learning_rate": 3.489473200330974e-05, "loss": 0.7674, "step": 32860 }, { "epoch": 3.02, "learning_rate": 3.489013514755907e-05, "loss": 0.7903, "step": 32870 }, { "epoch": 3.02, "learning_rate": 3.488553829180841e-05, "loss": 0.9089, "step": 32880 }, { "epoch": 3.02, "learning_rate": 3.488094143605774e-05, "loss": 0.9097, "step": 32890 }, { "epoch": 3.02, "learning_rate": 3.4876344580307074e-05, "loss": 0.8355, "step": 32900 }, { "epoch": 3.03, "learning_rate": 3.48717477245564e-05, "loss": 0.7751, "step": 32910 }, { "epoch": 3.03, "learning_rate": 3.486715086880574e-05, "loss": 0.7893, "step": 32920 }, { "epoch": 3.03, "learning_rate": 3.486255401305507e-05, "loss": 0.7556, "step": 32930 }, { "epoch": 3.03, "learning_rate": 3.4857957157304406e-05, "loss": 0.9097, "step": 32940 }, { "epoch": 3.03, "learning_rate": 3.485336030155374e-05, "loss": 0.9149, "step": 32950 }, { "epoch": 3.03, "learning_rate": 3.484876344580307e-05, "loss": 0.8415, "step": 32960 }, { "epoch": 3.03, "learning_rate": 3.484416659005241e-05, "loss": 0.9314, "step": 32970 }, { "epoch": 3.03, "learning_rate": 3.483956973430174e-05, "loss": 0.8608, "step": 32980 }, { "epoch": 3.03, "learning_rate": 3.4834972878551075e-05, "loss": 0.9932, "step": 32990 }, { "epoch": 3.03, "learning_rate": 3.4830376022800405e-05, "loss": 0.9321, "step": 33000 }, { "epoch": 3.03, "learning_rate": 3.482577916704974e-05, "loss": 0.8707, "step": 33010 }, { "epoch": 3.04, "learning_rate": 3.482118231129907e-05, "loss": 0.8776, "step": 33020 }, { "epoch": 3.04, "learning_rate": 3.481658545554841e-05, "loss": 1.0177, "step": 33030 }, { "epoch": 3.04, "learning_rate": 3.481198859979774e-05, "loss": 0.8404, "step": 33040 }, { "epoch": 3.04, "learning_rate": 3.480739174404707e-05, "loss": 0.858, "step": 33050 }, { "epoch": 3.04, "learning_rate": 3.480279488829641e-05, "loss": 1.0405, "step": 33060 }, { "epoch": 3.04, "learning_rate": 3.479819803254574e-05, "loss": 0.8556, "step": 33070 }, { "epoch": 3.04, "learning_rate": 3.4793601176795076e-05, "loss": 0.8453, "step": 33080 }, { "epoch": 3.04, "learning_rate": 3.4789004321044406e-05, "loss": 1.0249, "step": 33090 }, { "epoch": 3.04, "learning_rate": 3.478440746529374e-05, "loss": 0.8704, "step": 33100 }, { "epoch": 3.04, "learning_rate": 3.477981060954307e-05, "loss": 0.8805, "step": 33110 }, { "epoch": 3.04, "learning_rate": 3.477521375379241e-05, "loss": 0.9669, "step": 33120 }, { "epoch": 3.05, "learning_rate": 3.477061689804174e-05, "loss": 0.8225, "step": 33130 }, { "epoch": 3.05, "learning_rate": 3.4766020042291075e-05, "loss": 0.8664, "step": 33140 }, { "epoch": 3.05, "learning_rate": 3.476142318654041e-05, "loss": 0.827, "step": 33150 }, { "epoch": 3.05, "learning_rate": 3.475682633078974e-05, "loss": 0.7586, "step": 33160 }, { "epoch": 3.05, "learning_rate": 3.475222947503908e-05, "loss": 0.9104, "step": 33170 }, { "epoch": 3.05, "learning_rate": 3.474763261928841e-05, "loss": 0.9251, "step": 33180 }, { "epoch": 3.05, "learning_rate": 3.4743035763537743e-05, "loss": 0.9114, "step": 33190 }, { "epoch": 3.05, "learning_rate": 3.473843890778707e-05, "loss": 0.8971, "step": 33200 }, { "epoch": 3.05, "learning_rate": 3.473384205203641e-05, "loss": 0.8651, "step": 33210 }, { "epoch": 3.05, "learning_rate": 3.472924519628574e-05, "loss": 0.903, "step": 33220 }, { "epoch": 3.06, "learning_rate": 3.4724648340535076e-05, "loss": 0.8125, "step": 33230 }, { "epoch": 3.06, "learning_rate": 3.472005148478441e-05, "loss": 0.8595, "step": 33240 }, { "epoch": 3.06, "learning_rate": 3.471545462903374e-05, "loss": 0.8387, "step": 33250 }, { "epoch": 3.06, "learning_rate": 3.471085777328308e-05, "loss": 0.9094, "step": 33260 }, { "epoch": 3.06, "learning_rate": 3.470626091753241e-05, "loss": 0.9021, "step": 33270 }, { "epoch": 3.06, "learning_rate": 3.4701664061781745e-05, "loss": 0.8743, "step": 33280 }, { "epoch": 3.06, "learning_rate": 3.4697067206031074e-05, "loss": 0.828, "step": 33290 }, { "epoch": 3.06, "learning_rate": 3.469247035028041e-05, "loss": 0.9327, "step": 33300 }, { "epoch": 3.06, "learning_rate": 3.468787349452974e-05, "loss": 0.8925, "step": 33310 }, { "epoch": 3.06, "learning_rate": 3.468327663877908e-05, "loss": 0.9683, "step": 33320 }, { "epoch": 3.06, "learning_rate": 3.4678679783028413e-05, "loss": 1.0238, "step": 33330 }, { "epoch": 3.07, "learning_rate": 3.467408292727774e-05, "loss": 0.8342, "step": 33340 }, { "epoch": 3.07, "learning_rate": 3.466948607152708e-05, "loss": 0.845, "step": 33350 }, { "epoch": 3.07, "learning_rate": 3.466488921577641e-05, "loss": 1.0131, "step": 33360 }, { "epoch": 3.07, "learning_rate": 3.466029236002574e-05, "loss": 0.8594, "step": 33370 }, { "epoch": 3.07, "learning_rate": 3.4655695504275076e-05, "loss": 0.9532, "step": 33380 }, { "epoch": 3.07, "learning_rate": 3.465109864852441e-05, "loss": 1.0231, "step": 33390 }, { "epoch": 3.07, "learning_rate": 3.464650179277374e-05, "loss": 0.8886, "step": 33400 }, { "epoch": 3.07, "learning_rate": 3.464190493702308e-05, "loss": 0.8014, "step": 33410 }, { "epoch": 3.07, "learning_rate": 3.4637308081272415e-05, "loss": 0.7966, "step": 33420 }, { "epoch": 3.07, "learning_rate": 3.4632711225521744e-05, "loss": 0.9497, "step": 33430 }, { "epoch": 3.07, "learning_rate": 3.462811436977108e-05, "loss": 0.8833, "step": 33440 }, { "epoch": 3.08, "learning_rate": 3.462351751402041e-05, "loss": 0.8407, "step": 33450 }, { "epoch": 3.08, "learning_rate": 3.461892065826974e-05, "loss": 0.8643, "step": 33460 }, { "epoch": 3.08, "learning_rate": 3.461432380251908e-05, "loss": 0.8864, "step": 33470 }, { "epoch": 3.08, "learning_rate": 3.460972694676841e-05, "loss": 0.9059, "step": 33480 }, { "epoch": 3.08, "learning_rate": 3.460513009101774e-05, "loss": 0.7874, "step": 33490 }, { "epoch": 3.08, "learning_rate": 3.460053323526708e-05, "loss": 0.9192, "step": 33500 }, { "epoch": 3.08, "learning_rate": 3.4595936379516416e-05, "loss": 0.7992, "step": 33510 }, { "epoch": 3.08, "learning_rate": 3.4591339523765746e-05, "loss": 0.8628, "step": 33520 }, { "epoch": 3.08, "learning_rate": 3.458674266801508e-05, "loss": 0.9868, "step": 33530 }, { "epoch": 3.08, "learning_rate": 3.458214581226441e-05, "loss": 0.8393, "step": 33540 }, { "epoch": 3.08, "learning_rate": 3.457754895651374e-05, "loss": 0.7542, "step": 33550 }, { "epoch": 3.09, "learning_rate": 3.457295210076308e-05, "loss": 0.8694, "step": 33560 }, { "epoch": 3.09, "learning_rate": 3.4568355245012414e-05, "loss": 0.9286, "step": 33570 }, { "epoch": 3.09, "learning_rate": 3.4563758389261744e-05, "loss": 0.9105, "step": 33580 }, { "epoch": 3.09, "learning_rate": 3.455916153351108e-05, "loss": 0.9148, "step": 33590 }, { "epoch": 3.09, "learning_rate": 3.455456467776042e-05, "loss": 0.8502, "step": 33600 }, { "epoch": 3.09, "learning_rate": 3.454996782200975e-05, "loss": 0.9311, "step": 33610 }, { "epoch": 3.09, "learning_rate": 3.454537096625908e-05, "loss": 0.8994, "step": 33620 }, { "epoch": 3.09, "learning_rate": 3.454077411050841e-05, "loss": 0.8622, "step": 33630 }, { "epoch": 3.09, "learning_rate": 3.453617725475774e-05, "loss": 1.0268, "step": 33640 }, { "epoch": 3.09, "learning_rate": 3.453158039900708e-05, "loss": 0.8633, "step": 33650 }, { "epoch": 3.09, "learning_rate": 3.4526983543256416e-05, "loss": 0.912, "step": 33660 }, { "epoch": 3.1, "learning_rate": 3.4522386687505745e-05, "loss": 0.8812, "step": 33670 }, { "epoch": 3.1, "learning_rate": 3.451778983175508e-05, "loss": 0.8773, "step": 33680 }, { "epoch": 3.1, "learning_rate": 3.451319297600442e-05, "loss": 0.9104, "step": 33690 }, { "epoch": 3.1, "learning_rate": 3.450859612025375e-05, "loss": 0.8925, "step": 33700 }, { "epoch": 3.1, "learning_rate": 3.4503999264503084e-05, "loss": 0.83, "step": 33710 }, { "epoch": 3.1, "learning_rate": 3.4499402408752414e-05, "loss": 0.8465, "step": 33720 }, { "epoch": 3.1, "learning_rate": 3.4494805553001744e-05, "loss": 0.9713, "step": 33730 }, { "epoch": 3.1, "learning_rate": 3.449020869725108e-05, "loss": 0.7953, "step": 33740 }, { "epoch": 3.1, "learning_rate": 3.448561184150042e-05, "loss": 0.8282, "step": 33750 }, { "epoch": 3.1, "learning_rate": 3.4481014985749746e-05, "loss": 0.8653, "step": 33760 }, { "epoch": 3.1, "learning_rate": 3.447641812999908e-05, "loss": 0.8329, "step": 33770 }, { "epoch": 3.11, "learning_rate": 3.447182127424842e-05, "loss": 0.9064, "step": 33780 }, { "epoch": 3.11, "learning_rate": 3.446722441849775e-05, "loss": 0.8352, "step": 33790 }, { "epoch": 3.11, "learning_rate": 3.4462627562747086e-05, "loss": 0.8989, "step": 33800 }, { "epoch": 3.11, "learning_rate": 3.4458030706996415e-05, "loss": 1.0296, "step": 33810 }, { "epoch": 3.11, "learning_rate": 3.4453433851245745e-05, "loss": 0.8311, "step": 33820 }, { "epoch": 3.11, "learning_rate": 3.444883699549508e-05, "loss": 0.8216, "step": 33830 }, { "epoch": 3.11, "learning_rate": 3.444424013974442e-05, "loss": 0.8395, "step": 33840 }, { "epoch": 3.11, "learning_rate": 3.443964328399375e-05, "loss": 0.8814, "step": 33850 }, { "epoch": 3.11, "learning_rate": 3.4435046428243084e-05, "loss": 0.9564, "step": 33860 }, { "epoch": 3.11, "learning_rate": 3.443044957249242e-05, "loss": 0.8521, "step": 33870 }, { "epoch": 3.11, "learning_rate": 3.442585271674175e-05, "loss": 0.8549, "step": 33880 }, { "epoch": 3.12, "learning_rate": 3.442125586099109e-05, "loss": 0.8562, "step": 33890 }, { "epoch": 3.12, "learning_rate": 3.4416659005240416e-05, "loss": 0.9864, "step": 33900 }, { "epoch": 3.12, "learning_rate": 3.4412062149489746e-05, "loss": 0.8203, "step": 33910 }, { "epoch": 3.12, "learning_rate": 3.440746529373908e-05, "loss": 0.8682, "step": 33920 }, { "epoch": 3.12, "learning_rate": 3.440286843798842e-05, "loss": 0.9021, "step": 33930 }, { "epoch": 3.12, "learning_rate": 3.439827158223775e-05, "loss": 0.9314, "step": 33940 }, { "epoch": 3.12, "learning_rate": 3.4393674726487085e-05, "loss": 1.0293, "step": 33950 }, { "epoch": 3.12, "learning_rate": 3.438907787073642e-05, "loss": 0.8919, "step": 33960 }, { "epoch": 3.12, "learning_rate": 3.438448101498575e-05, "loss": 0.9307, "step": 33970 }, { "epoch": 3.12, "learning_rate": 3.437988415923509e-05, "loss": 0.9444, "step": 33980 }, { "epoch": 3.12, "learning_rate": 3.437528730348442e-05, "loss": 1.0228, "step": 33990 }, { "epoch": 3.13, "learning_rate": 3.437069044773375e-05, "loss": 0.893, "step": 34000 }, { "epoch": 3.13, "learning_rate": 3.4366093591983084e-05, "loss": 0.9029, "step": 34010 }, { "epoch": 3.13, "learning_rate": 3.436149673623242e-05, "loss": 0.8588, "step": 34020 }, { "epoch": 3.13, "learning_rate": 3.435689988048175e-05, "loss": 0.9057, "step": 34030 }, { "epoch": 3.13, "learning_rate": 3.4352303024731087e-05, "loss": 0.869, "step": 34040 }, { "epoch": 3.13, "learning_rate": 3.434770616898042e-05, "loss": 0.9487, "step": 34050 }, { "epoch": 3.13, "learning_rate": 3.434310931322975e-05, "loss": 0.8867, "step": 34060 }, { "epoch": 3.13, "learning_rate": 3.433851245747909e-05, "loss": 0.818, "step": 34070 }, { "epoch": 3.13, "learning_rate": 3.433391560172842e-05, "loss": 0.8417, "step": 34080 }, { "epoch": 3.13, "learning_rate": 3.432931874597775e-05, "loss": 0.9572, "step": 34090 }, { "epoch": 3.14, "learning_rate": 3.4324721890227085e-05, "loss": 0.9749, "step": 34100 }, { "epoch": 3.14, "learning_rate": 3.432012503447642e-05, "loss": 0.8815, "step": 34110 }, { "epoch": 3.14, "learning_rate": 3.431552817872575e-05, "loss": 0.9157, "step": 34120 }, { "epoch": 3.14, "learning_rate": 3.431093132297509e-05, "loss": 0.9155, "step": 34130 }, { "epoch": 3.14, "learning_rate": 3.4306334467224424e-05, "loss": 0.9308, "step": 34140 }, { "epoch": 3.14, "learning_rate": 3.4301737611473754e-05, "loss": 0.9008, "step": 34150 }, { "epoch": 3.14, "learning_rate": 3.429714075572309e-05, "loss": 0.8021, "step": 34160 }, { "epoch": 3.14, "learning_rate": 3.429254389997242e-05, "loss": 0.8611, "step": 34170 }, { "epoch": 3.14, "learning_rate": 3.428794704422175e-05, "loss": 0.9336, "step": 34180 }, { "epoch": 3.14, "learning_rate": 3.4283350188471086e-05, "loss": 0.7743, "step": 34190 }, { "epoch": 3.14, "learning_rate": 3.427875333272042e-05, "loss": 0.9723, "step": 34200 }, { "epoch": 3.15, "learning_rate": 3.427415647696975e-05, "loss": 1.0241, "step": 34210 }, { "epoch": 3.15, "learning_rate": 3.426955962121909e-05, "loss": 0.8538, "step": 34220 }, { "epoch": 3.15, "learning_rate": 3.4264962765468425e-05, "loss": 0.9589, "step": 34230 }, { "epoch": 3.15, "learning_rate": 3.4260365909717755e-05, "loss": 0.9358, "step": 34240 }, { "epoch": 3.15, "learning_rate": 3.425576905396709e-05, "loss": 0.7696, "step": 34250 }, { "epoch": 3.15, "learning_rate": 3.425117219821642e-05, "loss": 0.9368, "step": 34260 }, { "epoch": 3.15, "learning_rate": 3.424657534246575e-05, "loss": 0.9699, "step": 34270 }, { "epoch": 3.15, "learning_rate": 3.424197848671509e-05, "loss": 0.8378, "step": 34280 }, { "epoch": 3.15, "learning_rate": 3.4237381630964424e-05, "loss": 0.8822, "step": 34290 }, { "epoch": 3.15, "learning_rate": 3.4232784775213754e-05, "loss": 0.8159, "step": 34300 }, { "epoch": 3.15, "learning_rate": 3.422818791946309e-05, "loss": 0.9017, "step": 34310 }, { "epoch": 3.16, "learning_rate": 3.4223591063712427e-05, "loss": 0.8932, "step": 34320 }, { "epoch": 3.16, "learning_rate": 3.4218994207961756e-05, "loss": 0.8677, "step": 34330 }, { "epoch": 3.16, "learning_rate": 3.421439735221109e-05, "loss": 0.9006, "step": 34340 }, { "epoch": 3.16, "learning_rate": 3.420980049646042e-05, "loss": 0.8388, "step": 34350 }, { "epoch": 3.16, "learning_rate": 3.420520364070975e-05, "loss": 0.968, "step": 34360 }, { "epoch": 3.16, "learning_rate": 3.420060678495909e-05, "loss": 0.8436, "step": 34370 }, { "epoch": 3.16, "learning_rate": 3.4196009929208425e-05, "loss": 0.9324, "step": 34380 }, { "epoch": 3.16, "learning_rate": 3.4191413073457755e-05, "loss": 0.8686, "step": 34390 }, { "epoch": 3.16, "learning_rate": 3.418681621770709e-05, "loss": 0.8334, "step": 34400 }, { "epoch": 3.16, "learning_rate": 3.418221936195643e-05, "loss": 0.8932, "step": 34410 }, { "epoch": 3.16, "learning_rate": 3.417762250620576e-05, "loss": 0.8233, "step": 34420 }, { "epoch": 3.17, "learning_rate": 3.4173025650455094e-05, "loss": 0.9848, "step": 34430 }, { "epoch": 3.17, "learning_rate": 3.4168428794704424e-05, "loss": 0.9797, "step": 34440 }, { "epoch": 3.17, "learning_rate": 3.416383193895375e-05, "loss": 0.813, "step": 34450 }, { "epoch": 3.17, "learning_rate": 3.415923508320309e-05, "loss": 0.9829, "step": 34460 }, { "epoch": 3.17, "learning_rate": 3.4154638227452426e-05, "loss": 0.924, "step": 34470 }, { "epoch": 3.17, "learning_rate": 3.4150041371701756e-05, "loss": 0.887, "step": 34480 }, { "epoch": 3.17, "learning_rate": 3.414544451595109e-05, "loss": 0.8847, "step": 34490 }, { "epoch": 3.17, "learning_rate": 3.414084766020043e-05, "loss": 0.8208, "step": 34500 }, { "epoch": 3.17, "learning_rate": 3.413625080444976e-05, "loss": 0.9585, "step": 34510 }, { "epoch": 3.17, "learning_rate": 3.4131653948699095e-05, "loss": 0.849, "step": 34520 }, { "epoch": 3.17, "learning_rate": 3.4127057092948425e-05, "loss": 0.9063, "step": 34530 }, { "epoch": 3.18, "learning_rate": 3.4122460237197755e-05, "loss": 0.8693, "step": 34540 }, { "epoch": 3.18, "learning_rate": 3.411786338144709e-05, "loss": 0.8926, "step": 34550 }, { "epoch": 3.18, "learning_rate": 3.411326652569643e-05, "loss": 0.9074, "step": 34560 }, { "epoch": 3.18, "learning_rate": 3.410866966994576e-05, "loss": 0.9194, "step": 34570 }, { "epoch": 3.18, "learning_rate": 3.4104072814195094e-05, "loss": 0.8922, "step": 34580 }, { "epoch": 3.18, "learning_rate": 3.4099475958444423e-05, "loss": 0.9205, "step": 34590 }, { "epoch": 3.18, "learning_rate": 3.409487910269376e-05, "loss": 0.9657, "step": 34600 }, { "epoch": 3.18, "learning_rate": 3.4090282246943096e-05, "loss": 0.7817, "step": 34610 }, { "epoch": 3.18, "learning_rate": 3.4085685391192426e-05, "loss": 0.9077, "step": 34620 }, { "epoch": 3.18, "learning_rate": 3.4081088535441756e-05, "loss": 0.9608, "step": 34630 }, { "epoch": 3.18, "learning_rate": 3.407649167969109e-05, "loss": 0.8699, "step": 34640 }, { "epoch": 3.19, "learning_rate": 3.407189482394043e-05, "loss": 0.9433, "step": 34650 }, { "epoch": 3.19, "learning_rate": 3.406729796818976e-05, "loss": 1.0109, "step": 34660 }, { "epoch": 3.19, "learning_rate": 3.4062701112439095e-05, "loss": 0.902, "step": 34670 }, { "epoch": 3.19, "learning_rate": 3.4058104256688425e-05, "loss": 0.8089, "step": 34680 }, { "epoch": 3.19, "learning_rate": 3.405350740093776e-05, "loss": 0.9388, "step": 34690 }, { "epoch": 3.19, "learning_rate": 3.40489105451871e-05, "loss": 0.8453, "step": 34700 }, { "epoch": 3.19, "learning_rate": 3.404431368943643e-05, "loss": 0.908, "step": 34710 }, { "epoch": 3.19, "learning_rate": 3.403971683368576e-05, "loss": 0.9422, "step": 34720 }, { "epoch": 3.19, "learning_rate": 3.4035119977935093e-05, "loss": 0.9895, "step": 34730 }, { "epoch": 3.19, "learning_rate": 3.403052312218443e-05, "loss": 0.816, "step": 34740 }, { "epoch": 3.19, "learning_rate": 3.402592626643376e-05, "loss": 0.9757, "step": 34750 }, { "epoch": 3.2, "learning_rate": 3.4021329410683096e-05, "loss": 0.9101, "step": 34760 }, { "epoch": 3.2, "learning_rate": 3.4016732554932426e-05, "loss": 0.869, "step": 34770 }, { "epoch": 3.2, "learning_rate": 3.401213569918176e-05, "loss": 0.8528, "step": 34780 }, { "epoch": 3.2, "learning_rate": 3.40075388434311e-05, "loss": 0.9195, "step": 34790 }, { "epoch": 3.2, "learning_rate": 3.400294198768043e-05, "loss": 0.7873, "step": 34800 }, { "epoch": 3.2, "learning_rate": 3.399834513192976e-05, "loss": 0.9415, "step": 34810 }, { "epoch": 3.2, "learning_rate": 3.3993748276179095e-05, "loss": 0.944, "step": 34820 }, { "epoch": 3.2, "learning_rate": 3.398915142042843e-05, "loss": 0.9178, "step": 34830 }, { "epoch": 3.2, "learning_rate": 3.398455456467776e-05, "loss": 0.9838, "step": 34840 }, { "epoch": 3.2, "learning_rate": 3.39799577089271e-05, "loss": 0.844, "step": 34850 }, { "epoch": 3.2, "learning_rate": 3.397536085317643e-05, "loss": 0.8828, "step": 34860 }, { "epoch": 3.21, "learning_rate": 3.3970763997425763e-05, "loss": 0.8451, "step": 34870 }, { "epoch": 3.21, "learning_rate": 3.39661671416751e-05, "loss": 0.8773, "step": 34880 }, { "epoch": 3.21, "learning_rate": 3.396157028592443e-05, "loss": 0.8892, "step": 34890 }, { "epoch": 3.21, "learning_rate": 3.395697343017376e-05, "loss": 0.9517, "step": 34900 }, { "epoch": 3.21, "learning_rate": 3.3952376574423096e-05, "loss": 0.8766, "step": 34910 }, { "epoch": 3.21, "learning_rate": 3.3947779718672426e-05, "loss": 0.9569, "step": 34920 }, { "epoch": 3.21, "learning_rate": 3.394318286292176e-05, "loss": 0.9277, "step": 34930 }, { "epoch": 3.21, "learning_rate": 3.39385860071711e-05, "loss": 0.8348, "step": 34940 }, { "epoch": 3.21, "learning_rate": 3.393398915142043e-05, "loss": 0.8151, "step": 34950 }, { "epoch": 3.21, "learning_rate": 3.3929392295669765e-05, "loss": 0.8842, "step": 34960 }, { "epoch": 3.22, "learning_rate": 3.39247954399191e-05, "loss": 0.7975, "step": 34970 }, { "epoch": 3.22, "learning_rate": 3.392019858416843e-05, "loss": 0.9039, "step": 34980 }, { "epoch": 3.22, "learning_rate": 3.391560172841776e-05, "loss": 0.8564, "step": 34990 }, { "epoch": 3.22, "learning_rate": 3.39110048726671e-05, "loss": 0.8602, "step": 35000 }, { "epoch": 3.22, "eval_accuracy": 0.5812227074235807, "eval_loss": 0.8809640407562256, "eval_runtime": 159.4592, "eval_samples_per_second": 28.722, "eval_steps_per_second": 3.593, "step": 35000 }, { "epoch": 3.22, "learning_rate": 3.390640801691643e-05, "loss": 0.7987, "step": 35010 }, { "epoch": 3.22, "learning_rate": 3.390181116116576e-05, "loss": 0.768, "step": 35020 }, { "epoch": 3.22, "learning_rate": 3.38972143054151e-05, "loss": 0.9813, "step": 35030 }, { "epoch": 3.22, "learning_rate": 3.389261744966443e-05, "loss": 0.91, "step": 35040 }, { "epoch": 3.22, "learning_rate": 3.3888020593913766e-05, "loss": 1.0066, "step": 35050 }, { "epoch": 3.22, "learning_rate": 3.38834237381631e-05, "loss": 0.8476, "step": 35060 }, { "epoch": 3.22, "learning_rate": 3.387882688241243e-05, "loss": 0.8752, "step": 35070 }, { "epoch": 3.23, "learning_rate": 3.387423002666176e-05, "loss": 0.9049, "step": 35080 }, { "epoch": 3.23, "learning_rate": 3.38696331709111e-05, "loss": 0.9203, "step": 35090 }, { "epoch": 3.23, "learning_rate": 3.386503631516043e-05, "loss": 0.9157, "step": 35100 }, { "epoch": 3.23, "learning_rate": 3.3860439459409764e-05, "loss": 0.8949, "step": 35110 }, { "epoch": 3.23, "learning_rate": 3.38558426036591e-05, "loss": 0.8332, "step": 35120 }, { "epoch": 3.23, "learning_rate": 3.385124574790843e-05, "loss": 0.8988, "step": 35130 }, { "epoch": 3.23, "learning_rate": 3.384664889215777e-05, "loss": 0.8319, "step": 35140 }, { "epoch": 3.23, "learning_rate": 3.3842052036407104e-05, "loss": 0.8825, "step": 35150 }, { "epoch": 3.23, "learning_rate": 3.383745518065643e-05, "loss": 0.8962, "step": 35160 }, { "epoch": 3.23, "learning_rate": 3.383285832490576e-05, "loss": 0.8713, "step": 35170 }, { "epoch": 3.23, "learning_rate": 3.38282614691551e-05, "loss": 0.8424, "step": 35180 }, { "epoch": 3.24, "learning_rate": 3.382366461340443e-05, "loss": 0.9886, "step": 35190 }, { "epoch": 3.24, "learning_rate": 3.3819067757653766e-05, "loss": 0.8718, "step": 35200 }, { "epoch": 3.24, "learning_rate": 3.38144709019031e-05, "loss": 0.8977, "step": 35210 }, { "epoch": 3.24, "learning_rate": 3.380987404615243e-05, "loss": 0.8814, "step": 35220 }, { "epoch": 3.24, "learning_rate": 3.380527719040177e-05, "loss": 0.8163, "step": 35230 }, { "epoch": 3.24, "learning_rate": 3.3800680334651105e-05, "loss": 0.838, "step": 35240 }, { "epoch": 3.24, "learning_rate": 3.379608347890043e-05, "loss": 0.9469, "step": 35250 }, { "epoch": 3.24, "learning_rate": 3.3791486623149764e-05, "loss": 0.8695, "step": 35260 }, { "epoch": 3.24, "learning_rate": 3.37868897673991e-05, "loss": 0.944, "step": 35270 }, { "epoch": 3.24, "learning_rate": 3.378229291164843e-05, "loss": 0.8787, "step": 35280 }, { "epoch": 3.24, "learning_rate": 3.377769605589777e-05, "loss": 0.8329, "step": 35290 }, { "epoch": 3.25, "learning_rate": 3.37730992001471e-05, "loss": 0.9235, "step": 35300 }, { "epoch": 3.25, "learning_rate": 3.376850234439643e-05, "loss": 0.8671, "step": 35310 }, { "epoch": 3.25, "learning_rate": 3.376390548864577e-05, "loss": 0.9332, "step": 35320 }, { "epoch": 3.25, "learning_rate": 3.3759308632895106e-05, "loss": 0.9524, "step": 35330 }, { "epoch": 3.25, "learning_rate": 3.375471177714443e-05, "loss": 0.8416, "step": 35340 }, { "epoch": 3.25, "learning_rate": 3.3750114921393765e-05, "loss": 0.9621, "step": 35350 }, { "epoch": 3.25, "learning_rate": 3.37455180656431e-05, "loss": 0.8339, "step": 35360 }, { "epoch": 3.25, "learning_rate": 3.374092120989243e-05, "loss": 0.9628, "step": 35370 }, { "epoch": 3.25, "learning_rate": 3.373632435414177e-05, "loss": 0.877, "step": 35380 }, { "epoch": 3.25, "learning_rate": 3.3731727498391104e-05, "loss": 0.9715, "step": 35390 }, { "epoch": 3.25, "learning_rate": 3.3727130642640434e-05, "loss": 0.9656, "step": 35400 }, { "epoch": 3.26, "learning_rate": 3.372253378688977e-05, "loss": 0.881, "step": 35410 }, { "epoch": 3.26, "learning_rate": 3.371793693113911e-05, "loss": 0.7468, "step": 35420 }, { "epoch": 3.26, "learning_rate": 3.371334007538843e-05, "loss": 0.9173, "step": 35430 }, { "epoch": 3.26, "learning_rate": 3.3708743219637767e-05, "loss": 0.7676, "step": 35440 }, { "epoch": 3.26, "learning_rate": 3.37041463638871e-05, "loss": 0.9244, "step": 35450 }, { "epoch": 3.26, "learning_rate": 3.369954950813643e-05, "loss": 0.8826, "step": 35460 }, { "epoch": 3.26, "learning_rate": 3.369495265238577e-05, "loss": 0.9039, "step": 35470 }, { "epoch": 3.26, "learning_rate": 3.3690355796635106e-05, "loss": 0.827, "step": 35480 }, { "epoch": 3.26, "learning_rate": 3.3685758940884435e-05, "loss": 0.9102, "step": 35490 }, { "epoch": 3.26, "learning_rate": 3.368116208513377e-05, "loss": 0.9399, "step": 35500 }, { "epoch": 3.26, "learning_rate": 3.367656522938311e-05, "loss": 0.9373, "step": 35510 }, { "epoch": 3.27, "learning_rate": 3.367196837363243e-05, "loss": 0.8053, "step": 35520 }, { "epoch": 3.27, "learning_rate": 3.366737151788177e-05, "loss": 0.9292, "step": 35530 }, { "epoch": 3.27, "learning_rate": 3.3662774662131104e-05, "loss": 0.8045, "step": 35540 }, { "epoch": 3.27, "learning_rate": 3.3658177806380434e-05, "loss": 0.9666, "step": 35550 }, { "epoch": 3.27, "learning_rate": 3.365358095062977e-05, "loss": 0.7391, "step": 35560 }, { "epoch": 3.27, "learning_rate": 3.364898409487911e-05, "loss": 0.8733, "step": 35570 }, { "epoch": 3.27, "learning_rate": 3.3644387239128437e-05, "loss": 0.8671, "step": 35580 }, { "epoch": 3.27, "learning_rate": 3.363979038337777e-05, "loss": 0.7993, "step": 35590 }, { "epoch": 3.27, "learning_rate": 3.363519352762711e-05, "loss": 0.8466, "step": 35600 }, { "epoch": 3.27, "learning_rate": 3.363059667187643e-05, "loss": 0.913, "step": 35610 }, { "epoch": 3.27, "learning_rate": 3.362599981612577e-05, "loss": 0.993, "step": 35620 }, { "epoch": 3.28, "learning_rate": 3.3621402960375105e-05, "loss": 0.8669, "step": 35630 }, { "epoch": 3.28, "learning_rate": 3.3616806104624435e-05, "loss": 0.8559, "step": 35640 }, { "epoch": 3.28, "learning_rate": 3.361220924887377e-05, "loss": 0.8669, "step": 35650 }, { "epoch": 3.28, "learning_rate": 3.360761239312311e-05, "loss": 0.9047, "step": 35660 }, { "epoch": 3.28, "learning_rate": 3.360301553737244e-05, "loss": 0.7842, "step": 35670 }, { "epoch": 3.28, "learning_rate": 3.3598418681621774e-05, "loss": 1.0393, "step": 35680 }, { "epoch": 3.28, "learning_rate": 3.359382182587111e-05, "loss": 0.8198, "step": 35690 }, { "epoch": 3.28, "learning_rate": 3.358922497012044e-05, "loss": 0.9653, "step": 35700 }, { "epoch": 3.28, "learning_rate": 3.358462811436977e-05, "loss": 1.1026, "step": 35710 }, { "epoch": 3.28, "learning_rate": 3.3580031258619107e-05, "loss": 0.7926, "step": 35720 }, { "epoch": 3.28, "learning_rate": 3.3575434402868436e-05, "loss": 0.9226, "step": 35730 }, { "epoch": 3.29, "learning_rate": 3.357083754711777e-05, "loss": 0.8428, "step": 35740 }, { "epoch": 3.29, "learning_rate": 3.356624069136711e-05, "loss": 0.8515, "step": 35750 }, { "epoch": 3.29, "learning_rate": 3.356164383561644e-05, "loss": 0.9077, "step": 35760 }, { "epoch": 3.29, "learning_rate": 3.3557046979865775e-05, "loss": 0.7909, "step": 35770 }, { "epoch": 3.29, "learning_rate": 3.355245012411511e-05, "loss": 0.8438, "step": 35780 }, { "epoch": 3.29, "learning_rate": 3.354785326836444e-05, "loss": 0.8066, "step": 35790 }, { "epoch": 3.29, "learning_rate": 3.354325641261377e-05, "loss": 0.9167, "step": 35800 }, { "epoch": 3.29, "learning_rate": 3.353865955686311e-05, "loss": 1.0451, "step": 35810 }, { "epoch": 3.29, "learning_rate": 3.353406270111244e-05, "loss": 0.9119, "step": 35820 }, { "epoch": 3.29, "learning_rate": 3.3529465845361774e-05, "loss": 0.9137, "step": 35830 }, { "epoch": 3.3, "learning_rate": 3.352486898961111e-05, "loss": 0.9197, "step": 35840 }, { "epoch": 3.3, "learning_rate": 3.352027213386044e-05, "loss": 0.8213, "step": 35850 }, { "epoch": 3.3, "learning_rate": 3.351567527810978e-05, "loss": 0.7945, "step": 35860 }, { "epoch": 3.3, "learning_rate": 3.351107842235911e-05, "loss": 0.9421, "step": 35870 }, { "epoch": 3.3, "learning_rate": 3.350648156660844e-05, "loss": 0.8511, "step": 35880 }, { "epoch": 3.3, "learning_rate": 3.350188471085777e-05, "loss": 0.9058, "step": 35890 }, { "epoch": 3.3, "learning_rate": 3.349728785510711e-05, "loss": 0.9864, "step": 35900 }, { "epoch": 3.3, "learning_rate": 3.349269099935644e-05, "loss": 0.9045, "step": 35910 }, { "epoch": 3.3, "learning_rate": 3.3488094143605775e-05, "loss": 0.7825, "step": 35920 }, { "epoch": 3.3, "learning_rate": 3.348349728785511e-05, "loss": 1.0111, "step": 35930 }, { "epoch": 3.3, "learning_rate": 3.347890043210444e-05, "loss": 0.9093, "step": 35940 }, { "epoch": 3.31, "learning_rate": 3.347430357635378e-05, "loss": 0.8687, "step": 35950 }, { "epoch": 3.31, "learning_rate": 3.3469706720603114e-05, "loss": 0.8731, "step": 35960 }, { "epoch": 3.31, "learning_rate": 3.3465109864852444e-05, "loss": 0.8436, "step": 35970 }, { "epoch": 3.31, "learning_rate": 3.3460513009101774e-05, "loss": 0.9515, "step": 35980 }, { "epoch": 3.31, "learning_rate": 3.345591615335111e-05, "loss": 0.8847, "step": 35990 }, { "epoch": 3.31, "learning_rate": 3.345131929760044e-05, "loss": 0.9414, "step": 36000 }, { "epoch": 3.31, "learning_rate": 3.3446722441849776e-05, "loss": 0.7895, "step": 36010 }, { "epoch": 3.31, "learning_rate": 3.344212558609911e-05, "loss": 0.9047, "step": 36020 }, { "epoch": 3.31, "learning_rate": 3.343752873034844e-05, "loss": 0.8892, "step": 36030 }, { "epoch": 3.31, "learning_rate": 3.343293187459778e-05, "loss": 0.9091, "step": 36040 }, { "epoch": 3.31, "learning_rate": 3.3428335018847115e-05, "loss": 0.8406, "step": 36050 }, { "epoch": 3.32, "learning_rate": 3.3423738163096445e-05, "loss": 0.9807, "step": 36060 }, { "epoch": 3.32, "learning_rate": 3.3419141307345775e-05, "loss": 0.8558, "step": 36070 }, { "epoch": 3.32, "learning_rate": 3.341454445159511e-05, "loss": 0.8092, "step": 36080 }, { "epoch": 3.32, "learning_rate": 3.340994759584444e-05, "loss": 0.8839, "step": 36090 }, { "epoch": 3.32, "learning_rate": 3.340535074009378e-05, "loss": 1.0304, "step": 36100 }, { "epoch": 3.32, "learning_rate": 3.3400753884343114e-05, "loss": 0.9681, "step": 36110 }, { "epoch": 3.32, "learning_rate": 3.3396157028592444e-05, "loss": 0.8815, "step": 36120 }, { "epoch": 3.32, "learning_rate": 3.339156017284178e-05, "loss": 0.8635, "step": 36130 }, { "epoch": 3.32, "learning_rate": 3.338696331709112e-05, "loss": 1.0072, "step": 36140 }, { "epoch": 3.32, "learning_rate": 3.3382366461340446e-05, "loss": 0.925, "step": 36150 }, { "epoch": 3.32, "learning_rate": 3.3377769605589776e-05, "loss": 0.7863, "step": 36160 }, { "epoch": 3.33, "learning_rate": 3.337317274983911e-05, "loss": 0.8686, "step": 36170 }, { "epoch": 3.33, "learning_rate": 3.336857589408844e-05, "loss": 0.8808, "step": 36180 }, { "epoch": 3.33, "learning_rate": 3.336397903833778e-05, "loss": 0.8699, "step": 36190 }, { "epoch": 3.33, "learning_rate": 3.3359382182587115e-05, "loss": 0.8704, "step": 36200 }, { "epoch": 3.33, "learning_rate": 3.3354785326836445e-05, "loss": 0.7814, "step": 36210 }, { "epoch": 3.33, "learning_rate": 3.335018847108578e-05, "loss": 0.9641, "step": 36220 }, { "epoch": 3.33, "learning_rate": 3.334559161533511e-05, "loss": 0.9732, "step": 36230 }, { "epoch": 3.33, "learning_rate": 3.334099475958445e-05, "loss": 0.8606, "step": 36240 }, { "epoch": 3.33, "learning_rate": 3.333639790383378e-05, "loss": 0.8566, "step": 36250 }, { "epoch": 3.33, "learning_rate": 3.3331801048083114e-05, "loss": 0.7959, "step": 36260 }, { "epoch": 3.33, "learning_rate": 3.3327204192332443e-05, "loss": 0.8075, "step": 36270 }, { "epoch": 3.34, "learning_rate": 3.332260733658178e-05, "loss": 0.9688, "step": 36280 }, { "epoch": 3.34, "learning_rate": 3.3318010480831116e-05, "loss": 0.9285, "step": 36290 }, { "epoch": 3.34, "learning_rate": 3.3313413625080446e-05, "loss": 0.8178, "step": 36300 }, { "epoch": 3.34, "learning_rate": 3.330881676932978e-05, "loss": 0.9312, "step": 36310 }, { "epoch": 3.34, "learning_rate": 3.330421991357911e-05, "loss": 0.7951, "step": 36320 }, { "epoch": 3.34, "learning_rate": 3.329962305782845e-05, "loss": 0.7111, "step": 36330 }, { "epoch": 3.34, "learning_rate": 3.329502620207778e-05, "loss": 0.8959, "step": 36340 }, { "epoch": 3.34, "learning_rate": 3.3290429346327115e-05, "loss": 0.8613, "step": 36350 }, { "epoch": 3.34, "learning_rate": 3.3285832490576445e-05, "loss": 0.8583, "step": 36360 }, { "epoch": 3.34, "learning_rate": 3.328123563482578e-05, "loss": 0.8979, "step": 36370 }, { "epoch": 3.34, "learning_rate": 3.327663877907512e-05, "loss": 0.7978, "step": 36380 }, { "epoch": 3.35, "learning_rate": 3.327204192332445e-05, "loss": 0.802, "step": 36390 }, { "epoch": 3.35, "learning_rate": 3.3267445067573784e-05, "loss": 0.8505, "step": 36400 }, { "epoch": 3.35, "learning_rate": 3.3262848211823113e-05, "loss": 0.9354, "step": 36410 }, { "epoch": 3.35, "learning_rate": 3.325825135607245e-05, "loss": 0.8736, "step": 36420 }, { "epoch": 3.35, "learning_rate": 3.325365450032178e-05, "loss": 0.8526, "step": 36430 }, { "epoch": 3.35, "learning_rate": 3.3249057644571116e-05, "loss": 0.8188, "step": 36440 }, { "epoch": 3.35, "learning_rate": 3.3244460788820446e-05, "loss": 0.8639, "step": 36450 }, { "epoch": 3.35, "learning_rate": 3.323986393306978e-05, "loss": 0.9014, "step": 36460 }, { "epoch": 3.35, "learning_rate": 3.323526707731912e-05, "loss": 0.8125, "step": 36470 }, { "epoch": 3.35, "learning_rate": 3.323067022156845e-05, "loss": 0.8564, "step": 36480 }, { "epoch": 3.35, "learning_rate": 3.3226073365817785e-05, "loss": 0.9903, "step": 36490 }, { "epoch": 3.36, "learning_rate": 3.3221476510067115e-05, "loss": 0.8932, "step": 36500 }, { "epoch": 3.36, "learning_rate": 3.321687965431645e-05, "loss": 0.8649, "step": 36510 }, { "epoch": 3.36, "learning_rate": 3.321228279856578e-05, "loss": 0.9021, "step": 36520 }, { "epoch": 3.36, "learning_rate": 3.320768594281512e-05, "loss": 0.8525, "step": 36530 }, { "epoch": 3.36, "learning_rate": 3.320308908706445e-05, "loss": 0.9203, "step": 36540 }, { "epoch": 3.36, "learning_rate": 3.3198492231313784e-05, "loss": 0.8906, "step": 36550 }, { "epoch": 3.36, "learning_rate": 3.319389537556311e-05, "loss": 0.9099, "step": 36560 }, { "epoch": 3.36, "learning_rate": 3.318929851981245e-05, "loss": 0.8686, "step": 36570 }, { "epoch": 3.36, "learning_rate": 3.3184701664061786e-05, "loss": 1.0292, "step": 36580 }, { "epoch": 3.36, "learning_rate": 3.3180104808311116e-05, "loss": 0.8747, "step": 36590 }, { "epoch": 3.36, "learning_rate": 3.317550795256045e-05, "loss": 0.8584, "step": 36600 }, { "epoch": 3.37, "learning_rate": 3.317091109680978e-05, "loss": 0.8519, "step": 36610 }, { "epoch": 3.37, "learning_rate": 3.316631424105912e-05, "loss": 0.9149, "step": 36620 }, { "epoch": 3.37, "learning_rate": 3.316171738530845e-05, "loss": 0.8592, "step": 36630 }, { "epoch": 3.37, "learning_rate": 3.3157120529557785e-05, "loss": 0.8492, "step": 36640 }, { "epoch": 3.37, "learning_rate": 3.3152523673807114e-05, "loss": 0.8362, "step": 36650 }, { "epoch": 3.37, "learning_rate": 3.314792681805645e-05, "loss": 0.8104, "step": 36660 }, { "epoch": 3.37, "learning_rate": 3.314332996230579e-05, "loss": 0.8128, "step": 36670 }, { "epoch": 3.37, "learning_rate": 3.313873310655512e-05, "loss": 0.8348, "step": 36680 }, { "epoch": 3.37, "learning_rate": 3.3134136250804454e-05, "loss": 0.8807, "step": 36690 }, { "epoch": 3.37, "learning_rate": 3.312953939505378e-05, "loss": 0.8559, "step": 36700 }, { "epoch": 3.38, "learning_rate": 3.312494253930312e-05, "loss": 0.9648, "step": 36710 }, { "epoch": 3.38, "learning_rate": 3.312034568355245e-05, "loss": 0.8726, "step": 36720 }, { "epoch": 3.38, "learning_rate": 3.3115748827801786e-05, "loss": 0.8292, "step": 36730 }, { "epoch": 3.38, "learning_rate": 3.3111151972051116e-05, "loss": 1.0946, "step": 36740 }, { "epoch": 3.38, "learning_rate": 3.310655511630045e-05, "loss": 0.8669, "step": 36750 }, { "epoch": 3.38, "learning_rate": 3.310195826054979e-05, "loss": 0.9603, "step": 36760 }, { "epoch": 3.38, "learning_rate": 3.309736140479912e-05, "loss": 0.9598, "step": 36770 }, { "epoch": 3.38, "learning_rate": 3.3092764549048455e-05, "loss": 0.778, "step": 36780 }, { "epoch": 3.38, "learning_rate": 3.3088167693297784e-05, "loss": 0.9678, "step": 36790 }, { "epoch": 3.38, "learning_rate": 3.308357083754712e-05, "loss": 0.8948, "step": 36800 }, { "epoch": 3.38, "learning_rate": 3.307897398179645e-05, "loss": 0.8723, "step": 36810 }, { "epoch": 3.39, "learning_rate": 3.307437712604579e-05, "loss": 0.7964, "step": 36820 }, { "epoch": 3.39, "learning_rate": 3.306978027029512e-05, "loss": 0.8847, "step": 36830 }, { "epoch": 3.39, "learning_rate": 3.306518341454445e-05, "loss": 0.9146, "step": 36840 }, { "epoch": 3.39, "learning_rate": 3.306058655879379e-05, "loss": 0.9829, "step": 36850 }, { "epoch": 3.39, "learning_rate": 3.305598970304312e-05, "loss": 1.044, "step": 36860 }, { "epoch": 3.39, "learning_rate": 3.3051392847292456e-05, "loss": 0.9265, "step": 36870 }, { "epoch": 3.39, "learning_rate": 3.3046795991541786e-05, "loss": 0.8669, "step": 36880 }, { "epoch": 3.39, "learning_rate": 3.3042199135791115e-05, "loss": 0.8347, "step": 36890 }, { "epoch": 3.39, "learning_rate": 3.303760228004045e-05, "loss": 0.9204, "step": 36900 }, { "epoch": 3.39, "learning_rate": 3.303300542428979e-05, "loss": 1.0002, "step": 36910 }, { "epoch": 3.39, "learning_rate": 3.302840856853912e-05, "loss": 0.9019, "step": 36920 }, { "epoch": 3.4, "learning_rate": 3.3023811712788454e-05, "loss": 0.9164, "step": 36930 }, { "epoch": 3.4, "learning_rate": 3.301921485703779e-05, "loss": 0.9361, "step": 36940 }, { "epoch": 3.4, "learning_rate": 3.301461800128712e-05, "loss": 0.9598, "step": 36950 }, { "epoch": 3.4, "learning_rate": 3.301002114553646e-05, "loss": 0.9448, "step": 36960 }, { "epoch": 3.4, "learning_rate": 3.300542428978579e-05, "loss": 0.8678, "step": 36970 }, { "epoch": 3.4, "learning_rate": 3.3000827434035117e-05, "loss": 0.8453, "step": 36980 }, { "epoch": 3.4, "learning_rate": 3.299623057828445e-05, "loss": 0.7808, "step": 36990 }, { "epoch": 3.4, "learning_rate": 3.299163372253379e-05, "loss": 1.0593, "step": 37000 }, { "epoch": 3.4, "learning_rate": 3.298703686678312e-05, "loss": 0.8958, "step": 37010 }, { "epoch": 3.4, "learning_rate": 3.2982440011032456e-05, "loss": 0.896, "step": 37020 }, { "epoch": 3.4, "learning_rate": 3.297784315528179e-05, "loss": 0.7152, "step": 37030 }, { "epoch": 3.41, "learning_rate": 3.297324629953112e-05, "loss": 0.8523, "step": 37040 }, { "epoch": 3.41, "learning_rate": 3.296864944378046e-05, "loss": 0.7689, "step": 37050 }, { "epoch": 3.41, "learning_rate": 3.296405258802979e-05, "loss": 0.8885, "step": 37060 }, { "epoch": 3.41, "learning_rate": 3.295945573227912e-05, "loss": 0.8705, "step": 37070 }, { "epoch": 3.41, "learning_rate": 3.2954858876528454e-05, "loss": 1.0037, "step": 37080 }, { "epoch": 3.41, "learning_rate": 3.295026202077779e-05, "loss": 0.9165, "step": 37090 }, { "epoch": 3.41, "learning_rate": 3.294566516502712e-05, "loss": 0.8806, "step": 37100 }, { "epoch": 3.41, "learning_rate": 3.294106830927646e-05, "loss": 0.9214, "step": 37110 }, { "epoch": 3.41, "learning_rate": 3.293647145352579e-05, "loss": 0.7886, "step": 37120 }, { "epoch": 3.41, "learning_rate": 3.293187459777512e-05, "loss": 0.8393, "step": 37130 }, { "epoch": 3.41, "learning_rate": 3.292727774202446e-05, "loss": 0.9017, "step": 37140 }, { "epoch": 3.42, "learning_rate": 3.292268088627379e-05, "loss": 0.9121, "step": 37150 }, { "epoch": 3.42, "learning_rate": 3.291808403052312e-05, "loss": 1.052, "step": 37160 }, { "epoch": 3.42, "learning_rate": 3.2913487174772455e-05, "loss": 0.8703, "step": 37170 }, { "epoch": 3.42, "learning_rate": 3.290889031902179e-05, "loss": 0.8159, "step": 37180 }, { "epoch": 3.42, "learning_rate": 3.290429346327112e-05, "loss": 0.8253, "step": 37190 }, { "epoch": 3.42, "learning_rate": 3.289969660752046e-05, "loss": 0.9078, "step": 37200 }, { "epoch": 3.42, "learning_rate": 3.2895099751769795e-05, "loss": 0.9397, "step": 37210 }, { "epoch": 3.42, "learning_rate": 3.2890502896019124e-05, "loss": 0.9293, "step": 37220 }, { "epoch": 3.42, "learning_rate": 3.288590604026846e-05, "loss": 0.827, "step": 37230 }, { "epoch": 3.42, "learning_rate": 3.288130918451779e-05, "loss": 0.9028, "step": 37240 }, { "epoch": 3.42, "learning_rate": 3.287671232876712e-05, "loss": 1.0455, "step": 37250 }, { "epoch": 3.43, "learning_rate": 3.2872115473016457e-05, "loss": 1.0202, "step": 37260 }, { "epoch": 3.43, "learning_rate": 3.286751861726579e-05, "loss": 0.8092, "step": 37270 }, { "epoch": 3.43, "learning_rate": 3.286292176151512e-05, "loss": 0.9324, "step": 37280 }, { "epoch": 3.43, "learning_rate": 3.285832490576446e-05, "loss": 0.7294, "step": 37290 }, { "epoch": 3.43, "learning_rate": 3.2853728050013796e-05, "loss": 0.8586, "step": 37300 }, { "epoch": 3.43, "learning_rate": 3.2849131194263125e-05, "loss": 0.9207, "step": 37310 }, { "epoch": 3.43, "learning_rate": 3.284453433851246e-05, "loss": 0.844, "step": 37320 }, { "epoch": 3.43, "learning_rate": 3.283993748276179e-05, "loss": 0.9576, "step": 37330 }, { "epoch": 3.43, "learning_rate": 3.283534062701112e-05, "loss": 1.0501, "step": 37340 }, { "epoch": 3.43, "learning_rate": 3.283074377126046e-05, "loss": 0.8832, "step": 37350 }, { "epoch": 3.43, "learning_rate": 3.2826146915509794e-05, "loss": 0.9086, "step": 37360 }, { "epoch": 3.44, "learning_rate": 3.2821550059759124e-05, "loss": 0.8788, "step": 37370 }, { "epoch": 3.44, "learning_rate": 3.281695320400846e-05, "loss": 0.8528, "step": 37380 }, { "epoch": 3.44, "learning_rate": 3.28123563482578e-05, "loss": 0.8061, "step": 37390 }, { "epoch": 3.44, "learning_rate": 3.280775949250713e-05, "loss": 0.9123, "step": 37400 }, { "epoch": 3.44, "learning_rate": 3.280316263675646e-05, "loss": 0.9123, "step": 37410 }, { "epoch": 3.44, "learning_rate": 3.279856578100579e-05, "loss": 0.9227, "step": 37420 }, { "epoch": 3.44, "learning_rate": 3.279396892525512e-05, "loss": 0.9081, "step": 37430 }, { "epoch": 3.44, "learning_rate": 3.278937206950446e-05, "loss": 0.9125, "step": 37440 }, { "epoch": 3.44, "learning_rate": 3.2784775213753795e-05, "loss": 0.7992, "step": 37450 }, { "epoch": 3.44, "learning_rate": 3.2780178358003125e-05, "loss": 0.7881, "step": 37460 }, { "epoch": 3.44, "learning_rate": 3.277558150225246e-05, "loss": 0.8513, "step": 37470 }, { "epoch": 3.45, "learning_rate": 3.27709846465018e-05, "loss": 0.8091, "step": 37480 }, { "epoch": 3.45, "learning_rate": 3.276638779075113e-05, "loss": 0.9469, "step": 37490 }, { "epoch": 3.45, "learning_rate": 3.2761790935000464e-05, "loss": 0.8446, "step": 37500 }, { "epoch": 3.45, "learning_rate": 3.2757194079249794e-05, "loss": 0.8915, "step": 37510 }, { "epoch": 3.45, "learning_rate": 3.2752597223499124e-05, "loss": 0.9035, "step": 37520 }, { "epoch": 3.45, "learning_rate": 3.274800036774846e-05, "loss": 0.9351, "step": 37530 }, { "epoch": 3.45, "learning_rate": 3.27434035119978e-05, "loss": 0.914, "step": 37540 }, { "epoch": 3.45, "learning_rate": 3.2738806656247126e-05, "loss": 0.8497, "step": 37550 }, { "epoch": 3.45, "learning_rate": 3.273420980049646e-05, "loss": 0.7977, "step": 37560 }, { "epoch": 3.45, "learning_rate": 3.27296129447458e-05, "loss": 0.9454, "step": 37570 }, { "epoch": 3.45, "learning_rate": 3.272501608899513e-05, "loss": 0.8863, "step": 37580 }, { "epoch": 3.46, "learning_rate": 3.2720419233244466e-05, "loss": 0.8454, "step": 37590 }, { "epoch": 3.46, "learning_rate": 3.2715822377493795e-05, "loss": 0.9411, "step": 37600 }, { "epoch": 3.46, "learning_rate": 3.2711225521743125e-05, "loss": 0.8989, "step": 37610 }, { "epoch": 3.46, "learning_rate": 3.270662866599246e-05, "loss": 0.9459, "step": 37620 }, { "epoch": 3.46, "learning_rate": 3.27020318102418e-05, "loss": 0.8186, "step": 37630 }, { "epoch": 3.46, "learning_rate": 3.269743495449113e-05, "loss": 0.9418, "step": 37640 }, { "epoch": 3.46, "learning_rate": 3.2692838098740464e-05, "loss": 0.681, "step": 37650 }, { "epoch": 3.46, "learning_rate": 3.26882412429898e-05, "loss": 0.8827, "step": 37660 }, { "epoch": 3.46, "learning_rate": 3.268364438723913e-05, "loss": 0.9151, "step": 37670 }, { "epoch": 3.46, "learning_rate": 3.267904753148847e-05, "loss": 0.763, "step": 37680 }, { "epoch": 3.47, "learning_rate": 3.2674450675737796e-05, "loss": 0.8708, "step": 37690 }, { "epoch": 3.47, "learning_rate": 3.2669853819987126e-05, "loss": 0.8471, "step": 37700 }, { "epoch": 3.47, "learning_rate": 3.266525696423646e-05, "loss": 0.8583, "step": 37710 }, { "epoch": 3.47, "learning_rate": 3.26606601084858e-05, "loss": 0.8899, "step": 37720 }, { "epoch": 3.47, "learning_rate": 3.265606325273513e-05, "loss": 0.781, "step": 37730 }, { "epoch": 3.47, "learning_rate": 3.2651466396984465e-05, "loss": 0.9328, "step": 37740 }, { "epoch": 3.47, "learning_rate": 3.26468695412338e-05, "loss": 0.9911, "step": 37750 }, { "epoch": 3.47, "learning_rate": 3.264227268548313e-05, "loss": 0.9712, "step": 37760 }, { "epoch": 3.47, "learning_rate": 3.263767582973247e-05, "loss": 0.8592, "step": 37770 }, { "epoch": 3.47, "learning_rate": 3.26330789739818e-05, "loss": 0.8645, "step": 37780 }, { "epoch": 3.47, "learning_rate": 3.262848211823113e-05, "loss": 0.906, "step": 37790 }, { "epoch": 3.48, "learning_rate": 3.2623885262480464e-05, "loss": 0.7746, "step": 37800 }, { "epoch": 3.48, "learning_rate": 3.26192884067298e-05, "loss": 0.8373, "step": 37810 }, { "epoch": 3.48, "learning_rate": 3.261469155097913e-05, "loss": 0.9504, "step": 37820 }, { "epoch": 3.48, "learning_rate": 3.2610094695228466e-05, "loss": 0.8487, "step": 37830 }, { "epoch": 3.48, "learning_rate": 3.26054978394778e-05, "loss": 0.8725, "step": 37840 }, { "epoch": 3.48, "learning_rate": 3.260090098372713e-05, "loss": 0.8452, "step": 37850 }, { "epoch": 3.48, "learning_rate": 3.259630412797647e-05, "loss": 0.8772, "step": 37860 }, { "epoch": 3.48, "learning_rate": 3.25917072722258e-05, "loss": 0.9077, "step": 37870 }, { "epoch": 3.48, "learning_rate": 3.258711041647513e-05, "loss": 0.8437, "step": 37880 }, { "epoch": 3.48, "learning_rate": 3.2582513560724465e-05, "loss": 0.8211, "step": 37890 }, { "epoch": 3.48, "learning_rate": 3.25779167049738e-05, "loss": 0.9532, "step": 37900 }, { "epoch": 3.49, "learning_rate": 3.257331984922313e-05, "loss": 0.8704, "step": 37910 }, { "epoch": 3.49, "learning_rate": 3.256872299347247e-05, "loss": 0.8078, "step": 37920 }, { "epoch": 3.49, "learning_rate": 3.2564126137721804e-05, "loss": 0.8503, "step": 37930 }, { "epoch": 3.49, "learning_rate": 3.2559529281971134e-05, "loss": 0.8316, "step": 37940 }, { "epoch": 3.49, "learning_rate": 3.255493242622047e-05, "loss": 0.9086, "step": 37950 }, { "epoch": 3.49, "learning_rate": 3.25503355704698e-05, "loss": 0.8748, "step": 37960 }, { "epoch": 3.49, "learning_rate": 3.254573871471913e-05, "loss": 0.9292, "step": 37970 }, { "epoch": 3.49, "learning_rate": 3.2541141858968466e-05, "loss": 0.7879, "step": 37980 }, { "epoch": 3.49, "learning_rate": 3.25365450032178e-05, "loss": 0.8707, "step": 37990 }, { "epoch": 3.49, "learning_rate": 3.253194814746713e-05, "loss": 0.9451, "step": 38000 }, { "epoch": 3.49, "learning_rate": 3.252735129171647e-05, "loss": 0.8885, "step": 38010 }, { "epoch": 3.5, "learning_rate": 3.2522754435965805e-05, "loss": 1.0151, "step": 38020 }, { "epoch": 3.5, "learning_rate": 3.2518157580215135e-05, "loss": 0.8495, "step": 38030 }, { "epoch": 3.5, "learning_rate": 3.251356072446447e-05, "loss": 0.7566, "step": 38040 }, { "epoch": 3.5, "learning_rate": 3.25089638687138e-05, "loss": 0.7904, "step": 38050 }, { "epoch": 3.5, "learning_rate": 3.250436701296313e-05, "loss": 0.9226, "step": 38060 }, { "epoch": 3.5, "learning_rate": 3.249977015721247e-05, "loss": 0.7924, "step": 38070 }, { "epoch": 3.5, "learning_rate": 3.2495173301461804e-05, "loss": 0.9719, "step": 38080 }, { "epoch": 3.5, "learning_rate": 3.2490576445711134e-05, "loss": 0.8114, "step": 38090 }, { "epoch": 3.5, "learning_rate": 3.248597958996047e-05, "loss": 0.8696, "step": 38100 }, { "epoch": 3.5, "learning_rate": 3.2481382734209806e-05, "loss": 0.9212, "step": 38110 }, { "epoch": 3.5, "learning_rate": 3.2476785878459136e-05, "loss": 0.788, "step": 38120 }, { "epoch": 3.51, "learning_rate": 3.247218902270847e-05, "loss": 0.8213, "step": 38130 }, { "epoch": 3.51, "learning_rate": 3.24675921669578e-05, "loss": 0.7917, "step": 38140 }, { "epoch": 3.51, "learning_rate": 3.246299531120713e-05, "loss": 0.7881, "step": 38150 }, { "epoch": 3.51, "learning_rate": 3.245839845545647e-05, "loss": 0.8178, "step": 38160 }, { "epoch": 3.51, "learning_rate": 3.2453801599705805e-05, "loss": 0.8202, "step": 38170 }, { "epoch": 3.51, "learning_rate": 3.2449204743955135e-05, "loss": 0.8411, "step": 38180 }, { "epoch": 3.51, "learning_rate": 3.244460788820447e-05, "loss": 0.956, "step": 38190 }, { "epoch": 3.51, "learning_rate": 3.24400110324538e-05, "loss": 0.9721, "step": 38200 }, { "epoch": 3.51, "learning_rate": 3.243541417670314e-05, "loss": 0.9031, "step": 38210 }, { "epoch": 3.51, "learning_rate": 3.2430817320952474e-05, "loss": 0.8784, "step": 38220 }, { "epoch": 3.51, "learning_rate": 3.2426220465201804e-05, "loss": 0.9162, "step": 38230 }, { "epoch": 3.52, "learning_rate": 3.242162360945113e-05, "loss": 0.825, "step": 38240 }, { "epoch": 3.52, "learning_rate": 3.241702675370047e-05, "loss": 0.9369, "step": 38250 }, { "epoch": 3.52, "learning_rate": 3.2412429897949806e-05, "loss": 0.8403, "step": 38260 }, { "epoch": 3.52, "learning_rate": 3.2407833042199136e-05, "loss": 0.8308, "step": 38270 }, { "epoch": 3.52, "learning_rate": 3.240323618644847e-05, "loss": 0.9391, "step": 38280 }, { "epoch": 3.52, "learning_rate": 3.23986393306978e-05, "loss": 0.8427, "step": 38290 }, { "epoch": 3.52, "learning_rate": 3.239404247494714e-05, "loss": 0.9708, "step": 38300 }, { "epoch": 3.52, "learning_rate": 3.2389445619196475e-05, "loss": 0.9392, "step": 38310 }, { "epoch": 3.52, "learning_rate": 3.2384848763445805e-05, "loss": 0.988, "step": 38320 }, { "epoch": 3.52, "learning_rate": 3.2380251907695134e-05, "loss": 0.9211, "step": 38330 }, { "epoch": 3.52, "learning_rate": 3.237565505194447e-05, "loss": 1.0102, "step": 38340 }, { "epoch": 3.53, "learning_rate": 3.237105819619381e-05, "loss": 0.8922, "step": 38350 }, { "epoch": 3.53, "learning_rate": 3.236646134044314e-05, "loss": 0.7884, "step": 38360 }, { "epoch": 3.53, "learning_rate": 3.2361864484692474e-05, "loss": 0.937, "step": 38370 }, { "epoch": 3.53, "learning_rate": 3.23572676289418e-05, "loss": 0.8829, "step": 38380 }, { "epoch": 3.53, "learning_rate": 3.235267077319114e-05, "loss": 0.9776, "step": 38390 }, { "epoch": 3.53, "learning_rate": 3.2348073917440476e-05, "loss": 0.8541, "step": 38400 }, { "epoch": 3.53, "learning_rate": 3.2343477061689806e-05, "loss": 0.9413, "step": 38410 }, { "epoch": 3.53, "learning_rate": 3.2338880205939136e-05, "loss": 0.8704, "step": 38420 }, { "epoch": 3.53, "learning_rate": 3.233428335018847e-05, "loss": 0.9513, "step": 38430 }, { "epoch": 3.53, "learning_rate": 3.232968649443781e-05, "loss": 0.8166, "step": 38440 }, { "epoch": 3.53, "learning_rate": 3.232508963868714e-05, "loss": 0.8516, "step": 38450 }, { "epoch": 3.54, "learning_rate": 3.2320492782936475e-05, "loss": 0.9293, "step": 38460 }, { "epoch": 3.54, "learning_rate": 3.2315895927185805e-05, "loss": 0.8951, "step": 38470 }, { "epoch": 3.54, "learning_rate": 3.231129907143514e-05, "loss": 0.9, "step": 38480 }, { "epoch": 3.54, "learning_rate": 3.230670221568448e-05, "loss": 0.9131, "step": 38490 }, { "epoch": 3.54, "learning_rate": 3.230210535993381e-05, "loss": 0.744, "step": 38500 }, { "epoch": 3.54, "learning_rate": 3.229750850418314e-05, "loss": 0.8385, "step": 38510 }, { "epoch": 3.54, "learning_rate": 3.229291164843247e-05, "loss": 0.8589, "step": 38520 }, { "epoch": 3.54, "learning_rate": 3.22883147926818e-05, "loss": 0.9197, "step": 38530 }, { "epoch": 3.54, "learning_rate": 3.228371793693114e-05, "loss": 0.8356, "step": 38540 }, { "epoch": 3.54, "learning_rate": 3.2279121081180476e-05, "loss": 0.9629, "step": 38550 }, { "epoch": 3.55, "learning_rate": 3.2274524225429806e-05, "loss": 1.0082, "step": 38560 }, { "epoch": 3.55, "learning_rate": 3.226992736967914e-05, "loss": 0.9093, "step": 38570 }, { "epoch": 3.55, "learning_rate": 3.226533051392848e-05, "loss": 0.9927, "step": 38580 }, { "epoch": 3.55, "learning_rate": 3.226073365817781e-05, "loss": 0.904, "step": 38590 }, { "epoch": 3.55, "learning_rate": 3.225613680242714e-05, "loss": 0.8669, "step": 38600 }, { "epoch": 3.55, "learning_rate": 3.2251539946676475e-05, "loss": 0.9235, "step": 38610 }, { "epoch": 3.55, "learning_rate": 3.2246943090925804e-05, "loss": 0.9136, "step": 38620 }, { "epoch": 3.55, "learning_rate": 3.224234623517514e-05, "loss": 0.8728, "step": 38630 }, { "epoch": 3.55, "learning_rate": 3.223774937942448e-05, "loss": 0.9509, "step": 38640 }, { "epoch": 3.55, "learning_rate": 3.223315252367381e-05, "loss": 0.8315, "step": 38650 }, { "epoch": 3.55, "learning_rate": 3.222855566792314e-05, "loss": 0.982, "step": 38660 }, { "epoch": 3.56, "learning_rate": 3.222395881217248e-05, "loss": 0.9317, "step": 38670 }, { "epoch": 3.56, "learning_rate": 3.221936195642181e-05, "loss": 0.8272, "step": 38680 }, { "epoch": 3.56, "learning_rate": 3.221476510067114e-05, "loss": 0.9112, "step": 38690 }, { "epoch": 3.56, "learning_rate": 3.2210168244920476e-05, "loss": 0.8537, "step": 38700 }, { "epoch": 3.56, "learning_rate": 3.2205571389169805e-05, "loss": 0.8476, "step": 38710 }, { "epoch": 3.56, "learning_rate": 3.220097453341914e-05, "loss": 0.9291, "step": 38720 }, { "epoch": 3.56, "learning_rate": 3.219637767766848e-05, "loss": 0.9132, "step": 38730 }, { "epoch": 3.56, "learning_rate": 3.219178082191781e-05, "loss": 0.9712, "step": 38740 }, { "epoch": 3.56, "learning_rate": 3.2187183966167145e-05, "loss": 0.9464, "step": 38750 }, { "epoch": 3.56, "learning_rate": 3.218258711041648e-05, "loss": 0.8728, "step": 38760 }, { "epoch": 3.56, "learning_rate": 3.217799025466581e-05, "loss": 0.8487, "step": 38770 }, { "epoch": 3.57, "learning_rate": 3.217339339891514e-05, "loss": 0.7779, "step": 38780 }, { "epoch": 3.57, "learning_rate": 3.216879654316448e-05, "loss": 1.0355, "step": 38790 }, { "epoch": 3.57, "learning_rate": 3.216419968741381e-05, "loss": 0.8285, "step": 38800 }, { "epoch": 3.57, "learning_rate": 3.215960283166314e-05, "loss": 0.8808, "step": 38810 }, { "epoch": 3.57, "learning_rate": 3.215500597591248e-05, "loss": 0.8815, "step": 38820 }, { "epoch": 3.57, "learning_rate": 3.215040912016181e-05, "loss": 0.8875, "step": 38830 }, { "epoch": 3.57, "learning_rate": 3.2145812264411146e-05, "loss": 0.8101, "step": 38840 }, { "epoch": 3.57, "learning_rate": 3.214121540866048e-05, "loss": 0.9092, "step": 38850 }, { "epoch": 3.57, "learning_rate": 3.2136618552909805e-05, "loss": 0.9472, "step": 38860 }, { "epoch": 3.57, "learning_rate": 3.213202169715914e-05, "loss": 0.8876, "step": 38870 }, { "epoch": 3.57, "learning_rate": 3.212742484140848e-05, "loss": 0.8674, "step": 38880 }, { "epoch": 3.58, "learning_rate": 3.212282798565781e-05, "loss": 0.8565, "step": 38890 }, { "epoch": 3.58, "learning_rate": 3.2118231129907144e-05, "loss": 0.8711, "step": 38900 }, { "epoch": 3.58, "learning_rate": 3.211363427415648e-05, "loss": 0.9591, "step": 38910 }, { "epoch": 3.58, "learning_rate": 3.210903741840581e-05, "loss": 0.8694, "step": 38920 }, { "epoch": 3.58, "learning_rate": 3.210444056265515e-05, "loss": 0.8543, "step": 38930 }, { "epoch": 3.58, "learning_rate": 3.2099843706904483e-05, "loss": 0.7765, "step": 38940 }, { "epoch": 3.58, "learning_rate": 3.2095246851153806e-05, "loss": 0.9699, "step": 38950 }, { "epoch": 3.58, "learning_rate": 3.209064999540314e-05, "loss": 0.9343, "step": 38960 }, { "epoch": 3.58, "learning_rate": 3.208605313965248e-05, "loss": 0.8523, "step": 38970 }, { "epoch": 3.58, "learning_rate": 3.208145628390181e-05, "loss": 0.7564, "step": 38980 }, { "epoch": 3.58, "learning_rate": 3.2076859428151145e-05, "loss": 0.8022, "step": 38990 }, { "epoch": 3.59, "learning_rate": 3.207226257240048e-05, "loss": 0.8665, "step": 39000 }, { "epoch": 3.59, "learning_rate": 3.206766571664981e-05, "loss": 0.9385, "step": 39010 }, { "epoch": 3.59, "learning_rate": 3.206306886089915e-05, "loss": 0.8166, "step": 39020 }, { "epoch": 3.59, "learning_rate": 3.2058472005148485e-05, "loss": 0.7974, "step": 39030 }, { "epoch": 3.59, "learning_rate": 3.205387514939781e-05, "loss": 0.8444, "step": 39040 }, { "epoch": 3.59, "learning_rate": 3.2049278293647144e-05, "loss": 0.8498, "step": 39050 }, { "epoch": 3.59, "learning_rate": 3.204468143789648e-05, "loss": 0.9195, "step": 39060 }, { "epoch": 3.59, "learning_rate": 3.204008458214581e-05, "loss": 0.7424, "step": 39070 }, { "epoch": 3.59, "learning_rate": 3.203548772639515e-05, "loss": 0.8781, "step": 39080 }, { "epoch": 3.59, "learning_rate": 3.203089087064448e-05, "loss": 0.8479, "step": 39090 }, { "epoch": 3.59, "learning_rate": 3.202629401489381e-05, "loss": 0.8595, "step": 39100 }, { "epoch": 3.6, "learning_rate": 3.202169715914315e-05, "loss": 1.0309, "step": 39110 }, { "epoch": 3.6, "learning_rate": 3.2017100303392486e-05, "loss": 0.8203, "step": 39120 }, { "epoch": 3.6, "learning_rate": 3.2012503447641816e-05, "loss": 0.8268, "step": 39130 }, { "epoch": 3.6, "learning_rate": 3.2007906591891145e-05, "loss": 0.8853, "step": 39140 }, { "epoch": 3.6, "learning_rate": 3.200330973614048e-05, "loss": 0.9168, "step": 39150 }, { "epoch": 3.6, "learning_rate": 3.199871288038981e-05, "loss": 1.0125, "step": 39160 }, { "epoch": 3.6, "learning_rate": 3.199411602463915e-05, "loss": 0.8512, "step": 39170 }, { "epoch": 3.6, "learning_rate": 3.1989519168888484e-05, "loss": 0.8941, "step": 39180 }, { "epoch": 3.6, "learning_rate": 3.1984922313137814e-05, "loss": 0.858, "step": 39190 }, { "epoch": 3.6, "learning_rate": 3.198032545738715e-05, "loss": 0.9017, "step": 39200 }, { "epoch": 3.6, "learning_rate": 3.197572860163649e-05, "loss": 0.9731, "step": 39210 }, { "epoch": 3.61, "learning_rate": 3.197113174588582e-05, "loss": 0.8289, "step": 39220 }, { "epoch": 3.61, "learning_rate": 3.1966534890135146e-05, "loss": 0.9207, "step": 39230 }, { "epoch": 3.61, "learning_rate": 3.196193803438448e-05, "loss": 0.9677, "step": 39240 }, { "epoch": 3.61, "learning_rate": 3.195734117863381e-05, "loss": 0.8984, "step": 39250 }, { "epoch": 3.61, "learning_rate": 3.195274432288315e-05, "loss": 0.8621, "step": 39260 }, { "epoch": 3.61, "learning_rate": 3.1948147467132486e-05, "loss": 0.8493, "step": 39270 }, { "epoch": 3.61, "learning_rate": 3.1943550611381815e-05, "loss": 0.8552, "step": 39280 }, { "epoch": 3.61, "learning_rate": 3.193895375563115e-05, "loss": 0.9748, "step": 39290 }, { "epoch": 3.61, "learning_rate": 3.193435689988049e-05, "loss": 0.868, "step": 39300 }, { "epoch": 3.61, "learning_rate": 3.192976004412982e-05, "loss": 0.8885, "step": 39310 }, { "epoch": 3.61, "learning_rate": 3.192516318837915e-05, "loss": 0.9271, "step": 39320 }, { "epoch": 3.62, "learning_rate": 3.1920566332628484e-05, "loss": 0.8825, "step": 39330 }, { "epoch": 3.62, "learning_rate": 3.1915969476877814e-05, "loss": 0.8979, "step": 39340 }, { "epoch": 3.62, "learning_rate": 3.191137262112715e-05, "loss": 0.8076, "step": 39350 }, { "epoch": 3.62, "learning_rate": 3.190677576537649e-05, "loss": 0.8937, "step": 39360 }, { "epoch": 3.62, "learning_rate": 3.1902178909625816e-05, "loss": 0.8418, "step": 39370 }, { "epoch": 3.62, "learning_rate": 3.189758205387515e-05, "loss": 0.9028, "step": 39380 }, { "epoch": 3.62, "learning_rate": 3.189298519812449e-05, "loss": 0.9685, "step": 39390 }, { "epoch": 3.62, "learning_rate": 3.188838834237382e-05, "loss": 0.867, "step": 39400 }, { "epoch": 3.62, "learning_rate": 3.188379148662315e-05, "loss": 0.8308, "step": 39410 }, { "epoch": 3.62, "learning_rate": 3.1879194630872485e-05, "loss": 0.9582, "step": 39420 }, { "epoch": 3.63, "learning_rate": 3.1874597775121815e-05, "loss": 1.0232, "step": 39430 }, { "epoch": 3.63, "learning_rate": 3.187000091937115e-05, "loss": 0.8616, "step": 39440 }, { "epoch": 3.63, "learning_rate": 3.186540406362049e-05, "loss": 0.8189, "step": 39450 }, { "epoch": 3.63, "learning_rate": 3.186080720786982e-05, "loss": 0.8929, "step": 39460 }, { "epoch": 3.63, "learning_rate": 3.1856210352119154e-05, "loss": 0.8565, "step": 39470 }, { "epoch": 3.63, "learning_rate": 3.185161349636849e-05, "loss": 0.902, "step": 39480 }, { "epoch": 3.63, "learning_rate": 3.184701664061782e-05, "loss": 1.0307, "step": 39490 }, { "epoch": 3.63, "learning_rate": 3.184241978486715e-05, "loss": 0.8915, "step": 39500 }, { "epoch": 3.63, "learning_rate": 3.1837822929116486e-05, "loss": 0.8105, "step": 39510 }, { "epoch": 3.63, "learning_rate": 3.1833226073365816e-05, "loss": 0.8996, "step": 39520 }, { "epoch": 3.63, "learning_rate": 3.182862921761515e-05, "loss": 0.7682, "step": 39530 }, { "epoch": 3.64, "learning_rate": 3.182403236186449e-05, "loss": 0.8926, "step": 39540 }, { "epoch": 3.64, "learning_rate": 3.181943550611382e-05, "loss": 0.8573, "step": 39550 }, { "epoch": 3.64, "learning_rate": 3.1814838650363155e-05, "loss": 0.8451, "step": 39560 }, { "epoch": 3.64, "learning_rate": 3.181024179461249e-05, "loss": 0.8709, "step": 39570 }, { "epoch": 3.64, "learning_rate": 3.180564493886182e-05, "loss": 0.9109, "step": 39580 }, { "epoch": 3.64, "learning_rate": 3.180104808311115e-05, "loss": 0.9761, "step": 39590 }, { "epoch": 3.64, "learning_rate": 3.179645122736049e-05, "loss": 0.9657, "step": 39600 }, { "epoch": 3.64, "learning_rate": 3.179185437160982e-05, "loss": 0.8977, "step": 39610 }, { "epoch": 3.64, "learning_rate": 3.1787257515859154e-05, "loss": 0.9573, "step": 39620 }, { "epoch": 3.64, "learning_rate": 3.178266066010849e-05, "loss": 0.8582, "step": 39630 }, { "epoch": 3.64, "learning_rate": 3.177806380435782e-05, "loss": 0.9792, "step": 39640 }, { "epoch": 3.65, "learning_rate": 3.1773466948607157e-05, "loss": 0.8773, "step": 39650 }, { "epoch": 3.65, "learning_rate": 3.176887009285649e-05, "loss": 0.9229, "step": 39660 }, { "epoch": 3.65, "learning_rate": 3.176427323710582e-05, "loss": 0.889, "step": 39670 }, { "epoch": 3.65, "learning_rate": 3.175967638135515e-05, "loss": 0.7453, "step": 39680 }, { "epoch": 3.65, "learning_rate": 3.175507952560449e-05, "loss": 0.9183, "step": 39690 }, { "epoch": 3.65, "learning_rate": 3.175048266985382e-05, "loss": 0.8286, "step": 39700 }, { "epoch": 3.65, "learning_rate": 3.1745885814103155e-05, "loss": 0.9237, "step": 39710 }, { "epoch": 3.65, "learning_rate": 3.174128895835249e-05, "loss": 0.9175, "step": 39720 }, { "epoch": 3.65, "learning_rate": 3.173669210260182e-05, "loss": 0.9063, "step": 39730 }, { "epoch": 3.65, "learning_rate": 3.173209524685116e-05, "loss": 0.8732, "step": 39740 }, { "epoch": 3.65, "learning_rate": 3.1727498391100494e-05, "loss": 0.7564, "step": 39750 }, { "epoch": 3.66, "learning_rate": 3.1722901535349824e-05, "loss": 0.9568, "step": 39760 }, { "epoch": 3.66, "learning_rate": 3.1718304679599154e-05, "loss": 0.8059, "step": 39770 }, { "epoch": 3.66, "learning_rate": 3.171370782384849e-05, "loss": 0.8726, "step": 39780 }, { "epoch": 3.66, "learning_rate": 3.170911096809782e-05, "loss": 0.9589, "step": 39790 }, { "epoch": 3.66, "learning_rate": 3.1704514112347156e-05, "loss": 0.8015, "step": 39800 }, { "epoch": 3.66, "learning_rate": 3.169991725659649e-05, "loss": 0.7882, "step": 39810 }, { "epoch": 3.66, "learning_rate": 3.169532040084582e-05, "loss": 0.9419, "step": 39820 }, { "epoch": 3.66, "learning_rate": 3.169072354509516e-05, "loss": 0.8445, "step": 39830 }, { "epoch": 3.66, "learning_rate": 3.168612668934449e-05, "loss": 0.9885, "step": 39840 }, { "epoch": 3.66, "learning_rate": 3.1681529833593825e-05, "loss": 0.7575, "step": 39850 }, { "epoch": 3.66, "learning_rate": 3.1676932977843155e-05, "loss": 0.8401, "step": 39860 }, { "epoch": 3.67, "learning_rate": 3.167233612209249e-05, "loss": 0.7462, "step": 39870 }, { "epoch": 3.67, "learning_rate": 3.166773926634182e-05, "loss": 0.9699, "step": 39880 }, { "epoch": 3.67, "learning_rate": 3.166314241059116e-05, "loss": 0.9577, "step": 39890 }, { "epoch": 3.67, "learning_rate": 3.1658545554840494e-05, "loss": 0.821, "step": 39900 }, { "epoch": 3.67, "learning_rate": 3.1653948699089824e-05, "loss": 0.8404, "step": 39910 }, { "epoch": 3.67, "learning_rate": 3.164935184333916e-05, "loss": 0.9361, "step": 39920 }, { "epoch": 3.67, "learning_rate": 3.164475498758849e-05, "loss": 0.9821, "step": 39930 }, { "epoch": 3.67, "learning_rate": 3.1640158131837826e-05, "loss": 0.8874, "step": 39940 }, { "epoch": 3.67, "learning_rate": 3.1635561276087156e-05, "loss": 0.9069, "step": 39950 }, { "epoch": 3.67, "learning_rate": 3.163096442033649e-05, "loss": 0.9384, "step": 39960 }, { "epoch": 3.67, "learning_rate": 3.162636756458582e-05, "loss": 1.0188, "step": 39970 }, { "epoch": 3.68, "learning_rate": 3.162177070883516e-05, "loss": 0.9306, "step": 39980 }, { "epoch": 3.68, "learning_rate": 3.1617173853084495e-05, "loss": 0.8851, "step": 39990 }, { "epoch": 3.68, "learning_rate": 3.1612576997333825e-05, "loss": 1.0461, "step": 40000 }, { "epoch": 3.68, "eval_accuracy": 0.5711790393013101, "eval_loss": 0.8903296589851379, "eval_runtime": 159.9604, "eval_samples_per_second": 28.632, "eval_steps_per_second": 3.582, "step": 40000 }, { "epoch": 3.68, "learning_rate": 3.160798014158316e-05, "loss": 0.8495, "step": 40010 }, { "epoch": 3.68, "learning_rate": 3.160338328583249e-05, "loss": 0.9084, "step": 40020 }, { "epoch": 3.68, "learning_rate": 3.159878643008183e-05, "loss": 0.9206, "step": 40030 }, { "epoch": 3.68, "learning_rate": 3.159418957433116e-05, "loss": 0.9453, "step": 40040 }, { "epoch": 3.68, "learning_rate": 3.1589592718580494e-05, "loss": 0.8069, "step": 40050 }, { "epoch": 3.68, "learning_rate": 3.158499586282982e-05, "loss": 0.8489, "step": 40060 }, { "epoch": 3.68, "learning_rate": 3.158039900707916e-05, "loss": 0.9141, "step": 40070 }, { "epoch": 3.68, "learning_rate": 3.1575802151328496e-05, "loss": 0.8765, "step": 40080 }, { "epoch": 3.69, "learning_rate": 3.1571205295577826e-05, "loss": 0.8629, "step": 40090 }, { "epoch": 3.69, "learning_rate": 3.156660843982716e-05, "loss": 0.907, "step": 40100 }, { "epoch": 3.69, "learning_rate": 3.156201158407649e-05, "loss": 0.9083, "step": 40110 }, { "epoch": 3.69, "learning_rate": 3.155741472832583e-05, "loss": 0.9925, "step": 40120 }, { "epoch": 3.69, "learning_rate": 3.155281787257516e-05, "loss": 0.869, "step": 40130 }, { "epoch": 3.69, "learning_rate": 3.1548221016824495e-05, "loss": 0.9737, "step": 40140 }, { "epoch": 3.69, "learning_rate": 3.1543624161073825e-05, "loss": 1.0478, "step": 40150 }, { "epoch": 3.69, "learning_rate": 3.153902730532316e-05, "loss": 0.9175, "step": 40160 }, { "epoch": 3.69, "learning_rate": 3.153443044957249e-05, "loss": 0.9192, "step": 40170 }, { "epoch": 3.69, "learning_rate": 3.152983359382183e-05, "loss": 0.8736, "step": 40180 }, { "epoch": 3.69, "learning_rate": 3.1525236738071164e-05, "loss": 0.8674, "step": 40190 }, { "epoch": 3.7, "learning_rate": 3.1520639882320493e-05, "loss": 0.8745, "step": 40200 }, { "epoch": 3.7, "learning_rate": 3.151604302656983e-05, "loss": 0.8358, "step": 40210 }, { "epoch": 3.7, "learning_rate": 3.151144617081916e-05, "loss": 0.9177, "step": 40220 }, { "epoch": 3.7, "learning_rate": 3.1506849315068496e-05, "loss": 0.7849, "step": 40230 }, { "epoch": 3.7, "learning_rate": 3.1502252459317826e-05, "loss": 0.8817, "step": 40240 }, { "epoch": 3.7, "learning_rate": 3.149765560356716e-05, "loss": 1.0461, "step": 40250 }, { "epoch": 3.7, "learning_rate": 3.149305874781649e-05, "loss": 0.8643, "step": 40260 }, { "epoch": 3.7, "learning_rate": 3.148846189206583e-05, "loss": 0.8535, "step": 40270 }, { "epoch": 3.7, "learning_rate": 3.1483865036315165e-05, "loss": 0.925, "step": 40280 }, { "epoch": 3.7, "learning_rate": 3.1479268180564495e-05, "loss": 0.9472, "step": 40290 }, { "epoch": 3.71, "learning_rate": 3.147467132481383e-05, "loss": 0.9185, "step": 40300 }, { "epoch": 3.71, "learning_rate": 3.147007446906316e-05, "loss": 0.9544, "step": 40310 }, { "epoch": 3.71, "learning_rate": 3.14654776133125e-05, "loss": 0.8489, "step": 40320 }, { "epoch": 3.71, "learning_rate": 3.146088075756183e-05, "loss": 0.8162, "step": 40330 }, { "epoch": 3.71, "learning_rate": 3.1456283901811163e-05, "loss": 0.9296, "step": 40340 }, { "epoch": 3.71, "learning_rate": 3.145168704606049e-05, "loss": 1.0184, "step": 40350 }, { "epoch": 3.71, "learning_rate": 3.144709019030983e-05, "loss": 0.8434, "step": 40360 }, { "epoch": 3.71, "learning_rate": 3.1442493334559166e-05, "loss": 0.8768, "step": 40370 }, { "epoch": 3.71, "learning_rate": 3.1437896478808496e-05, "loss": 0.9059, "step": 40380 }, { "epoch": 3.71, "learning_rate": 3.143329962305783e-05, "loss": 0.9881, "step": 40390 }, { "epoch": 3.71, "learning_rate": 3.142870276730716e-05, "loss": 0.8797, "step": 40400 }, { "epoch": 3.72, "learning_rate": 3.14241059115565e-05, "loss": 0.9426, "step": 40410 }, { "epoch": 3.72, "learning_rate": 3.141950905580583e-05, "loss": 0.8451, "step": 40420 }, { "epoch": 3.72, "learning_rate": 3.1414912200055165e-05, "loss": 0.9316, "step": 40430 }, { "epoch": 3.72, "learning_rate": 3.1410315344304494e-05, "loss": 0.8221, "step": 40440 }, { "epoch": 3.72, "learning_rate": 3.140571848855383e-05, "loss": 0.8418, "step": 40450 }, { "epoch": 3.72, "learning_rate": 3.140112163280317e-05, "loss": 0.8477, "step": 40460 }, { "epoch": 3.72, "learning_rate": 3.13965247770525e-05, "loss": 0.9107, "step": 40470 }, { "epoch": 3.72, "learning_rate": 3.1391927921301833e-05, "loss": 0.98, "step": 40480 }, { "epoch": 3.72, "learning_rate": 3.138733106555116e-05, "loss": 0.8492, "step": 40490 }, { "epoch": 3.72, "learning_rate": 3.138273420980049e-05, "loss": 0.9016, "step": 40500 }, { "epoch": 3.72, "learning_rate": 3.137813735404983e-05, "loss": 0.9132, "step": 40510 }, { "epoch": 3.73, "learning_rate": 3.1373540498299166e-05, "loss": 0.8439, "step": 40520 }, { "epoch": 3.73, "learning_rate": 3.1368943642548496e-05, "loss": 0.9044, "step": 40530 }, { "epoch": 3.73, "learning_rate": 3.136434678679783e-05, "loss": 0.9514, "step": 40540 }, { "epoch": 3.73, "learning_rate": 3.135974993104717e-05, "loss": 0.913, "step": 40550 }, { "epoch": 3.73, "learning_rate": 3.13551530752965e-05, "loss": 0.9432, "step": 40560 }, { "epoch": 3.73, "learning_rate": 3.1350556219545835e-05, "loss": 0.7828, "step": 40570 }, { "epoch": 3.73, "learning_rate": 3.1345959363795164e-05, "loss": 0.884, "step": 40580 }, { "epoch": 3.73, "learning_rate": 3.1341362508044494e-05, "loss": 0.9422, "step": 40590 }, { "epoch": 3.73, "learning_rate": 3.133676565229383e-05, "loss": 0.7815, "step": 40600 }, { "epoch": 3.73, "learning_rate": 3.133216879654317e-05, "loss": 0.8147, "step": 40610 }, { "epoch": 3.73, "learning_rate": 3.13275719407925e-05, "loss": 0.8345, "step": 40620 }, { "epoch": 3.74, "learning_rate": 3.132297508504183e-05, "loss": 0.865, "step": 40630 }, { "epoch": 3.74, "learning_rate": 3.131837822929117e-05, "loss": 0.8247, "step": 40640 }, { "epoch": 3.74, "learning_rate": 3.13137813735405e-05, "loss": 0.9588, "step": 40650 }, { "epoch": 3.74, "learning_rate": 3.1309184517789836e-05, "loss": 0.7586, "step": 40660 }, { "epoch": 3.74, "learning_rate": 3.1304587662039166e-05, "loss": 0.9119, "step": 40670 }, { "epoch": 3.74, "learning_rate": 3.1299990806288495e-05, "loss": 0.8504, "step": 40680 }, { "epoch": 3.74, "learning_rate": 3.129539395053783e-05, "loss": 0.8237, "step": 40690 }, { "epoch": 3.74, "learning_rate": 3.129079709478717e-05, "loss": 0.8695, "step": 40700 }, { "epoch": 3.74, "learning_rate": 3.12862002390365e-05, "loss": 0.8677, "step": 40710 }, { "epoch": 3.74, "learning_rate": 3.1281603383285834e-05, "loss": 0.9085, "step": 40720 }, { "epoch": 3.74, "learning_rate": 3.127700652753517e-05, "loss": 0.8356, "step": 40730 }, { "epoch": 3.75, "learning_rate": 3.12724096717845e-05, "loss": 0.8699, "step": 40740 }, { "epoch": 3.75, "learning_rate": 3.126781281603384e-05, "loss": 0.853, "step": 40750 }, { "epoch": 3.75, "learning_rate": 3.126321596028317e-05, "loss": 0.8297, "step": 40760 }, { "epoch": 3.75, "learning_rate": 3.1258619104532496e-05, "loss": 0.8302, "step": 40770 }, { "epoch": 3.75, "learning_rate": 3.125402224878183e-05, "loss": 0.8745, "step": 40780 }, { "epoch": 3.75, "learning_rate": 3.124942539303117e-05, "loss": 0.7968, "step": 40790 }, { "epoch": 3.75, "learning_rate": 3.12448285372805e-05, "loss": 0.8859, "step": 40800 }, { "epoch": 3.75, "learning_rate": 3.1240231681529836e-05, "loss": 0.93, "step": 40810 }, { "epoch": 3.75, "learning_rate": 3.123563482577917e-05, "loss": 0.9607, "step": 40820 }, { "epoch": 3.75, "learning_rate": 3.12310379700285e-05, "loss": 0.8351, "step": 40830 }, { "epoch": 3.75, "learning_rate": 3.122644111427784e-05, "loss": 0.9292, "step": 40840 }, { "epoch": 3.76, "learning_rate": 3.122184425852717e-05, "loss": 0.8545, "step": 40850 }, { "epoch": 3.76, "learning_rate": 3.12172474027765e-05, "loss": 0.8323, "step": 40860 }, { "epoch": 3.76, "learning_rate": 3.1212650547025834e-05, "loss": 0.9687, "step": 40870 }, { "epoch": 3.76, "learning_rate": 3.120805369127517e-05, "loss": 0.973, "step": 40880 }, { "epoch": 3.76, "learning_rate": 3.12034568355245e-05, "loss": 0.931, "step": 40890 }, { "epoch": 3.76, "learning_rate": 3.119885997977384e-05, "loss": 0.9597, "step": 40900 }, { "epoch": 3.76, "learning_rate": 3.119426312402317e-05, "loss": 0.803, "step": 40910 }, { "epoch": 3.76, "learning_rate": 3.11896662682725e-05, "loss": 0.8682, "step": 40920 }, { "epoch": 3.76, "learning_rate": 3.118506941252184e-05, "loss": 1.0133, "step": 40930 }, { "epoch": 3.76, "learning_rate": 3.118047255677117e-05, "loss": 0.9114, "step": 40940 }, { "epoch": 3.76, "learning_rate": 3.11758757010205e-05, "loss": 0.8648, "step": 40950 }, { "epoch": 3.77, "learning_rate": 3.1171278845269835e-05, "loss": 0.8299, "step": 40960 }, { "epoch": 3.77, "learning_rate": 3.116668198951917e-05, "loss": 0.8276, "step": 40970 }, { "epoch": 3.77, "learning_rate": 3.11620851337685e-05, "loss": 0.7509, "step": 40980 }, { "epoch": 3.77, "learning_rate": 3.115748827801784e-05, "loss": 0.9291, "step": 40990 }, { "epoch": 3.77, "learning_rate": 3.1152891422267174e-05, "loss": 0.821, "step": 41000 }, { "epoch": 3.77, "learning_rate": 3.1148294566516504e-05, "loss": 0.8429, "step": 41010 }, { "epoch": 3.77, "learning_rate": 3.114369771076584e-05, "loss": 0.9444, "step": 41020 }, { "epoch": 3.77, "learning_rate": 3.113910085501517e-05, "loss": 0.9157, "step": 41030 }, { "epoch": 3.77, "learning_rate": 3.11345039992645e-05, "loss": 0.9582, "step": 41040 }, { "epoch": 3.77, "learning_rate": 3.1129907143513837e-05, "loss": 0.8934, "step": 41050 }, { "epoch": 3.77, "learning_rate": 3.112531028776317e-05, "loss": 0.8774, "step": 41060 }, { "epoch": 3.78, "learning_rate": 3.11207134320125e-05, "loss": 0.8553, "step": 41070 }, { "epoch": 3.78, "learning_rate": 3.111611657626184e-05, "loss": 0.9137, "step": 41080 }, { "epoch": 3.78, "learning_rate": 3.1111519720511176e-05, "loss": 0.8901, "step": 41090 }, { "epoch": 3.78, "learning_rate": 3.1106922864760505e-05, "loss": 0.9264, "step": 41100 }, { "epoch": 3.78, "learning_rate": 3.110232600900984e-05, "loss": 0.9345, "step": 41110 }, { "epoch": 3.78, "learning_rate": 3.109772915325917e-05, "loss": 0.8781, "step": 41120 }, { "epoch": 3.78, "learning_rate": 3.10931322975085e-05, "loss": 0.7769, "step": 41130 }, { "epoch": 3.78, "learning_rate": 3.108853544175784e-05, "loss": 0.8278, "step": 41140 }, { "epoch": 3.78, "learning_rate": 3.1083938586007174e-05, "loss": 0.9533, "step": 41150 }, { "epoch": 3.78, "learning_rate": 3.1079341730256504e-05, "loss": 0.8985, "step": 41160 }, { "epoch": 3.79, "learning_rate": 3.107474487450584e-05, "loss": 0.7846, "step": 41170 }, { "epoch": 3.79, "learning_rate": 3.107014801875518e-05, "loss": 0.9568, "step": 41180 }, { "epoch": 3.79, "learning_rate": 3.1065551163004507e-05, "loss": 0.9479, "step": 41190 }, { "epoch": 3.79, "learning_rate": 3.106095430725384e-05, "loss": 0.8945, "step": 41200 }, { "epoch": 3.79, "learning_rate": 3.105635745150317e-05, "loss": 0.8083, "step": 41210 }, { "epoch": 3.79, "learning_rate": 3.10517605957525e-05, "loss": 0.8954, "step": 41220 }, { "epoch": 3.79, "learning_rate": 3.104716374000184e-05, "loss": 0.9698, "step": 41230 }, { "epoch": 3.79, "learning_rate": 3.1042566884251175e-05, "loss": 0.9145, "step": 41240 }, { "epoch": 3.79, "learning_rate": 3.1037970028500505e-05, "loss": 0.9828, "step": 41250 }, { "epoch": 3.79, "learning_rate": 3.103337317274984e-05, "loss": 0.8397, "step": 41260 }, { "epoch": 3.79, "learning_rate": 3.102877631699918e-05, "loss": 0.9298, "step": 41270 }, { "epoch": 3.8, "learning_rate": 3.102417946124851e-05, "loss": 0.8518, "step": 41280 }, { "epoch": 3.8, "learning_rate": 3.1019582605497844e-05, "loss": 0.8993, "step": 41290 }, { "epoch": 3.8, "learning_rate": 3.1014985749747174e-05, "loss": 0.8147, "step": 41300 }, { "epoch": 3.8, "learning_rate": 3.1010388893996504e-05, "loss": 0.8108, "step": 41310 }, { "epoch": 3.8, "learning_rate": 3.100579203824584e-05, "loss": 0.9162, "step": 41320 }, { "epoch": 3.8, "learning_rate": 3.1001195182495177e-05, "loss": 0.8643, "step": 41330 }, { "epoch": 3.8, "learning_rate": 3.0996598326744506e-05, "loss": 0.8399, "step": 41340 }, { "epoch": 3.8, "learning_rate": 3.099200147099384e-05, "loss": 0.8777, "step": 41350 }, { "epoch": 3.8, "learning_rate": 3.098740461524318e-05, "loss": 0.8291, "step": 41360 }, { "epoch": 3.8, "learning_rate": 3.098280775949251e-05, "loss": 0.878, "step": 41370 }, { "epoch": 3.8, "learning_rate": 3.0978210903741845e-05, "loss": 0.9397, "step": 41380 }, { "epoch": 3.81, "learning_rate": 3.0973614047991175e-05, "loss": 0.8369, "step": 41390 }, { "epoch": 3.81, "learning_rate": 3.0969017192240505e-05, "loss": 0.8541, "step": 41400 }, { "epoch": 3.81, "learning_rate": 3.096442033648984e-05, "loss": 0.9817, "step": 41410 }, { "epoch": 3.81, "learning_rate": 3.095982348073918e-05, "loss": 0.9803, "step": 41420 }, { "epoch": 3.81, "learning_rate": 3.095522662498851e-05, "loss": 0.9146, "step": 41430 }, { "epoch": 3.81, "learning_rate": 3.0950629769237844e-05, "loss": 0.9183, "step": 41440 }, { "epoch": 3.81, "learning_rate": 3.094603291348718e-05, "loss": 0.8752, "step": 41450 }, { "epoch": 3.81, "learning_rate": 3.094143605773651e-05, "loss": 0.8445, "step": 41460 }, { "epoch": 3.81, "learning_rate": 3.0936839201985847e-05, "loss": 0.8907, "step": 41470 }, { "epoch": 3.81, "learning_rate": 3.0932242346235176e-05, "loss": 0.792, "step": 41480 }, { "epoch": 3.81, "learning_rate": 3.0927645490484506e-05, "loss": 0.9074, "step": 41490 }, { "epoch": 3.82, "learning_rate": 3.092304863473384e-05, "loss": 0.9132, "step": 41500 }, { "epoch": 3.82, "learning_rate": 3.091845177898318e-05, "loss": 0.9387, "step": 41510 }, { "epoch": 3.82, "learning_rate": 3.091385492323251e-05, "loss": 0.8942, "step": 41520 }, { "epoch": 3.82, "learning_rate": 3.0909258067481845e-05, "loss": 0.7805, "step": 41530 }, { "epoch": 3.82, "learning_rate": 3.090466121173118e-05, "loss": 0.8145, "step": 41540 }, { "epoch": 3.82, "learning_rate": 3.090006435598051e-05, "loss": 0.8346, "step": 41550 }, { "epoch": 3.82, "learning_rate": 3.089546750022985e-05, "loss": 0.9382, "step": 41560 }, { "epoch": 3.82, "learning_rate": 3.089087064447918e-05, "loss": 0.8944, "step": 41570 }, { "epoch": 3.82, "learning_rate": 3.088627378872851e-05, "loss": 0.8041, "step": 41580 }, { "epoch": 3.82, "learning_rate": 3.0881676932977844e-05, "loss": 0.9367, "step": 41590 }, { "epoch": 3.82, "learning_rate": 3.087708007722718e-05, "loss": 0.9475, "step": 41600 }, { "epoch": 3.83, "learning_rate": 3.087248322147651e-05, "loss": 0.9961, "step": 41610 }, { "epoch": 3.83, "learning_rate": 3.0867886365725846e-05, "loss": 0.7667, "step": 41620 }, { "epoch": 3.83, "learning_rate": 3.086328950997518e-05, "loss": 0.876, "step": 41630 }, { "epoch": 3.83, "learning_rate": 3.085869265422451e-05, "loss": 0.8992, "step": 41640 }, { "epoch": 3.83, "learning_rate": 3.085409579847385e-05, "loss": 0.9273, "step": 41650 }, { "epoch": 3.83, "learning_rate": 3.084949894272318e-05, "loss": 0.8722, "step": 41660 }, { "epoch": 3.83, "learning_rate": 3.084490208697251e-05, "loss": 0.8965, "step": 41670 }, { "epoch": 3.83, "learning_rate": 3.0840305231221845e-05, "loss": 0.797, "step": 41680 }, { "epoch": 3.83, "learning_rate": 3.083570837547118e-05, "loss": 0.806, "step": 41690 }, { "epoch": 3.83, "learning_rate": 3.083111151972051e-05, "loss": 0.8519, "step": 41700 }, { "epoch": 3.83, "learning_rate": 3.082651466396985e-05, "loss": 0.8404, "step": 41710 }, { "epoch": 3.84, "learning_rate": 3.082191780821918e-05, "loss": 0.8612, "step": 41720 }, { "epoch": 3.84, "learning_rate": 3.0817320952468514e-05, "loss": 0.808, "step": 41730 }, { "epoch": 3.84, "learning_rate": 3.081272409671785e-05, "loss": 0.8971, "step": 41740 }, { "epoch": 3.84, "learning_rate": 3.080812724096718e-05, "loss": 0.8107, "step": 41750 }, { "epoch": 3.84, "learning_rate": 3.080353038521651e-05, "loss": 0.8548, "step": 41760 }, { "epoch": 3.84, "learning_rate": 3.0798933529465846e-05, "loss": 0.9165, "step": 41770 }, { "epoch": 3.84, "learning_rate": 3.079433667371518e-05, "loss": 1.1106, "step": 41780 }, { "epoch": 3.84, "learning_rate": 3.078973981796451e-05, "loss": 0.9052, "step": 41790 }, { "epoch": 3.84, "learning_rate": 3.078514296221385e-05, "loss": 0.9145, "step": 41800 }, { "epoch": 3.84, "learning_rate": 3.078054610646318e-05, "loss": 1.0149, "step": 41810 }, { "epoch": 3.84, "learning_rate": 3.0775949250712515e-05, "loss": 0.9759, "step": 41820 }, { "epoch": 3.85, "learning_rate": 3.077135239496185e-05, "loss": 0.8888, "step": 41830 }, { "epoch": 3.85, "learning_rate": 3.076675553921118e-05, "loss": 0.9497, "step": 41840 }, { "epoch": 3.85, "learning_rate": 3.076215868346051e-05, "loss": 0.9366, "step": 41850 }, { "epoch": 3.85, "learning_rate": 3.075756182770985e-05, "loss": 0.882, "step": 41860 }, { "epoch": 3.85, "learning_rate": 3.0752964971959184e-05, "loss": 0.8673, "step": 41870 }, { "epoch": 3.85, "learning_rate": 3.0748368116208513e-05, "loss": 0.914, "step": 41880 }, { "epoch": 3.85, "learning_rate": 3.074377126045785e-05, "loss": 0.8619, "step": 41890 }, { "epoch": 3.85, "learning_rate": 3.073917440470718e-05, "loss": 0.819, "step": 41900 }, { "epoch": 3.85, "learning_rate": 3.0734577548956516e-05, "loss": 0.8295, "step": 41910 }, { "epoch": 3.85, "learning_rate": 3.072998069320585e-05, "loss": 0.9092, "step": 41920 }, { "epoch": 3.85, "learning_rate": 3.072538383745518e-05, "loss": 0.9288, "step": 41930 }, { "epoch": 3.86, "learning_rate": 3.072078698170451e-05, "loss": 0.8331, "step": 41940 }, { "epoch": 3.86, "learning_rate": 3.071619012595385e-05, "loss": 0.9216, "step": 41950 }, { "epoch": 3.86, "learning_rate": 3.0711593270203185e-05, "loss": 0.84, "step": 41960 }, { "epoch": 3.86, "learning_rate": 3.0706996414452515e-05, "loss": 0.9453, "step": 41970 }, { "epoch": 3.86, "learning_rate": 3.070239955870185e-05, "loss": 0.8445, "step": 41980 }, { "epoch": 3.86, "learning_rate": 3.069780270295118e-05, "loss": 0.9038, "step": 41990 }, { "epoch": 3.86, "learning_rate": 3.069320584720052e-05, "loss": 0.883, "step": 42000 }, { "epoch": 3.86, "learning_rate": 3.0688608991449854e-05, "loss": 0.7748, "step": 42010 }, { "epoch": 3.86, "learning_rate": 3.0684012135699183e-05, "loss": 0.9604, "step": 42020 }, { "epoch": 3.86, "learning_rate": 3.067941527994851e-05, "loss": 0.9242, "step": 42030 }, { "epoch": 3.87, "learning_rate": 3.067481842419785e-05, "loss": 0.7849, "step": 42040 }, { "epoch": 3.87, "learning_rate": 3.0670221568447186e-05, "loss": 0.916, "step": 42050 }, { "epoch": 3.87, "learning_rate": 3.0665624712696516e-05, "loss": 0.758, "step": 42060 }, { "epoch": 3.87, "learning_rate": 3.066102785694585e-05, "loss": 0.8098, "step": 42070 }, { "epoch": 3.87, "learning_rate": 3.065643100119518e-05, "loss": 0.836, "step": 42080 }, { "epoch": 3.87, "learning_rate": 3.065183414544452e-05, "loss": 0.7924, "step": 42090 }, { "epoch": 3.87, "learning_rate": 3.0647237289693855e-05, "loss": 0.9925, "step": 42100 }, { "epoch": 3.87, "learning_rate": 3.0642640433943185e-05, "loss": 0.8834, "step": 42110 }, { "epoch": 3.87, "learning_rate": 3.0638043578192514e-05, "loss": 0.8127, "step": 42120 }, { "epoch": 3.87, "learning_rate": 3.063344672244185e-05, "loss": 0.9137, "step": 42130 }, { "epoch": 3.87, "learning_rate": 3.062884986669118e-05, "loss": 1.0019, "step": 42140 }, { "epoch": 3.88, "learning_rate": 3.062425301094052e-05, "loss": 0.8705, "step": 42150 }, { "epoch": 3.88, "learning_rate": 3.0619656155189854e-05, "loss": 0.9899, "step": 42160 }, { "epoch": 3.88, "learning_rate": 3.061505929943918e-05, "loss": 0.7396, "step": 42170 }, { "epoch": 3.88, "learning_rate": 3.061046244368852e-05, "loss": 0.9206, "step": 42180 }, { "epoch": 3.88, "learning_rate": 3.0605865587937856e-05, "loss": 0.7959, "step": 42190 }, { "epoch": 3.88, "learning_rate": 3.0601268732187186e-05, "loss": 0.8946, "step": 42200 }, { "epoch": 3.88, "learning_rate": 3.0596671876436516e-05, "loss": 0.7907, "step": 42210 }, { "epoch": 3.88, "learning_rate": 3.059207502068585e-05, "loss": 0.9861, "step": 42220 }, { "epoch": 3.88, "learning_rate": 3.058747816493518e-05, "loss": 0.8206, "step": 42230 }, { "epoch": 3.88, "learning_rate": 3.058288130918452e-05, "loss": 1.0817, "step": 42240 }, { "epoch": 3.88, "learning_rate": 3.0578284453433855e-05, "loss": 0.8752, "step": 42250 }, { "epoch": 3.89, "learning_rate": 3.0573687597683184e-05, "loss": 0.7689, "step": 42260 }, { "epoch": 3.89, "learning_rate": 3.056909074193252e-05, "loss": 0.812, "step": 42270 }, { "epoch": 3.89, "learning_rate": 3.056449388618186e-05, "loss": 0.7954, "step": 42280 }, { "epoch": 3.89, "learning_rate": 3.055989703043119e-05, "loss": 1.0381, "step": 42290 }, { "epoch": 3.89, "learning_rate": 3.055530017468052e-05, "loss": 0.8971, "step": 42300 }, { "epoch": 3.89, "learning_rate": 3.055070331892985e-05, "loss": 0.8743, "step": 42310 }, { "epoch": 3.89, "learning_rate": 3.054610646317918e-05, "loss": 0.7803, "step": 42320 }, { "epoch": 3.89, "learning_rate": 3.054150960742852e-05, "loss": 0.8167, "step": 42330 }, { "epoch": 3.89, "learning_rate": 3.0536912751677856e-05, "loss": 0.7777, "step": 42340 }, { "epoch": 3.89, "learning_rate": 3.0532315895927186e-05, "loss": 0.8836, "step": 42350 }, { "epoch": 3.89, "learning_rate": 3.052771904017652e-05, "loss": 0.8275, "step": 42360 }, { "epoch": 3.9, "learning_rate": 3.052312218442586e-05, "loss": 0.8699, "step": 42370 }, { "epoch": 3.9, "learning_rate": 3.051852532867519e-05, "loss": 0.8643, "step": 42380 }, { "epoch": 3.9, "learning_rate": 3.0513928472924518e-05, "loss": 0.8414, "step": 42390 }, { "epoch": 3.9, "learning_rate": 3.0509331617173854e-05, "loss": 0.8447, "step": 42400 }, { "epoch": 3.9, "learning_rate": 3.0504734761423188e-05, "loss": 0.8043, "step": 42410 }, { "epoch": 3.9, "learning_rate": 3.050013790567252e-05, "loss": 0.9356, "step": 42420 }, { "epoch": 3.9, "learning_rate": 3.0495541049921854e-05, "loss": 0.9499, "step": 42430 }, { "epoch": 3.9, "learning_rate": 3.049094419417119e-05, "loss": 0.8151, "step": 42440 }, { "epoch": 3.9, "learning_rate": 3.0486347338420523e-05, "loss": 0.8695, "step": 42450 }, { "epoch": 3.9, "learning_rate": 3.0481750482669856e-05, "loss": 0.7283, "step": 42460 }, { "epoch": 3.9, "learning_rate": 3.0477153626919186e-05, "loss": 0.9512, "step": 42470 }, { "epoch": 3.91, "learning_rate": 3.047255677116852e-05, "loss": 0.8692, "step": 42480 }, { "epoch": 3.91, "learning_rate": 3.0467959915417856e-05, "loss": 0.8116, "step": 42490 }, { "epoch": 3.91, "learning_rate": 3.046336305966719e-05, "loss": 0.7749, "step": 42500 }, { "epoch": 3.91, "learning_rate": 3.0458766203916522e-05, "loss": 0.9691, "step": 42510 }, { "epoch": 3.91, "learning_rate": 3.0454169348165855e-05, "loss": 0.7368, "step": 42520 }, { "epoch": 3.91, "learning_rate": 3.044957249241519e-05, "loss": 0.9554, "step": 42530 }, { "epoch": 3.91, "learning_rate": 3.0444975636664524e-05, "loss": 0.8346, "step": 42540 }, { "epoch": 3.91, "learning_rate": 3.0440378780913858e-05, "loss": 0.7552, "step": 42550 }, { "epoch": 3.91, "learning_rate": 3.0435781925163194e-05, "loss": 0.9676, "step": 42560 }, { "epoch": 3.91, "learning_rate": 3.043118506941252e-05, "loss": 0.9258, "step": 42570 }, { "epoch": 3.91, "learning_rate": 3.0426588213661857e-05, "loss": 0.8877, "step": 42580 }, { "epoch": 3.92, "learning_rate": 3.042199135791119e-05, "loss": 0.9122, "step": 42590 }, { "epoch": 3.92, "learning_rate": 3.0417394502160523e-05, "loss": 0.9229, "step": 42600 }, { "epoch": 3.92, "learning_rate": 3.0412797646409856e-05, "loss": 0.8311, "step": 42610 }, { "epoch": 3.92, "learning_rate": 3.0408200790659193e-05, "loss": 0.922, "step": 42620 }, { "epoch": 3.92, "learning_rate": 3.0403603934908526e-05, "loss": 0.7823, "step": 42630 }, { "epoch": 3.92, "learning_rate": 3.039900707915786e-05, "loss": 0.9912, "step": 42640 }, { "epoch": 3.92, "learning_rate": 3.0394410223407195e-05, "loss": 0.7825, "step": 42650 }, { "epoch": 3.92, "learning_rate": 3.038981336765652e-05, "loss": 0.8328, "step": 42660 }, { "epoch": 3.92, "learning_rate": 3.0385216511905855e-05, "loss": 0.8377, "step": 42670 }, { "epoch": 3.92, "learning_rate": 3.038061965615519e-05, "loss": 0.8936, "step": 42680 }, { "epoch": 3.92, "learning_rate": 3.0376022800404524e-05, "loss": 0.8505, "step": 42690 }, { "epoch": 3.93, "learning_rate": 3.0371425944653857e-05, "loss": 0.8924, "step": 42700 }, { "epoch": 3.93, "learning_rate": 3.0366829088903194e-05, "loss": 0.7185, "step": 42710 }, { "epoch": 3.93, "learning_rate": 3.0362232233152527e-05, "loss": 0.9606, "step": 42720 }, { "epoch": 3.93, "learning_rate": 3.035763537740186e-05, "loss": 0.8341, "step": 42730 }, { "epoch": 3.93, "learning_rate": 3.0353038521651196e-05, "loss": 0.9106, "step": 42740 }, { "epoch": 3.93, "learning_rate": 3.0348441665900523e-05, "loss": 0.9915, "step": 42750 }, { "epoch": 3.93, "learning_rate": 3.0343844810149856e-05, "loss": 0.9164, "step": 42760 }, { "epoch": 3.93, "learning_rate": 3.0339247954399192e-05, "loss": 0.8399, "step": 42770 }, { "epoch": 3.93, "learning_rate": 3.0334651098648525e-05, "loss": 0.8349, "step": 42780 }, { "epoch": 3.93, "learning_rate": 3.033005424289786e-05, "loss": 0.9067, "step": 42790 }, { "epoch": 3.93, "learning_rate": 3.0325457387147195e-05, "loss": 0.8488, "step": 42800 }, { "epoch": 3.94, "learning_rate": 3.0320860531396528e-05, "loss": 0.9553, "step": 42810 }, { "epoch": 3.94, "learning_rate": 3.031626367564586e-05, "loss": 0.9252, "step": 42820 }, { "epoch": 3.94, "learning_rate": 3.0311666819895194e-05, "loss": 0.8189, "step": 42830 }, { "epoch": 3.94, "learning_rate": 3.0307069964144524e-05, "loss": 0.9564, "step": 42840 }, { "epoch": 3.94, "learning_rate": 3.0302473108393857e-05, "loss": 0.9466, "step": 42850 }, { "epoch": 3.94, "learning_rate": 3.0297876252643194e-05, "loss": 0.808, "step": 42860 }, { "epoch": 3.94, "learning_rate": 3.0293279396892527e-05, "loss": 0.8741, "step": 42870 }, { "epoch": 3.94, "learning_rate": 3.028868254114186e-05, "loss": 0.8869, "step": 42880 }, { "epoch": 3.94, "learning_rate": 3.0284085685391196e-05, "loss": 0.7599, "step": 42890 }, { "epoch": 3.94, "learning_rate": 3.027948882964053e-05, "loss": 1.0043, "step": 42900 }, { "epoch": 3.95, "learning_rate": 3.0274891973889862e-05, "loss": 0.9561, "step": 42910 }, { "epoch": 3.95, "learning_rate": 3.0270295118139195e-05, "loss": 0.8839, "step": 42920 }, { "epoch": 3.95, "learning_rate": 3.0265698262388525e-05, "loss": 0.9131, "step": 42930 }, { "epoch": 3.95, "learning_rate": 3.0261101406637858e-05, "loss": 0.8582, "step": 42940 }, { "epoch": 3.95, "learning_rate": 3.0256504550887195e-05, "loss": 0.9294, "step": 42950 }, { "epoch": 3.95, "learning_rate": 3.0251907695136528e-05, "loss": 0.88, "step": 42960 }, { "epoch": 3.95, "learning_rate": 3.024731083938586e-05, "loss": 0.8744, "step": 42970 }, { "epoch": 3.95, "learning_rate": 3.0242713983635197e-05, "loss": 0.8961, "step": 42980 }, { "epoch": 3.95, "learning_rate": 3.023811712788453e-05, "loss": 0.8908, "step": 42990 }, { "epoch": 3.95, "learning_rate": 3.0233520272133864e-05, "loss": 0.8268, "step": 43000 }, { "epoch": 3.95, "learning_rate": 3.0228923416383197e-05, "loss": 0.8455, "step": 43010 }, { "epoch": 3.96, "learning_rate": 3.0224326560632526e-05, "loss": 0.9264, "step": 43020 }, { "epoch": 3.96, "learning_rate": 3.021972970488186e-05, "loss": 0.9129, "step": 43030 }, { "epoch": 3.96, "learning_rate": 3.0215132849131196e-05, "loss": 0.7947, "step": 43040 }, { "epoch": 3.96, "learning_rate": 3.021053599338053e-05, "loss": 0.8085, "step": 43050 }, { "epoch": 3.96, "learning_rate": 3.0205939137629862e-05, "loss": 0.8246, "step": 43060 }, { "epoch": 3.96, "learning_rate": 3.02013422818792e-05, "loss": 0.9389, "step": 43070 }, { "epoch": 3.96, "learning_rate": 3.019674542612853e-05, "loss": 0.998, "step": 43080 }, { "epoch": 3.96, "learning_rate": 3.0192148570377865e-05, "loss": 1.0268, "step": 43090 }, { "epoch": 3.96, "learning_rate": 3.0187551714627198e-05, "loss": 0.8891, "step": 43100 }, { "epoch": 3.96, "learning_rate": 3.0182954858876528e-05, "loss": 0.8556, "step": 43110 }, { "epoch": 3.96, "learning_rate": 3.017835800312586e-05, "loss": 0.8966, "step": 43120 }, { "epoch": 3.97, "learning_rate": 3.0173761147375197e-05, "loss": 0.8852, "step": 43130 }, { "epoch": 3.97, "learning_rate": 3.016916429162453e-05, "loss": 0.8995, "step": 43140 }, { "epoch": 3.97, "learning_rate": 3.0164567435873863e-05, "loss": 0.8653, "step": 43150 }, { "epoch": 3.97, "learning_rate": 3.0159970580123196e-05, "loss": 0.9161, "step": 43160 }, { "epoch": 3.97, "learning_rate": 3.0155373724372533e-05, "loss": 0.8953, "step": 43170 }, { "epoch": 3.97, "learning_rate": 3.0150776868621866e-05, "loss": 0.8979, "step": 43180 }, { "epoch": 3.97, "learning_rate": 3.01461800128712e-05, "loss": 0.8475, "step": 43190 }, { "epoch": 3.97, "learning_rate": 3.014158315712053e-05, "loss": 0.9134, "step": 43200 }, { "epoch": 3.97, "learning_rate": 3.0136986301369862e-05, "loss": 0.8336, "step": 43210 }, { "epoch": 3.97, "learning_rate": 3.0132389445619198e-05, "loss": 0.9151, "step": 43220 }, { "epoch": 3.97, "learning_rate": 3.012779258986853e-05, "loss": 0.7819, "step": 43230 }, { "epoch": 3.98, "learning_rate": 3.0123195734117864e-05, "loss": 0.8685, "step": 43240 }, { "epoch": 3.98, "learning_rate": 3.0118598878367198e-05, "loss": 0.8687, "step": 43250 }, { "epoch": 3.98, "learning_rate": 3.0114002022616534e-05, "loss": 0.9862, "step": 43260 }, { "epoch": 3.98, "learning_rate": 3.0109405166865867e-05, "loss": 0.933, "step": 43270 }, { "epoch": 3.98, "learning_rate": 3.01048083111152e-05, "loss": 0.8555, "step": 43280 }, { "epoch": 3.98, "learning_rate": 3.010021145536453e-05, "loss": 0.8464, "step": 43290 }, { "epoch": 3.98, "learning_rate": 3.0095614599613863e-05, "loss": 0.8757, "step": 43300 }, { "epoch": 3.98, "learning_rate": 3.00910177438632e-05, "loss": 0.9042, "step": 43310 }, { "epoch": 3.98, "learning_rate": 3.0086420888112533e-05, "loss": 0.9396, "step": 43320 }, { "epoch": 3.98, "learning_rate": 3.0081824032361866e-05, "loss": 0.9152, "step": 43330 }, { "epoch": 3.98, "learning_rate": 3.00772271766112e-05, "loss": 0.9607, "step": 43340 }, { "epoch": 3.99, "learning_rate": 3.0072630320860535e-05, "loss": 0.9476, "step": 43350 }, { "epoch": 3.99, "learning_rate": 3.006803346510987e-05, "loss": 0.9484, "step": 43360 }, { "epoch": 3.99, "learning_rate": 3.00634366093592e-05, "loss": 0.7395, "step": 43370 }, { "epoch": 3.99, "learning_rate": 3.005883975360853e-05, "loss": 0.8743, "step": 43380 }, { "epoch": 3.99, "learning_rate": 3.0054242897857864e-05, "loss": 0.936, "step": 43390 }, { "epoch": 3.99, "learning_rate": 3.00496460421072e-05, "loss": 0.8477, "step": 43400 }, { "epoch": 3.99, "learning_rate": 3.0045049186356534e-05, "loss": 0.8152, "step": 43410 }, { "epoch": 3.99, "learning_rate": 3.0040452330605867e-05, "loss": 0.9444, "step": 43420 }, { "epoch": 3.99, "learning_rate": 3.00358554748552e-05, "loss": 0.8634, "step": 43430 }, { "epoch": 3.99, "learning_rate": 3.0031258619104536e-05, "loss": 0.9626, "step": 43440 }, { "epoch": 3.99, "learning_rate": 3.002666176335387e-05, "loss": 0.834, "step": 43450 }, { "epoch": 4.0, "learning_rate": 3.0022064907603203e-05, "loss": 0.8572, "step": 43460 }, { "epoch": 4.0, "learning_rate": 3.0017468051852532e-05, "loss": 0.8622, "step": 43470 }, { "epoch": 4.0, "learning_rate": 3.0012871196101865e-05, "loss": 0.8662, "step": 43480 }, { "epoch": 4.0, "learning_rate": 3.00082743403512e-05, "loss": 0.8424, "step": 43490 }, { "epoch": 4.0, "learning_rate": 3.0003677484600535e-05, "loss": 0.7547, "step": 43500 }, { "epoch": 4.0, "learning_rate": 2.9999080628849868e-05, "loss": 0.9156, "step": 43510 }, { "epoch": 4.0, "learning_rate": 2.99944837730992e-05, "loss": 0.9816, "step": 43520 }, { "epoch": 4.0, "learning_rate": 2.9989886917348538e-05, "loss": 0.8725, "step": 43530 }, { "epoch": 4.0, "learning_rate": 2.998529006159787e-05, "loss": 1.0294, "step": 43540 }, { "epoch": 4.0, "learning_rate": 2.9980693205847204e-05, "loss": 0.9817, "step": 43550 }, { "epoch": 4.0, "learning_rate": 2.9976096350096534e-05, "loss": 0.9537, "step": 43560 }, { "epoch": 4.01, "learning_rate": 2.9971499494345867e-05, "loss": 0.9162, "step": 43570 }, { "epoch": 4.01, "learning_rate": 2.99669026385952e-05, "loss": 0.9587, "step": 43580 }, { "epoch": 4.01, "learning_rate": 2.9962305782844536e-05, "loss": 0.905, "step": 43590 }, { "epoch": 4.01, "learning_rate": 2.995770892709387e-05, "loss": 0.8672, "step": 43600 }, { "epoch": 4.01, "learning_rate": 2.9953112071343202e-05, "loss": 0.8944, "step": 43610 }, { "epoch": 4.01, "learning_rate": 2.994851521559254e-05, "loss": 0.8947, "step": 43620 }, { "epoch": 4.01, "learning_rate": 2.9943918359841872e-05, "loss": 0.9051, "step": 43630 }, { "epoch": 4.01, "learning_rate": 2.9939321504091205e-05, "loss": 0.888, "step": 43640 }, { "epoch": 4.01, "learning_rate": 2.9934724648340535e-05, "loss": 0.8833, "step": 43650 }, { "epoch": 4.01, "learning_rate": 2.9930127792589868e-05, "loss": 0.8431, "step": 43660 }, { "epoch": 4.01, "learning_rate": 2.99255309368392e-05, "loss": 0.8909, "step": 43670 }, { "epoch": 4.02, "learning_rate": 2.9920934081088537e-05, "loss": 0.8028, "step": 43680 }, { "epoch": 4.02, "learning_rate": 2.991633722533787e-05, "loss": 0.8323, "step": 43690 }, { "epoch": 4.02, "learning_rate": 2.9911740369587204e-05, "loss": 0.9193, "step": 43700 }, { "epoch": 4.02, "learning_rate": 2.990714351383654e-05, "loss": 1.002, "step": 43710 }, { "epoch": 4.02, "learning_rate": 2.9902546658085873e-05, "loss": 0.9419, "step": 43720 }, { "epoch": 4.02, "learning_rate": 2.9897949802335206e-05, "loss": 0.8497, "step": 43730 }, { "epoch": 4.02, "learning_rate": 2.9893352946584536e-05, "loss": 0.9422, "step": 43740 }, { "epoch": 4.02, "learning_rate": 2.988875609083387e-05, "loss": 0.836, "step": 43750 }, { "epoch": 4.02, "learning_rate": 2.9884159235083202e-05, "loss": 0.8739, "step": 43760 }, { "epoch": 4.02, "learning_rate": 2.987956237933254e-05, "loss": 0.9939, "step": 43770 }, { "epoch": 4.03, "learning_rate": 2.987496552358187e-05, "loss": 0.868, "step": 43780 }, { "epoch": 4.03, "learning_rate": 2.9870368667831205e-05, "loss": 0.9282, "step": 43790 }, { "epoch": 4.03, "learning_rate": 2.986577181208054e-05, "loss": 0.8898, "step": 43800 }, { "epoch": 4.03, "learning_rate": 2.9861174956329874e-05, "loss": 0.7745, "step": 43810 }, { "epoch": 4.03, "learning_rate": 2.9856578100579207e-05, "loss": 0.9487, "step": 43820 }, { "epoch": 4.03, "learning_rate": 2.9851981244828537e-05, "loss": 0.8048, "step": 43830 }, { "epoch": 4.03, "learning_rate": 2.984738438907787e-05, "loss": 0.8449, "step": 43840 }, { "epoch": 4.03, "learning_rate": 2.9842787533327203e-05, "loss": 0.9738, "step": 43850 }, { "epoch": 4.03, "learning_rate": 2.983819067757654e-05, "loss": 0.8115, "step": 43860 }, { "epoch": 4.03, "learning_rate": 2.9833593821825873e-05, "loss": 1.0735, "step": 43870 }, { "epoch": 4.03, "learning_rate": 2.9828996966075206e-05, "loss": 0.7842, "step": 43880 }, { "epoch": 4.04, "learning_rate": 2.9824400110324542e-05, "loss": 0.8472, "step": 43890 }, { "epoch": 4.04, "learning_rate": 2.9819803254573875e-05, "loss": 0.8473, "step": 43900 }, { "epoch": 4.04, "learning_rate": 2.981520639882321e-05, "loss": 0.8801, "step": 43910 }, { "epoch": 4.04, "learning_rate": 2.9810609543072538e-05, "loss": 0.9102, "step": 43920 }, { "epoch": 4.04, "learning_rate": 2.980601268732187e-05, "loss": 0.8883, "step": 43930 }, { "epoch": 4.04, "learning_rate": 2.9801415831571204e-05, "loss": 0.8694, "step": 43940 }, { "epoch": 4.04, "learning_rate": 2.979681897582054e-05, "loss": 0.9237, "step": 43950 }, { "epoch": 4.04, "learning_rate": 2.9792222120069874e-05, "loss": 0.9043, "step": 43960 }, { "epoch": 4.04, "learning_rate": 2.9787625264319207e-05, "loss": 0.9667, "step": 43970 }, { "epoch": 4.04, "learning_rate": 2.978302840856854e-05, "loss": 0.938, "step": 43980 }, { "epoch": 4.04, "learning_rate": 2.9778431552817877e-05, "loss": 0.8549, "step": 43990 }, { "epoch": 4.05, "learning_rate": 2.977383469706721e-05, "loss": 0.9912, "step": 44000 }, { "epoch": 4.05, "learning_rate": 2.976923784131654e-05, "loss": 0.8521, "step": 44010 }, { "epoch": 4.05, "learning_rate": 2.9764640985565873e-05, "loss": 0.8274, "step": 44020 }, { "epoch": 4.05, "learning_rate": 2.9760044129815206e-05, "loss": 0.9357, "step": 44030 }, { "epoch": 4.05, "learning_rate": 2.9755447274064542e-05, "loss": 0.8905, "step": 44040 }, { "epoch": 4.05, "learning_rate": 2.9750850418313875e-05, "loss": 0.9101, "step": 44050 }, { "epoch": 4.05, "learning_rate": 2.974625356256321e-05, "loss": 0.9312, "step": 44060 }, { "epoch": 4.05, "learning_rate": 2.974165670681254e-05, "loss": 0.857, "step": 44070 }, { "epoch": 4.05, "learning_rate": 2.9737059851061878e-05, "loss": 0.9014, "step": 44080 }, { "epoch": 4.05, "learning_rate": 2.973246299531121e-05, "loss": 0.8814, "step": 44090 }, { "epoch": 4.05, "learning_rate": 2.972786613956054e-05, "loss": 0.9555, "step": 44100 }, { "epoch": 4.06, "learning_rate": 2.9723269283809874e-05, "loss": 0.8838, "step": 44110 }, { "epoch": 4.06, "learning_rate": 2.9718672428059207e-05, "loss": 0.8776, "step": 44120 }, { "epoch": 4.06, "learning_rate": 2.9714075572308543e-05, "loss": 0.8877, "step": 44130 }, { "epoch": 4.06, "learning_rate": 2.9709478716557876e-05, "loss": 0.8879, "step": 44140 }, { "epoch": 4.06, "learning_rate": 2.970488186080721e-05, "loss": 0.9778, "step": 44150 }, { "epoch": 4.06, "learning_rate": 2.9700285005056543e-05, "loss": 0.8975, "step": 44160 }, { "epoch": 4.06, "learning_rate": 2.969568814930588e-05, "loss": 0.8363, "step": 44170 }, { "epoch": 4.06, "learning_rate": 2.9691091293555212e-05, "loss": 0.9667, "step": 44180 }, { "epoch": 4.06, "learning_rate": 2.9686494437804542e-05, "loss": 0.9142, "step": 44190 }, { "epoch": 4.06, "learning_rate": 2.9681897582053875e-05, "loss": 0.8858, "step": 44200 }, { "epoch": 4.06, "learning_rate": 2.9677300726303208e-05, "loss": 0.8257, "step": 44210 }, { "epoch": 4.07, "learning_rate": 2.9672703870552545e-05, "loss": 0.9642, "step": 44220 }, { "epoch": 4.07, "learning_rate": 2.9668107014801878e-05, "loss": 0.9065, "step": 44230 }, { "epoch": 4.07, "learning_rate": 2.966351015905121e-05, "loss": 1.0083, "step": 44240 }, { "epoch": 4.07, "learning_rate": 2.9658913303300544e-05, "loss": 0.8783, "step": 44250 }, { "epoch": 4.07, "learning_rate": 2.965431644754988e-05, "loss": 0.8379, "step": 44260 }, { "epoch": 4.07, "learning_rate": 2.9649719591799213e-05, "loss": 0.9289, "step": 44270 }, { "epoch": 4.07, "learning_rate": 2.9645122736048543e-05, "loss": 0.8475, "step": 44280 }, { "epoch": 4.07, "learning_rate": 2.9640525880297876e-05, "loss": 0.9395, "step": 44290 }, { "epoch": 4.07, "learning_rate": 2.963592902454721e-05, "loss": 0.9234, "step": 44300 }, { "epoch": 4.07, "learning_rate": 2.9631332168796542e-05, "loss": 0.91, "step": 44310 }, { "epoch": 4.07, "learning_rate": 2.962673531304588e-05, "loss": 0.8589, "step": 44320 }, { "epoch": 4.08, "learning_rate": 2.9622138457295212e-05, "loss": 0.7401, "step": 44330 }, { "epoch": 4.08, "learning_rate": 2.9617541601544545e-05, "loss": 0.9458, "step": 44340 }, { "epoch": 4.08, "learning_rate": 2.961294474579388e-05, "loss": 0.9489, "step": 44350 }, { "epoch": 4.08, "learning_rate": 2.9608347890043215e-05, "loss": 0.836, "step": 44360 }, { "epoch": 4.08, "learning_rate": 2.9603751034292544e-05, "loss": 1.0322, "step": 44370 }, { "epoch": 4.08, "learning_rate": 2.9599154178541877e-05, "loss": 0.7793, "step": 44380 }, { "epoch": 4.08, "learning_rate": 2.959455732279121e-05, "loss": 0.8548, "step": 44390 }, { "epoch": 4.08, "learning_rate": 2.9589960467040544e-05, "loss": 0.7747, "step": 44400 }, { "epoch": 4.08, "learning_rate": 2.958536361128988e-05, "loss": 0.9053, "step": 44410 }, { "epoch": 4.08, "learning_rate": 2.9580766755539213e-05, "loss": 0.8873, "step": 44420 }, { "epoch": 4.08, "learning_rate": 2.9576169899788546e-05, "loss": 0.8838, "step": 44430 }, { "epoch": 4.09, "learning_rate": 2.9571573044037883e-05, "loss": 0.8129, "step": 44440 }, { "epoch": 4.09, "learning_rate": 2.9566976188287216e-05, "loss": 0.8477, "step": 44450 }, { "epoch": 4.09, "learning_rate": 2.9562379332536545e-05, "loss": 0.938, "step": 44460 }, { "epoch": 4.09, "learning_rate": 2.955778247678588e-05, "loss": 0.9835, "step": 44470 }, { "epoch": 4.09, "learning_rate": 2.955318562103521e-05, "loss": 0.9287, "step": 44480 }, { "epoch": 4.09, "learning_rate": 2.9548588765284545e-05, "loss": 0.8399, "step": 44490 }, { "epoch": 4.09, "learning_rate": 2.954399190953388e-05, "loss": 0.888, "step": 44500 }, { "epoch": 4.09, "learning_rate": 2.9539395053783214e-05, "loss": 0.9236, "step": 44510 }, { "epoch": 4.09, "learning_rate": 2.9534798198032547e-05, "loss": 0.814, "step": 44520 }, { "epoch": 4.09, "learning_rate": 2.9530201342281884e-05, "loss": 0.8212, "step": 44530 }, { "epoch": 4.09, "learning_rate": 2.9525604486531217e-05, "loss": 0.8162, "step": 44540 }, { "epoch": 4.1, "learning_rate": 2.9521007630780547e-05, "loss": 0.9067, "step": 44550 }, { "epoch": 4.1, "learning_rate": 2.951641077502988e-05, "loss": 0.8105, "step": 44560 }, { "epoch": 4.1, "learning_rate": 2.9511813919279213e-05, "loss": 0.8814, "step": 44570 }, { "epoch": 4.1, "learning_rate": 2.9507217063528546e-05, "loss": 0.989, "step": 44580 }, { "epoch": 4.1, "learning_rate": 2.9502620207777882e-05, "loss": 0.9304, "step": 44590 }, { "epoch": 4.1, "learning_rate": 2.9498023352027215e-05, "loss": 1.0213, "step": 44600 }, { "epoch": 4.1, "learning_rate": 2.949342649627655e-05, "loss": 0.8219, "step": 44610 }, { "epoch": 4.1, "learning_rate": 2.9488829640525885e-05, "loss": 0.7803, "step": 44620 }, { "epoch": 4.1, "learning_rate": 2.9484232784775218e-05, "loss": 0.8794, "step": 44630 }, { "epoch": 4.1, "learning_rate": 2.9479635929024544e-05, "loss": 0.951, "step": 44640 }, { "epoch": 4.1, "learning_rate": 2.947503907327388e-05, "loss": 0.7624, "step": 44650 }, { "epoch": 4.11, "learning_rate": 2.9470442217523214e-05, "loss": 0.8456, "step": 44660 }, { "epoch": 4.11, "learning_rate": 2.9465845361772547e-05, "loss": 0.9056, "step": 44670 }, { "epoch": 4.11, "learning_rate": 2.9461248506021884e-05, "loss": 0.8483, "step": 44680 }, { "epoch": 4.11, "learning_rate": 2.9456651650271217e-05, "loss": 0.8666, "step": 44690 }, { "epoch": 4.11, "learning_rate": 2.945205479452055e-05, "loss": 0.9194, "step": 44700 }, { "epoch": 4.11, "learning_rate": 2.9447457938769886e-05, "loss": 0.9423, "step": 44710 }, { "epoch": 4.11, "learning_rate": 2.944286108301922e-05, "loss": 0.8534, "step": 44720 }, { "epoch": 4.11, "learning_rate": 2.9438264227268546e-05, "loss": 0.9967, "step": 44730 }, { "epoch": 4.11, "learning_rate": 2.9433667371517882e-05, "loss": 0.9155, "step": 44740 }, { "epoch": 4.11, "learning_rate": 2.9429070515767215e-05, "loss": 0.9448, "step": 44750 }, { "epoch": 4.12, "learning_rate": 2.942447366001655e-05, "loss": 0.8542, "step": 44760 }, { "epoch": 4.12, "learning_rate": 2.9419876804265885e-05, "loss": 0.8171, "step": 44770 }, { "epoch": 4.12, "learning_rate": 2.9415279948515218e-05, "loss": 0.8834, "step": 44780 }, { "epoch": 4.12, "learning_rate": 2.941068309276455e-05, "loss": 0.9426, "step": 44790 }, { "epoch": 4.12, "learning_rate": 2.9406086237013884e-05, "loss": 0.868, "step": 44800 }, { "epoch": 4.12, "learning_rate": 2.940148938126322e-05, "loss": 0.8253, "step": 44810 }, { "epoch": 4.12, "learning_rate": 2.9396892525512547e-05, "loss": 0.9652, "step": 44820 }, { "epoch": 4.12, "learning_rate": 2.9392295669761883e-05, "loss": 0.869, "step": 44830 }, { "epoch": 4.12, "learning_rate": 2.9387698814011216e-05, "loss": 0.7678, "step": 44840 }, { "epoch": 4.12, "learning_rate": 2.938310195826055e-05, "loss": 0.9411, "step": 44850 }, { "epoch": 4.12, "learning_rate": 2.9378505102509886e-05, "loss": 0.8681, "step": 44860 }, { "epoch": 4.13, "learning_rate": 2.937390824675922e-05, "loss": 0.9634, "step": 44870 }, { "epoch": 4.13, "learning_rate": 2.9369311391008552e-05, "loss": 0.9139, "step": 44880 }, { "epoch": 4.13, "learning_rate": 2.9364714535257885e-05, "loss": 0.8367, "step": 44890 }, { "epoch": 4.13, "learning_rate": 2.9360117679507222e-05, "loss": 0.8018, "step": 44900 }, { "epoch": 4.13, "learning_rate": 2.9355520823756548e-05, "loss": 0.9205, "step": 44910 }, { "epoch": 4.13, "learning_rate": 2.9350923968005885e-05, "loss": 0.7798, "step": 44920 }, { "epoch": 4.13, "learning_rate": 2.9346327112255218e-05, "loss": 0.8618, "step": 44930 }, { "epoch": 4.13, "learning_rate": 2.934173025650455e-05, "loss": 0.8529, "step": 44940 }, { "epoch": 4.13, "learning_rate": 2.9337133400753887e-05, "loss": 0.855, "step": 44950 }, { "epoch": 4.13, "learning_rate": 2.933253654500322e-05, "loss": 0.9378, "step": 44960 }, { "epoch": 4.13, "learning_rate": 2.9327939689252553e-05, "loss": 0.9093, "step": 44970 }, { "epoch": 4.14, "learning_rate": 2.9323342833501886e-05, "loss": 0.9621, "step": 44980 }, { "epoch": 4.14, "learning_rate": 2.9318745977751223e-05, "loss": 0.8816, "step": 44990 }, { "epoch": 4.14, "learning_rate": 2.931414912200055e-05, "loss": 1.0072, "step": 45000 }, { "epoch": 4.14, "eval_accuracy": 0.5637554585152839, "eval_loss": 0.8915139436721802, "eval_runtime": 160.2386, "eval_samples_per_second": 28.582, "eval_steps_per_second": 3.576, "step": 45000 }, { "epoch": 4.14, "learning_rate": 2.9309552266249886e-05, "loss": 0.8402, "step": 45010 }, { "epoch": 4.14, "learning_rate": 2.930495541049922e-05, "loss": 0.8683, "step": 45020 }, { "epoch": 4.14, "learning_rate": 2.9300358554748552e-05, "loss": 0.8742, "step": 45030 }, { "epoch": 4.14, "learning_rate": 2.929576169899789e-05, "loss": 0.9364, "step": 45040 }, { "epoch": 4.14, "learning_rate": 2.929116484324722e-05, "loss": 0.8282, "step": 45050 }, { "epoch": 4.14, "learning_rate": 2.9286567987496555e-05, "loss": 0.8019, "step": 45060 }, { "epoch": 4.14, "learning_rate": 2.9281971131745888e-05, "loss": 0.9511, "step": 45070 }, { "epoch": 4.14, "learning_rate": 2.9277374275995224e-05, "loss": 0.8089, "step": 45080 }, { "epoch": 4.15, "learning_rate": 2.927277742024455e-05, "loss": 0.8325, "step": 45090 }, { "epoch": 4.15, "learning_rate": 2.9268180564493887e-05, "loss": 0.8496, "step": 45100 }, { "epoch": 4.15, "learning_rate": 2.926358370874322e-05, "loss": 0.8471, "step": 45110 }, { "epoch": 4.15, "learning_rate": 2.9258986852992553e-05, "loss": 0.9354, "step": 45120 }, { "epoch": 4.15, "learning_rate": 2.9254389997241886e-05, "loss": 0.8697, "step": 45130 }, { "epoch": 4.15, "learning_rate": 2.9249793141491223e-05, "loss": 0.9789, "step": 45140 }, { "epoch": 4.15, "learning_rate": 2.9245196285740556e-05, "loss": 0.9036, "step": 45150 }, { "epoch": 4.15, "learning_rate": 2.924059942998989e-05, "loss": 0.8226, "step": 45160 }, { "epoch": 4.15, "learning_rate": 2.9236002574239225e-05, "loss": 0.8475, "step": 45170 }, { "epoch": 4.15, "learning_rate": 2.923140571848855e-05, "loss": 0.9216, "step": 45180 }, { "epoch": 4.15, "learning_rate": 2.9226808862737888e-05, "loss": 0.7838, "step": 45190 }, { "epoch": 4.16, "learning_rate": 2.922221200698722e-05, "loss": 0.9608, "step": 45200 }, { "epoch": 4.16, "learning_rate": 2.9217615151236554e-05, "loss": 0.9213, "step": 45210 }, { "epoch": 4.16, "learning_rate": 2.9213018295485887e-05, "loss": 0.8651, "step": 45220 }, { "epoch": 4.16, "learning_rate": 2.9208421439735224e-05, "loss": 0.8244, "step": 45230 }, { "epoch": 4.16, "learning_rate": 2.9203824583984557e-05, "loss": 0.7939, "step": 45240 }, { "epoch": 4.16, "learning_rate": 2.919922772823389e-05, "loss": 0.8044, "step": 45250 }, { "epoch": 4.16, "learning_rate": 2.9194630872483227e-05, "loss": 0.8593, "step": 45260 }, { "epoch": 4.16, "learning_rate": 2.9190034016732553e-05, "loss": 0.9031, "step": 45270 }, { "epoch": 4.16, "learning_rate": 2.918543716098189e-05, "loss": 0.8773, "step": 45280 }, { "epoch": 4.16, "learning_rate": 2.9180840305231222e-05, "loss": 0.86, "step": 45290 }, { "epoch": 4.16, "learning_rate": 2.9176243449480555e-05, "loss": 0.8204, "step": 45300 }, { "epoch": 4.17, "learning_rate": 2.917164659372989e-05, "loss": 0.9093, "step": 45310 }, { "epoch": 4.17, "learning_rate": 2.9167049737979225e-05, "loss": 0.7821, "step": 45320 }, { "epoch": 4.17, "learning_rate": 2.9162452882228558e-05, "loss": 1.0377, "step": 45330 }, { "epoch": 4.17, "learning_rate": 2.915785602647789e-05, "loss": 0.946, "step": 45340 }, { "epoch": 4.17, "learning_rate": 2.9153259170727228e-05, "loss": 0.8228, "step": 45350 }, { "epoch": 4.17, "learning_rate": 2.9148662314976554e-05, "loss": 0.9079, "step": 45360 }, { "epoch": 4.17, "learning_rate": 2.914406545922589e-05, "loss": 0.9341, "step": 45370 }, { "epoch": 4.17, "learning_rate": 2.9139468603475224e-05, "loss": 0.8577, "step": 45380 }, { "epoch": 4.17, "learning_rate": 2.9134871747724557e-05, "loss": 1.0103, "step": 45390 }, { "epoch": 4.17, "learning_rate": 2.913027489197389e-05, "loss": 0.822, "step": 45400 }, { "epoch": 4.17, "learning_rate": 2.9125678036223226e-05, "loss": 0.917, "step": 45410 }, { "epoch": 4.18, "learning_rate": 2.912108118047256e-05, "loss": 0.8093, "step": 45420 }, { "epoch": 4.18, "learning_rate": 2.9116484324721892e-05, "loss": 0.9749, "step": 45430 }, { "epoch": 4.18, "learning_rate": 2.911188746897123e-05, "loss": 0.7892, "step": 45440 }, { "epoch": 4.18, "learning_rate": 2.9107290613220555e-05, "loss": 1.0393, "step": 45450 }, { "epoch": 4.18, "learning_rate": 2.910269375746989e-05, "loss": 0.8884, "step": 45460 }, { "epoch": 4.18, "learning_rate": 2.9098096901719225e-05, "loss": 0.9458, "step": 45470 }, { "epoch": 4.18, "learning_rate": 2.9093500045968558e-05, "loss": 0.8521, "step": 45480 }, { "epoch": 4.18, "learning_rate": 2.908890319021789e-05, "loss": 0.8889, "step": 45490 }, { "epoch": 4.18, "learning_rate": 2.9084306334467227e-05, "loss": 0.8185, "step": 45500 }, { "epoch": 4.18, "learning_rate": 2.907970947871656e-05, "loss": 0.8895, "step": 45510 }, { "epoch": 4.18, "learning_rate": 2.9075112622965894e-05, "loss": 0.8546, "step": 45520 }, { "epoch": 4.19, "learning_rate": 2.9070515767215227e-05, "loss": 0.905, "step": 45530 }, { "epoch": 4.19, "learning_rate": 2.9065918911464556e-05, "loss": 0.8038, "step": 45540 }, { "epoch": 4.19, "learning_rate": 2.906132205571389e-05, "loss": 0.8871, "step": 45550 }, { "epoch": 4.19, "learning_rate": 2.9056725199963226e-05, "loss": 0.8666, "step": 45560 }, { "epoch": 4.19, "learning_rate": 2.905212834421256e-05, "loss": 0.8744, "step": 45570 }, { "epoch": 4.19, "learning_rate": 2.9047531488461892e-05, "loss": 0.8089, "step": 45580 }, { "epoch": 4.19, "learning_rate": 2.904293463271123e-05, "loss": 0.8781, "step": 45590 }, { "epoch": 4.19, "learning_rate": 2.9038337776960562e-05, "loss": 0.8753, "step": 45600 }, { "epoch": 4.19, "learning_rate": 2.9033740921209895e-05, "loss": 0.679, "step": 45610 }, { "epoch": 4.19, "learning_rate": 2.9029144065459228e-05, "loss": 0.8873, "step": 45620 }, { "epoch": 4.2, "learning_rate": 2.9024547209708558e-05, "loss": 0.8534, "step": 45630 }, { "epoch": 4.2, "learning_rate": 2.901995035395789e-05, "loss": 0.8848, "step": 45640 }, { "epoch": 4.2, "learning_rate": 2.9015353498207227e-05, "loss": 0.8519, "step": 45650 }, { "epoch": 4.2, "learning_rate": 2.901075664245656e-05, "loss": 0.811, "step": 45660 }, { "epoch": 4.2, "learning_rate": 2.9006159786705893e-05, "loss": 0.893, "step": 45670 }, { "epoch": 4.2, "learning_rate": 2.900156293095523e-05, "loss": 0.8887, "step": 45680 }, { "epoch": 4.2, "learning_rate": 2.8996966075204563e-05, "loss": 0.8748, "step": 45690 }, { "epoch": 4.2, "learning_rate": 2.8992369219453896e-05, "loss": 0.8665, "step": 45700 }, { "epoch": 4.2, "learning_rate": 2.898777236370323e-05, "loss": 0.8046, "step": 45710 }, { "epoch": 4.2, "learning_rate": 2.898317550795256e-05, "loss": 0.9339, "step": 45720 }, { "epoch": 4.2, "learning_rate": 2.8978578652201892e-05, "loss": 0.9129, "step": 45730 }, { "epoch": 4.21, "learning_rate": 2.897398179645123e-05, "loss": 0.9049, "step": 45740 }, { "epoch": 4.21, "learning_rate": 2.896938494070056e-05, "loss": 0.8198, "step": 45750 }, { "epoch": 4.21, "learning_rate": 2.8964788084949895e-05, "loss": 0.9131, "step": 45760 }, { "epoch": 4.21, "learning_rate": 2.896019122919923e-05, "loss": 0.8828, "step": 45770 }, { "epoch": 4.21, "learning_rate": 2.8955594373448564e-05, "loss": 0.87, "step": 45780 }, { "epoch": 4.21, "learning_rate": 2.8950997517697897e-05, "loss": 0.8534, "step": 45790 }, { "epoch": 4.21, "learning_rate": 2.894640066194723e-05, "loss": 0.8454, "step": 45800 }, { "epoch": 4.21, "learning_rate": 2.894180380619656e-05, "loss": 0.8951, "step": 45810 }, { "epoch": 4.21, "learning_rate": 2.8937206950445893e-05, "loss": 0.9974, "step": 45820 }, { "epoch": 4.21, "learning_rate": 2.893261009469523e-05, "loss": 0.8434, "step": 45830 }, { "epoch": 4.21, "learning_rate": 2.8928013238944563e-05, "loss": 0.8201, "step": 45840 }, { "epoch": 4.22, "learning_rate": 2.8923416383193896e-05, "loss": 0.8596, "step": 45850 }, { "epoch": 4.22, "learning_rate": 2.8918819527443232e-05, "loss": 0.7693, "step": 45860 }, { "epoch": 4.22, "learning_rate": 2.8914222671692565e-05, "loss": 1.0439, "step": 45870 }, { "epoch": 4.22, "learning_rate": 2.89096258159419e-05, "loss": 0.7455, "step": 45880 }, { "epoch": 4.22, "learning_rate": 2.890502896019123e-05, "loss": 0.9354, "step": 45890 }, { "epoch": 4.22, "learning_rate": 2.8900432104440568e-05, "loss": 0.8932, "step": 45900 }, { "epoch": 4.22, "learning_rate": 2.8895835248689894e-05, "loss": 0.8291, "step": 45910 }, { "epoch": 4.22, "learning_rate": 2.889123839293923e-05, "loss": 0.8358, "step": 45920 }, { "epoch": 4.22, "learning_rate": 2.8886641537188564e-05, "loss": 0.8867, "step": 45930 }, { "epoch": 4.22, "learning_rate": 2.8882044681437897e-05, "loss": 0.9469, "step": 45940 }, { "epoch": 4.22, "learning_rate": 2.887744782568723e-05, "loss": 0.7835, "step": 45950 }, { "epoch": 4.23, "learning_rate": 2.8872850969936567e-05, "loss": 0.8752, "step": 45960 }, { "epoch": 4.23, "learning_rate": 2.88682541141859e-05, "loss": 0.932, "step": 45970 }, { "epoch": 4.23, "learning_rate": 2.8863657258435233e-05, "loss": 0.8932, "step": 45980 }, { "epoch": 4.23, "learning_rate": 2.885906040268457e-05, "loss": 0.9243, "step": 45990 }, { "epoch": 4.23, "learning_rate": 2.8854463546933895e-05, "loss": 0.7882, "step": 46000 }, { "epoch": 4.23, "learning_rate": 2.8849866691183232e-05, "loss": 0.8893, "step": 46010 }, { "epoch": 4.23, "learning_rate": 2.8845269835432565e-05, "loss": 0.8427, "step": 46020 }, { "epoch": 4.23, "learning_rate": 2.8840672979681898e-05, "loss": 0.8256, "step": 46030 }, { "epoch": 4.23, "learning_rate": 2.883607612393123e-05, "loss": 0.8124, "step": 46040 }, { "epoch": 4.23, "learning_rate": 2.8831479268180568e-05, "loss": 0.8948, "step": 46050 }, { "epoch": 4.23, "learning_rate": 2.88268824124299e-05, "loss": 0.905, "step": 46060 }, { "epoch": 4.24, "learning_rate": 2.8822285556679234e-05, "loss": 0.8779, "step": 46070 }, { "epoch": 4.24, "learning_rate": 2.881768870092857e-05, "loss": 0.953, "step": 46080 }, { "epoch": 4.24, "learning_rate": 2.8813091845177897e-05, "loss": 0.8714, "step": 46090 }, { "epoch": 4.24, "learning_rate": 2.8808494989427233e-05, "loss": 0.9171, "step": 46100 }, { "epoch": 4.24, "learning_rate": 2.8803898133676566e-05, "loss": 0.9198, "step": 46110 }, { "epoch": 4.24, "learning_rate": 2.87993012779259e-05, "loss": 0.919, "step": 46120 }, { "epoch": 4.24, "learning_rate": 2.8794704422175232e-05, "loss": 0.956, "step": 46130 }, { "epoch": 4.24, "learning_rate": 2.879010756642457e-05, "loss": 0.8256, "step": 46140 }, { "epoch": 4.24, "learning_rate": 2.8785510710673902e-05, "loss": 0.858, "step": 46150 }, { "epoch": 4.24, "learning_rate": 2.8780913854923235e-05, "loss": 0.9408, "step": 46160 }, { "epoch": 4.24, "learning_rate": 2.877631699917257e-05, "loss": 1.0134, "step": 46170 }, { "epoch": 4.25, "learning_rate": 2.8771720143421898e-05, "loss": 0.8125, "step": 46180 }, { "epoch": 4.25, "learning_rate": 2.8767123287671234e-05, "loss": 0.8345, "step": 46190 }, { "epoch": 4.25, "learning_rate": 2.8762526431920567e-05, "loss": 0.9336, "step": 46200 }, { "epoch": 4.25, "learning_rate": 2.87579295761699e-05, "loss": 0.8685, "step": 46210 }, { "epoch": 4.25, "learning_rate": 2.8753332720419234e-05, "loss": 1.0485, "step": 46220 }, { "epoch": 4.25, "learning_rate": 2.874873586466857e-05, "loss": 0.8721, "step": 46230 }, { "epoch": 4.25, "learning_rate": 2.8744139008917903e-05, "loss": 0.8702, "step": 46240 }, { "epoch": 4.25, "learning_rate": 2.8739542153167236e-05, "loss": 0.7427, "step": 46250 }, { "epoch": 4.25, "learning_rate": 2.8734945297416573e-05, "loss": 0.8843, "step": 46260 }, { "epoch": 4.25, "learning_rate": 2.87303484416659e-05, "loss": 0.9359, "step": 46270 }, { "epoch": 4.25, "learning_rate": 2.8725751585915232e-05, "loss": 0.8657, "step": 46280 }, { "epoch": 4.26, "learning_rate": 2.872115473016457e-05, "loss": 0.9032, "step": 46290 }, { "epoch": 4.26, "learning_rate": 2.8716557874413902e-05, "loss": 0.8536, "step": 46300 }, { "epoch": 4.26, "learning_rate": 2.8711961018663235e-05, "loss": 0.9287, "step": 46310 }, { "epoch": 4.26, "learning_rate": 2.870736416291257e-05, "loss": 0.8799, "step": 46320 }, { "epoch": 4.26, "learning_rate": 2.8702767307161904e-05, "loss": 0.8899, "step": 46330 }, { "epoch": 4.26, "learning_rate": 2.8698170451411237e-05, "loss": 0.7844, "step": 46340 }, { "epoch": 4.26, "learning_rate": 2.869357359566057e-05, "loss": 0.8816, "step": 46350 }, { "epoch": 4.26, "learning_rate": 2.86889767399099e-05, "loss": 0.9764, "step": 46360 }, { "epoch": 4.26, "learning_rate": 2.8684379884159233e-05, "loss": 0.8704, "step": 46370 }, { "epoch": 4.26, "learning_rate": 2.867978302840857e-05, "loss": 0.9101, "step": 46380 }, { "epoch": 4.26, "learning_rate": 2.8675186172657903e-05, "loss": 0.8332, "step": 46390 }, { "epoch": 4.27, "learning_rate": 2.8670589316907236e-05, "loss": 0.922, "step": 46400 }, { "epoch": 4.27, "learning_rate": 2.8665992461156572e-05, "loss": 0.7925, "step": 46410 }, { "epoch": 4.27, "learning_rate": 2.8661395605405906e-05, "loss": 0.9326, "step": 46420 }, { "epoch": 4.27, "learning_rate": 2.865679874965524e-05, "loss": 1.0456, "step": 46430 }, { "epoch": 4.27, "learning_rate": 2.8652201893904572e-05, "loss": 0.8699, "step": 46440 }, { "epoch": 4.27, "learning_rate": 2.86476050381539e-05, "loss": 0.8291, "step": 46450 }, { "epoch": 4.27, "learning_rate": 2.8643008182403235e-05, "loss": 0.9095, "step": 46460 }, { "epoch": 4.27, "learning_rate": 2.863841132665257e-05, "loss": 0.8876, "step": 46470 }, { "epoch": 4.27, "learning_rate": 2.8633814470901904e-05, "loss": 0.8033, "step": 46480 }, { "epoch": 4.27, "learning_rate": 2.8629217615151237e-05, "loss": 0.8633, "step": 46490 }, { "epoch": 4.28, "learning_rate": 2.8624620759400574e-05, "loss": 0.8178, "step": 46500 }, { "epoch": 4.28, "learning_rate": 2.8620023903649907e-05, "loss": 0.9744, "step": 46510 }, { "epoch": 4.28, "learning_rate": 2.861542704789924e-05, "loss": 0.9828, "step": 46520 }, { "epoch": 4.28, "learning_rate": 2.8610830192148573e-05, "loss": 0.89, "step": 46530 }, { "epoch": 4.28, "learning_rate": 2.8606233336397903e-05, "loss": 0.9365, "step": 46540 }, { "epoch": 4.28, "learning_rate": 2.8601636480647236e-05, "loss": 0.9177, "step": 46550 }, { "epoch": 4.28, "learning_rate": 2.8597039624896572e-05, "loss": 0.778, "step": 46560 }, { "epoch": 4.28, "learning_rate": 2.8592442769145905e-05, "loss": 0.8607, "step": 46570 }, { "epoch": 4.28, "learning_rate": 2.858784591339524e-05, "loss": 0.7784, "step": 46580 }, { "epoch": 4.28, "learning_rate": 2.8583249057644575e-05, "loss": 0.9981, "step": 46590 }, { "epoch": 4.28, "learning_rate": 2.8578652201893908e-05, "loss": 0.8495, "step": 46600 }, { "epoch": 4.29, "learning_rate": 2.857405534614324e-05, "loss": 1.0469, "step": 46610 }, { "epoch": 4.29, "learning_rate": 2.8569458490392574e-05, "loss": 0.8481, "step": 46620 }, { "epoch": 4.29, "learning_rate": 2.8564861634641904e-05, "loss": 0.8666, "step": 46630 }, { "epoch": 4.29, "learning_rate": 2.8560264778891237e-05, "loss": 0.8656, "step": 46640 }, { "epoch": 4.29, "learning_rate": 2.8555667923140573e-05, "loss": 0.8738, "step": 46650 }, { "epoch": 4.29, "learning_rate": 2.8551071067389907e-05, "loss": 0.8721, "step": 46660 }, { "epoch": 4.29, "learning_rate": 2.854647421163924e-05, "loss": 0.7976, "step": 46670 }, { "epoch": 4.29, "learning_rate": 2.8541877355888573e-05, "loss": 1.0445, "step": 46680 }, { "epoch": 4.29, "learning_rate": 2.853728050013791e-05, "loss": 0.92, "step": 46690 }, { "epoch": 4.29, "learning_rate": 2.8532683644387242e-05, "loss": 0.9039, "step": 46700 }, { "epoch": 4.29, "learning_rate": 2.8528086788636575e-05, "loss": 0.899, "step": 46710 }, { "epoch": 4.3, "learning_rate": 2.8523489932885905e-05, "loss": 0.8439, "step": 46720 }, { "epoch": 4.3, "learning_rate": 2.8518893077135238e-05, "loss": 0.7966, "step": 46730 }, { "epoch": 4.3, "learning_rate": 2.8514296221384575e-05, "loss": 0.7881, "step": 46740 }, { "epoch": 4.3, "learning_rate": 2.8509699365633908e-05, "loss": 1.0037, "step": 46750 }, { "epoch": 4.3, "learning_rate": 2.850510250988324e-05, "loss": 0.9418, "step": 46760 }, { "epoch": 4.3, "learning_rate": 2.8500505654132574e-05, "loss": 0.8342, "step": 46770 }, { "epoch": 4.3, "learning_rate": 2.849590879838191e-05, "loss": 0.9072, "step": 46780 }, { "epoch": 4.3, "learning_rate": 2.8491311942631243e-05, "loss": 0.931, "step": 46790 }, { "epoch": 4.3, "learning_rate": 2.8486715086880577e-05, "loss": 0.855, "step": 46800 }, { "epoch": 4.3, "learning_rate": 2.8482118231129906e-05, "loss": 0.9333, "step": 46810 }, { "epoch": 4.3, "learning_rate": 2.847752137537924e-05, "loss": 0.7762, "step": 46820 }, { "epoch": 4.31, "learning_rate": 2.8472924519628576e-05, "loss": 0.7546, "step": 46830 }, { "epoch": 4.31, "learning_rate": 2.846832766387791e-05, "loss": 0.9403, "step": 46840 }, { "epoch": 4.31, "learning_rate": 2.8463730808127242e-05, "loss": 0.972, "step": 46850 }, { "epoch": 4.31, "learning_rate": 2.8459133952376575e-05, "loss": 0.8973, "step": 46860 }, { "epoch": 4.31, "learning_rate": 2.845453709662591e-05, "loss": 0.8504, "step": 46870 }, { "epoch": 4.31, "learning_rate": 2.8449940240875245e-05, "loss": 0.8674, "step": 46880 }, { "epoch": 4.31, "learning_rate": 2.8445343385124578e-05, "loss": 0.9234, "step": 46890 }, { "epoch": 4.31, "learning_rate": 2.8440746529373907e-05, "loss": 0.8575, "step": 46900 }, { "epoch": 4.31, "learning_rate": 2.843614967362324e-05, "loss": 0.9547, "step": 46910 }, { "epoch": 4.31, "learning_rate": 2.8431552817872577e-05, "loss": 0.883, "step": 46920 }, { "epoch": 4.31, "learning_rate": 2.842695596212191e-05, "loss": 0.9156, "step": 46930 }, { "epoch": 4.32, "learning_rate": 2.8422359106371243e-05, "loss": 0.827, "step": 46940 }, { "epoch": 4.32, "learning_rate": 2.8417762250620576e-05, "loss": 0.8525, "step": 46950 }, { "epoch": 4.32, "learning_rate": 2.8413165394869913e-05, "loss": 0.7925, "step": 46960 }, { "epoch": 4.32, "learning_rate": 2.8408568539119246e-05, "loss": 0.8562, "step": 46970 }, { "epoch": 4.32, "learning_rate": 2.840397168336858e-05, "loss": 0.7826, "step": 46980 }, { "epoch": 4.32, "learning_rate": 2.839937482761791e-05, "loss": 0.7767, "step": 46990 }, { "epoch": 4.32, "learning_rate": 2.8394777971867242e-05, "loss": 0.9147, "step": 47000 }, { "epoch": 4.32, "learning_rate": 2.8390181116116575e-05, "loss": 0.8441, "step": 47010 }, { "epoch": 4.32, "learning_rate": 2.838558426036591e-05, "loss": 1.0553, "step": 47020 }, { "epoch": 4.32, "learning_rate": 2.8380987404615244e-05, "loss": 0.9, "step": 47030 }, { "epoch": 4.32, "learning_rate": 2.8376390548864577e-05, "loss": 0.7648, "step": 47040 }, { "epoch": 4.33, "learning_rate": 2.8371793693113914e-05, "loss": 0.9048, "step": 47050 }, { "epoch": 4.33, "learning_rate": 2.8367196837363247e-05, "loss": 0.8749, "step": 47060 }, { "epoch": 4.33, "learning_rate": 2.836259998161258e-05, "loss": 0.9421, "step": 47070 }, { "epoch": 4.33, "learning_rate": 2.835800312586191e-05, "loss": 0.7991, "step": 47080 }, { "epoch": 4.33, "learning_rate": 2.8353406270111243e-05, "loss": 0.8868, "step": 47090 }, { "epoch": 4.33, "learning_rate": 2.8348809414360576e-05, "loss": 0.9554, "step": 47100 }, { "epoch": 4.33, "learning_rate": 2.8344212558609912e-05, "loss": 0.8974, "step": 47110 }, { "epoch": 4.33, "learning_rate": 2.8339615702859246e-05, "loss": 0.8546, "step": 47120 }, { "epoch": 4.33, "learning_rate": 2.833501884710858e-05, "loss": 0.8603, "step": 47130 }, { "epoch": 4.33, "learning_rate": 2.8330421991357915e-05, "loss": 0.949, "step": 47140 }, { "epoch": 4.33, "learning_rate": 2.8325825135607248e-05, "loss": 1.0152, "step": 47150 }, { "epoch": 4.34, "learning_rate": 2.832122827985658e-05, "loss": 0.727, "step": 47160 }, { "epoch": 4.34, "learning_rate": 2.831663142410591e-05, "loss": 0.865, "step": 47170 }, { "epoch": 4.34, "learning_rate": 2.8312034568355244e-05, "loss": 0.9026, "step": 47180 }, { "epoch": 4.34, "learning_rate": 2.8307437712604577e-05, "loss": 0.8823, "step": 47190 }, { "epoch": 4.34, "learning_rate": 2.8302840856853914e-05, "loss": 0.9217, "step": 47200 }, { "epoch": 4.34, "learning_rate": 2.8298244001103247e-05, "loss": 0.8541, "step": 47210 }, { "epoch": 4.34, "learning_rate": 2.829364714535258e-05, "loss": 0.9426, "step": 47220 }, { "epoch": 4.34, "learning_rate": 2.8289050289601916e-05, "loss": 0.8733, "step": 47230 }, { "epoch": 4.34, "learning_rate": 2.828445343385125e-05, "loss": 0.8407, "step": 47240 }, { "epoch": 4.34, "learning_rate": 2.8279856578100583e-05, "loss": 0.8767, "step": 47250 }, { "epoch": 4.34, "learning_rate": 2.8275259722349912e-05, "loss": 0.8204, "step": 47260 }, { "epoch": 4.35, "learning_rate": 2.8270662866599245e-05, "loss": 0.8518, "step": 47270 }, { "epoch": 4.35, "learning_rate": 2.826606601084858e-05, "loss": 0.8966, "step": 47280 }, { "epoch": 4.35, "learning_rate": 2.8261469155097915e-05, "loss": 0.9042, "step": 47290 }, { "epoch": 4.35, "learning_rate": 2.8256872299347248e-05, "loss": 0.8162, "step": 47300 }, { "epoch": 4.35, "learning_rate": 2.825227544359658e-05, "loss": 0.9475, "step": 47310 }, { "epoch": 4.35, "learning_rate": 2.8247678587845918e-05, "loss": 0.8407, "step": 47320 }, { "epoch": 4.35, "learning_rate": 2.824308173209525e-05, "loss": 0.8438, "step": 47330 }, { "epoch": 4.35, "learning_rate": 2.8238484876344584e-05, "loss": 0.9688, "step": 47340 }, { "epoch": 4.35, "learning_rate": 2.8233888020593913e-05, "loss": 0.7882, "step": 47350 }, { "epoch": 4.35, "learning_rate": 2.8229291164843246e-05, "loss": 0.815, "step": 47360 }, { "epoch": 4.36, "learning_rate": 2.822469430909258e-05, "loss": 0.8119, "step": 47370 }, { "epoch": 4.36, "learning_rate": 2.8220097453341916e-05, "loss": 0.7793, "step": 47380 }, { "epoch": 4.36, "learning_rate": 2.821550059759125e-05, "loss": 0.7947, "step": 47390 }, { "epoch": 4.36, "learning_rate": 2.8210903741840582e-05, "loss": 0.8964, "step": 47400 }, { "epoch": 4.36, "learning_rate": 2.820630688608992e-05, "loss": 0.8775, "step": 47410 }, { "epoch": 4.36, "learning_rate": 2.8201710030339252e-05, "loss": 1.0027, "step": 47420 }, { "epoch": 4.36, "learning_rate": 2.8197113174588585e-05, "loss": 0.9307, "step": 47430 }, { "epoch": 4.36, "learning_rate": 2.8192516318837915e-05, "loss": 0.9554, "step": 47440 }, { "epoch": 4.36, "learning_rate": 2.8187919463087248e-05, "loss": 0.8541, "step": 47450 }, { "epoch": 4.36, "learning_rate": 2.818332260733658e-05, "loss": 0.9209, "step": 47460 }, { "epoch": 4.36, "learning_rate": 2.8178725751585917e-05, "loss": 0.747, "step": 47470 }, { "epoch": 4.37, "learning_rate": 2.817412889583525e-05, "loss": 0.926, "step": 47480 }, { "epoch": 4.37, "learning_rate": 2.8169532040084583e-05, "loss": 1.0352, "step": 47490 }, { "epoch": 4.37, "learning_rate": 2.8164935184333917e-05, "loss": 0.8286, "step": 47500 }, { "epoch": 4.37, "learning_rate": 2.8160338328583253e-05, "loss": 0.9808, "step": 47510 }, { "epoch": 4.37, "learning_rate": 2.8155741472832586e-05, "loss": 0.7877, "step": 47520 }, { "epoch": 4.37, "learning_rate": 2.8151144617081916e-05, "loss": 0.8536, "step": 47530 }, { "epoch": 4.37, "learning_rate": 2.814654776133125e-05, "loss": 0.8894, "step": 47540 }, { "epoch": 4.37, "learning_rate": 2.8141950905580582e-05, "loss": 0.8619, "step": 47550 }, { "epoch": 4.37, "learning_rate": 2.813735404982992e-05, "loss": 0.9029, "step": 47560 }, { "epoch": 4.37, "learning_rate": 2.813275719407925e-05, "loss": 0.9162, "step": 47570 }, { "epoch": 4.37, "learning_rate": 2.8128160338328585e-05, "loss": 0.7798, "step": 47580 }, { "epoch": 4.38, "learning_rate": 2.8123563482577918e-05, "loss": 0.8246, "step": 47590 }, { "epoch": 4.38, "learning_rate": 2.8118966626827254e-05, "loss": 0.8959, "step": 47600 }, { "epoch": 4.38, "learning_rate": 2.8114369771076587e-05, "loss": 0.8096, "step": 47610 }, { "epoch": 4.38, "learning_rate": 2.8109772915325917e-05, "loss": 0.8525, "step": 47620 }, { "epoch": 4.38, "learning_rate": 2.810517605957525e-05, "loss": 0.8278, "step": 47630 }, { "epoch": 4.38, "learning_rate": 2.8100579203824583e-05, "loss": 0.8675, "step": 47640 }, { "epoch": 4.38, "learning_rate": 2.809598234807392e-05, "loss": 0.8743, "step": 47650 }, { "epoch": 4.38, "learning_rate": 2.8091385492323253e-05, "loss": 0.9187, "step": 47660 }, { "epoch": 4.38, "learning_rate": 2.8086788636572586e-05, "loss": 0.8874, "step": 47670 }, { "epoch": 4.38, "learning_rate": 2.808219178082192e-05, "loss": 0.9647, "step": 47680 }, { "epoch": 4.38, "learning_rate": 2.8077594925071255e-05, "loss": 0.8283, "step": 47690 }, { "epoch": 4.39, "learning_rate": 2.807299806932059e-05, "loss": 0.8396, "step": 47700 }, { "epoch": 4.39, "learning_rate": 2.8068401213569918e-05, "loss": 1.0054, "step": 47710 }, { "epoch": 4.39, "learning_rate": 2.806380435781925e-05, "loss": 0.9078, "step": 47720 }, { "epoch": 4.39, "learning_rate": 2.8059207502068584e-05, "loss": 0.9348, "step": 47730 }, { "epoch": 4.39, "learning_rate": 2.805461064631792e-05, "loss": 0.9229, "step": 47740 }, { "epoch": 4.39, "learning_rate": 2.8050013790567254e-05, "loss": 0.9466, "step": 47750 }, { "epoch": 4.39, "learning_rate": 2.8045416934816587e-05, "loss": 0.8142, "step": 47760 }, { "epoch": 4.39, "learning_rate": 2.804082007906592e-05, "loss": 0.7719, "step": 47770 }, { "epoch": 4.39, "learning_rate": 2.8036223223315257e-05, "loss": 0.9456, "step": 47780 }, { "epoch": 4.39, "learning_rate": 2.803162636756459e-05, "loss": 0.8545, "step": 47790 }, { "epoch": 4.39, "learning_rate": 2.802702951181392e-05, "loss": 0.8728, "step": 47800 }, { "epoch": 4.4, "learning_rate": 2.8022432656063252e-05, "loss": 0.8595, "step": 47810 }, { "epoch": 4.4, "learning_rate": 2.8017835800312586e-05, "loss": 0.9465, "step": 47820 }, { "epoch": 4.4, "learning_rate": 2.801323894456192e-05, "loss": 0.894, "step": 47830 }, { "epoch": 4.4, "learning_rate": 2.8008642088811255e-05, "loss": 0.7734, "step": 47840 }, { "epoch": 4.4, "learning_rate": 2.8004045233060588e-05, "loss": 1.0048, "step": 47850 }, { "epoch": 4.4, "learning_rate": 2.799944837730992e-05, "loss": 0.8117, "step": 47860 }, { "epoch": 4.4, "learning_rate": 2.7994851521559258e-05, "loss": 0.9319, "step": 47870 }, { "epoch": 4.4, "learning_rate": 2.799025466580859e-05, "loss": 0.9058, "step": 47880 }, { "epoch": 4.4, "learning_rate": 2.798565781005792e-05, "loss": 0.7815, "step": 47890 }, { "epoch": 4.4, "learning_rate": 2.7981060954307254e-05, "loss": 0.7765, "step": 47900 }, { "epoch": 4.4, "learning_rate": 2.7976464098556587e-05, "loss": 0.8025, "step": 47910 }, { "epoch": 4.41, "learning_rate": 2.797186724280592e-05, "loss": 1.0571, "step": 47920 }, { "epoch": 4.41, "learning_rate": 2.7967270387055256e-05, "loss": 0.8848, "step": 47930 }, { "epoch": 4.41, "learning_rate": 2.796267353130459e-05, "loss": 0.7901, "step": 47940 }, { "epoch": 4.41, "learning_rate": 2.7958076675553923e-05, "loss": 0.912, "step": 47950 }, { "epoch": 4.41, "learning_rate": 2.795347981980326e-05, "loss": 0.9207, "step": 47960 }, { "epoch": 4.41, "learning_rate": 2.7948882964052592e-05, "loss": 0.8375, "step": 47970 }, { "epoch": 4.41, "learning_rate": 2.7944286108301922e-05, "loss": 0.9963, "step": 47980 }, { "epoch": 4.41, "learning_rate": 2.7939689252551255e-05, "loss": 0.7746, "step": 47990 }, { "epoch": 4.41, "learning_rate": 2.7935092396800588e-05, "loss": 0.8483, "step": 48000 }, { "epoch": 4.41, "learning_rate": 2.793049554104992e-05, "loss": 0.7638, "step": 48010 }, { "epoch": 4.41, "learning_rate": 2.7925898685299258e-05, "loss": 0.8339, "step": 48020 }, { "epoch": 4.42, "learning_rate": 2.792130182954859e-05, "loss": 0.8411, "step": 48030 }, { "epoch": 4.42, "learning_rate": 2.7916704973797924e-05, "loss": 0.7975, "step": 48040 }, { "epoch": 4.42, "learning_rate": 2.791210811804726e-05, "loss": 0.8786, "step": 48050 }, { "epoch": 4.42, "learning_rate": 2.7907511262296593e-05, "loss": 0.9097, "step": 48060 }, { "epoch": 4.42, "learning_rate": 2.7902914406545923e-05, "loss": 0.7992, "step": 48070 }, { "epoch": 4.42, "learning_rate": 2.7898317550795256e-05, "loss": 0.8072, "step": 48080 }, { "epoch": 4.42, "learning_rate": 2.789372069504459e-05, "loss": 0.8874, "step": 48090 }, { "epoch": 4.42, "learning_rate": 2.7889123839293922e-05, "loss": 0.8907, "step": 48100 }, { "epoch": 4.42, "learning_rate": 2.788452698354326e-05, "loss": 0.8031, "step": 48110 }, { "epoch": 4.42, "learning_rate": 2.7879930127792592e-05, "loss": 0.9776, "step": 48120 }, { "epoch": 4.42, "learning_rate": 2.7875333272041925e-05, "loss": 0.8674, "step": 48130 }, { "epoch": 4.43, "learning_rate": 2.787073641629126e-05, "loss": 0.9637, "step": 48140 }, { "epoch": 4.43, "learning_rate": 2.7866139560540594e-05, "loss": 0.8328, "step": 48150 }, { "epoch": 4.43, "learning_rate": 2.786154270478992e-05, "loss": 0.9469, "step": 48160 }, { "epoch": 4.43, "learning_rate": 2.7856945849039257e-05, "loss": 0.8071, "step": 48170 }, { "epoch": 4.43, "learning_rate": 2.785234899328859e-05, "loss": 0.8577, "step": 48180 }, { "epoch": 4.43, "learning_rate": 2.7847752137537923e-05, "loss": 0.8496, "step": 48190 }, { "epoch": 4.43, "learning_rate": 2.784315528178726e-05, "loss": 0.95, "step": 48200 }, { "epoch": 4.43, "learning_rate": 2.7838558426036593e-05, "loss": 0.9018, "step": 48210 }, { "epoch": 4.43, "learning_rate": 2.7833961570285926e-05, "loss": 0.9186, "step": 48220 }, { "epoch": 4.43, "learning_rate": 2.7829364714535263e-05, "loss": 0.9743, "step": 48230 }, { "epoch": 4.44, "learning_rate": 2.7824767858784596e-05, "loss": 0.9739, "step": 48240 }, { "epoch": 4.44, "learning_rate": 2.7820171003033922e-05, "loss": 0.8641, "step": 48250 }, { "epoch": 4.44, "learning_rate": 2.781557414728326e-05, "loss": 0.8546, "step": 48260 }, { "epoch": 4.44, "learning_rate": 2.781097729153259e-05, "loss": 0.8458, "step": 48270 }, { "epoch": 4.44, "learning_rate": 2.7806380435781925e-05, "loss": 0.8754, "step": 48280 }, { "epoch": 4.44, "learning_rate": 2.780178358003126e-05, "loss": 0.7402, "step": 48290 }, { "epoch": 4.44, "learning_rate": 2.7797186724280594e-05, "loss": 0.9776, "step": 48300 }, { "epoch": 4.44, "learning_rate": 2.7792589868529927e-05, "loss": 0.9203, "step": 48310 }, { "epoch": 4.44, "learning_rate": 2.778799301277926e-05, "loss": 0.7745, "step": 48320 }, { "epoch": 4.44, "learning_rate": 2.7783396157028597e-05, "loss": 0.925, "step": 48330 }, { "epoch": 4.44, "learning_rate": 2.7778799301277923e-05, "loss": 0.7831, "step": 48340 }, { "epoch": 4.45, "learning_rate": 2.777420244552726e-05, "loss": 0.9424, "step": 48350 }, { "epoch": 4.45, "learning_rate": 2.7769605589776593e-05, "loss": 0.7792, "step": 48360 }, { "epoch": 4.45, "learning_rate": 2.7765008734025926e-05, "loss": 0.9017, "step": 48370 }, { "epoch": 4.45, "learning_rate": 2.7760411878275262e-05, "loss": 0.937, "step": 48380 }, { "epoch": 4.45, "learning_rate": 2.7755815022524595e-05, "loss": 0.9153, "step": 48390 }, { "epoch": 4.45, "learning_rate": 2.775121816677393e-05, "loss": 1.1007, "step": 48400 }, { "epoch": 4.45, "learning_rate": 2.774662131102326e-05, "loss": 0.7897, "step": 48410 }, { "epoch": 4.45, "learning_rate": 2.7742024455272598e-05, "loss": 0.8301, "step": 48420 }, { "epoch": 4.45, "learning_rate": 2.7737427599521924e-05, "loss": 0.85, "step": 48430 }, { "epoch": 4.45, "learning_rate": 2.773283074377126e-05, "loss": 0.873, "step": 48440 }, { "epoch": 4.45, "learning_rate": 2.7728233888020594e-05, "loss": 0.7952, "step": 48450 }, { "epoch": 4.46, "learning_rate": 2.7723637032269927e-05, "loss": 0.7536, "step": 48460 }, { "epoch": 4.46, "learning_rate": 2.7719040176519264e-05, "loss": 0.8795, "step": 48470 }, { "epoch": 4.46, "learning_rate": 2.7714443320768597e-05, "loss": 0.8286, "step": 48480 }, { "epoch": 4.46, "learning_rate": 2.770984646501793e-05, "loss": 0.9061, "step": 48490 }, { "epoch": 4.46, "learning_rate": 2.7705249609267263e-05, "loss": 0.7486, "step": 48500 }, { "epoch": 4.46, "learning_rate": 2.77006527535166e-05, "loss": 0.9408, "step": 48510 }, { "epoch": 4.46, "learning_rate": 2.7696055897765926e-05, "loss": 0.713, "step": 48520 }, { "epoch": 4.46, "learning_rate": 2.7691459042015262e-05, "loss": 0.8713, "step": 48530 }, { "epoch": 4.46, "learning_rate": 2.7686862186264595e-05, "loss": 0.8397, "step": 48540 }, { "epoch": 4.46, "learning_rate": 2.7682265330513928e-05, "loss": 0.9915, "step": 48550 }, { "epoch": 4.46, "learning_rate": 2.7677668474763265e-05, "loss": 0.8052, "step": 48560 }, { "epoch": 4.47, "learning_rate": 2.7673071619012598e-05, "loss": 0.9319, "step": 48570 }, { "epoch": 4.47, "learning_rate": 2.766847476326193e-05, "loss": 0.8942, "step": 48580 }, { "epoch": 4.47, "learning_rate": 2.7663877907511264e-05, "loss": 0.8921, "step": 48590 }, { "epoch": 4.47, "learning_rate": 2.76592810517606e-05, "loss": 0.7868, "step": 48600 }, { "epoch": 4.47, "learning_rate": 2.7654684196009927e-05, "loss": 0.9063, "step": 48610 }, { "epoch": 4.47, "learning_rate": 2.7650087340259263e-05, "loss": 0.8388, "step": 48620 }, { "epoch": 4.47, "learning_rate": 2.7645490484508596e-05, "loss": 0.8887, "step": 48630 }, { "epoch": 4.47, "learning_rate": 2.764089362875793e-05, "loss": 0.7966, "step": 48640 }, { "epoch": 4.47, "learning_rate": 2.7636296773007263e-05, "loss": 0.7931, "step": 48650 }, { "epoch": 4.47, "learning_rate": 2.76316999172566e-05, "loss": 0.9414, "step": 48660 }, { "epoch": 4.47, "learning_rate": 2.7627103061505932e-05, "loss": 0.9431, "step": 48670 }, { "epoch": 4.48, "learning_rate": 2.7622506205755265e-05, "loss": 0.9686, "step": 48680 }, { "epoch": 4.48, "learning_rate": 2.76179093500046e-05, "loss": 0.8394, "step": 48690 }, { "epoch": 4.48, "learning_rate": 2.7613312494253928e-05, "loss": 0.7169, "step": 48700 }, { "epoch": 4.48, "learning_rate": 2.7608715638503264e-05, "loss": 0.8862, "step": 48710 }, { "epoch": 4.48, "learning_rate": 2.7604118782752598e-05, "loss": 0.8381, "step": 48720 }, { "epoch": 4.48, "learning_rate": 2.759952192700193e-05, "loss": 0.8931, "step": 48730 }, { "epoch": 4.48, "learning_rate": 2.7594925071251264e-05, "loss": 0.8251, "step": 48740 }, { "epoch": 4.48, "learning_rate": 2.75903282155006e-05, "loss": 0.9216, "step": 48750 }, { "epoch": 4.48, "learning_rate": 2.7585731359749933e-05, "loss": 0.9096, "step": 48760 }, { "epoch": 4.48, "learning_rate": 2.7581134503999266e-05, "loss": 1.0064, "step": 48770 }, { "epoch": 4.48, "learning_rate": 2.7576537648248603e-05, "loss": 0.8973, "step": 48780 }, { "epoch": 4.49, "learning_rate": 2.757194079249793e-05, "loss": 0.9279, "step": 48790 }, { "epoch": 4.49, "learning_rate": 2.7567343936747266e-05, "loss": 0.9575, "step": 48800 }, { "epoch": 4.49, "learning_rate": 2.75627470809966e-05, "loss": 0.8549, "step": 48810 }, { "epoch": 4.49, "learning_rate": 2.7558150225245932e-05, "loss": 0.9699, "step": 48820 }, { "epoch": 4.49, "learning_rate": 2.7553553369495265e-05, "loss": 0.86, "step": 48830 }, { "epoch": 4.49, "learning_rate": 2.75489565137446e-05, "loss": 0.9589, "step": 48840 }, { "epoch": 4.49, "learning_rate": 2.7544359657993934e-05, "loss": 0.8465, "step": 48850 }, { "epoch": 4.49, "learning_rate": 2.7539762802243268e-05, "loss": 0.9466, "step": 48860 }, { "epoch": 4.49, "learning_rate": 2.7535165946492604e-05, "loss": 0.8894, "step": 48870 }, { "epoch": 4.49, "learning_rate": 2.753056909074193e-05, "loss": 0.9688, "step": 48880 }, { "epoch": 4.49, "learning_rate": 2.7525972234991267e-05, "loss": 0.92, "step": 48890 }, { "epoch": 4.5, "learning_rate": 2.75213753792406e-05, "loss": 0.8699, "step": 48900 }, { "epoch": 4.5, "learning_rate": 2.7516778523489933e-05, "loss": 0.8281, "step": 48910 }, { "epoch": 4.5, "learning_rate": 2.7512181667739266e-05, "loss": 0.9057, "step": 48920 }, { "epoch": 4.5, "learning_rate": 2.7507584811988603e-05, "loss": 0.8682, "step": 48930 }, { "epoch": 4.5, "learning_rate": 2.7502987956237936e-05, "loss": 0.9167, "step": 48940 }, { "epoch": 4.5, "learning_rate": 2.749839110048727e-05, "loss": 0.7963, "step": 48950 }, { "epoch": 4.5, "learning_rate": 2.7493794244736605e-05, "loss": 0.7963, "step": 48960 }, { "epoch": 4.5, "learning_rate": 2.748919738898593e-05, "loss": 0.9014, "step": 48970 }, { "epoch": 4.5, "learning_rate": 2.7484600533235265e-05, "loss": 0.7991, "step": 48980 }, { "epoch": 4.5, "learning_rate": 2.74800036774846e-05, "loss": 0.8788, "step": 48990 }, { "epoch": 4.5, "learning_rate": 2.7475406821733934e-05, "loss": 0.9506, "step": 49000 }, { "epoch": 4.51, "learning_rate": 2.7470809965983267e-05, "loss": 0.8512, "step": 49010 }, { "epoch": 4.51, "learning_rate": 2.7466213110232604e-05, "loss": 0.8877, "step": 49020 }, { "epoch": 4.51, "learning_rate": 2.7461616254481937e-05, "loss": 0.8801, "step": 49030 }, { "epoch": 4.51, "learning_rate": 2.745701939873127e-05, "loss": 0.9427, "step": 49040 }, { "epoch": 4.51, "learning_rate": 2.7452422542980606e-05, "loss": 0.8973, "step": 49050 }, { "epoch": 4.51, "learning_rate": 2.7447825687229933e-05, "loss": 0.9723, "step": 49060 }, { "epoch": 4.51, "learning_rate": 2.7443228831479266e-05, "loss": 0.9029, "step": 49070 }, { "epoch": 4.51, "learning_rate": 2.7438631975728602e-05, "loss": 0.8191, "step": 49080 }, { "epoch": 4.51, "learning_rate": 2.7434035119977935e-05, "loss": 0.7408, "step": 49090 }, { "epoch": 4.51, "learning_rate": 2.742943826422727e-05, "loss": 0.8917, "step": 49100 }, { "epoch": 4.52, "learning_rate": 2.7424841408476605e-05, "loss": 0.8561, "step": 49110 }, { "epoch": 4.52, "learning_rate": 2.7420244552725938e-05, "loss": 0.8473, "step": 49120 }, { "epoch": 4.52, "learning_rate": 2.741564769697527e-05, "loss": 0.8674, "step": 49130 }, { "epoch": 4.52, "learning_rate": 2.7411050841224604e-05, "loss": 1.0502, "step": 49140 }, { "epoch": 4.52, "learning_rate": 2.7406453985473934e-05, "loss": 0.8272, "step": 49150 }, { "epoch": 4.52, "learning_rate": 2.7401857129723267e-05, "loss": 0.83, "step": 49160 }, { "epoch": 4.52, "learning_rate": 2.7397260273972603e-05, "loss": 0.7981, "step": 49170 }, { "epoch": 4.52, "learning_rate": 2.7392663418221937e-05, "loss": 0.9214, "step": 49180 }, { "epoch": 4.52, "learning_rate": 2.738806656247127e-05, "loss": 0.8317, "step": 49190 }, { "epoch": 4.52, "learning_rate": 2.7383469706720606e-05, "loss": 0.902, "step": 49200 }, { "epoch": 4.52, "learning_rate": 2.737887285096994e-05, "loss": 0.7654, "step": 49210 }, { "epoch": 4.53, "learning_rate": 2.7374275995219272e-05, "loss": 0.9585, "step": 49220 }, { "epoch": 4.53, "learning_rate": 2.7369679139468605e-05, "loss": 0.9021, "step": 49230 }, { "epoch": 4.53, "learning_rate": 2.7365082283717935e-05, "loss": 0.9739, "step": 49240 }, { "epoch": 4.53, "learning_rate": 2.7360485427967268e-05, "loss": 0.6743, "step": 49250 }, { "epoch": 4.53, "learning_rate": 2.7355888572216605e-05, "loss": 0.7694, "step": 49260 }, { "epoch": 4.53, "learning_rate": 2.7351291716465938e-05, "loss": 0.7134, "step": 49270 }, { "epoch": 4.53, "learning_rate": 2.734669486071527e-05, "loss": 0.8815, "step": 49280 }, { "epoch": 4.53, "learning_rate": 2.7342098004964607e-05, "loss": 0.8595, "step": 49290 }, { "epoch": 4.53, "learning_rate": 2.733750114921394e-05, "loss": 1.0906, "step": 49300 }, { "epoch": 4.53, "learning_rate": 2.7332904293463274e-05, "loss": 0.9937, "step": 49310 }, { "epoch": 4.53, "learning_rate": 2.7328307437712607e-05, "loss": 0.8216, "step": 49320 }, { "epoch": 4.54, "learning_rate": 2.7323710581961943e-05, "loss": 0.9512, "step": 49330 }, { "epoch": 4.54, "learning_rate": 2.731911372621127e-05, "loss": 0.8539, "step": 49340 }, { "epoch": 4.54, "learning_rate": 2.7314516870460606e-05, "loss": 0.9839, "step": 49350 }, { "epoch": 4.54, "learning_rate": 2.730992001470994e-05, "loss": 0.8374, "step": 49360 }, { "epoch": 4.54, "learning_rate": 2.7305323158959272e-05, "loss": 0.8728, "step": 49370 }, { "epoch": 4.54, "learning_rate": 2.730072630320861e-05, "loss": 0.8229, "step": 49380 }, { "epoch": 4.54, "learning_rate": 2.729612944745794e-05, "loss": 0.9108, "step": 49390 }, { "epoch": 4.54, "learning_rate": 2.7291532591707275e-05, "loss": 0.8027, "step": 49400 }, { "epoch": 4.54, "learning_rate": 2.7286935735956608e-05, "loss": 0.944, "step": 49410 }, { "epoch": 4.54, "learning_rate": 2.7282338880205944e-05, "loss": 0.836, "step": 49420 }, { "epoch": 4.54, "learning_rate": 2.727774202445527e-05, "loss": 0.7581, "step": 49430 }, { "epoch": 4.55, "learning_rate": 2.7273145168704607e-05, "loss": 0.9314, "step": 49440 }, { "epoch": 4.55, "learning_rate": 2.726854831295394e-05, "loss": 0.8202, "step": 49450 }, { "epoch": 4.55, "learning_rate": 2.7263951457203273e-05, "loss": 0.8887, "step": 49460 }, { "epoch": 4.55, "learning_rate": 2.7259354601452606e-05, "loss": 0.8689, "step": 49470 }, { "epoch": 4.55, "learning_rate": 2.7254757745701943e-05, "loss": 0.7099, "step": 49480 }, { "epoch": 4.55, "learning_rate": 2.7250160889951276e-05, "loss": 0.8208, "step": 49490 }, { "epoch": 4.55, "learning_rate": 2.724556403420061e-05, "loss": 0.9629, "step": 49500 }, { "epoch": 4.55, "learning_rate": 2.7240967178449945e-05, "loss": 0.8895, "step": 49510 }, { "epoch": 4.55, "learning_rate": 2.7236370322699272e-05, "loss": 0.8261, "step": 49520 }, { "epoch": 4.55, "learning_rate": 2.7231773466948608e-05, "loss": 0.8029, "step": 49530 }, { "epoch": 4.55, "learning_rate": 2.722717661119794e-05, "loss": 0.937, "step": 49540 }, { "epoch": 4.56, "learning_rate": 2.7222579755447274e-05, "loss": 0.835, "step": 49550 }, { "epoch": 4.56, "learning_rate": 2.7217982899696608e-05, "loss": 0.9548, "step": 49560 }, { "epoch": 4.56, "learning_rate": 2.7213386043945944e-05, "loss": 0.8881, "step": 49570 }, { "epoch": 4.56, "learning_rate": 2.7208789188195277e-05, "loss": 0.8977, "step": 49580 }, { "epoch": 4.56, "learning_rate": 2.720419233244461e-05, "loss": 0.922, "step": 49590 }, { "epoch": 4.56, "learning_rate": 2.7199595476693947e-05, "loss": 0.8171, "step": 49600 }, { "epoch": 4.56, "learning_rate": 2.7194998620943273e-05, "loss": 1.0238, "step": 49610 }, { "epoch": 4.56, "learning_rate": 2.719040176519261e-05, "loss": 0.9327, "step": 49620 }, { "epoch": 4.56, "learning_rate": 2.7185804909441943e-05, "loss": 0.831, "step": 49630 }, { "epoch": 4.56, "learning_rate": 2.7181208053691276e-05, "loss": 0.8423, "step": 49640 }, { "epoch": 4.56, "learning_rate": 2.717661119794061e-05, "loss": 0.7356, "step": 49650 }, { "epoch": 4.57, "learning_rate": 2.7172014342189945e-05, "loss": 0.9189, "step": 49660 }, { "epoch": 4.57, "learning_rate": 2.716741748643928e-05, "loss": 0.8041, "step": 49670 }, { "epoch": 4.57, "learning_rate": 2.716282063068861e-05, "loss": 0.9195, "step": 49680 }, { "epoch": 4.57, "learning_rate": 2.7158223774937948e-05, "loss": 0.8655, "step": 49690 }, { "epoch": 4.57, "learning_rate": 2.7153626919187274e-05, "loss": 0.8352, "step": 49700 }, { "epoch": 4.57, "learning_rate": 2.714903006343661e-05, "loss": 1.0295, "step": 49710 }, { "epoch": 4.57, "learning_rate": 2.7144433207685944e-05, "loss": 0.9077, "step": 49720 }, { "epoch": 4.57, "learning_rate": 2.7139836351935277e-05, "loss": 0.7652, "step": 49730 }, { "epoch": 4.57, "learning_rate": 2.713523949618461e-05, "loss": 0.8936, "step": 49740 }, { "epoch": 4.57, "learning_rate": 2.7130642640433946e-05, "loss": 0.9459, "step": 49750 }, { "epoch": 4.57, "learning_rate": 2.712604578468328e-05, "loss": 0.7694, "step": 49760 }, { "epoch": 4.58, "learning_rate": 2.7121448928932613e-05, "loss": 0.7964, "step": 49770 }, { "epoch": 4.58, "learning_rate": 2.711685207318195e-05, "loss": 0.733, "step": 49780 }, { "epoch": 4.58, "learning_rate": 2.7112255217431275e-05, "loss": 0.8699, "step": 49790 }, { "epoch": 4.58, "learning_rate": 2.710765836168061e-05, "loss": 0.8864, "step": 49800 }, { "epoch": 4.58, "learning_rate": 2.7103061505929945e-05, "loss": 0.9037, "step": 49810 }, { "epoch": 4.58, "learning_rate": 2.7098464650179278e-05, "loss": 0.9634, "step": 49820 }, { "epoch": 4.58, "learning_rate": 2.709386779442861e-05, "loss": 0.8603, "step": 49830 }, { "epoch": 4.58, "learning_rate": 2.7089270938677948e-05, "loss": 0.9086, "step": 49840 }, { "epoch": 4.58, "learning_rate": 2.708467408292728e-05, "loss": 0.7415, "step": 49850 }, { "epoch": 4.58, "learning_rate": 2.7080077227176614e-05, "loss": 0.8008, "step": 49860 }, { "epoch": 4.58, "learning_rate": 2.707548037142595e-05, "loss": 0.864, "step": 49870 }, { "epoch": 4.59, "learning_rate": 2.7070883515675277e-05, "loss": 0.8207, "step": 49880 }, { "epoch": 4.59, "learning_rate": 2.706628665992461e-05, "loss": 0.8281, "step": 49890 }, { "epoch": 4.59, "learning_rate": 2.7061689804173946e-05, "loss": 0.9277, "step": 49900 }, { "epoch": 4.59, "learning_rate": 2.705709294842328e-05, "loss": 0.7534, "step": 49910 }, { "epoch": 4.59, "learning_rate": 2.7052496092672612e-05, "loss": 0.9227, "step": 49920 }, { "epoch": 4.59, "learning_rate": 2.704789923692195e-05, "loss": 0.9049, "step": 49930 }, { "epoch": 4.59, "learning_rate": 2.7043302381171282e-05, "loss": 0.8882, "step": 49940 }, { "epoch": 4.59, "learning_rate": 2.7038705525420615e-05, "loss": 0.8415, "step": 49950 }, { "epoch": 4.59, "learning_rate": 2.7034108669669948e-05, "loss": 0.729, "step": 49960 }, { "epoch": 4.59, "learning_rate": 2.7029511813919278e-05, "loss": 0.9083, "step": 49970 }, { "epoch": 4.6, "learning_rate": 2.702491495816861e-05, "loss": 0.7266, "step": 49980 }, { "epoch": 4.6, "learning_rate": 2.7020318102417947e-05, "loss": 1.0837, "step": 49990 }, { "epoch": 4.6, "learning_rate": 2.701572124666728e-05, "loss": 0.83, "step": 50000 }, { "epoch": 4.6, "eval_accuracy": 0.5803493449781659, "eval_loss": 0.8779388666152954, "eval_runtime": 159.6304, "eval_samples_per_second": 28.691, "eval_steps_per_second": 3.59, "step": 50000 }, { "epoch": 4.6, "learning_rate": 2.7011124390916614e-05, "loss": 0.8445, "step": 50010 }, { "epoch": 4.6, "learning_rate": 2.700652753516595e-05, "loss": 0.6649, "step": 50020 }, { "epoch": 4.6, "learning_rate": 2.7001930679415283e-05, "loss": 0.8419, "step": 50030 }, { "epoch": 4.6, "learning_rate": 2.6997333823664616e-05, "loss": 0.8287, "step": 50040 }, { "epoch": 4.6, "learning_rate": 2.699273696791395e-05, "loss": 0.8785, "step": 50050 }, { "epoch": 4.6, "learning_rate": 2.698814011216328e-05, "loss": 0.855, "step": 50060 }, { "epoch": 4.6, "learning_rate": 2.6983543256412612e-05, "loss": 0.9627, "step": 50070 }, { "epoch": 4.6, "learning_rate": 2.697894640066195e-05, "loss": 0.7754, "step": 50080 }, { "epoch": 4.61, "learning_rate": 2.697434954491128e-05, "loss": 0.8684, "step": 50090 }, { "epoch": 4.61, "learning_rate": 2.6969752689160615e-05, "loss": 0.9453, "step": 50100 }, { "epoch": 4.61, "learning_rate": 2.696515583340995e-05, "loss": 0.9281, "step": 50110 }, { "epoch": 4.61, "learning_rate": 2.6960558977659284e-05, "loss": 0.8317, "step": 50120 }, { "epoch": 4.61, "learning_rate": 2.6955962121908617e-05, "loss": 0.8989, "step": 50130 }, { "epoch": 4.61, "learning_rate": 2.695136526615795e-05, "loss": 0.9424, "step": 50140 }, { "epoch": 4.61, "learning_rate": 2.694676841040728e-05, "loss": 0.915, "step": 50150 }, { "epoch": 4.61, "learning_rate": 2.6942171554656613e-05, "loss": 0.9707, "step": 50160 }, { "epoch": 4.61, "learning_rate": 2.693757469890595e-05, "loss": 0.9163, "step": 50170 }, { "epoch": 4.61, "learning_rate": 2.6932977843155283e-05, "loss": 0.8871, "step": 50180 }, { "epoch": 4.61, "learning_rate": 2.6928380987404616e-05, "loss": 0.8082, "step": 50190 }, { "epoch": 4.62, "learning_rate": 2.6923784131653952e-05, "loss": 0.9226, "step": 50200 }, { "epoch": 4.62, "learning_rate": 2.6919187275903285e-05, "loss": 0.8025, "step": 50210 }, { "epoch": 4.62, "learning_rate": 2.691459042015262e-05, "loss": 0.9529, "step": 50220 }, { "epoch": 4.62, "learning_rate": 2.690999356440195e-05, "loss": 0.9286, "step": 50230 }, { "epoch": 4.62, "learning_rate": 2.690539670865128e-05, "loss": 0.9266, "step": 50240 }, { "epoch": 4.62, "learning_rate": 2.6900799852900614e-05, "loss": 0.9465, "step": 50250 }, { "epoch": 4.62, "learning_rate": 2.689620299714995e-05, "loss": 1.0059, "step": 50260 }, { "epoch": 4.62, "learning_rate": 2.6891606141399284e-05, "loss": 0.8727, "step": 50270 }, { "epoch": 4.62, "learning_rate": 2.6887009285648617e-05, "loss": 0.8341, "step": 50280 }, { "epoch": 4.62, "learning_rate": 2.688241242989795e-05, "loss": 0.8892, "step": 50290 }, { "epoch": 4.62, "learning_rate": 2.6877815574147287e-05, "loss": 0.8551, "step": 50300 }, { "epoch": 4.63, "learning_rate": 2.687321871839662e-05, "loss": 0.9622, "step": 50310 }, { "epoch": 4.63, "learning_rate": 2.6868621862645953e-05, "loss": 0.952, "step": 50320 }, { "epoch": 4.63, "learning_rate": 2.6864025006895283e-05, "loss": 0.9877, "step": 50330 }, { "epoch": 4.63, "learning_rate": 2.6859428151144616e-05, "loss": 0.9173, "step": 50340 }, { "epoch": 4.63, "learning_rate": 2.6854831295393952e-05, "loss": 0.7489, "step": 50350 }, { "epoch": 4.63, "learning_rate": 2.6850234439643285e-05, "loss": 0.9353, "step": 50360 }, { "epoch": 4.63, "learning_rate": 2.6845637583892618e-05, "loss": 0.9447, "step": 50370 }, { "epoch": 4.63, "learning_rate": 2.684104072814195e-05, "loss": 0.7913, "step": 50380 }, { "epoch": 4.63, "learning_rate": 2.6836443872391288e-05, "loss": 0.8951, "step": 50390 }, { "epoch": 4.63, "learning_rate": 2.683184701664062e-05, "loss": 0.9132, "step": 50400 }, { "epoch": 4.63, "learning_rate": 2.6827250160889954e-05, "loss": 0.8863, "step": 50410 }, { "epoch": 4.64, "learning_rate": 2.6822653305139284e-05, "loss": 0.836, "step": 50420 }, { "epoch": 4.64, "learning_rate": 2.6818056449388617e-05, "loss": 0.9322, "step": 50430 }, { "epoch": 4.64, "learning_rate": 2.6813459593637953e-05, "loss": 0.9141, "step": 50440 }, { "epoch": 4.64, "learning_rate": 2.6808862737887286e-05, "loss": 0.8345, "step": 50450 }, { "epoch": 4.64, "learning_rate": 2.680426588213662e-05, "loss": 0.883, "step": 50460 }, { "epoch": 4.64, "learning_rate": 2.6799669026385953e-05, "loss": 0.9256, "step": 50470 }, { "epoch": 4.64, "learning_rate": 2.679507217063529e-05, "loss": 0.8171, "step": 50480 }, { "epoch": 4.64, "learning_rate": 2.6790475314884622e-05, "loss": 0.8259, "step": 50490 }, { "epoch": 4.64, "learning_rate": 2.6785878459133955e-05, "loss": 0.8009, "step": 50500 }, { "epoch": 4.64, "learning_rate": 2.6781281603383285e-05, "loss": 1.0404, "step": 50510 }, { "epoch": 4.64, "learning_rate": 2.6776684747632618e-05, "loss": 0.8992, "step": 50520 }, { "epoch": 4.65, "learning_rate": 2.6772087891881955e-05, "loss": 0.8607, "step": 50530 }, { "epoch": 4.65, "learning_rate": 2.6767491036131288e-05, "loss": 0.8318, "step": 50540 }, { "epoch": 4.65, "learning_rate": 2.676289418038062e-05, "loss": 0.8607, "step": 50550 }, { "epoch": 4.65, "learning_rate": 2.6758297324629954e-05, "loss": 0.8744, "step": 50560 }, { "epoch": 4.65, "learning_rate": 2.675370046887929e-05, "loss": 0.9337, "step": 50570 }, { "epoch": 4.65, "learning_rate": 2.6749103613128623e-05, "loss": 0.826, "step": 50580 }, { "epoch": 4.65, "learning_rate": 2.6744506757377956e-05, "loss": 0.8597, "step": 50590 }, { "epoch": 4.65, "learning_rate": 2.6739909901627286e-05, "loss": 0.9297, "step": 50600 }, { "epoch": 4.65, "learning_rate": 2.673531304587662e-05, "loss": 0.8989, "step": 50610 }, { "epoch": 4.65, "learning_rate": 2.6730716190125952e-05, "loss": 0.9289, "step": 50620 }, { "epoch": 4.65, "learning_rate": 2.672611933437529e-05, "loss": 0.9136, "step": 50630 }, { "epoch": 4.66, "learning_rate": 2.6721522478624622e-05, "loss": 0.8412, "step": 50640 }, { "epoch": 4.66, "learning_rate": 2.6716925622873955e-05, "loss": 0.8362, "step": 50650 }, { "epoch": 4.66, "learning_rate": 2.671232876712329e-05, "loss": 0.961, "step": 50660 }, { "epoch": 4.66, "learning_rate": 2.6707731911372625e-05, "loss": 0.7808, "step": 50670 }, { "epoch": 4.66, "learning_rate": 2.6703135055621958e-05, "loss": 0.7968, "step": 50680 }, { "epoch": 4.66, "learning_rate": 2.6698538199871287e-05, "loss": 0.885, "step": 50690 }, { "epoch": 4.66, "learning_rate": 2.669394134412062e-05, "loss": 0.9704, "step": 50700 }, { "epoch": 4.66, "learning_rate": 2.6689344488369954e-05, "loss": 0.8842, "step": 50710 }, { "epoch": 4.66, "learning_rate": 2.668474763261929e-05, "loss": 0.8323, "step": 50720 }, { "epoch": 4.66, "learning_rate": 2.6680150776868623e-05, "loss": 0.8292, "step": 50730 }, { "epoch": 4.66, "learning_rate": 2.6675553921117956e-05, "loss": 0.9217, "step": 50740 }, { "epoch": 4.67, "learning_rate": 2.6670957065367293e-05, "loss": 0.8719, "step": 50750 }, { "epoch": 4.67, "learning_rate": 2.6666360209616626e-05, "loss": 0.9303, "step": 50760 }, { "epoch": 4.67, "learning_rate": 2.666176335386596e-05, "loss": 0.8212, "step": 50770 }, { "epoch": 4.67, "learning_rate": 2.665716649811529e-05, "loss": 0.8679, "step": 50780 }, { "epoch": 4.67, "learning_rate": 2.665256964236462e-05, "loss": 0.8237, "step": 50790 }, { "epoch": 4.67, "learning_rate": 2.6647972786613955e-05, "loss": 0.8318, "step": 50800 }, { "epoch": 4.67, "learning_rate": 2.664337593086329e-05, "loss": 0.9479, "step": 50810 }, { "epoch": 4.67, "learning_rate": 2.6638779075112624e-05, "loss": 0.9581, "step": 50820 }, { "epoch": 4.67, "learning_rate": 2.6634182219361957e-05, "loss": 0.9461, "step": 50830 }, { "epoch": 4.67, "learning_rate": 2.6629585363611294e-05, "loss": 0.8883, "step": 50840 }, { "epoch": 4.68, "learning_rate": 2.6624988507860627e-05, "loss": 0.9643, "step": 50850 }, { "epoch": 4.68, "learning_rate": 2.662039165210996e-05, "loss": 1.0772, "step": 50860 }, { "epoch": 4.68, "learning_rate": 2.661579479635929e-05, "loss": 0.7682, "step": 50870 }, { "epoch": 4.68, "learning_rate": 2.6611197940608623e-05, "loss": 0.9346, "step": 50880 }, { "epoch": 4.68, "learning_rate": 2.6606601084857956e-05, "loss": 0.9026, "step": 50890 }, { "epoch": 4.68, "learning_rate": 2.6602004229107292e-05, "loss": 0.9374, "step": 50900 }, { "epoch": 4.68, "learning_rate": 2.6597407373356625e-05, "loss": 0.869, "step": 50910 }, { "epoch": 4.68, "learning_rate": 2.659281051760596e-05, "loss": 0.8596, "step": 50920 }, { "epoch": 4.68, "learning_rate": 2.6588213661855295e-05, "loss": 0.8435, "step": 50930 }, { "epoch": 4.68, "learning_rate": 2.6583616806104628e-05, "loss": 0.8886, "step": 50940 }, { "epoch": 4.68, "learning_rate": 2.657901995035396e-05, "loss": 0.9091, "step": 50950 }, { "epoch": 4.69, "learning_rate": 2.657442309460329e-05, "loss": 0.8025, "step": 50960 }, { "epoch": 4.69, "learning_rate": 2.6569826238852624e-05, "loss": 0.9175, "step": 50970 }, { "epoch": 4.69, "learning_rate": 2.6565229383101957e-05, "loss": 0.8856, "step": 50980 }, { "epoch": 4.69, "learning_rate": 2.6560632527351294e-05, "loss": 0.9063, "step": 50990 }, { "epoch": 4.69, "learning_rate": 2.6556035671600627e-05, "loss": 0.8348, "step": 51000 }, { "epoch": 4.69, "learning_rate": 2.655143881584996e-05, "loss": 0.92, "step": 51010 }, { "epoch": 4.69, "learning_rate": 2.6546841960099296e-05, "loss": 0.9289, "step": 51020 }, { "epoch": 4.69, "learning_rate": 2.654224510434863e-05, "loss": 0.8535, "step": 51030 }, { "epoch": 4.69, "learning_rate": 2.6537648248597962e-05, "loss": 0.9998, "step": 51040 }, { "epoch": 4.69, "learning_rate": 2.6533051392847292e-05, "loss": 0.9593, "step": 51050 }, { "epoch": 4.69, "learning_rate": 2.6528454537096625e-05, "loss": 0.8861, "step": 51060 }, { "epoch": 4.7, "learning_rate": 2.6523857681345958e-05, "loss": 0.8441, "step": 51070 }, { "epoch": 4.7, "learning_rate": 2.6519260825595295e-05, "loss": 0.8479, "step": 51080 }, { "epoch": 4.7, "learning_rate": 2.6514663969844628e-05, "loss": 0.824, "step": 51090 }, { "epoch": 4.7, "learning_rate": 2.651006711409396e-05, "loss": 0.8438, "step": 51100 }, { "epoch": 4.7, "learning_rate": 2.6505470258343294e-05, "loss": 0.7748, "step": 51110 }, { "epoch": 4.7, "learning_rate": 2.650087340259263e-05, "loss": 0.7988, "step": 51120 }, { "epoch": 4.7, "learning_rate": 2.6496276546841964e-05, "loss": 0.9921, "step": 51130 }, { "epoch": 4.7, "learning_rate": 2.6491679691091293e-05, "loss": 0.9049, "step": 51140 }, { "epoch": 4.7, "learning_rate": 2.6487082835340626e-05, "loss": 0.9944, "step": 51150 }, { "epoch": 4.7, "learning_rate": 2.648248597958996e-05, "loss": 0.8932, "step": 51160 }, { "epoch": 4.7, "learning_rate": 2.6477889123839296e-05, "loss": 0.7897, "step": 51170 }, { "epoch": 4.71, "learning_rate": 2.647329226808863e-05, "loss": 0.9216, "step": 51180 }, { "epoch": 4.71, "learning_rate": 2.6468695412337962e-05, "loss": 0.9274, "step": 51190 }, { "epoch": 4.71, "learning_rate": 2.6464098556587295e-05, "loss": 0.7645, "step": 51200 }, { "epoch": 4.71, "learning_rate": 2.6459501700836632e-05, "loss": 1.0111, "step": 51210 }, { "epoch": 4.71, "learning_rate": 2.6454904845085965e-05, "loss": 0.8902, "step": 51220 }, { "epoch": 4.71, "learning_rate": 2.6450307989335295e-05, "loss": 0.8836, "step": 51230 }, { "epoch": 4.71, "learning_rate": 2.6445711133584628e-05, "loss": 0.8934, "step": 51240 }, { "epoch": 4.71, "learning_rate": 2.644111427783396e-05, "loss": 0.8772, "step": 51250 }, { "epoch": 4.71, "learning_rate": 2.6436517422083297e-05, "loss": 0.8492, "step": 51260 }, { "epoch": 4.71, "learning_rate": 2.643192056633263e-05, "loss": 0.8671, "step": 51270 }, { "epoch": 4.71, "learning_rate": 2.6427323710581963e-05, "loss": 0.9104, "step": 51280 }, { "epoch": 4.72, "learning_rate": 2.6422726854831296e-05, "loss": 1.0197, "step": 51290 }, { "epoch": 4.72, "learning_rate": 2.6418129999080633e-05, "loss": 0.7811, "step": 51300 }, { "epoch": 4.72, "learning_rate": 2.6413533143329966e-05, "loss": 0.8248, "step": 51310 }, { "epoch": 4.72, "learning_rate": 2.6408936287579296e-05, "loss": 0.8957, "step": 51320 }, { "epoch": 4.72, "learning_rate": 2.640433943182863e-05, "loss": 0.9322, "step": 51330 }, { "epoch": 4.72, "learning_rate": 2.6399742576077962e-05, "loss": 0.9086, "step": 51340 }, { "epoch": 4.72, "learning_rate": 2.63951457203273e-05, "loss": 1.0306, "step": 51350 }, { "epoch": 4.72, "learning_rate": 2.639054886457663e-05, "loss": 0.9506, "step": 51360 }, { "epoch": 4.72, "learning_rate": 2.6385952008825965e-05, "loss": 0.9135, "step": 51370 }, { "epoch": 4.72, "learning_rate": 2.6381355153075298e-05, "loss": 1.023, "step": 51380 }, { "epoch": 4.72, "learning_rate": 2.6376758297324634e-05, "loss": 1.0377, "step": 51390 }, { "epoch": 4.73, "learning_rate": 2.6372161441573967e-05, "loss": 0.9436, "step": 51400 }, { "epoch": 4.73, "learning_rate": 2.6367564585823297e-05, "loss": 0.919, "step": 51410 }, { "epoch": 4.73, "learning_rate": 2.636296773007263e-05, "loss": 0.9182, "step": 51420 }, { "epoch": 4.73, "learning_rate": 2.6358370874321963e-05, "loss": 0.8771, "step": 51430 }, { "epoch": 4.73, "learning_rate": 2.6353774018571296e-05, "loss": 0.9078, "step": 51440 }, { "epoch": 4.73, "learning_rate": 2.6349177162820633e-05, "loss": 0.8106, "step": 51450 }, { "epoch": 4.73, "learning_rate": 2.6344580307069966e-05, "loss": 0.8999, "step": 51460 }, { "epoch": 4.73, "learning_rate": 2.63399834513193e-05, "loss": 0.8866, "step": 51470 }, { "epoch": 4.73, "learning_rate": 2.6335386595568635e-05, "loss": 0.781, "step": 51480 }, { "epoch": 4.73, "learning_rate": 2.633078973981797e-05, "loss": 0.7774, "step": 51490 }, { "epoch": 4.73, "learning_rate": 2.6326192884067298e-05, "loss": 0.889, "step": 51500 }, { "epoch": 4.74, "learning_rate": 2.632159602831663e-05, "loss": 0.9658, "step": 51510 }, { "epoch": 4.74, "learning_rate": 2.6316999172565964e-05, "loss": 0.8539, "step": 51520 }, { "epoch": 4.74, "learning_rate": 2.6312402316815297e-05, "loss": 0.863, "step": 51530 }, { "epoch": 4.74, "learning_rate": 2.6307805461064634e-05, "loss": 0.7877, "step": 51540 }, { "epoch": 4.74, "learning_rate": 2.6303208605313967e-05, "loss": 0.982, "step": 51550 }, { "epoch": 4.74, "learning_rate": 2.62986117495633e-05, "loss": 0.9533, "step": 51560 }, { "epoch": 4.74, "learning_rate": 2.6294014893812637e-05, "loss": 0.9807, "step": 51570 }, { "epoch": 4.74, "learning_rate": 2.628941803806197e-05, "loss": 0.8277, "step": 51580 }, { "epoch": 4.74, "learning_rate": 2.62848211823113e-05, "loss": 0.9131, "step": 51590 }, { "epoch": 4.74, "learning_rate": 2.6280224326560632e-05, "loss": 0.9062, "step": 51600 }, { "epoch": 4.74, "learning_rate": 2.6275627470809965e-05, "loss": 0.9347, "step": 51610 }, { "epoch": 4.75, "learning_rate": 2.62710306150593e-05, "loss": 0.9438, "step": 51620 }, { "epoch": 4.75, "learning_rate": 2.6266433759308635e-05, "loss": 0.7752, "step": 51630 }, { "epoch": 4.75, "learning_rate": 2.6261836903557968e-05, "loss": 0.813, "step": 51640 }, { "epoch": 4.75, "learning_rate": 2.62572400478073e-05, "loss": 0.935, "step": 51650 }, { "epoch": 4.75, "learning_rate": 2.6252643192056638e-05, "loss": 0.943, "step": 51660 }, { "epoch": 4.75, "learning_rate": 2.624804633630597e-05, "loss": 0.9554, "step": 51670 }, { "epoch": 4.75, "learning_rate": 2.62434494805553e-05, "loss": 0.8154, "step": 51680 }, { "epoch": 4.75, "learning_rate": 2.6238852624804634e-05, "loss": 0.7389, "step": 51690 }, { "epoch": 4.75, "learning_rate": 2.6234255769053967e-05, "loss": 0.8479, "step": 51700 }, { "epoch": 4.75, "learning_rate": 2.62296589133033e-05, "loss": 0.8776, "step": 51710 }, { "epoch": 4.75, "learning_rate": 2.6225062057552636e-05, "loss": 0.9495, "step": 51720 }, { "epoch": 4.76, "learning_rate": 2.622046520180197e-05, "loss": 0.8969, "step": 51730 }, { "epoch": 4.76, "learning_rate": 2.6215868346051302e-05, "loss": 0.9842, "step": 51740 }, { "epoch": 4.76, "learning_rate": 2.621127149030064e-05, "loss": 0.8632, "step": 51750 }, { "epoch": 4.76, "learning_rate": 2.6206674634549972e-05, "loss": 0.8384, "step": 51760 }, { "epoch": 4.76, "learning_rate": 2.6202077778799298e-05, "loss": 0.8757, "step": 51770 }, { "epoch": 4.76, "learning_rate": 2.6197480923048635e-05, "loss": 0.7867, "step": 51780 }, { "epoch": 4.76, "learning_rate": 2.6192884067297968e-05, "loss": 1.0321, "step": 51790 }, { "epoch": 4.76, "learning_rate": 2.61882872115473e-05, "loss": 0.8448, "step": 51800 }, { "epoch": 4.76, "learning_rate": 2.6183690355796637e-05, "loss": 0.9332, "step": 51810 }, { "epoch": 4.76, "learning_rate": 2.617909350004597e-05, "loss": 0.9388, "step": 51820 }, { "epoch": 4.77, "learning_rate": 2.6174496644295304e-05, "loss": 0.9202, "step": 51830 }, { "epoch": 4.77, "learning_rate": 2.616989978854464e-05, "loss": 0.8806, "step": 51840 }, { "epoch": 4.77, "learning_rate": 2.6165302932793973e-05, "loss": 0.8246, "step": 51850 }, { "epoch": 4.77, "learning_rate": 2.61607060770433e-05, "loss": 0.943, "step": 51860 }, { "epoch": 4.77, "learning_rate": 2.6156109221292636e-05, "loss": 0.9131, "step": 51870 }, { "epoch": 4.77, "learning_rate": 2.615151236554197e-05, "loss": 0.8579, "step": 51880 }, { "epoch": 4.77, "learning_rate": 2.6146915509791302e-05, "loss": 0.9344, "step": 51890 }, { "epoch": 4.77, "learning_rate": 2.614231865404064e-05, "loss": 0.9077, "step": 51900 }, { "epoch": 4.77, "learning_rate": 2.6137721798289972e-05, "loss": 0.7841, "step": 51910 }, { "epoch": 4.77, "learning_rate": 2.6133124942539305e-05, "loss": 0.7951, "step": 51920 }, { "epoch": 4.77, "learning_rate": 2.6128528086788638e-05, "loss": 0.8049, "step": 51930 }, { "epoch": 4.78, "learning_rate": 2.6123931231037974e-05, "loss": 0.873, "step": 51940 }, { "epoch": 4.78, "learning_rate": 2.61193343752873e-05, "loss": 0.9173, "step": 51950 }, { "epoch": 4.78, "learning_rate": 2.6114737519536637e-05, "loss": 0.8607, "step": 51960 }, { "epoch": 4.78, "learning_rate": 2.611014066378597e-05, "loss": 0.8662, "step": 51970 }, { "epoch": 4.78, "learning_rate": 2.6105543808035303e-05, "loss": 0.9766, "step": 51980 }, { "epoch": 4.78, "learning_rate": 2.610094695228464e-05, "loss": 0.8034, "step": 51990 }, { "epoch": 4.78, "learning_rate": 2.6096350096533973e-05, "loss": 0.8759, "step": 52000 }, { "epoch": 4.78, "learning_rate": 2.6091753240783306e-05, "loss": 0.8653, "step": 52010 }, { "epoch": 4.78, "learning_rate": 2.608715638503264e-05, "loss": 0.8343, "step": 52020 }, { "epoch": 4.78, "learning_rate": 2.6082559529281976e-05, "loss": 0.8049, "step": 52030 }, { "epoch": 4.78, "learning_rate": 2.6077962673531302e-05, "loss": 0.9001, "step": 52040 }, { "epoch": 4.79, "learning_rate": 2.607336581778064e-05, "loss": 0.866, "step": 52050 }, { "epoch": 4.79, "learning_rate": 2.606876896202997e-05, "loss": 1.0011, "step": 52060 }, { "epoch": 4.79, "learning_rate": 2.6064172106279305e-05, "loss": 1.007, "step": 52070 }, { "epoch": 4.79, "learning_rate": 2.605957525052864e-05, "loss": 0.8373, "step": 52080 }, { "epoch": 4.79, "learning_rate": 2.6054978394777974e-05, "loss": 0.7447, "step": 52090 }, { "epoch": 4.79, "learning_rate": 2.6050381539027307e-05, "loss": 0.8904, "step": 52100 }, { "epoch": 4.79, "learning_rate": 2.604578468327664e-05, "loss": 0.8515, "step": 52110 }, { "epoch": 4.79, "learning_rate": 2.6041187827525977e-05, "loss": 0.8491, "step": 52120 }, { "epoch": 4.79, "learning_rate": 2.6036590971775303e-05, "loss": 0.7905, "step": 52130 }, { "epoch": 4.79, "learning_rate": 2.603199411602464e-05, "loss": 0.886, "step": 52140 }, { "epoch": 4.79, "learning_rate": 2.6027397260273973e-05, "loss": 0.9144, "step": 52150 }, { "epoch": 4.8, "learning_rate": 2.6022800404523306e-05, "loss": 0.7743, "step": 52160 }, { "epoch": 4.8, "learning_rate": 2.6018203548772642e-05, "loss": 0.9028, "step": 52170 }, { "epoch": 4.8, "learning_rate": 2.6013606693021975e-05, "loss": 0.9628, "step": 52180 }, { "epoch": 4.8, "learning_rate": 2.600900983727131e-05, "loss": 0.8116, "step": 52190 }, { "epoch": 4.8, "learning_rate": 2.600441298152064e-05, "loss": 0.8684, "step": 52200 }, { "epoch": 4.8, "learning_rate": 2.5999816125769978e-05, "loss": 0.8152, "step": 52210 }, { "epoch": 4.8, "learning_rate": 2.5995219270019304e-05, "loss": 0.8058, "step": 52220 }, { "epoch": 4.8, "learning_rate": 2.599062241426864e-05, "loss": 0.897, "step": 52230 }, { "epoch": 4.8, "learning_rate": 2.5986025558517974e-05, "loss": 0.929, "step": 52240 }, { "epoch": 4.8, "learning_rate": 2.5981428702767307e-05, "loss": 1.0103, "step": 52250 }, { "epoch": 4.8, "learning_rate": 2.597683184701664e-05, "loss": 0.9344, "step": 52260 }, { "epoch": 4.81, "learning_rate": 2.5972234991265976e-05, "loss": 0.8249, "step": 52270 }, { "epoch": 4.81, "learning_rate": 2.596763813551531e-05, "loss": 0.8546, "step": 52280 }, { "epoch": 4.81, "learning_rate": 2.5963041279764643e-05, "loss": 0.7462, "step": 52290 }, { "epoch": 4.81, "learning_rate": 2.595844442401398e-05, "loss": 0.9635, "step": 52300 }, { "epoch": 4.81, "learning_rate": 2.5953847568263305e-05, "loss": 0.8262, "step": 52310 }, { "epoch": 4.81, "learning_rate": 2.5949250712512642e-05, "loss": 0.8048, "step": 52320 }, { "epoch": 4.81, "learning_rate": 2.5944653856761975e-05, "loss": 1.0075, "step": 52330 }, { "epoch": 4.81, "learning_rate": 2.5940057001011308e-05, "loss": 0.9017, "step": 52340 }, { "epoch": 4.81, "learning_rate": 2.593546014526064e-05, "loss": 1.0119, "step": 52350 }, { "epoch": 4.81, "learning_rate": 2.5930863289509978e-05, "loss": 0.7725, "step": 52360 }, { "epoch": 4.81, "learning_rate": 2.592626643375931e-05, "loss": 0.8419, "step": 52370 }, { "epoch": 4.82, "learning_rate": 2.5921669578008644e-05, "loss": 0.7826, "step": 52380 }, { "epoch": 4.82, "learning_rate": 2.591707272225798e-05, "loss": 0.8574, "step": 52390 }, { "epoch": 4.82, "learning_rate": 2.5912475866507307e-05, "loss": 0.8753, "step": 52400 }, { "epoch": 4.82, "learning_rate": 2.5907879010756643e-05, "loss": 0.9236, "step": 52410 }, { "epoch": 4.82, "learning_rate": 2.5903282155005976e-05, "loss": 0.9128, "step": 52420 }, { "epoch": 4.82, "learning_rate": 2.589868529925531e-05, "loss": 0.8835, "step": 52430 }, { "epoch": 4.82, "learning_rate": 2.5894088443504642e-05, "loss": 0.8457, "step": 52440 }, { "epoch": 4.82, "learning_rate": 2.588949158775398e-05, "loss": 0.9376, "step": 52450 }, { "epoch": 4.82, "learning_rate": 2.5884894732003312e-05, "loss": 0.8774, "step": 52460 }, { "epoch": 4.82, "learning_rate": 2.5880297876252645e-05, "loss": 0.8814, "step": 52470 }, { "epoch": 4.82, "learning_rate": 2.587570102050198e-05, "loss": 0.9285, "step": 52480 }, { "epoch": 4.83, "learning_rate": 2.5871104164751308e-05, "loss": 0.9096, "step": 52490 }, { "epoch": 4.83, "learning_rate": 2.5866507309000644e-05, "loss": 0.9448, "step": 52500 }, { "epoch": 4.83, "learning_rate": 2.5861910453249977e-05, "loss": 0.8288, "step": 52510 }, { "epoch": 4.83, "learning_rate": 2.585731359749931e-05, "loss": 0.8107, "step": 52520 }, { "epoch": 4.83, "learning_rate": 2.5852716741748644e-05, "loss": 0.8019, "step": 52530 }, { "epoch": 4.83, "learning_rate": 2.584811988599798e-05, "loss": 0.8552, "step": 52540 }, { "epoch": 4.83, "learning_rate": 2.5843523030247313e-05, "loss": 0.8744, "step": 52550 }, { "epoch": 4.83, "learning_rate": 2.5838926174496646e-05, "loss": 1.0223, "step": 52560 }, { "epoch": 4.83, "learning_rate": 2.5834329318745983e-05, "loss": 0.8923, "step": 52570 }, { "epoch": 4.83, "learning_rate": 2.582973246299531e-05, "loss": 0.8611, "step": 52580 }, { "epoch": 4.83, "learning_rate": 2.5825135607244642e-05, "loss": 0.8632, "step": 52590 }, { "epoch": 4.84, "learning_rate": 2.582053875149398e-05, "loss": 1.0215, "step": 52600 }, { "epoch": 4.84, "learning_rate": 2.5815941895743312e-05, "loss": 0.7743, "step": 52610 }, { "epoch": 4.84, "learning_rate": 2.5811345039992645e-05, "loss": 0.8406, "step": 52620 }, { "epoch": 4.84, "learning_rate": 2.580674818424198e-05, "loss": 0.8455, "step": 52630 }, { "epoch": 4.84, "learning_rate": 2.5802151328491314e-05, "loss": 0.8138, "step": 52640 }, { "epoch": 4.84, "learning_rate": 2.5797554472740647e-05, "loss": 0.8717, "step": 52650 }, { "epoch": 4.84, "learning_rate": 2.5792957616989984e-05, "loss": 0.8454, "step": 52660 }, { "epoch": 4.84, "learning_rate": 2.578836076123931e-05, "loss": 0.8429, "step": 52670 }, { "epoch": 4.84, "learning_rate": 2.5783763905488643e-05, "loss": 0.8464, "step": 52680 }, { "epoch": 4.84, "learning_rate": 2.577916704973798e-05, "loss": 0.8701, "step": 52690 }, { "epoch": 4.85, "learning_rate": 2.5774570193987313e-05, "loss": 0.8752, "step": 52700 }, { "epoch": 4.85, "learning_rate": 2.5769973338236646e-05, "loss": 0.8493, "step": 52710 }, { "epoch": 4.85, "learning_rate": 2.5765376482485982e-05, "loss": 0.937, "step": 52720 }, { "epoch": 4.85, "learning_rate": 2.5760779626735316e-05, "loss": 0.8947, "step": 52730 }, { "epoch": 4.85, "learning_rate": 2.575618277098465e-05, "loss": 0.8661, "step": 52740 }, { "epoch": 4.85, "learning_rate": 2.5751585915233982e-05, "loss": 0.8669, "step": 52750 }, { "epoch": 4.85, "learning_rate": 2.5746989059483318e-05, "loss": 0.8884, "step": 52760 }, { "epoch": 4.85, "learning_rate": 2.5742392203732645e-05, "loss": 0.8584, "step": 52770 }, { "epoch": 4.85, "learning_rate": 2.573779534798198e-05, "loss": 0.7858, "step": 52780 }, { "epoch": 4.85, "learning_rate": 2.5733198492231314e-05, "loss": 0.9176, "step": 52790 }, { "epoch": 4.85, "learning_rate": 2.5728601636480647e-05, "loss": 0.8127, "step": 52800 }, { "epoch": 4.86, "learning_rate": 2.5724004780729984e-05, "loss": 0.8937, "step": 52810 }, { "epoch": 4.86, "learning_rate": 2.5719407924979317e-05, "loss": 0.7939, "step": 52820 }, { "epoch": 4.86, "learning_rate": 2.571481106922865e-05, "loss": 0.8024, "step": 52830 }, { "epoch": 4.86, "learning_rate": 2.5710214213477983e-05, "loss": 0.8961, "step": 52840 }, { "epoch": 4.86, "learning_rate": 2.570561735772732e-05, "loss": 0.9322, "step": 52850 }, { "epoch": 4.86, "learning_rate": 2.5701020501976646e-05, "loss": 0.8168, "step": 52860 }, { "epoch": 4.86, "learning_rate": 2.5696423646225982e-05, "loss": 0.841, "step": 52870 }, { "epoch": 4.86, "learning_rate": 2.5691826790475315e-05, "loss": 0.8792, "step": 52880 }, { "epoch": 4.86, "learning_rate": 2.568722993472465e-05, "loss": 0.8607, "step": 52890 }, { "epoch": 4.86, "learning_rate": 2.5682633078973985e-05, "loss": 0.9123, "step": 52900 }, { "epoch": 4.86, "learning_rate": 2.5678036223223318e-05, "loss": 0.9976, "step": 52910 }, { "epoch": 4.87, "learning_rate": 2.567343936747265e-05, "loss": 0.8538, "step": 52920 }, { "epoch": 4.87, "learning_rate": 2.5668842511721984e-05, "loss": 0.8046, "step": 52930 }, { "epoch": 4.87, "learning_rate": 2.566424565597132e-05, "loss": 0.9453, "step": 52940 }, { "epoch": 4.87, "learning_rate": 2.5659648800220647e-05, "loss": 0.9267, "step": 52950 }, { "epoch": 4.87, "learning_rate": 2.5655051944469983e-05, "loss": 0.9479, "step": 52960 }, { "epoch": 4.87, "learning_rate": 2.5650455088719316e-05, "loss": 0.9615, "step": 52970 }, { "epoch": 4.87, "learning_rate": 2.564585823296865e-05, "loss": 0.8079, "step": 52980 }, { "epoch": 4.87, "learning_rate": 2.5641261377217986e-05, "loss": 0.8712, "step": 52990 }, { "epoch": 4.87, "learning_rate": 2.563666452146732e-05, "loss": 0.8296, "step": 53000 }, { "epoch": 4.87, "learning_rate": 2.5632067665716652e-05, "loss": 0.8692, "step": 53010 }, { "epoch": 4.87, "learning_rate": 2.5627470809965985e-05, "loss": 0.8878, "step": 53020 }, { "epoch": 4.88, "learning_rate": 2.5622873954215322e-05, "loss": 0.7844, "step": 53030 }, { "epoch": 4.88, "learning_rate": 2.5618277098464648e-05, "loss": 0.8335, "step": 53040 }, { "epoch": 4.88, "learning_rate": 2.5613680242713985e-05, "loss": 0.9055, "step": 53050 }, { "epoch": 4.88, "learning_rate": 2.5609083386963318e-05, "loss": 0.885, "step": 53060 }, { "epoch": 4.88, "learning_rate": 2.560448653121265e-05, "loss": 0.8754, "step": 53070 }, { "epoch": 4.88, "learning_rate": 2.5599889675461984e-05, "loss": 0.8067, "step": 53080 }, { "epoch": 4.88, "learning_rate": 2.559529281971132e-05, "loss": 0.8867, "step": 53090 }, { "epoch": 4.88, "learning_rate": 2.5590695963960653e-05, "loss": 0.841, "step": 53100 }, { "epoch": 4.88, "learning_rate": 2.5586099108209987e-05, "loss": 0.9648, "step": 53110 }, { "epoch": 4.88, "learning_rate": 2.5581502252459323e-05, "loss": 0.938, "step": 53120 }, { "epoch": 4.88, "learning_rate": 2.557690539670865e-05, "loss": 0.8538, "step": 53130 }, { "epoch": 4.89, "learning_rate": 2.5572308540957986e-05, "loss": 0.9718, "step": 53140 }, { "epoch": 4.89, "learning_rate": 2.556771168520732e-05, "loss": 0.9588, "step": 53150 }, { "epoch": 4.89, "learning_rate": 2.5563114829456652e-05, "loss": 0.7171, "step": 53160 }, { "epoch": 4.89, "learning_rate": 2.5558517973705985e-05, "loss": 0.8828, "step": 53170 }, { "epoch": 4.89, "learning_rate": 2.555392111795532e-05, "loss": 0.8831, "step": 53180 }, { "epoch": 4.89, "learning_rate": 2.5549324262204655e-05, "loss": 0.8076, "step": 53190 }, { "epoch": 4.89, "learning_rate": 2.5544727406453988e-05, "loss": 0.666, "step": 53200 }, { "epoch": 4.89, "learning_rate": 2.5540130550703324e-05, "loss": 0.9843, "step": 53210 }, { "epoch": 4.89, "learning_rate": 2.553553369495265e-05, "loss": 0.8388, "step": 53220 }, { "epoch": 4.89, "learning_rate": 2.5530936839201987e-05, "loss": 0.7749, "step": 53230 }, { "epoch": 4.89, "learning_rate": 2.552633998345132e-05, "loss": 0.7887, "step": 53240 }, { "epoch": 4.9, "learning_rate": 2.5521743127700653e-05, "loss": 0.7947, "step": 53250 }, { "epoch": 4.9, "learning_rate": 2.5517146271949986e-05, "loss": 0.6509, "step": 53260 }, { "epoch": 4.9, "learning_rate": 2.5512549416199323e-05, "loss": 0.9097, "step": 53270 }, { "epoch": 4.9, "learning_rate": 2.5507952560448656e-05, "loss": 0.9015, "step": 53280 }, { "epoch": 4.9, "learning_rate": 2.550335570469799e-05, "loss": 0.8597, "step": 53290 }, { "epoch": 4.9, "learning_rate": 2.5498758848947325e-05, "loss": 0.9584, "step": 53300 }, { "epoch": 4.9, "learning_rate": 2.5494161993196652e-05, "loss": 0.8125, "step": 53310 }, { "epoch": 4.9, "learning_rate": 2.5489565137445988e-05, "loss": 0.9039, "step": 53320 }, { "epoch": 4.9, "learning_rate": 2.548496828169532e-05, "loss": 0.8682, "step": 53330 }, { "epoch": 4.9, "learning_rate": 2.5480371425944654e-05, "loss": 0.9619, "step": 53340 }, { "epoch": 4.9, "learning_rate": 2.5475774570193987e-05, "loss": 0.8805, "step": 53350 }, { "epoch": 4.91, "learning_rate": 2.5471177714443324e-05, "loss": 0.8385, "step": 53360 }, { "epoch": 4.91, "learning_rate": 2.5466580858692657e-05, "loss": 0.8486, "step": 53370 }, { "epoch": 4.91, "learning_rate": 2.546198400294199e-05, "loss": 0.883, "step": 53380 }, { "epoch": 4.91, "learning_rate": 2.5457387147191327e-05, "loss": 0.9559, "step": 53390 }, { "epoch": 4.91, "learning_rate": 2.5452790291440653e-05, "loss": 0.9408, "step": 53400 }, { "epoch": 4.91, "learning_rate": 2.5448193435689986e-05, "loss": 0.974, "step": 53410 }, { "epoch": 4.91, "learning_rate": 2.5443596579939322e-05, "loss": 0.8191, "step": 53420 }, { "epoch": 4.91, "learning_rate": 2.5438999724188656e-05, "loss": 0.918, "step": 53430 }, { "epoch": 4.91, "learning_rate": 2.543440286843799e-05, "loss": 0.8794, "step": 53440 }, { "epoch": 4.91, "learning_rate": 2.5429806012687325e-05, "loss": 0.8769, "step": 53450 }, { "epoch": 4.91, "learning_rate": 2.5425209156936658e-05, "loss": 1.0246, "step": 53460 }, { "epoch": 4.92, "learning_rate": 2.542061230118599e-05, "loss": 0.7889, "step": 53470 }, { "epoch": 4.92, "learning_rate": 2.5416015445435328e-05, "loss": 0.845, "step": 53480 }, { "epoch": 4.92, "learning_rate": 2.5411418589684654e-05, "loss": 0.7512, "step": 53490 }, { "epoch": 4.92, "learning_rate": 2.5406821733933987e-05, "loss": 1.1174, "step": 53500 }, { "epoch": 4.92, "learning_rate": 2.5402224878183324e-05, "loss": 0.7857, "step": 53510 }, { "epoch": 4.92, "learning_rate": 2.5397628022432657e-05, "loss": 0.832, "step": 53520 }, { "epoch": 4.92, "learning_rate": 2.539303116668199e-05, "loss": 0.8143, "step": 53530 }, { "epoch": 4.92, "learning_rate": 2.5388434310931326e-05, "loss": 0.8601, "step": 53540 }, { "epoch": 4.92, "learning_rate": 2.538383745518066e-05, "loss": 0.9089, "step": 53550 }, { "epoch": 4.92, "learning_rate": 2.5379240599429992e-05, "loss": 0.9162, "step": 53560 }, { "epoch": 4.93, "learning_rate": 2.5374643743679326e-05, "loss": 0.8816, "step": 53570 }, { "epoch": 4.93, "learning_rate": 2.5370046887928655e-05, "loss": 0.7697, "step": 53580 }, { "epoch": 4.93, "learning_rate": 2.536545003217799e-05, "loss": 0.9161, "step": 53590 }, { "epoch": 4.93, "learning_rate": 2.5360853176427325e-05, "loss": 0.7837, "step": 53600 }, { "epoch": 4.93, "learning_rate": 2.5356256320676658e-05, "loss": 0.7899, "step": 53610 }, { "epoch": 4.93, "learning_rate": 2.535165946492599e-05, "loss": 0.9379, "step": 53620 }, { "epoch": 4.93, "learning_rate": 2.5347062609175328e-05, "loss": 0.934, "step": 53630 }, { "epoch": 4.93, "learning_rate": 2.534246575342466e-05, "loss": 0.8444, "step": 53640 }, { "epoch": 4.93, "learning_rate": 2.5337868897673994e-05, "loss": 0.8825, "step": 53650 }, { "epoch": 4.93, "learning_rate": 2.5333272041923327e-05, "loss": 0.8952, "step": 53660 }, { "epoch": 4.93, "learning_rate": 2.5328675186172656e-05, "loss": 0.8694, "step": 53670 }, { "epoch": 4.94, "learning_rate": 2.532407833042199e-05, "loss": 0.872, "step": 53680 }, { "epoch": 4.94, "learning_rate": 2.5319481474671326e-05, "loss": 0.7738, "step": 53690 }, { "epoch": 4.94, "learning_rate": 2.531488461892066e-05, "loss": 0.8711, "step": 53700 }, { "epoch": 4.94, "learning_rate": 2.5310287763169992e-05, "loss": 0.8984, "step": 53710 }, { "epoch": 4.94, "learning_rate": 2.530569090741933e-05, "loss": 0.8901, "step": 53720 }, { "epoch": 4.94, "learning_rate": 2.5301094051668662e-05, "loss": 0.8974, "step": 53730 }, { "epoch": 4.94, "learning_rate": 2.5296497195917995e-05, "loss": 0.7442, "step": 53740 }, { "epoch": 4.94, "learning_rate": 2.5291900340167328e-05, "loss": 0.915, "step": 53750 }, { "epoch": 4.94, "learning_rate": 2.5287303484416658e-05, "loss": 0.7707, "step": 53760 }, { "epoch": 4.94, "learning_rate": 2.528270662866599e-05, "loss": 0.8067, "step": 53770 }, { "epoch": 4.94, "learning_rate": 2.5278109772915327e-05, "loss": 0.8651, "step": 53780 }, { "epoch": 4.95, "learning_rate": 2.527351291716466e-05, "loss": 0.7279, "step": 53790 }, { "epoch": 4.95, "learning_rate": 2.5268916061413993e-05, "loss": 0.9325, "step": 53800 }, { "epoch": 4.95, "learning_rate": 2.526431920566333e-05, "loss": 0.8847, "step": 53810 }, { "epoch": 4.95, "learning_rate": 2.5259722349912663e-05, "loss": 0.9364, "step": 53820 }, { "epoch": 4.95, "learning_rate": 2.5255125494161996e-05, "loss": 0.8919, "step": 53830 }, { "epoch": 4.95, "learning_rate": 2.525052863841133e-05, "loss": 0.976, "step": 53840 }, { "epoch": 4.95, "learning_rate": 2.524593178266066e-05, "loss": 0.8656, "step": 53850 }, { "epoch": 4.95, "learning_rate": 2.5241334926909992e-05, "loss": 0.8761, "step": 53860 }, { "epoch": 4.95, "learning_rate": 2.523673807115933e-05, "loss": 0.8918, "step": 53870 }, { "epoch": 4.95, "learning_rate": 2.523214121540866e-05, "loss": 0.8536, "step": 53880 }, { "epoch": 4.95, "learning_rate": 2.5227544359657995e-05, "loss": 0.8764, "step": 53890 }, { "epoch": 4.96, "learning_rate": 2.5222947503907328e-05, "loss": 0.9813, "step": 53900 }, { "epoch": 4.96, "learning_rate": 2.5218350648156664e-05, "loss": 0.7394, "step": 53910 }, { "epoch": 4.96, "learning_rate": 2.5213753792405997e-05, "loss": 0.8418, "step": 53920 }, { "epoch": 4.96, "learning_rate": 2.520915693665533e-05, "loss": 0.8528, "step": 53930 }, { "epoch": 4.96, "learning_rate": 2.520456008090466e-05, "loss": 0.8297, "step": 53940 }, { "epoch": 4.96, "learning_rate": 2.5199963225153993e-05, "loss": 0.951, "step": 53950 }, { "epoch": 4.96, "learning_rate": 2.519536636940333e-05, "loss": 0.8858, "step": 53960 }, { "epoch": 4.96, "learning_rate": 2.5190769513652663e-05, "loss": 0.8764, "step": 53970 }, { "epoch": 4.96, "learning_rate": 2.5186172657901996e-05, "loss": 0.8233, "step": 53980 }, { "epoch": 4.96, "learning_rate": 2.518157580215133e-05, "loss": 0.8742, "step": 53990 }, { "epoch": 4.96, "learning_rate": 2.5176978946400665e-05, "loss": 0.9035, "step": 54000 }, { "epoch": 4.97, "learning_rate": 2.517238209065e-05, "loss": 0.8846, "step": 54010 }, { "epoch": 4.97, "learning_rate": 2.516778523489933e-05, "loss": 0.9921, "step": 54020 }, { "epoch": 4.97, "learning_rate": 2.516318837914866e-05, "loss": 0.8176, "step": 54030 }, { "epoch": 4.97, "learning_rate": 2.5158591523397994e-05, "loss": 0.9272, "step": 54040 }, { "epoch": 4.97, "learning_rate": 2.515399466764733e-05, "loss": 0.9713, "step": 54050 }, { "epoch": 4.97, "learning_rate": 2.5149397811896664e-05, "loss": 0.8964, "step": 54060 }, { "epoch": 4.97, "learning_rate": 2.5144800956145997e-05, "loss": 0.9541, "step": 54070 }, { "epoch": 4.97, "learning_rate": 2.514020410039533e-05, "loss": 0.8445, "step": 54080 }, { "epoch": 4.97, "learning_rate": 2.5135607244644667e-05, "loss": 0.8405, "step": 54090 }, { "epoch": 4.97, "learning_rate": 2.5131010388894e-05, "loss": 0.9137, "step": 54100 }, { "epoch": 4.97, "learning_rate": 2.5126413533143333e-05, "loss": 0.9354, "step": 54110 }, { "epoch": 4.98, "learning_rate": 2.5121816677392662e-05, "loss": 0.8205, "step": 54120 }, { "epoch": 4.98, "learning_rate": 2.5117219821641996e-05, "loss": 0.815, "step": 54130 }, { "epoch": 4.98, "learning_rate": 2.5112622965891332e-05, "loss": 0.831, "step": 54140 }, { "epoch": 4.98, "learning_rate": 2.5108026110140665e-05, "loss": 0.9018, "step": 54150 }, { "epoch": 4.98, "learning_rate": 2.5103429254389998e-05, "loss": 0.8079, "step": 54160 }, { "epoch": 4.98, "learning_rate": 2.509883239863933e-05, "loss": 0.9851, "step": 54170 }, { "epoch": 4.98, "learning_rate": 2.5094235542888668e-05, "loss": 1.0549, "step": 54180 }, { "epoch": 4.98, "learning_rate": 2.5089638687138e-05, "loss": 1.0831, "step": 54190 }, { "epoch": 4.98, "learning_rate": 2.5085041831387334e-05, "loss": 0.8287, "step": 54200 }, { "epoch": 4.98, "learning_rate": 2.5080444975636664e-05, "loss": 0.8688, "step": 54210 }, { "epoch": 4.98, "learning_rate": 2.5075848119885997e-05, "loss": 0.9395, "step": 54220 }, { "epoch": 4.99, "learning_rate": 2.507125126413533e-05, "loss": 0.9291, "step": 54230 }, { "epoch": 4.99, "learning_rate": 2.5066654408384666e-05, "loss": 0.838, "step": 54240 }, { "epoch": 4.99, "learning_rate": 2.5062057552634e-05, "loss": 0.8023, "step": 54250 }, { "epoch": 4.99, "learning_rate": 2.5057460696883332e-05, "loss": 0.9375, "step": 54260 }, { "epoch": 4.99, "learning_rate": 2.505286384113267e-05, "loss": 0.9308, "step": 54270 }, { "epoch": 4.99, "learning_rate": 2.5048266985382002e-05, "loss": 0.8996, "step": 54280 }, { "epoch": 4.99, "learning_rate": 2.5043670129631335e-05, "loss": 0.9203, "step": 54290 }, { "epoch": 4.99, "learning_rate": 2.5039073273880665e-05, "loss": 0.9082, "step": 54300 }, { "epoch": 4.99, "learning_rate": 2.5034476418129998e-05, "loss": 0.8718, "step": 54310 }, { "epoch": 4.99, "learning_rate": 2.502987956237933e-05, "loss": 0.9969, "step": 54320 }, { "epoch": 4.99, "learning_rate": 2.5025282706628668e-05, "loss": 0.8418, "step": 54330 }, { "epoch": 5.0, "learning_rate": 2.5020685850878e-05, "loss": 0.8164, "step": 54340 }, { "epoch": 5.0, "learning_rate": 2.5016088995127334e-05, "loss": 0.8714, "step": 54350 }, { "epoch": 5.0, "learning_rate": 2.501149213937667e-05, "loss": 0.906, "step": 54360 }, { "epoch": 5.0, "learning_rate": 2.5006895283626003e-05, "loss": 1.0229, "step": 54370 }, { "epoch": 5.0, "learning_rate": 2.5002298427875336e-05, "loss": 0.8465, "step": 54380 }, { "epoch": 5.0, "learning_rate": 2.499770157212467e-05, "loss": 0.7364, "step": 54390 }, { "epoch": 5.0, "learning_rate": 2.4993104716374003e-05, "loss": 0.8708, "step": 54400 }, { "epoch": 5.0, "learning_rate": 2.4988507860623332e-05, "loss": 0.87, "step": 54410 }, { "epoch": 5.0, "learning_rate": 2.498391100487267e-05, "loss": 0.9139, "step": 54420 }, { "epoch": 5.0, "learning_rate": 2.4979314149122002e-05, "loss": 0.8573, "step": 54430 }, { "epoch": 5.01, "learning_rate": 2.4974717293371335e-05, "loss": 0.8405, "step": 54440 }, { "epoch": 5.01, "learning_rate": 2.497012043762067e-05, "loss": 0.8107, "step": 54450 }, { "epoch": 5.01, "learning_rate": 2.496552358187e-05, "loss": 0.8783, "step": 54460 }, { "epoch": 5.01, "learning_rate": 2.4960926726119334e-05, "loss": 0.7549, "step": 54470 }, { "epoch": 5.01, "learning_rate": 2.495632987036867e-05, "loss": 0.9356, "step": 54480 }, { "epoch": 5.01, "learning_rate": 2.4951733014618004e-05, "loss": 0.8738, "step": 54490 }, { "epoch": 5.01, "learning_rate": 2.4947136158867333e-05, "loss": 0.8858, "step": 54500 }, { "epoch": 5.01, "learning_rate": 2.494253930311667e-05, "loss": 0.7526, "step": 54510 }, { "epoch": 5.01, "learning_rate": 2.4937942447366003e-05, "loss": 0.9778, "step": 54520 }, { "epoch": 5.01, "learning_rate": 2.4933345591615336e-05, "loss": 0.8449, "step": 54530 }, { "epoch": 5.01, "learning_rate": 2.4928748735864673e-05, "loss": 0.8822, "step": 54540 }, { "epoch": 5.02, "learning_rate": 2.4924151880114002e-05, "loss": 0.9717, "step": 54550 }, { "epoch": 5.02, "learning_rate": 2.4919555024363335e-05, "loss": 0.8209, "step": 54560 }, { "epoch": 5.02, "learning_rate": 2.4914958168612672e-05, "loss": 0.81, "step": 54570 }, { "epoch": 5.02, "learning_rate": 2.4910361312862005e-05, "loss": 0.8161, "step": 54580 }, { "epoch": 5.02, "learning_rate": 2.4905764457111335e-05, "loss": 0.9514, "step": 54590 }, { "epoch": 5.02, "learning_rate": 2.490116760136067e-05, "loss": 0.8838, "step": 54600 }, { "epoch": 5.02, "learning_rate": 2.4896570745610004e-05, "loss": 0.9603, "step": 54610 }, { "epoch": 5.02, "learning_rate": 2.4891973889859337e-05, "loss": 0.9488, "step": 54620 }, { "epoch": 5.02, "learning_rate": 2.488737703410867e-05, "loss": 0.9692, "step": 54630 }, { "epoch": 5.02, "learning_rate": 2.4882780178358003e-05, "loss": 0.8756, "step": 54640 }, { "epoch": 5.02, "learning_rate": 2.4878183322607337e-05, "loss": 0.8533, "step": 54650 }, { "epoch": 5.03, "learning_rate": 2.4873586466856673e-05, "loss": 0.93, "step": 54660 }, { "epoch": 5.03, "learning_rate": 2.4868989611106006e-05, "loss": 0.868, "step": 54670 }, { "epoch": 5.03, "learning_rate": 2.4864392755355336e-05, "loss": 0.9038, "step": 54680 }, { "epoch": 5.03, "learning_rate": 2.4859795899604672e-05, "loss": 0.8172, "step": 54690 }, { "epoch": 5.03, "learning_rate": 2.4855199043854005e-05, "loss": 0.9856, "step": 54700 }, { "epoch": 5.03, "learning_rate": 2.485060218810334e-05, "loss": 0.8279, "step": 54710 }, { "epoch": 5.03, "learning_rate": 2.484600533235267e-05, "loss": 0.8424, "step": 54720 }, { "epoch": 5.03, "learning_rate": 2.4841408476602005e-05, "loss": 0.8985, "step": 54730 }, { "epoch": 5.03, "learning_rate": 2.4836811620851338e-05, "loss": 0.8354, "step": 54740 }, { "epoch": 5.03, "learning_rate": 2.4832214765100674e-05, "loss": 0.8585, "step": 54750 }, { "epoch": 5.03, "learning_rate": 2.4827617909350007e-05, "loss": 0.8391, "step": 54760 }, { "epoch": 5.04, "learning_rate": 2.4823021053599337e-05, "loss": 0.9709, "step": 54770 }, { "epoch": 5.04, "learning_rate": 2.4818424197848673e-05, "loss": 0.7965, "step": 54780 }, { "epoch": 5.04, "learning_rate": 2.4813827342098007e-05, "loss": 0.7613, "step": 54790 }, { "epoch": 5.04, "learning_rate": 2.480923048634734e-05, "loss": 0.8948, "step": 54800 }, { "epoch": 5.04, "learning_rate": 2.4804633630596673e-05, "loss": 1.0639, "step": 54810 }, { "epoch": 5.04, "learning_rate": 2.4800036774846006e-05, "loss": 0.9097, "step": 54820 }, { "epoch": 5.04, "learning_rate": 2.479543991909534e-05, "loss": 0.9558, "step": 54830 }, { "epoch": 5.04, "learning_rate": 2.4790843063344672e-05, "loss": 0.8721, "step": 54840 }, { "epoch": 5.04, "learning_rate": 2.478624620759401e-05, "loss": 0.7567, "step": 54850 }, { "epoch": 5.04, "learning_rate": 2.4781649351843338e-05, "loss": 0.7926, "step": 54860 }, { "epoch": 5.04, "learning_rate": 2.4777052496092675e-05, "loss": 0.9319, "step": 54870 }, { "epoch": 5.05, "learning_rate": 2.4772455640342008e-05, "loss": 0.8568, "step": 54880 }, { "epoch": 5.05, "learning_rate": 2.476785878459134e-05, "loss": 0.8532, "step": 54890 }, { "epoch": 5.05, "learning_rate": 2.4763261928840674e-05, "loss": 0.8778, "step": 54900 }, { "epoch": 5.05, "learning_rate": 2.4758665073090007e-05, "loss": 0.9659, "step": 54910 }, { "epoch": 5.05, "learning_rate": 2.475406821733934e-05, "loss": 0.8472, "step": 54920 }, { "epoch": 5.05, "learning_rate": 2.4749471361588673e-05, "loss": 0.8947, "step": 54930 }, { "epoch": 5.05, "learning_rate": 2.474487450583801e-05, "loss": 0.7866, "step": 54940 }, { "epoch": 5.05, "learning_rate": 2.474027765008734e-05, "loss": 0.8705, "step": 54950 }, { "epoch": 5.05, "learning_rate": 2.4735680794336672e-05, "loss": 0.8883, "step": 54960 }, { "epoch": 5.05, "learning_rate": 2.473108393858601e-05, "loss": 0.9454, "step": 54970 }, { "epoch": 5.05, "learning_rate": 2.4726487082835342e-05, "loss": 0.9701, "step": 54980 }, { "epoch": 5.06, "learning_rate": 2.4721890227084675e-05, "loss": 0.9546, "step": 54990 }, { "epoch": 5.06, "learning_rate": 2.4717293371334008e-05, "loss": 0.8483, "step": 55000 }, { "epoch": 5.06, "eval_accuracy": 0.5908296943231441, "eval_loss": 0.8710007667541504, "eval_runtime": 159.9604, "eval_samples_per_second": 28.632, "eval_steps_per_second": 3.582, "step": 55000 }, { "epoch": 5.06, "learning_rate": 2.471269651558334e-05, "loss": 0.9456, "step": 55010 }, { "epoch": 5.06, "learning_rate": 2.4708099659832674e-05, "loss": 0.8691, "step": 55020 }, { "epoch": 5.06, "learning_rate": 2.470350280408201e-05, "loss": 0.9334, "step": 55030 }, { "epoch": 5.06, "learning_rate": 2.469890594833134e-05, "loss": 0.8488, "step": 55040 }, { "epoch": 5.06, "learning_rate": 2.4694309092580674e-05, "loss": 0.9485, "step": 55050 }, { "epoch": 5.06, "learning_rate": 2.468971223683001e-05, "loss": 0.9147, "step": 55060 }, { "epoch": 5.06, "learning_rate": 2.4685115381079343e-05, "loss": 0.8021, "step": 55070 }, { "epoch": 5.06, "learning_rate": 2.4680518525328676e-05, "loss": 0.9539, "step": 55080 }, { "epoch": 5.06, "learning_rate": 2.467592166957801e-05, "loss": 0.7796, "step": 55090 }, { "epoch": 5.07, "learning_rate": 2.4671324813827343e-05, "loss": 0.8627, "step": 55100 }, { "epoch": 5.07, "learning_rate": 2.4666727958076676e-05, "loss": 0.759, "step": 55110 }, { "epoch": 5.07, "learning_rate": 2.4662131102326012e-05, "loss": 0.7689, "step": 55120 }, { "epoch": 5.07, "learning_rate": 2.4657534246575342e-05, "loss": 0.9118, "step": 55130 }, { "epoch": 5.07, "learning_rate": 2.4652937390824675e-05, "loss": 0.8572, "step": 55140 }, { "epoch": 5.07, "learning_rate": 2.464834053507401e-05, "loss": 0.77, "step": 55150 }, { "epoch": 5.07, "learning_rate": 2.4643743679323344e-05, "loss": 0.802, "step": 55160 }, { "epoch": 5.07, "learning_rate": 2.4639146823572678e-05, "loss": 0.7213, "step": 55170 }, { "epoch": 5.07, "learning_rate": 2.463454996782201e-05, "loss": 0.8494, "step": 55180 }, { "epoch": 5.07, "learning_rate": 2.4629953112071344e-05, "loss": 0.8589, "step": 55190 }, { "epoch": 5.07, "learning_rate": 2.4625356256320677e-05, "loss": 0.8936, "step": 55200 }, { "epoch": 5.08, "learning_rate": 2.4620759400570013e-05, "loss": 0.8901, "step": 55210 }, { "epoch": 5.08, "learning_rate": 2.4616162544819343e-05, "loss": 0.8469, "step": 55220 }, { "epoch": 5.08, "learning_rate": 2.4611565689068676e-05, "loss": 0.8742, "step": 55230 }, { "epoch": 5.08, "learning_rate": 2.4606968833318013e-05, "loss": 0.8185, "step": 55240 }, { "epoch": 5.08, "learning_rate": 2.4602371977567346e-05, "loss": 0.926, "step": 55250 }, { "epoch": 5.08, "learning_rate": 2.459777512181668e-05, "loss": 0.9239, "step": 55260 }, { "epoch": 5.08, "learning_rate": 2.4593178266066012e-05, "loss": 0.9143, "step": 55270 }, { "epoch": 5.08, "learning_rate": 2.4588581410315345e-05, "loss": 0.9368, "step": 55280 }, { "epoch": 5.08, "learning_rate": 2.4583984554564678e-05, "loss": 0.9351, "step": 55290 }, { "epoch": 5.08, "learning_rate": 2.4579387698814014e-05, "loss": 0.768, "step": 55300 }, { "epoch": 5.09, "learning_rate": 2.4574790843063348e-05, "loss": 0.8918, "step": 55310 }, { "epoch": 5.09, "learning_rate": 2.4570193987312677e-05, "loss": 1.0555, "step": 55320 }, { "epoch": 5.09, "learning_rate": 2.4565597131562014e-05, "loss": 0.9447, "step": 55330 }, { "epoch": 5.09, "learning_rate": 2.4561000275811347e-05, "loss": 1.004, "step": 55340 }, { "epoch": 5.09, "learning_rate": 2.455640342006068e-05, "loss": 0.8283, "step": 55350 }, { "epoch": 5.09, "learning_rate": 2.4551806564310013e-05, "loss": 0.8379, "step": 55360 }, { "epoch": 5.09, "learning_rate": 2.4547209708559346e-05, "loss": 0.9561, "step": 55370 }, { "epoch": 5.09, "learning_rate": 2.454261285280868e-05, "loss": 0.8845, "step": 55380 }, { "epoch": 5.09, "learning_rate": 2.4538015997058016e-05, "loss": 0.7831, "step": 55390 }, { "epoch": 5.09, "learning_rate": 2.453341914130735e-05, "loss": 0.819, "step": 55400 }, { "epoch": 5.09, "learning_rate": 2.452882228555668e-05, "loss": 0.9339, "step": 55410 }, { "epoch": 5.1, "learning_rate": 2.4524225429806015e-05, "loss": 0.9016, "step": 55420 }, { "epoch": 5.1, "learning_rate": 2.4519628574055348e-05, "loss": 0.9277, "step": 55430 }, { "epoch": 5.1, "learning_rate": 2.451503171830468e-05, "loss": 0.8486, "step": 55440 }, { "epoch": 5.1, "learning_rate": 2.4510434862554014e-05, "loss": 0.8605, "step": 55450 }, { "epoch": 5.1, "learning_rate": 2.4505838006803347e-05, "loss": 0.8512, "step": 55460 }, { "epoch": 5.1, "learning_rate": 2.450124115105268e-05, "loss": 0.7934, "step": 55470 }, { "epoch": 5.1, "learning_rate": 2.4496644295302017e-05, "loss": 0.9188, "step": 55480 }, { "epoch": 5.1, "learning_rate": 2.449204743955135e-05, "loss": 0.8711, "step": 55490 }, { "epoch": 5.1, "learning_rate": 2.448745058380068e-05, "loss": 0.9917, "step": 55500 } ], "logging_steps": 10, "max_steps": 108770, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.1721851374961664e+17, "trial_name": null, "trial_params": null }