{ "best_metric": null, "best_model_checkpoint": null, "epoch": 76.60298846226594, "global_step": 405000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18914318138831096, "learning_rate": 4.990542840930585e-05, "loss": 2.2949150390625, "step": 1000 }, { "epoch": 0.3782863627766219, "learning_rate": 4.981085681861169e-05, "loss": 2.31815234375, "step": 2000 }, { "epoch": 0.5674295441649329, "learning_rate": 4.9716285227917534e-05, "loss": 2.32872216796875, "step": 3000 }, { "epoch": 0.7565727255532438, "learning_rate": 4.962171363722338e-05, "loss": 2.33826904296875, "step": 4000 }, { "epoch": 0.9457159069415547, "learning_rate": 4.9527142046529224e-05, "loss": 2.340685546875, "step": 5000 }, { "epoch": 1.1348590883298657, "learning_rate": 4.943257045583507e-05, "loss": 2.28626953125, "step": 6000 }, { "epoch": 1.3240022697181766, "learning_rate": 4.9337998865140915e-05, "loss": 2.27450390625, "step": 7000 }, { "epoch": 1.5131454511064875, "learning_rate": 4.9243427274446756e-05, "loss": 2.287498046875, "step": 8000 }, { "epoch": 1.7022886324947986, "learning_rate": 4.9148855683752605e-05, "loss": 2.30305078125, "step": 9000 }, { "epoch": 1.8914318138831097, "learning_rate": 4.905428409305845e-05, "loss": 2.30912890625, "step": 10000 }, { "epoch": 2.0805749952714203, "learning_rate": 4.895971250236429e-05, "loss": 2.272595703125, "step": 11000 }, { "epoch": 2.2697181766597314, "learning_rate": 4.886514091167014e-05, "loss": 2.238087890625, "step": 12000 }, { "epoch": 2.4588613580480425, "learning_rate": 4.877056932097598e-05, "loss": 2.253140625, "step": 13000 }, { "epoch": 2.648004539436353, "learning_rate": 4.867599773028183e-05, "loss": 2.26483203125, "step": 14000 }, { "epoch": 2.8371477208246643, "learning_rate": 4.858142613958767e-05, "loss": 2.2735078125, "step": 15000 }, { "epoch": 3.0262909022129754, "learning_rate": 4.848685454889351e-05, "loss": 2.27125, "step": 16000 }, { "epoch": 3.215434083601286, "learning_rate": 4.839228295819936e-05, "loss": 2.20089453125, "step": 17000 }, { "epoch": 3.404577264989597, "learning_rate": 4.829771136750521e-05, "loss": 2.2166953125, "step": 18000 }, { "epoch": 3.593720446377908, "learning_rate": 4.820313977681105e-05, "loss": 2.2287421875, "step": 19000 }, { "epoch": 3.782863627766219, "learning_rate": 4.810856818611689e-05, "loss": 2.242296875, "step": 20000 }, { "epoch": 3.97200680915453, "learning_rate": 4.801399659542274e-05, "loss": 2.25162109375, "step": 21000 }, { "epoch": 4.161149990542841, "learning_rate": 4.791942500472858e-05, "loss": 2.17917578125, "step": 22000 }, { "epoch": 4.350293171931152, "learning_rate": 4.782485341403443e-05, "loss": 2.1837734375, "step": 23000 }, { "epoch": 4.539436353319463, "learning_rate": 4.773028182334027e-05, "loss": 2.1981953125, "step": 24000 }, { "epoch": 4.728579534707774, "learning_rate": 4.763571023264611e-05, "loss": 2.2091328125, "step": 25000 }, { "epoch": 4.917722716096085, "learning_rate": 4.754113864195196e-05, "loss": 2.22294140625, "step": 26000 }, { "epoch": 5.106865897484396, "learning_rate": 4.744656705125781e-05, "loss": 2.17415625, "step": 27000 }, { "epoch": 5.296009078872706, "learning_rate": 4.7351995460563645e-05, "loss": 2.1536875, "step": 28000 }, { "epoch": 5.485152260261017, "learning_rate": 4.725742386986949e-05, "loss": 2.17046484375, "step": 29000 }, { "epoch": 5.6742954416493285, "learning_rate": 4.716285227917534e-05, "loss": 2.17616796875, "step": 30000 }, { "epoch": 5.86343862303764, "learning_rate": 4.706828068848118e-05, "loss": 2.18503125, "step": 31000 }, { "epoch": 6.052581804425951, "learning_rate": 4.6973709097787025e-05, "loss": 2.172359375, "step": 32000 }, { "epoch": 6.241724985814262, "learning_rate": 4.687913750709287e-05, "loss": 2.118359375, "step": 33000 }, { "epoch": 6.430868167202572, "learning_rate": 4.6784565916398715e-05, "loss": 2.133546875, "step": 34000 }, { "epoch": 6.620011348590883, "learning_rate": 4.6689994325704564e-05, "loss": 2.1423671875, "step": 35000 }, { "epoch": 6.809154529979194, "learning_rate": 4.6595422735010405e-05, "loss": 2.1594921875, "step": 36000 }, { "epoch": 6.998297711367505, "learning_rate": 4.650085114431625e-05, "loss": 2.17365625, "step": 37000 }, { "epoch": 7.187440892755816, "learning_rate": 4.6406279553622095e-05, "loss": 2.0884140625, "step": 38000 }, { "epoch": 7.376584074144127, "learning_rate": 4.631170796292794e-05, "loss": 2.1023359375, "step": 39000 }, { "epoch": 7.565727255532438, "learning_rate": 4.6217136372233786e-05, "loss": 2.1240625, "step": 40000 }, { "epoch": 7.754870436920749, "learning_rate": 4.612256478153963e-05, "loss": 2.126953125, "step": 41000 }, { "epoch": 7.94401361830906, "learning_rate": 4.602799319084547e-05, "loss": 2.1407265625, "step": 42000 }, { "epoch": 8.133156799697371, "learning_rate": 4.593342160015132e-05, "loss": 2.080640625, "step": 43000 }, { "epoch": 8.322299981085681, "learning_rate": 4.5838850009457166e-05, "loss": 2.07196875, "step": 44000 }, { "epoch": 8.511443162473993, "learning_rate": 4.5744278418763e-05, "loss": 2.0906640625, "step": 45000 }, { "epoch": 8.700586343862303, "learning_rate": 4.564970682806885e-05, "loss": 2.10053125, "step": 46000 }, { "epoch": 8.889729525250615, "learning_rate": 4.55551352373747e-05, "loss": 2.112328125, "step": 47000 }, { "epoch": 9.078872706638926, "learning_rate": 4.546056364668054e-05, "loss": 2.08459375, "step": 48000 }, { "epoch": 9.268015888027236, "learning_rate": 4.536599205598638e-05, "loss": 2.039546875, "step": 49000 }, { "epoch": 9.457159069415548, "learning_rate": 4.527142046529223e-05, "loss": 2.06175, "step": 50000 }, { "epoch": 9.646302250803858, "learning_rate": 4.517684887459807e-05, "loss": 2.076625, "step": 51000 }, { "epoch": 9.83544543219217, "learning_rate": 4.508227728390392e-05, "loss": 2.088609375, "step": 52000 }, { "epoch": 10.02458861358048, "learning_rate": 4.498770569320976e-05, "loss": 2.07996875, "step": 53000 }, { "epoch": 10.213731794968792, "learning_rate": 4.48931341025156e-05, "loss": 2.0183828125, "step": 54000 }, { "epoch": 10.402874976357102, "learning_rate": 4.479856251182145e-05, "loss": 2.0310546875, "step": 55000 }, { "epoch": 10.592018157745413, "learning_rate": 4.47039909211273e-05, "loss": 2.0436796875, "step": 56000 }, { "epoch": 10.781161339133725, "learning_rate": 4.460941933043314e-05, "loss": 2.0622109375, "step": 57000 }, { "epoch": 10.970304520522035, "learning_rate": 4.4514847739738984e-05, "loss": 2.06621875, "step": 58000 }, { "epoch": 11.159447701910347, "learning_rate": 4.442027614904483e-05, "loss": 2.0049921875, "step": 59000 }, { "epoch": 11.348590883298657, "learning_rate": 4.4325704558350674e-05, "loss": 2.00809375, "step": 60000 }, { "epoch": 11.537734064686967, "learning_rate": 4.423113296765652e-05, "loss": 2.017078125, "step": 61000 }, { "epoch": 11.72687724607528, "learning_rate": 4.4136561376962364e-05, "loss": 2.03634375, "step": 62000 }, { "epoch": 11.91602042746359, "learning_rate": 4.4041989786268206e-05, "loss": 2.04240625, "step": 63000 }, { "epoch": 12.105163608851901, "learning_rate": 4.3947418195574054e-05, "loss": 1.99821875, "step": 64000 }, { "epoch": 12.294306790240212, "learning_rate": 4.3852846604879896e-05, "loss": 1.9760625, "step": 65000 }, { "epoch": 12.483449971628524, "learning_rate": 4.375827501418574e-05, "loss": 1.995484375, "step": 66000 }, { "epoch": 12.672593153016834, "learning_rate": 4.3663703423491586e-05, "loss": 2.00321875, "step": 67000 }, { "epoch": 12.861736334405144, "learning_rate": 4.356913183279743e-05, "loss": 2.021078125, "step": 68000 }, { "epoch": 13.050879515793456, "learning_rate": 4.3474560242103276e-05, "loss": 2.004171875, "step": 69000 }, { "epoch": 13.240022697181766, "learning_rate": 4.337998865140912e-05, "loss": 1.9488125, "step": 70000 }, { "epoch": 13.429165878570078, "learning_rate": 4.328541706071496e-05, "loss": 1.967921875, "step": 71000 }, { "epoch": 13.618309059958388, "learning_rate": 4.319084547002081e-05, "loss": 1.984046875, "step": 72000 }, { "epoch": 13.807452241346699, "learning_rate": 4.3096273879326657e-05, "loss": 1.998828125, "step": 73000 }, { "epoch": 13.99659542273501, "learning_rate": 4.30017022886325e-05, "loss": 2.0024375, "step": 74000 }, { "epoch": 14.18573860412332, "learning_rate": 4.290713069793834e-05, "loss": 1.927890625, "step": 75000 }, { "epoch": 14.374881785511633, "learning_rate": 4.281255910724419e-05, "loss": 1.94315625, "step": 76000 }, { "epoch": 14.564024966899943, "learning_rate": 4.271798751655003e-05, "loss": 1.95571875, "step": 77000 }, { "epoch": 14.753168148288253, "learning_rate": 4.262341592585587e-05, "loss": 1.96771875, "step": 78000 }, { "epoch": 14.942311329676565, "learning_rate": 4.252884433516172e-05, "loss": 1.981109375, "step": 79000 }, { "epoch": 15.131454511064875, "learning_rate": 4.243427274446756e-05, "loss": 1.929875, "step": 80000 }, { "epoch": 15.320597692453187, "learning_rate": 4.233970115377341e-05, "loss": 1.91628125, "step": 81000 }, { "epoch": 15.509740873841498, "learning_rate": 4.224512956307925e-05, "loss": 1.938640625, "step": 82000 }, { "epoch": 15.69888405522981, "learning_rate": 4.2150557972385094e-05, "loss": 1.9478125, "step": 83000 }, { "epoch": 15.88802723661812, "learning_rate": 4.205598638169094e-05, "loss": 1.95146875, "step": 84000 }, { "epoch": 16.077170418006432, "learning_rate": 4.196141479099679e-05, "loss": 1.928953125, "step": 85000 }, { "epoch": 16.266313599394742, "learning_rate": 4.186684320030263e-05, "loss": 1.895140625, "step": 86000 }, { "epoch": 16.455456780783052, "learning_rate": 4.1772271609608474e-05, "loss": 1.909171875, "step": 87000 }, { "epoch": 16.644599962171363, "learning_rate": 4.1677700018914316e-05, "loss": 1.92403125, "step": 88000 }, { "epoch": 16.833743143559676, "learning_rate": 4.1583128428220164e-05, "loss": 1.935015625, "step": 89000 }, { "epoch": 17.022886324947986, "learning_rate": 4.148855683752601e-05, "loss": 1.92928125, "step": 90000 }, { "epoch": 17.212029506336297, "learning_rate": 4.139398524683185e-05, "loss": 1.86678125, "step": 91000 }, { "epoch": 17.401172687724607, "learning_rate": 4.1299413656137696e-05, "loss": 1.8859375, "step": 92000 }, { "epoch": 17.590315869112917, "learning_rate": 4.1204842065443545e-05, "loss": 1.90540625, "step": 93000 }, { "epoch": 17.77945905050123, "learning_rate": 4.1110270474749386e-05, "loss": 1.911765625, "step": 94000 }, { "epoch": 17.96860223188954, "learning_rate": 4.101569888405523e-05, "loss": 1.924671875, "step": 95000 }, { "epoch": 18.15774541327785, "learning_rate": 4.092112729336108e-05, "loss": 1.85646875, "step": 96000 }, { "epoch": 18.34688859466616, "learning_rate": 4.082655570266692e-05, "loss": 1.862640625, "step": 97000 }, { "epoch": 18.53603177605447, "learning_rate": 4.073198411197277e-05, "loss": 1.880921875, "step": 98000 }, { "epoch": 18.725174957442785, "learning_rate": 4.063741252127861e-05, "loss": 1.8903125, "step": 99000 }, { "epoch": 18.914318138831096, "learning_rate": 4.054284093058445e-05, "loss": 1.904015625, "step": 100000 }, { "epoch": 19.103461320219406, "learning_rate": 4.04482693398903e-05, "loss": 1.8575625, "step": 101000 }, { "epoch": 19.292604501607716, "learning_rate": 4.035369774919615e-05, "loss": 1.837125, "step": 102000 }, { "epoch": 19.481747682996026, "learning_rate": 4.025912615850199e-05, "loss": 1.859359375, "step": 103000 }, { "epoch": 19.67089086438434, "learning_rate": 4.016455456780783e-05, "loss": 1.867921875, "step": 104000 }, { "epoch": 19.86003404577265, "learning_rate": 4.006998297711368e-05, "loss": 1.88125, "step": 105000 }, { "epoch": 20.04917722716096, "learning_rate": 3.997541138641952e-05, "loss": 1.866546875, "step": 106000 }, { "epoch": 20.23832040854927, "learning_rate": 3.988083979572537e-05, "loss": 1.811046875, "step": 107000 }, { "epoch": 20.427463589937584, "learning_rate": 3.978626820503121e-05, "loss": 1.8318125, "step": 108000 }, { "epoch": 20.616606771325895, "learning_rate": 3.969169661433705e-05, "loss": 1.83990625, "step": 109000 }, { "epoch": 20.805749952714205, "learning_rate": 3.95971250236429e-05, "loss": 1.91721875, "step": 110000 }, { "epoch": 20.994893134102515, "learning_rate": 3.950255343294875e-05, "loss": 1.923984375, "step": 111000 }, { "epoch": 21.184036315490825, "learning_rate": 3.9407981842254585e-05, "loss": 1.846875, "step": 112000 }, { "epoch": 21.37317949687914, "learning_rate": 3.931341025156043e-05, "loss": 1.860984375, "step": 113000 }, { "epoch": 21.56232267826745, "learning_rate": 3.9218838660866275e-05, "loss": 1.8764375, "step": 114000 }, { "epoch": 21.75146585965576, "learning_rate": 3.912426707017212e-05, "loss": 1.88171875, "step": 115000 }, { "epoch": 21.94060904104407, "learning_rate": 3.9029695479477965e-05, "loss": 1.89409375, "step": 116000 }, { "epoch": 22.12975222243238, "learning_rate": 3.8935123888783807e-05, "loss": 1.847390625, "step": 117000 }, { "epoch": 22.318895403820694, "learning_rate": 3.8840552298089655e-05, "loss": 1.83696875, "step": 118000 }, { "epoch": 22.508038585209004, "learning_rate": 3.8745980707395504e-05, "loss": 1.852484375, "step": 119000 }, { "epoch": 22.697181766597314, "learning_rate": 3.8651409116701345e-05, "loss": 1.86578125, "step": 120000 }, { "epoch": 22.886324947985624, "learning_rate": 3.855683752600719e-05, "loss": 1.877734375, "step": 121000 }, { "epoch": 23.075468129373935, "learning_rate": 3.8462265935313035e-05, "loss": 1.8471875, "step": 122000 }, { "epoch": 23.26461131076225, "learning_rate": 3.836769434461888e-05, "loss": 1.816078125, "step": 123000 }, { "epoch": 23.45375449215056, "learning_rate": 3.8273122753924726e-05, "loss": 1.833703125, "step": 124000 }, { "epoch": 23.64289767353887, "learning_rate": 3.817855116323057e-05, "loss": 1.840984375, "step": 125000 }, { "epoch": 23.83204085492718, "learning_rate": 3.808397957253641e-05, "loss": 1.8568125, "step": 126000 }, { "epoch": 24.02118403631549, "learning_rate": 3.798940798184226e-05, "loss": 1.855609375, "step": 127000 }, { "epoch": 24.210327217703803, "learning_rate": 3.7894836391148106e-05, "loss": 1.7931875, "step": 128000 }, { "epoch": 24.399470399092113, "learning_rate": 3.780026480045394e-05, "loss": 1.805828125, "step": 129000 }, { "epoch": 24.588613580480423, "learning_rate": 3.770569320975979e-05, "loss": 1.825140625, "step": 130000 }, { "epoch": 24.777756761868734, "learning_rate": 3.761112161906564e-05, "loss": 1.83425, "step": 131000 }, { "epoch": 24.966899943257047, "learning_rate": 3.751655002837148e-05, "loss": 1.8449375, "step": 132000 }, { "epoch": 25.156043124645358, "learning_rate": 3.742197843767732e-05, "loss": 1.78215625, "step": 133000 }, { "epoch": 25.345186306033668, "learning_rate": 3.732740684698317e-05, "loss": 1.79159375, "step": 134000 }, { "epoch": 25.534329487421978, "learning_rate": 3.723283525628901e-05, "loss": 1.80753125, "step": 135000 }, { "epoch": 25.723472668810288, "learning_rate": 3.713826366559486e-05, "loss": 1.81053125, "step": 136000 }, { "epoch": 25.912615850198602, "learning_rate": 3.70436920749007e-05, "loss": 1.8265, "step": 137000 }, { "epoch": 26.101759031586912, "learning_rate": 3.694912048420654e-05, "loss": 1.79653125, "step": 138000 }, { "epoch": 26.290902212975222, "learning_rate": 3.685454889351239e-05, "loss": 1.76934375, "step": 139000 }, { "epoch": 26.480045394363533, "learning_rate": 3.6759977302818233e-05, "loss": 1.78228125, "step": 140000 }, { "epoch": 26.669188575751843, "learning_rate": 3.666540571212408e-05, "loss": 1.79384375, "step": 141000 }, { "epoch": 26.858331757140157, "learning_rate": 3.6570834121429924e-05, "loss": 1.8095625, "step": 142000 }, { "epoch": 27.047474938528467, "learning_rate": 3.6476262530735765e-05, "loss": 1.7923125, "step": 143000 }, { "epoch": 27.236618119916777, "learning_rate": 3.6381690940041614e-05, "loss": 1.7471875, "step": 144000 }, { "epoch": 27.425761301305087, "learning_rate": 3.628711934934746e-05, "loss": 1.76446875, "step": 145000 }, { "epoch": 27.614904482693397, "learning_rate": 3.61925477586533e-05, "loss": 1.77759375, "step": 146000 }, { "epoch": 27.80404766408171, "learning_rate": 3.6097976167959146e-05, "loss": 1.78840625, "step": 147000 }, { "epoch": 27.99319084547002, "learning_rate": 3.6003404577264994e-05, "loss": 1.799125, "step": 148000 }, { "epoch": 28.18233402685833, "learning_rate": 3.5908832986570836e-05, "loss": 1.73171875, "step": 149000 }, { "epoch": 28.37147720824664, "learning_rate": 3.581426139587668e-05, "loss": 1.73996875, "step": 150000 }, { "epoch": 28.560620389634952, "learning_rate": 3.5719689805182526e-05, "loss": 1.75671875, "step": 151000 }, { "epoch": 28.749763571023266, "learning_rate": 3.562511821448837e-05, "loss": 1.7723125, "step": 152000 }, { "epoch": 28.938906752411576, "learning_rate": 3.5530546623794216e-05, "loss": 1.77990625, "step": 153000 }, { "epoch": 29.128049933799886, "learning_rate": 3.543597503310006e-05, "loss": 1.73996875, "step": 154000 }, { "epoch": 29.317193115188196, "learning_rate": 3.53414034424059e-05, "loss": 1.726625, "step": 155000 }, { "epoch": 29.50633629657651, "learning_rate": 3.524683185171175e-05, "loss": 1.74075, "step": 156000 }, { "epoch": 29.69547947796482, "learning_rate": 3.5152260261017597e-05, "loss": 1.753, "step": 157000 }, { "epoch": 29.88462265935313, "learning_rate": 3.505768867032344e-05, "loss": 1.7575, "step": 158000 }, { "epoch": 30.07376584074144, "learning_rate": 3.496311707962928e-05, "loss": 1.73771875, "step": 159000 }, { "epoch": 30.26290902212975, "learning_rate": 3.486854548893513e-05, "loss": 1.70875, "step": 160000 }, { "epoch": 30.452052203518065, "learning_rate": 3.477397389824097e-05, "loss": 1.72415625, "step": 161000 }, { "epoch": 30.641195384906375, "learning_rate": 3.467940230754682e-05, "loss": 1.7310625, "step": 162000 }, { "epoch": 30.830338566294685, "learning_rate": 3.4584830716852654e-05, "loss": 1.74175, "step": 163000 }, { "epoch": 31.019481747682995, "learning_rate": 3.44902591261585e-05, "loss": 1.74609375, "step": 164000 }, { "epoch": 31.208624929071306, "learning_rate": 3.439568753546435e-05, "loss": 1.684875, "step": 165000 }, { "epoch": 31.39776811045962, "learning_rate": 3.430111594477019e-05, "loss": 1.69925, "step": 166000 }, { "epoch": 31.58691129184793, "learning_rate": 3.4206544354076034e-05, "loss": 1.7169375, "step": 167000 }, { "epoch": 31.77605447323624, "learning_rate": 3.411197276338188e-05, "loss": 1.7275, "step": 168000 }, { "epoch": 31.96519765462455, "learning_rate": 3.4017401172687724e-05, "loss": 1.74096875, "step": 169000 }, { "epoch": 32.154340836012864, "learning_rate": 3.392282958199357e-05, "loss": 1.68128125, "step": 170000 }, { "epoch": 32.343484017401174, "learning_rate": 3.3828257991299414e-05, "loss": 1.6851875, "step": 171000 }, { "epoch": 32.532627198789484, "learning_rate": 3.3733686400605256e-05, "loss": 1.69896875, "step": 172000 }, { "epoch": 32.721770380177794, "learning_rate": 3.3639114809911104e-05, "loss": 1.7080625, "step": 173000 }, { "epoch": 32.910913561566105, "learning_rate": 3.354454321921695e-05, "loss": 1.7200625, "step": 174000 }, { "epoch": 33.100056742954415, "learning_rate": 3.3449971628522795e-05, "loss": 1.69040625, "step": 175000 }, { "epoch": 33.289199924342725, "learning_rate": 3.3355400037828636e-05, "loss": 1.666625, "step": 176000 }, { "epoch": 33.478343105731035, "learning_rate": 3.3260828447134485e-05, "loss": 1.6798125, "step": 177000 }, { "epoch": 33.66748628711935, "learning_rate": 3.3166256856440326e-05, "loss": 1.69575, "step": 178000 }, { "epoch": 33.85662946850766, "learning_rate": 3.3071685265746175e-05, "loss": 1.7008125, "step": 179000 }, { "epoch": 34.04577264989597, "learning_rate": 3.297711367505202e-05, "loss": 1.69340625, "step": 180000 }, { "epoch": 34.23491583128428, "learning_rate": 3.288254208435786e-05, "loss": 1.6509375, "step": 181000 }, { "epoch": 34.42405901267259, "learning_rate": 3.278797049366371e-05, "loss": 1.66703125, "step": 182000 }, { "epoch": 34.613202194060904, "learning_rate": 3.2693398902969555e-05, "loss": 1.67615625, "step": 183000 }, { "epoch": 34.802345375449214, "learning_rate": 3.259882731227539e-05, "loss": 1.6876875, "step": 184000 }, { "epoch": 34.991488556837524, "learning_rate": 3.250425572158124e-05, "loss": 1.695, "step": 185000 }, { "epoch": 35.180631738225834, "learning_rate": 3.240968413088709e-05, "loss": 1.6354375, "step": 186000 }, { "epoch": 35.369774919614144, "learning_rate": 3.231511254019293e-05, "loss": 1.64653125, "step": 187000 }, { "epoch": 35.55891810100246, "learning_rate": 3.222054094949877e-05, "loss": 1.66346875, "step": 188000 }, { "epoch": 35.74806128239077, "learning_rate": 3.212596935880461e-05, "loss": 1.66990625, "step": 189000 }, { "epoch": 35.93720446377908, "learning_rate": 3.203139776811046e-05, "loss": 1.67878125, "step": 190000 }, { "epoch": 36.12634764516739, "learning_rate": 3.193682617741631e-05, "loss": 1.63953125, "step": 191000 }, { "epoch": 36.3154908265557, "learning_rate": 3.184225458672215e-05, "loss": 1.62646875, "step": 192000 }, { "epoch": 36.50463400794401, "learning_rate": 3.174768299602799e-05, "loss": 1.6480625, "step": 193000 }, { "epoch": 36.69377718933232, "learning_rate": 3.165311140533384e-05, "loss": 1.65371875, "step": 194000 }, { "epoch": 36.88292037072063, "learning_rate": 3.155853981463968e-05, "loss": 1.66778125, "step": 195000 }, { "epoch": 37.07206355210894, "learning_rate": 3.146396822394553e-05, "loss": 1.64275, "step": 196000 }, { "epoch": 37.26120673349726, "learning_rate": 3.136939663325137e-05, "loss": 1.61565625, "step": 197000 }, { "epoch": 37.45034991488557, "learning_rate": 3.1274825042557215e-05, "loss": 1.631875, "step": 198000 }, { "epoch": 37.63949309627388, "learning_rate": 3.118025345186306e-05, "loss": 1.64078125, "step": 199000 }, { "epoch": 37.82863627766219, "learning_rate": 3.108568186116891e-05, "loss": 1.6473125, "step": 200000 }, { "epoch": 38.0177794590505, "learning_rate": 3.0991110270474747e-05, "loss": 1.65090625, "step": 201000 }, { "epoch": 38.20692264043881, "learning_rate": 3.0896538679780595e-05, "loss": 1.59846875, "step": 202000 }, { "epoch": 38.39606582182712, "learning_rate": 3.0801967089086443e-05, "loss": 1.6114375, "step": 203000 }, { "epoch": 38.58520900321543, "learning_rate": 3.0707395498392285e-05, "loss": 1.6266875, "step": 204000 }, { "epoch": 38.77435218460374, "learning_rate": 3.061282390769813e-05, "loss": 1.6340625, "step": 205000 }, { "epoch": 38.96349536599205, "learning_rate": 3.0518252317003975e-05, "loss": 1.64478125, "step": 206000 }, { "epoch": 39.15263854738037, "learning_rate": 3.0423680726309817e-05, "loss": 1.59403125, "step": 207000 }, { "epoch": 39.34178172876868, "learning_rate": 3.0329109135615662e-05, "loss": 1.5961875, "step": 208000 }, { "epoch": 39.53092491015699, "learning_rate": 3.023453754492151e-05, "loss": 1.612375, "step": 209000 }, { "epoch": 39.7200680915453, "learning_rate": 3.013996595422735e-05, "loss": 1.62103125, "step": 210000 }, { "epoch": 39.90921127293361, "learning_rate": 3.0045394363533197e-05, "loss": 1.62684375, "step": 211000 }, { "epoch": 40.09835445432192, "learning_rate": 2.995082277283904e-05, "loss": 1.60059375, "step": 212000 }, { "epoch": 40.28749763571023, "learning_rate": 2.9856251182144884e-05, "loss": 1.58271875, "step": 213000 }, { "epoch": 40.47664081709854, "learning_rate": 2.976167959145073e-05, "loss": 1.59703125, "step": 214000 }, { "epoch": 40.66578399848685, "learning_rate": 2.966710800075657e-05, "loss": 1.604875, "step": 215000 }, { "epoch": 40.85492717987517, "learning_rate": 2.957253641006242e-05, "loss": 1.6115625, "step": 216000 }, { "epoch": 41.04407036126348, "learning_rate": 2.9477964819368265e-05, "loss": 1.60434375, "step": 217000 }, { "epoch": 41.23321354265179, "learning_rate": 2.9383393228674106e-05, "loss": 1.566625, "step": 218000 }, { "epoch": 41.4223567240401, "learning_rate": 2.928882163797995e-05, "loss": 1.58084375, "step": 219000 }, { "epoch": 41.61149990542841, "learning_rate": 2.91942500472858e-05, "loss": 1.590625, "step": 220000 }, { "epoch": 41.80064308681672, "learning_rate": 2.9099678456591638e-05, "loss": 1.60096875, "step": 221000 }, { "epoch": 41.98978626820503, "learning_rate": 2.9005106865897487e-05, "loss": 1.6100625, "step": 222000 }, { "epoch": 42.17892944959334, "learning_rate": 2.8910535275203332e-05, "loss": 1.552625, "step": 223000 }, { "epoch": 42.36807263098165, "learning_rate": 2.8815963684509173e-05, "loss": 1.56346875, "step": 224000 }, { "epoch": 42.55721581236996, "learning_rate": 2.872139209381502e-05, "loss": 1.57903125, "step": 225000 }, { "epoch": 42.74635899375828, "learning_rate": 2.8626820503120867e-05, "loss": 1.5851875, "step": 226000 }, { "epoch": 42.93550217514659, "learning_rate": 2.8532248912426705e-05, "loss": 1.59359375, "step": 227000 }, { "epoch": 43.1246453565349, "learning_rate": 2.8437677321732554e-05, "loss": 1.55915625, "step": 228000 }, { "epoch": 43.31378853792321, "learning_rate": 2.83431057310384e-05, "loss": 1.5494375, "step": 229000 }, { "epoch": 43.50293171931152, "learning_rate": 2.824853414034424e-05, "loss": 1.55978125, "step": 230000 }, { "epoch": 43.69207490069983, "learning_rate": 2.8153962549650086e-05, "loss": 1.57346875, "step": 231000 }, { "epoch": 43.88121808208814, "learning_rate": 2.8059390958955934e-05, "loss": 1.58403125, "step": 232000 }, { "epoch": 44.07036126347645, "learning_rate": 2.7964819368261776e-05, "loss": 1.568125, "step": 233000 }, { "epoch": 44.25950444486476, "learning_rate": 2.787024777756762e-05, "loss": 1.535125, "step": 234000 }, { "epoch": 44.44864762625307, "learning_rate": 2.7775676186873466e-05, "loss": 1.5455, "step": 235000 }, { "epoch": 44.63779080764139, "learning_rate": 2.7681104596179308e-05, "loss": 1.55828125, "step": 236000 }, { "epoch": 44.8269339890297, "learning_rate": 2.7586533005485156e-05, "loss": 1.56821875, "step": 237000 }, { "epoch": 45.01607717041801, "learning_rate": 2.7491961414790994e-05, "loss": 1.573875, "step": 238000 }, { "epoch": 45.20522035180632, "learning_rate": 2.7397389824096843e-05, "loss": 1.52146875, "step": 239000 }, { "epoch": 45.39436353319463, "learning_rate": 2.7302818233402688e-05, "loss": 1.53628125, "step": 240000 }, { "epoch": 45.58350671458294, "learning_rate": 2.720824664270853e-05, "loss": 1.549875, "step": 241000 }, { "epoch": 45.77264989597125, "learning_rate": 2.7113675052014375e-05, "loss": 1.55240625, "step": 242000 }, { "epoch": 45.96179307735956, "learning_rate": 2.7019103461320223e-05, "loss": 1.56171875, "step": 243000 }, { "epoch": 46.15093625874787, "learning_rate": 2.692453187062606e-05, "loss": 1.52228125, "step": 244000 }, { "epoch": 46.340079440136186, "learning_rate": 2.682996027993191e-05, "loss": 1.5250625, "step": 245000 }, { "epoch": 46.5292226215245, "learning_rate": 2.6735388689237755e-05, "loss": 1.5320625, "step": 246000 }, { "epoch": 46.71836580291281, "learning_rate": 2.6640817098543597e-05, "loss": 1.5440625, "step": 247000 }, { "epoch": 46.90750898430112, "learning_rate": 2.6546245507849442e-05, "loss": 1.5495, "step": 248000 }, { "epoch": 47.09665216568943, "learning_rate": 2.645167391715529e-05, "loss": 1.5265625, "step": 249000 }, { "epoch": 47.28579534707774, "learning_rate": 2.6357102326461132e-05, "loss": 1.50359375, "step": 250000 }, { "epoch": 47.47493852846605, "learning_rate": 2.6262530735766977e-05, "loss": 1.5239375, "step": 251000 }, { "epoch": 47.66408170985436, "learning_rate": 2.6167959145072822e-05, "loss": 1.5295625, "step": 252000 }, { "epoch": 47.85322489124267, "learning_rate": 2.6073387554378664e-05, "loss": 1.53828125, "step": 253000 }, { "epoch": 48.04236807263098, "learning_rate": 2.597881596368451e-05, "loss": 1.53, "step": 254000 }, { "epoch": 48.231511254019296, "learning_rate": 2.5884244372990358e-05, "loss": 1.496, "step": 255000 }, { "epoch": 48.420654435407606, "learning_rate": 2.57896727822962e-05, "loss": 1.503375, "step": 256000 }, { "epoch": 48.609797616795916, "learning_rate": 2.5695101191602044e-05, "loss": 1.521125, "step": 257000 }, { "epoch": 48.798940798184226, "learning_rate": 2.560052960090789e-05, "loss": 1.5279375, "step": 258000 }, { "epoch": 48.98808397957254, "learning_rate": 2.550595801021373e-05, "loss": 1.5339375, "step": 259000 }, { "epoch": 49.17722716096085, "learning_rate": 2.541138641951958e-05, "loss": 1.4859375, "step": 260000 }, { "epoch": 49.36637034234916, "learning_rate": 2.5316814828825425e-05, "loss": 1.4956875, "step": 261000 }, { "epoch": 49.55551352373747, "learning_rate": 2.5222243238131266e-05, "loss": 1.50375, "step": 262000 }, { "epoch": 49.74465670512578, "learning_rate": 2.512767164743711e-05, "loss": 1.51525, "step": 263000 }, { "epoch": 49.933799886514095, "learning_rate": 2.5033100056742953e-05, "loss": 1.52084375, "step": 264000 }, { "epoch": 50.122943067902405, "learning_rate": 2.4938528466048798e-05, "loss": 1.49025, "step": 265000 }, { "epoch": 50.312086249290715, "learning_rate": 2.4843956875354647e-05, "loss": 1.48290625, "step": 266000 }, { "epoch": 50.501229430679025, "learning_rate": 2.474938528466049e-05, "loss": 1.49121875, "step": 267000 }, { "epoch": 50.690372612067335, "learning_rate": 2.4654813693966334e-05, "loss": 1.502875, "step": 268000 }, { "epoch": 50.879515793455646, "learning_rate": 2.4560242103272175e-05, "loss": 1.51284375, "step": 269000 }, { "epoch": 51.068658974843956, "learning_rate": 2.4465670512578024e-05, "loss": 1.4948125, "step": 270000 }, { "epoch": 51.257802156232266, "learning_rate": 2.4371098921883865e-05, "loss": 1.4699375, "step": 271000 }, { "epoch": 51.446945337620576, "learning_rate": 2.427652733118971e-05, "loss": 1.48165625, "step": 272000 }, { "epoch": 51.63608851900889, "learning_rate": 2.4181955740495556e-05, "loss": 1.491875, "step": 273000 }, { "epoch": 51.825231700397204, "learning_rate": 2.40873841498014e-05, "loss": 1.498875, "step": 274000 }, { "epoch": 52.014374881785514, "learning_rate": 2.3992812559107246e-05, "loss": 1.50025, "step": 275000 }, { "epoch": 52.203518063173824, "learning_rate": 2.389824096841309e-05, "loss": 1.45865625, "step": 276000 }, { "epoch": 52.392661244562134, "learning_rate": 2.3803669377718936e-05, "loss": 1.4689375, "step": 277000 }, { "epoch": 52.581804425950445, "learning_rate": 2.3709097787024778e-05, "loss": 1.4756875, "step": 278000 }, { "epoch": 52.770947607338755, "learning_rate": 2.3614526196330626e-05, "loss": 1.48815625, "step": 279000 }, { "epoch": 52.960090788727065, "learning_rate": 2.3519954605636468e-05, "loss": 1.4954375, "step": 280000 }, { "epoch": 53.149233970115375, "learning_rate": 2.3425383014942313e-05, "loss": 1.4575625, "step": 281000 }, { "epoch": 53.338377151503686, "learning_rate": 2.3330811424248155e-05, "loss": 1.4586875, "step": 282000 }, { "epoch": 53.527520332891996, "learning_rate": 2.3236239833554003e-05, "loss": 1.46946875, "step": 283000 }, { "epoch": 53.71666351428031, "learning_rate": 2.3141668242859845e-05, "loss": 1.47496875, "step": 284000 }, { "epoch": 53.90580669566862, "learning_rate": 2.304709665216569e-05, "loss": 1.48246875, "step": 285000 }, { "epoch": 54.09494987705693, "learning_rate": 2.2952525061471535e-05, "loss": 1.458375, "step": 286000 }, { "epoch": 54.284093058445244, "learning_rate": 2.285795347077738e-05, "loss": 1.44609375, "step": 287000 }, { "epoch": 54.473236239833554, "learning_rate": 2.2763381880083222e-05, "loss": 1.45584375, "step": 288000 }, { "epoch": 54.662379421221864, "learning_rate": 2.266881028938907e-05, "loss": 1.46325, "step": 289000 }, { "epoch": 54.851522602610174, "learning_rate": 2.2574238698694912e-05, "loss": 1.475625, "step": 290000 }, { "epoch": 55.040665783998485, "learning_rate": 2.2479667108000757e-05, "loss": 1.46834375, "step": 291000 }, { "epoch": 55.229808965386795, "learning_rate": 2.2385095517306602e-05, "loss": 1.43265625, "step": 292000 }, { "epoch": 55.41895214677511, "learning_rate": 2.2290523926612447e-05, "loss": 1.4436875, "step": 293000 }, { "epoch": 55.60809532816342, "learning_rate": 2.2195952335918292e-05, "loss": 1.455, "step": 294000 }, { "epoch": 55.79723850955173, "learning_rate": 2.2101380745224134e-05, "loss": 1.46475, "step": 295000 }, { "epoch": 55.98638169094004, "learning_rate": 2.2006809154529982e-05, "loss": 1.46825, "step": 296000 }, { "epoch": 56.17552487232835, "learning_rate": 2.1912237563835824e-05, "loss": 1.4260625, "step": 297000 }, { "epoch": 56.36466805371666, "learning_rate": 2.181766597314167e-05, "loss": 1.43375, "step": 298000 }, { "epoch": 56.55381123510497, "learning_rate": 2.1723094382447514e-05, "loss": 1.444375, "step": 299000 }, { "epoch": 56.74295441649328, "learning_rate": 2.162852279175336e-05, "loss": 1.4503125, "step": 300000 }, { "epoch": 56.932097597881594, "learning_rate": 2.15339512010592e-05, "loss": 1.4611875, "step": 301000 }, { "epoch": 57.121240779269904, "learning_rate": 2.143937961036505e-05, "loss": 1.4291875, "step": 302000 }, { "epoch": 57.31038396065822, "learning_rate": 2.134480801967089e-05, "loss": 1.422, "step": 303000 }, { "epoch": 57.49952714204653, "learning_rate": 2.1250236428976736e-05, "loss": 1.434625, "step": 304000 }, { "epoch": 57.68867032343484, "learning_rate": 2.115566483828258e-05, "loss": 1.437625, "step": 305000 }, { "epoch": 57.87781350482315, "learning_rate": 2.1061093247588427e-05, "loss": 1.4495625, "step": 306000 }, { "epoch": 58.06695668621146, "learning_rate": 2.0966521656894268e-05, "loss": 1.440125, "step": 307000 }, { "epoch": 58.25609986759977, "learning_rate": 2.0871950066200113e-05, "loss": 1.413375, "step": 308000 }, { "epoch": 58.44524304898808, "learning_rate": 2.077737847550596e-05, "loss": 1.4220625, "step": 309000 }, { "epoch": 58.63438623037639, "learning_rate": 2.0682806884811804e-05, "loss": 1.4315, "step": 310000 }, { "epoch": 58.8235294117647, "learning_rate": 2.058823529411765e-05, "loss": 1.4355625, "step": 311000 }, { "epoch": 59.01267259315302, "learning_rate": 2.0493663703423494e-05, "loss": 1.440125, "step": 312000 }, { "epoch": 59.20181577454133, "learning_rate": 2.039909211272934e-05, "loss": 1.4, "step": 313000 }, { "epoch": 59.39095895592964, "learning_rate": 2.030452052203518e-05, "loss": 1.4098125, "step": 314000 }, { "epoch": 59.58010213731795, "learning_rate": 2.0209948931341026e-05, "loss": 1.4241875, "step": 315000 }, { "epoch": 59.76924531870626, "learning_rate": 2.011537734064687e-05, "loss": 1.43, "step": 316000 }, { "epoch": 59.95838850009457, "learning_rate": 2.0020805749952716e-05, "loss": 1.43575, "step": 317000 }, { "epoch": 60.14753168148288, "learning_rate": 1.9926234159258557e-05, "loss": 1.402625, "step": 318000 }, { "epoch": 60.33667486287119, "learning_rate": 1.9831662568564406e-05, "loss": 1.4025, "step": 319000 }, { "epoch": 60.5258180442595, "learning_rate": 1.9737090977870248e-05, "loss": 1.413, "step": 320000 }, { "epoch": 60.71496122564781, "learning_rate": 1.9642519387176093e-05, "loss": 1.4200625, "step": 321000 }, { "epoch": 60.90410440703613, "learning_rate": 1.9547947796481938e-05, "loss": 1.4270625, "step": 322000 }, { "epoch": 61.09324758842444, "learning_rate": 1.9453376205787783e-05, "loss": 1.4055625, "step": 323000 }, { "epoch": 61.28239076981275, "learning_rate": 1.9358804615093625e-05, "loss": 1.3949375, "step": 324000 }, { "epoch": 61.47153395120106, "learning_rate": 1.9264233024399473e-05, "loss": 1.4008125, "step": 325000 }, { "epoch": 61.66067713258937, "learning_rate": 1.9169661433705315e-05, "loss": 1.4103125, "step": 326000 }, { "epoch": 61.84982031397768, "learning_rate": 1.907508984301116e-05, "loss": 1.41725, "step": 327000 }, { "epoch": 62.03896349536599, "learning_rate": 1.8980518252317005e-05, "loss": 1.4091875, "step": 328000 }, { "epoch": 62.2281066767543, "learning_rate": 1.888594666162285e-05, "loss": 1.382125, "step": 329000 }, { "epoch": 62.41724985814261, "learning_rate": 1.8791375070928692e-05, "loss": 1.39175, "step": 330000 }, { "epoch": 62.60639303953093, "learning_rate": 1.8696803480234537e-05, "loss": 1.3970625, "step": 331000 }, { "epoch": 62.79553622091924, "learning_rate": 1.8602231889540382e-05, "loss": 1.409375, "step": 332000 }, { "epoch": 62.98467940230755, "learning_rate": 1.8507660298846227e-05, "loss": 1.4141875, "step": 333000 }, { "epoch": 63.17382258369586, "learning_rate": 1.8413088708152072e-05, "loss": 1.371875, "step": 334000 }, { "epoch": 63.36296576508417, "learning_rate": 1.8318517117457917e-05, "loss": 1.382125, "step": 335000 }, { "epoch": 63.55210894647248, "learning_rate": 1.8223945526763762e-05, "loss": 1.3925, "step": 336000 }, { "epoch": 63.74125212786079, "learning_rate": 1.8129373936069604e-05, "loss": 1.3989375, "step": 337000 }, { "epoch": 63.9303953092491, "learning_rate": 1.8034802345375452e-05, "loss": 1.4040625, "step": 338000 }, { "epoch": 64.11953849063741, "learning_rate": 1.7940230754681294e-05, "loss": 1.3838125, "step": 339000 }, { "epoch": 64.30868167202573, "learning_rate": 1.784565916398714e-05, "loss": 1.3745625, "step": 340000 }, { "epoch": 64.49782485341403, "learning_rate": 1.7751087573292984e-05, "loss": 1.381875, "step": 341000 }, { "epoch": 64.68696803480235, "learning_rate": 1.765651598259883e-05, "loss": 1.3859375, "step": 342000 }, { "epoch": 64.87611121619065, "learning_rate": 1.756194439190467e-05, "loss": 1.4001875, "step": 343000 }, { "epoch": 65.06525439757897, "learning_rate": 1.7467372801210516e-05, "loss": 1.387, "step": 344000 }, { "epoch": 65.25439757896727, "learning_rate": 1.737280121051636e-05, "loss": 1.3645625, "step": 345000 }, { "epoch": 65.44354076035559, "learning_rate": 1.7278229619822206e-05, "loss": 1.373375, "step": 346000 }, { "epoch": 65.6326839417439, "learning_rate": 1.7183658029128048e-05, "loss": 1.384, "step": 347000 }, { "epoch": 65.82182712313221, "learning_rate": 1.7089086438433897e-05, "loss": 1.386875, "step": 348000 }, { "epoch": 66.01097030452053, "learning_rate": 1.6994514847739738e-05, "loss": 1.3924375, "step": 349000 }, { "epoch": 66.20011348590883, "learning_rate": 1.6899943257045583e-05, "loss": 1.357375, "step": 350000 }, { "epoch": 66.38925666729715, "learning_rate": 1.680537166635143e-05, "loss": 1.3655625, "step": 351000 }, { "epoch": 66.57839984868545, "learning_rate": 1.6710800075657274e-05, "loss": 1.3670625, "step": 352000 }, { "epoch": 66.76754303007377, "learning_rate": 1.661622848496312e-05, "loss": 1.3799375, "step": 353000 }, { "epoch": 66.95668621146207, "learning_rate": 1.6521656894268964e-05, "loss": 1.3880625, "step": 354000 }, { "epoch": 67.14582939285039, "learning_rate": 1.642708530357481e-05, "loss": 1.3585, "step": 355000 }, { "epoch": 67.3349725742387, "learning_rate": 1.633251371288065e-05, "loss": 1.3579375, "step": 356000 }, { "epoch": 67.52411575562701, "learning_rate": 1.6237942122186496e-05, "loss": 1.3641875, "step": 357000 }, { "epoch": 67.71325893701533, "learning_rate": 1.614337053149234e-05, "loss": 1.3701875, "step": 358000 }, { "epoch": 67.90240211840363, "learning_rate": 1.6048798940798186e-05, "loss": 1.375875, "step": 359000 }, { "epoch": 68.09154529979195, "learning_rate": 1.5954227350104027e-05, "loss": 1.359, "step": 360000 }, { "epoch": 68.28068848118025, "learning_rate": 1.5859655759409876e-05, "loss": 1.3483125, "step": 361000 }, { "epoch": 68.46983166256857, "learning_rate": 1.5765084168715718e-05, "loss": 1.3565, "step": 362000 }, { "epoch": 68.65897484395687, "learning_rate": 1.5670512578021563e-05, "loss": 1.362125, "step": 363000 }, { "epoch": 68.84811802534519, "learning_rate": 1.5575940987327408e-05, "loss": 1.36725, "step": 364000 }, { "epoch": 69.0372612067335, "learning_rate": 1.5481369396633253e-05, "loss": 1.3636875, "step": 365000 }, { "epoch": 69.22640438812181, "learning_rate": 1.5386797805939095e-05, "loss": 1.33725, "step": 366000 }, { "epoch": 69.41554756951012, "learning_rate": 1.5292226215244943e-05, "loss": 1.3505, "step": 367000 }, { "epoch": 69.60469075089843, "learning_rate": 1.5197654624550786e-05, "loss": 1.3563125, "step": 368000 }, { "epoch": 69.79383393228674, "learning_rate": 1.510308303385663e-05, "loss": 1.3588125, "step": 369000 }, { "epoch": 69.98297711367505, "learning_rate": 1.5008511443162473e-05, "loss": 1.369125, "step": 370000 }, { "epoch": 70.17212029506337, "learning_rate": 1.491393985246832e-05, "loss": 1.3346875, "step": 371000 }, { "epoch": 70.36126347645167, "learning_rate": 1.4819368261774163e-05, "loss": 1.338, "step": 372000 }, { "epoch": 70.55040665783999, "learning_rate": 1.4724796671080007e-05, "loss": 1.3443125, "step": 373000 }, { "epoch": 70.73954983922829, "learning_rate": 1.4630225080385854e-05, "loss": 1.3544375, "step": 374000 }, { "epoch": 70.9286930206166, "learning_rate": 1.4535653489691697e-05, "loss": 1.3595625, "step": 375000 }, { "epoch": 71.11783620200492, "learning_rate": 1.444108189899754e-05, "loss": 1.343875, "step": 376000 }, { "epoch": 71.30697938339323, "learning_rate": 1.4346510308303387e-05, "loss": 1.332625, "step": 377000 }, { "epoch": 71.49612256478154, "learning_rate": 1.425193871760923e-05, "loss": 1.3395625, "step": 378000 }, { "epoch": 71.68526574616985, "learning_rate": 1.4157367126915074e-05, "loss": 1.344125, "step": 379000 }, { "epoch": 71.87440892755816, "learning_rate": 1.406279553622092e-05, "loss": 1.3505625, "step": 380000 }, { "epoch": 72.06355210894647, "learning_rate": 1.3968223945526764e-05, "loss": 1.3428125, "step": 381000 }, { "epoch": 72.25269529033478, "learning_rate": 1.387365235483261e-05, "loss": 1.324875, "step": 382000 }, { "epoch": 72.44183847172309, "learning_rate": 1.3779080764138453e-05, "loss": 1.332125, "step": 383000 }, { "epoch": 72.6309816531114, "learning_rate": 1.36845091734443e-05, "loss": 1.339, "step": 384000 }, { "epoch": 72.82012483449972, "learning_rate": 1.3589937582750143e-05, "loss": 1.3420625, "step": 385000 }, { "epoch": 73.00926801588803, "learning_rate": 1.3495365992055986e-05, "loss": 1.342875, "step": 386000 }, { "epoch": 73.19841119727634, "learning_rate": 1.3400794401361833e-05, "loss": 1.3198125, "step": 387000 }, { "epoch": 73.38755437866465, "learning_rate": 1.3306222810667676e-05, "loss": 1.3253125, "step": 388000 }, { "epoch": 73.57669756005296, "learning_rate": 1.321165121997352e-05, "loss": 1.328875, "step": 389000 }, { "epoch": 73.76584074144127, "learning_rate": 1.3117079629279367e-05, "loss": 1.335875, "step": 390000 }, { "epoch": 73.95498392282958, "learning_rate": 1.302250803858521e-05, "loss": 1.3428125, "step": 391000 }, { "epoch": 74.14412710421789, "learning_rate": 1.2927936447891053e-05, "loss": 1.3216875, "step": 392000 }, { "epoch": 74.3332702856062, "learning_rate": 1.2833364857196897e-05, "loss": 1.3190625, "step": 393000 }, { "epoch": 74.52241346699452, "learning_rate": 1.2738793266502744e-05, "loss": 1.3250625, "step": 394000 }, { "epoch": 74.71155664838282, "learning_rate": 1.2644221675808587e-05, "loss": 1.3285625, "step": 395000 }, { "epoch": 74.90069982977114, "learning_rate": 1.254965008511443e-05, "loss": 1.3365625, "step": 396000 }, { "epoch": 75.08984301115945, "learning_rate": 1.2455078494420275e-05, "loss": 1.3183125, "step": 397000 }, { "epoch": 75.27898619254776, "learning_rate": 1.236050690372612e-05, "loss": 1.309375, "step": 398000 }, { "epoch": 75.46812937393607, "learning_rate": 1.2265935313031966e-05, "loss": 1.31425, "step": 399000 }, { "epoch": 75.65727255532438, "learning_rate": 1.217136372233781e-05, "loss": 1.32575, "step": 400000 }, { "epoch": 75.84641573671269, "learning_rate": 1.2076792131643656e-05, "loss": 1.330375, "step": 401000 }, { "epoch": 76.035558918101, "learning_rate": 1.1982220540949501e-05, "loss": 1.3319375, "step": 402000 }, { "epoch": 76.22470209948932, "learning_rate": 1.1887648950255344e-05, "loss": 1.2995625, "step": 403000 }, { "epoch": 76.41384528087762, "learning_rate": 1.179307735956119e-05, "loss": 1.309, "step": 404000 }, { "epoch": 76.60298846226594, "learning_rate": 1.1698505768867033e-05, "loss": 1.3225, "step": 405000 } ], "max_steps": 528700, "num_train_epochs": 100, "total_flos": 619270336176979968, "trial_name": null, "trial_params": null }