gpt2-exomachina / trainer_state.json
pearsonkyle's picture
first
a6f4731
raw
history blame
49.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 63.36296576508417,
"global_step": 335000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.18914318138831096,
"learning_rate": 4.990542840930585e-05,
"loss": 2.2949150390625,
"step": 1000
},
{
"epoch": 0.3782863627766219,
"learning_rate": 4.981085681861169e-05,
"loss": 2.31815234375,
"step": 2000
},
{
"epoch": 0.5674295441649329,
"learning_rate": 4.9716285227917534e-05,
"loss": 2.32872216796875,
"step": 3000
},
{
"epoch": 0.7565727255532438,
"learning_rate": 4.962171363722338e-05,
"loss": 2.33826904296875,
"step": 4000
},
{
"epoch": 0.9457159069415547,
"learning_rate": 4.9527142046529224e-05,
"loss": 2.340685546875,
"step": 5000
},
{
"epoch": 1.1348590883298657,
"learning_rate": 4.943257045583507e-05,
"loss": 2.28626953125,
"step": 6000
},
{
"epoch": 1.3240022697181766,
"learning_rate": 4.9337998865140915e-05,
"loss": 2.27450390625,
"step": 7000
},
{
"epoch": 1.5131454511064875,
"learning_rate": 4.9243427274446756e-05,
"loss": 2.287498046875,
"step": 8000
},
{
"epoch": 1.7022886324947986,
"learning_rate": 4.9148855683752605e-05,
"loss": 2.30305078125,
"step": 9000
},
{
"epoch": 1.8914318138831097,
"learning_rate": 4.905428409305845e-05,
"loss": 2.30912890625,
"step": 10000
},
{
"epoch": 2.0805749952714203,
"learning_rate": 4.895971250236429e-05,
"loss": 2.272595703125,
"step": 11000
},
{
"epoch": 2.2697181766597314,
"learning_rate": 4.886514091167014e-05,
"loss": 2.238087890625,
"step": 12000
},
{
"epoch": 2.4588613580480425,
"learning_rate": 4.877056932097598e-05,
"loss": 2.253140625,
"step": 13000
},
{
"epoch": 2.648004539436353,
"learning_rate": 4.867599773028183e-05,
"loss": 2.26483203125,
"step": 14000
},
{
"epoch": 2.8371477208246643,
"learning_rate": 4.858142613958767e-05,
"loss": 2.2735078125,
"step": 15000
},
{
"epoch": 3.0262909022129754,
"learning_rate": 4.848685454889351e-05,
"loss": 2.27125,
"step": 16000
},
{
"epoch": 3.215434083601286,
"learning_rate": 4.839228295819936e-05,
"loss": 2.20089453125,
"step": 17000
},
{
"epoch": 3.404577264989597,
"learning_rate": 4.829771136750521e-05,
"loss": 2.2166953125,
"step": 18000
},
{
"epoch": 3.593720446377908,
"learning_rate": 4.820313977681105e-05,
"loss": 2.2287421875,
"step": 19000
},
{
"epoch": 3.782863627766219,
"learning_rate": 4.810856818611689e-05,
"loss": 2.242296875,
"step": 20000
},
{
"epoch": 3.97200680915453,
"learning_rate": 4.801399659542274e-05,
"loss": 2.25162109375,
"step": 21000
},
{
"epoch": 4.161149990542841,
"learning_rate": 4.791942500472858e-05,
"loss": 2.17917578125,
"step": 22000
},
{
"epoch": 4.350293171931152,
"learning_rate": 4.782485341403443e-05,
"loss": 2.1837734375,
"step": 23000
},
{
"epoch": 4.539436353319463,
"learning_rate": 4.773028182334027e-05,
"loss": 2.1981953125,
"step": 24000
},
{
"epoch": 4.728579534707774,
"learning_rate": 4.763571023264611e-05,
"loss": 2.2091328125,
"step": 25000
},
{
"epoch": 4.917722716096085,
"learning_rate": 4.754113864195196e-05,
"loss": 2.22294140625,
"step": 26000
},
{
"epoch": 5.106865897484396,
"learning_rate": 4.744656705125781e-05,
"loss": 2.17415625,
"step": 27000
},
{
"epoch": 5.296009078872706,
"learning_rate": 4.7351995460563645e-05,
"loss": 2.1536875,
"step": 28000
},
{
"epoch": 5.485152260261017,
"learning_rate": 4.725742386986949e-05,
"loss": 2.17046484375,
"step": 29000
},
{
"epoch": 5.6742954416493285,
"learning_rate": 4.716285227917534e-05,
"loss": 2.17616796875,
"step": 30000
},
{
"epoch": 5.86343862303764,
"learning_rate": 4.706828068848118e-05,
"loss": 2.18503125,
"step": 31000
},
{
"epoch": 6.052581804425951,
"learning_rate": 4.6973709097787025e-05,
"loss": 2.172359375,
"step": 32000
},
{
"epoch": 6.241724985814262,
"learning_rate": 4.687913750709287e-05,
"loss": 2.118359375,
"step": 33000
},
{
"epoch": 6.430868167202572,
"learning_rate": 4.6784565916398715e-05,
"loss": 2.133546875,
"step": 34000
},
{
"epoch": 6.620011348590883,
"learning_rate": 4.6689994325704564e-05,
"loss": 2.1423671875,
"step": 35000
},
{
"epoch": 6.809154529979194,
"learning_rate": 4.6595422735010405e-05,
"loss": 2.1594921875,
"step": 36000
},
{
"epoch": 6.998297711367505,
"learning_rate": 4.650085114431625e-05,
"loss": 2.17365625,
"step": 37000
},
{
"epoch": 7.187440892755816,
"learning_rate": 4.6406279553622095e-05,
"loss": 2.0884140625,
"step": 38000
},
{
"epoch": 7.376584074144127,
"learning_rate": 4.631170796292794e-05,
"loss": 2.1023359375,
"step": 39000
},
{
"epoch": 7.565727255532438,
"learning_rate": 4.6217136372233786e-05,
"loss": 2.1240625,
"step": 40000
},
{
"epoch": 7.754870436920749,
"learning_rate": 4.612256478153963e-05,
"loss": 2.126953125,
"step": 41000
},
{
"epoch": 7.94401361830906,
"learning_rate": 4.602799319084547e-05,
"loss": 2.1407265625,
"step": 42000
},
{
"epoch": 8.133156799697371,
"learning_rate": 4.593342160015132e-05,
"loss": 2.080640625,
"step": 43000
},
{
"epoch": 8.322299981085681,
"learning_rate": 4.5838850009457166e-05,
"loss": 2.07196875,
"step": 44000
},
{
"epoch": 8.511443162473993,
"learning_rate": 4.5744278418763e-05,
"loss": 2.0906640625,
"step": 45000
},
{
"epoch": 8.700586343862303,
"learning_rate": 4.564970682806885e-05,
"loss": 2.10053125,
"step": 46000
},
{
"epoch": 8.889729525250615,
"learning_rate": 4.55551352373747e-05,
"loss": 2.112328125,
"step": 47000
},
{
"epoch": 9.078872706638926,
"learning_rate": 4.546056364668054e-05,
"loss": 2.08459375,
"step": 48000
},
{
"epoch": 9.268015888027236,
"learning_rate": 4.536599205598638e-05,
"loss": 2.039546875,
"step": 49000
},
{
"epoch": 9.457159069415548,
"learning_rate": 4.527142046529223e-05,
"loss": 2.06175,
"step": 50000
},
{
"epoch": 9.646302250803858,
"learning_rate": 4.517684887459807e-05,
"loss": 2.076625,
"step": 51000
},
{
"epoch": 9.83544543219217,
"learning_rate": 4.508227728390392e-05,
"loss": 2.088609375,
"step": 52000
},
{
"epoch": 10.02458861358048,
"learning_rate": 4.498770569320976e-05,
"loss": 2.07996875,
"step": 53000
},
{
"epoch": 10.213731794968792,
"learning_rate": 4.48931341025156e-05,
"loss": 2.0183828125,
"step": 54000
},
{
"epoch": 10.402874976357102,
"learning_rate": 4.479856251182145e-05,
"loss": 2.0310546875,
"step": 55000
},
{
"epoch": 10.592018157745413,
"learning_rate": 4.47039909211273e-05,
"loss": 2.0436796875,
"step": 56000
},
{
"epoch": 10.781161339133725,
"learning_rate": 4.460941933043314e-05,
"loss": 2.0622109375,
"step": 57000
},
{
"epoch": 10.970304520522035,
"learning_rate": 4.4514847739738984e-05,
"loss": 2.06621875,
"step": 58000
},
{
"epoch": 11.159447701910347,
"learning_rate": 4.442027614904483e-05,
"loss": 2.0049921875,
"step": 59000
},
{
"epoch": 11.348590883298657,
"learning_rate": 4.4325704558350674e-05,
"loss": 2.00809375,
"step": 60000
},
{
"epoch": 11.537734064686967,
"learning_rate": 4.423113296765652e-05,
"loss": 2.017078125,
"step": 61000
},
{
"epoch": 11.72687724607528,
"learning_rate": 4.4136561376962364e-05,
"loss": 2.03634375,
"step": 62000
},
{
"epoch": 11.91602042746359,
"learning_rate": 4.4041989786268206e-05,
"loss": 2.04240625,
"step": 63000
},
{
"epoch": 12.105163608851901,
"learning_rate": 4.3947418195574054e-05,
"loss": 1.99821875,
"step": 64000
},
{
"epoch": 12.294306790240212,
"learning_rate": 4.3852846604879896e-05,
"loss": 1.9760625,
"step": 65000
},
{
"epoch": 12.483449971628524,
"learning_rate": 4.375827501418574e-05,
"loss": 1.995484375,
"step": 66000
},
{
"epoch": 12.672593153016834,
"learning_rate": 4.3663703423491586e-05,
"loss": 2.00321875,
"step": 67000
},
{
"epoch": 12.861736334405144,
"learning_rate": 4.356913183279743e-05,
"loss": 2.021078125,
"step": 68000
},
{
"epoch": 13.050879515793456,
"learning_rate": 4.3474560242103276e-05,
"loss": 2.004171875,
"step": 69000
},
{
"epoch": 13.240022697181766,
"learning_rate": 4.337998865140912e-05,
"loss": 1.9488125,
"step": 70000
},
{
"epoch": 13.429165878570078,
"learning_rate": 4.328541706071496e-05,
"loss": 1.967921875,
"step": 71000
},
{
"epoch": 13.618309059958388,
"learning_rate": 4.319084547002081e-05,
"loss": 1.984046875,
"step": 72000
},
{
"epoch": 13.807452241346699,
"learning_rate": 4.3096273879326657e-05,
"loss": 1.998828125,
"step": 73000
},
{
"epoch": 13.99659542273501,
"learning_rate": 4.30017022886325e-05,
"loss": 2.0024375,
"step": 74000
},
{
"epoch": 14.18573860412332,
"learning_rate": 4.290713069793834e-05,
"loss": 1.927890625,
"step": 75000
},
{
"epoch": 14.374881785511633,
"learning_rate": 4.281255910724419e-05,
"loss": 1.94315625,
"step": 76000
},
{
"epoch": 14.564024966899943,
"learning_rate": 4.271798751655003e-05,
"loss": 1.95571875,
"step": 77000
},
{
"epoch": 14.753168148288253,
"learning_rate": 4.262341592585587e-05,
"loss": 1.96771875,
"step": 78000
},
{
"epoch": 14.942311329676565,
"learning_rate": 4.252884433516172e-05,
"loss": 1.981109375,
"step": 79000
},
{
"epoch": 15.131454511064875,
"learning_rate": 4.243427274446756e-05,
"loss": 1.929875,
"step": 80000
},
{
"epoch": 15.320597692453187,
"learning_rate": 4.233970115377341e-05,
"loss": 1.91628125,
"step": 81000
},
{
"epoch": 15.509740873841498,
"learning_rate": 4.224512956307925e-05,
"loss": 1.938640625,
"step": 82000
},
{
"epoch": 15.69888405522981,
"learning_rate": 4.2150557972385094e-05,
"loss": 1.9478125,
"step": 83000
},
{
"epoch": 15.88802723661812,
"learning_rate": 4.205598638169094e-05,
"loss": 1.95146875,
"step": 84000
},
{
"epoch": 16.077170418006432,
"learning_rate": 4.196141479099679e-05,
"loss": 1.928953125,
"step": 85000
},
{
"epoch": 16.266313599394742,
"learning_rate": 4.186684320030263e-05,
"loss": 1.895140625,
"step": 86000
},
{
"epoch": 16.455456780783052,
"learning_rate": 4.1772271609608474e-05,
"loss": 1.909171875,
"step": 87000
},
{
"epoch": 16.644599962171363,
"learning_rate": 4.1677700018914316e-05,
"loss": 1.92403125,
"step": 88000
},
{
"epoch": 16.833743143559676,
"learning_rate": 4.1583128428220164e-05,
"loss": 1.935015625,
"step": 89000
},
{
"epoch": 17.022886324947986,
"learning_rate": 4.148855683752601e-05,
"loss": 1.92928125,
"step": 90000
},
{
"epoch": 17.212029506336297,
"learning_rate": 4.139398524683185e-05,
"loss": 1.86678125,
"step": 91000
},
{
"epoch": 17.401172687724607,
"learning_rate": 4.1299413656137696e-05,
"loss": 1.8859375,
"step": 92000
},
{
"epoch": 17.590315869112917,
"learning_rate": 4.1204842065443545e-05,
"loss": 1.90540625,
"step": 93000
},
{
"epoch": 17.77945905050123,
"learning_rate": 4.1110270474749386e-05,
"loss": 1.911765625,
"step": 94000
},
{
"epoch": 17.96860223188954,
"learning_rate": 4.101569888405523e-05,
"loss": 1.924671875,
"step": 95000
},
{
"epoch": 18.15774541327785,
"learning_rate": 4.092112729336108e-05,
"loss": 1.85646875,
"step": 96000
},
{
"epoch": 18.34688859466616,
"learning_rate": 4.082655570266692e-05,
"loss": 1.862640625,
"step": 97000
},
{
"epoch": 18.53603177605447,
"learning_rate": 4.073198411197277e-05,
"loss": 1.880921875,
"step": 98000
},
{
"epoch": 18.725174957442785,
"learning_rate": 4.063741252127861e-05,
"loss": 1.8903125,
"step": 99000
},
{
"epoch": 18.914318138831096,
"learning_rate": 4.054284093058445e-05,
"loss": 1.904015625,
"step": 100000
},
{
"epoch": 19.103461320219406,
"learning_rate": 4.04482693398903e-05,
"loss": 1.8575625,
"step": 101000
},
{
"epoch": 19.292604501607716,
"learning_rate": 4.035369774919615e-05,
"loss": 1.837125,
"step": 102000
},
{
"epoch": 19.481747682996026,
"learning_rate": 4.025912615850199e-05,
"loss": 1.859359375,
"step": 103000
},
{
"epoch": 19.67089086438434,
"learning_rate": 4.016455456780783e-05,
"loss": 1.867921875,
"step": 104000
},
{
"epoch": 19.86003404577265,
"learning_rate": 4.006998297711368e-05,
"loss": 1.88125,
"step": 105000
},
{
"epoch": 20.04917722716096,
"learning_rate": 3.997541138641952e-05,
"loss": 1.866546875,
"step": 106000
},
{
"epoch": 20.23832040854927,
"learning_rate": 3.988083979572537e-05,
"loss": 1.811046875,
"step": 107000
},
{
"epoch": 20.427463589937584,
"learning_rate": 3.978626820503121e-05,
"loss": 1.8318125,
"step": 108000
},
{
"epoch": 20.616606771325895,
"learning_rate": 3.969169661433705e-05,
"loss": 1.83990625,
"step": 109000
},
{
"epoch": 20.805749952714205,
"learning_rate": 3.95971250236429e-05,
"loss": 1.91721875,
"step": 110000
},
{
"epoch": 20.994893134102515,
"learning_rate": 3.950255343294875e-05,
"loss": 1.923984375,
"step": 111000
},
{
"epoch": 21.184036315490825,
"learning_rate": 3.9407981842254585e-05,
"loss": 1.846875,
"step": 112000
},
{
"epoch": 21.37317949687914,
"learning_rate": 3.931341025156043e-05,
"loss": 1.860984375,
"step": 113000
},
{
"epoch": 21.56232267826745,
"learning_rate": 3.9218838660866275e-05,
"loss": 1.8764375,
"step": 114000
},
{
"epoch": 21.75146585965576,
"learning_rate": 3.912426707017212e-05,
"loss": 1.88171875,
"step": 115000
},
{
"epoch": 21.94060904104407,
"learning_rate": 3.9029695479477965e-05,
"loss": 1.89409375,
"step": 116000
},
{
"epoch": 22.12975222243238,
"learning_rate": 3.8935123888783807e-05,
"loss": 1.847390625,
"step": 117000
},
{
"epoch": 22.318895403820694,
"learning_rate": 3.8840552298089655e-05,
"loss": 1.83696875,
"step": 118000
},
{
"epoch": 22.508038585209004,
"learning_rate": 3.8745980707395504e-05,
"loss": 1.852484375,
"step": 119000
},
{
"epoch": 22.697181766597314,
"learning_rate": 3.8651409116701345e-05,
"loss": 1.86578125,
"step": 120000
},
{
"epoch": 22.886324947985624,
"learning_rate": 3.855683752600719e-05,
"loss": 1.877734375,
"step": 121000
},
{
"epoch": 23.075468129373935,
"learning_rate": 3.8462265935313035e-05,
"loss": 1.8471875,
"step": 122000
},
{
"epoch": 23.26461131076225,
"learning_rate": 3.836769434461888e-05,
"loss": 1.816078125,
"step": 123000
},
{
"epoch": 23.45375449215056,
"learning_rate": 3.8273122753924726e-05,
"loss": 1.833703125,
"step": 124000
},
{
"epoch": 23.64289767353887,
"learning_rate": 3.817855116323057e-05,
"loss": 1.840984375,
"step": 125000
},
{
"epoch": 23.83204085492718,
"learning_rate": 3.808397957253641e-05,
"loss": 1.8568125,
"step": 126000
},
{
"epoch": 24.02118403631549,
"learning_rate": 3.798940798184226e-05,
"loss": 1.855609375,
"step": 127000
},
{
"epoch": 24.210327217703803,
"learning_rate": 3.7894836391148106e-05,
"loss": 1.7931875,
"step": 128000
},
{
"epoch": 24.399470399092113,
"learning_rate": 3.780026480045394e-05,
"loss": 1.805828125,
"step": 129000
},
{
"epoch": 24.588613580480423,
"learning_rate": 3.770569320975979e-05,
"loss": 1.825140625,
"step": 130000
},
{
"epoch": 24.777756761868734,
"learning_rate": 3.761112161906564e-05,
"loss": 1.83425,
"step": 131000
},
{
"epoch": 24.966899943257047,
"learning_rate": 3.751655002837148e-05,
"loss": 1.8449375,
"step": 132000
},
{
"epoch": 25.156043124645358,
"learning_rate": 3.742197843767732e-05,
"loss": 1.78215625,
"step": 133000
},
{
"epoch": 25.345186306033668,
"learning_rate": 3.732740684698317e-05,
"loss": 1.79159375,
"step": 134000
},
{
"epoch": 25.534329487421978,
"learning_rate": 3.723283525628901e-05,
"loss": 1.80753125,
"step": 135000
},
{
"epoch": 25.723472668810288,
"learning_rate": 3.713826366559486e-05,
"loss": 1.81053125,
"step": 136000
},
{
"epoch": 25.912615850198602,
"learning_rate": 3.70436920749007e-05,
"loss": 1.8265,
"step": 137000
},
{
"epoch": 26.101759031586912,
"learning_rate": 3.694912048420654e-05,
"loss": 1.79653125,
"step": 138000
},
{
"epoch": 26.290902212975222,
"learning_rate": 3.685454889351239e-05,
"loss": 1.76934375,
"step": 139000
},
{
"epoch": 26.480045394363533,
"learning_rate": 3.6759977302818233e-05,
"loss": 1.78228125,
"step": 140000
},
{
"epoch": 26.669188575751843,
"learning_rate": 3.666540571212408e-05,
"loss": 1.79384375,
"step": 141000
},
{
"epoch": 26.858331757140157,
"learning_rate": 3.6570834121429924e-05,
"loss": 1.8095625,
"step": 142000
},
{
"epoch": 27.047474938528467,
"learning_rate": 3.6476262530735765e-05,
"loss": 1.7923125,
"step": 143000
},
{
"epoch": 27.236618119916777,
"learning_rate": 3.6381690940041614e-05,
"loss": 1.7471875,
"step": 144000
},
{
"epoch": 27.425761301305087,
"learning_rate": 3.628711934934746e-05,
"loss": 1.76446875,
"step": 145000
},
{
"epoch": 27.614904482693397,
"learning_rate": 3.61925477586533e-05,
"loss": 1.77759375,
"step": 146000
},
{
"epoch": 27.80404766408171,
"learning_rate": 3.6097976167959146e-05,
"loss": 1.78840625,
"step": 147000
},
{
"epoch": 27.99319084547002,
"learning_rate": 3.6003404577264994e-05,
"loss": 1.799125,
"step": 148000
},
{
"epoch": 28.18233402685833,
"learning_rate": 3.5908832986570836e-05,
"loss": 1.73171875,
"step": 149000
},
{
"epoch": 28.37147720824664,
"learning_rate": 3.581426139587668e-05,
"loss": 1.73996875,
"step": 150000
},
{
"epoch": 28.560620389634952,
"learning_rate": 3.5719689805182526e-05,
"loss": 1.75671875,
"step": 151000
},
{
"epoch": 28.749763571023266,
"learning_rate": 3.562511821448837e-05,
"loss": 1.7723125,
"step": 152000
},
{
"epoch": 28.938906752411576,
"learning_rate": 3.5530546623794216e-05,
"loss": 1.77990625,
"step": 153000
},
{
"epoch": 29.128049933799886,
"learning_rate": 3.543597503310006e-05,
"loss": 1.73996875,
"step": 154000
},
{
"epoch": 29.317193115188196,
"learning_rate": 3.53414034424059e-05,
"loss": 1.726625,
"step": 155000
},
{
"epoch": 29.50633629657651,
"learning_rate": 3.524683185171175e-05,
"loss": 1.74075,
"step": 156000
},
{
"epoch": 29.69547947796482,
"learning_rate": 3.5152260261017597e-05,
"loss": 1.753,
"step": 157000
},
{
"epoch": 29.88462265935313,
"learning_rate": 3.505768867032344e-05,
"loss": 1.7575,
"step": 158000
},
{
"epoch": 30.07376584074144,
"learning_rate": 3.496311707962928e-05,
"loss": 1.73771875,
"step": 159000
},
{
"epoch": 30.26290902212975,
"learning_rate": 3.486854548893513e-05,
"loss": 1.70875,
"step": 160000
},
{
"epoch": 30.452052203518065,
"learning_rate": 3.477397389824097e-05,
"loss": 1.72415625,
"step": 161000
},
{
"epoch": 30.641195384906375,
"learning_rate": 3.467940230754682e-05,
"loss": 1.7310625,
"step": 162000
},
{
"epoch": 30.830338566294685,
"learning_rate": 3.4584830716852654e-05,
"loss": 1.74175,
"step": 163000
},
{
"epoch": 31.019481747682995,
"learning_rate": 3.44902591261585e-05,
"loss": 1.74609375,
"step": 164000
},
{
"epoch": 31.208624929071306,
"learning_rate": 3.439568753546435e-05,
"loss": 1.684875,
"step": 165000
},
{
"epoch": 31.39776811045962,
"learning_rate": 3.430111594477019e-05,
"loss": 1.69925,
"step": 166000
},
{
"epoch": 31.58691129184793,
"learning_rate": 3.4206544354076034e-05,
"loss": 1.7169375,
"step": 167000
},
{
"epoch": 31.77605447323624,
"learning_rate": 3.411197276338188e-05,
"loss": 1.7275,
"step": 168000
},
{
"epoch": 31.96519765462455,
"learning_rate": 3.4017401172687724e-05,
"loss": 1.74096875,
"step": 169000
},
{
"epoch": 32.154340836012864,
"learning_rate": 3.392282958199357e-05,
"loss": 1.68128125,
"step": 170000
},
{
"epoch": 32.343484017401174,
"learning_rate": 3.3828257991299414e-05,
"loss": 1.6851875,
"step": 171000
},
{
"epoch": 32.532627198789484,
"learning_rate": 3.3733686400605256e-05,
"loss": 1.69896875,
"step": 172000
},
{
"epoch": 32.721770380177794,
"learning_rate": 3.3639114809911104e-05,
"loss": 1.7080625,
"step": 173000
},
{
"epoch": 32.910913561566105,
"learning_rate": 3.354454321921695e-05,
"loss": 1.7200625,
"step": 174000
},
{
"epoch": 33.100056742954415,
"learning_rate": 3.3449971628522795e-05,
"loss": 1.69040625,
"step": 175000
},
{
"epoch": 33.289199924342725,
"learning_rate": 3.3355400037828636e-05,
"loss": 1.666625,
"step": 176000
},
{
"epoch": 33.478343105731035,
"learning_rate": 3.3260828447134485e-05,
"loss": 1.6798125,
"step": 177000
},
{
"epoch": 33.66748628711935,
"learning_rate": 3.3166256856440326e-05,
"loss": 1.69575,
"step": 178000
},
{
"epoch": 33.85662946850766,
"learning_rate": 3.3071685265746175e-05,
"loss": 1.7008125,
"step": 179000
},
{
"epoch": 34.04577264989597,
"learning_rate": 3.297711367505202e-05,
"loss": 1.69340625,
"step": 180000
},
{
"epoch": 34.23491583128428,
"learning_rate": 3.288254208435786e-05,
"loss": 1.6509375,
"step": 181000
},
{
"epoch": 34.42405901267259,
"learning_rate": 3.278797049366371e-05,
"loss": 1.66703125,
"step": 182000
},
{
"epoch": 34.613202194060904,
"learning_rate": 3.2693398902969555e-05,
"loss": 1.67615625,
"step": 183000
},
{
"epoch": 34.802345375449214,
"learning_rate": 3.259882731227539e-05,
"loss": 1.6876875,
"step": 184000
},
{
"epoch": 34.991488556837524,
"learning_rate": 3.250425572158124e-05,
"loss": 1.695,
"step": 185000
},
{
"epoch": 35.180631738225834,
"learning_rate": 3.240968413088709e-05,
"loss": 1.6354375,
"step": 186000
},
{
"epoch": 35.369774919614144,
"learning_rate": 3.231511254019293e-05,
"loss": 1.64653125,
"step": 187000
},
{
"epoch": 35.55891810100246,
"learning_rate": 3.222054094949877e-05,
"loss": 1.66346875,
"step": 188000
},
{
"epoch": 35.74806128239077,
"learning_rate": 3.212596935880461e-05,
"loss": 1.66990625,
"step": 189000
},
{
"epoch": 35.93720446377908,
"learning_rate": 3.203139776811046e-05,
"loss": 1.67878125,
"step": 190000
},
{
"epoch": 36.12634764516739,
"learning_rate": 3.193682617741631e-05,
"loss": 1.63953125,
"step": 191000
},
{
"epoch": 36.3154908265557,
"learning_rate": 3.184225458672215e-05,
"loss": 1.62646875,
"step": 192000
},
{
"epoch": 36.50463400794401,
"learning_rate": 3.174768299602799e-05,
"loss": 1.6480625,
"step": 193000
},
{
"epoch": 36.69377718933232,
"learning_rate": 3.165311140533384e-05,
"loss": 1.65371875,
"step": 194000
},
{
"epoch": 36.88292037072063,
"learning_rate": 3.155853981463968e-05,
"loss": 1.66778125,
"step": 195000
},
{
"epoch": 37.07206355210894,
"learning_rate": 3.146396822394553e-05,
"loss": 1.64275,
"step": 196000
},
{
"epoch": 37.26120673349726,
"learning_rate": 3.136939663325137e-05,
"loss": 1.61565625,
"step": 197000
},
{
"epoch": 37.45034991488557,
"learning_rate": 3.1274825042557215e-05,
"loss": 1.631875,
"step": 198000
},
{
"epoch": 37.63949309627388,
"learning_rate": 3.118025345186306e-05,
"loss": 1.64078125,
"step": 199000
},
{
"epoch": 37.82863627766219,
"learning_rate": 3.108568186116891e-05,
"loss": 1.6473125,
"step": 200000
},
{
"epoch": 38.0177794590505,
"learning_rate": 3.0991110270474747e-05,
"loss": 1.65090625,
"step": 201000
},
{
"epoch": 38.20692264043881,
"learning_rate": 3.0896538679780595e-05,
"loss": 1.59846875,
"step": 202000
},
{
"epoch": 38.39606582182712,
"learning_rate": 3.0801967089086443e-05,
"loss": 1.6114375,
"step": 203000
},
{
"epoch": 38.58520900321543,
"learning_rate": 3.0707395498392285e-05,
"loss": 1.6266875,
"step": 204000
},
{
"epoch": 38.77435218460374,
"learning_rate": 3.061282390769813e-05,
"loss": 1.6340625,
"step": 205000
},
{
"epoch": 38.96349536599205,
"learning_rate": 3.0518252317003975e-05,
"loss": 1.64478125,
"step": 206000
},
{
"epoch": 39.15263854738037,
"learning_rate": 3.0423680726309817e-05,
"loss": 1.59403125,
"step": 207000
},
{
"epoch": 39.34178172876868,
"learning_rate": 3.0329109135615662e-05,
"loss": 1.5961875,
"step": 208000
},
{
"epoch": 39.53092491015699,
"learning_rate": 3.023453754492151e-05,
"loss": 1.612375,
"step": 209000
},
{
"epoch": 39.7200680915453,
"learning_rate": 3.013996595422735e-05,
"loss": 1.62103125,
"step": 210000
},
{
"epoch": 39.90921127293361,
"learning_rate": 3.0045394363533197e-05,
"loss": 1.62684375,
"step": 211000
},
{
"epoch": 40.09835445432192,
"learning_rate": 2.995082277283904e-05,
"loss": 1.60059375,
"step": 212000
},
{
"epoch": 40.28749763571023,
"learning_rate": 2.9856251182144884e-05,
"loss": 1.58271875,
"step": 213000
},
{
"epoch": 40.47664081709854,
"learning_rate": 2.976167959145073e-05,
"loss": 1.59703125,
"step": 214000
},
{
"epoch": 40.66578399848685,
"learning_rate": 2.966710800075657e-05,
"loss": 1.604875,
"step": 215000
},
{
"epoch": 40.85492717987517,
"learning_rate": 2.957253641006242e-05,
"loss": 1.6115625,
"step": 216000
},
{
"epoch": 41.04407036126348,
"learning_rate": 2.9477964819368265e-05,
"loss": 1.60434375,
"step": 217000
},
{
"epoch": 41.23321354265179,
"learning_rate": 2.9383393228674106e-05,
"loss": 1.566625,
"step": 218000
},
{
"epoch": 41.4223567240401,
"learning_rate": 2.928882163797995e-05,
"loss": 1.58084375,
"step": 219000
},
{
"epoch": 41.61149990542841,
"learning_rate": 2.91942500472858e-05,
"loss": 1.590625,
"step": 220000
},
{
"epoch": 41.80064308681672,
"learning_rate": 2.9099678456591638e-05,
"loss": 1.60096875,
"step": 221000
},
{
"epoch": 41.98978626820503,
"learning_rate": 2.9005106865897487e-05,
"loss": 1.6100625,
"step": 222000
},
{
"epoch": 42.17892944959334,
"learning_rate": 2.8910535275203332e-05,
"loss": 1.552625,
"step": 223000
},
{
"epoch": 42.36807263098165,
"learning_rate": 2.8815963684509173e-05,
"loss": 1.56346875,
"step": 224000
},
{
"epoch": 42.55721581236996,
"learning_rate": 2.872139209381502e-05,
"loss": 1.57903125,
"step": 225000
},
{
"epoch": 42.74635899375828,
"learning_rate": 2.8626820503120867e-05,
"loss": 1.5851875,
"step": 226000
},
{
"epoch": 42.93550217514659,
"learning_rate": 2.8532248912426705e-05,
"loss": 1.59359375,
"step": 227000
},
{
"epoch": 43.1246453565349,
"learning_rate": 2.8437677321732554e-05,
"loss": 1.55915625,
"step": 228000
},
{
"epoch": 43.31378853792321,
"learning_rate": 2.83431057310384e-05,
"loss": 1.5494375,
"step": 229000
},
{
"epoch": 43.50293171931152,
"learning_rate": 2.824853414034424e-05,
"loss": 1.55978125,
"step": 230000
},
{
"epoch": 43.69207490069983,
"learning_rate": 2.8153962549650086e-05,
"loss": 1.57346875,
"step": 231000
},
{
"epoch": 43.88121808208814,
"learning_rate": 2.8059390958955934e-05,
"loss": 1.58403125,
"step": 232000
},
{
"epoch": 44.07036126347645,
"learning_rate": 2.7964819368261776e-05,
"loss": 1.568125,
"step": 233000
},
{
"epoch": 44.25950444486476,
"learning_rate": 2.787024777756762e-05,
"loss": 1.535125,
"step": 234000
},
{
"epoch": 44.44864762625307,
"learning_rate": 2.7775676186873466e-05,
"loss": 1.5455,
"step": 235000
},
{
"epoch": 44.63779080764139,
"learning_rate": 2.7681104596179308e-05,
"loss": 1.55828125,
"step": 236000
},
{
"epoch": 44.8269339890297,
"learning_rate": 2.7586533005485156e-05,
"loss": 1.56821875,
"step": 237000
},
{
"epoch": 45.01607717041801,
"learning_rate": 2.7491961414790994e-05,
"loss": 1.573875,
"step": 238000
},
{
"epoch": 45.20522035180632,
"learning_rate": 2.7397389824096843e-05,
"loss": 1.52146875,
"step": 239000
},
{
"epoch": 45.39436353319463,
"learning_rate": 2.7302818233402688e-05,
"loss": 1.53628125,
"step": 240000
},
{
"epoch": 45.58350671458294,
"learning_rate": 2.720824664270853e-05,
"loss": 1.549875,
"step": 241000
},
{
"epoch": 45.77264989597125,
"learning_rate": 2.7113675052014375e-05,
"loss": 1.55240625,
"step": 242000
},
{
"epoch": 45.96179307735956,
"learning_rate": 2.7019103461320223e-05,
"loss": 1.56171875,
"step": 243000
},
{
"epoch": 46.15093625874787,
"learning_rate": 2.692453187062606e-05,
"loss": 1.52228125,
"step": 244000
},
{
"epoch": 46.340079440136186,
"learning_rate": 2.682996027993191e-05,
"loss": 1.5250625,
"step": 245000
},
{
"epoch": 46.5292226215245,
"learning_rate": 2.6735388689237755e-05,
"loss": 1.5320625,
"step": 246000
},
{
"epoch": 46.71836580291281,
"learning_rate": 2.6640817098543597e-05,
"loss": 1.5440625,
"step": 247000
},
{
"epoch": 46.90750898430112,
"learning_rate": 2.6546245507849442e-05,
"loss": 1.5495,
"step": 248000
},
{
"epoch": 47.09665216568943,
"learning_rate": 2.645167391715529e-05,
"loss": 1.5265625,
"step": 249000
},
{
"epoch": 47.28579534707774,
"learning_rate": 2.6357102326461132e-05,
"loss": 1.50359375,
"step": 250000
},
{
"epoch": 47.47493852846605,
"learning_rate": 2.6262530735766977e-05,
"loss": 1.5239375,
"step": 251000
},
{
"epoch": 47.66408170985436,
"learning_rate": 2.6167959145072822e-05,
"loss": 1.5295625,
"step": 252000
},
{
"epoch": 47.85322489124267,
"learning_rate": 2.6073387554378664e-05,
"loss": 1.53828125,
"step": 253000
},
{
"epoch": 48.04236807263098,
"learning_rate": 2.597881596368451e-05,
"loss": 1.53,
"step": 254000
},
{
"epoch": 48.231511254019296,
"learning_rate": 2.5884244372990358e-05,
"loss": 1.496,
"step": 255000
},
{
"epoch": 48.420654435407606,
"learning_rate": 2.57896727822962e-05,
"loss": 1.503375,
"step": 256000
},
{
"epoch": 48.609797616795916,
"learning_rate": 2.5695101191602044e-05,
"loss": 1.521125,
"step": 257000
},
{
"epoch": 48.798940798184226,
"learning_rate": 2.560052960090789e-05,
"loss": 1.5279375,
"step": 258000
},
{
"epoch": 48.98808397957254,
"learning_rate": 2.550595801021373e-05,
"loss": 1.5339375,
"step": 259000
},
{
"epoch": 49.17722716096085,
"learning_rate": 2.541138641951958e-05,
"loss": 1.4859375,
"step": 260000
},
{
"epoch": 49.36637034234916,
"learning_rate": 2.5316814828825425e-05,
"loss": 1.4956875,
"step": 261000
},
{
"epoch": 49.55551352373747,
"learning_rate": 2.5222243238131266e-05,
"loss": 1.50375,
"step": 262000
},
{
"epoch": 49.74465670512578,
"learning_rate": 2.512767164743711e-05,
"loss": 1.51525,
"step": 263000
},
{
"epoch": 49.933799886514095,
"learning_rate": 2.5033100056742953e-05,
"loss": 1.52084375,
"step": 264000
},
{
"epoch": 50.122943067902405,
"learning_rate": 2.4938528466048798e-05,
"loss": 1.49025,
"step": 265000
},
{
"epoch": 50.312086249290715,
"learning_rate": 2.4843956875354647e-05,
"loss": 1.48290625,
"step": 266000
},
{
"epoch": 50.501229430679025,
"learning_rate": 2.474938528466049e-05,
"loss": 1.49121875,
"step": 267000
},
{
"epoch": 50.690372612067335,
"learning_rate": 2.4654813693966334e-05,
"loss": 1.502875,
"step": 268000
},
{
"epoch": 50.879515793455646,
"learning_rate": 2.4560242103272175e-05,
"loss": 1.51284375,
"step": 269000
},
{
"epoch": 51.068658974843956,
"learning_rate": 2.4465670512578024e-05,
"loss": 1.4948125,
"step": 270000
},
{
"epoch": 51.257802156232266,
"learning_rate": 2.4371098921883865e-05,
"loss": 1.4699375,
"step": 271000
},
{
"epoch": 51.446945337620576,
"learning_rate": 2.427652733118971e-05,
"loss": 1.48165625,
"step": 272000
},
{
"epoch": 51.63608851900889,
"learning_rate": 2.4181955740495556e-05,
"loss": 1.491875,
"step": 273000
},
{
"epoch": 51.825231700397204,
"learning_rate": 2.40873841498014e-05,
"loss": 1.498875,
"step": 274000
},
{
"epoch": 52.014374881785514,
"learning_rate": 2.3992812559107246e-05,
"loss": 1.50025,
"step": 275000
},
{
"epoch": 52.203518063173824,
"learning_rate": 2.389824096841309e-05,
"loss": 1.45865625,
"step": 276000
},
{
"epoch": 52.392661244562134,
"learning_rate": 2.3803669377718936e-05,
"loss": 1.4689375,
"step": 277000
},
{
"epoch": 52.581804425950445,
"learning_rate": 2.3709097787024778e-05,
"loss": 1.4756875,
"step": 278000
},
{
"epoch": 52.770947607338755,
"learning_rate": 2.3614526196330626e-05,
"loss": 1.48815625,
"step": 279000
},
{
"epoch": 52.960090788727065,
"learning_rate": 2.3519954605636468e-05,
"loss": 1.4954375,
"step": 280000
},
{
"epoch": 53.149233970115375,
"learning_rate": 2.3425383014942313e-05,
"loss": 1.4575625,
"step": 281000
},
{
"epoch": 53.338377151503686,
"learning_rate": 2.3330811424248155e-05,
"loss": 1.4586875,
"step": 282000
},
{
"epoch": 53.527520332891996,
"learning_rate": 2.3236239833554003e-05,
"loss": 1.46946875,
"step": 283000
},
{
"epoch": 53.71666351428031,
"learning_rate": 2.3141668242859845e-05,
"loss": 1.47496875,
"step": 284000
},
{
"epoch": 53.90580669566862,
"learning_rate": 2.304709665216569e-05,
"loss": 1.48246875,
"step": 285000
},
{
"epoch": 54.09494987705693,
"learning_rate": 2.2952525061471535e-05,
"loss": 1.458375,
"step": 286000
},
{
"epoch": 54.284093058445244,
"learning_rate": 2.285795347077738e-05,
"loss": 1.44609375,
"step": 287000
},
{
"epoch": 54.473236239833554,
"learning_rate": 2.2763381880083222e-05,
"loss": 1.45584375,
"step": 288000
},
{
"epoch": 54.662379421221864,
"learning_rate": 2.266881028938907e-05,
"loss": 1.46325,
"step": 289000
},
{
"epoch": 54.851522602610174,
"learning_rate": 2.2574238698694912e-05,
"loss": 1.475625,
"step": 290000
},
{
"epoch": 55.040665783998485,
"learning_rate": 2.2479667108000757e-05,
"loss": 1.46834375,
"step": 291000
},
{
"epoch": 55.229808965386795,
"learning_rate": 2.2385095517306602e-05,
"loss": 1.43265625,
"step": 292000
},
{
"epoch": 55.41895214677511,
"learning_rate": 2.2290523926612447e-05,
"loss": 1.4436875,
"step": 293000
},
{
"epoch": 55.60809532816342,
"learning_rate": 2.2195952335918292e-05,
"loss": 1.455,
"step": 294000
},
{
"epoch": 55.79723850955173,
"learning_rate": 2.2101380745224134e-05,
"loss": 1.46475,
"step": 295000
},
{
"epoch": 55.98638169094004,
"learning_rate": 2.2006809154529982e-05,
"loss": 1.46825,
"step": 296000
},
{
"epoch": 56.17552487232835,
"learning_rate": 2.1912237563835824e-05,
"loss": 1.4260625,
"step": 297000
},
{
"epoch": 56.36466805371666,
"learning_rate": 2.181766597314167e-05,
"loss": 1.43375,
"step": 298000
},
{
"epoch": 56.55381123510497,
"learning_rate": 2.1723094382447514e-05,
"loss": 1.444375,
"step": 299000
},
{
"epoch": 56.74295441649328,
"learning_rate": 2.162852279175336e-05,
"loss": 1.4503125,
"step": 300000
},
{
"epoch": 56.932097597881594,
"learning_rate": 2.15339512010592e-05,
"loss": 1.4611875,
"step": 301000
},
{
"epoch": 57.121240779269904,
"learning_rate": 2.143937961036505e-05,
"loss": 1.4291875,
"step": 302000
},
{
"epoch": 57.31038396065822,
"learning_rate": 2.134480801967089e-05,
"loss": 1.422,
"step": 303000
},
{
"epoch": 57.49952714204653,
"learning_rate": 2.1250236428976736e-05,
"loss": 1.434625,
"step": 304000
},
{
"epoch": 57.68867032343484,
"learning_rate": 2.115566483828258e-05,
"loss": 1.437625,
"step": 305000
},
{
"epoch": 57.87781350482315,
"learning_rate": 2.1061093247588427e-05,
"loss": 1.4495625,
"step": 306000
},
{
"epoch": 58.06695668621146,
"learning_rate": 2.0966521656894268e-05,
"loss": 1.440125,
"step": 307000
},
{
"epoch": 58.25609986759977,
"learning_rate": 2.0871950066200113e-05,
"loss": 1.413375,
"step": 308000
},
{
"epoch": 58.44524304898808,
"learning_rate": 2.077737847550596e-05,
"loss": 1.4220625,
"step": 309000
},
{
"epoch": 58.63438623037639,
"learning_rate": 2.0682806884811804e-05,
"loss": 1.4315,
"step": 310000
},
{
"epoch": 58.8235294117647,
"learning_rate": 2.058823529411765e-05,
"loss": 1.4355625,
"step": 311000
},
{
"epoch": 59.01267259315302,
"learning_rate": 2.0493663703423494e-05,
"loss": 1.440125,
"step": 312000
},
{
"epoch": 59.20181577454133,
"learning_rate": 2.039909211272934e-05,
"loss": 1.4,
"step": 313000
},
{
"epoch": 59.39095895592964,
"learning_rate": 2.030452052203518e-05,
"loss": 1.4098125,
"step": 314000
},
{
"epoch": 59.58010213731795,
"learning_rate": 2.0209948931341026e-05,
"loss": 1.4241875,
"step": 315000
},
{
"epoch": 59.76924531870626,
"learning_rate": 2.011537734064687e-05,
"loss": 1.43,
"step": 316000
},
{
"epoch": 59.95838850009457,
"learning_rate": 2.0020805749952716e-05,
"loss": 1.43575,
"step": 317000
},
{
"epoch": 60.14753168148288,
"learning_rate": 1.9926234159258557e-05,
"loss": 1.402625,
"step": 318000
},
{
"epoch": 60.33667486287119,
"learning_rate": 1.9831662568564406e-05,
"loss": 1.4025,
"step": 319000
},
{
"epoch": 60.5258180442595,
"learning_rate": 1.9737090977870248e-05,
"loss": 1.413,
"step": 320000
},
{
"epoch": 60.71496122564781,
"learning_rate": 1.9642519387176093e-05,
"loss": 1.4200625,
"step": 321000
},
{
"epoch": 60.90410440703613,
"learning_rate": 1.9547947796481938e-05,
"loss": 1.4270625,
"step": 322000
},
{
"epoch": 61.09324758842444,
"learning_rate": 1.9453376205787783e-05,
"loss": 1.4055625,
"step": 323000
},
{
"epoch": 61.28239076981275,
"learning_rate": 1.9358804615093625e-05,
"loss": 1.3949375,
"step": 324000
},
{
"epoch": 61.47153395120106,
"learning_rate": 1.9264233024399473e-05,
"loss": 1.4008125,
"step": 325000
},
{
"epoch": 61.66067713258937,
"learning_rate": 1.9169661433705315e-05,
"loss": 1.4103125,
"step": 326000
},
{
"epoch": 61.84982031397768,
"learning_rate": 1.907508984301116e-05,
"loss": 1.41725,
"step": 327000
},
{
"epoch": 62.03896349536599,
"learning_rate": 1.8980518252317005e-05,
"loss": 1.4091875,
"step": 328000
},
{
"epoch": 62.2281066767543,
"learning_rate": 1.888594666162285e-05,
"loss": 1.382125,
"step": 329000
},
{
"epoch": 62.41724985814261,
"learning_rate": 1.8791375070928692e-05,
"loss": 1.39175,
"step": 330000
},
{
"epoch": 62.60639303953093,
"learning_rate": 1.8696803480234537e-05,
"loss": 1.3970625,
"step": 331000
},
{
"epoch": 62.79553622091924,
"learning_rate": 1.8602231889540382e-05,
"loss": 1.409375,
"step": 332000
},
{
"epoch": 62.98467940230755,
"learning_rate": 1.8507660298846227e-05,
"loss": 1.4141875,
"step": 333000
},
{
"epoch": 63.17382258369586,
"learning_rate": 1.8413088708152072e-05,
"loss": 1.371875,
"step": 334000
},
{
"epoch": 63.36296576508417,
"learning_rate": 1.8318517117457917e-05,
"loss": 1.382125,
"step": 335000
}
],
"max_steps": 528700,
"num_train_epochs": 100,
"total_flos": 512235918148829184,
"trial_name": null,
"trial_params": null
}