|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 76.60298846226594,
|
|
"global_step": 405000,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.18914318138831096,
|
|
"learning_rate": 4.990542840930585e-05,
|
|
"loss": 2.2949150390625,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 0.3782863627766219,
|
|
"learning_rate": 4.981085681861169e-05,
|
|
"loss": 2.31815234375,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 0.5674295441649329,
|
|
"learning_rate": 4.9716285227917534e-05,
|
|
"loss": 2.32872216796875,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 0.7565727255532438,
|
|
"learning_rate": 4.962171363722338e-05,
|
|
"loss": 2.33826904296875,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 0.9457159069415547,
|
|
"learning_rate": 4.9527142046529224e-05,
|
|
"loss": 2.340685546875,
|
|
"step": 5000
|
|
},
|
|
{
|
|
"epoch": 1.1348590883298657,
|
|
"learning_rate": 4.943257045583507e-05,
|
|
"loss": 2.28626953125,
|
|
"step": 6000
|
|
},
|
|
{
|
|
"epoch": 1.3240022697181766,
|
|
"learning_rate": 4.9337998865140915e-05,
|
|
"loss": 2.27450390625,
|
|
"step": 7000
|
|
},
|
|
{
|
|
"epoch": 1.5131454511064875,
|
|
"learning_rate": 4.9243427274446756e-05,
|
|
"loss": 2.287498046875,
|
|
"step": 8000
|
|
},
|
|
{
|
|
"epoch": 1.7022886324947986,
|
|
"learning_rate": 4.9148855683752605e-05,
|
|
"loss": 2.30305078125,
|
|
"step": 9000
|
|
},
|
|
{
|
|
"epoch": 1.8914318138831097,
|
|
"learning_rate": 4.905428409305845e-05,
|
|
"loss": 2.30912890625,
|
|
"step": 10000
|
|
},
|
|
{
|
|
"epoch": 2.0805749952714203,
|
|
"learning_rate": 4.895971250236429e-05,
|
|
"loss": 2.272595703125,
|
|
"step": 11000
|
|
},
|
|
{
|
|
"epoch": 2.2697181766597314,
|
|
"learning_rate": 4.886514091167014e-05,
|
|
"loss": 2.238087890625,
|
|
"step": 12000
|
|
},
|
|
{
|
|
"epoch": 2.4588613580480425,
|
|
"learning_rate": 4.877056932097598e-05,
|
|
"loss": 2.253140625,
|
|
"step": 13000
|
|
},
|
|
{
|
|
"epoch": 2.648004539436353,
|
|
"learning_rate": 4.867599773028183e-05,
|
|
"loss": 2.26483203125,
|
|
"step": 14000
|
|
},
|
|
{
|
|
"epoch": 2.8371477208246643,
|
|
"learning_rate": 4.858142613958767e-05,
|
|
"loss": 2.2735078125,
|
|
"step": 15000
|
|
},
|
|
{
|
|
"epoch": 3.0262909022129754,
|
|
"learning_rate": 4.848685454889351e-05,
|
|
"loss": 2.27125,
|
|
"step": 16000
|
|
},
|
|
{
|
|
"epoch": 3.215434083601286,
|
|
"learning_rate": 4.839228295819936e-05,
|
|
"loss": 2.20089453125,
|
|
"step": 17000
|
|
},
|
|
{
|
|
"epoch": 3.404577264989597,
|
|
"learning_rate": 4.829771136750521e-05,
|
|
"loss": 2.2166953125,
|
|
"step": 18000
|
|
},
|
|
{
|
|
"epoch": 3.593720446377908,
|
|
"learning_rate": 4.820313977681105e-05,
|
|
"loss": 2.2287421875,
|
|
"step": 19000
|
|
},
|
|
{
|
|
"epoch": 3.782863627766219,
|
|
"learning_rate": 4.810856818611689e-05,
|
|
"loss": 2.242296875,
|
|
"step": 20000
|
|
},
|
|
{
|
|
"epoch": 3.97200680915453,
|
|
"learning_rate": 4.801399659542274e-05,
|
|
"loss": 2.25162109375,
|
|
"step": 21000
|
|
},
|
|
{
|
|
"epoch": 4.161149990542841,
|
|
"learning_rate": 4.791942500472858e-05,
|
|
"loss": 2.17917578125,
|
|
"step": 22000
|
|
},
|
|
{
|
|
"epoch": 4.350293171931152,
|
|
"learning_rate": 4.782485341403443e-05,
|
|
"loss": 2.1837734375,
|
|
"step": 23000
|
|
},
|
|
{
|
|
"epoch": 4.539436353319463,
|
|
"learning_rate": 4.773028182334027e-05,
|
|
"loss": 2.1981953125,
|
|
"step": 24000
|
|
},
|
|
{
|
|
"epoch": 4.728579534707774,
|
|
"learning_rate": 4.763571023264611e-05,
|
|
"loss": 2.2091328125,
|
|
"step": 25000
|
|
},
|
|
{
|
|
"epoch": 4.917722716096085,
|
|
"learning_rate": 4.754113864195196e-05,
|
|
"loss": 2.22294140625,
|
|
"step": 26000
|
|
},
|
|
{
|
|
"epoch": 5.106865897484396,
|
|
"learning_rate": 4.744656705125781e-05,
|
|
"loss": 2.17415625,
|
|
"step": 27000
|
|
},
|
|
{
|
|
"epoch": 5.296009078872706,
|
|
"learning_rate": 4.7351995460563645e-05,
|
|
"loss": 2.1536875,
|
|
"step": 28000
|
|
},
|
|
{
|
|
"epoch": 5.485152260261017,
|
|
"learning_rate": 4.725742386986949e-05,
|
|
"loss": 2.17046484375,
|
|
"step": 29000
|
|
},
|
|
{
|
|
"epoch": 5.6742954416493285,
|
|
"learning_rate": 4.716285227917534e-05,
|
|
"loss": 2.17616796875,
|
|
"step": 30000
|
|
},
|
|
{
|
|
"epoch": 5.86343862303764,
|
|
"learning_rate": 4.706828068848118e-05,
|
|
"loss": 2.18503125,
|
|
"step": 31000
|
|
},
|
|
{
|
|
"epoch": 6.052581804425951,
|
|
"learning_rate": 4.6973709097787025e-05,
|
|
"loss": 2.172359375,
|
|
"step": 32000
|
|
},
|
|
{
|
|
"epoch": 6.241724985814262,
|
|
"learning_rate": 4.687913750709287e-05,
|
|
"loss": 2.118359375,
|
|
"step": 33000
|
|
},
|
|
{
|
|
"epoch": 6.430868167202572,
|
|
"learning_rate": 4.6784565916398715e-05,
|
|
"loss": 2.133546875,
|
|
"step": 34000
|
|
},
|
|
{
|
|
"epoch": 6.620011348590883,
|
|
"learning_rate": 4.6689994325704564e-05,
|
|
"loss": 2.1423671875,
|
|
"step": 35000
|
|
},
|
|
{
|
|
"epoch": 6.809154529979194,
|
|
"learning_rate": 4.6595422735010405e-05,
|
|
"loss": 2.1594921875,
|
|
"step": 36000
|
|
},
|
|
{
|
|
"epoch": 6.998297711367505,
|
|
"learning_rate": 4.650085114431625e-05,
|
|
"loss": 2.17365625,
|
|
"step": 37000
|
|
},
|
|
{
|
|
"epoch": 7.187440892755816,
|
|
"learning_rate": 4.6406279553622095e-05,
|
|
"loss": 2.0884140625,
|
|
"step": 38000
|
|
},
|
|
{
|
|
"epoch": 7.376584074144127,
|
|
"learning_rate": 4.631170796292794e-05,
|
|
"loss": 2.1023359375,
|
|
"step": 39000
|
|
},
|
|
{
|
|
"epoch": 7.565727255532438,
|
|
"learning_rate": 4.6217136372233786e-05,
|
|
"loss": 2.1240625,
|
|
"step": 40000
|
|
},
|
|
{
|
|
"epoch": 7.754870436920749,
|
|
"learning_rate": 4.612256478153963e-05,
|
|
"loss": 2.126953125,
|
|
"step": 41000
|
|
},
|
|
{
|
|
"epoch": 7.94401361830906,
|
|
"learning_rate": 4.602799319084547e-05,
|
|
"loss": 2.1407265625,
|
|
"step": 42000
|
|
},
|
|
{
|
|
"epoch": 8.133156799697371,
|
|
"learning_rate": 4.593342160015132e-05,
|
|
"loss": 2.080640625,
|
|
"step": 43000
|
|
},
|
|
{
|
|
"epoch": 8.322299981085681,
|
|
"learning_rate": 4.5838850009457166e-05,
|
|
"loss": 2.07196875,
|
|
"step": 44000
|
|
},
|
|
{
|
|
"epoch": 8.511443162473993,
|
|
"learning_rate": 4.5744278418763e-05,
|
|
"loss": 2.0906640625,
|
|
"step": 45000
|
|
},
|
|
{
|
|
"epoch": 8.700586343862303,
|
|
"learning_rate": 4.564970682806885e-05,
|
|
"loss": 2.10053125,
|
|
"step": 46000
|
|
},
|
|
{
|
|
"epoch": 8.889729525250615,
|
|
"learning_rate": 4.55551352373747e-05,
|
|
"loss": 2.112328125,
|
|
"step": 47000
|
|
},
|
|
{
|
|
"epoch": 9.078872706638926,
|
|
"learning_rate": 4.546056364668054e-05,
|
|
"loss": 2.08459375,
|
|
"step": 48000
|
|
},
|
|
{
|
|
"epoch": 9.268015888027236,
|
|
"learning_rate": 4.536599205598638e-05,
|
|
"loss": 2.039546875,
|
|
"step": 49000
|
|
},
|
|
{
|
|
"epoch": 9.457159069415548,
|
|
"learning_rate": 4.527142046529223e-05,
|
|
"loss": 2.06175,
|
|
"step": 50000
|
|
},
|
|
{
|
|
"epoch": 9.646302250803858,
|
|
"learning_rate": 4.517684887459807e-05,
|
|
"loss": 2.076625,
|
|
"step": 51000
|
|
},
|
|
{
|
|
"epoch": 9.83544543219217,
|
|
"learning_rate": 4.508227728390392e-05,
|
|
"loss": 2.088609375,
|
|
"step": 52000
|
|
},
|
|
{
|
|
"epoch": 10.02458861358048,
|
|
"learning_rate": 4.498770569320976e-05,
|
|
"loss": 2.07996875,
|
|
"step": 53000
|
|
},
|
|
{
|
|
"epoch": 10.213731794968792,
|
|
"learning_rate": 4.48931341025156e-05,
|
|
"loss": 2.0183828125,
|
|
"step": 54000
|
|
},
|
|
{
|
|
"epoch": 10.402874976357102,
|
|
"learning_rate": 4.479856251182145e-05,
|
|
"loss": 2.0310546875,
|
|
"step": 55000
|
|
},
|
|
{
|
|
"epoch": 10.592018157745413,
|
|
"learning_rate": 4.47039909211273e-05,
|
|
"loss": 2.0436796875,
|
|
"step": 56000
|
|
},
|
|
{
|
|
"epoch": 10.781161339133725,
|
|
"learning_rate": 4.460941933043314e-05,
|
|
"loss": 2.0622109375,
|
|
"step": 57000
|
|
},
|
|
{
|
|
"epoch": 10.970304520522035,
|
|
"learning_rate": 4.4514847739738984e-05,
|
|
"loss": 2.06621875,
|
|
"step": 58000
|
|
},
|
|
{
|
|
"epoch": 11.159447701910347,
|
|
"learning_rate": 4.442027614904483e-05,
|
|
"loss": 2.0049921875,
|
|
"step": 59000
|
|
},
|
|
{
|
|
"epoch": 11.348590883298657,
|
|
"learning_rate": 4.4325704558350674e-05,
|
|
"loss": 2.00809375,
|
|
"step": 60000
|
|
},
|
|
{
|
|
"epoch": 11.537734064686967,
|
|
"learning_rate": 4.423113296765652e-05,
|
|
"loss": 2.017078125,
|
|
"step": 61000
|
|
},
|
|
{
|
|
"epoch": 11.72687724607528,
|
|
"learning_rate": 4.4136561376962364e-05,
|
|
"loss": 2.03634375,
|
|
"step": 62000
|
|
},
|
|
{
|
|
"epoch": 11.91602042746359,
|
|
"learning_rate": 4.4041989786268206e-05,
|
|
"loss": 2.04240625,
|
|
"step": 63000
|
|
},
|
|
{
|
|
"epoch": 12.105163608851901,
|
|
"learning_rate": 4.3947418195574054e-05,
|
|
"loss": 1.99821875,
|
|
"step": 64000
|
|
},
|
|
{
|
|
"epoch": 12.294306790240212,
|
|
"learning_rate": 4.3852846604879896e-05,
|
|
"loss": 1.9760625,
|
|
"step": 65000
|
|
},
|
|
{
|
|
"epoch": 12.483449971628524,
|
|
"learning_rate": 4.375827501418574e-05,
|
|
"loss": 1.995484375,
|
|
"step": 66000
|
|
},
|
|
{
|
|
"epoch": 12.672593153016834,
|
|
"learning_rate": 4.3663703423491586e-05,
|
|
"loss": 2.00321875,
|
|
"step": 67000
|
|
},
|
|
{
|
|
"epoch": 12.861736334405144,
|
|
"learning_rate": 4.356913183279743e-05,
|
|
"loss": 2.021078125,
|
|
"step": 68000
|
|
},
|
|
{
|
|
"epoch": 13.050879515793456,
|
|
"learning_rate": 4.3474560242103276e-05,
|
|
"loss": 2.004171875,
|
|
"step": 69000
|
|
},
|
|
{
|
|
"epoch": 13.240022697181766,
|
|
"learning_rate": 4.337998865140912e-05,
|
|
"loss": 1.9488125,
|
|
"step": 70000
|
|
},
|
|
{
|
|
"epoch": 13.429165878570078,
|
|
"learning_rate": 4.328541706071496e-05,
|
|
"loss": 1.967921875,
|
|
"step": 71000
|
|
},
|
|
{
|
|
"epoch": 13.618309059958388,
|
|
"learning_rate": 4.319084547002081e-05,
|
|
"loss": 1.984046875,
|
|
"step": 72000
|
|
},
|
|
{
|
|
"epoch": 13.807452241346699,
|
|
"learning_rate": 4.3096273879326657e-05,
|
|
"loss": 1.998828125,
|
|
"step": 73000
|
|
},
|
|
{
|
|
"epoch": 13.99659542273501,
|
|
"learning_rate": 4.30017022886325e-05,
|
|
"loss": 2.0024375,
|
|
"step": 74000
|
|
},
|
|
{
|
|
"epoch": 14.18573860412332,
|
|
"learning_rate": 4.290713069793834e-05,
|
|
"loss": 1.927890625,
|
|
"step": 75000
|
|
},
|
|
{
|
|
"epoch": 14.374881785511633,
|
|
"learning_rate": 4.281255910724419e-05,
|
|
"loss": 1.94315625,
|
|
"step": 76000
|
|
},
|
|
{
|
|
"epoch": 14.564024966899943,
|
|
"learning_rate": 4.271798751655003e-05,
|
|
"loss": 1.95571875,
|
|
"step": 77000
|
|
},
|
|
{
|
|
"epoch": 14.753168148288253,
|
|
"learning_rate": 4.262341592585587e-05,
|
|
"loss": 1.96771875,
|
|
"step": 78000
|
|
},
|
|
{
|
|
"epoch": 14.942311329676565,
|
|
"learning_rate": 4.252884433516172e-05,
|
|
"loss": 1.981109375,
|
|
"step": 79000
|
|
},
|
|
{
|
|
"epoch": 15.131454511064875,
|
|
"learning_rate": 4.243427274446756e-05,
|
|
"loss": 1.929875,
|
|
"step": 80000
|
|
},
|
|
{
|
|
"epoch": 15.320597692453187,
|
|
"learning_rate": 4.233970115377341e-05,
|
|
"loss": 1.91628125,
|
|
"step": 81000
|
|
},
|
|
{
|
|
"epoch": 15.509740873841498,
|
|
"learning_rate": 4.224512956307925e-05,
|
|
"loss": 1.938640625,
|
|
"step": 82000
|
|
},
|
|
{
|
|
"epoch": 15.69888405522981,
|
|
"learning_rate": 4.2150557972385094e-05,
|
|
"loss": 1.9478125,
|
|
"step": 83000
|
|
},
|
|
{
|
|
"epoch": 15.88802723661812,
|
|
"learning_rate": 4.205598638169094e-05,
|
|
"loss": 1.95146875,
|
|
"step": 84000
|
|
},
|
|
{
|
|
"epoch": 16.077170418006432,
|
|
"learning_rate": 4.196141479099679e-05,
|
|
"loss": 1.928953125,
|
|
"step": 85000
|
|
},
|
|
{
|
|
"epoch": 16.266313599394742,
|
|
"learning_rate": 4.186684320030263e-05,
|
|
"loss": 1.895140625,
|
|
"step": 86000
|
|
},
|
|
{
|
|
"epoch": 16.455456780783052,
|
|
"learning_rate": 4.1772271609608474e-05,
|
|
"loss": 1.909171875,
|
|
"step": 87000
|
|
},
|
|
{
|
|
"epoch": 16.644599962171363,
|
|
"learning_rate": 4.1677700018914316e-05,
|
|
"loss": 1.92403125,
|
|
"step": 88000
|
|
},
|
|
{
|
|
"epoch": 16.833743143559676,
|
|
"learning_rate": 4.1583128428220164e-05,
|
|
"loss": 1.935015625,
|
|
"step": 89000
|
|
},
|
|
{
|
|
"epoch": 17.022886324947986,
|
|
"learning_rate": 4.148855683752601e-05,
|
|
"loss": 1.92928125,
|
|
"step": 90000
|
|
},
|
|
{
|
|
"epoch": 17.212029506336297,
|
|
"learning_rate": 4.139398524683185e-05,
|
|
"loss": 1.86678125,
|
|
"step": 91000
|
|
},
|
|
{
|
|
"epoch": 17.401172687724607,
|
|
"learning_rate": 4.1299413656137696e-05,
|
|
"loss": 1.8859375,
|
|
"step": 92000
|
|
},
|
|
{
|
|
"epoch": 17.590315869112917,
|
|
"learning_rate": 4.1204842065443545e-05,
|
|
"loss": 1.90540625,
|
|
"step": 93000
|
|
},
|
|
{
|
|
"epoch": 17.77945905050123,
|
|
"learning_rate": 4.1110270474749386e-05,
|
|
"loss": 1.911765625,
|
|
"step": 94000
|
|
},
|
|
{
|
|
"epoch": 17.96860223188954,
|
|
"learning_rate": 4.101569888405523e-05,
|
|
"loss": 1.924671875,
|
|
"step": 95000
|
|
},
|
|
{
|
|
"epoch": 18.15774541327785,
|
|
"learning_rate": 4.092112729336108e-05,
|
|
"loss": 1.85646875,
|
|
"step": 96000
|
|
},
|
|
{
|
|
"epoch": 18.34688859466616,
|
|
"learning_rate": 4.082655570266692e-05,
|
|
"loss": 1.862640625,
|
|
"step": 97000
|
|
},
|
|
{
|
|
"epoch": 18.53603177605447,
|
|
"learning_rate": 4.073198411197277e-05,
|
|
"loss": 1.880921875,
|
|
"step": 98000
|
|
},
|
|
{
|
|
"epoch": 18.725174957442785,
|
|
"learning_rate": 4.063741252127861e-05,
|
|
"loss": 1.8903125,
|
|
"step": 99000
|
|
},
|
|
{
|
|
"epoch": 18.914318138831096,
|
|
"learning_rate": 4.054284093058445e-05,
|
|
"loss": 1.904015625,
|
|
"step": 100000
|
|
},
|
|
{
|
|
"epoch": 19.103461320219406,
|
|
"learning_rate": 4.04482693398903e-05,
|
|
"loss": 1.8575625,
|
|
"step": 101000
|
|
},
|
|
{
|
|
"epoch": 19.292604501607716,
|
|
"learning_rate": 4.035369774919615e-05,
|
|
"loss": 1.837125,
|
|
"step": 102000
|
|
},
|
|
{
|
|
"epoch": 19.481747682996026,
|
|
"learning_rate": 4.025912615850199e-05,
|
|
"loss": 1.859359375,
|
|
"step": 103000
|
|
},
|
|
{
|
|
"epoch": 19.67089086438434,
|
|
"learning_rate": 4.016455456780783e-05,
|
|
"loss": 1.867921875,
|
|
"step": 104000
|
|
},
|
|
{
|
|
"epoch": 19.86003404577265,
|
|
"learning_rate": 4.006998297711368e-05,
|
|
"loss": 1.88125,
|
|
"step": 105000
|
|
},
|
|
{
|
|
"epoch": 20.04917722716096,
|
|
"learning_rate": 3.997541138641952e-05,
|
|
"loss": 1.866546875,
|
|
"step": 106000
|
|
},
|
|
{
|
|
"epoch": 20.23832040854927,
|
|
"learning_rate": 3.988083979572537e-05,
|
|
"loss": 1.811046875,
|
|
"step": 107000
|
|
},
|
|
{
|
|
"epoch": 20.427463589937584,
|
|
"learning_rate": 3.978626820503121e-05,
|
|
"loss": 1.8318125,
|
|
"step": 108000
|
|
},
|
|
{
|
|
"epoch": 20.616606771325895,
|
|
"learning_rate": 3.969169661433705e-05,
|
|
"loss": 1.83990625,
|
|
"step": 109000
|
|
},
|
|
{
|
|
"epoch": 20.805749952714205,
|
|
"learning_rate": 3.95971250236429e-05,
|
|
"loss": 1.91721875,
|
|
"step": 110000
|
|
},
|
|
{
|
|
"epoch": 20.994893134102515,
|
|
"learning_rate": 3.950255343294875e-05,
|
|
"loss": 1.923984375,
|
|
"step": 111000
|
|
},
|
|
{
|
|
"epoch": 21.184036315490825,
|
|
"learning_rate": 3.9407981842254585e-05,
|
|
"loss": 1.846875,
|
|
"step": 112000
|
|
},
|
|
{
|
|
"epoch": 21.37317949687914,
|
|
"learning_rate": 3.931341025156043e-05,
|
|
"loss": 1.860984375,
|
|
"step": 113000
|
|
},
|
|
{
|
|
"epoch": 21.56232267826745,
|
|
"learning_rate": 3.9218838660866275e-05,
|
|
"loss": 1.8764375,
|
|
"step": 114000
|
|
},
|
|
{
|
|
"epoch": 21.75146585965576,
|
|
"learning_rate": 3.912426707017212e-05,
|
|
"loss": 1.88171875,
|
|
"step": 115000
|
|
},
|
|
{
|
|
"epoch": 21.94060904104407,
|
|
"learning_rate": 3.9029695479477965e-05,
|
|
"loss": 1.89409375,
|
|
"step": 116000
|
|
},
|
|
{
|
|
"epoch": 22.12975222243238,
|
|
"learning_rate": 3.8935123888783807e-05,
|
|
"loss": 1.847390625,
|
|
"step": 117000
|
|
},
|
|
{
|
|
"epoch": 22.318895403820694,
|
|
"learning_rate": 3.8840552298089655e-05,
|
|
"loss": 1.83696875,
|
|
"step": 118000
|
|
},
|
|
{
|
|
"epoch": 22.508038585209004,
|
|
"learning_rate": 3.8745980707395504e-05,
|
|
"loss": 1.852484375,
|
|
"step": 119000
|
|
},
|
|
{
|
|
"epoch": 22.697181766597314,
|
|
"learning_rate": 3.8651409116701345e-05,
|
|
"loss": 1.86578125,
|
|
"step": 120000
|
|
},
|
|
{
|
|
"epoch": 22.886324947985624,
|
|
"learning_rate": 3.855683752600719e-05,
|
|
"loss": 1.877734375,
|
|
"step": 121000
|
|
},
|
|
{
|
|
"epoch": 23.075468129373935,
|
|
"learning_rate": 3.8462265935313035e-05,
|
|
"loss": 1.8471875,
|
|
"step": 122000
|
|
},
|
|
{
|
|
"epoch": 23.26461131076225,
|
|
"learning_rate": 3.836769434461888e-05,
|
|
"loss": 1.816078125,
|
|
"step": 123000
|
|
},
|
|
{
|
|
"epoch": 23.45375449215056,
|
|
"learning_rate": 3.8273122753924726e-05,
|
|
"loss": 1.833703125,
|
|
"step": 124000
|
|
},
|
|
{
|
|
"epoch": 23.64289767353887,
|
|
"learning_rate": 3.817855116323057e-05,
|
|
"loss": 1.840984375,
|
|
"step": 125000
|
|
},
|
|
{
|
|
"epoch": 23.83204085492718,
|
|
"learning_rate": 3.808397957253641e-05,
|
|
"loss": 1.8568125,
|
|
"step": 126000
|
|
},
|
|
{
|
|
"epoch": 24.02118403631549,
|
|
"learning_rate": 3.798940798184226e-05,
|
|
"loss": 1.855609375,
|
|
"step": 127000
|
|
},
|
|
{
|
|
"epoch": 24.210327217703803,
|
|
"learning_rate": 3.7894836391148106e-05,
|
|
"loss": 1.7931875,
|
|
"step": 128000
|
|
},
|
|
{
|
|
"epoch": 24.399470399092113,
|
|
"learning_rate": 3.780026480045394e-05,
|
|
"loss": 1.805828125,
|
|
"step": 129000
|
|
},
|
|
{
|
|
"epoch": 24.588613580480423,
|
|
"learning_rate": 3.770569320975979e-05,
|
|
"loss": 1.825140625,
|
|
"step": 130000
|
|
},
|
|
{
|
|
"epoch": 24.777756761868734,
|
|
"learning_rate": 3.761112161906564e-05,
|
|
"loss": 1.83425,
|
|
"step": 131000
|
|
},
|
|
{
|
|
"epoch": 24.966899943257047,
|
|
"learning_rate": 3.751655002837148e-05,
|
|
"loss": 1.8449375,
|
|
"step": 132000
|
|
},
|
|
{
|
|
"epoch": 25.156043124645358,
|
|
"learning_rate": 3.742197843767732e-05,
|
|
"loss": 1.78215625,
|
|
"step": 133000
|
|
},
|
|
{
|
|
"epoch": 25.345186306033668,
|
|
"learning_rate": 3.732740684698317e-05,
|
|
"loss": 1.79159375,
|
|
"step": 134000
|
|
},
|
|
{
|
|
"epoch": 25.534329487421978,
|
|
"learning_rate": 3.723283525628901e-05,
|
|
"loss": 1.80753125,
|
|
"step": 135000
|
|
},
|
|
{
|
|
"epoch": 25.723472668810288,
|
|
"learning_rate": 3.713826366559486e-05,
|
|
"loss": 1.81053125,
|
|
"step": 136000
|
|
},
|
|
{
|
|
"epoch": 25.912615850198602,
|
|
"learning_rate": 3.70436920749007e-05,
|
|
"loss": 1.8265,
|
|
"step": 137000
|
|
},
|
|
{
|
|
"epoch": 26.101759031586912,
|
|
"learning_rate": 3.694912048420654e-05,
|
|
"loss": 1.79653125,
|
|
"step": 138000
|
|
},
|
|
{
|
|
"epoch": 26.290902212975222,
|
|
"learning_rate": 3.685454889351239e-05,
|
|
"loss": 1.76934375,
|
|
"step": 139000
|
|
},
|
|
{
|
|
"epoch": 26.480045394363533,
|
|
"learning_rate": 3.6759977302818233e-05,
|
|
"loss": 1.78228125,
|
|
"step": 140000
|
|
},
|
|
{
|
|
"epoch": 26.669188575751843,
|
|
"learning_rate": 3.666540571212408e-05,
|
|
"loss": 1.79384375,
|
|
"step": 141000
|
|
},
|
|
{
|
|
"epoch": 26.858331757140157,
|
|
"learning_rate": 3.6570834121429924e-05,
|
|
"loss": 1.8095625,
|
|
"step": 142000
|
|
},
|
|
{
|
|
"epoch": 27.047474938528467,
|
|
"learning_rate": 3.6476262530735765e-05,
|
|
"loss": 1.7923125,
|
|
"step": 143000
|
|
},
|
|
{
|
|
"epoch": 27.236618119916777,
|
|
"learning_rate": 3.6381690940041614e-05,
|
|
"loss": 1.7471875,
|
|
"step": 144000
|
|
},
|
|
{
|
|
"epoch": 27.425761301305087,
|
|
"learning_rate": 3.628711934934746e-05,
|
|
"loss": 1.76446875,
|
|
"step": 145000
|
|
},
|
|
{
|
|
"epoch": 27.614904482693397,
|
|
"learning_rate": 3.61925477586533e-05,
|
|
"loss": 1.77759375,
|
|
"step": 146000
|
|
},
|
|
{
|
|
"epoch": 27.80404766408171,
|
|
"learning_rate": 3.6097976167959146e-05,
|
|
"loss": 1.78840625,
|
|
"step": 147000
|
|
},
|
|
{
|
|
"epoch": 27.99319084547002,
|
|
"learning_rate": 3.6003404577264994e-05,
|
|
"loss": 1.799125,
|
|
"step": 148000
|
|
},
|
|
{
|
|
"epoch": 28.18233402685833,
|
|
"learning_rate": 3.5908832986570836e-05,
|
|
"loss": 1.73171875,
|
|
"step": 149000
|
|
},
|
|
{
|
|
"epoch": 28.37147720824664,
|
|
"learning_rate": 3.581426139587668e-05,
|
|
"loss": 1.73996875,
|
|
"step": 150000
|
|
},
|
|
{
|
|
"epoch": 28.560620389634952,
|
|
"learning_rate": 3.5719689805182526e-05,
|
|
"loss": 1.75671875,
|
|
"step": 151000
|
|
},
|
|
{
|
|
"epoch": 28.749763571023266,
|
|
"learning_rate": 3.562511821448837e-05,
|
|
"loss": 1.7723125,
|
|
"step": 152000
|
|
},
|
|
{
|
|
"epoch": 28.938906752411576,
|
|
"learning_rate": 3.5530546623794216e-05,
|
|
"loss": 1.77990625,
|
|
"step": 153000
|
|
},
|
|
{
|
|
"epoch": 29.128049933799886,
|
|
"learning_rate": 3.543597503310006e-05,
|
|
"loss": 1.73996875,
|
|
"step": 154000
|
|
},
|
|
{
|
|
"epoch": 29.317193115188196,
|
|
"learning_rate": 3.53414034424059e-05,
|
|
"loss": 1.726625,
|
|
"step": 155000
|
|
},
|
|
{
|
|
"epoch": 29.50633629657651,
|
|
"learning_rate": 3.524683185171175e-05,
|
|
"loss": 1.74075,
|
|
"step": 156000
|
|
},
|
|
{
|
|
"epoch": 29.69547947796482,
|
|
"learning_rate": 3.5152260261017597e-05,
|
|
"loss": 1.753,
|
|
"step": 157000
|
|
},
|
|
{
|
|
"epoch": 29.88462265935313,
|
|
"learning_rate": 3.505768867032344e-05,
|
|
"loss": 1.7575,
|
|
"step": 158000
|
|
},
|
|
{
|
|
"epoch": 30.07376584074144,
|
|
"learning_rate": 3.496311707962928e-05,
|
|
"loss": 1.73771875,
|
|
"step": 159000
|
|
},
|
|
{
|
|
"epoch": 30.26290902212975,
|
|
"learning_rate": 3.486854548893513e-05,
|
|
"loss": 1.70875,
|
|
"step": 160000
|
|
},
|
|
{
|
|
"epoch": 30.452052203518065,
|
|
"learning_rate": 3.477397389824097e-05,
|
|
"loss": 1.72415625,
|
|
"step": 161000
|
|
},
|
|
{
|
|
"epoch": 30.641195384906375,
|
|
"learning_rate": 3.467940230754682e-05,
|
|
"loss": 1.7310625,
|
|
"step": 162000
|
|
},
|
|
{
|
|
"epoch": 30.830338566294685,
|
|
"learning_rate": 3.4584830716852654e-05,
|
|
"loss": 1.74175,
|
|
"step": 163000
|
|
},
|
|
{
|
|
"epoch": 31.019481747682995,
|
|
"learning_rate": 3.44902591261585e-05,
|
|
"loss": 1.74609375,
|
|
"step": 164000
|
|
},
|
|
{
|
|
"epoch": 31.208624929071306,
|
|
"learning_rate": 3.439568753546435e-05,
|
|
"loss": 1.684875,
|
|
"step": 165000
|
|
},
|
|
{
|
|
"epoch": 31.39776811045962,
|
|
"learning_rate": 3.430111594477019e-05,
|
|
"loss": 1.69925,
|
|
"step": 166000
|
|
},
|
|
{
|
|
"epoch": 31.58691129184793,
|
|
"learning_rate": 3.4206544354076034e-05,
|
|
"loss": 1.7169375,
|
|
"step": 167000
|
|
},
|
|
{
|
|
"epoch": 31.77605447323624,
|
|
"learning_rate": 3.411197276338188e-05,
|
|
"loss": 1.7275,
|
|
"step": 168000
|
|
},
|
|
{
|
|
"epoch": 31.96519765462455,
|
|
"learning_rate": 3.4017401172687724e-05,
|
|
"loss": 1.74096875,
|
|
"step": 169000
|
|
},
|
|
{
|
|
"epoch": 32.154340836012864,
|
|
"learning_rate": 3.392282958199357e-05,
|
|
"loss": 1.68128125,
|
|
"step": 170000
|
|
},
|
|
{
|
|
"epoch": 32.343484017401174,
|
|
"learning_rate": 3.3828257991299414e-05,
|
|
"loss": 1.6851875,
|
|
"step": 171000
|
|
},
|
|
{
|
|
"epoch": 32.532627198789484,
|
|
"learning_rate": 3.3733686400605256e-05,
|
|
"loss": 1.69896875,
|
|
"step": 172000
|
|
},
|
|
{
|
|
"epoch": 32.721770380177794,
|
|
"learning_rate": 3.3639114809911104e-05,
|
|
"loss": 1.7080625,
|
|
"step": 173000
|
|
},
|
|
{
|
|
"epoch": 32.910913561566105,
|
|
"learning_rate": 3.354454321921695e-05,
|
|
"loss": 1.7200625,
|
|
"step": 174000
|
|
},
|
|
{
|
|
"epoch": 33.100056742954415,
|
|
"learning_rate": 3.3449971628522795e-05,
|
|
"loss": 1.69040625,
|
|
"step": 175000
|
|
},
|
|
{
|
|
"epoch": 33.289199924342725,
|
|
"learning_rate": 3.3355400037828636e-05,
|
|
"loss": 1.666625,
|
|
"step": 176000
|
|
},
|
|
{
|
|
"epoch": 33.478343105731035,
|
|
"learning_rate": 3.3260828447134485e-05,
|
|
"loss": 1.6798125,
|
|
"step": 177000
|
|
},
|
|
{
|
|
"epoch": 33.66748628711935,
|
|
"learning_rate": 3.3166256856440326e-05,
|
|
"loss": 1.69575,
|
|
"step": 178000
|
|
},
|
|
{
|
|
"epoch": 33.85662946850766,
|
|
"learning_rate": 3.3071685265746175e-05,
|
|
"loss": 1.7008125,
|
|
"step": 179000
|
|
},
|
|
{
|
|
"epoch": 34.04577264989597,
|
|
"learning_rate": 3.297711367505202e-05,
|
|
"loss": 1.69340625,
|
|
"step": 180000
|
|
},
|
|
{
|
|
"epoch": 34.23491583128428,
|
|
"learning_rate": 3.288254208435786e-05,
|
|
"loss": 1.6509375,
|
|
"step": 181000
|
|
},
|
|
{
|
|
"epoch": 34.42405901267259,
|
|
"learning_rate": 3.278797049366371e-05,
|
|
"loss": 1.66703125,
|
|
"step": 182000
|
|
},
|
|
{
|
|
"epoch": 34.613202194060904,
|
|
"learning_rate": 3.2693398902969555e-05,
|
|
"loss": 1.67615625,
|
|
"step": 183000
|
|
},
|
|
{
|
|
"epoch": 34.802345375449214,
|
|
"learning_rate": 3.259882731227539e-05,
|
|
"loss": 1.6876875,
|
|
"step": 184000
|
|
},
|
|
{
|
|
"epoch": 34.991488556837524,
|
|
"learning_rate": 3.250425572158124e-05,
|
|
"loss": 1.695,
|
|
"step": 185000
|
|
},
|
|
{
|
|
"epoch": 35.180631738225834,
|
|
"learning_rate": 3.240968413088709e-05,
|
|
"loss": 1.6354375,
|
|
"step": 186000
|
|
},
|
|
{
|
|
"epoch": 35.369774919614144,
|
|
"learning_rate": 3.231511254019293e-05,
|
|
"loss": 1.64653125,
|
|
"step": 187000
|
|
},
|
|
{
|
|
"epoch": 35.55891810100246,
|
|
"learning_rate": 3.222054094949877e-05,
|
|
"loss": 1.66346875,
|
|
"step": 188000
|
|
},
|
|
{
|
|
"epoch": 35.74806128239077,
|
|
"learning_rate": 3.212596935880461e-05,
|
|
"loss": 1.66990625,
|
|
"step": 189000
|
|
},
|
|
{
|
|
"epoch": 35.93720446377908,
|
|
"learning_rate": 3.203139776811046e-05,
|
|
"loss": 1.67878125,
|
|
"step": 190000
|
|
},
|
|
{
|
|
"epoch": 36.12634764516739,
|
|
"learning_rate": 3.193682617741631e-05,
|
|
"loss": 1.63953125,
|
|
"step": 191000
|
|
},
|
|
{
|
|
"epoch": 36.3154908265557,
|
|
"learning_rate": 3.184225458672215e-05,
|
|
"loss": 1.62646875,
|
|
"step": 192000
|
|
},
|
|
{
|
|
"epoch": 36.50463400794401,
|
|
"learning_rate": 3.174768299602799e-05,
|
|
"loss": 1.6480625,
|
|
"step": 193000
|
|
},
|
|
{
|
|
"epoch": 36.69377718933232,
|
|
"learning_rate": 3.165311140533384e-05,
|
|
"loss": 1.65371875,
|
|
"step": 194000
|
|
},
|
|
{
|
|
"epoch": 36.88292037072063,
|
|
"learning_rate": 3.155853981463968e-05,
|
|
"loss": 1.66778125,
|
|
"step": 195000
|
|
},
|
|
{
|
|
"epoch": 37.07206355210894,
|
|
"learning_rate": 3.146396822394553e-05,
|
|
"loss": 1.64275,
|
|
"step": 196000
|
|
},
|
|
{
|
|
"epoch": 37.26120673349726,
|
|
"learning_rate": 3.136939663325137e-05,
|
|
"loss": 1.61565625,
|
|
"step": 197000
|
|
},
|
|
{
|
|
"epoch": 37.45034991488557,
|
|
"learning_rate": 3.1274825042557215e-05,
|
|
"loss": 1.631875,
|
|
"step": 198000
|
|
},
|
|
{
|
|
"epoch": 37.63949309627388,
|
|
"learning_rate": 3.118025345186306e-05,
|
|
"loss": 1.64078125,
|
|
"step": 199000
|
|
},
|
|
{
|
|
"epoch": 37.82863627766219,
|
|
"learning_rate": 3.108568186116891e-05,
|
|
"loss": 1.6473125,
|
|
"step": 200000
|
|
},
|
|
{
|
|
"epoch": 38.0177794590505,
|
|
"learning_rate": 3.0991110270474747e-05,
|
|
"loss": 1.65090625,
|
|
"step": 201000
|
|
},
|
|
{
|
|
"epoch": 38.20692264043881,
|
|
"learning_rate": 3.0896538679780595e-05,
|
|
"loss": 1.59846875,
|
|
"step": 202000
|
|
},
|
|
{
|
|
"epoch": 38.39606582182712,
|
|
"learning_rate": 3.0801967089086443e-05,
|
|
"loss": 1.6114375,
|
|
"step": 203000
|
|
},
|
|
{
|
|
"epoch": 38.58520900321543,
|
|
"learning_rate": 3.0707395498392285e-05,
|
|
"loss": 1.6266875,
|
|
"step": 204000
|
|
},
|
|
{
|
|
"epoch": 38.77435218460374,
|
|
"learning_rate": 3.061282390769813e-05,
|
|
"loss": 1.6340625,
|
|
"step": 205000
|
|
},
|
|
{
|
|
"epoch": 38.96349536599205,
|
|
"learning_rate": 3.0518252317003975e-05,
|
|
"loss": 1.64478125,
|
|
"step": 206000
|
|
},
|
|
{
|
|
"epoch": 39.15263854738037,
|
|
"learning_rate": 3.0423680726309817e-05,
|
|
"loss": 1.59403125,
|
|
"step": 207000
|
|
},
|
|
{
|
|
"epoch": 39.34178172876868,
|
|
"learning_rate": 3.0329109135615662e-05,
|
|
"loss": 1.5961875,
|
|
"step": 208000
|
|
},
|
|
{
|
|
"epoch": 39.53092491015699,
|
|
"learning_rate": 3.023453754492151e-05,
|
|
"loss": 1.612375,
|
|
"step": 209000
|
|
},
|
|
{
|
|
"epoch": 39.7200680915453,
|
|
"learning_rate": 3.013996595422735e-05,
|
|
"loss": 1.62103125,
|
|
"step": 210000
|
|
},
|
|
{
|
|
"epoch": 39.90921127293361,
|
|
"learning_rate": 3.0045394363533197e-05,
|
|
"loss": 1.62684375,
|
|
"step": 211000
|
|
},
|
|
{
|
|
"epoch": 40.09835445432192,
|
|
"learning_rate": 2.995082277283904e-05,
|
|
"loss": 1.60059375,
|
|
"step": 212000
|
|
},
|
|
{
|
|
"epoch": 40.28749763571023,
|
|
"learning_rate": 2.9856251182144884e-05,
|
|
"loss": 1.58271875,
|
|
"step": 213000
|
|
},
|
|
{
|
|
"epoch": 40.47664081709854,
|
|
"learning_rate": 2.976167959145073e-05,
|
|
"loss": 1.59703125,
|
|
"step": 214000
|
|
},
|
|
{
|
|
"epoch": 40.66578399848685,
|
|
"learning_rate": 2.966710800075657e-05,
|
|
"loss": 1.604875,
|
|
"step": 215000
|
|
},
|
|
{
|
|
"epoch": 40.85492717987517,
|
|
"learning_rate": 2.957253641006242e-05,
|
|
"loss": 1.6115625,
|
|
"step": 216000
|
|
},
|
|
{
|
|
"epoch": 41.04407036126348,
|
|
"learning_rate": 2.9477964819368265e-05,
|
|
"loss": 1.60434375,
|
|
"step": 217000
|
|
},
|
|
{
|
|
"epoch": 41.23321354265179,
|
|
"learning_rate": 2.9383393228674106e-05,
|
|
"loss": 1.566625,
|
|
"step": 218000
|
|
},
|
|
{
|
|
"epoch": 41.4223567240401,
|
|
"learning_rate": 2.928882163797995e-05,
|
|
"loss": 1.58084375,
|
|
"step": 219000
|
|
},
|
|
{
|
|
"epoch": 41.61149990542841,
|
|
"learning_rate": 2.91942500472858e-05,
|
|
"loss": 1.590625,
|
|
"step": 220000
|
|
},
|
|
{
|
|
"epoch": 41.80064308681672,
|
|
"learning_rate": 2.9099678456591638e-05,
|
|
"loss": 1.60096875,
|
|
"step": 221000
|
|
},
|
|
{
|
|
"epoch": 41.98978626820503,
|
|
"learning_rate": 2.9005106865897487e-05,
|
|
"loss": 1.6100625,
|
|
"step": 222000
|
|
},
|
|
{
|
|
"epoch": 42.17892944959334,
|
|
"learning_rate": 2.8910535275203332e-05,
|
|
"loss": 1.552625,
|
|
"step": 223000
|
|
},
|
|
{
|
|
"epoch": 42.36807263098165,
|
|
"learning_rate": 2.8815963684509173e-05,
|
|
"loss": 1.56346875,
|
|
"step": 224000
|
|
},
|
|
{
|
|
"epoch": 42.55721581236996,
|
|
"learning_rate": 2.872139209381502e-05,
|
|
"loss": 1.57903125,
|
|
"step": 225000
|
|
},
|
|
{
|
|
"epoch": 42.74635899375828,
|
|
"learning_rate": 2.8626820503120867e-05,
|
|
"loss": 1.5851875,
|
|
"step": 226000
|
|
},
|
|
{
|
|
"epoch": 42.93550217514659,
|
|
"learning_rate": 2.8532248912426705e-05,
|
|
"loss": 1.59359375,
|
|
"step": 227000
|
|
},
|
|
{
|
|
"epoch": 43.1246453565349,
|
|
"learning_rate": 2.8437677321732554e-05,
|
|
"loss": 1.55915625,
|
|
"step": 228000
|
|
},
|
|
{
|
|
"epoch": 43.31378853792321,
|
|
"learning_rate": 2.83431057310384e-05,
|
|
"loss": 1.5494375,
|
|
"step": 229000
|
|
},
|
|
{
|
|
"epoch": 43.50293171931152,
|
|
"learning_rate": 2.824853414034424e-05,
|
|
"loss": 1.55978125,
|
|
"step": 230000
|
|
},
|
|
{
|
|
"epoch": 43.69207490069983,
|
|
"learning_rate": 2.8153962549650086e-05,
|
|
"loss": 1.57346875,
|
|
"step": 231000
|
|
},
|
|
{
|
|
"epoch": 43.88121808208814,
|
|
"learning_rate": 2.8059390958955934e-05,
|
|
"loss": 1.58403125,
|
|
"step": 232000
|
|
},
|
|
{
|
|
"epoch": 44.07036126347645,
|
|
"learning_rate": 2.7964819368261776e-05,
|
|
"loss": 1.568125,
|
|
"step": 233000
|
|
},
|
|
{
|
|
"epoch": 44.25950444486476,
|
|
"learning_rate": 2.787024777756762e-05,
|
|
"loss": 1.535125,
|
|
"step": 234000
|
|
},
|
|
{
|
|
"epoch": 44.44864762625307,
|
|
"learning_rate": 2.7775676186873466e-05,
|
|
"loss": 1.5455,
|
|
"step": 235000
|
|
},
|
|
{
|
|
"epoch": 44.63779080764139,
|
|
"learning_rate": 2.7681104596179308e-05,
|
|
"loss": 1.55828125,
|
|
"step": 236000
|
|
},
|
|
{
|
|
"epoch": 44.8269339890297,
|
|
"learning_rate": 2.7586533005485156e-05,
|
|
"loss": 1.56821875,
|
|
"step": 237000
|
|
},
|
|
{
|
|
"epoch": 45.01607717041801,
|
|
"learning_rate": 2.7491961414790994e-05,
|
|
"loss": 1.573875,
|
|
"step": 238000
|
|
},
|
|
{
|
|
"epoch": 45.20522035180632,
|
|
"learning_rate": 2.7397389824096843e-05,
|
|
"loss": 1.52146875,
|
|
"step": 239000
|
|
},
|
|
{
|
|
"epoch": 45.39436353319463,
|
|
"learning_rate": 2.7302818233402688e-05,
|
|
"loss": 1.53628125,
|
|
"step": 240000
|
|
},
|
|
{
|
|
"epoch": 45.58350671458294,
|
|
"learning_rate": 2.720824664270853e-05,
|
|
"loss": 1.549875,
|
|
"step": 241000
|
|
},
|
|
{
|
|
"epoch": 45.77264989597125,
|
|
"learning_rate": 2.7113675052014375e-05,
|
|
"loss": 1.55240625,
|
|
"step": 242000
|
|
},
|
|
{
|
|
"epoch": 45.96179307735956,
|
|
"learning_rate": 2.7019103461320223e-05,
|
|
"loss": 1.56171875,
|
|
"step": 243000
|
|
},
|
|
{
|
|
"epoch": 46.15093625874787,
|
|
"learning_rate": 2.692453187062606e-05,
|
|
"loss": 1.52228125,
|
|
"step": 244000
|
|
},
|
|
{
|
|
"epoch": 46.340079440136186,
|
|
"learning_rate": 2.682996027993191e-05,
|
|
"loss": 1.5250625,
|
|
"step": 245000
|
|
},
|
|
{
|
|
"epoch": 46.5292226215245,
|
|
"learning_rate": 2.6735388689237755e-05,
|
|
"loss": 1.5320625,
|
|
"step": 246000
|
|
},
|
|
{
|
|
"epoch": 46.71836580291281,
|
|
"learning_rate": 2.6640817098543597e-05,
|
|
"loss": 1.5440625,
|
|
"step": 247000
|
|
},
|
|
{
|
|
"epoch": 46.90750898430112,
|
|
"learning_rate": 2.6546245507849442e-05,
|
|
"loss": 1.5495,
|
|
"step": 248000
|
|
},
|
|
{
|
|
"epoch": 47.09665216568943,
|
|
"learning_rate": 2.645167391715529e-05,
|
|
"loss": 1.5265625,
|
|
"step": 249000
|
|
},
|
|
{
|
|
"epoch": 47.28579534707774,
|
|
"learning_rate": 2.6357102326461132e-05,
|
|
"loss": 1.50359375,
|
|
"step": 250000
|
|
},
|
|
{
|
|
"epoch": 47.47493852846605,
|
|
"learning_rate": 2.6262530735766977e-05,
|
|
"loss": 1.5239375,
|
|
"step": 251000
|
|
},
|
|
{
|
|
"epoch": 47.66408170985436,
|
|
"learning_rate": 2.6167959145072822e-05,
|
|
"loss": 1.5295625,
|
|
"step": 252000
|
|
},
|
|
{
|
|
"epoch": 47.85322489124267,
|
|
"learning_rate": 2.6073387554378664e-05,
|
|
"loss": 1.53828125,
|
|
"step": 253000
|
|
},
|
|
{
|
|
"epoch": 48.04236807263098,
|
|
"learning_rate": 2.597881596368451e-05,
|
|
"loss": 1.53,
|
|
"step": 254000
|
|
},
|
|
{
|
|
"epoch": 48.231511254019296,
|
|
"learning_rate": 2.5884244372990358e-05,
|
|
"loss": 1.496,
|
|
"step": 255000
|
|
},
|
|
{
|
|
"epoch": 48.420654435407606,
|
|
"learning_rate": 2.57896727822962e-05,
|
|
"loss": 1.503375,
|
|
"step": 256000
|
|
},
|
|
{
|
|
"epoch": 48.609797616795916,
|
|
"learning_rate": 2.5695101191602044e-05,
|
|
"loss": 1.521125,
|
|
"step": 257000
|
|
},
|
|
{
|
|
"epoch": 48.798940798184226,
|
|
"learning_rate": 2.560052960090789e-05,
|
|
"loss": 1.5279375,
|
|
"step": 258000
|
|
},
|
|
{
|
|
"epoch": 48.98808397957254,
|
|
"learning_rate": 2.550595801021373e-05,
|
|
"loss": 1.5339375,
|
|
"step": 259000
|
|
},
|
|
{
|
|
"epoch": 49.17722716096085,
|
|
"learning_rate": 2.541138641951958e-05,
|
|
"loss": 1.4859375,
|
|
"step": 260000
|
|
},
|
|
{
|
|
"epoch": 49.36637034234916,
|
|
"learning_rate": 2.5316814828825425e-05,
|
|
"loss": 1.4956875,
|
|
"step": 261000
|
|
},
|
|
{
|
|
"epoch": 49.55551352373747,
|
|
"learning_rate": 2.5222243238131266e-05,
|
|
"loss": 1.50375,
|
|
"step": 262000
|
|
},
|
|
{
|
|
"epoch": 49.74465670512578,
|
|
"learning_rate": 2.512767164743711e-05,
|
|
"loss": 1.51525,
|
|
"step": 263000
|
|
},
|
|
{
|
|
"epoch": 49.933799886514095,
|
|
"learning_rate": 2.5033100056742953e-05,
|
|
"loss": 1.52084375,
|
|
"step": 264000
|
|
},
|
|
{
|
|
"epoch": 50.122943067902405,
|
|
"learning_rate": 2.4938528466048798e-05,
|
|
"loss": 1.49025,
|
|
"step": 265000
|
|
},
|
|
{
|
|
"epoch": 50.312086249290715,
|
|
"learning_rate": 2.4843956875354647e-05,
|
|
"loss": 1.48290625,
|
|
"step": 266000
|
|
},
|
|
{
|
|
"epoch": 50.501229430679025,
|
|
"learning_rate": 2.474938528466049e-05,
|
|
"loss": 1.49121875,
|
|
"step": 267000
|
|
},
|
|
{
|
|
"epoch": 50.690372612067335,
|
|
"learning_rate": 2.4654813693966334e-05,
|
|
"loss": 1.502875,
|
|
"step": 268000
|
|
},
|
|
{
|
|
"epoch": 50.879515793455646,
|
|
"learning_rate": 2.4560242103272175e-05,
|
|
"loss": 1.51284375,
|
|
"step": 269000
|
|
},
|
|
{
|
|
"epoch": 51.068658974843956,
|
|
"learning_rate": 2.4465670512578024e-05,
|
|
"loss": 1.4948125,
|
|
"step": 270000
|
|
},
|
|
{
|
|
"epoch": 51.257802156232266,
|
|
"learning_rate": 2.4371098921883865e-05,
|
|
"loss": 1.4699375,
|
|
"step": 271000
|
|
},
|
|
{
|
|
"epoch": 51.446945337620576,
|
|
"learning_rate": 2.427652733118971e-05,
|
|
"loss": 1.48165625,
|
|
"step": 272000
|
|
},
|
|
{
|
|
"epoch": 51.63608851900889,
|
|
"learning_rate": 2.4181955740495556e-05,
|
|
"loss": 1.491875,
|
|
"step": 273000
|
|
},
|
|
{
|
|
"epoch": 51.825231700397204,
|
|
"learning_rate": 2.40873841498014e-05,
|
|
"loss": 1.498875,
|
|
"step": 274000
|
|
},
|
|
{
|
|
"epoch": 52.014374881785514,
|
|
"learning_rate": 2.3992812559107246e-05,
|
|
"loss": 1.50025,
|
|
"step": 275000
|
|
},
|
|
{
|
|
"epoch": 52.203518063173824,
|
|
"learning_rate": 2.389824096841309e-05,
|
|
"loss": 1.45865625,
|
|
"step": 276000
|
|
},
|
|
{
|
|
"epoch": 52.392661244562134,
|
|
"learning_rate": 2.3803669377718936e-05,
|
|
"loss": 1.4689375,
|
|
"step": 277000
|
|
},
|
|
{
|
|
"epoch": 52.581804425950445,
|
|
"learning_rate": 2.3709097787024778e-05,
|
|
"loss": 1.4756875,
|
|
"step": 278000
|
|
},
|
|
{
|
|
"epoch": 52.770947607338755,
|
|
"learning_rate": 2.3614526196330626e-05,
|
|
"loss": 1.48815625,
|
|
"step": 279000
|
|
},
|
|
{
|
|
"epoch": 52.960090788727065,
|
|
"learning_rate": 2.3519954605636468e-05,
|
|
"loss": 1.4954375,
|
|
"step": 280000
|
|
},
|
|
{
|
|
"epoch": 53.149233970115375,
|
|
"learning_rate": 2.3425383014942313e-05,
|
|
"loss": 1.4575625,
|
|
"step": 281000
|
|
},
|
|
{
|
|
"epoch": 53.338377151503686,
|
|
"learning_rate": 2.3330811424248155e-05,
|
|
"loss": 1.4586875,
|
|
"step": 282000
|
|
},
|
|
{
|
|
"epoch": 53.527520332891996,
|
|
"learning_rate": 2.3236239833554003e-05,
|
|
"loss": 1.46946875,
|
|
"step": 283000
|
|
},
|
|
{
|
|
"epoch": 53.71666351428031,
|
|
"learning_rate": 2.3141668242859845e-05,
|
|
"loss": 1.47496875,
|
|
"step": 284000
|
|
},
|
|
{
|
|
"epoch": 53.90580669566862,
|
|
"learning_rate": 2.304709665216569e-05,
|
|
"loss": 1.48246875,
|
|
"step": 285000
|
|
},
|
|
{
|
|
"epoch": 54.09494987705693,
|
|
"learning_rate": 2.2952525061471535e-05,
|
|
"loss": 1.458375,
|
|
"step": 286000
|
|
},
|
|
{
|
|
"epoch": 54.284093058445244,
|
|
"learning_rate": 2.285795347077738e-05,
|
|
"loss": 1.44609375,
|
|
"step": 287000
|
|
},
|
|
{
|
|
"epoch": 54.473236239833554,
|
|
"learning_rate": 2.2763381880083222e-05,
|
|
"loss": 1.45584375,
|
|
"step": 288000
|
|
},
|
|
{
|
|
"epoch": 54.662379421221864,
|
|
"learning_rate": 2.266881028938907e-05,
|
|
"loss": 1.46325,
|
|
"step": 289000
|
|
},
|
|
{
|
|
"epoch": 54.851522602610174,
|
|
"learning_rate": 2.2574238698694912e-05,
|
|
"loss": 1.475625,
|
|
"step": 290000
|
|
},
|
|
{
|
|
"epoch": 55.040665783998485,
|
|
"learning_rate": 2.2479667108000757e-05,
|
|
"loss": 1.46834375,
|
|
"step": 291000
|
|
},
|
|
{
|
|
"epoch": 55.229808965386795,
|
|
"learning_rate": 2.2385095517306602e-05,
|
|
"loss": 1.43265625,
|
|
"step": 292000
|
|
},
|
|
{
|
|
"epoch": 55.41895214677511,
|
|
"learning_rate": 2.2290523926612447e-05,
|
|
"loss": 1.4436875,
|
|
"step": 293000
|
|
},
|
|
{
|
|
"epoch": 55.60809532816342,
|
|
"learning_rate": 2.2195952335918292e-05,
|
|
"loss": 1.455,
|
|
"step": 294000
|
|
},
|
|
{
|
|
"epoch": 55.79723850955173,
|
|
"learning_rate": 2.2101380745224134e-05,
|
|
"loss": 1.46475,
|
|
"step": 295000
|
|
},
|
|
{
|
|
"epoch": 55.98638169094004,
|
|
"learning_rate": 2.2006809154529982e-05,
|
|
"loss": 1.46825,
|
|
"step": 296000
|
|
},
|
|
{
|
|
"epoch": 56.17552487232835,
|
|
"learning_rate": 2.1912237563835824e-05,
|
|
"loss": 1.4260625,
|
|
"step": 297000
|
|
},
|
|
{
|
|
"epoch": 56.36466805371666,
|
|
"learning_rate": 2.181766597314167e-05,
|
|
"loss": 1.43375,
|
|
"step": 298000
|
|
},
|
|
{
|
|
"epoch": 56.55381123510497,
|
|
"learning_rate": 2.1723094382447514e-05,
|
|
"loss": 1.444375,
|
|
"step": 299000
|
|
},
|
|
{
|
|
"epoch": 56.74295441649328,
|
|
"learning_rate": 2.162852279175336e-05,
|
|
"loss": 1.4503125,
|
|
"step": 300000
|
|
},
|
|
{
|
|
"epoch": 56.932097597881594,
|
|
"learning_rate": 2.15339512010592e-05,
|
|
"loss": 1.4611875,
|
|
"step": 301000
|
|
},
|
|
{
|
|
"epoch": 57.121240779269904,
|
|
"learning_rate": 2.143937961036505e-05,
|
|
"loss": 1.4291875,
|
|
"step": 302000
|
|
},
|
|
{
|
|
"epoch": 57.31038396065822,
|
|
"learning_rate": 2.134480801967089e-05,
|
|
"loss": 1.422,
|
|
"step": 303000
|
|
},
|
|
{
|
|
"epoch": 57.49952714204653,
|
|
"learning_rate": 2.1250236428976736e-05,
|
|
"loss": 1.434625,
|
|
"step": 304000
|
|
},
|
|
{
|
|
"epoch": 57.68867032343484,
|
|
"learning_rate": 2.115566483828258e-05,
|
|
"loss": 1.437625,
|
|
"step": 305000
|
|
},
|
|
{
|
|
"epoch": 57.87781350482315,
|
|
"learning_rate": 2.1061093247588427e-05,
|
|
"loss": 1.4495625,
|
|
"step": 306000
|
|
},
|
|
{
|
|
"epoch": 58.06695668621146,
|
|
"learning_rate": 2.0966521656894268e-05,
|
|
"loss": 1.440125,
|
|
"step": 307000
|
|
},
|
|
{
|
|
"epoch": 58.25609986759977,
|
|
"learning_rate": 2.0871950066200113e-05,
|
|
"loss": 1.413375,
|
|
"step": 308000
|
|
},
|
|
{
|
|
"epoch": 58.44524304898808,
|
|
"learning_rate": 2.077737847550596e-05,
|
|
"loss": 1.4220625,
|
|
"step": 309000
|
|
},
|
|
{
|
|
"epoch": 58.63438623037639,
|
|
"learning_rate": 2.0682806884811804e-05,
|
|
"loss": 1.4315,
|
|
"step": 310000
|
|
},
|
|
{
|
|
"epoch": 58.8235294117647,
|
|
"learning_rate": 2.058823529411765e-05,
|
|
"loss": 1.4355625,
|
|
"step": 311000
|
|
},
|
|
{
|
|
"epoch": 59.01267259315302,
|
|
"learning_rate": 2.0493663703423494e-05,
|
|
"loss": 1.440125,
|
|
"step": 312000
|
|
},
|
|
{
|
|
"epoch": 59.20181577454133,
|
|
"learning_rate": 2.039909211272934e-05,
|
|
"loss": 1.4,
|
|
"step": 313000
|
|
},
|
|
{
|
|
"epoch": 59.39095895592964,
|
|
"learning_rate": 2.030452052203518e-05,
|
|
"loss": 1.4098125,
|
|
"step": 314000
|
|
},
|
|
{
|
|
"epoch": 59.58010213731795,
|
|
"learning_rate": 2.0209948931341026e-05,
|
|
"loss": 1.4241875,
|
|
"step": 315000
|
|
},
|
|
{
|
|
"epoch": 59.76924531870626,
|
|
"learning_rate": 2.011537734064687e-05,
|
|
"loss": 1.43,
|
|
"step": 316000
|
|
},
|
|
{
|
|
"epoch": 59.95838850009457,
|
|
"learning_rate": 2.0020805749952716e-05,
|
|
"loss": 1.43575,
|
|
"step": 317000
|
|
},
|
|
{
|
|
"epoch": 60.14753168148288,
|
|
"learning_rate": 1.9926234159258557e-05,
|
|
"loss": 1.402625,
|
|
"step": 318000
|
|
},
|
|
{
|
|
"epoch": 60.33667486287119,
|
|
"learning_rate": 1.9831662568564406e-05,
|
|
"loss": 1.4025,
|
|
"step": 319000
|
|
},
|
|
{
|
|
"epoch": 60.5258180442595,
|
|
"learning_rate": 1.9737090977870248e-05,
|
|
"loss": 1.413,
|
|
"step": 320000
|
|
},
|
|
{
|
|
"epoch": 60.71496122564781,
|
|
"learning_rate": 1.9642519387176093e-05,
|
|
"loss": 1.4200625,
|
|
"step": 321000
|
|
},
|
|
{
|
|
"epoch": 60.90410440703613,
|
|
"learning_rate": 1.9547947796481938e-05,
|
|
"loss": 1.4270625,
|
|
"step": 322000
|
|
},
|
|
{
|
|
"epoch": 61.09324758842444,
|
|
"learning_rate": 1.9453376205787783e-05,
|
|
"loss": 1.4055625,
|
|
"step": 323000
|
|
},
|
|
{
|
|
"epoch": 61.28239076981275,
|
|
"learning_rate": 1.9358804615093625e-05,
|
|
"loss": 1.3949375,
|
|
"step": 324000
|
|
},
|
|
{
|
|
"epoch": 61.47153395120106,
|
|
"learning_rate": 1.9264233024399473e-05,
|
|
"loss": 1.4008125,
|
|
"step": 325000
|
|
},
|
|
{
|
|
"epoch": 61.66067713258937,
|
|
"learning_rate": 1.9169661433705315e-05,
|
|
"loss": 1.4103125,
|
|
"step": 326000
|
|
},
|
|
{
|
|
"epoch": 61.84982031397768,
|
|
"learning_rate": 1.907508984301116e-05,
|
|
"loss": 1.41725,
|
|
"step": 327000
|
|
},
|
|
{
|
|
"epoch": 62.03896349536599,
|
|
"learning_rate": 1.8980518252317005e-05,
|
|
"loss": 1.4091875,
|
|
"step": 328000
|
|
},
|
|
{
|
|
"epoch": 62.2281066767543,
|
|
"learning_rate": 1.888594666162285e-05,
|
|
"loss": 1.382125,
|
|
"step": 329000
|
|
},
|
|
{
|
|
"epoch": 62.41724985814261,
|
|
"learning_rate": 1.8791375070928692e-05,
|
|
"loss": 1.39175,
|
|
"step": 330000
|
|
},
|
|
{
|
|
"epoch": 62.60639303953093,
|
|
"learning_rate": 1.8696803480234537e-05,
|
|
"loss": 1.3970625,
|
|
"step": 331000
|
|
},
|
|
{
|
|
"epoch": 62.79553622091924,
|
|
"learning_rate": 1.8602231889540382e-05,
|
|
"loss": 1.409375,
|
|
"step": 332000
|
|
},
|
|
{
|
|
"epoch": 62.98467940230755,
|
|
"learning_rate": 1.8507660298846227e-05,
|
|
"loss": 1.4141875,
|
|
"step": 333000
|
|
},
|
|
{
|
|
"epoch": 63.17382258369586,
|
|
"learning_rate": 1.8413088708152072e-05,
|
|
"loss": 1.371875,
|
|
"step": 334000
|
|
},
|
|
{
|
|
"epoch": 63.36296576508417,
|
|
"learning_rate": 1.8318517117457917e-05,
|
|
"loss": 1.382125,
|
|
"step": 335000
|
|
},
|
|
{
|
|
"epoch": 63.55210894647248,
|
|
"learning_rate": 1.8223945526763762e-05,
|
|
"loss": 1.3925,
|
|
"step": 336000
|
|
},
|
|
{
|
|
"epoch": 63.74125212786079,
|
|
"learning_rate": 1.8129373936069604e-05,
|
|
"loss": 1.3989375,
|
|
"step": 337000
|
|
},
|
|
{
|
|
"epoch": 63.9303953092491,
|
|
"learning_rate": 1.8034802345375452e-05,
|
|
"loss": 1.4040625,
|
|
"step": 338000
|
|
},
|
|
{
|
|
"epoch": 64.11953849063741,
|
|
"learning_rate": 1.7940230754681294e-05,
|
|
"loss": 1.3838125,
|
|
"step": 339000
|
|
},
|
|
{
|
|
"epoch": 64.30868167202573,
|
|
"learning_rate": 1.784565916398714e-05,
|
|
"loss": 1.3745625,
|
|
"step": 340000
|
|
},
|
|
{
|
|
"epoch": 64.49782485341403,
|
|
"learning_rate": 1.7751087573292984e-05,
|
|
"loss": 1.381875,
|
|
"step": 341000
|
|
},
|
|
{
|
|
"epoch": 64.68696803480235,
|
|
"learning_rate": 1.765651598259883e-05,
|
|
"loss": 1.3859375,
|
|
"step": 342000
|
|
},
|
|
{
|
|
"epoch": 64.87611121619065,
|
|
"learning_rate": 1.756194439190467e-05,
|
|
"loss": 1.4001875,
|
|
"step": 343000
|
|
},
|
|
{
|
|
"epoch": 65.06525439757897,
|
|
"learning_rate": 1.7467372801210516e-05,
|
|
"loss": 1.387,
|
|
"step": 344000
|
|
},
|
|
{
|
|
"epoch": 65.25439757896727,
|
|
"learning_rate": 1.737280121051636e-05,
|
|
"loss": 1.3645625,
|
|
"step": 345000
|
|
},
|
|
{
|
|
"epoch": 65.44354076035559,
|
|
"learning_rate": 1.7278229619822206e-05,
|
|
"loss": 1.373375,
|
|
"step": 346000
|
|
},
|
|
{
|
|
"epoch": 65.6326839417439,
|
|
"learning_rate": 1.7183658029128048e-05,
|
|
"loss": 1.384,
|
|
"step": 347000
|
|
},
|
|
{
|
|
"epoch": 65.82182712313221,
|
|
"learning_rate": 1.7089086438433897e-05,
|
|
"loss": 1.386875,
|
|
"step": 348000
|
|
},
|
|
{
|
|
"epoch": 66.01097030452053,
|
|
"learning_rate": 1.6994514847739738e-05,
|
|
"loss": 1.3924375,
|
|
"step": 349000
|
|
},
|
|
{
|
|
"epoch": 66.20011348590883,
|
|
"learning_rate": 1.6899943257045583e-05,
|
|
"loss": 1.357375,
|
|
"step": 350000
|
|
},
|
|
{
|
|
"epoch": 66.38925666729715,
|
|
"learning_rate": 1.680537166635143e-05,
|
|
"loss": 1.3655625,
|
|
"step": 351000
|
|
},
|
|
{
|
|
"epoch": 66.57839984868545,
|
|
"learning_rate": 1.6710800075657274e-05,
|
|
"loss": 1.3670625,
|
|
"step": 352000
|
|
},
|
|
{
|
|
"epoch": 66.76754303007377,
|
|
"learning_rate": 1.661622848496312e-05,
|
|
"loss": 1.3799375,
|
|
"step": 353000
|
|
},
|
|
{
|
|
"epoch": 66.95668621146207,
|
|
"learning_rate": 1.6521656894268964e-05,
|
|
"loss": 1.3880625,
|
|
"step": 354000
|
|
},
|
|
{
|
|
"epoch": 67.14582939285039,
|
|
"learning_rate": 1.642708530357481e-05,
|
|
"loss": 1.3585,
|
|
"step": 355000
|
|
},
|
|
{
|
|
"epoch": 67.3349725742387,
|
|
"learning_rate": 1.633251371288065e-05,
|
|
"loss": 1.3579375,
|
|
"step": 356000
|
|
},
|
|
{
|
|
"epoch": 67.52411575562701,
|
|
"learning_rate": 1.6237942122186496e-05,
|
|
"loss": 1.3641875,
|
|
"step": 357000
|
|
},
|
|
{
|
|
"epoch": 67.71325893701533,
|
|
"learning_rate": 1.614337053149234e-05,
|
|
"loss": 1.3701875,
|
|
"step": 358000
|
|
},
|
|
{
|
|
"epoch": 67.90240211840363,
|
|
"learning_rate": 1.6048798940798186e-05,
|
|
"loss": 1.375875,
|
|
"step": 359000
|
|
},
|
|
{
|
|
"epoch": 68.09154529979195,
|
|
"learning_rate": 1.5954227350104027e-05,
|
|
"loss": 1.359,
|
|
"step": 360000
|
|
},
|
|
{
|
|
"epoch": 68.28068848118025,
|
|
"learning_rate": 1.5859655759409876e-05,
|
|
"loss": 1.3483125,
|
|
"step": 361000
|
|
},
|
|
{
|
|
"epoch": 68.46983166256857,
|
|
"learning_rate": 1.5765084168715718e-05,
|
|
"loss": 1.3565,
|
|
"step": 362000
|
|
},
|
|
{
|
|
"epoch": 68.65897484395687,
|
|
"learning_rate": 1.5670512578021563e-05,
|
|
"loss": 1.362125,
|
|
"step": 363000
|
|
},
|
|
{
|
|
"epoch": 68.84811802534519,
|
|
"learning_rate": 1.5575940987327408e-05,
|
|
"loss": 1.36725,
|
|
"step": 364000
|
|
},
|
|
{
|
|
"epoch": 69.0372612067335,
|
|
"learning_rate": 1.5481369396633253e-05,
|
|
"loss": 1.3636875,
|
|
"step": 365000
|
|
},
|
|
{
|
|
"epoch": 69.22640438812181,
|
|
"learning_rate": 1.5386797805939095e-05,
|
|
"loss": 1.33725,
|
|
"step": 366000
|
|
},
|
|
{
|
|
"epoch": 69.41554756951012,
|
|
"learning_rate": 1.5292226215244943e-05,
|
|
"loss": 1.3505,
|
|
"step": 367000
|
|
},
|
|
{
|
|
"epoch": 69.60469075089843,
|
|
"learning_rate": 1.5197654624550786e-05,
|
|
"loss": 1.3563125,
|
|
"step": 368000
|
|
},
|
|
{
|
|
"epoch": 69.79383393228674,
|
|
"learning_rate": 1.510308303385663e-05,
|
|
"loss": 1.3588125,
|
|
"step": 369000
|
|
},
|
|
{
|
|
"epoch": 69.98297711367505,
|
|
"learning_rate": 1.5008511443162473e-05,
|
|
"loss": 1.369125,
|
|
"step": 370000
|
|
},
|
|
{
|
|
"epoch": 70.17212029506337,
|
|
"learning_rate": 1.491393985246832e-05,
|
|
"loss": 1.3346875,
|
|
"step": 371000
|
|
},
|
|
{
|
|
"epoch": 70.36126347645167,
|
|
"learning_rate": 1.4819368261774163e-05,
|
|
"loss": 1.338,
|
|
"step": 372000
|
|
},
|
|
{
|
|
"epoch": 70.55040665783999,
|
|
"learning_rate": 1.4724796671080007e-05,
|
|
"loss": 1.3443125,
|
|
"step": 373000
|
|
},
|
|
{
|
|
"epoch": 70.73954983922829,
|
|
"learning_rate": 1.4630225080385854e-05,
|
|
"loss": 1.3544375,
|
|
"step": 374000
|
|
},
|
|
{
|
|
"epoch": 70.9286930206166,
|
|
"learning_rate": 1.4535653489691697e-05,
|
|
"loss": 1.3595625,
|
|
"step": 375000
|
|
},
|
|
{
|
|
"epoch": 71.11783620200492,
|
|
"learning_rate": 1.444108189899754e-05,
|
|
"loss": 1.343875,
|
|
"step": 376000
|
|
},
|
|
{
|
|
"epoch": 71.30697938339323,
|
|
"learning_rate": 1.4346510308303387e-05,
|
|
"loss": 1.332625,
|
|
"step": 377000
|
|
},
|
|
{
|
|
"epoch": 71.49612256478154,
|
|
"learning_rate": 1.425193871760923e-05,
|
|
"loss": 1.3395625,
|
|
"step": 378000
|
|
},
|
|
{
|
|
"epoch": 71.68526574616985,
|
|
"learning_rate": 1.4157367126915074e-05,
|
|
"loss": 1.344125,
|
|
"step": 379000
|
|
},
|
|
{
|
|
"epoch": 71.87440892755816,
|
|
"learning_rate": 1.406279553622092e-05,
|
|
"loss": 1.3505625,
|
|
"step": 380000
|
|
},
|
|
{
|
|
"epoch": 72.06355210894647,
|
|
"learning_rate": 1.3968223945526764e-05,
|
|
"loss": 1.3428125,
|
|
"step": 381000
|
|
},
|
|
{
|
|
"epoch": 72.25269529033478,
|
|
"learning_rate": 1.387365235483261e-05,
|
|
"loss": 1.324875,
|
|
"step": 382000
|
|
},
|
|
{
|
|
"epoch": 72.44183847172309,
|
|
"learning_rate": 1.3779080764138453e-05,
|
|
"loss": 1.332125,
|
|
"step": 383000
|
|
},
|
|
{
|
|
"epoch": 72.6309816531114,
|
|
"learning_rate": 1.36845091734443e-05,
|
|
"loss": 1.339,
|
|
"step": 384000
|
|
},
|
|
{
|
|
"epoch": 72.82012483449972,
|
|
"learning_rate": 1.3589937582750143e-05,
|
|
"loss": 1.3420625,
|
|
"step": 385000
|
|
},
|
|
{
|
|
"epoch": 73.00926801588803,
|
|
"learning_rate": 1.3495365992055986e-05,
|
|
"loss": 1.342875,
|
|
"step": 386000
|
|
},
|
|
{
|
|
"epoch": 73.19841119727634,
|
|
"learning_rate": 1.3400794401361833e-05,
|
|
"loss": 1.3198125,
|
|
"step": 387000
|
|
},
|
|
{
|
|
"epoch": 73.38755437866465,
|
|
"learning_rate": 1.3306222810667676e-05,
|
|
"loss": 1.3253125,
|
|
"step": 388000
|
|
},
|
|
{
|
|
"epoch": 73.57669756005296,
|
|
"learning_rate": 1.321165121997352e-05,
|
|
"loss": 1.328875,
|
|
"step": 389000
|
|
},
|
|
{
|
|
"epoch": 73.76584074144127,
|
|
"learning_rate": 1.3117079629279367e-05,
|
|
"loss": 1.335875,
|
|
"step": 390000
|
|
},
|
|
{
|
|
"epoch": 73.95498392282958,
|
|
"learning_rate": 1.302250803858521e-05,
|
|
"loss": 1.3428125,
|
|
"step": 391000
|
|
},
|
|
{
|
|
"epoch": 74.14412710421789,
|
|
"learning_rate": 1.2927936447891053e-05,
|
|
"loss": 1.3216875,
|
|
"step": 392000
|
|
},
|
|
{
|
|
"epoch": 74.3332702856062,
|
|
"learning_rate": 1.2833364857196897e-05,
|
|
"loss": 1.3190625,
|
|
"step": 393000
|
|
},
|
|
{
|
|
"epoch": 74.52241346699452,
|
|
"learning_rate": 1.2738793266502744e-05,
|
|
"loss": 1.3250625,
|
|
"step": 394000
|
|
},
|
|
{
|
|
"epoch": 74.71155664838282,
|
|
"learning_rate": 1.2644221675808587e-05,
|
|
"loss": 1.3285625,
|
|
"step": 395000
|
|
},
|
|
{
|
|
"epoch": 74.90069982977114,
|
|
"learning_rate": 1.254965008511443e-05,
|
|
"loss": 1.3365625,
|
|
"step": 396000
|
|
},
|
|
{
|
|
"epoch": 75.08984301115945,
|
|
"learning_rate": 1.2455078494420275e-05,
|
|
"loss": 1.3183125,
|
|
"step": 397000
|
|
},
|
|
{
|
|
"epoch": 75.27898619254776,
|
|
"learning_rate": 1.236050690372612e-05,
|
|
"loss": 1.309375,
|
|
"step": 398000
|
|
},
|
|
{
|
|
"epoch": 75.46812937393607,
|
|
"learning_rate": 1.2265935313031966e-05,
|
|
"loss": 1.31425,
|
|
"step": 399000
|
|
},
|
|
{
|
|
"epoch": 75.65727255532438,
|
|
"learning_rate": 1.217136372233781e-05,
|
|
"loss": 1.32575,
|
|
"step": 400000
|
|
},
|
|
{
|
|
"epoch": 75.84641573671269,
|
|
"learning_rate": 1.2076792131643656e-05,
|
|
"loss": 1.330375,
|
|
"step": 401000
|
|
},
|
|
{
|
|
"epoch": 76.035558918101,
|
|
"learning_rate": 1.1982220540949501e-05,
|
|
"loss": 1.3319375,
|
|
"step": 402000
|
|
},
|
|
{
|
|
"epoch": 76.22470209948932,
|
|
"learning_rate": 1.1887648950255344e-05,
|
|
"loss": 1.2995625,
|
|
"step": 403000
|
|
},
|
|
{
|
|
"epoch": 76.41384528087762,
|
|
"learning_rate": 1.179307735956119e-05,
|
|
"loss": 1.309,
|
|
"step": 404000
|
|
},
|
|
{
|
|
"epoch": 76.60298846226594,
|
|
"learning_rate": 1.1698505768867033e-05,
|
|
"loss": 1.3225,
|
|
"step": 405000
|
|
}
|
|
],
|
|
"max_steps": 528700,
|
|
"num_train_epochs": 100,
|
|
"total_flos": 619270336176979968,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|