m2m100_418M_en_zul_rel / trainer_state.json
Davlan's picture
add MT model
eafb3f7
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 199170,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.98744790882161e-05,
"loss": 3.7619,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.97489581764322e-05,
"loss": 2.8947,
"step": 1000
},
{
"epoch": 0.02,
"learning_rate": 4.962343726464829e-05,
"loss": 2.5182,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 4.949791635286439e-05,
"loss": 2.3038,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 4.937239544108049e-05,
"loss": 2.1453,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 4.9246874529296584e-05,
"loss": 2.0297,
"step": 3000
},
{
"epoch": 0.05,
"learning_rate": 4.912135361751268e-05,
"loss": 1.9309,
"step": 3500
},
{
"epoch": 0.06,
"learning_rate": 4.899583270572878e-05,
"loss": 1.8563,
"step": 4000
},
{
"epoch": 0.07,
"learning_rate": 4.8870311793944875e-05,
"loss": 1.8049,
"step": 4500
},
{
"epoch": 0.08,
"learning_rate": 4.874479088216097e-05,
"loss": 1.7541,
"step": 5000
},
{
"epoch": 0.08,
"learning_rate": 4.861926997037707e-05,
"loss": 1.7037,
"step": 5500
},
{
"epoch": 0.09,
"learning_rate": 4.8493749058593166e-05,
"loss": 1.6645,
"step": 6000
},
{
"epoch": 0.1,
"learning_rate": 4.836822814680926e-05,
"loss": 1.621,
"step": 6500
},
{
"epoch": 0.11,
"learning_rate": 4.824270723502536e-05,
"loss": 1.6029,
"step": 7000
},
{
"epoch": 0.11,
"learning_rate": 4.8117186323241456e-05,
"loss": 1.5655,
"step": 7500
},
{
"epoch": 0.12,
"learning_rate": 4.799166541145755e-05,
"loss": 1.5463,
"step": 8000
},
{
"epoch": 0.13,
"learning_rate": 4.7866144499673646e-05,
"loss": 1.523,
"step": 8500
},
{
"epoch": 0.14,
"learning_rate": 4.774062358788974e-05,
"loss": 1.4924,
"step": 9000
},
{
"epoch": 0.14,
"learning_rate": 4.761510267610584e-05,
"loss": 1.4706,
"step": 9500
},
{
"epoch": 0.15,
"learning_rate": 4.7489581764321937e-05,
"loss": 1.4786,
"step": 10000
},
{
"epoch": 0.16,
"learning_rate": 4.736406085253803e-05,
"loss": 1.4326,
"step": 10500
},
{
"epoch": 0.17,
"learning_rate": 4.723853994075413e-05,
"loss": 1.4092,
"step": 11000
},
{
"epoch": 0.17,
"learning_rate": 4.711301902897023e-05,
"loss": 1.4245,
"step": 11500
},
{
"epoch": 0.18,
"learning_rate": 4.698749811718632e-05,
"loss": 1.4063,
"step": 12000
},
{
"epoch": 0.19,
"learning_rate": 4.6861977205402423e-05,
"loss": 1.3734,
"step": 12500
},
{
"epoch": 0.2,
"learning_rate": 4.673645629361852e-05,
"loss": 1.3613,
"step": 13000
},
{
"epoch": 0.2,
"learning_rate": 4.661093538183461e-05,
"loss": 1.3539,
"step": 13500
},
{
"epoch": 0.21,
"learning_rate": 4.6485414470050714e-05,
"loss": 1.3356,
"step": 14000
},
{
"epoch": 0.22,
"learning_rate": 4.635989355826681e-05,
"loss": 1.358,
"step": 14500
},
{
"epoch": 0.23,
"learning_rate": 4.6234372646482904e-05,
"loss": 1.311,
"step": 15000
},
{
"epoch": 0.23,
"learning_rate": 4.6108851734699005e-05,
"loss": 1.3026,
"step": 15500
},
{
"epoch": 0.24,
"learning_rate": 4.59833308229151e-05,
"loss": 1.308,
"step": 16000
},
{
"epoch": 0.25,
"learning_rate": 4.5857809911131194e-05,
"loss": 1.2758,
"step": 16500
},
{
"epoch": 0.26,
"learning_rate": 4.5732288999347296e-05,
"loss": 1.2894,
"step": 17000
},
{
"epoch": 0.26,
"learning_rate": 4.560676808756339e-05,
"loss": 1.2631,
"step": 17500
},
{
"epoch": 0.27,
"learning_rate": 4.5481247175779485e-05,
"loss": 1.2737,
"step": 18000
},
{
"epoch": 0.28,
"learning_rate": 4.535572626399559e-05,
"loss": 1.2568,
"step": 18500
},
{
"epoch": 0.29,
"learning_rate": 4.523020535221168e-05,
"loss": 1.2442,
"step": 19000
},
{
"epoch": 0.29,
"learning_rate": 4.510468444042778e-05,
"loss": 1.2455,
"step": 19500
},
{
"epoch": 0.3,
"learning_rate": 4.497916352864388e-05,
"loss": 1.244,
"step": 20000
},
{
"epoch": 0.31,
"learning_rate": 4.485364261685997e-05,
"loss": 1.23,
"step": 20500
},
{
"epoch": 0.32,
"learning_rate": 4.4728121705076074e-05,
"loss": 1.2089,
"step": 21000
},
{
"epoch": 0.32,
"learning_rate": 4.460260079329217e-05,
"loss": 1.2161,
"step": 21500
},
{
"epoch": 0.33,
"learning_rate": 4.447707988150826e-05,
"loss": 1.217,
"step": 22000
},
{
"epoch": 0.34,
"learning_rate": 4.435155896972436e-05,
"loss": 1.1856,
"step": 22500
},
{
"epoch": 0.35,
"learning_rate": 4.422603805794045e-05,
"loss": 1.1964,
"step": 23000
},
{
"epoch": 0.35,
"learning_rate": 4.410051714615655e-05,
"loss": 1.1793,
"step": 23500
},
{
"epoch": 0.36,
"learning_rate": 4.397499623437265e-05,
"loss": 1.1949,
"step": 24000
},
{
"epoch": 0.37,
"learning_rate": 4.384947532258874e-05,
"loss": 1.1823,
"step": 24500
},
{
"epoch": 0.38,
"learning_rate": 4.372395441080484e-05,
"loss": 1.1872,
"step": 25000
},
{
"epoch": 0.38,
"learning_rate": 4.359843349902094e-05,
"loss": 1.1712,
"step": 25500
},
{
"epoch": 0.39,
"learning_rate": 4.3472912587237034e-05,
"loss": 1.1792,
"step": 26000
},
{
"epoch": 0.4,
"learning_rate": 4.334739167545313e-05,
"loss": 1.1628,
"step": 26500
},
{
"epoch": 0.41,
"learning_rate": 4.322187076366923e-05,
"loss": 1.1464,
"step": 27000
},
{
"epoch": 0.41,
"learning_rate": 4.3096349851885325e-05,
"loss": 1.1668,
"step": 27500
},
{
"epoch": 0.42,
"learning_rate": 4.297082894010142e-05,
"loss": 1.1377,
"step": 28000
},
{
"epoch": 0.43,
"learning_rate": 4.284530802831752e-05,
"loss": 1.1426,
"step": 28500
},
{
"epoch": 0.44,
"learning_rate": 4.2719787116533615e-05,
"loss": 1.1347,
"step": 29000
},
{
"epoch": 0.44,
"learning_rate": 4.259426620474971e-05,
"loss": 1.1226,
"step": 29500
},
{
"epoch": 0.45,
"learning_rate": 4.246874529296581e-05,
"loss": 1.1094,
"step": 30000
},
{
"epoch": 0.46,
"learning_rate": 4.2343224381181906e-05,
"loss": 1.1204,
"step": 30500
},
{
"epoch": 0.47,
"learning_rate": 4.2217703469398e-05,
"loss": 1.1171,
"step": 31000
},
{
"epoch": 0.47,
"learning_rate": 4.20921825576141e-05,
"loss": 1.1354,
"step": 31500
},
{
"epoch": 0.48,
"learning_rate": 4.19666616458302e-05,
"loss": 1.0998,
"step": 32000
},
{
"epoch": 0.49,
"learning_rate": 4.18411407340463e-05,
"loss": 1.0948,
"step": 32500
},
{
"epoch": 0.5,
"learning_rate": 4.171561982226239e-05,
"loss": 1.0967,
"step": 33000
},
{
"epoch": 0.5,
"learning_rate": 4.159009891047849e-05,
"loss": 1.0943,
"step": 33500
},
{
"epoch": 0.51,
"learning_rate": 4.146457799869459e-05,
"loss": 1.1228,
"step": 34000
},
{
"epoch": 0.52,
"learning_rate": 4.1339057086910684e-05,
"loss": 1.0944,
"step": 34500
},
{
"epoch": 0.53,
"learning_rate": 4.121353617512678e-05,
"loss": 1.0928,
"step": 35000
},
{
"epoch": 0.53,
"learning_rate": 4.108801526334288e-05,
"loss": 1.1025,
"step": 35500
},
{
"epoch": 0.54,
"learning_rate": 4.0962494351558975e-05,
"loss": 1.0873,
"step": 36000
},
{
"epoch": 0.55,
"learning_rate": 4.083697343977507e-05,
"loss": 1.0977,
"step": 36500
},
{
"epoch": 0.56,
"learning_rate": 4.0711452527991164e-05,
"loss": 1.0785,
"step": 37000
},
{
"epoch": 0.56,
"learning_rate": 4.0585931616207266e-05,
"loss": 1.0754,
"step": 37500
},
{
"epoch": 0.57,
"learning_rate": 4.046041070442336e-05,
"loss": 1.0702,
"step": 38000
},
{
"epoch": 0.58,
"learning_rate": 4.0334889792639455e-05,
"loss": 1.0656,
"step": 38500
},
{
"epoch": 0.59,
"learning_rate": 4.020936888085555e-05,
"loss": 1.0817,
"step": 39000
},
{
"epoch": 0.59,
"learning_rate": 4.0083847969071644e-05,
"loss": 1.0647,
"step": 39500
},
{
"epoch": 0.6,
"learning_rate": 3.9958327057287746e-05,
"loss": 1.0403,
"step": 40000
},
{
"epoch": 0.61,
"learning_rate": 3.983280614550384e-05,
"loss": 1.052,
"step": 40500
},
{
"epoch": 0.62,
"learning_rate": 3.9707285233719935e-05,
"loss": 1.0472,
"step": 41000
},
{
"epoch": 0.63,
"learning_rate": 3.9581764321936036e-05,
"loss": 1.0509,
"step": 41500
},
{
"epoch": 0.63,
"learning_rate": 3.945624341015213e-05,
"loss": 1.0534,
"step": 42000
},
{
"epoch": 0.64,
"learning_rate": 3.9330722498368226e-05,
"loss": 1.0461,
"step": 42500
},
{
"epoch": 0.65,
"learning_rate": 3.920520158658433e-05,
"loss": 1.0376,
"step": 43000
},
{
"epoch": 0.66,
"learning_rate": 3.907968067480042e-05,
"loss": 1.0298,
"step": 43500
},
{
"epoch": 0.66,
"learning_rate": 3.895415976301652e-05,
"loss": 1.0333,
"step": 44000
},
{
"epoch": 0.67,
"learning_rate": 3.882863885123262e-05,
"loss": 1.0267,
"step": 44500
},
{
"epoch": 0.68,
"learning_rate": 3.870311793944871e-05,
"loss": 1.0263,
"step": 45000
},
{
"epoch": 0.69,
"learning_rate": 3.8577597027664814e-05,
"loss": 1.0276,
"step": 45500
},
{
"epoch": 0.69,
"learning_rate": 3.845207611588091e-05,
"loss": 1.0267,
"step": 46000
},
{
"epoch": 0.7,
"learning_rate": 3.8326555204097004e-05,
"loss": 1.0242,
"step": 46500
},
{
"epoch": 0.71,
"learning_rate": 3.8201034292313105e-05,
"loss": 1.0191,
"step": 47000
},
{
"epoch": 0.72,
"learning_rate": 3.80755133805292e-05,
"loss": 1.0134,
"step": 47500
},
{
"epoch": 0.72,
"learning_rate": 3.7949992468745294e-05,
"loss": 1.0165,
"step": 48000
},
{
"epoch": 0.73,
"learning_rate": 3.7824471556961396e-05,
"loss": 1.0201,
"step": 48500
},
{
"epoch": 0.74,
"learning_rate": 3.769895064517749e-05,
"loss": 1.0201,
"step": 49000
},
{
"epoch": 0.75,
"learning_rate": 3.7573429733393585e-05,
"loss": 1.02,
"step": 49500
},
{
"epoch": 0.75,
"learning_rate": 3.7447908821609687e-05,
"loss": 1.0144,
"step": 50000
},
{
"epoch": 0.76,
"learning_rate": 3.732238790982578e-05,
"loss": 0.9911,
"step": 50500
},
{
"epoch": 0.77,
"learning_rate": 3.7196866998041876e-05,
"loss": 0.9944,
"step": 51000
},
{
"epoch": 0.78,
"learning_rate": 3.707134608625798e-05,
"loss": 0.9963,
"step": 51500
},
{
"epoch": 0.78,
"learning_rate": 3.694582517447407e-05,
"loss": 0.9875,
"step": 52000
},
{
"epoch": 0.79,
"learning_rate": 3.682030426269017e-05,
"loss": 1.0059,
"step": 52500
},
{
"epoch": 0.8,
"learning_rate": 3.669478335090626e-05,
"loss": 0.9848,
"step": 53000
},
{
"epoch": 0.81,
"learning_rate": 3.6569262439122356e-05,
"loss": 0.9947,
"step": 53500
},
{
"epoch": 0.81,
"learning_rate": 3.644374152733845e-05,
"loss": 0.9865,
"step": 54000
},
{
"epoch": 0.82,
"learning_rate": 3.631822061555455e-05,
"loss": 0.9884,
"step": 54500
},
{
"epoch": 0.83,
"learning_rate": 3.619269970377065e-05,
"loss": 0.9781,
"step": 55000
},
{
"epoch": 0.84,
"learning_rate": 3.606717879198674e-05,
"loss": 0.9934,
"step": 55500
},
{
"epoch": 0.84,
"learning_rate": 3.594165788020284e-05,
"loss": 0.9937,
"step": 56000
},
{
"epoch": 0.85,
"learning_rate": 3.581613696841894e-05,
"loss": 0.9714,
"step": 56500
},
{
"epoch": 0.86,
"learning_rate": 3.569061605663503e-05,
"loss": 0.9809,
"step": 57000
},
{
"epoch": 0.87,
"learning_rate": 3.5565095144851134e-05,
"loss": 0.9709,
"step": 57500
},
{
"epoch": 0.87,
"learning_rate": 3.543957423306723e-05,
"loss": 0.9656,
"step": 58000
},
{
"epoch": 0.88,
"learning_rate": 3.531405332128333e-05,
"loss": 0.9632,
"step": 58500
},
{
"epoch": 0.89,
"learning_rate": 3.5188532409499425e-05,
"loss": 0.9767,
"step": 59000
},
{
"epoch": 0.9,
"learning_rate": 3.506301149771552e-05,
"loss": 0.9669,
"step": 59500
},
{
"epoch": 0.9,
"learning_rate": 3.493749058593162e-05,
"loss": 0.9692,
"step": 60000
},
{
"epoch": 0.91,
"learning_rate": 3.4811969674147715e-05,
"loss": 0.97,
"step": 60500
},
{
"epoch": 0.92,
"learning_rate": 3.468644876236381e-05,
"loss": 0.9514,
"step": 61000
},
{
"epoch": 0.93,
"learning_rate": 3.456092785057991e-05,
"loss": 0.9421,
"step": 61500
},
{
"epoch": 0.93,
"learning_rate": 3.4435406938796006e-05,
"loss": 0.9734,
"step": 62000
},
{
"epoch": 0.94,
"learning_rate": 3.43098860270121e-05,
"loss": 0.9562,
"step": 62500
},
{
"epoch": 0.95,
"learning_rate": 3.41843651152282e-05,
"loss": 0.9625,
"step": 63000
},
{
"epoch": 0.96,
"learning_rate": 3.40588442034443e-05,
"loss": 0.9542,
"step": 63500
},
{
"epoch": 0.96,
"learning_rate": 3.393332329166039e-05,
"loss": 0.9497,
"step": 64000
},
{
"epoch": 0.97,
"learning_rate": 3.380780237987649e-05,
"loss": 0.9431,
"step": 64500
},
{
"epoch": 0.98,
"learning_rate": 3.368228146809259e-05,
"loss": 0.9456,
"step": 65000
},
{
"epoch": 0.99,
"learning_rate": 3.355676055630868e-05,
"loss": 0.9395,
"step": 65500
},
{
"epoch": 0.99,
"learning_rate": 3.3431239644524784e-05,
"loss": 0.9393,
"step": 66000
},
{
"epoch": 1.0,
"learning_rate": 3.330571873274088e-05,
"loss": 0.9243,
"step": 66500
},
{
"epoch": 1.01,
"learning_rate": 3.318019782095697e-05,
"loss": 0.8449,
"step": 67000
},
{
"epoch": 1.02,
"learning_rate": 3.305467690917307e-05,
"loss": 0.8542,
"step": 67500
},
{
"epoch": 1.02,
"learning_rate": 3.292915599738917e-05,
"loss": 0.8496,
"step": 68000
},
{
"epoch": 1.03,
"learning_rate": 3.2803635085605264e-05,
"loss": 0.8452,
"step": 68500
},
{
"epoch": 1.04,
"learning_rate": 3.267811417382136e-05,
"loss": 0.8361,
"step": 69000
},
{
"epoch": 1.05,
"learning_rate": 3.2552593262037453e-05,
"loss": 0.8441,
"step": 69500
},
{
"epoch": 1.05,
"learning_rate": 3.242707235025355e-05,
"loss": 0.8536,
"step": 70000
},
{
"epoch": 1.06,
"learning_rate": 3.230155143846965e-05,
"loss": 0.8338,
"step": 70500
},
{
"epoch": 1.07,
"learning_rate": 3.2176030526685744e-05,
"loss": 0.845,
"step": 71000
},
{
"epoch": 1.08,
"learning_rate": 3.2050509614901846e-05,
"loss": 0.8544,
"step": 71500
},
{
"epoch": 1.08,
"learning_rate": 3.192498870311794e-05,
"loss": 0.8487,
"step": 72000
},
{
"epoch": 1.09,
"learning_rate": 3.1799467791334035e-05,
"loss": 0.8273,
"step": 72500
},
{
"epoch": 1.1,
"learning_rate": 3.1673946879550136e-05,
"loss": 0.8581,
"step": 73000
},
{
"epoch": 1.11,
"learning_rate": 3.154842596776623e-05,
"loss": 0.8493,
"step": 73500
},
{
"epoch": 1.11,
"learning_rate": 3.1422905055982326e-05,
"loss": 0.8321,
"step": 74000
},
{
"epoch": 1.12,
"learning_rate": 3.129738414419843e-05,
"loss": 0.8404,
"step": 74500
},
{
"epoch": 1.13,
"learning_rate": 3.117186323241452e-05,
"loss": 0.8207,
"step": 75000
},
{
"epoch": 1.14,
"learning_rate": 3.104634232063062e-05,
"loss": 0.8439,
"step": 75500
},
{
"epoch": 1.14,
"learning_rate": 3.092082140884672e-05,
"loss": 0.8462,
"step": 76000
},
{
"epoch": 1.15,
"learning_rate": 3.079530049706281e-05,
"loss": 0.8441,
"step": 76500
},
{
"epoch": 1.16,
"learning_rate": 3.066977958527891e-05,
"loss": 0.8394,
"step": 77000
},
{
"epoch": 1.17,
"learning_rate": 3.054425867349501e-05,
"loss": 0.8441,
"step": 77500
},
{
"epoch": 1.17,
"learning_rate": 3.0418737761711104e-05,
"loss": 0.8462,
"step": 78000
},
{
"epoch": 1.18,
"learning_rate": 3.0293216849927198e-05,
"loss": 0.8392,
"step": 78500
},
{
"epoch": 1.19,
"learning_rate": 3.0167695938143296e-05,
"loss": 0.835,
"step": 79000
},
{
"epoch": 1.2,
"learning_rate": 3.004217502635939e-05,
"loss": 0.8544,
"step": 79500
},
{
"epoch": 1.21,
"learning_rate": 2.991665411457549e-05,
"loss": 0.8275,
"step": 80000
},
{
"epoch": 1.21,
"learning_rate": 2.9791133202791587e-05,
"loss": 0.8288,
"step": 80500
},
{
"epoch": 1.22,
"learning_rate": 2.9665612291007682e-05,
"loss": 0.838,
"step": 81000
},
{
"epoch": 1.23,
"learning_rate": 2.9540091379223776e-05,
"loss": 0.8369,
"step": 81500
},
{
"epoch": 1.24,
"learning_rate": 2.9414570467439878e-05,
"loss": 0.8309,
"step": 82000
},
{
"epoch": 1.24,
"learning_rate": 2.9289049555655973e-05,
"loss": 0.8313,
"step": 82500
},
{
"epoch": 1.25,
"learning_rate": 2.9163528643872067e-05,
"loss": 0.845,
"step": 83000
},
{
"epoch": 1.26,
"learning_rate": 2.903800773208817e-05,
"loss": 0.8348,
"step": 83500
},
{
"epoch": 1.27,
"learning_rate": 2.8912486820304263e-05,
"loss": 0.8231,
"step": 84000
},
{
"epoch": 1.27,
"learning_rate": 2.8786965908520365e-05,
"loss": 0.8286,
"step": 84500
},
{
"epoch": 1.28,
"learning_rate": 2.866144499673646e-05,
"loss": 0.8203,
"step": 85000
},
{
"epoch": 1.29,
"learning_rate": 2.8535924084952554e-05,
"loss": 0.8264,
"step": 85500
},
{
"epoch": 1.3,
"learning_rate": 2.8410403173168652e-05,
"loss": 0.8291,
"step": 86000
},
{
"epoch": 1.3,
"learning_rate": 2.828488226138475e-05,
"loss": 0.8222,
"step": 86500
},
{
"epoch": 1.31,
"learning_rate": 2.8159361349600845e-05,
"loss": 0.8357,
"step": 87000
},
{
"epoch": 1.32,
"learning_rate": 2.8033840437816943e-05,
"loss": 0.8327,
"step": 87500
},
{
"epoch": 1.33,
"learning_rate": 2.7908319526033038e-05,
"loss": 0.834,
"step": 88000
},
{
"epoch": 1.33,
"learning_rate": 2.7782798614249132e-05,
"loss": 0.8187,
"step": 88500
},
{
"epoch": 1.34,
"learning_rate": 2.7657277702465234e-05,
"loss": 0.8252,
"step": 89000
},
{
"epoch": 1.35,
"learning_rate": 2.753175679068133e-05,
"loss": 0.8313,
"step": 89500
},
{
"epoch": 1.36,
"learning_rate": 2.7406235878897423e-05,
"loss": 0.8255,
"step": 90000
},
{
"epoch": 1.36,
"learning_rate": 2.7280714967113525e-05,
"loss": 0.827,
"step": 90500
},
{
"epoch": 1.37,
"learning_rate": 2.715519405532962e-05,
"loss": 0.8251,
"step": 91000
},
{
"epoch": 1.38,
"learning_rate": 2.7029673143545714e-05,
"loss": 0.8173,
"step": 91500
},
{
"epoch": 1.39,
"learning_rate": 2.6904152231761815e-05,
"loss": 0.82,
"step": 92000
},
{
"epoch": 1.39,
"learning_rate": 2.677863131997791e-05,
"loss": 0.814,
"step": 92500
},
{
"epoch": 1.4,
"learning_rate": 2.6653110408194005e-05,
"loss": 0.8218,
"step": 93000
},
{
"epoch": 1.41,
"learning_rate": 2.6527589496410106e-05,
"loss": 0.8118,
"step": 93500
},
{
"epoch": 1.42,
"learning_rate": 2.64020685846262e-05,
"loss": 0.8189,
"step": 94000
},
{
"epoch": 1.42,
"learning_rate": 2.6276547672842296e-05,
"loss": 0.8215,
"step": 94500
},
{
"epoch": 1.43,
"learning_rate": 2.6151026761058394e-05,
"loss": 0.822,
"step": 95000
},
{
"epoch": 1.44,
"learning_rate": 2.6025505849274488e-05,
"loss": 0.8149,
"step": 95500
},
{
"epoch": 1.45,
"learning_rate": 2.5899984937490583e-05,
"loss": 0.8271,
"step": 96000
},
{
"epoch": 1.45,
"learning_rate": 2.5774464025706684e-05,
"loss": 0.823,
"step": 96500
},
{
"epoch": 1.46,
"learning_rate": 2.564894311392278e-05,
"loss": 0.8167,
"step": 97000
},
{
"epoch": 1.47,
"learning_rate": 2.552342220213888e-05,
"loss": 0.8127,
"step": 97500
},
{
"epoch": 1.48,
"learning_rate": 2.5397901290354975e-05,
"loss": 0.7968,
"step": 98000
},
{
"epoch": 1.48,
"learning_rate": 2.527238037857107e-05,
"loss": 0.8144,
"step": 98500
},
{
"epoch": 1.49,
"learning_rate": 2.514685946678717e-05,
"loss": 0.8111,
"step": 99000
},
{
"epoch": 1.5,
"learning_rate": 2.5021338555003266e-05,
"loss": 0.8125,
"step": 99500
},
{
"epoch": 1.51,
"learning_rate": 2.489581764321936e-05,
"loss": 0.8158,
"step": 100000
},
{
"epoch": 1.51,
"learning_rate": 2.477029673143546e-05,
"loss": 0.8132,
"step": 100500
},
{
"epoch": 1.52,
"learning_rate": 2.4644775819651557e-05,
"loss": 0.8119,
"step": 101000
},
{
"epoch": 1.53,
"learning_rate": 2.451925490786765e-05,
"loss": 0.8121,
"step": 101500
},
{
"epoch": 1.54,
"learning_rate": 2.439373399608375e-05,
"loss": 0.7989,
"step": 102000
},
{
"epoch": 1.54,
"learning_rate": 2.4268213084299844e-05,
"loss": 0.8099,
"step": 102500
},
{
"epoch": 1.55,
"learning_rate": 2.4142692172515942e-05,
"loss": 0.8157,
"step": 103000
},
{
"epoch": 1.56,
"learning_rate": 2.4017171260732037e-05,
"loss": 0.807,
"step": 103500
},
{
"epoch": 1.57,
"learning_rate": 2.3891650348948135e-05,
"loss": 0.8021,
"step": 104000
},
{
"epoch": 1.57,
"learning_rate": 2.3766129437164233e-05,
"loss": 0.7899,
"step": 104500
},
{
"epoch": 1.58,
"learning_rate": 2.3640608525380328e-05,
"loss": 0.8051,
"step": 105000
},
{
"epoch": 1.59,
"learning_rate": 2.3515087613596426e-05,
"loss": 0.8025,
"step": 105500
},
{
"epoch": 1.6,
"learning_rate": 2.3389566701812524e-05,
"loss": 0.7956,
"step": 106000
},
{
"epoch": 1.6,
"learning_rate": 2.326404579002862e-05,
"loss": 0.8237,
"step": 106500
},
{
"epoch": 1.61,
"learning_rate": 2.3138524878244717e-05,
"loss": 0.8056,
"step": 107000
},
{
"epoch": 1.62,
"learning_rate": 2.3013003966460815e-05,
"loss": 0.7946,
"step": 107500
},
{
"epoch": 1.63,
"learning_rate": 2.2887483054676913e-05,
"loss": 0.7888,
"step": 108000
},
{
"epoch": 1.63,
"learning_rate": 2.2761962142893007e-05,
"loss": 0.7957,
"step": 108500
},
{
"epoch": 1.64,
"learning_rate": 2.2636441231109105e-05,
"loss": 0.7968,
"step": 109000
},
{
"epoch": 1.65,
"learning_rate": 2.25109203193252e-05,
"loss": 0.7996,
"step": 109500
},
{
"epoch": 1.66,
"learning_rate": 2.2385399407541298e-05,
"loss": 0.7899,
"step": 110000
},
{
"epoch": 1.66,
"learning_rate": 2.2259878495757393e-05,
"loss": 0.8012,
"step": 110500
},
{
"epoch": 1.67,
"learning_rate": 2.213435758397349e-05,
"loss": 0.7944,
"step": 111000
},
{
"epoch": 1.68,
"learning_rate": 2.2008836672189586e-05,
"loss": 0.7916,
"step": 111500
},
{
"epoch": 1.69,
"learning_rate": 2.1883315760405684e-05,
"loss": 0.7932,
"step": 112000
},
{
"epoch": 1.69,
"learning_rate": 2.1757794848621782e-05,
"loss": 0.7904,
"step": 112500
},
{
"epoch": 1.7,
"learning_rate": 2.1632273936837876e-05,
"loss": 0.7935,
"step": 113000
},
{
"epoch": 1.71,
"learning_rate": 2.1506753025053974e-05,
"loss": 0.7881,
"step": 113500
},
{
"epoch": 1.72,
"learning_rate": 2.1381232113270073e-05,
"loss": 0.7754,
"step": 114000
},
{
"epoch": 1.72,
"learning_rate": 2.125571120148617e-05,
"loss": 0.7974,
"step": 114500
},
{
"epoch": 1.73,
"learning_rate": 2.1130190289702265e-05,
"loss": 0.7857,
"step": 115000
},
{
"epoch": 1.74,
"learning_rate": 2.1004669377918363e-05,
"loss": 0.7714,
"step": 115500
},
{
"epoch": 1.75,
"learning_rate": 2.087914846613446e-05,
"loss": 0.7925,
"step": 116000
},
{
"epoch": 1.75,
"learning_rate": 2.0753627554350556e-05,
"loss": 0.7864,
"step": 116500
},
{
"epoch": 1.76,
"learning_rate": 2.0628106642566654e-05,
"loss": 0.7866,
"step": 117000
},
{
"epoch": 1.77,
"learning_rate": 2.050258573078275e-05,
"loss": 0.789,
"step": 117500
},
{
"epoch": 1.78,
"learning_rate": 2.0377064818998843e-05,
"loss": 0.7667,
"step": 118000
},
{
"epoch": 1.78,
"learning_rate": 2.025154390721494e-05,
"loss": 0.7824,
"step": 118500
},
{
"epoch": 1.79,
"learning_rate": 2.012602299543104e-05,
"loss": 0.7811,
"step": 119000
},
{
"epoch": 1.8,
"learning_rate": 2.0000502083647138e-05,
"loss": 0.7861,
"step": 119500
},
{
"epoch": 1.81,
"learning_rate": 1.9874981171863232e-05,
"loss": 0.779,
"step": 120000
},
{
"epoch": 1.82,
"learning_rate": 1.974946026007933e-05,
"loss": 0.7766,
"step": 120500
},
{
"epoch": 1.82,
"learning_rate": 1.962393934829543e-05,
"loss": 0.772,
"step": 121000
},
{
"epoch": 1.83,
"learning_rate": 1.9498418436511523e-05,
"loss": 0.7826,
"step": 121500
},
{
"epoch": 1.84,
"learning_rate": 1.937289752472762e-05,
"loss": 0.7771,
"step": 122000
},
{
"epoch": 1.85,
"learning_rate": 1.924737661294372e-05,
"loss": 0.7636,
"step": 122500
},
{
"epoch": 1.85,
"learning_rate": 1.9121855701159814e-05,
"loss": 0.7749,
"step": 123000
},
{
"epoch": 1.86,
"learning_rate": 1.8996334789375912e-05,
"loss": 0.7696,
"step": 123500
},
{
"epoch": 1.87,
"learning_rate": 1.887081387759201e-05,
"loss": 0.7742,
"step": 124000
},
{
"epoch": 1.88,
"learning_rate": 1.8745292965808105e-05,
"loss": 0.7568,
"step": 124500
},
{
"epoch": 1.88,
"learning_rate": 1.86197720540242e-05,
"loss": 0.7782,
"step": 125000
},
{
"epoch": 1.89,
"learning_rate": 1.8494251142240297e-05,
"loss": 0.7762,
"step": 125500
},
{
"epoch": 1.9,
"learning_rate": 1.8368730230456396e-05,
"loss": 0.7607,
"step": 126000
},
{
"epoch": 1.91,
"learning_rate": 1.824320931867249e-05,
"loss": 0.764,
"step": 126500
},
{
"epoch": 1.91,
"learning_rate": 1.8117688406888588e-05,
"loss": 0.7698,
"step": 127000
},
{
"epoch": 1.92,
"learning_rate": 1.7992167495104686e-05,
"loss": 0.768,
"step": 127500
},
{
"epoch": 1.93,
"learning_rate": 1.786664658332078e-05,
"loss": 0.7729,
"step": 128000
},
{
"epoch": 1.94,
"learning_rate": 1.774112567153688e-05,
"loss": 0.7641,
"step": 128500
},
{
"epoch": 1.94,
"learning_rate": 1.7615604759752977e-05,
"loss": 0.7623,
"step": 129000
},
{
"epoch": 1.95,
"learning_rate": 1.7490083847969072e-05,
"loss": 0.7729,
"step": 129500
},
{
"epoch": 1.96,
"learning_rate": 1.736456293618517e-05,
"loss": 0.7548,
"step": 130000
},
{
"epoch": 1.97,
"learning_rate": 1.7239042024401268e-05,
"loss": 0.7738,
"step": 130500
},
{
"epoch": 1.97,
"learning_rate": 1.7113521112617363e-05,
"loss": 0.7659,
"step": 131000
},
{
"epoch": 1.98,
"learning_rate": 1.698800020083346e-05,
"loss": 0.7525,
"step": 131500
},
{
"epoch": 1.99,
"learning_rate": 1.6862479289049555e-05,
"loss": 0.7809,
"step": 132000
},
{
"epoch": 2.0,
"learning_rate": 1.6736958377265653e-05,
"loss": 0.777,
"step": 132500
},
{
"epoch": 2.0,
"learning_rate": 1.6611437465481748e-05,
"loss": 0.725,
"step": 133000
},
{
"epoch": 2.01,
"learning_rate": 1.6485916553697846e-05,
"loss": 0.657,
"step": 133500
},
{
"epoch": 2.02,
"learning_rate": 1.6360395641913944e-05,
"loss": 0.6593,
"step": 134000
},
{
"epoch": 2.03,
"learning_rate": 1.623487473013004e-05,
"loss": 0.6657,
"step": 134500
},
{
"epoch": 2.03,
"learning_rate": 1.6109353818346137e-05,
"loss": 0.6536,
"step": 135000
},
{
"epoch": 2.04,
"learning_rate": 1.5983832906562235e-05,
"loss": 0.6721,
"step": 135500
},
{
"epoch": 2.05,
"learning_rate": 1.585831199477833e-05,
"loss": 0.6559,
"step": 136000
},
{
"epoch": 2.06,
"learning_rate": 1.5732791082994428e-05,
"loss": 0.6645,
"step": 136500
},
{
"epoch": 2.06,
"learning_rate": 1.5607270171210526e-05,
"loss": 0.6506,
"step": 137000
},
{
"epoch": 2.07,
"learning_rate": 1.548174925942662e-05,
"loss": 0.673,
"step": 137500
},
{
"epoch": 2.08,
"learning_rate": 1.535622834764272e-05,
"loss": 0.6579,
"step": 138000
},
{
"epoch": 2.09,
"learning_rate": 1.5230707435858815e-05,
"loss": 0.6589,
"step": 138500
},
{
"epoch": 2.09,
"learning_rate": 1.5105186524074913e-05,
"loss": 0.6698,
"step": 139000
},
{
"epoch": 2.1,
"learning_rate": 1.4979665612291008e-05,
"loss": 0.6613,
"step": 139500
},
{
"epoch": 2.11,
"learning_rate": 1.4854144700507106e-05,
"loss": 0.6528,
"step": 140000
},
{
"epoch": 2.12,
"learning_rate": 1.4728623788723204e-05,
"loss": 0.6667,
"step": 140500
},
{
"epoch": 2.12,
"learning_rate": 1.4603102876939298e-05,
"loss": 0.6694,
"step": 141000
},
{
"epoch": 2.13,
"learning_rate": 1.4477581965155396e-05,
"loss": 0.6735,
"step": 141500
},
{
"epoch": 2.14,
"learning_rate": 1.4352061053371493e-05,
"loss": 0.6559,
"step": 142000
},
{
"epoch": 2.15,
"learning_rate": 1.4226540141587587e-05,
"loss": 0.6551,
"step": 142500
},
{
"epoch": 2.15,
"learning_rate": 1.4101019229803686e-05,
"loss": 0.6628,
"step": 143000
},
{
"epoch": 2.16,
"learning_rate": 1.3975498318019784e-05,
"loss": 0.6638,
"step": 143500
},
{
"epoch": 2.17,
"learning_rate": 1.3849977406235878e-05,
"loss": 0.6602,
"step": 144000
},
{
"epoch": 2.18,
"learning_rate": 1.3724456494451976e-05,
"loss": 0.6618,
"step": 144500
},
{
"epoch": 2.18,
"learning_rate": 1.3598935582668074e-05,
"loss": 0.6663,
"step": 145000
},
{
"epoch": 2.19,
"learning_rate": 1.347341467088417e-05,
"loss": 0.6533,
"step": 145500
},
{
"epoch": 2.2,
"learning_rate": 1.3347893759100265e-05,
"loss": 0.6451,
"step": 146000
},
{
"epoch": 2.21,
"learning_rate": 1.3222372847316364e-05,
"loss": 0.6701,
"step": 146500
},
{
"epoch": 2.21,
"learning_rate": 1.3096851935532462e-05,
"loss": 0.6705,
"step": 147000
},
{
"epoch": 2.22,
"learning_rate": 1.2971331023748556e-05,
"loss": 0.6605,
"step": 147500
},
{
"epoch": 2.23,
"learning_rate": 1.2845810111964654e-05,
"loss": 0.6517,
"step": 148000
},
{
"epoch": 2.24,
"learning_rate": 1.2720289200180752e-05,
"loss": 0.6596,
"step": 148500
},
{
"epoch": 2.24,
"learning_rate": 1.2594768288396847e-05,
"loss": 0.6531,
"step": 149000
},
{
"epoch": 2.25,
"learning_rate": 1.2469247376612943e-05,
"loss": 0.6542,
"step": 149500
},
{
"epoch": 2.26,
"learning_rate": 1.234372646482904e-05,
"loss": 0.6586,
"step": 150000
},
{
"epoch": 2.27,
"learning_rate": 1.2218205553045138e-05,
"loss": 0.647,
"step": 150500
},
{
"epoch": 2.27,
"learning_rate": 1.2092684641261234e-05,
"loss": 0.6634,
"step": 151000
},
{
"epoch": 2.28,
"learning_rate": 1.1967163729477332e-05,
"loss": 0.65,
"step": 151500
},
{
"epoch": 2.29,
"learning_rate": 1.1841642817693429e-05,
"loss": 0.6467,
"step": 152000
},
{
"epoch": 2.3,
"learning_rate": 1.1716121905909525e-05,
"loss": 0.6595,
"step": 152500
},
{
"epoch": 2.3,
"learning_rate": 1.1590600994125621e-05,
"loss": 0.6564,
"step": 153000
},
{
"epoch": 2.31,
"learning_rate": 1.1465080082341718e-05,
"loss": 0.6583,
"step": 153500
},
{
"epoch": 2.32,
"learning_rate": 1.1339559170557816e-05,
"loss": 0.6623,
"step": 154000
},
{
"epoch": 2.33,
"learning_rate": 1.1214038258773912e-05,
"loss": 0.6557,
"step": 154500
},
{
"epoch": 2.33,
"learning_rate": 1.1088517346990009e-05,
"loss": 0.6523,
"step": 155000
},
{
"epoch": 2.34,
"learning_rate": 1.0962996435206107e-05,
"loss": 0.6707,
"step": 155500
},
{
"epoch": 2.35,
"learning_rate": 1.0837475523422203e-05,
"loss": 0.658,
"step": 156000
},
{
"epoch": 2.36,
"learning_rate": 1.07119546116383e-05,
"loss": 0.6381,
"step": 156500
},
{
"epoch": 2.36,
"learning_rate": 1.0586433699854396e-05,
"loss": 0.6633,
"step": 157000
},
{
"epoch": 2.37,
"learning_rate": 1.0460912788070492e-05,
"loss": 0.6568,
"step": 157500
},
{
"epoch": 2.38,
"learning_rate": 1.033539187628659e-05,
"loss": 0.6483,
"step": 158000
},
{
"epoch": 2.39,
"learning_rate": 1.0209870964502687e-05,
"loss": 0.6478,
"step": 158500
},
{
"epoch": 2.39,
"learning_rate": 1.0084350052718783e-05,
"loss": 0.6565,
"step": 159000
},
{
"epoch": 2.4,
"learning_rate": 9.958829140934881e-06,
"loss": 0.6487,
"step": 159500
},
{
"epoch": 2.41,
"learning_rate": 9.833308229150977e-06,
"loss": 0.6547,
"step": 160000
},
{
"epoch": 2.42,
"learning_rate": 9.707787317367074e-06,
"loss": 0.6511,
"step": 160500
},
{
"epoch": 2.43,
"learning_rate": 9.58226640558317e-06,
"loss": 0.6421,
"step": 161000
},
{
"epoch": 2.43,
"learning_rate": 9.456745493799266e-06,
"loss": 0.6417,
"step": 161500
},
{
"epoch": 2.44,
"learning_rate": 9.331224582015364e-06,
"loss": 0.6452,
"step": 162000
},
{
"epoch": 2.45,
"learning_rate": 9.20570367023146e-06,
"loss": 0.6525,
"step": 162500
},
{
"epoch": 2.46,
"learning_rate": 9.080182758447557e-06,
"loss": 0.6516,
"step": 163000
},
{
"epoch": 2.46,
"learning_rate": 8.954661846663655e-06,
"loss": 0.646,
"step": 163500
},
{
"epoch": 2.47,
"learning_rate": 8.829140934879752e-06,
"loss": 0.6428,
"step": 164000
},
{
"epoch": 2.48,
"learning_rate": 8.703620023095848e-06,
"loss": 0.6431,
"step": 164500
},
{
"epoch": 2.49,
"learning_rate": 8.578099111311944e-06,
"loss": 0.6477,
"step": 165000
},
{
"epoch": 2.49,
"learning_rate": 8.45257819952804e-06,
"loss": 0.6491,
"step": 165500
},
{
"epoch": 2.5,
"learning_rate": 8.327057287744139e-06,
"loss": 0.6482,
"step": 166000
},
{
"epoch": 2.51,
"learning_rate": 8.201536375960235e-06,
"loss": 0.6458,
"step": 166500
},
{
"epoch": 2.52,
"learning_rate": 8.076015464176333e-06,
"loss": 0.6483,
"step": 167000
},
{
"epoch": 2.52,
"learning_rate": 7.95049455239243e-06,
"loss": 0.6491,
"step": 167500
},
{
"epoch": 2.53,
"learning_rate": 7.824973640608526e-06,
"loss": 0.6408,
"step": 168000
},
{
"epoch": 2.54,
"learning_rate": 7.699452728824622e-06,
"loss": 0.6491,
"step": 168500
},
{
"epoch": 2.55,
"learning_rate": 7.5739318170407196e-06,
"loss": 0.6472,
"step": 169000
},
{
"epoch": 2.55,
"learning_rate": 7.448410905256815e-06,
"loss": 0.6422,
"step": 169500
},
{
"epoch": 2.56,
"learning_rate": 7.322889993472913e-06,
"loss": 0.6471,
"step": 170000
},
{
"epoch": 2.57,
"learning_rate": 7.1973690816890095e-06,
"loss": 0.6343,
"step": 170500
},
{
"epoch": 2.58,
"learning_rate": 7.071848169905107e-06,
"loss": 0.6446,
"step": 171000
},
{
"epoch": 2.58,
"learning_rate": 6.946327258121203e-06,
"loss": 0.6444,
"step": 171500
},
{
"epoch": 2.59,
"learning_rate": 6.8208063463372995e-06,
"loss": 0.6455,
"step": 172000
},
{
"epoch": 2.6,
"learning_rate": 6.6952854345533975e-06,
"loss": 0.6448,
"step": 172500
},
{
"epoch": 2.61,
"learning_rate": 6.569764522769493e-06,
"loss": 0.6367,
"step": 173000
},
{
"epoch": 2.61,
"learning_rate": 6.444243610985591e-06,
"loss": 0.6303,
"step": 173500
},
{
"epoch": 2.62,
"learning_rate": 6.3187226992016875e-06,
"loss": 0.6392,
"step": 174000
},
{
"epoch": 2.63,
"learning_rate": 6.193201787417785e-06,
"loss": 0.6296,
"step": 174500
},
{
"epoch": 2.64,
"learning_rate": 6.06768087563388e-06,
"loss": 0.6411,
"step": 175000
},
{
"epoch": 2.64,
"learning_rate": 5.942159963849977e-06,
"loss": 0.6432,
"step": 175500
},
{
"epoch": 2.65,
"learning_rate": 5.816639052066075e-06,
"loss": 0.6483,
"step": 176000
},
{
"epoch": 2.66,
"learning_rate": 5.691118140282172e-06,
"loss": 0.6355,
"step": 176500
},
{
"epoch": 2.67,
"learning_rate": 5.565597228498268e-06,
"loss": 0.6426,
"step": 177000
},
{
"epoch": 2.67,
"learning_rate": 5.440076316714365e-06,
"loss": 0.64,
"step": 177500
},
{
"epoch": 2.68,
"learning_rate": 5.314555404930462e-06,
"loss": 0.6434,
"step": 178000
},
{
"epoch": 2.69,
"learning_rate": 5.189034493146558e-06,
"loss": 0.6373,
"step": 178500
},
{
"epoch": 2.7,
"learning_rate": 5.063513581362655e-06,
"loss": 0.6281,
"step": 179000
},
{
"epoch": 2.7,
"learning_rate": 4.937992669578752e-06,
"loss": 0.6323,
"step": 179500
},
{
"epoch": 2.71,
"learning_rate": 4.812471757794849e-06,
"loss": 0.6334,
"step": 180000
},
{
"epoch": 2.72,
"learning_rate": 4.686950846010945e-06,
"loss": 0.6316,
"step": 180500
},
{
"epoch": 2.73,
"learning_rate": 4.5614299342270425e-06,
"loss": 0.6442,
"step": 181000
},
{
"epoch": 2.73,
"learning_rate": 4.435909022443139e-06,
"loss": 0.6429,
"step": 181500
},
{
"epoch": 2.74,
"learning_rate": 4.310388110659236e-06,
"loss": 0.6224,
"step": 182000
},
{
"epoch": 2.75,
"learning_rate": 4.1848671988753325e-06,
"loss": 0.6314,
"step": 182500
},
{
"epoch": 2.76,
"learning_rate": 4.05934628709143e-06,
"loss": 0.6576,
"step": 183000
},
{
"epoch": 2.76,
"learning_rate": 3.933825375307527e-06,
"loss": 0.6348,
"step": 183500
},
{
"epoch": 2.77,
"learning_rate": 3.808304463523623e-06,
"loss": 0.635,
"step": 184000
},
{
"epoch": 2.78,
"learning_rate": 3.68278355173972e-06,
"loss": 0.6332,
"step": 184500
},
{
"epoch": 2.79,
"learning_rate": 3.557262639955817e-06,
"loss": 0.6282,
"step": 185000
},
{
"epoch": 2.79,
"learning_rate": 3.4317417281719137e-06,
"loss": 0.6333,
"step": 185500
},
{
"epoch": 2.8,
"learning_rate": 3.30622081638801e-06,
"loss": 0.6345,
"step": 186000
},
{
"epoch": 2.81,
"learning_rate": 3.1806999046041072e-06,
"loss": 0.6408,
"step": 186500
},
{
"epoch": 2.82,
"learning_rate": 3.055178992820204e-06,
"loss": 0.6284,
"step": 187000
},
{
"epoch": 2.82,
"learning_rate": 2.929658081036301e-06,
"loss": 0.6332,
"step": 187500
},
{
"epoch": 2.83,
"learning_rate": 2.8041371692523976e-06,
"loss": 0.6361,
"step": 188000
},
{
"epoch": 2.84,
"learning_rate": 2.6786162574684944e-06,
"loss": 0.6307,
"step": 188500
},
{
"epoch": 2.85,
"learning_rate": 2.553095345684591e-06,
"loss": 0.6373,
"step": 189000
},
{
"epoch": 2.85,
"learning_rate": 2.427574433900688e-06,
"loss": 0.6421,
"step": 189500
},
{
"epoch": 2.86,
"learning_rate": 2.3020535221167848e-06,
"loss": 0.635,
"step": 190000
},
{
"epoch": 2.87,
"learning_rate": 2.1765326103328816e-06,
"loss": 0.6273,
"step": 190500
},
{
"epoch": 2.88,
"learning_rate": 2.0510116985489784e-06,
"loss": 0.6374,
"step": 191000
},
{
"epoch": 2.88,
"learning_rate": 1.925490786765075e-06,
"loss": 0.6333,
"step": 191500
},
{
"epoch": 2.89,
"learning_rate": 1.799969874981172e-06,
"loss": 0.6285,
"step": 192000
},
{
"epoch": 2.9,
"learning_rate": 1.6744489631972687e-06,
"loss": 0.6266,
"step": 192500
},
{
"epoch": 2.91,
"learning_rate": 1.5489280514133655e-06,
"loss": 0.6358,
"step": 193000
},
{
"epoch": 2.91,
"learning_rate": 1.4234071396294623e-06,
"loss": 0.6263,
"step": 193500
},
{
"epoch": 2.92,
"learning_rate": 1.2978862278455591e-06,
"loss": 0.6428,
"step": 194000
},
{
"epoch": 2.93,
"learning_rate": 1.172365316061656e-06,
"loss": 0.6295,
"step": 194500
},
{
"epoch": 2.94,
"learning_rate": 1.0468444042777527e-06,
"loss": 0.632,
"step": 195000
},
{
"epoch": 2.94,
"learning_rate": 9.213234924938495e-07,
"loss": 0.6203,
"step": 195500
},
{
"epoch": 2.95,
"learning_rate": 7.958025807099463e-07,
"loss": 0.6223,
"step": 196000
},
{
"epoch": 2.96,
"learning_rate": 6.702816689260432e-07,
"loss": 0.6341,
"step": 196500
},
{
"epoch": 2.97,
"learning_rate": 5.4476075714214e-07,
"loss": 0.628,
"step": 197000
},
{
"epoch": 2.97,
"learning_rate": 4.192398453582367e-07,
"loss": 0.6267,
"step": 197500
},
{
"epoch": 2.98,
"learning_rate": 2.937189335743335e-07,
"loss": 0.6196,
"step": 198000
},
{
"epoch": 2.99,
"learning_rate": 1.681980217904303e-07,
"loss": 0.6263,
"step": 198500
},
{
"epoch": 3.0,
"learning_rate": 4.267711000652709e-08,
"loss": 0.6211,
"step": 199000
},
{
"epoch": 3.0,
"step": 199170,
"total_flos": 2.210416975580037e+17,
"train_loss": 0.564786915779832,
"train_runtime": 39960.6259,
"train_samples_per_second": 49.841,
"train_steps_per_second": 4.984
}
],
"max_steps": 199170,
"num_train_epochs": 3,
"total_flos": 2.210416975580037e+17,
"trial_name": null,
"trial_params": null
}