{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.99957905961919, "eval_steps": 500, "global_step": 1069020, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00015, "loss": 33.6336, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.0003, "loss": 5.981, "step": 1000 }, { "epoch": 0.04, "learning_rate": 0.0002998595531918878, "loss": 5.8563, "step": 1500 }, { "epoch": 0.06, "learning_rate": 0.00029971910638377554, "loss": 5.7975, "step": 2000 }, { "epoch": 0.07, "learning_rate": 0.00029957865957566334, "loss": 5.7589, "step": 2500 }, { "epoch": 0.08, "learning_rate": 0.00029943821276755115, "loss": 5.7376, "step": 3000 }, { "epoch": 0.1, "learning_rate": 0.00029929776595943896, "loss": 5.7336, "step": 3500 }, { "epoch": 0.11, "learning_rate": 0.0002991573191513267, "loss": 5.6985, "step": 4000 }, { "epoch": 0.13, "learning_rate": 0.0002990168723432145, "loss": 5.6467, "step": 4500 }, { "epoch": 0.14, "learning_rate": 0.00029887642553510233, "loss": 5.5431, "step": 5000 }, { "epoch": 0.15, "learning_rate": 0.0002987359787269901, "loss": 5.2658, "step": 5500 }, { "epoch": 0.17, "learning_rate": 0.0002985955319188779, "loss": 4.7901, "step": 6000 }, { "epoch": 0.18, "learning_rate": 0.0002984550851107657, "loss": 4.3833, "step": 6500 }, { "epoch": 0.2, "learning_rate": 0.0002983146383026535, "loss": 4.1403, "step": 7000 }, { "epoch": 0.21, "learning_rate": 0.00029817419149454126, "loss": 3.8533, "step": 7500 }, { "epoch": 0.22, "learning_rate": 0.00029803374468642907, "loss": 3.7172, "step": 8000 }, { "epoch": 0.24, "learning_rate": 0.0002978932978783169, "loss": 3.6096, "step": 8500 }, { "epoch": 0.25, "learning_rate": 0.00029775285107020463, "loss": 3.4788, "step": 9000 }, { "epoch": 0.27, "learning_rate": 0.00029761240426209244, "loss": 3.3928, "step": 9500 }, { "epoch": 0.28, "learning_rate": 0.00029747195745398025, "loss": 3.2847, "step": 10000 }, { "epoch": 0.29, "learning_rate": 0.00029733151064586806, "loss": 3.2708, "step": 10500 }, { "epoch": 0.31, "learning_rate": 0.0002971910638377558, "loss": 3.2045, "step": 11000 }, { "epoch": 0.32, "learning_rate": 0.0002970506170296436, "loss": 3.1776, "step": 11500 }, { "epoch": 0.34, "learning_rate": 0.0002969101702215314, "loss": 3.1367, "step": 12000 }, { "epoch": 0.35, "learning_rate": 0.0002967697234134192, "loss": 3.0834, "step": 12500 }, { "epoch": 0.36, "learning_rate": 0.000296629276605307, "loss": 3.0597, "step": 13000 }, { "epoch": 0.38, "learning_rate": 0.0002964888297971948, "loss": 3.047, "step": 13500 }, { "epoch": 0.39, "learning_rate": 0.0002963483829890826, "loss": 3.0075, "step": 14000 }, { "epoch": 0.41, "learning_rate": 0.0002962079361809704, "loss": 3.0121, "step": 14500 }, { "epoch": 0.42, "learning_rate": 0.00029606748937285816, "loss": 2.9503, "step": 15000 }, { "epoch": 0.43, "learning_rate": 0.00029592704256474597, "loss": 2.9327, "step": 15500 }, { "epoch": 0.45, "learning_rate": 0.0002957865957566337, "loss": 2.9738, "step": 16000 }, { "epoch": 0.46, "learning_rate": 0.00029564614894852153, "loss": 2.9376, "step": 16500 }, { "epoch": 0.48, "learning_rate": 0.00029550570214040934, "loss": 2.8891, "step": 17000 }, { "epoch": 0.49, "learning_rate": 0.00029536525533229715, "loss": 2.9003, "step": 17500 }, { "epoch": 0.51, "learning_rate": 0.0002952248085241849, "loss": 2.8601, "step": 18000 }, { "epoch": 0.52, "learning_rate": 0.0002950843617160727, "loss": 2.8786, "step": 18500 }, { "epoch": 0.53, "learning_rate": 0.0002949439149079605, "loss": 2.8824, "step": 19000 }, { "epoch": 0.55, "learning_rate": 0.0002948034680998483, "loss": 2.8358, "step": 19500 }, { "epoch": 0.56, "learning_rate": 0.0002946630212917361, "loss": 2.8757, "step": 20000 }, { "epoch": 0.58, "learning_rate": 0.0002945225744836239, "loss": 2.8083, "step": 20500 }, { "epoch": 0.59, "learning_rate": 0.00029438212767551164, "loss": 2.7675, "step": 21000 }, { "epoch": 0.6, "learning_rate": 0.00029424168086739945, "loss": 2.8155, "step": 21500 }, { "epoch": 0.62, "learning_rate": 0.00029410123405928726, "loss": 2.7924, "step": 22000 }, { "epoch": 0.63, "learning_rate": 0.00029396078725117507, "loss": 2.7935, "step": 22500 }, { "epoch": 0.65, "learning_rate": 0.0002938203404430628, "loss": 2.7332, "step": 23000 }, { "epoch": 0.66, "learning_rate": 0.00029367989363495063, "loss": 2.7284, "step": 23500 }, { "epoch": 0.67, "learning_rate": 0.0002935394468268384, "loss": 2.757, "step": 24000 }, { "epoch": 0.69, "learning_rate": 0.0002933990000187262, "loss": 2.7514, "step": 24500 }, { "epoch": 0.7, "learning_rate": 0.000293258553210614, "loss": 2.7135, "step": 25000 }, { "epoch": 0.72, "learning_rate": 0.0002931181064025018, "loss": 2.7346, "step": 25500 }, { "epoch": 0.73, "learning_rate": 0.0002929776595943896, "loss": 2.7471, "step": 26000 }, { "epoch": 0.74, "learning_rate": 0.00029283721278627737, "loss": 2.7027, "step": 26500 }, { "epoch": 0.76, "learning_rate": 0.0002926967659781652, "loss": 2.7163, "step": 27000 }, { "epoch": 0.77, "learning_rate": 0.00029255631917005293, "loss": 2.7118, "step": 27500 }, { "epoch": 0.79, "learning_rate": 0.00029241587236194074, "loss": 2.668, "step": 28000 }, { "epoch": 0.8, "learning_rate": 0.00029227542555382855, "loss": 2.6531, "step": 28500 }, { "epoch": 0.81, "learning_rate": 0.00029213497874571636, "loss": 2.6297, "step": 29000 }, { "epoch": 0.83, "learning_rate": 0.00029199453193760416, "loss": 2.6589, "step": 29500 }, { "epoch": 0.84, "learning_rate": 0.0002918540851294919, "loss": 2.6096, "step": 30000 }, { "epoch": 0.86, "learning_rate": 0.0002917136383213797, "loss": 2.643, "step": 30500 }, { "epoch": 0.87, "learning_rate": 0.0002915731915132675, "loss": 2.621, "step": 31000 }, { "epoch": 0.88, "learning_rate": 0.0002914327447051553, "loss": 2.6194, "step": 31500 }, { "epoch": 0.9, "learning_rate": 0.0002912922978970431, "loss": 2.649, "step": 32000 }, { "epoch": 0.91, "learning_rate": 0.0002911518510889309, "loss": 2.6221, "step": 32500 }, { "epoch": 0.93, "learning_rate": 0.0002910114042808187, "loss": 2.5794, "step": 33000 }, { "epoch": 0.94, "learning_rate": 0.00029087095747270646, "loss": 2.6582, "step": 33500 }, { "epoch": 0.95, "learning_rate": 0.0002907305106645943, "loss": 2.6264, "step": 34000 }, { "epoch": 0.97, "learning_rate": 0.000290590063856482, "loss": 2.6119, "step": 34500 }, { "epoch": 0.98, "learning_rate": 0.00029044961704836983, "loss": 2.5911, "step": 35000 }, { "epoch": 1.0, "learning_rate": 0.00029030917024025764, "loss": 2.5688, "step": 35500 }, { "epoch": 1.01, "learning_rate": 0.00029016872343214545, "loss": 2.5316, "step": 36000 }, { "epoch": 1.02, "learning_rate": 0.00029002827662403326, "loss": 2.5186, "step": 36500 }, { "epoch": 1.04, "learning_rate": 0.000289887829815921, "loss": 2.4959, "step": 37000 }, { "epoch": 1.05, "learning_rate": 0.0002897473830078088, "loss": 2.5466, "step": 37500 }, { "epoch": 1.07, "learning_rate": 0.0002896069361996966, "loss": 2.5292, "step": 38000 }, { "epoch": 1.08, "learning_rate": 0.0002894664893915844, "loss": 2.5283, "step": 38500 }, { "epoch": 1.09, "learning_rate": 0.0002893260425834722, "loss": 2.5334, "step": 39000 }, { "epoch": 1.11, "learning_rate": 0.00028918559577536, "loss": 2.5344, "step": 39500 }, { "epoch": 1.12, "learning_rate": 0.0002890451489672478, "loss": 2.5227, "step": 40000 }, { "epoch": 1.14, "learning_rate": 0.00028890470215913556, "loss": 2.5043, "step": 40500 }, { "epoch": 1.15, "learning_rate": 0.00028876425535102337, "loss": 2.4737, "step": 41000 }, { "epoch": 1.16, "learning_rate": 0.0002886238085429112, "loss": 2.5168, "step": 41500 }, { "epoch": 1.18, "learning_rate": 0.00028848336173479893, "loss": 2.5557, "step": 42000 }, { "epoch": 1.19, "learning_rate": 0.00028834291492668674, "loss": 2.4842, "step": 42500 }, { "epoch": 1.21, "learning_rate": 0.00028820246811857455, "loss": 2.5212, "step": 43000 }, { "epoch": 1.22, "learning_rate": 0.00028806202131046235, "loss": 2.5265, "step": 43500 }, { "epoch": 1.23, "learning_rate": 0.0002879215745023501, "loss": 2.5013, "step": 44000 }, { "epoch": 1.25, "learning_rate": 0.0002877811276942379, "loss": 2.5131, "step": 44500 }, { "epoch": 1.26, "learning_rate": 0.0002876406808861257, "loss": 2.4899, "step": 45000 }, { "epoch": 1.28, "learning_rate": 0.0002875002340780135, "loss": 2.5271, "step": 45500 }, { "epoch": 1.29, "learning_rate": 0.0002873597872699013, "loss": 2.4909, "step": 46000 }, { "epoch": 1.3, "learning_rate": 0.0002872193404617891, "loss": 2.4901, "step": 46500 }, { "epoch": 1.32, "learning_rate": 0.0002870788936536769, "loss": 2.4997, "step": 47000 }, { "epoch": 1.33, "learning_rate": 0.00028693844684556466, "loss": 2.4705, "step": 47500 }, { "epoch": 1.35, "learning_rate": 0.00028679800003745246, "loss": 2.453, "step": 48000 }, { "epoch": 1.36, "learning_rate": 0.00028665755322934027, "loss": 2.4742, "step": 48500 }, { "epoch": 1.38, "learning_rate": 0.000286517106421228, "loss": 2.4825, "step": 49000 }, { "epoch": 1.39, "learning_rate": 0.00028637665961311583, "loss": 2.4236, "step": 49500 }, { "epoch": 1.4, "learning_rate": 0.00028623621280500364, "loss": 2.4295, "step": 50000 }, { "epoch": 1.42, "learning_rate": 0.00028609576599689145, "loss": 2.4636, "step": 50500 }, { "epoch": 1.43, "learning_rate": 0.0002859553191887792, "loss": 2.4772, "step": 51000 }, { "epoch": 1.45, "learning_rate": 0.000285814872380667, "loss": 2.4243, "step": 51500 }, { "epoch": 1.46, "learning_rate": 0.0002856744255725548, "loss": 2.4275, "step": 52000 }, { "epoch": 1.47, "learning_rate": 0.0002855339787644426, "loss": 2.4304, "step": 52500 }, { "epoch": 1.49, "learning_rate": 0.0002853935319563304, "loss": 2.4375, "step": 53000 }, { "epoch": 1.5, "learning_rate": 0.0002852530851482182, "loss": 2.4304, "step": 53500 }, { "epoch": 1.52, "learning_rate": 0.00028511263834010594, "loss": 2.4101, "step": 54000 }, { "epoch": 1.53, "learning_rate": 0.00028497219153199375, "loss": 2.4256, "step": 54500 }, { "epoch": 1.54, "learning_rate": 0.00028483174472388156, "loss": 2.4022, "step": 55000 }, { "epoch": 1.56, "learning_rate": 0.00028469129791576937, "loss": 2.405, "step": 55500 }, { "epoch": 1.57, "learning_rate": 0.0002845508511076571, "loss": 2.4053, "step": 56000 }, { "epoch": 1.59, "learning_rate": 0.00028441040429954493, "loss": 2.4268, "step": 56500 }, { "epoch": 1.6, "learning_rate": 0.0002842699574914327, "loss": 2.3901, "step": 57000 }, { "epoch": 1.61, "learning_rate": 0.0002841295106833205, "loss": 2.4176, "step": 57500 }, { "epoch": 1.63, "learning_rate": 0.0002839890638752083, "loss": 2.42, "step": 58000 }, { "epoch": 1.64, "learning_rate": 0.0002838486170670961, "loss": 2.3913, "step": 58500 }, { "epoch": 1.66, "learning_rate": 0.0002837081702589839, "loss": 2.382, "step": 59000 }, { "epoch": 1.67, "learning_rate": 0.00028356772345087167, "loss": 2.3878, "step": 59500 }, { "epoch": 1.68, "learning_rate": 0.0002834272766427595, "loss": 2.4147, "step": 60000 }, { "epoch": 1.7, "learning_rate": 0.00028328682983464723, "loss": 2.38, "step": 60500 }, { "epoch": 1.71, "learning_rate": 0.00028314638302653504, "loss": 2.3794, "step": 61000 }, { "epoch": 1.73, "learning_rate": 0.00028300593621842285, "loss": 2.3822, "step": 61500 }, { "epoch": 1.74, "learning_rate": 0.00028286548941031065, "loss": 2.3941, "step": 62000 }, { "epoch": 1.75, "learning_rate": 0.00028272504260219846, "loss": 2.3859, "step": 62500 }, { "epoch": 1.77, "learning_rate": 0.0002825845957940862, "loss": 2.387, "step": 63000 }, { "epoch": 1.78, "learning_rate": 0.000282444148985974, "loss": 2.3683, "step": 63500 }, { "epoch": 1.8, "learning_rate": 0.0002823037021778618, "loss": 2.3984, "step": 64000 }, { "epoch": 1.81, "learning_rate": 0.0002821632553697496, "loss": 2.3278, "step": 64500 }, { "epoch": 1.82, "learning_rate": 0.0002820228085616374, "loss": 2.3648, "step": 65000 }, { "epoch": 1.84, "learning_rate": 0.0002818823617535252, "loss": 2.3494, "step": 65500 }, { "epoch": 1.85, "learning_rate": 0.000281741914945413, "loss": 2.3724, "step": 66000 }, { "epoch": 1.87, "learning_rate": 0.00028160146813730076, "loss": 2.3507, "step": 66500 }, { "epoch": 1.88, "learning_rate": 0.00028146102132918857, "loss": 2.3449, "step": 67000 }, { "epoch": 1.89, "learning_rate": 0.0002813205745210763, "loss": 2.371, "step": 67500 }, { "epoch": 1.91, "learning_rate": 0.00028118012771296413, "loss": 2.3572, "step": 68000 }, { "epoch": 1.92, "learning_rate": 0.00028103968090485194, "loss": 2.3779, "step": 68500 }, { "epoch": 1.94, "learning_rate": 0.00028089923409673975, "loss": 2.3376, "step": 69000 }, { "epoch": 1.95, "learning_rate": 0.00028075878728862756, "loss": 2.3384, "step": 69500 }, { "epoch": 1.96, "learning_rate": 0.0002806183404805153, "loss": 2.3622, "step": 70000 }, { "epoch": 1.98, "learning_rate": 0.0002804778936724031, "loss": 2.3338, "step": 70500 }, { "epoch": 1.99, "learning_rate": 0.0002803374468642909, "loss": 2.3579, "step": 71000 }, { "epoch": 2.01, "learning_rate": 0.0002801970000561787, "loss": 2.3414, "step": 71500 }, { "epoch": 2.02, "learning_rate": 0.0002800565532480665, "loss": 2.2625, "step": 72000 }, { "epoch": 2.03, "learning_rate": 0.0002799161064399543, "loss": 2.2404, "step": 72500 }, { "epoch": 2.05, "learning_rate": 0.0002797756596318421, "loss": 2.2747, "step": 73000 }, { "epoch": 2.06, "learning_rate": 0.00027963521282372986, "loss": 2.2791, "step": 73500 }, { "epoch": 2.08, "learning_rate": 0.00027949476601561767, "loss": 2.3042, "step": 74000 }, { "epoch": 2.09, "learning_rate": 0.0002793543192075054, "loss": 2.2652, "step": 74500 }, { "epoch": 2.1, "learning_rate": 0.00027921387239939323, "loss": 2.236, "step": 75000 }, { "epoch": 2.12, "learning_rate": 0.00027907342559128104, "loss": 2.2508, "step": 75500 }, { "epoch": 2.13, "learning_rate": 0.00027893297878316885, "loss": 2.2145, "step": 76000 }, { "epoch": 2.15, "learning_rate": 0.00027879253197505665, "loss": 2.2401, "step": 76500 }, { "epoch": 2.16, "learning_rate": 0.0002786520851669444, "loss": 2.2362, "step": 77000 }, { "epoch": 2.17, "learning_rate": 0.0002785116383588322, "loss": 2.237, "step": 77500 }, { "epoch": 2.19, "learning_rate": 0.00027837119155071997, "loss": 2.2818, "step": 78000 }, { "epoch": 2.2, "learning_rate": 0.0002782307447426078, "loss": 2.2515, "step": 78500 }, { "epoch": 2.22, "learning_rate": 0.0002780902979344956, "loss": 2.2559, "step": 79000 }, { "epoch": 2.23, "learning_rate": 0.0002779498511263834, "loss": 2.2788, "step": 79500 }, { "epoch": 2.25, "learning_rate": 0.0002778094043182712, "loss": 2.2364, "step": 80000 }, { "epoch": 2.26, "learning_rate": 0.00027766895751015896, "loss": 2.2632, "step": 80500 }, { "epoch": 2.27, "learning_rate": 0.00027752851070204676, "loss": 2.2095, "step": 81000 }, { "epoch": 2.29, "learning_rate": 0.0002773880638939345, "loss": 2.256, "step": 81500 }, { "epoch": 2.3, "learning_rate": 0.0002772476170858223, "loss": 2.2417, "step": 82000 }, { "epoch": 2.32, "learning_rate": 0.00027710717027771013, "loss": 2.2227, "step": 82500 }, { "epoch": 2.33, "learning_rate": 0.00027696672346959794, "loss": 2.2372, "step": 83000 }, { "epoch": 2.34, "learning_rate": 0.00027682627666148575, "loss": 2.2767, "step": 83500 }, { "epoch": 2.36, "learning_rate": 0.0002766858298533735, "loss": 2.2501, "step": 84000 }, { "epoch": 2.37, "learning_rate": 0.0002765453830452613, "loss": 2.2325, "step": 84500 }, { "epoch": 2.39, "learning_rate": 0.00027640493623714906, "loss": 2.2462, "step": 85000 }, { "epoch": 2.4, "learning_rate": 0.00027626448942903687, "loss": 2.2773, "step": 85500 }, { "epoch": 2.41, "learning_rate": 0.0002761240426209247, "loss": 2.2363, "step": 86000 }, { "epoch": 2.43, "learning_rate": 0.0002759835958128125, "loss": 2.2692, "step": 86500 }, { "epoch": 2.44, "learning_rate": 0.0002758431490047003, "loss": 2.2882, "step": 87000 }, { "epoch": 2.46, "learning_rate": 0.00027570270219658805, "loss": 2.2471, "step": 87500 }, { "epoch": 2.47, "learning_rate": 0.00027556225538847586, "loss": 2.2483, "step": 88000 }, { "epoch": 2.48, "learning_rate": 0.0002754218085803636, "loss": 2.2677, "step": 88500 }, { "epoch": 2.5, "learning_rate": 0.0002752813617722514, "loss": 2.2497, "step": 89000 }, { "epoch": 2.51, "learning_rate": 0.00027514091496413923, "loss": 2.2268, "step": 89500 }, { "epoch": 2.53, "learning_rate": 0.00027500046815602704, "loss": 2.2396, "step": 90000 }, { "epoch": 2.54, "learning_rate": 0.0002748600213479148, "loss": 2.2197, "step": 90500 }, { "epoch": 2.55, "learning_rate": 0.0002747195745398026, "loss": 2.216, "step": 91000 }, { "epoch": 2.57, "learning_rate": 0.0002745791277316904, "loss": 2.2479, "step": 91500 }, { "epoch": 2.58, "learning_rate": 0.0002744386809235782, "loss": 2.238, "step": 92000 }, { "epoch": 2.6, "learning_rate": 0.00027429823411546597, "loss": 2.2426, "step": 92500 }, { "epoch": 2.61, "learning_rate": 0.0002741577873073538, "loss": 2.2297, "step": 93000 }, { "epoch": 2.62, "learning_rate": 0.00027401734049924153, "loss": 2.183, "step": 93500 }, { "epoch": 2.64, "learning_rate": 0.00027387689369112934, "loss": 2.2415, "step": 94000 }, { "epoch": 2.65, "learning_rate": 0.00027373644688301715, "loss": 2.2219, "step": 94500 }, { "epoch": 2.67, "learning_rate": 0.00027359600007490495, "loss": 2.2549, "step": 95000 }, { "epoch": 2.68, "learning_rate": 0.00027345555326679276, "loss": 2.2516, "step": 95500 }, { "epoch": 2.69, "learning_rate": 0.0002733151064586805, "loss": 2.2183, "step": 96000 }, { "epoch": 2.71, "learning_rate": 0.0002731746596505683, "loss": 2.1943, "step": 96500 }, { "epoch": 2.72, "learning_rate": 0.0002730342128424561, "loss": 2.2435, "step": 97000 }, { "epoch": 2.74, "learning_rate": 0.0002728937660343439, "loss": 2.2291, "step": 97500 }, { "epoch": 2.75, "learning_rate": 0.0002727533192262317, "loss": 2.2371, "step": 98000 }, { "epoch": 2.76, "learning_rate": 0.0002726128724181195, "loss": 2.235, "step": 98500 }, { "epoch": 2.78, "learning_rate": 0.0002724724256100073, "loss": 2.2207, "step": 99000 }, { "epoch": 2.79, "learning_rate": 0.00027233197880189506, "loss": 2.26, "step": 99500 }, { "epoch": 2.81, "learning_rate": 0.00027219153199378287, "loss": 2.2456, "step": 100000 }, { "epoch": 2.82, "learning_rate": 0.0002720510851856706, "loss": 2.251, "step": 100500 }, { "epoch": 2.83, "learning_rate": 0.00027191063837755843, "loss": 2.2112, "step": 101000 }, { "epoch": 2.85, "learning_rate": 0.00027177019156944624, "loss": 2.2283, "step": 101500 }, { "epoch": 2.86, "learning_rate": 0.00027162974476133405, "loss": 2.2472, "step": 102000 }, { "epoch": 2.88, "learning_rate": 0.00027148929795322186, "loss": 2.2258, "step": 102500 }, { "epoch": 2.89, "learning_rate": 0.0002713488511451096, "loss": 2.225, "step": 103000 }, { "epoch": 2.9, "learning_rate": 0.0002712084043369974, "loss": 2.2183, "step": 103500 }, { "epoch": 2.92, "learning_rate": 0.0002710679575288852, "loss": 2.2316, "step": 104000 }, { "epoch": 2.93, "learning_rate": 0.000270927510720773, "loss": 2.2095, "step": 104500 }, { "epoch": 2.95, "learning_rate": 0.0002707870639126608, "loss": 2.2091, "step": 105000 }, { "epoch": 2.96, "learning_rate": 0.0002706466171045486, "loss": 2.2138, "step": 105500 }, { "epoch": 2.97, "learning_rate": 0.0002705061702964364, "loss": 2.2082, "step": 106000 }, { "epoch": 2.99, "learning_rate": 0.00027036572348832416, "loss": 2.2378, "step": 106500 }, { "epoch": 3.0, "learning_rate": 0.00027022527668021197, "loss": 2.211, "step": 107000 }, { "epoch": 3.02, "learning_rate": 0.0002700848298720997, "loss": 2.1808, "step": 107500 }, { "epoch": 3.03, "learning_rate": 0.00026994438306398753, "loss": 2.1297, "step": 108000 }, { "epoch": 3.04, "learning_rate": 0.00026980393625587534, "loss": 2.1099, "step": 108500 }, { "epoch": 3.06, "learning_rate": 0.00026966348944776314, "loss": 2.1498, "step": 109000 }, { "epoch": 3.07, "learning_rate": 0.00026952304263965095, "loss": 2.1412, "step": 109500 }, { "epoch": 3.09, "learning_rate": 0.0002693825958315387, "loss": 2.1386, "step": 110000 }, { "epoch": 3.1, "learning_rate": 0.0002692421490234265, "loss": 2.1541, "step": 110500 }, { "epoch": 3.11, "learning_rate": 0.00026910170221531427, "loss": 2.1623, "step": 111000 }, { "epoch": 3.13, "learning_rate": 0.0002689612554072021, "loss": 2.1315, "step": 111500 }, { "epoch": 3.14, "learning_rate": 0.0002688208085990899, "loss": 2.1556, "step": 112000 }, { "epoch": 3.16, "learning_rate": 0.0002686803617909777, "loss": 2.1764, "step": 112500 }, { "epoch": 3.17, "learning_rate": 0.0002685399149828655, "loss": 2.1254, "step": 113000 }, { "epoch": 3.19, "learning_rate": 0.00026839946817475325, "loss": 2.1201, "step": 113500 }, { "epoch": 3.2, "learning_rate": 0.00026825902136664106, "loss": 2.1488, "step": 114000 }, { "epoch": 3.21, "learning_rate": 0.0002681185745585288, "loss": 2.1311, "step": 114500 }, { "epoch": 3.23, "learning_rate": 0.0002679781277504166, "loss": 2.1196, "step": 115000 }, { "epoch": 3.24, "learning_rate": 0.00026783768094230443, "loss": 2.1513, "step": 115500 }, { "epoch": 3.26, "learning_rate": 0.00026769723413419224, "loss": 2.1543, "step": 116000 }, { "epoch": 3.27, "learning_rate": 0.00026755678732608005, "loss": 2.1223, "step": 116500 }, { "epoch": 3.28, "learning_rate": 0.0002674163405179678, "loss": 2.1536, "step": 117000 }, { "epoch": 3.3, "learning_rate": 0.0002672758937098556, "loss": 2.1219, "step": 117500 }, { "epoch": 3.31, "learning_rate": 0.00026713544690174336, "loss": 2.1435, "step": 118000 }, { "epoch": 3.33, "learning_rate": 0.00026699500009363117, "loss": 2.1299, "step": 118500 }, { "epoch": 3.34, "learning_rate": 0.000266854553285519, "loss": 2.1715, "step": 119000 }, { "epoch": 3.35, "learning_rate": 0.0002667141064774068, "loss": 2.1372, "step": 119500 }, { "epoch": 3.37, "learning_rate": 0.0002665736596692946, "loss": 2.1533, "step": 120000 }, { "epoch": 3.38, "learning_rate": 0.00026643321286118235, "loss": 2.1325, "step": 120500 }, { "epoch": 3.4, "learning_rate": 0.00026629276605307016, "loss": 2.1112, "step": 121000 }, { "epoch": 3.41, "learning_rate": 0.0002661523192449579, "loss": 2.1307, "step": 121500 }, { "epoch": 3.42, "learning_rate": 0.0002660118724368457, "loss": 2.1162, "step": 122000 }, { "epoch": 3.44, "learning_rate": 0.00026587142562873353, "loss": 2.1148, "step": 122500 }, { "epoch": 3.45, "learning_rate": 0.00026573097882062134, "loss": 2.1122, "step": 123000 }, { "epoch": 3.47, "learning_rate": 0.0002655905320125091, "loss": 2.1272, "step": 123500 }, { "epoch": 3.48, "learning_rate": 0.0002654500852043969, "loss": 2.1124, "step": 124000 }, { "epoch": 3.49, "learning_rate": 0.0002653096383962847, "loss": 2.1317, "step": 124500 }, { "epoch": 3.51, "learning_rate": 0.00026516919158817246, "loss": 2.0704, "step": 125000 }, { "epoch": 3.52, "learning_rate": 0.00026502874478006027, "loss": 2.1439, "step": 125500 }, { "epoch": 3.54, "learning_rate": 0.0002648882979719481, "loss": 2.1099, "step": 126000 }, { "epoch": 3.55, "learning_rate": 0.00026474785116383583, "loss": 2.1256, "step": 126500 }, { "epoch": 3.56, "learning_rate": 0.00026460740435572364, "loss": 2.1312, "step": 127000 }, { "epoch": 3.58, "learning_rate": 0.00026446695754761145, "loss": 2.1406, "step": 127500 }, { "epoch": 3.59, "learning_rate": 0.00026432651073949925, "loss": 2.0957, "step": 128000 }, { "epoch": 3.61, "learning_rate": 0.000264186063931387, "loss": 2.1054, "step": 128500 }, { "epoch": 3.62, "learning_rate": 0.0002640456171232748, "loss": 2.1584, "step": 129000 }, { "epoch": 3.63, "learning_rate": 0.0002639051703151626, "loss": 2.119, "step": 129500 }, { "epoch": 3.65, "learning_rate": 0.0002637647235070504, "loss": 2.1035, "step": 130000 }, { "epoch": 3.66, "learning_rate": 0.0002636242766989382, "loss": 2.1215, "step": 130500 }, { "epoch": 3.68, "learning_rate": 0.000263483829890826, "loss": 2.1211, "step": 131000 }, { "epoch": 3.69, "learning_rate": 0.0002633433830827138, "loss": 2.1282, "step": 131500 }, { "epoch": 3.7, "learning_rate": 0.00026320293627460155, "loss": 2.1391, "step": 132000 }, { "epoch": 3.72, "learning_rate": 0.00026306248946648936, "loss": 2.1249, "step": 132500 }, { "epoch": 3.73, "learning_rate": 0.00026292204265837717, "loss": 2.1576, "step": 133000 }, { "epoch": 3.75, "learning_rate": 0.0002627815958502649, "loss": 2.1529, "step": 133500 }, { "epoch": 3.76, "learning_rate": 0.00026264114904215273, "loss": 2.119, "step": 134000 }, { "epoch": 3.77, "learning_rate": 0.00026250070223404054, "loss": 2.1246, "step": 134500 }, { "epoch": 3.79, "learning_rate": 0.00026236025542592835, "loss": 2.1074, "step": 135000 }, { "epoch": 3.8, "learning_rate": 0.0002622198086178161, "loss": 2.1399, "step": 135500 }, { "epoch": 3.82, "learning_rate": 0.0002620793618097039, "loss": 2.1434, "step": 136000 }, { "epoch": 3.83, "learning_rate": 0.0002619389150015917, "loss": 2.1012, "step": 136500 }, { "epoch": 3.84, "learning_rate": 0.00026179846819347947, "loss": 2.103, "step": 137000 }, { "epoch": 3.86, "learning_rate": 0.0002616580213853673, "loss": 2.1803, "step": 137500 }, { "epoch": 3.87, "learning_rate": 0.0002615175745772551, "loss": 2.0987, "step": 138000 }, { "epoch": 3.89, "learning_rate": 0.0002613771277691429, "loss": 2.1171, "step": 138500 }, { "epoch": 3.9, "learning_rate": 0.00026123668096103065, "loss": 2.1175, "step": 139000 }, { "epoch": 3.91, "learning_rate": 0.00026109623415291846, "loss": 2.1562, "step": 139500 }, { "epoch": 3.93, "learning_rate": 0.00026095578734480627, "loss": 2.1321, "step": 140000 }, { "epoch": 3.94, "learning_rate": 0.000260815340536694, "loss": 2.1195, "step": 140500 }, { "epoch": 3.96, "learning_rate": 0.00026067489372858183, "loss": 2.1, "step": 141000 }, { "epoch": 3.97, "learning_rate": 0.00026053444692046964, "loss": 2.1052, "step": 141500 }, { "epoch": 3.98, "learning_rate": 0.00026039400011235744, "loss": 2.0947, "step": 142000 }, { "epoch": 4.0, "learning_rate": 0.00026025355330424525, "loss": 2.1094, "step": 142500 }, { "epoch": 4.01, "learning_rate": 0.000260113106496133, "loss": 2.0432, "step": 143000 }, { "epoch": 4.03, "learning_rate": 0.0002599726596880208, "loss": 2.0555, "step": 143500 }, { "epoch": 4.04, "learning_rate": 0.00025983221287990857, "loss": 2.007, "step": 144000 }, { "epoch": 4.06, "learning_rate": 0.0002596917660717964, "loss": 2.0206, "step": 144500 }, { "epoch": 4.07, "learning_rate": 0.0002595513192636842, "loss": 2.0597, "step": 145000 }, { "epoch": 4.08, "learning_rate": 0.000259410872455572, "loss": 2.0644, "step": 145500 }, { "epoch": 4.1, "learning_rate": 0.0002592704256474598, "loss": 2.0271, "step": 146000 }, { "epoch": 4.11, "learning_rate": 0.00025912997883934755, "loss": 2.0555, "step": 146500 }, { "epoch": 4.13, "learning_rate": 0.00025898953203123536, "loss": 2.0494, "step": 147000 }, { "epoch": 4.14, "learning_rate": 0.0002588490852231231, "loss": 2.0369, "step": 147500 }, { "epoch": 4.15, "learning_rate": 0.0002587086384150109, "loss": 2.0388, "step": 148000 }, { "epoch": 4.17, "learning_rate": 0.00025856819160689873, "loss": 2.0225, "step": 148500 }, { "epoch": 4.18, "learning_rate": 0.00025842774479878654, "loss": 2.0506, "step": 149000 }, { "epoch": 4.2, "learning_rate": 0.00025828729799067435, "loss": 2.028, "step": 149500 }, { "epoch": 4.21, "learning_rate": 0.0002581468511825621, "loss": 2.0405, "step": 150000 }, { "epoch": 4.22, "learning_rate": 0.0002580064043744499, "loss": 2.0818, "step": 150500 }, { "epoch": 4.24, "learning_rate": 0.00025786595756633766, "loss": 2.0557, "step": 151000 }, { "epoch": 4.25, "learning_rate": 0.00025772551075822547, "loss": 2.0426, "step": 151500 }, { "epoch": 4.27, "learning_rate": 0.0002575850639501133, "loss": 2.0414, "step": 152000 }, { "epoch": 4.28, "learning_rate": 0.0002574446171420011, "loss": 2.0591, "step": 152500 }, { "epoch": 4.29, "learning_rate": 0.0002573041703338889, "loss": 2.0769, "step": 153000 }, { "epoch": 4.31, "learning_rate": 0.00025716372352577665, "loss": 2.087, "step": 153500 }, { "epoch": 4.32, "learning_rate": 0.00025702327671766446, "loss": 2.0396, "step": 154000 }, { "epoch": 4.34, "learning_rate": 0.0002568828299095522, "loss": 2.0451, "step": 154500 }, { "epoch": 4.35, "learning_rate": 0.00025674238310144, "loss": 2.0115, "step": 155000 }, { "epoch": 4.36, "learning_rate": 0.0002566019362933278, "loss": 2.0424, "step": 155500 }, { "epoch": 4.38, "learning_rate": 0.00025646148948521563, "loss": 2.0356, "step": 156000 }, { "epoch": 4.39, "learning_rate": 0.00025632104267710344, "loss": 2.0349, "step": 156500 }, { "epoch": 4.41, "learning_rate": 0.0002561805958689912, "loss": 2.0458, "step": 157000 }, { "epoch": 4.42, "learning_rate": 0.000256040149060879, "loss": 2.0379, "step": 157500 }, { "epoch": 4.43, "learning_rate": 0.00025589970225276676, "loss": 2.0478, "step": 158000 }, { "epoch": 4.45, "learning_rate": 0.00025575925544465457, "loss": 2.042, "step": 158500 }, { "epoch": 4.46, "learning_rate": 0.0002556188086365424, "loss": 2.0609, "step": 159000 }, { "epoch": 4.48, "learning_rate": 0.0002554783618284302, "loss": 2.0513, "step": 159500 }, { "epoch": 4.49, "learning_rate": 0.00025533791502031794, "loss": 2.0489, "step": 160000 }, { "epoch": 4.5, "learning_rate": 0.00025519746821220574, "loss": 2.049, "step": 160500 }, { "epoch": 4.52, "learning_rate": 0.00025505702140409355, "loss": 2.0444, "step": 161000 }, { "epoch": 4.53, "learning_rate": 0.0002549165745959813, "loss": 2.0658, "step": 161500 }, { "epoch": 4.55, "learning_rate": 0.0002547761277878691, "loss": 2.0258, "step": 162000 }, { "epoch": 4.56, "learning_rate": 0.0002546356809797569, "loss": 2.0551, "step": 162500 }, { "epoch": 4.57, "learning_rate": 0.0002544952341716447, "loss": 2.0555, "step": 163000 }, { "epoch": 4.59, "learning_rate": 0.0002543547873635325, "loss": 2.0466, "step": 163500 }, { "epoch": 4.6, "learning_rate": 0.0002542143405554203, "loss": 2.0336, "step": 164000 }, { "epoch": 4.62, "learning_rate": 0.0002540738937473081, "loss": 2.0412, "step": 164500 }, { "epoch": 4.63, "learning_rate": 0.00025393344693919585, "loss": 2.0094, "step": 165000 }, { "epoch": 4.64, "learning_rate": 0.00025379300013108366, "loss": 2.0189, "step": 165500 }, { "epoch": 4.66, "learning_rate": 0.0002536525533229714, "loss": 2.0634, "step": 166000 }, { "epoch": 4.67, "learning_rate": 0.0002535121065148592, "loss": 2.0543, "step": 166500 }, { "epoch": 4.69, "learning_rate": 0.00025337165970674703, "loss": 2.0689, "step": 167000 }, { "epoch": 4.7, "learning_rate": 0.00025323121289863484, "loss": 2.0068, "step": 167500 }, { "epoch": 4.71, "learning_rate": 0.00025309076609052265, "loss": 2.0627, "step": 168000 }, { "epoch": 4.73, "learning_rate": 0.0002529503192824104, "loss": 2.0485, "step": 168500 }, { "epoch": 4.74, "learning_rate": 0.0002528098724742982, "loss": 2.0589, "step": 169000 }, { "epoch": 4.76, "learning_rate": 0.000252669425666186, "loss": 2.0407, "step": 169500 }, { "epoch": 4.77, "learning_rate": 0.00025252897885807377, "loss": 2.0687, "step": 170000 }, { "epoch": 4.78, "learning_rate": 0.0002523885320499616, "loss": 2.0451, "step": 170500 }, { "epoch": 4.8, "learning_rate": 0.0002522480852418494, "loss": 2.0376, "step": 171000 }, { "epoch": 4.81, "learning_rate": 0.0002521076384337372, "loss": 2.0302, "step": 171500 }, { "epoch": 4.83, "learning_rate": 0.00025196719162562495, "loss": 2.0364, "step": 172000 }, { "epoch": 4.84, "learning_rate": 0.00025182674481751276, "loss": 2.0172, "step": 172500 }, { "epoch": 4.85, "learning_rate": 0.00025168629800940057, "loss": 2.0511, "step": 173000 }, { "epoch": 4.87, "learning_rate": 0.0002515458512012883, "loss": 2.0463, "step": 173500 }, { "epoch": 4.88, "learning_rate": 0.00025140540439317613, "loss": 2.0661, "step": 174000 }, { "epoch": 4.9, "learning_rate": 0.00025126495758506394, "loss": 2.0165, "step": 174500 }, { "epoch": 4.91, "learning_rate": 0.00025112451077695174, "loss": 2.031, "step": 175000 }, { "epoch": 4.93, "learning_rate": 0.0002509840639688395, "loss": 2.0353, "step": 175500 }, { "epoch": 4.94, "learning_rate": 0.0002508436171607273, "loss": 2.0225, "step": 176000 }, { "epoch": 4.95, "learning_rate": 0.0002507031703526151, "loss": 2.0854, "step": 176500 }, { "epoch": 4.97, "learning_rate": 0.00025056272354450287, "loss": 2.0487, "step": 177000 }, { "epoch": 4.98, "learning_rate": 0.0002504222767363907, "loss": 2.0354, "step": 177500 }, { "epoch": 5.0, "learning_rate": 0.0002502818299282785, "loss": 2.04, "step": 178000 }, { "epoch": 5.01, "learning_rate": 0.0002501413831201663, "loss": 2.0023, "step": 178500 }, { "epoch": 5.02, "learning_rate": 0.00025000093631205404, "loss": 1.9267, "step": 179000 }, { "epoch": 5.04, "learning_rate": 0.00024986048950394185, "loss": 1.9619, "step": 179500 }, { "epoch": 5.05, "learning_rate": 0.00024972004269582966, "loss": 1.9616, "step": 180000 }, { "epoch": 5.07, "learning_rate": 0.0002495795958877174, "loss": 1.9733, "step": 180500 }, { "epoch": 5.08, "learning_rate": 0.0002494391490796052, "loss": 1.9445, "step": 181000 }, { "epoch": 5.09, "learning_rate": 0.00024929870227149303, "loss": 1.9845, "step": 181500 }, { "epoch": 5.11, "learning_rate": 0.00024915825546338084, "loss": 1.9633, "step": 182000 }, { "epoch": 5.12, "learning_rate": 0.0002490178086552686, "loss": 1.9904, "step": 182500 }, { "epoch": 5.14, "learning_rate": 0.0002488773618471564, "loss": 1.9854, "step": 183000 }, { "epoch": 5.15, "learning_rate": 0.0002487369150390442, "loss": 1.952, "step": 183500 }, { "epoch": 5.16, "learning_rate": 0.00024859646823093196, "loss": 2.003, "step": 184000 }, { "epoch": 5.18, "learning_rate": 0.00024845602142281977, "loss": 1.9374, "step": 184500 }, { "epoch": 5.19, "learning_rate": 0.0002483155746147076, "loss": 1.9553, "step": 185000 }, { "epoch": 5.21, "learning_rate": 0.0002481751278065954, "loss": 1.9394, "step": 185500 }, { "epoch": 5.22, "learning_rate": 0.00024803468099848314, "loss": 1.9391, "step": 186000 }, { "epoch": 5.23, "learning_rate": 0.00024789423419037095, "loss": 1.9604, "step": 186500 }, { "epoch": 5.25, "learning_rate": 0.00024775378738225876, "loss": 1.9474, "step": 187000 }, { "epoch": 5.26, "learning_rate": 0.0002476133405741465, "loss": 1.9424, "step": 187500 }, { "epoch": 5.28, "learning_rate": 0.0002474728937660343, "loss": 1.9742, "step": 188000 }, { "epoch": 5.29, "learning_rate": 0.0002473324469579221, "loss": 1.9361, "step": 188500 }, { "epoch": 5.3, "learning_rate": 0.00024719200014980993, "loss": 1.9689, "step": 189000 }, { "epoch": 5.32, "learning_rate": 0.0002470515533416977, "loss": 1.9674, "step": 189500 }, { "epoch": 5.33, "learning_rate": 0.0002469111065335855, "loss": 1.9602, "step": 190000 }, { "epoch": 5.35, "learning_rate": 0.0002467706597254733, "loss": 1.9455, "step": 190500 }, { "epoch": 5.36, "learning_rate": 0.00024663021291736106, "loss": 1.9798, "step": 191000 }, { "epoch": 5.37, "learning_rate": 0.00024648976610924887, "loss": 1.9809, "step": 191500 }, { "epoch": 5.39, "learning_rate": 0.0002463493193011367, "loss": 1.9677, "step": 192000 }, { "epoch": 5.4, "learning_rate": 0.0002462088724930245, "loss": 1.981, "step": 192500 }, { "epoch": 5.42, "learning_rate": 0.00024606842568491224, "loss": 1.9866, "step": 193000 }, { "epoch": 5.43, "learning_rate": 0.00024592797887680004, "loss": 1.9806, "step": 193500 }, { "epoch": 5.44, "learning_rate": 0.00024578753206868785, "loss": 1.9721, "step": 194000 }, { "epoch": 5.46, "learning_rate": 0.0002456470852605756, "loss": 1.9762, "step": 194500 }, { "epoch": 5.47, "learning_rate": 0.0002455066384524634, "loss": 2.0143, "step": 195000 }, { "epoch": 5.49, "learning_rate": 0.0002453661916443512, "loss": 1.9659, "step": 195500 }, { "epoch": 5.5, "learning_rate": 0.000245225744836239, "loss": 1.9384, "step": 196000 }, { "epoch": 5.51, "learning_rate": 0.0002450852980281268, "loss": 1.9937, "step": 196500 }, { "epoch": 5.53, "learning_rate": 0.0002449448512200146, "loss": 1.9358, "step": 197000 }, { "epoch": 5.54, "learning_rate": 0.0002448044044119024, "loss": 1.9805, "step": 197500 }, { "epoch": 5.56, "learning_rate": 0.00024466395760379015, "loss": 1.952, "step": 198000 }, { "epoch": 5.57, "learning_rate": 0.00024452351079567796, "loss": 2.03, "step": 198500 }, { "epoch": 5.58, "learning_rate": 0.0002443830639875657, "loss": 2.0039, "step": 199000 }, { "epoch": 5.6, "learning_rate": 0.0002442426171794535, "loss": 1.9566, "step": 199500 }, { "epoch": 5.61, "learning_rate": 0.00024410217037134136, "loss": 1.9538, "step": 200000 }, { "epoch": 5.63, "learning_rate": 0.00024396172356322914, "loss": 1.9754, "step": 200500 }, { "epoch": 5.64, "learning_rate": 0.00024382127675511695, "loss": 1.977, "step": 201000 }, { "epoch": 5.65, "learning_rate": 0.0002436808299470047, "loss": 1.9565, "step": 201500 }, { "epoch": 5.67, "learning_rate": 0.0002435403831388925, "loss": 1.9479, "step": 202000 }, { "epoch": 5.68, "learning_rate": 0.0002433999363307803, "loss": 2.0019, "step": 202500 }, { "epoch": 5.7, "learning_rate": 0.0002432594895226681, "loss": 1.9587, "step": 203000 }, { "epoch": 5.71, "learning_rate": 0.0002431190427145559, "loss": 1.9554, "step": 203500 }, { "epoch": 5.72, "learning_rate": 0.0002429785959064437, "loss": 1.9694, "step": 204000 }, { "epoch": 5.74, "learning_rate": 0.00024283814909833147, "loss": 1.9901, "step": 204500 }, { "epoch": 5.75, "learning_rate": 0.00024269770229021925, "loss": 1.9503, "step": 205000 }, { "epoch": 5.77, "learning_rate": 0.00024255725548210706, "loss": 1.9554, "step": 205500 }, { "epoch": 5.78, "learning_rate": 0.00024241680867399484, "loss": 1.9525, "step": 206000 }, { "epoch": 5.79, "learning_rate": 0.00024227636186588265, "loss": 1.981, "step": 206500 }, { "epoch": 5.81, "learning_rate": 0.00024213591505777045, "loss": 1.9451, "step": 207000 }, { "epoch": 5.82, "learning_rate": 0.0002419954682496582, "loss": 1.9553, "step": 207500 }, { "epoch": 5.84, "learning_rate": 0.00024185502144154602, "loss": 1.9474, "step": 208000 }, { "epoch": 5.85, "learning_rate": 0.0002417145746334338, "loss": 1.9642, "step": 208500 }, { "epoch": 5.87, "learning_rate": 0.0002415741278253216, "loss": 1.9603, "step": 209000 }, { "epoch": 5.88, "learning_rate": 0.00024143368101720939, "loss": 1.9093, "step": 209500 }, { "epoch": 5.89, "learning_rate": 0.0002412932342090972, "loss": 1.9364, "step": 210000 }, { "epoch": 5.91, "learning_rate": 0.000241152787400985, "loss": 1.9891, "step": 210500 }, { "epoch": 5.92, "learning_rate": 0.00024101234059287276, "loss": 1.9717, "step": 211000 }, { "epoch": 5.94, "learning_rate": 0.00024087189378476056, "loss": 1.9513, "step": 211500 }, { "epoch": 5.95, "learning_rate": 0.00024073144697664834, "loss": 1.9571, "step": 212000 }, { "epoch": 5.96, "learning_rate": 0.00024059100016853615, "loss": 1.9442, "step": 212500 }, { "epoch": 5.98, "learning_rate": 0.00024045055336042393, "loss": 1.9624, "step": 213000 }, { "epoch": 5.99, "learning_rate": 0.00024031010655231174, "loss": 1.9352, "step": 213500 }, { "epoch": 6.01, "learning_rate": 0.00024016965974419955, "loss": 1.9092, "step": 214000 }, { "epoch": 6.02, "learning_rate": 0.0002400292129360873, "loss": 1.8456, "step": 214500 }, { "epoch": 6.03, "learning_rate": 0.0002398887661279751, "loss": 1.8846, "step": 215000 }, { "epoch": 6.05, "learning_rate": 0.0002397483193198629, "loss": 1.8422, "step": 215500 }, { "epoch": 6.06, "learning_rate": 0.0002396078725117507, "loss": 1.8811, "step": 216000 }, { "epoch": 6.08, "learning_rate": 0.00023946742570363848, "loss": 1.8762, "step": 216500 }, { "epoch": 6.09, "learning_rate": 0.0002393269788955263, "loss": 1.8813, "step": 217000 }, { "epoch": 6.1, "learning_rate": 0.0002391865320874141, "loss": 1.8543, "step": 217500 }, { "epoch": 6.12, "learning_rate": 0.00023904608527930185, "loss": 1.8821, "step": 218000 }, { "epoch": 6.13, "learning_rate": 0.00023890563847118966, "loss": 1.8509, "step": 218500 }, { "epoch": 6.15, "learning_rate": 0.00023876519166307744, "loss": 1.8799, "step": 219000 }, { "epoch": 6.16, "learning_rate": 0.00023862474485496525, "loss": 1.8657, "step": 219500 }, { "epoch": 6.17, "learning_rate": 0.00023848429804685306, "loss": 1.8988, "step": 220000 }, { "epoch": 6.19, "learning_rate": 0.00023834385123874084, "loss": 1.8873, "step": 220500 }, { "epoch": 6.2, "learning_rate": 0.00023820340443062862, "loss": 1.8665, "step": 221000 }, { "epoch": 6.22, "learning_rate": 0.0002380629576225164, "loss": 1.8889, "step": 221500 }, { "epoch": 6.23, "learning_rate": 0.0002379225108144042, "loss": 1.8704, "step": 222000 }, { "epoch": 6.24, "learning_rate": 0.000237782064006292, "loss": 1.8989, "step": 222500 }, { "epoch": 6.26, "learning_rate": 0.0002376416171981798, "loss": 1.8646, "step": 223000 }, { "epoch": 6.27, "learning_rate": 0.0002375011703900676, "loss": 1.8908, "step": 223500 }, { "epoch": 6.29, "learning_rate": 0.00023736072358195536, "loss": 1.9154, "step": 224000 }, { "epoch": 6.3, "learning_rate": 0.00023722027677384317, "loss": 1.8759, "step": 224500 }, { "epoch": 6.31, "learning_rate": 0.00023707982996573095, "loss": 1.8696, "step": 225000 }, { "epoch": 6.33, "learning_rate": 0.00023693938315761875, "loss": 1.8735, "step": 225500 }, { "epoch": 6.34, "learning_rate": 0.00023679893634950654, "loss": 1.8781, "step": 226000 }, { "epoch": 6.36, "learning_rate": 0.00023665848954139434, "loss": 1.8739, "step": 226500 }, { "epoch": 6.37, "learning_rate": 0.00023651804273328215, "loss": 1.8942, "step": 227000 }, { "epoch": 6.38, "learning_rate": 0.0002363775959251699, "loss": 1.8897, "step": 227500 }, { "epoch": 6.4, "learning_rate": 0.0002362371491170577, "loss": 1.8956, "step": 228000 }, { "epoch": 6.41, "learning_rate": 0.0002360967023089455, "loss": 1.8849, "step": 228500 }, { "epoch": 6.43, "learning_rate": 0.0002359562555008333, "loss": 1.8823, "step": 229000 }, { "epoch": 6.44, "learning_rate": 0.00023581580869272108, "loss": 1.8674, "step": 229500 }, { "epoch": 6.45, "learning_rate": 0.0002356753618846089, "loss": 1.896, "step": 230000 }, { "epoch": 6.47, "learning_rate": 0.0002355349150764967, "loss": 1.9278, "step": 230500 }, { "epoch": 6.48, "learning_rate": 0.00023539446826838445, "loss": 1.8707, "step": 231000 }, { "epoch": 6.5, "learning_rate": 0.00023525402146027226, "loss": 1.9087, "step": 231500 }, { "epoch": 6.51, "learning_rate": 0.00023511357465216004, "loss": 1.8762, "step": 232000 }, { "epoch": 6.52, "learning_rate": 0.00023497312784404785, "loss": 1.9027, "step": 232500 }, { "epoch": 6.54, "learning_rate": 0.00023483268103593563, "loss": 1.938, "step": 233000 }, { "epoch": 6.55, "learning_rate": 0.00023469223422782344, "loss": 1.9021, "step": 233500 }, { "epoch": 6.57, "learning_rate": 0.00023455178741971125, "loss": 1.9112, "step": 234000 }, { "epoch": 6.58, "learning_rate": 0.000234411340611599, "loss": 1.904, "step": 234500 }, { "epoch": 6.59, "learning_rate": 0.0002342708938034868, "loss": 1.9256, "step": 235000 }, { "epoch": 6.61, "learning_rate": 0.0002341304469953746, "loss": 1.9016, "step": 235500 }, { "epoch": 6.62, "learning_rate": 0.0002339900001872624, "loss": 1.8928, "step": 236000 }, { "epoch": 6.64, "learning_rate": 0.00023384955337915018, "loss": 1.8887, "step": 236500 }, { "epoch": 6.65, "learning_rate": 0.00023370910657103799, "loss": 1.8664, "step": 237000 }, { "epoch": 6.66, "learning_rate": 0.00023356865976292577, "loss": 1.9367, "step": 237500 }, { "epoch": 6.68, "learning_rate": 0.00023342821295481355, "loss": 1.9405, "step": 238000 }, { "epoch": 6.69, "learning_rate": 0.00023328776614670136, "loss": 1.8594, "step": 238500 }, { "epoch": 6.71, "learning_rate": 0.00023314731933858914, "loss": 1.895, "step": 239000 }, { "epoch": 6.72, "learning_rate": 0.00023300687253047695, "loss": 1.9265, "step": 239500 }, { "epoch": 6.74, "learning_rate": 0.00023286642572236473, "loss": 1.8762, "step": 240000 }, { "epoch": 6.75, "learning_rate": 0.0002327259789142525, "loss": 1.9104, "step": 240500 }, { "epoch": 6.76, "learning_rate": 0.00023258553210614031, "loss": 1.8946, "step": 241000 }, { "epoch": 6.78, "learning_rate": 0.0002324450852980281, "loss": 1.9149, "step": 241500 }, { "epoch": 6.79, "learning_rate": 0.0002323046384899159, "loss": 1.9136, "step": 242000 }, { "epoch": 6.81, "learning_rate": 0.00023216419168180368, "loss": 1.88, "step": 242500 }, { "epoch": 6.82, "learning_rate": 0.0002320237448736915, "loss": 1.8988, "step": 243000 }, { "epoch": 6.83, "learning_rate": 0.00023188329806557925, "loss": 1.9184, "step": 243500 }, { "epoch": 6.85, "learning_rate": 0.00023174285125746705, "loss": 1.8672, "step": 244000 }, { "epoch": 6.86, "learning_rate": 0.00023160240444935486, "loss": 1.9284, "step": 244500 }, { "epoch": 6.88, "learning_rate": 0.00023146195764124264, "loss": 1.9108, "step": 245000 }, { "epoch": 6.89, "learning_rate": 0.00023132151083313045, "loss": 1.8983, "step": 245500 }, { "epoch": 6.9, "learning_rate": 0.00023118106402501823, "loss": 1.8783, "step": 246000 }, { "epoch": 6.92, "learning_rate": 0.00023104061721690604, "loss": 1.9412, "step": 246500 }, { "epoch": 6.93, "learning_rate": 0.00023090017040879385, "loss": 1.9064, "step": 247000 }, { "epoch": 6.95, "learning_rate": 0.0002307597236006816, "loss": 1.9071, "step": 247500 }, { "epoch": 6.96, "learning_rate": 0.0002306192767925694, "loss": 1.9568, "step": 248000 }, { "epoch": 6.97, "learning_rate": 0.0002304788299844572, "loss": 1.9261, "step": 248500 }, { "epoch": 6.99, "learning_rate": 0.000230338383176345, "loss": 1.8863, "step": 249000 }, { "epoch": 7.0, "learning_rate": 0.00023019793636823278, "loss": 1.8946, "step": 249500 }, { "epoch": 7.02, "learning_rate": 0.0002300574895601206, "loss": 1.7976, "step": 250000 }, { "epoch": 7.03, "learning_rate": 0.0002299170427520084, "loss": 1.8215, "step": 250500 }, { "epoch": 7.04, "learning_rate": 0.00022977659594389615, "loss": 1.8041, "step": 251000 }, { "epoch": 7.06, "learning_rate": 0.00022963614913578396, "loss": 1.8455, "step": 251500 }, { "epoch": 7.07, "learning_rate": 0.00022949570232767174, "loss": 1.83, "step": 252000 }, { "epoch": 7.09, "learning_rate": 0.00022935525551955955, "loss": 1.8258, "step": 252500 }, { "epoch": 7.1, "learning_rate": 0.00022921480871144733, "loss": 1.7908, "step": 253000 }, { "epoch": 7.11, "learning_rate": 0.00022907436190333514, "loss": 1.8099, "step": 253500 }, { "epoch": 7.13, "learning_rate": 0.00022893391509522294, "loss": 1.8051, "step": 254000 }, { "epoch": 7.14, "learning_rate": 0.0002287934682871107, "loss": 1.8227, "step": 254500 }, { "epoch": 7.16, "learning_rate": 0.0002286530214789985, "loss": 1.8217, "step": 255000 }, { "epoch": 7.17, "learning_rate": 0.0002285125746708863, "loss": 1.818, "step": 255500 }, { "epoch": 7.18, "learning_rate": 0.0002283721278627741, "loss": 1.8631, "step": 256000 }, { "epoch": 7.2, "learning_rate": 0.00022823168105466188, "loss": 1.8189, "step": 256500 }, { "epoch": 7.21, "learning_rate": 0.00022809123424654968, "loss": 1.8172, "step": 257000 }, { "epoch": 7.23, "learning_rate": 0.00022795078743843746, "loss": 1.8375, "step": 257500 }, { "epoch": 7.24, "learning_rate": 0.00022781034063032525, "loss": 1.841, "step": 258000 }, { "epoch": 7.25, "learning_rate": 0.00022766989382221305, "loss": 1.8825, "step": 258500 }, { "epoch": 7.27, "learning_rate": 0.00022752944701410083, "loss": 1.8211, "step": 259000 }, { "epoch": 7.28, "learning_rate": 0.00022738900020598864, "loss": 1.835, "step": 259500 }, { "epoch": 7.3, "learning_rate": 0.0002272485533978764, "loss": 1.809, "step": 260000 }, { "epoch": 7.31, "learning_rate": 0.0002271081065897642, "loss": 1.833, "step": 260500 }, { "epoch": 7.32, "learning_rate": 0.000226967659781652, "loss": 1.8221, "step": 261000 }, { "epoch": 7.34, "learning_rate": 0.0002268272129735398, "loss": 1.842, "step": 261500 }, { "epoch": 7.35, "learning_rate": 0.0002266867661654276, "loss": 1.8132, "step": 262000 }, { "epoch": 7.37, "learning_rate": 0.00022654631935731538, "loss": 1.8774, "step": 262500 }, { "epoch": 7.38, "learning_rate": 0.0002264058725492032, "loss": 1.8435, "step": 263000 }, { "epoch": 7.39, "learning_rate": 0.00022626542574109094, "loss": 1.8275, "step": 263500 }, { "epoch": 7.41, "learning_rate": 0.00022612497893297875, "loss": 1.8354, "step": 264000 }, { "epoch": 7.42, "learning_rate": 0.00022598453212486656, "loss": 1.8332, "step": 264500 }, { "epoch": 7.44, "learning_rate": 0.00022584408531675434, "loss": 1.8744, "step": 265000 }, { "epoch": 7.45, "learning_rate": 0.00022570363850864215, "loss": 1.8174, "step": 265500 }, { "epoch": 7.46, "learning_rate": 0.00022556319170052993, "loss": 1.819, "step": 266000 }, { "epoch": 7.48, "learning_rate": 0.00022542274489241774, "loss": 1.8235, "step": 266500 }, { "epoch": 7.49, "learning_rate": 0.0002252822980843055, "loss": 1.8256, "step": 267000 }, { "epoch": 7.51, "learning_rate": 0.0002251418512761933, "loss": 1.8291, "step": 267500 }, { "epoch": 7.52, "learning_rate": 0.0002250014044680811, "loss": 1.8595, "step": 268000 }, { "epoch": 7.53, "learning_rate": 0.0002248609576599689, "loss": 1.839, "step": 268500 }, { "epoch": 7.55, "learning_rate": 0.0002247205108518567, "loss": 1.8658, "step": 269000 }, { "epoch": 7.56, "learning_rate": 0.00022458006404374448, "loss": 1.8508, "step": 269500 }, { "epoch": 7.58, "learning_rate": 0.00022443961723563229, "loss": 1.8332, "step": 270000 }, { "epoch": 7.59, "learning_rate": 0.0002242991704275201, "loss": 1.8551, "step": 270500 }, { "epoch": 7.6, "learning_rate": 0.00022415872361940785, "loss": 1.8145, "step": 271000 }, { "epoch": 7.62, "learning_rate": 0.00022401827681129566, "loss": 1.8381, "step": 271500 }, { "epoch": 7.63, "learning_rate": 0.00022387783000318344, "loss": 1.8425, "step": 272000 }, { "epoch": 7.65, "learning_rate": 0.00022373738319507124, "loss": 1.8686, "step": 272500 }, { "epoch": 7.66, "learning_rate": 0.00022359693638695903, "loss": 1.8304, "step": 273000 }, { "epoch": 7.68, "learning_rate": 0.00022345648957884683, "loss": 1.8242, "step": 273500 }, { "epoch": 7.69, "learning_rate": 0.00022331604277073461, "loss": 1.8065, "step": 274000 }, { "epoch": 7.7, "learning_rate": 0.0002231755959626224, "loss": 1.847, "step": 274500 }, { "epoch": 7.72, "learning_rate": 0.0002230351491545102, "loss": 1.8359, "step": 275000 }, { "epoch": 7.73, "learning_rate": 0.00022289470234639798, "loss": 1.8148, "step": 275500 }, { "epoch": 7.75, "learning_rate": 0.0002227542555382858, "loss": 1.8743, "step": 276000 }, { "epoch": 7.76, "learning_rate": 0.00022261380873017357, "loss": 1.8471, "step": 276500 }, { "epoch": 7.77, "learning_rate": 0.00022247336192206135, "loss": 1.8661, "step": 277000 }, { "epoch": 7.79, "learning_rate": 0.00022233291511394916, "loss": 1.8514, "step": 277500 }, { "epoch": 7.8, "learning_rate": 0.00022219246830583694, "loss": 1.8565, "step": 278000 }, { "epoch": 7.82, "learning_rate": 0.00022205202149772475, "loss": 1.8542, "step": 278500 }, { "epoch": 7.83, "learning_rate": 0.00022191157468961253, "loss": 1.8716, "step": 279000 }, { "epoch": 7.84, "learning_rate": 0.00022177112788150034, "loss": 1.8332, "step": 279500 }, { "epoch": 7.86, "learning_rate": 0.0002216306810733881, "loss": 1.8361, "step": 280000 }, { "epoch": 7.87, "learning_rate": 0.0002214902342652759, "loss": 1.8131, "step": 280500 }, { "epoch": 7.89, "learning_rate": 0.0002213497874571637, "loss": 1.8443, "step": 281000 }, { "epoch": 7.9, "learning_rate": 0.0002212093406490515, "loss": 1.8881, "step": 281500 }, { "epoch": 7.91, "learning_rate": 0.0002210688938409393, "loss": 1.8587, "step": 282000 }, { "epoch": 7.93, "learning_rate": 0.00022092844703282708, "loss": 1.846, "step": 282500 }, { "epoch": 7.94, "learning_rate": 0.0002207880002247149, "loss": 1.8216, "step": 283000 }, { "epoch": 7.96, "learning_rate": 0.00022064755341660264, "loss": 1.8285, "step": 283500 }, { "epoch": 7.97, "learning_rate": 0.00022050710660849045, "loss": 1.8622, "step": 284000 }, { "epoch": 7.98, "learning_rate": 0.00022036665980037826, "loss": 1.8458, "step": 284500 }, { "epoch": 8.0, "learning_rate": 0.00022022621299226604, "loss": 1.8345, "step": 285000 }, { "epoch": 8.01, "learning_rate": 0.00022008576618415385, "loss": 1.7663, "step": 285500 }, { "epoch": 8.03, "learning_rate": 0.00021994531937604163, "loss": 1.7715, "step": 286000 }, { "epoch": 8.04, "learning_rate": 0.00021980487256792944, "loss": 1.7414, "step": 286500 }, { "epoch": 8.05, "learning_rate": 0.0002196644257598172, "loss": 1.7456, "step": 287000 }, { "epoch": 8.07, "learning_rate": 0.000219523978951705, "loss": 1.7446, "step": 287500 }, { "epoch": 8.08, "learning_rate": 0.0002193835321435928, "loss": 1.76, "step": 288000 }, { "epoch": 8.1, "learning_rate": 0.00021924308533548059, "loss": 1.7493, "step": 288500 }, { "epoch": 8.11, "learning_rate": 0.0002191026385273684, "loss": 1.761, "step": 289000 }, { "epoch": 8.12, "learning_rate": 0.00021896219171925617, "loss": 1.7456, "step": 289500 }, { "epoch": 8.14, "learning_rate": 0.00021882174491114398, "loss": 1.7712, "step": 290000 }, { "epoch": 8.15, "learning_rate": 0.00021868129810303174, "loss": 1.7744, "step": 290500 }, { "epoch": 8.17, "learning_rate": 0.00021854085129491954, "loss": 1.7679, "step": 291000 }, { "epoch": 8.18, "learning_rate": 0.00021840040448680735, "loss": 1.7527, "step": 291500 }, { "epoch": 8.19, "learning_rate": 0.00021825995767869513, "loss": 1.7428, "step": 292000 }, { "epoch": 8.21, "learning_rate": 0.00021811951087058294, "loss": 1.7666, "step": 292500 }, { "epoch": 8.22, "learning_rate": 0.00021797906406247072, "loss": 1.7579, "step": 293000 }, { "epoch": 8.24, "learning_rate": 0.0002178386172543585, "loss": 1.7789, "step": 293500 }, { "epoch": 8.25, "learning_rate": 0.00021769817044624628, "loss": 1.7492, "step": 294000 }, { "epoch": 8.26, "learning_rate": 0.0002175577236381341, "loss": 1.7532, "step": 294500 }, { "epoch": 8.28, "learning_rate": 0.0002174172768300219, "loss": 1.7642, "step": 295000 }, { "epoch": 8.29, "learning_rate": 0.00021727683002190968, "loss": 1.7626, "step": 295500 }, { "epoch": 8.31, "learning_rate": 0.0002171363832137975, "loss": 1.7749, "step": 296000 }, { "epoch": 8.32, "learning_rate": 0.00021699593640568524, "loss": 1.7803, "step": 296500 }, { "epoch": 8.33, "learning_rate": 0.00021685548959757305, "loss": 1.7891, "step": 297000 }, { "epoch": 8.35, "learning_rate": 0.00021671504278946086, "loss": 1.7569, "step": 297500 }, { "epoch": 8.36, "learning_rate": 0.00021657459598134864, "loss": 1.7607, "step": 298000 }, { "epoch": 8.38, "learning_rate": 0.00021643414917323645, "loss": 1.7525, "step": 298500 }, { "epoch": 8.39, "learning_rate": 0.00021629370236512423, "loss": 1.8031, "step": 299000 }, { "epoch": 8.4, "learning_rate": 0.00021615325555701204, "loss": 1.768, "step": 299500 }, { "epoch": 8.42, "learning_rate": 0.0002160128087488998, "loss": 1.7674, "step": 300000 }, { "epoch": 8.43, "learning_rate": 0.0002158723619407876, "loss": 1.7714, "step": 300500 }, { "epoch": 8.45, "learning_rate": 0.0002157319151326754, "loss": 1.7616, "step": 301000 }, { "epoch": 8.46, "learning_rate": 0.0002155914683245632, "loss": 1.8042, "step": 301500 }, { "epoch": 8.47, "learning_rate": 0.000215451021516451, "loss": 1.7405, "step": 302000 }, { "epoch": 8.49, "learning_rate": 0.00021531057470833878, "loss": 1.793, "step": 302500 }, { "epoch": 8.5, "learning_rate": 0.00021517012790022658, "loss": 1.7702, "step": 303000 }, { "epoch": 8.52, "learning_rate": 0.00021502968109211434, "loss": 1.7444, "step": 303500 }, { "epoch": 8.53, "learning_rate": 0.00021488923428400215, "loss": 1.7789, "step": 304000 }, { "epoch": 8.55, "learning_rate": 0.00021474878747588995, "loss": 1.775, "step": 304500 }, { "epoch": 8.56, "learning_rate": 0.00021460834066777774, "loss": 1.7749, "step": 305000 }, { "epoch": 8.57, "learning_rate": 0.00021446789385966554, "loss": 1.7767, "step": 305500 }, { "epoch": 8.59, "learning_rate": 0.00021432744705155332, "loss": 1.7808, "step": 306000 }, { "epoch": 8.6, "learning_rate": 0.00021418700024344113, "loss": 1.7688, "step": 306500 }, { "epoch": 8.62, "learning_rate": 0.00021404655343532889, "loss": 1.7917, "step": 307000 }, { "epoch": 8.63, "learning_rate": 0.0002139061066272167, "loss": 1.7924, "step": 307500 }, { "epoch": 8.64, "learning_rate": 0.0002137656598191045, "loss": 1.7847, "step": 308000 }, { "epoch": 8.66, "learning_rate": 0.00021362521301099228, "loss": 1.7892, "step": 308500 }, { "epoch": 8.67, "learning_rate": 0.0002134847662028801, "loss": 1.7915, "step": 309000 }, { "epoch": 8.69, "learning_rate": 0.00021334431939476787, "loss": 1.7895, "step": 309500 }, { "epoch": 8.7, "learning_rate": 0.00021320387258665565, "loss": 1.7896, "step": 310000 }, { "epoch": 8.71, "learning_rate": 0.00021306342577854343, "loss": 1.785, "step": 310500 }, { "epoch": 8.73, "learning_rate": 0.00021292297897043124, "loss": 1.8127, "step": 311000 }, { "epoch": 8.74, "learning_rate": 0.00021278253216231905, "loss": 1.7889, "step": 311500 }, { "epoch": 8.76, "learning_rate": 0.00021264208535420683, "loss": 1.7924, "step": 312000 }, { "epoch": 8.77, "learning_rate": 0.00021250163854609464, "loss": 1.8108, "step": 312500 }, { "epoch": 8.78, "learning_rate": 0.0002123611917379824, "loss": 1.8106, "step": 313000 }, { "epoch": 8.8, "learning_rate": 0.0002122207449298702, "loss": 1.7852, "step": 313500 }, { "epoch": 8.81, "learning_rate": 0.00021208029812175798, "loss": 1.7817, "step": 314000 }, { "epoch": 8.83, "learning_rate": 0.0002119398513136458, "loss": 1.7985, "step": 314500 }, { "epoch": 8.84, "learning_rate": 0.0002117994045055336, "loss": 1.7904, "step": 315000 }, { "epoch": 8.85, "learning_rate": 0.00021165895769742138, "loss": 1.8134, "step": 315500 }, { "epoch": 8.87, "learning_rate": 0.0002115185108893092, "loss": 1.812, "step": 316000 }, { "epoch": 8.88, "learning_rate": 0.00021137806408119694, "loss": 1.7939, "step": 316500 }, { "epoch": 8.9, "learning_rate": 0.00021123761727308475, "loss": 1.7814, "step": 317000 }, { "epoch": 8.91, "learning_rate": 0.00021109717046497253, "loss": 1.7592, "step": 317500 }, { "epoch": 8.92, "learning_rate": 0.00021095672365686034, "loss": 1.7919, "step": 318000 }, { "epoch": 8.94, "learning_rate": 0.00021081627684874815, "loss": 1.7763, "step": 318500 }, { "epoch": 8.95, "learning_rate": 0.00021067583004063593, "loss": 1.8105, "step": 319000 }, { "epoch": 8.97, "learning_rate": 0.00021053538323252373, "loss": 1.7793, "step": 319500 }, { "epoch": 8.98, "learning_rate": 0.0002103949364244115, "loss": 1.8149, "step": 320000 }, { "epoch": 8.99, "learning_rate": 0.0002102544896162993, "loss": 1.7625, "step": 320500 }, { "epoch": 9.01, "learning_rate": 0.0002101140428081871, "loss": 1.6988, "step": 321000 }, { "epoch": 9.02, "learning_rate": 0.00020997359600007489, "loss": 1.6651, "step": 321500 }, { "epoch": 9.04, "learning_rate": 0.0002098331491919627, "loss": 1.6747, "step": 322000 }, { "epoch": 9.05, "learning_rate": 0.00020969270238385047, "loss": 1.7048, "step": 322500 }, { "epoch": 9.06, "learning_rate": 0.00020955225557573828, "loss": 1.6829, "step": 323000 }, { "epoch": 9.08, "learning_rate": 0.00020941180876762604, "loss": 1.7218, "step": 323500 }, { "epoch": 9.09, "learning_rate": 0.00020927136195951384, "loss": 1.7071, "step": 324000 }, { "epoch": 9.11, "learning_rate": 0.00020913091515140165, "loss": 1.7048, "step": 324500 }, { "epoch": 9.12, "learning_rate": 0.00020899046834328943, "loss": 1.7058, "step": 325000 }, { "epoch": 9.13, "learning_rate": 0.00020885002153517724, "loss": 1.7024, "step": 325500 }, { "epoch": 9.15, "learning_rate": 0.00020870957472706502, "loss": 1.6729, "step": 326000 }, { "epoch": 9.16, "learning_rate": 0.00020856912791895283, "loss": 1.7195, "step": 326500 }, { "epoch": 9.18, "learning_rate": 0.00020842868111084058, "loss": 1.723, "step": 327000 }, { "epoch": 9.19, "learning_rate": 0.0002082882343027284, "loss": 1.7097, "step": 327500 }, { "epoch": 9.2, "learning_rate": 0.0002081477874946162, "loss": 1.7152, "step": 328000 }, { "epoch": 9.22, "learning_rate": 0.00020800734068650398, "loss": 1.7013, "step": 328500 }, { "epoch": 9.23, "learning_rate": 0.0002078668938783918, "loss": 1.6876, "step": 329000 }, { "epoch": 9.25, "learning_rate": 0.00020772644707027954, "loss": 1.6929, "step": 329500 }, { "epoch": 9.26, "learning_rate": 0.00020758600026216735, "loss": 1.7123, "step": 330000 }, { "epoch": 9.27, "learning_rate": 0.00020744555345405513, "loss": 1.6763, "step": 330500 }, { "epoch": 9.29, "learning_rate": 0.00020730510664594294, "loss": 1.7054, "step": 331000 }, { "epoch": 9.3, "learning_rate": 0.00020716465983783075, "loss": 1.7192, "step": 331500 }, { "epoch": 9.32, "learning_rate": 0.00020702421302971853, "loss": 1.7188, "step": 332000 }, { "epoch": 9.33, "learning_rate": 0.00020688376622160634, "loss": 1.7263, "step": 332500 }, { "epoch": 9.34, "learning_rate": 0.0002067433194134941, "loss": 1.7191, "step": 333000 }, { "epoch": 9.36, "learning_rate": 0.0002066028726053819, "loss": 1.7121, "step": 333500 }, { "epoch": 9.37, "learning_rate": 0.00020646242579726968, "loss": 1.7201, "step": 334000 }, { "epoch": 9.39, "learning_rate": 0.0002063219789891575, "loss": 1.7105, "step": 334500 }, { "epoch": 9.4, "learning_rate": 0.0002061815321810453, "loss": 1.6985, "step": 335000 }, { "epoch": 9.42, "learning_rate": 0.00020604108537293308, "loss": 1.7075, "step": 335500 }, { "epoch": 9.43, "learning_rate": 0.00020590063856482088, "loss": 1.6683, "step": 336000 }, { "epoch": 9.44, "learning_rate": 0.00020576019175670864, "loss": 1.7711, "step": 336500 }, { "epoch": 9.46, "learning_rate": 0.00020561974494859645, "loss": 1.7415, "step": 337000 }, { "epoch": 9.47, "learning_rate": 0.00020547929814048423, "loss": 1.7425, "step": 337500 }, { "epoch": 9.49, "learning_rate": 0.00020533885133237203, "loss": 1.7103, "step": 338000 }, { "epoch": 9.5, "learning_rate": 0.00020519840452425984, "loss": 1.7212, "step": 338500 }, { "epoch": 9.51, "learning_rate": 0.00020505795771614762, "loss": 1.7515, "step": 339000 }, { "epoch": 9.53, "learning_rate": 0.00020491751090803543, "loss": 1.7124, "step": 339500 }, { "epoch": 9.54, "learning_rate": 0.00020477706409992319, "loss": 1.7298, "step": 340000 }, { "epoch": 9.56, "learning_rate": 0.000204636617291811, "loss": 1.6947, "step": 340500 }, { "epoch": 9.57, "learning_rate": 0.00020449617048369877, "loss": 1.7412, "step": 341000 }, { "epoch": 9.58, "learning_rate": 0.00020435572367558658, "loss": 1.7487, "step": 341500 }, { "epoch": 9.6, "learning_rate": 0.0002042152768674744, "loss": 1.7391, "step": 342000 }, { "epoch": 9.61, "learning_rate": 0.00020407483005936217, "loss": 1.7039, "step": 342500 }, { "epoch": 9.63, "learning_rate": 0.00020393438325124998, "loss": 1.7487, "step": 343000 }, { "epoch": 9.64, "learning_rate": 0.00020379393644313773, "loss": 1.7047, "step": 343500 }, { "epoch": 9.65, "learning_rate": 0.00020365348963502554, "loss": 1.7137, "step": 344000 }, { "epoch": 9.67, "learning_rate": 0.00020351304282691332, "loss": 1.7259, "step": 344500 }, { "epoch": 9.68, "learning_rate": 0.00020337259601880113, "loss": 1.7112, "step": 345000 }, { "epoch": 9.7, "learning_rate": 0.00020323214921068894, "loss": 1.7499, "step": 345500 }, { "epoch": 9.71, "learning_rate": 0.00020309170240257672, "loss": 1.741, "step": 346000 }, { "epoch": 9.72, "learning_rate": 0.0002029512555944645, "loss": 1.6921, "step": 346500 }, { "epoch": 9.74, "learning_rate": 0.00020281080878635228, "loss": 1.7582, "step": 347000 }, { "epoch": 9.75, "learning_rate": 0.0002026703619782401, "loss": 1.7167, "step": 347500 }, { "epoch": 9.77, "learning_rate": 0.0002025299151701279, "loss": 1.7155, "step": 348000 }, { "epoch": 9.78, "learning_rate": 0.00020238946836201568, "loss": 1.7296, "step": 348500 }, { "epoch": 9.79, "learning_rate": 0.00020224902155390349, "loss": 1.7333, "step": 349000 }, { "epoch": 9.81, "learning_rate": 0.00020210857474579124, "loss": 1.6987, "step": 349500 }, { "epoch": 9.82, "learning_rate": 0.00020196812793767905, "loss": 1.7412, "step": 350000 }, { "epoch": 9.84, "learning_rate": 0.00020182768112956683, "loss": 1.7516, "step": 350500 }, { "epoch": 9.85, "learning_rate": 0.00020168723432145464, "loss": 1.7316, "step": 351000 }, { "epoch": 9.86, "learning_rate": 0.00020154678751334244, "loss": 1.7409, "step": 351500 }, { "epoch": 9.88, "learning_rate": 0.00020140634070523023, "loss": 1.7477, "step": 352000 }, { "epoch": 9.89, "learning_rate": 0.00020126589389711803, "loss": 1.7584, "step": 352500 }, { "epoch": 9.91, "learning_rate": 0.0002011254470890058, "loss": 1.7328, "step": 353000 }, { "epoch": 9.92, "learning_rate": 0.0002009850002808936, "loss": 1.7421, "step": 353500 }, { "epoch": 9.93, "learning_rate": 0.00020084455347278138, "loss": 1.719, "step": 354000 }, { "epoch": 9.95, "learning_rate": 0.00020070410666466918, "loss": 1.7224, "step": 354500 }, { "epoch": 9.96, "learning_rate": 0.000200563659856557, "loss": 1.7464, "step": 355000 }, { "epoch": 9.98, "learning_rate": 0.00020042321304844477, "loss": 1.73, "step": 355500 }, { "epoch": 9.99, "learning_rate": 0.00020028276624033258, "loss": 1.7005, "step": 356000 }, { "epoch": 10.0, "learning_rate": 0.00020014231943222034, "loss": 1.7133, "step": 356500 }, { "epoch": 10.02, "learning_rate": 0.00020000187262410814, "loss": 1.6669, "step": 357000 }, { "epoch": 10.03, "learning_rate": 0.00019986142581599592, "loss": 1.6335, "step": 357500 }, { "epoch": 10.05, "learning_rate": 0.00019972097900788373, "loss": 1.641, "step": 358000 }, { "epoch": 10.06, "learning_rate": 0.00019958053219977154, "loss": 1.6476, "step": 358500 }, { "epoch": 10.07, "learning_rate": 0.00019944008539165932, "loss": 1.6382, "step": 359000 }, { "epoch": 10.09, "learning_rate": 0.00019929963858354713, "loss": 1.6289, "step": 359500 }, { "epoch": 10.1, "learning_rate": 0.00019915919177543488, "loss": 1.6553, "step": 360000 }, { "epoch": 10.12, "learning_rate": 0.0001990187449673227, "loss": 1.6524, "step": 360500 }, { "epoch": 10.13, "learning_rate": 0.00019887829815921047, "loss": 1.6656, "step": 361000 }, { "epoch": 10.14, "learning_rate": 0.00019873785135109828, "loss": 1.6646, "step": 361500 }, { "epoch": 10.16, "learning_rate": 0.0001985974045429861, "loss": 1.6444, "step": 362000 }, { "epoch": 10.17, "learning_rate": 0.00019845695773487387, "loss": 1.6786, "step": 362500 }, { "epoch": 10.19, "learning_rate": 0.00019831651092676165, "loss": 1.6439, "step": 363000 }, { "epoch": 10.2, "learning_rate": 0.00019817606411864943, "loss": 1.6175, "step": 363500 }, { "epoch": 10.21, "learning_rate": 0.00019803561731053724, "loss": 1.6708, "step": 364000 }, { "epoch": 10.23, "learning_rate": 0.00019789517050242502, "loss": 1.6821, "step": 364500 }, { "epoch": 10.24, "learning_rate": 0.00019775472369431283, "loss": 1.6408, "step": 365000 }, { "epoch": 10.26, "learning_rate": 0.00019761427688620064, "loss": 1.6807, "step": 365500 }, { "epoch": 10.27, "learning_rate": 0.0001974738300780884, "loss": 1.6527, "step": 366000 }, { "epoch": 10.28, "learning_rate": 0.0001973333832699762, "loss": 1.6521, "step": 366500 }, { "epoch": 10.3, "learning_rate": 0.00019719293646186398, "loss": 1.6517, "step": 367000 }, { "epoch": 10.31, "learning_rate": 0.00019705248965375179, "loss": 1.6752, "step": 367500 }, { "epoch": 10.33, "learning_rate": 0.00019691204284563957, "loss": 1.6419, "step": 368000 }, { "epoch": 10.34, "learning_rate": 0.00019677159603752738, "loss": 1.6271, "step": 368500 }, { "epoch": 10.36, "learning_rate": 0.00019663114922941518, "loss": 1.6934, "step": 369000 }, { "epoch": 10.37, "learning_rate": 0.00019649070242130294, "loss": 1.6474, "step": 369500 }, { "epoch": 10.38, "learning_rate": 0.00019635025561319075, "loss": 1.6374, "step": 370000 }, { "epoch": 10.4, "learning_rate": 0.00019620980880507853, "loss": 1.6623, "step": 370500 }, { "epoch": 10.41, "learning_rate": 0.00019606936199696633, "loss": 1.6459, "step": 371000 }, { "epoch": 10.43, "learning_rate": 0.00019592891518885412, "loss": 1.6437, "step": 371500 }, { "epoch": 10.44, "learning_rate": 0.00019578846838074192, "loss": 1.6553, "step": 372000 }, { "epoch": 10.45, "learning_rate": 0.00019564802157262973, "loss": 1.655, "step": 372500 }, { "epoch": 10.47, "learning_rate": 0.00019550757476451748, "loss": 1.6601, "step": 373000 }, { "epoch": 10.48, "learning_rate": 0.0001953671279564053, "loss": 1.6885, "step": 373500 }, { "epoch": 10.5, "learning_rate": 0.00019522668114829307, "loss": 1.6711, "step": 374000 }, { "epoch": 10.51, "learning_rate": 0.00019508623434018088, "loss": 1.6831, "step": 374500 }, { "epoch": 10.52, "learning_rate": 0.0001949457875320687, "loss": 1.6198, "step": 375000 }, { "epoch": 10.54, "learning_rate": 0.00019480534072395647, "loss": 1.6693, "step": 375500 }, { "epoch": 10.55, "learning_rate": 0.00019466489391584428, "loss": 1.651, "step": 376000 }, { "epoch": 10.57, "learning_rate": 0.00019452444710773203, "loss": 1.6937, "step": 376500 }, { "epoch": 10.58, "learning_rate": 0.00019438400029961984, "loss": 1.6631, "step": 377000 }, { "epoch": 10.59, "learning_rate": 0.00019424355349150762, "loss": 1.6963, "step": 377500 }, { "epoch": 10.61, "learning_rate": 0.00019410310668339543, "loss": 1.6647, "step": 378000 }, { "epoch": 10.62, "learning_rate": 0.00019396265987528324, "loss": 1.6662, "step": 378500 }, { "epoch": 10.64, "learning_rate": 0.00019382221306717102, "loss": 1.68, "step": 379000 }, { "epoch": 10.65, "learning_rate": 0.0001936817662590588, "loss": 1.6921, "step": 379500 }, { "epoch": 10.66, "learning_rate": 0.00019354131945094658, "loss": 1.6737, "step": 380000 }, { "epoch": 10.68, "learning_rate": 0.0001934008726428344, "loss": 1.6782, "step": 380500 }, { "epoch": 10.69, "learning_rate": 0.00019326042583472217, "loss": 1.667, "step": 381000 }, { "epoch": 10.71, "learning_rate": 0.00019311997902660998, "loss": 1.6799, "step": 381500 }, { "epoch": 10.72, "learning_rate": 0.00019297953221849779, "loss": 1.6438, "step": 382000 }, { "epoch": 10.73, "learning_rate": 0.00019283908541038554, "loss": 1.6626, "step": 382500 }, { "epoch": 10.75, "learning_rate": 0.00019269863860227335, "loss": 1.6698, "step": 383000 }, { "epoch": 10.76, "learning_rate": 0.00019255819179416113, "loss": 1.6847, "step": 383500 }, { "epoch": 10.78, "learning_rate": 0.00019241774498604894, "loss": 1.725, "step": 384000 }, { "epoch": 10.79, "learning_rate": 0.00019227729817793672, "loss": 1.6462, "step": 384500 }, { "epoch": 10.8, "learning_rate": 0.00019213685136982452, "loss": 1.6951, "step": 385000 }, { "epoch": 10.82, "learning_rate": 0.00019199640456171233, "loss": 1.6711, "step": 385500 }, { "epoch": 10.83, "learning_rate": 0.0001918559577536001, "loss": 1.6783, "step": 386000 }, { "epoch": 10.85, "learning_rate": 0.0001917155109454879, "loss": 1.6566, "step": 386500 }, { "epoch": 10.86, "learning_rate": 0.00019157506413737568, "loss": 1.6606, "step": 387000 }, { "epoch": 10.87, "learning_rate": 0.00019143461732926348, "loss": 1.6585, "step": 387500 }, { "epoch": 10.89, "learning_rate": 0.00019129417052115126, "loss": 1.7094, "step": 388000 }, { "epoch": 10.9, "learning_rate": 0.00019115372371303907, "loss": 1.638, "step": 388500 }, { "epoch": 10.92, "learning_rate": 0.00019101327690492688, "loss": 1.6686, "step": 389000 }, { "epoch": 10.93, "learning_rate": 0.00019087283009681463, "loss": 1.6696, "step": 389500 }, { "epoch": 10.94, "learning_rate": 0.00019073238328870244, "loss": 1.6516, "step": 390000 }, { "epoch": 10.96, "learning_rate": 0.00019059193648059022, "loss": 1.6625, "step": 390500 }, { "epoch": 10.97, "learning_rate": 0.00019045148967247803, "loss": 1.6865, "step": 391000 }, { "epoch": 10.99, "learning_rate": 0.0001903110428643658, "loss": 1.6903, "step": 391500 }, { "epoch": 11.0, "learning_rate": 0.00019017059605625362, "loss": 1.6686, "step": 392000 }, { "epoch": 11.01, "learning_rate": 0.00019003014924814143, "loss": 1.5473, "step": 392500 }, { "epoch": 11.03, "learning_rate": 0.00018988970244002918, "loss": 1.5807, "step": 393000 }, { "epoch": 11.04, "learning_rate": 0.000189749255631917, "loss": 1.5696, "step": 393500 }, { "epoch": 11.06, "learning_rate": 0.00018960880882380477, "loss": 1.5851, "step": 394000 }, { "epoch": 11.07, "learning_rate": 0.00018946836201569258, "loss": 1.5706, "step": 394500 }, { "epoch": 11.08, "learning_rate": 0.00018932791520758036, "loss": 1.5829, "step": 395000 }, { "epoch": 11.1, "learning_rate": 0.00018918746839946817, "loss": 1.6146, "step": 395500 }, { "epoch": 11.11, "learning_rate": 0.00018904702159135598, "loss": 1.6072, "step": 396000 }, { "epoch": 11.13, "learning_rate": 0.00018890657478324373, "loss": 1.587, "step": 396500 }, { "epoch": 11.14, "learning_rate": 0.00018876612797513154, "loss": 1.6045, "step": 397000 }, { "epoch": 11.15, "learning_rate": 0.00018862568116701932, "loss": 1.5912, "step": 397500 }, { "epoch": 11.17, "learning_rate": 0.00018848523435890713, "loss": 1.6025, "step": 398000 }, { "epoch": 11.18, "learning_rate": 0.00018834478755079493, "loss": 1.568, "step": 398500 }, { "epoch": 11.2, "learning_rate": 0.0001882043407426827, "loss": 1.5908, "step": 399000 }, { "epoch": 11.21, "learning_rate": 0.0001880638939345705, "loss": 1.5785, "step": 399500 }, { "epoch": 11.23, "learning_rate": 0.00018792344712645828, "loss": 1.5831, "step": 400000 }, { "epoch": 11.24, "learning_rate": 0.00018778300031834609, "loss": 1.6117, "step": 400500 }, { "epoch": 11.25, "learning_rate": 0.00018764255351023387, "loss": 1.5902, "step": 401000 }, { "epoch": 11.27, "learning_rate": 0.00018750210670212167, "loss": 1.6121, "step": 401500 }, { "epoch": 11.28, "learning_rate": 0.00018736165989400948, "loss": 1.619, "step": 402000 }, { "epoch": 11.3, "learning_rate": 0.00018722121308589724, "loss": 1.5974, "step": 402500 }, { "epoch": 11.31, "learning_rate": 0.00018708076627778504, "loss": 1.6051, "step": 403000 }, { "epoch": 11.32, "learning_rate": 0.00018694031946967283, "loss": 1.6172, "step": 403500 }, { "epoch": 11.34, "learning_rate": 0.00018679987266156063, "loss": 1.603, "step": 404000 }, { "epoch": 11.35, "learning_rate": 0.00018665942585344841, "loss": 1.6111, "step": 404500 }, { "epoch": 11.37, "learning_rate": 0.00018651897904533622, "loss": 1.5969, "step": 405000 }, { "epoch": 11.38, "learning_rate": 0.00018637853223722403, "loss": 1.6005, "step": 405500 }, { "epoch": 11.39, "learning_rate": 0.00018623808542911178, "loss": 1.6083, "step": 406000 }, { "epoch": 11.41, "learning_rate": 0.0001860976386209996, "loss": 1.6204, "step": 406500 }, { "epoch": 11.42, "learning_rate": 0.00018595719181288737, "loss": 1.6001, "step": 407000 }, { "epoch": 11.44, "learning_rate": 0.00018581674500477518, "loss": 1.5974, "step": 407500 }, { "epoch": 11.45, "learning_rate": 0.00018567629819666296, "loss": 1.6192, "step": 408000 }, { "epoch": 11.46, "learning_rate": 0.00018553585138855077, "loss": 1.6349, "step": 408500 }, { "epoch": 11.48, "learning_rate": 0.00018539540458043858, "loss": 1.6468, "step": 409000 }, { "epoch": 11.49, "learning_rate": 0.00018525495777232633, "loss": 1.5827, "step": 409500 }, { "epoch": 11.51, "learning_rate": 0.00018511451096421414, "loss": 1.5988, "step": 410000 }, { "epoch": 11.52, "learning_rate": 0.00018497406415610192, "loss": 1.6065, "step": 410500 }, { "epoch": 11.53, "learning_rate": 0.00018483361734798973, "loss": 1.628, "step": 411000 }, { "epoch": 11.55, "learning_rate": 0.0001846931705398775, "loss": 1.6265, "step": 411500 }, { "epoch": 11.56, "learning_rate": 0.00018455272373176532, "loss": 1.5977, "step": 412000 }, { "epoch": 11.58, "learning_rate": 0.00018441227692365313, "loss": 1.5836, "step": 412500 }, { "epoch": 11.59, "learning_rate": 0.00018427183011554088, "loss": 1.5906, "step": 413000 }, { "epoch": 11.6, "learning_rate": 0.0001841313833074287, "loss": 1.6158, "step": 413500 }, { "epoch": 11.62, "learning_rate": 0.00018399093649931647, "loss": 1.6166, "step": 414000 }, { "epoch": 11.63, "learning_rate": 0.00018385048969120428, "loss": 1.6381, "step": 414500 }, { "epoch": 11.65, "learning_rate": 0.00018371004288309206, "loss": 1.646, "step": 415000 }, { "epoch": 11.66, "learning_rate": 0.00018356959607497987, "loss": 1.6012, "step": 415500 }, { "epoch": 11.67, "learning_rate": 0.00018342914926686765, "loss": 1.6277, "step": 416000 }, { "epoch": 11.69, "learning_rate": 0.00018328870245875543, "loss": 1.6731, "step": 416500 }, { "epoch": 11.7, "learning_rate": 0.00018314825565064324, "loss": 1.6306, "step": 417000 }, { "epoch": 11.72, "learning_rate": 0.00018300780884253102, "loss": 1.6024, "step": 417500 }, { "epoch": 11.73, "learning_rate": 0.00018286736203441882, "loss": 1.6096, "step": 418000 }, { "epoch": 11.74, "learning_rate": 0.0001827269152263066, "loss": 1.6349, "step": 418500 }, { "epoch": 11.76, "learning_rate": 0.00018258646841819439, "loss": 1.611, "step": 419000 }, { "epoch": 11.77, "learning_rate": 0.0001824460216100822, "loss": 1.6175, "step": 419500 }, { "epoch": 11.79, "learning_rate": 0.00018230557480196997, "loss": 1.6383, "step": 420000 }, { "epoch": 11.8, "learning_rate": 0.00018216512799385778, "loss": 1.6082, "step": 420500 }, { "epoch": 11.81, "learning_rate": 0.00018202468118574556, "loss": 1.6501, "step": 421000 }, { "epoch": 11.83, "learning_rate": 0.00018188423437763337, "loss": 1.6195, "step": 421500 }, { "epoch": 11.84, "learning_rate": 0.00018174378756952113, "loss": 1.6106, "step": 422000 }, { "epoch": 11.86, "learning_rate": 0.00018160334076140893, "loss": 1.5999, "step": 422500 }, { "epoch": 11.87, "learning_rate": 0.00018146289395329674, "loss": 1.6126, "step": 423000 }, { "epoch": 11.88, "learning_rate": 0.00018132244714518452, "loss": 1.6388, "step": 423500 }, { "epoch": 11.9, "learning_rate": 0.00018118200033707233, "loss": 1.586, "step": 424000 }, { "epoch": 11.91, "learning_rate": 0.0001810415535289601, "loss": 1.6462, "step": 424500 }, { "epoch": 11.93, "learning_rate": 0.00018090110672084792, "loss": 1.6112, "step": 425000 }, { "epoch": 11.94, "learning_rate": 0.00018076065991273573, "loss": 1.5967, "step": 425500 }, { "epoch": 11.95, "learning_rate": 0.00018062021310462348, "loss": 1.6225, "step": 426000 }, { "epoch": 11.97, "learning_rate": 0.0001804797662965113, "loss": 1.6086, "step": 426500 }, { "epoch": 11.98, "learning_rate": 0.00018033931948839907, "loss": 1.6326, "step": 427000 }, { "epoch": 12.0, "learning_rate": 0.00018019887268028688, "loss": 1.6163, "step": 427500 }, { "epoch": 12.01, "learning_rate": 0.00018005842587217466, "loss": 1.5382, "step": 428000 }, { "epoch": 12.02, "learning_rate": 0.00017991797906406247, "loss": 1.5531, "step": 428500 }, { "epoch": 12.04, "learning_rate": 0.00017977753225595028, "loss": 1.5373, "step": 429000 }, { "epoch": 12.05, "learning_rate": 0.00017963708544783803, "loss": 1.5424, "step": 429500 }, { "epoch": 12.07, "learning_rate": 0.00017949663863972584, "loss": 1.5394, "step": 430000 }, { "epoch": 12.08, "learning_rate": 0.00017935619183161362, "loss": 1.5331, "step": 430500 }, { "epoch": 12.1, "learning_rate": 0.00017921574502350143, "loss": 1.5342, "step": 431000 }, { "epoch": 12.11, "learning_rate": 0.0001790752982153892, "loss": 1.524, "step": 431500 }, { "epoch": 12.12, "learning_rate": 0.00017893485140727702, "loss": 1.5285, "step": 432000 }, { "epoch": 12.14, "learning_rate": 0.0001787944045991648, "loss": 1.5422, "step": 432500 }, { "epoch": 12.15, "learning_rate": 0.00017865395779105258, "loss": 1.5073, "step": 433000 }, { "epoch": 12.17, "learning_rate": 0.00017851351098294038, "loss": 1.5426, "step": 433500 }, { "epoch": 12.18, "learning_rate": 0.00017837306417482817, "loss": 1.5532, "step": 434000 }, { "epoch": 12.19, "learning_rate": 0.00017823261736671597, "loss": 1.5142, "step": 434500 }, { "epoch": 12.21, "learning_rate": 0.00017809217055860375, "loss": 1.5332, "step": 435000 }, { "epoch": 12.22, "learning_rate": 0.00017795172375049154, "loss": 1.5548, "step": 435500 }, { "epoch": 12.24, "learning_rate": 0.00017781127694237934, "loss": 1.5717, "step": 436000 }, { "epoch": 12.25, "learning_rate": 0.00017767083013426712, "loss": 1.5633, "step": 436500 }, { "epoch": 12.26, "learning_rate": 0.00017753038332615493, "loss": 1.5532, "step": 437000 }, { "epoch": 12.28, "learning_rate": 0.00017738993651804271, "loss": 1.562, "step": 437500 }, { "epoch": 12.29, "learning_rate": 0.00017724948970993052, "loss": 1.5514, "step": 438000 }, { "epoch": 12.31, "learning_rate": 0.00017710904290181828, "loss": 1.5566, "step": 438500 }, { "epoch": 12.32, "learning_rate": 0.00017696859609370608, "loss": 1.5624, "step": 439000 }, { "epoch": 12.33, "learning_rate": 0.0001768281492855939, "loss": 1.51, "step": 439500 }, { "epoch": 12.35, "learning_rate": 0.00017668770247748167, "loss": 1.5532, "step": 440000 }, { "epoch": 12.36, "learning_rate": 0.00017654725566936948, "loss": 1.5621, "step": 440500 }, { "epoch": 12.38, "learning_rate": 0.00017640680886125726, "loss": 1.5415, "step": 441000 }, { "epoch": 12.39, "learning_rate": 0.00017626636205314507, "loss": 1.5547, "step": 441500 }, { "epoch": 12.4, "learning_rate": 0.00017612591524503282, "loss": 1.5488, "step": 442000 }, { "epoch": 12.42, "learning_rate": 0.00017598546843692063, "loss": 1.5488, "step": 442500 }, { "epoch": 12.43, "learning_rate": 0.00017584502162880844, "loss": 1.5959, "step": 443000 }, { "epoch": 12.45, "learning_rate": 0.00017570457482069622, "loss": 1.5253, "step": 443500 }, { "epoch": 12.46, "learning_rate": 0.00017556412801258403, "loss": 1.5767, "step": 444000 }, { "epoch": 12.47, "learning_rate": 0.0001754236812044718, "loss": 1.557, "step": 444500 }, { "epoch": 12.49, "learning_rate": 0.00017528323439635962, "loss": 1.5658, "step": 445000 }, { "epoch": 12.5, "learning_rate": 0.00017514278758824737, "loss": 1.5549, "step": 445500 }, { "epoch": 12.52, "learning_rate": 0.00017500234078013518, "loss": 1.555, "step": 446000 }, { "epoch": 12.53, "learning_rate": 0.000174861893972023, "loss": 1.5539, "step": 446500 }, { "epoch": 12.54, "learning_rate": 0.00017472144716391077, "loss": 1.5777, "step": 447000 }, { "epoch": 12.56, "learning_rate": 0.00017458100035579858, "loss": 1.543, "step": 447500 }, { "epoch": 12.57, "learning_rate": 0.00017444055354768636, "loss": 1.5948, "step": 448000 }, { "epoch": 12.59, "learning_rate": 0.00017430010673957416, "loss": 1.5819, "step": 448500 }, { "epoch": 12.6, "learning_rate": 0.00017415965993146195, "loss": 1.5539, "step": 449000 }, { "epoch": 12.61, "learning_rate": 0.00017401921312334973, "loss": 1.566, "step": 449500 }, { "epoch": 12.63, "learning_rate": 0.00017387876631523753, "loss": 1.5644, "step": 450000 }, { "epoch": 12.64, "learning_rate": 0.00017373831950712532, "loss": 1.556, "step": 450500 }, { "epoch": 12.66, "learning_rate": 0.00017359787269901312, "loss": 1.5502, "step": 451000 }, { "epoch": 12.67, "learning_rate": 0.0001734574258909009, "loss": 1.581, "step": 451500 }, { "epoch": 12.68, "learning_rate": 0.00017331697908278869, "loss": 1.5689, "step": 452000 }, { "epoch": 12.7, "learning_rate": 0.0001731765322746765, "loss": 1.594, "step": 452500 }, { "epoch": 12.71, "learning_rate": 0.00017303608546656427, "loss": 1.5912, "step": 453000 }, { "epoch": 12.73, "learning_rate": 0.00017289563865845208, "loss": 1.5534, "step": 453500 }, { "epoch": 12.74, "learning_rate": 0.00017275519185033986, "loss": 1.5707, "step": 454000 }, { "epoch": 12.75, "learning_rate": 0.00017261474504222767, "loss": 1.5652, "step": 454500 }, { "epoch": 12.77, "learning_rate": 0.00017247429823411543, "loss": 1.5836, "step": 455000 }, { "epoch": 12.78, "learning_rate": 0.00017233385142600323, "loss": 1.5521, "step": 455500 }, { "epoch": 12.8, "learning_rate": 0.00017219340461789104, "loss": 1.5621, "step": 456000 }, { "epoch": 12.81, "learning_rate": 0.00017205295780977882, "loss": 1.5712, "step": 456500 }, { "epoch": 12.82, "learning_rate": 0.00017191251100166663, "loss": 1.5442, "step": 457000 }, { "epoch": 12.84, "learning_rate": 0.0001717720641935544, "loss": 1.5823, "step": 457500 }, { "epoch": 12.85, "learning_rate": 0.00017163161738544222, "loss": 1.5797, "step": 458000 }, { "epoch": 12.87, "learning_rate": 0.00017149117057732997, "loss": 1.5101, "step": 458500 }, { "epoch": 12.88, "learning_rate": 0.00017135072376921778, "loss": 1.5733, "step": 459000 }, { "epoch": 12.89, "learning_rate": 0.0001712102769611056, "loss": 1.5874, "step": 459500 }, { "epoch": 12.91, "learning_rate": 0.00017106983015299337, "loss": 1.5539, "step": 460000 }, { "epoch": 12.92, "learning_rate": 0.00017092938334488118, "loss": 1.5474, "step": 460500 }, { "epoch": 12.94, "learning_rate": 0.00017078893653676896, "loss": 1.5657, "step": 461000 }, { "epoch": 12.95, "learning_rate": 0.00017064848972865677, "loss": 1.5742, "step": 461500 }, { "epoch": 12.96, "learning_rate": 0.00017050804292054452, "loss": 1.5759, "step": 462000 }, { "epoch": 12.98, "learning_rate": 0.00017036759611243233, "loss": 1.5867, "step": 462500 }, { "epoch": 12.99, "learning_rate": 0.00017022714930432014, "loss": 1.5741, "step": 463000 }, { "epoch": 13.01, "learning_rate": 0.00017008670249620792, "loss": 1.5301, "step": 463500 }, { "epoch": 13.02, "learning_rate": 0.00016994625568809573, "loss": 1.4812, "step": 464000 }, { "epoch": 13.04, "learning_rate": 0.0001698058088799835, "loss": 1.4959, "step": 464500 }, { "epoch": 13.05, "learning_rate": 0.00016966536207187131, "loss": 1.4608, "step": 465000 }, { "epoch": 13.06, "learning_rate": 0.00016952491526375907, "loss": 1.5079, "step": 465500 }, { "epoch": 13.08, "learning_rate": 0.00016938446845564688, "loss": 1.4575, "step": 466000 }, { "epoch": 13.09, "learning_rate": 0.00016924402164753468, "loss": 1.4994, "step": 466500 }, { "epoch": 13.11, "learning_rate": 0.00016910357483942247, "loss": 1.5024, "step": 467000 }, { "epoch": 13.12, "learning_rate": 0.00016896312803131027, "loss": 1.4771, "step": 467500 }, { "epoch": 13.13, "learning_rate": 0.00016882268122319805, "loss": 1.4988, "step": 468000 }, { "epoch": 13.15, "learning_rate": 0.00016868223441508583, "loss": 1.495, "step": 468500 }, { "epoch": 13.16, "learning_rate": 0.00016854178760697362, "loss": 1.4725, "step": 469000 }, { "epoch": 13.18, "learning_rate": 0.00016840134079886142, "loss": 1.4715, "step": 469500 }, { "epoch": 13.19, "learning_rate": 0.00016826089399074923, "loss": 1.5111, "step": 470000 }, { "epoch": 13.2, "learning_rate": 0.000168120447182637, "loss": 1.5205, "step": 470500 }, { "epoch": 13.22, "learning_rate": 0.00016798000037452482, "loss": 1.5102, "step": 471000 }, { "epoch": 13.23, "learning_rate": 0.00016783955356641257, "loss": 1.4871, "step": 471500 }, { "epoch": 13.25, "learning_rate": 0.00016769910675830038, "loss": 1.4828, "step": 472000 }, { "epoch": 13.26, "learning_rate": 0.00016755865995018816, "loss": 1.4935, "step": 472500 }, { "epoch": 13.27, "learning_rate": 0.00016741821314207597, "loss": 1.4811, "step": 473000 }, { "epoch": 13.29, "learning_rate": 0.00016727776633396378, "loss": 1.4903, "step": 473500 }, { "epoch": 13.3, "learning_rate": 0.00016713731952585156, "loss": 1.5206, "step": 474000 }, { "epoch": 13.32, "learning_rate": 0.00016699687271773937, "loss": 1.4896, "step": 474500 }, { "epoch": 13.33, "learning_rate": 0.00016685642590962712, "loss": 1.5221, "step": 475000 }, { "epoch": 13.34, "learning_rate": 0.00016671597910151493, "loss": 1.5004, "step": 475500 }, { "epoch": 13.36, "learning_rate": 0.00016657553229340274, "loss": 1.5036, "step": 476000 }, { "epoch": 13.37, "learning_rate": 0.00016643508548529052, "loss": 1.5516, "step": 476500 }, { "epoch": 13.39, "learning_rate": 0.00016629463867717833, "loss": 1.5031, "step": 477000 }, { "epoch": 13.4, "learning_rate": 0.0001661541918690661, "loss": 1.516, "step": 477500 }, { "epoch": 13.41, "learning_rate": 0.00016601374506095392, "loss": 1.5192, "step": 478000 }, { "epoch": 13.43, "learning_rate": 0.00016587329825284167, "loss": 1.5089, "step": 478500 }, { "epoch": 13.44, "learning_rate": 0.00016573285144472948, "loss": 1.5197, "step": 479000 }, { "epoch": 13.46, "learning_rate": 0.00016559240463661729, "loss": 1.5161, "step": 479500 }, { "epoch": 13.47, "learning_rate": 0.00016545195782850507, "loss": 1.5045, "step": 480000 }, { "epoch": 13.48, "learning_rate": 0.00016531151102039288, "loss": 1.4987, "step": 480500 }, { "epoch": 13.5, "learning_rate": 0.00016517106421228066, "loss": 1.5233, "step": 481000 }, { "epoch": 13.51, "learning_rate": 0.00016503061740416846, "loss": 1.5311, "step": 481500 }, { "epoch": 13.53, "learning_rate": 0.00016489017059605622, "loss": 1.507, "step": 482000 }, { "epoch": 13.54, "learning_rate": 0.00016474972378794403, "loss": 1.5194, "step": 482500 }, { "epoch": 13.55, "learning_rate": 0.00016460927697983183, "loss": 1.5057, "step": 483000 }, { "epoch": 13.57, "learning_rate": 0.00016446883017171961, "loss": 1.5187, "step": 483500 }, { "epoch": 13.58, "learning_rate": 0.00016432838336360742, "loss": 1.4982, "step": 484000 }, { "epoch": 13.6, "learning_rate": 0.0001641879365554952, "loss": 1.5115, "step": 484500 }, { "epoch": 13.61, "learning_rate": 0.000164047489747383, "loss": 1.4948, "step": 485000 }, { "epoch": 13.62, "learning_rate": 0.00016390704293927077, "loss": 1.5035, "step": 485500 }, { "epoch": 13.64, "learning_rate": 0.00016376659613115857, "loss": 1.4884, "step": 486000 }, { "epoch": 13.65, "learning_rate": 0.00016362614932304638, "loss": 1.518, "step": 486500 }, { "epoch": 13.67, "learning_rate": 0.00016348570251493416, "loss": 1.494, "step": 487000 }, { "epoch": 13.68, "learning_rate": 0.00016334525570682197, "loss": 1.4975, "step": 487500 }, { "epoch": 13.69, "learning_rate": 0.00016320480889870975, "loss": 1.5047, "step": 488000 }, { "epoch": 13.71, "learning_rate": 0.00016306436209059753, "loss": 1.5242, "step": 488500 }, { "epoch": 13.72, "learning_rate": 0.0001629239152824853, "loss": 1.5266, "step": 489000 }, { "epoch": 13.74, "learning_rate": 0.00016278346847437312, "loss": 1.5154, "step": 489500 }, { "epoch": 13.75, "learning_rate": 0.00016264302166626093, "loss": 1.5013, "step": 490000 }, { "epoch": 13.76, "learning_rate": 0.0001625025748581487, "loss": 1.5039, "step": 490500 }, { "epoch": 13.78, "learning_rate": 0.00016236212805003652, "loss": 1.4944, "step": 491000 }, { "epoch": 13.79, "learning_rate": 0.00016222168124192427, "loss": 1.5096, "step": 491500 }, { "epoch": 13.81, "learning_rate": 0.00016208123443381208, "loss": 1.5127, "step": 492000 }, { "epoch": 13.82, "learning_rate": 0.00016194078762569986, "loss": 1.4777, "step": 492500 }, { "epoch": 13.83, "learning_rate": 0.00016180034081758767, "loss": 1.5085, "step": 493000 }, { "epoch": 13.85, "learning_rate": 0.00016165989400947548, "loss": 1.5391, "step": 493500 }, { "epoch": 13.86, "learning_rate": 0.00016151944720136326, "loss": 1.5089, "step": 494000 }, { "epoch": 13.88, "learning_rate": 0.00016137900039325107, "loss": 1.5116, "step": 494500 }, { "epoch": 13.89, "learning_rate": 0.00016123855358513882, "loss": 1.5482, "step": 495000 }, { "epoch": 13.91, "learning_rate": 0.00016109810677702663, "loss": 1.5358, "step": 495500 }, { "epoch": 13.92, "learning_rate": 0.0001609576599689144, "loss": 1.507, "step": 496000 }, { "epoch": 13.93, "learning_rate": 0.00016081721316080222, "loss": 1.5343, "step": 496500 }, { "epoch": 13.95, "learning_rate": 0.00016067676635269002, "loss": 1.5384, "step": 497000 }, { "epoch": 13.96, "learning_rate": 0.0001605363195445778, "loss": 1.4906, "step": 497500 }, { "epoch": 13.98, "learning_rate": 0.00016039587273646561, "loss": 1.5225, "step": 498000 }, { "epoch": 13.99, "learning_rate": 0.00016025542592835337, "loss": 1.5258, "step": 498500 }, { "epoch": 14.0, "learning_rate": 0.00016011497912024118, "loss": 1.5214, "step": 499000 }, { "epoch": 14.02, "learning_rate": 0.00015997453231212896, "loss": 1.44, "step": 499500 }, { "epoch": 14.03, "learning_rate": 0.00015983408550401676, "loss": 1.4353, "step": 500000 }, { "epoch": 14.05, "learning_rate": 0.00015969363869590457, "loss": 1.4431, "step": 500500 }, { "epoch": 14.06, "learning_rate": 0.00015955319188779235, "loss": 1.4284, "step": 501000 }, { "epoch": 14.07, "learning_rate": 0.00015941274507968016, "loss": 1.4001, "step": 501500 }, { "epoch": 14.09, "learning_rate": 0.00015927229827156792, "loss": 1.453, "step": 502000 }, { "epoch": 14.1, "learning_rate": 0.00015913185146345572, "loss": 1.4384, "step": 502500 }, { "epoch": 14.12, "learning_rate": 0.00015899140465534353, "loss": 1.4264, "step": 503000 }, { "epoch": 14.13, "learning_rate": 0.0001588509578472313, "loss": 1.4246, "step": 503500 }, { "epoch": 14.14, "learning_rate": 0.00015871051103911912, "loss": 1.4032, "step": 504000 }, { "epoch": 14.16, "learning_rate": 0.0001585700642310069, "loss": 1.4715, "step": 504500 }, { "epoch": 14.17, "learning_rate": 0.00015842961742289468, "loss": 1.4753, "step": 505000 }, { "epoch": 14.19, "learning_rate": 0.00015828917061478246, "loss": 1.4413, "step": 505500 }, { "epoch": 14.2, "learning_rate": 0.00015814872380667027, "loss": 1.4542, "step": 506000 }, { "epoch": 14.21, "learning_rate": 0.00015800827699855808, "loss": 1.4645, "step": 506500 }, { "epoch": 14.23, "learning_rate": 0.00015786783019044586, "loss": 1.4672, "step": 507000 }, { "epoch": 14.24, "learning_rate": 0.00015772738338233367, "loss": 1.4443, "step": 507500 }, { "epoch": 14.26, "learning_rate": 0.00015758693657422142, "loss": 1.4625, "step": 508000 }, { "epoch": 14.27, "learning_rate": 0.00015744648976610923, "loss": 1.4585, "step": 508500 }, { "epoch": 14.28, "learning_rate": 0.000157306042957997, "loss": 1.4591, "step": 509000 }, { "epoch": 14.3, "learning_rate": 0.00015716559614988482, "loss": 1.4824, "step": 509500 }, { "epoch": 14.31, "learning_rate": 0.00015702514934177263, "loss": 1.4764, "step": 510000 }, { "epoch": 14.33, "learning_rate": 0.0001568847025336604, "loss": 1.476, "step": 510500 }, { "epoch": 14.34, "learning_rate": 0.00015674425572554822, "loss": 1.4494, "step": 511000 }, { "epoch": 14.35, "learning_rate": 0.00015660380891743597, "loss": 1.4923, "step": 511500 }, { "epoch": 14.37, "learning_rate": 0.00015646336210932378, "loss": 1.4689, "step": 512000 }, { "epoch": 14.38, "learning_rate": 0.00015632291530121156, "loss": 1.4711, "step": 512500 }, { "epoch": 14.4, "learning_rate": 0.00015618246849309937, "loss": 1.4899, "step": 513000 }, { "epoch": 14.41, "learning_rate": 0.00015604202168498717, "loss": 1.4408, "step": 513500 }, { "epoch": 14.42, "learning_rate": 0.00015590157487687496, "loss": 1.4241, "step": 514000 }, { "epoch": 14.44, "learning_rate": 0.00015576112806876276, "loss": 1.4586, "step": 514500 }, { "epoch": 14.45, "learning_rate": 0.00015562068126065052, "loss": 1.4789, "step": 515000 }, { "epoch": 14.47, "learning_rate": 0.00015548023445253833, "loss": 1.4828, "step": 515500 }, { "epoch": 14.48, "learning_rate": 0.0001553397876444261, "loss": 1.4457, "step": 516000 }, { "epoch": 14.49, "learning_rate": 0.00015519934083631391, "loss": 1.4695, "step": 516500 }, { "epoch": 14.51, "learning_rate": 0.00015505889402820172, "loss": 1.4421, "step": 517000 }, { "epoch": 14.52, "learning_rate": 0.0001549184472200895, "loss": 1.4464, "step": 517500 }, { "epoch": 14.54, "learning_rate": 0.0001547780004119773, "loss": 1.439, "step": 518000 }, { "epoch": 14.55, "learning_rate": 0.00015463755360386506, "loss": 1.4784, "step": 518500 }, { "epoch": 14.56, "learning_rate": 0.00015449710679575287, "loss": 1.423, "step": 519000 }, { "epoch": 14.58, "learning_rate": 0.00015435665998764065, "loss": 1.4751, "step": 519500 }, { "epoch": 14.59, "learning_rate": 0.00015421621317952846, "loss": 1.4603, "step": 520000 }, { "epoch": 14.61, "learning_rate": 0.00015407576637141627, "loss": 1.4713, "step": 520500 }, { "epoch": 14.62, "learning_rate": 0.00015393531956330405, "loss": 1.4577, "step": 521000 }, { "epoch": 14.63, "learning_rate": 0.00015379487275519183, "loss": 1.4449, "step": 521500 }, { "epoch": 14.65, "learning_rate": 0.0001536544259470796, "loss": 1.4836, "step": 522000 }, { "epoch": 14.66, "learning_rate": 0.00015351397913896742, "loss": 1.4734, "step": 522500 }, { "epoch": 14.68, "learning_rate": 0.0001533735323308552, "loss": 1.4549, "step": 523000 }, { "epoch": 14.69, "learning_rate": 0.000153233085522743, "loss": 1.4572, "step": 523500 }, { "epoch": 14.7, "learning_rate": 0.00015309263871463082, "loss": 1.4503, "step": 524000 }, { "epoch": 14.72, "learning_rate": 0.00015295219190651857, "loss": 1.4661, "step": 524500 }, { "epoch": 14.73, "learning_rate": 0.00015281174509840638, "loss": 1.4359, "step": 525000 }, { "epoch": 14.75, "learning_rate": 0.00015267129829029416, "loss": 1.4545, "step": 525500 }, { "epoch": 14.76, "learning_rate": 0.00015253085148218197, "loss": 1.4696, "step": 526000 }, { "epoch": 14.78, "learning_rate": 0.00015239040467406978, "loss": 1.4677, "step": 526500 }, { "epoch": 14.79, "learning_rate": 0.00015224995786595756, "loss": 1.4685, "step": 527000 }, { "epoch": 14.8, "learning_rate": 0.00015210951105784537, "loss": 1.4631, "step": 527500 }, { "epoch": 14.82, "learning_rate": 0.00015196906424973312, "loss": 1.4639, "step": 528000 }, { "epoch": 14.83, "learning_rate": 0.00015182861744162093, "loss": 1.4791, "step": 528500 }, { "epoch": 14.85, "learning_rate": 0.0001516881706335087, "loss": 1.4634, "step": 529000 }, { "epoch": 14.86, "learning_rate": 0.00015154772382539652, "loss": 1.4521, "step": 529500 }, { "epoch": 14.87, "learning_rate": 0.00015140727701728432, "loss": 1.4717, "step": 530000 }, { "epoch": 14.89, "learning_rate": 0.0001512668302091721, "loss": 1.4843, "step": 530500 }, { "epoch": 14.9, "learning_rate": 0.0001511263834010599, "loss": 1.4988, "step": 531000 }, { "epoch": 14.92, "learning_rate": 0.00015098593659294767, "loss": 1.4741, "step": 531500 }, { "epoch": 14.93, "learning_rate": 0.00015084548978483547, "loss": 1.471, "step": 532000 }, { "epoch": 14.94, "learning_rate": 0.00015070504297672326, "loss": 1.4618, "step": 532500 }, { "epoch": 14.96, "learning_rate": 0.00015056459616861106, "loss": 1.4727, "step": 533000 }, { "epoch": 14.97, "learning_rate": 0.00015042414936049887, "loss": 1.441, "step": 533500 }, { "epoch": 14.99, "learning_rate": 0.00015028370255238665, "loss": 1.513, "step": 534000 }, { "epoch": 15.0, "learning_rate": 0.00015014325574427446, "loss": 1.4718, "step": 534500 }, { "epoch": 15.01, "learning_rate": 0.00015000280893616221, "loss": 1.4022, "step": 535000 }, { "epoch": 15.03, "learning_rate": 0.00014986236212805002, "loss": 1.3613, "step": 535500 }, { "epoch": 15.04, "learning_rate": 0.00014972191531993783, "loss": 1.3951, "step": 536000 }, { "epoch": 15.06, "learning_rate": 0.0001495814685118256, "loss": 1.4099, "step": 536500 }, { "epoch": 15.07, "learning_rate": 0.0001494410217037134, "loss": 1.4049, "step": 537000 }, { "epoch": 15.08, "learning_rate": 0.0001493005748956012, "loss": 1.3861, "step": 537500 }, { "epoch": 15.1, "learning_rate": 0.00014916012808748898, "loss": 1.4068, "step": 538000 }, { "epoch": 15.11, "learning_rate": 0.0001490196812793768, "loss": 1.3857, "step": 538500 }, { "epoch": 15.13, "learning_rate": 0.00014887923447126457, "loss": 1.3978, "step": 539000 }, { "epoch": 15.14, "learning_rate": 0.00014873878766315235, "loss": 1.4054, "step": 539500 }, { "epoch": 15.15, "learning_rate": 0.00014859834085504016, "loss": 1.4441, "step": 540000 }, { "epoch": 15.17, "learning_rate": 0.00014845789404692794, "loss": 1.4118, "step": 540500 }, { "epoch": 15.18, "learning_rate": 0.00014831744723881572, "loss": 1.395, "step": 541000 }, { "epoch": 15.2, "learning_rate": 0.00014817700043070353, "loss": 1.3928, "step": 541500 }, { "epoch": 15.21, "learning_rate": 0.00014803655362259134, "loss": 1.4088, "step": 542000 }, { "epoch": 15.22, "learning_rate": 0.00014789610681447912, "loss": 1.4245, "step": 542500 }, { "epoch": 15.24, "learning_rate": 0.0001477556600063669, "loss": 1.3877, "step": 543000 }, { "epoch": 15.25, "learning_rate": 0.0001476152131982547, "loss": 1.4311, "step": 543500 }, { "epoch": 15.27, "learning_rate": 0.0001474747663901425, "loss": 1.4027, "step": 544000 }, { "epoch": 15.28, "learning_rate": 0.00014733431958203027, "loss": 1.4169, "step": 544500 }, { "epoch": 15.29, "learning_rate": 0.00014719387277391808, "loss": 1.3918, "step": 545000 }, { "epoch": 15.31, "learning_rate": 0.00014705342596580588, "loss": 1.4059, "step": 545500 }, { "epoch": 15.32, "learning_rate": 0.00014691297915769367, "loss": 1.4136, "step": 546000 }, { "epoch": 15.34, "learning_rate": 0.00014677253234958145, "loss": 1.3865, "step": 546500 }, { "epoch": 15.35, "learning_rate": 0.00014663208554146925, "loss": 1.3966, "step": 547000 }, { "epoch": 15.36, "learning_rate": 0.00014649163873335704, "loss": 1.4183, "step": 547500 }, { "epoch": 15.38, "learning_rate": 0.00014635119192524482, "loss": 1.4159, "step": 548000 }, { "epoch": 15.39, "learning_rate": 0.00014621074511713262, "loss": 1.4426, "step": 548500 }, { "epoch": 15.41, "learning_rate": 0.00014607029830902043, "loss": 1.4249, "step": 549000 }, { "epoch": 15.42, "learning_rate": 0.0001459298515009082, "loss": 1.4265, "step": 549500 }, { "epoch": 15.43, "learning_rate": 0.000145789404692796, "loss": 1.4052, "step": 550000 }, { "epoch": 15.45, "learning_rate": 0.0001456489578846838, "loss": 1.3974, "step": 550500 }, { "epoch": 15.46, "learning_rate": 0.00014550851107657158, "loss": 1.4252, "step": 551000 }, { "epoch": 15.48, "learning_rate": 0.00014536806426845936, "loss": 1.4379, "step": 551500 }, { "epoch": 15.49, "learning_rate": 0.00014522761746034717, "loss": 1.448, "step": 552000 }, { "epoch": 15.5, "learning_rate": 0.00014508717065223498, "loss": 1.3834, "step": 552500 }, { "epoch": 15.52, "learning_rate": 0.00014494672384412276, "loss": 1.44, "step": 553000 }, { "epoch": 15.53, "learning_rate": 0.00014480627703601054, "loss": 1.4171, "step": 553500 }, { "epoch": 15.55, "learning_rate": 0.00014466583022789835, "loss": 1.3753, "step": 554000 }, { "epoch": 15.56, "learning_rate": 0.00014452538341978613, "loss": 1.4184, "step": 554500 }, { "epoch": 15.57, "learning_rate": 0.0001443849366116739, "loss": 1.4294, "step": 555000 }, { "epoch": 15.59, "learning_rate": 0.00014424448980356172, "loss": 1.4299, "step": 555500 }, { "epoch": 15.6, "learning_rate": 0.00014410404299544953, "loss": 1.4341, "step": 556000 }, { "epoch": 15.62, "learning_rate": 0.0001439635961873373, "loss": 1.4158, "step": 556500 }, { "epoch": 15.63, "learning_rate": 0.0001438231493792251, "loss": 1.3985, "step": 557000 }, { "epoch": 15.64, "learning_rate": 0.0001436827025711129, "loss": 1.4194, "step": 557500 }, { "epoch": 15.66, "learning_rate": 0.00014354225576300068, "loss": 1.4301, "step": 558000 }, { "epoch": 15.67, "learning_rate": 0.0001434018089548885, "loss": 1.4088, "step": 558500 }, { "epoch": 15.69, "learning_rate": 0.00014326136214677627, "loss": 1.4206, "step": 559000 }, { "epoch": 15.7, "learning_rate": 0.00014312091533866405, "loss": 1.4305, "step": 559500 }, { "epoch": 15.72, "learning_rate": 0.00014298046853055186, "loss": 1.4144, "step": 560000 }, { "epoch": 15.73, "learning_rate": 0.00014284002172243964, "loss": 1.3909, "step": 560500 }, { "epoch": 15.74, "learning_rate": 0.00014269957491432742, "loss": 1.3907, "step": 561000 }, { "epoch": 15.76, "learning_rate": 0.00014255912810621523, "loss": 1.42, "step": 561500 }, { "epoch": 15.77, "learning_rate": 0.00014241868129810303, "loss": 1.4351, "step": 562000 }, { "epoch": 15.79, "learning_rate": 0.00014227823448999082, "loss": 1.4395, "step": 562500 }, { "epoch": 15.8, "learning_rate": 0.0001421377876818786, "loss": 1.4346, "step": 563000 }, { "epoch": 15.81, "learning_rate": 0.0001419973408737664, "loss": 1.4151, "step": 563500 }, { "epoch": 15.83, "learning_rate": 0.00014185689406565419, "loss": 1.411, "step": 564000 }, { "epoch": 15.84, "learning_rate": 0.00014171644725754197, "loss": 1.4364, "step": 564500 }, { "epoch": 15.86, "learning_rate": 0.00014157600044942977, "loss": 1.4227, "step": 565000 }, { "epoch": 15.87, "learning_rate": 0.00014143555364131758, "loss": 1.4068, "step": 565500 }, { "epoch": 15.88, "learning_rate": 0.00014129510683320536, "loss": 1.4267, "step": 566000 }, { "epoch": 15.9, "learning_rate": 0.00014115466002509314, "loss": 1.4291, "step": 566500 }, { "epoch": 15.91, "learning_rate": 0.00014101421321698095, "loss": 1.4357, "step": 567000 }, { "epoch": 15.93, "learning_rate": 0.00014087376640886873, "loss": 1.3937, "step": 567500 }, { "epoch": 15.94, "learning_rate": 0.00014073331960075651, "loss": 1.4021, "step": 568000 }, { "epoch": 15.95, "learning_rate": 0.00014059287279264432, "loss": 1.4269, "step": 568500 }, { "epoch": 15.97, "learning_rate": 0.00014045242598453213, "loss": 1.4335, "step": 569000 }, { "epoch": 15.98, "learning_rate": 0.0001403119791764199, "loss": 1.4595, "step": 569500 }, { "epoch": 16.0, "learning_rate": 0.0001401715323683077, "loss": 1.4421, "step": 570000 }, { "epoch": 16.01, "learning_rate": 0.0001400310855601955, "loss": 1.3778, "step": 570500 }, { "epoch": 16.02, "learning_rate": 0.00013989063875208328, "loss": 1.3223, "step": 571000 }, { "epoch": 16.04, "learning_rate": 0.00013975019194397106, "loss": 1.3708, "step": 571500 }, { "epoch": 16.05, "learning_rate": 0.00013960974513585887, "loss": 1.3409, "step": 572000 }, { "epoch": 16.07, "learning_rate": 0.00013946929832774668, "loss": 1.3737, "step": 572500 }, { "epoch": 16.08, "learning_rate": 0.00013932885151963446, "loss": 1.3662, "step": 573000 }, { "epoch": 16.09, "learning_rate": 0.00013918840471152224, "loss": 1.3599, "step": 573500 }, { "epoch": 16.11, "learning_rate": 0.00013904795790341005, "loss": 1.3449, "step": 574000 }, { "epoch": 16.12, "learning_rate": 0.00013890751109529783, "loss": 1.3371, "step": 574500 }, { "epoch": 16.14, "learning_rate": 0.0001387670642871856, "loss": 1.3623, "step": 575000 }, { "epoch": 16.15, "learning_rate": 0.00013862661747907342, "loss": 1.3336, "step": 575500 }, { "epoch": 16.16, "learning_rate": 0.0001384861706709612, "loss": 1.3426, "step": 576000 }, { "epoch": 16.18, "learning_rate": 0.000138345723862849, "loss": 1.389, "step": 576500 }, { "epoch": 16.19, "learning_rate": 0.0001382052770547368, "loss": 1.3399, "step": 577000 }, { "epoch": 16.21, "learning_rate": 0.00013806483024662457, "loss": 1.3722, "step": 577500 }, { "epoch": 16.22, "learning_rate": 0.00013792438343851238, "loss": 1.3475, "step": 578000 }, { "epoch": 16.23, "learning_rate": 0.00013778393663040016, "loss": 1.3582, "step": 578500 }, { "epoch": 16.25, "learning_rate": 0.00013764348982228796, "loss": 1.3363, "step": 579000 }, { "epoch": 16.26, "learning_rate": 0.00013750304301417575, "loss": 1.3506, "step": 579500 }, { "epoch": 16.28, "learning_rate": 0.00013736259620606355, "loss": 1.3908, "step": 580000 }, { "epoch": 16.29, "learning_rate": 0.00013722214939795133, "loss": 1.3727, "step": 580500 }, { "epoch": 16.3, "learning_rate": 0.00013708170258983912, "loss": 1.3497, "step": 581000 }, { "epoch": 16.32, "learning_rate": 0.00013694125578172692, "loss": 1.357, "step": 581500 }, { "epoch": 16.33, "learning_rate": 0.0001368008089736147, "loss": 1.3516, "step": 582000 }, { "epoch": 16.35, "learning_rate": 0.0001366603621655025, "loss": 1.3448, "step": 582500 }, { "epoch": 16.36, "learning_rate": 0.0001365199153573903, "loss": 1.3756, "step": 583000 }, { "epoch": 16.37, "learning_rate": 0.0001363794685492781, "loss": 1.3671, "step": 583500 }, { "epoch": 16.39, "learning_rate": 0.00013623902174116588, "loss": 1.3616, "step": 584000 }, { "epoch": 16.4, "learning_rate": 0.00013609857493305366, "loss": 1.3418, "step": 584500 }, { "epoch": 16.42, "learning_rate": 0.00013595812812494147, "loss": 1.3471, "step": 585000 }, { "epoch": 16.43, "learning_rate": 0.00013581768131682928, "loss": 1.358, "step": 585500 }, { "epoch": 16.44, "learning_rate": 0.00013567723450871706, "loss": 1.357, "step": 586000 }, { "epoch": 16.46, "learning_rate": 0.00013553678770060484, "loss": 1.3636, "step": 586500 }, { "epoch": 16.47, "learning_rate": 0.00013539634089249265, "loss": 1.3472, "step": 587000 }, { "epoch": 16.49, "learning_rate": 0.00013525589408438043, "loss": 1.3594, "step": 587500 }, { "epoch": 16.5, "learning_rate": 0.0001351154472762682, "loss": 1.3551, "step": 588000 }, { "epoch": 16.51, "learning_rate": 0.00013497500046815602, "loss": 1.3657, "step": 588500 }, { "epoch": 16.53, "learning_rate": 0.00013483455366004383, "loss": 1.3729, "step": 589000 }, { "epoch": 16.54, "learning_rate": 0.0001346941068519316, "loss": 1.3573, "step": 589500 }, { "epoch": 16.56, "learning_rate": 0.0001345536600438194, "loss": 1.336, "step": 590000 }, { "epoch": 16.57, "learning_rate": 0.0001344132132357072, "loss": 1.3719, "step": 590500 }, { "epoch": 16.59, "learning_rate": 0.00013427276642759498, "loss": 1.3597, "step": 591000 }, { "epoch": 16.6, "learning_rate": 0.00013413231961948276, "loss": 1.3644, "step": 591500 }, { "epoch": 16.61, "learning_rate": 0.00013399187281137057, "loss": 1.3769, "step": 592000 }, { "epoch": 16.63, "learning_rate": 0.00013385142600325835, "loss": 1.372, "step": 592500 }, { "epoch": 16.64, "learning_rate": 0.00013371097919514616, "loss": 1.3404, "step": 593000 }, { "epoch": 16.66, "learning_rate": 0.00013357053238703394, "loss": 1.3573, "step": 593500 }, { "epoch": 16.67, "learning_rate": 0.00013343008557892172, "loss": 1.3646, "step": 594000 }, { "epoch": 16.68, "learning_rate": 0.00013328963877080953, "loss": 1.3726, "step": 594500 }, { "epoch": 16.7, "learning_rate": 0.0001331491919626973, "loss": 1.347, "step": 595000 }, { "epoch": 16.71, "learning_rate": 0.0001330087451545851, "loss": 1.3938, "step": 595500 }, { "epoch": 16.73, "learning_rate": 0.0001328682983464729, "loss": 1.3703, "step": 596000 }, { "epoch": 16.74, "learning_rate": 0.0001327278515383607, "loss": 1.3553, "step": 596500 }, { "epoch": 16.75, "learning_rate": 0.00013258740473024848, "loss": 1.3985, "step": 597000 }, { "epoch": 16.77, "learning_rate": 0.00013244695792213627, "loss": 1.3638, "step": 597500 }, { "epoch": 16.78, "learning_rate": 0.00013230651111402407, "loss": 1.3921, "step": 598000 }, { "epoch": 16.8, "learning_rate": 0.00013216606430591185, "loss": 1.3586, "step": 598500 }, { "epoch": 16.81, "learning_rate": 0.00013202561749779966, "loss": 1.3723, "step": 599000 }, { "epoch": 16.82, "learning_rate": 0.00013188517068968744, "loss": 1.3877, "step": 599500 }, { "epoch": 16.84, "learning_rate": 0.00013174472388157525, "loss": 1.396, "step": 600000 }, { "epoch": 16.85, "learning_rate": 0.00013160427707346303, "loss": 1.3886, "step": 600500 }, { "epoch": 16.87, "learning_rate": 0.0001314638302653508, "loss": 1.3702, "step": 601000 }, { "epoch": 16.88, "learning_rate": 0.00013132338345723862, "loss": 1.3806, "step": 601500 }, { "epoch": 16.89, "learning_rate": 0.0001311829366491264, "loss": 1.3646, "step": 602000 }, { "epoch": 16.91, "learning_rate": 0.0001310424898410142, "loss": 1.3682, "step": 602500 }, { "epoch": 16.92, "learning_rate": 0.000130902043032902, "loss": 1.3821, "step": 603000 }, { "epoch": 16.94, "learning_rate": 0.0001307615962247898, "loss": 1.3607, "step": 603500 }, { "epoch": 16.95, "learning_rate": 0.00013062114941667758, "loss": 1.3596, "step": 604000 }, { "epoch": 16.96, "learning_rate": 0.00013048070260856536, "loss": 1.3993, "step": 604500 }, { "epoch": 16.98, "learning_rate": 0.00013034025580045317, "loss": 1.3803, "step": 605000 }, { "epoch": 16.99, "learning_rate": 0.00013019980899234095, "loss": 1.3975, "step": 605500 }, { "epoch": 17.01, "learning_rate": 0.00013005936218422876, "loss": 1.3645, "step": 606000 }, { "epoch": 17.02, "learning_rate": 0.00012991891537611654, "loss": 1.2937, "step": 606500 }, { "epoch": 17.03, "learning_rate": 0.00012977846856800435, "loss": 1.3025, "step": 607000 }, { "epoch": 17.05, "learning_rate": 0.00012963802175989213, "loss": 1.2941, "step": 607500 }, { "epoch": 17.06, "learning_rate": 0.0001294975749517799, "loss": 1.3042, "step": 608000 }, { "epoch": 17.08, "learning_rate": 0.00012935712814366772, "loss": 1.3002, "step": 608500 }, { "epoch": 17.09, "learning_rate": 0.0001292166813355555, "loss": 1.3202, "step": 609000 }, { "epoch": 17.1, "learning_rate": 0.0001290762345274433, "loss": 1.2977, "step": 609500 }, { "epoch": 17.12, "learning_rate": 0.00012893578771933109, "loss": 1.2959, "step": 610000 }, { "epoch": 17.13, "learning_rate": 0.00012879534091121887, "loss": 1.3038, "step": 610500 }, { "epoch": 17.15, "learning_rate": 0.00012865489410310668, "loss": 1.3174, "step": 611000 }, { "epoch": 17.16, "learning_rate": 0.00012851444729499446, "loss": 1.2808, "step": 611500 }, { "epoch": 17.17, "learning_rate": 0.00012837400048688224, "loss": 1.3028, "step": 612000 }, { "epoch": 17.19, "learning_rate": 0.00012823355367877005, "loss": 1.3325, "step": 612500 }, { "epoch": 17.2, "learning_rate": 0.00012809310687065785, "loss": 1.3061, "step": 613000 }, { "epoch": 17.22, "learning_rate": 0.00012795266006254563, "loss": 1.3116, "step": 613500 }, { "epoch": 17.23, "learning_rate": 0.00012781221325443341, "loss": 1.3119, "step": 614000 }, { "epoch": 17.24, "learning_rate": 0.00012767176644632122, "loss": 1.303, "step": 614500 }, { "epoch": 17.26, "learning_rate": 0.000127531319638209, "loss": 1.276, "step": 615000 }, { "epoch": 17.27, "learning_rate": 0.00012739087283009678, "loss": 1.3066, "step": 615500 }, { "epoch": 17.29, "learning_rate": 0.0001272504260219846, "loss": 1.3132, "step": 616000 }, { "epoch": 17.3, "learning_rate": 0.0001271099792138724, "loss": 1.2656, "step": 616500 }, { "epoch": 17.31, "learning_rate": 0.00012696953240576018, "loss": 1.3112, "step": 617000 }, { "epoch": 17.33, "learning_rate": 0.00012682908559764796, "loss": 1.337, "step": 617500 }, { "epoch": 17.34, "learning_rate": 0.00012668863878953577, "loss": 1.3286, "step": 618000 }, { "epoch": 17.36, "learning_rate": 0.00012654819198142355, "loss": 1.2837, "step": 618500 }, { "epoch": 17.37, "learning_rate": 0.00012640774517331133, "loss": 1.3011, "step": 619000 }, { "epoch": 17.38, "learning_rate": 0.00012626729836519914, "loss": 1.3076, "step": 619500 }, { "epoch": 17.4, "learning_rate": 0.00012612685155708695, "loss": 1.3343, "step": 620000 }, { "epoch": 17.41, "learning_rate": 0.00012598640474897473, "loss": 1.2962, "step": 620500 }, { "epoch": 17.43, "learning_rate": 0.0001258459579408625, "loss": 1.3236, "step": 621000 }, { "epoch": 17.44, "learning_rate": 0.00012570551113275032, "loss": 1.3082, "step": 621500 }, { "epoch": 17.45, "learning_rate": 0.0001255650643246381, "loss": 1.3086, "step": 622000 }, { "epoch": 17.47, "learning_rate": 0.0001254246175165259, "loss": 1.3151, "step": 622500 }, { "epoch": 17.48, "learning_rate": 0.0001252841707084137, "loss": 1.3091, "step": 623000 }, { "epoch": 17.5, "learning_rate": 0.0001251437239003015, "loss": 1.3105, "step": 623500 }, { "epoch": 17.51, "learning_rate": 0.00012500327709218928, "loss": 1.3166, "step": 624000 }, { "epoch": 17.53, "learning_rate": 0.00012486283028407706, "loss": 1.3014, "step": 624500 }, { "epoch": 17.54, "learning_rate": 0.00012472238347596487, "loss": 1.3021, "step": 625000 }, { "epoch": 17.55, "learning_rate": 0.00012458193666785265, "loss": 1.3102, "step": 625500 }, { "epoch": 17.57, "learning_rate": 0.00012444148985974046, "loss": 1.3234, "step": 626000 }, { "epoch": 17.58, "learning_rate": 0.00012430104305162824, "loss": 1.336, "step": 626500 }, { "epoch": 17.6, "learning_rate": 0.00012416059624351604, "loss": 1.315, "step": 627000 }, { "epoch": 17.61, "learning_rate": 0.00012402014943540382, "loss": 1.3038, "step": 627500 }, { "epoch": 17.62, "learning_rate": 0.0001238797026272916, "loss": 1.3132, "step": 628000 }, { "epoch": 17.64, "learning_rate": 0.00012373925581917941, "loss": 1.3037, "step": 628500 }, { "epoch": 17.65, "learning_rate": 0.0001235988090110672, "loss": 1.3109, "step": 629000 }, { "epoch": 17.67, "learning_rate": 0.000123458362202955, "loss": 1.3006, "step": 629500 }, { "epoch": 17.68, "learning_rate": 0.00012331791539484278, "loss": 1.3587, "step": 630000 }, { "epoch": 17.69, "learning_rate": 0.00012317746858673056, "loss": 1.3146, "step": 630500 }, { "epoch": 17.71, "learning_rate": 0.00012303702177861837, "loss": 1.3138, "step": 631000 }, { "epoch": 17.72, "learning_rate": 0.00012289657497050615, "loss": 1.2895, "step": 631500 }, { "epoch": 17.74, "learning_rate": 0.00012275612816239393, "loss": 1.3425, "step": 632000 }, { "epoch": 17.75, "learning_rate": 0.00012261568135428174, "loss": 1.3281, "step": 632500 }, { "epoch": 17.76, "learning_rate": 0.00012247523454616955, "loss": 1.326, "step": 633000 }, { "epoch": 17.78, "learning_rate": 0.00012233478773805733, "loss": 1.3228, "step": 633500 }, { "epoch": 17.79, "learning_rate": 0.0001221943409299451, "loss": 1.3185, "step": 634000 }, { "epoch": 17.81, "learning_rate": 0.0001220538941218329, "loss": 1.349, "step": 634500 }, { "epoch": 17.82, "learning_rate": 0.0001219134473137207, "loss": 1.3335, "step": 635000 }, { "epoch": 17.83, "learning_rate": 0.0001217730005056085, "loss": 1.3331, "step": 635500 }, { "epoch": 17.85, "learning_rate": 0.0001216325536974963, "loss": 1.3346, "step": 636000 }, { "epoch": 17.86, "learning_rate": 0.00012149210688938408, "loss": 1.3286, "step": 636500 }, { "epoch": 17.88, "learning_rate": 0.00012135166008127188, "loss": 1.3264, "step": 637000 }, { "epoch": 17.89, "learning_rate": 0.00012121121327315967, "loss": 1.3262, "step": 637500 }, { "epoch": 17.9, "learning_rate": 0.00012107076646504745, "loss": 1.333, "step": 638000 }, { "epoch": 17.92, "learning_rate": 0.00012093031965693525, "loss": 1.3542, "step": 638500 }, { "epoch": 17.93, "learning_rate": 0.00012078987284882304, "loss": 1.3194, "step": 639000 }, { "epoch": 17.95, "learning_rate": 0.00012064942604071085, "loss": 1.3137, "step": 639500 }, { "epoch": 17.96, "learning_rate": 0.00012050897923259863, "loss": 1.3317, "step": 640000 }, { "epoch": 17.97, "learning_rate": 0.00012036853242448643, "loss": 1.3195, "step": 640500 }, { "epoch": 17.99, "learning_rate": 0.00012022808561637422, "loss": 1.3313, "step": 641000 }, { "epoch": 18.0, "learning_rate": 0.000120087638808262, "loss": 1.3185, "step": 641500 }, { "epoch": 18.02, "learning_rate": 0.0001199471920001498, "loss": 1.2436, "step": 642000 }, { "epoch": 18.03, "learning_rate": 0.00011980674519203758, "loss": 1.2658, "step": 642500 }, { "epoch": 18.04, "learning_rate": 0.00011966629838392539, "loss": 1.2675, "step": 643000 }, { "epoch": 18.06, "learning_rate": 0.00011952585157581318, "loss": 1.2203, "step": 643500 }, { "epoch": 18.07, "learning_rate": 0.00011938540476770097, "loss": 1.2593, "step": 644000 }, { "epoch": 18.09, "learning_rate": 0.00011924495795958876, "loss": 1.2784, "step": 644500 }, { "epoch": 18.1, "learning_rate": 0.00011910451115147655, "loss": 1.2633, "step": 645000 }, { "epoch": 18.11, "learning_rate": 0.00011896406434336434, "loss": 1.2395, "step": 645500 }, { "epoch": 18.13, "learning_rate": 0.00011882361753525213, "loss": 1.2544, "step": 646000 }, { "epoch": 18.14, "learning_rate": 0.00011868317072713993, "loss": 1.2599, "step": 646500 }, { "epoch": 18.16, "learning_rate": 0.00011854272391902773, "loss": 1.2445, "step": 647000 }, { "epoch": 18.17, "learning_rate": 0.00011840227711091552, "loss": 1.2267, "step": 647500 }, { "epoch": 18.18, "learning_rate": 0.0001182618303028033, "loss": 1.2634, "step": 648000 }, { "epoch": 18.2, "learning_rate": 0.0001181213834946911, "loss": 1.2239, "step": 648500 }, { "epoch": 18.21, "learning_rate": 0.00011798093668657889, "loss": 1.2746, "step": 649000 }, { "epoch": 18.23, "learning_rate": 0.00011784048987846669, "loss": 1.2392, "step": 649500 }, { "epoch": 18.24, "learning_rate": 0.00011770004307035448, "loss": 1.2407, "step": 650000 }, { "epoch": 18.25, "learning_rate": 0.00011755959626224228, "loss": 1.2554, "step": 650500 }, { "epoch": 18.27, "learning_rate": 0.00011741914945413006, "loss": 1.2624, "step": 651000 }, { "epoch": 18.28, "learning_rate": 0.00011727870264601785, "loss": 1.2498, "step": 651500 }, { "epoch": 18.3, "learning_rate": 0.00011713825583790565, "loss": 1.2597, "step": 652000 }, { "epoch": 18.31, "learning_rate": 0.00011699780902979343, "loss": 1.278, "step": 652500 }, { "epoch": 18.32, "learning_rate": 0.00011685736222168123, "loss": 1.2565, "step": 653000 }, { "epoch": 18.34, "learning_rate": 0.00011671691541356903, "loss": 1.2496, "step": 653500 }, { "epoch": 18.35, "learning_rate": 0.00011657646860545682, "loss": 1.2582, "step": 654000 }, { "epoch": 18.37, "learning_rate": 0.0001164360217973446, "loss": 1.2733, "step": 654500 }, { "epoch": 18.38, "learning_rate": 0.0001162955749892324, "loss": 1.2515, "step": 655000 }, { "epoch": 18.4, "learning_rate": 0.00011615512818112019, "loss": 1.2596, "step": 655500 }, { "epoch": 18.41, "learning_rate": 0.00011601468137300797, "loss": 1.2828, "step": 656000 }, { "epoch": 18.42, "learning_rate": 0.00011587423456489578, "loss": 1.2634, "step": 656500 }, { "epoch": 18.44, "learning_rate": 0.00011573378775678358, "loss": 1.2501, "step": 657000 }, { "epoch": 18.45, "learning_rate": 0.00011559334094867137, "loss": 1.2474, "step": 657500 }, { "epoch": 18.47, "learning_rate": 0.00011545289414055915, "loss": 1.2746, "step": 658000 }, { "epoch": 18.48, "learning_rate": 0.00011531244733244695, "loss": 1.2862, "step": 658500 }, { "epoch": 18.49, "learning_rate": 0.00011517200052433474, "loss": 1.2696, "step": 659000 }, { "epoch": 18.51, "learning_rate": 0.00011503155371622252, "loss": 1.2729, "step": 659500 }, { "epoch": 18.52, "learning_rate": 0.00011489110690811033, "loss": 1.255, "step": 660000 }, { "epoch": 18.54, "learning_rate": 0.00011475066009999812, "loss": 1.2736, "step": 660500 }, { "epoch": 18.55, "learning_rate": 0.0001146102132918859, "loss": 1.2411, "step": 661000 }, { "epoch": 18.56, "learning_rate": 0.0001144697664837737, "loss": 1.2661, "step": 661500 }, { "epoch": 18.58, "learning_rate": 0.0001143293196756615, "loss": 1.2637, "step": 662000 }, { "epoch": 18.59, "learning_rate": 0.00011418887286754927, "loss": 1.2766, "step": 662500 }, { "epoch": 18.61, "learning_rate": 0.00011404842605943708, "loss": 1.2677, "step": 663000 }, { "epoch": 18.62, "learning_rate": 0.00011390797925132488, "loss": 1.2798, "step": 663500 }, { "epoch": 18.63, "learning_rate": 0.00011376753244321267, "loss": 1.2858, "step": 664000 }, { "epoch": 18.65, "learning_rate": 0.00011362708563510045, "loss": 1.2779, "step": 664500 }, { "epoch": 18.66, "learning_rate": 0.00011348663882698825, "loss": 1.2734, "step": 665000 }, { "epoch": 18.68, "learning_rate": 0.00011334619201887604, "loss": 1.2503, "step": 665500 }, { "epoch": 18.69, "learning_rate": 0.00011320574521076382, "loss": 1.2903, "step": 666000 }, { "epoch": 18.7, "learning_rate": 0.00011306529840265163, "loss": 1.2798, "step": 666500 }, { "epoch": 18.72, "learning_rate": 0.00011292485159453943, "loss": 1.2993, "step": 667000 }, { "epoch": 18.73, "learning_rate": 0.0001127844047864272, "loss": 1.259, "step": 667500 }, { "epoch": 18.75, "learning_rate": 0.000112643957978315, "loss": 1.2531, "step": 668000 }, { "epoch": 18.76, "learning_rate": 0.0001125035111702028, "loss": 1.2713, "step": 668500 }, { "epoch": 18.77, "learning_rate": 0.00011236306436209058, "loss": 1.2919, "step": 669000 }, { "epoch": 18.79, "learning_rate": 0.00011222261755397837, "loss": 1.2812, "step": 669500 }, { "epoch": 18.8, "learning_rate": 0.00011208217074586618, "loss": 1.2909, "step": 670000 }, { "epoch": 18.82, "learning_rate": 0.00011194172393775397, "loss": 1.2807, "step": 670500 }, { "epoch": 18.83, "learning_rate": 0.00011180127712964175, "loss": 1.2726, "step": 671000 }, { "epoch": 18.84, "learning_rate": 0.00011166083032152955, "loss": 1.2782, "step": 671500 }, { "epoch": 18.86, "learning_rate": 0.00011152038351341734, "loss": 1.2554, "step": 672000 }, { "epoch": 18.87, "learning_rate": 0.00011137993670530512, "loss": 1.2748, "step": 672500 }, { "epoch": 18.89, "learning_rate": 0.00011123948989719293, "loss": 1.2875, "step": 673000 }, { "epoch": 18.9, "learning_rate": 0.00011109904308908073, "loss": 1.2593, "step": 673500 }, { "epoch": 18.91, "learning_rate": 0.00011095859628096852, "loss": 1.3059, "step": 674000 }, { "epoch": 18.93, "learning_rate": 0.0001108181494728563, "loss": 1.2666, "step": 674500 }, { "epoch": 18.94, "learning_rate": 0.0001106777026647441, "loss": 1.2965, "step": 675000 }, { "epoch": 18.96, "learning_rate": 0.00011053725585663189, "loss": 1.2656, "step": 675500 }, { "epoch": 18.97, "learning_rate": 0.00011039680904851967, "loss": 1.2737, "step": 676000 }, { "epoch": 18.98, "learning_rate": 0.00011025636224040748, "loss": 1.3124, "step": 676500 }, { "epoch": 19.0, "learning_rate": 0.00011011591543229527, "loss": 1.2881, "step": 677000 }, { "epoch": 19.01, "learning_rate": 0.00010997546862418305, "loss": 1.2242, "step": 677500 }, { "epoch": 19.03, "learning_rate": 0.00010983502181607085, "loss": 1.2085, "step": 678000 }, { "epoch": 19.04, "learning_rate": 0.00010969457500795864, "loss": 1.2096, "step": 678500 }, { "epoch": 19.05, "learning_rate": 0.00010955412819984642, "loss": 1.175, "step": 679000 }, { "epoch": 19.07, "learning_rate": 0.00010941368139173422, "loss": 1.2158, "step": 679500 }, { "epoch": 19.08, "learning_rate": 0.00010927323458362203, "loss": 1.192, "step": 680000 }, { "epoch": 19.1, "learning_rate": 0.00010913278777550982, "loss": 1.211, "step": 680500 }, { "epoch": 19.11, "learning_rate": 0.0001089923409673976, "loss": 1.1971, "step": 681000 }, { "epoch": 19.12, "learning_rate": 0.0001088518941592854, "loss": 1.2256, "step": 681500 }, { "epoch": 19.14, "learning_rate": 0.00010871144735117319, "loss": 1.2046, "step": 682000 }, { "epoch": 19.15, "learning_rate": 0.00010857100054306097, "loss": 1.1963, "step": 682500 }, { "epoch": 19.17, "learning_rate": 0.00010843055373494877, "loss": 1.1934, "step": 683000 }, { "epoch": 19.18, "learning_rate": 0.00010829010692683657, "loss": 1.2209, "step": 683500 }, { "epoch": 19.19, "learning_rate": 0.00010814966011872437, "loss": 1.1987, "step": 684000 }, { "epoch": 19.21, "learning_rate": 0.00010800921331061215, "loss": 1.1989, "step": 684500 }, { "epoch": 19.22, "learning_rate": 0.00010786876650249994, "loss": 1.2023, "step": 685000 }, { "epoch": 19.24, "learning_rate": 0.00010772831969438774, "loss": 1.2116, "step": 685500 }, { "epoch": 19.25, "learning_rate": 0.00010758787288627552, "loss": 1.2047, "step": 686000 }, { "epoch": 19.27, "learning_rate": 0.00010744742607816333, "loss": 1.2377, "step": 686500 }, { "epoch": 19.28, "learning_rate": 0.00010730697927005112, "loss": 1.189, "step": 687000 }, { "epoch": 19.29, "learning_rate": 0.0001071665324619389, "loss": 1.2157, "step": 687500 }, { "epoch": 19.31, "learning_rate": 0.0001070260856538267, "loss": 1.2216, "step": 688000 }, { "epoch": 19.32, "learning_rate": 0.00010688563884571449, "loss": 1.177, "step": 688500 }, { "epoch": 19.34, "learning_rate": 0.00010674519203760227, "loss": 1.2182, "step": 689000 }, { "epoch": 19.35, "learning_rate": 0.00010660474522949007, "loss": 1.1988, "step": 689500 }, { "epoch": 19.36, "learning_rate": 0.00010646429842137788, "loss": 1.2144, "step": 690000 }, { "epoch": 19.38, "learning_rate": 0.00010632385161326567, "loss": 1.2286, "step": 690500 }, { "epoch": 19.39, "learning_rate": 0.00010618340480515345, "loss": 1.2298, "step": 691000 }, { "epoch": 19.41, "learning_rate": 0.00010604295799704125, "loss": 1.1789, "step": 691500 }, { "epoch": 19.42, "learning_rate": 0.00010590251118892904, "loss": 1.2358, "step": 692000 }, { "epoch": 19.43, "learning_rate": 0.00010576206438081682, "loss": 1.2112, "step": 692500 }, { "epoch": 19.45, "learning_rate": 0.00010562161757270462, "loss": 1.2054, "step": 693000 }, { "epoch": 19.46, "learning_rate": 0.00010548117076459242, "loss": 1.2202, "step": 693500 }, { "epoch": 19.48, "learning_rate": 0.0001053407239564802, "loss": 1.2374, "step": 694000 }, { "epoch": 19.49, "learning_rate": 0.000105200277148368, "loss": 1.2075, "step": 694500 }, { "epoch": 19.5, "learning_rate": 0.0001050598303402558, "loss": 1.2234, "step": 695000 }, { "epoch": 19.52, "learning_rate": 0.00010491938353214357, "loss": 1.2231, "step": 695500 }, { "epoch": 19.53, "learning_rate": 0.00010477893672403137, "loss": 1.2446, "step": 696000 }, { "epoch": 19.55, "learning_rate": 0.00010463848991591916, "loss": 1.2135, "step": 696500 }, { "epoch": 19.56, "learning_rate": 0.00010449804310780697, "loss": 1.2248, "step": 697000 }, { "epoch": 19.57, "learning_rate": 0.00010435759629969475, "loss": 1.2104, "step": 697500 }, { "epoch": 19.59, "learning_rate": 0.00010421714949158255, "loss": 1.2013, "step": 698000 }, { "epoch": 19.6, "learning_rate": 0.00010407670268347034, "loss": 1.2264, "step": 698500 }, { "epoch": 19.62, "learning_rate": 0.00010393625587535812, "loss": 1.2195, "step": 699000 }, { "epoch": 19.63, "learning_rate": 0.00010379580906724592, "loss": 1.2195, "step": 699500 }, { "epoch": 19.64, "learning_rate": 0.00010365536225913372, "loss": 1.2198, "step": 700000 }, { "epoch": 19.66, "learning_rate": 0.00010351491545102152, "loss": 1.1913, "step": 700500 }, { "epoch": 19.67, "learning_rate": 0.0001033744686429093, "loss": 1.2231, "step": 701000 }, { "epoch": 19.69, "learning_rate": 0.0001032340218347971, "loss": 1.2456, "step": 701500 }, { "epoch": 19.7, "learning_rate": 0.00010309357502668489, "loss": 1.2426, "step": 702000 }, { "epoch": 19.71, "learning_rate": 0.00010295312821857267, "loss": 1.2245, "step": 702500 }, { "epoch": 19.73, "learning_rate": 0.00010281268141046046, "loss": 1.28, "step": 703000 }, { "epoch": 19.74, "learning_rate": 0.00010267223460234827, "loss": 1.253, "step": 703500 }, { "epoch": 19.76, "learning_rate": 0.00010253178779423605, "loss": 1.2213, "step": 704000 }, { "epoch": 19.77, "learning_rate": 0.00010239134098612385, "loss": 1.2145, "step": 704500 }, { "epoch": 19.78, "learning_rate": 0.00010225089417801164, "loss": 1.2306, "step": 705000 }, { "epoch": 19.8, "learning_rate": 0.00010211044736989942, "loss": 1.241, "step": 705500 }, { "epoch": 19.81, "learning_rate": 0.00010197000056178722, "loss": 1.2447, "step": 706000 }, { "epoch": 19.83, "learning_rate": 0.00010182955375367501, "loss": 1.2606, "step": 706500 }, { "epoch": 19.84, "learning_rate": 0.00010168910694556282, "loss": 1.2593, "step": 707000 }, { "epoch": 19.85, "learning_rate": 0.0001015486601374506, "loss": 1.2424, "step": 707500 }, { "epoch": 19.87, "learning_rate": 0.0001014082133293384, "loss": 1.2421, "step": 708000 }, { "epoch": 19.88, "learning_rate": 0.00010126776652122619, "loss": 1.2328, "step": 708500 }, { "epoch": 19.9, "learning_rate": 0.00010112731971311397, "loss": 1.2323, "step": 709000 }, { "epoch": 19.91, "learning_rate": 0.00010098687290500177, "loss": 1.2401, "step": 709500 }, { "epoch": 19.92, "learning_rate": 0.00010084642609688956, "loss": 1.2343, "step": 710000 }, { "epoch": 19.94, "learning_rate": 0.00010070597928877737, "loss": 1.1958, "step": 710500 }, { "epoch": 19.95, "learning_rate": 0.00010056553248066515, "loss": 1.2349, "step": 711000 }, { "epoch": 19.97, "learning_rate": 0.00010042508567255294, "loss": 1.2477, "step": 711500 }, { "epoch": 19.98, "learning_rate": 0.00010028463886444074, "loss": 1.2248, "step": 712000 }, { "epoch": 19.99, "learning_rate": 0.00010014419205632852, "loss": 1.2459, "step": 712500 }, { "epoch": 20.01, "learning_rate": 0.00010000374524821631, "loss": 1.1723, "step": 713000 }, { "epoch": 20.02, "learning_rate": 9.986329844010412e-05, "loss": 1.1634, "step": 713500 }, { "epoch": 20.04, "learning_rate": 9.97228516319919e-05, "loss": 1.1365, "step": 714000 }, { "epoch": 20.05, "learning_rate": 9.95824048238797e-05, "loss": 1.1556, "step": 714500 }, { "epoch": 20.06, "learning_rate": 9.944195801576749e-05, "loss": 1.179, "step": 715000 }, { "epoch": 20.08, "learning_rate": 9.930151120765527e-05, "loss": 1.1533, "step": 715500 }, { "epoch": 20.09, "learning_rate": 9.916106439954307e-05, "loss": 1.1557, "step": 716000 }, { "epoch": 20.11, "learning_rate": 9.902061759143086e-05, "loss": 1.1516, "step": 716500 }, { "epoch": 20.12, "learning_rate": 9.888017078331867e-05, "loss": 1.1769, "step": 717000 }, { "epoch": 20.13, "learning_rate": 9.873972397520645e-05, "loss": 1.1663, "step": 717500 }, { "epoch": 20.15, "learning_rate": 9.859927716709424e-05, "loss": 1.1744, "step": 718000 }, { "epoch": 20.16, "learning_rate": 9.845883035898204e-05, "loss": 1.1447, "step": 718500 }, { "epoch": 20.18, "learning_rate": 9.831838355086982e-05, "loss": 1.1439, "step": 719000 }, { "epoch": 20.19, "learning_rate": 9.817793674275761e-05, "loss": 1.1868, "step": 719500 }, { "epoch": 20.21, "learning_rate": 9.803748993464541e-05, "loss": 1.181, "step": 720000 }, { "epoch": 20.22, "learning_rate": 9.78970431265332e-05, "loss": 1.1764, "step": 720500 }, { "epoch": 20.23, "learning_rate": 9.7756596318421e-05, "loss": 1.1794, "step": 721000 }, { "epoch": 20.25, "learning_rate": 9.761614951030879e-05, "loss": 1.1583, "step": 721500 }, { "epoch": 20.26, "learning_rate": 9.747570270219657e-05, "loss": 1.174, "step": 722000 }, { "epoch": 20.28, "learning_rate": 9.733525589408437e-05, "loss": 1.1712, "step": 722500 }, { "epoch": 20.29, "learning_rate": 9.719480908597216e-05, "loss": 1.1656, "step": 723000 }, { "epoch": 20.3, "learning_rate": 9.705436227785997e-05, "loss": 1.1593, "step": 723500 }, { "epoch": 20.32, "learning_rate": 9.691391546974775e-05, "loss": 1.1696, "step": 724000 }, { "epoch": 20.33, "learning_rate": 9.677346866163554e-05, "loss": 1.1739, "step": 724500 }, { "epoch": 20.35, "learning_rate": 9.663302185352334e-05, "loss": 1.1932, "step": 725000 }, { "epoch": 20.36, "learning_rate": 9.649257504541112e-05, "loss": 1.1506, "step": 725500 }, { "epoch": 20.37, "learning_rate": 9.635212823729891e-05, "loss": 1.1521, "step": 726000 }, { "epoch": 20.39, "learning_rate": 9.621168142918671e-05, "loss": 1.177, "step": 726500 }, { "epoch": 20.4, "learning_rate": 9.607123462107452e-05, "loss": 1.1874, "step": 727000 }, { "epoch": 20.42, "learning_rate": 9.59307878129623e-05, "loss": 1.1543, "step": 727500 }, { "epoch": 20.43, "learning_rate": 9.579034100485009e-05, "loss": 1.1798, "step": 728000 }, { "epoch": 20.44, "learning_rate": 9.564989419673789e-05, "loss": 1.1665, "step": 728500 }, { "epoch": 20.46, "learning_rate": 9.550944738862567e-05, "loss": 1.1723, "step": 729000 }, { "epoch": 20.47, "learning_rate": 9.536900058051346e-05, "loss": 1.1711, "step": 729500 }, { "epoch": 20.49, "learning_rate": 9.522855377240126e-05, "loss": 1.1574, "step": 730000 }, { "epoch": 20.5, "learning_rate": 9.508810696428905e-05, "loss": 1.2109, "step": 730500 }, { "epoch": 20.51, "learning_rate": 9.494766015617685e-05, "loss": 1.1871, "step": 731000 }, { "epoch": 20.53, "learning_rate": 9.480721334806464e-05, "loss": 1.1752, "step": 731500 }, { "epoch": 20.54, "learning_rate": 9.466676653995242e-05, "loss": 1.1446, "step": 732000 }, { "epoch": 20.56, "learning_rate": 9.452631973184022e-05, "loss": 1.2237, "step": 732500 }, { "epoch": 20.57, "learning_rate": 9.438587292372801e-05, "loss": 1.1726, "step": 733000 }, { "epoch": 20.58, "learning_rate": 9.424542611561579e-05, "loss": 1.1836, "step": 733500 }, { "epoch": 20.6, "learning_rate": 9.41049793075036e-05, "loss": 1.1865, "step": 734000 }, { "epoch": 20.61, "learning_rate": 9.39645324993914e-05, "loss": 1.1597, "step": 734500 }, { "epoch": 20.63, "learning_rate": 9.382408569127919e-05, "loss": 1.1935, "step": 735000 }, { "epoch": 20.64, "learning_rate": 9.368363888316697e-05, "loss": 1.1654, "step": 735500 }, { "epoch": 20.65, "learning_rate": 9.354319207505476e-05, "loss": 1.1634, "step": 736000 }, { "epoch": 20.67, "learning_rate": 9.340274526694256e-05, "loss": 1.1966, "step": 736500 }, { "epoch": 20.68, "learning_rate": 9.326229845883035e-05, "loss": 1.1762, "step": 737000 }, { "epoch": 20.7, "learning_rate": 9.312185165071815e-05, "loss": 1.1922, "step": 737500 }, { "epoch": 20.71, "learning_rate": 9.298140484260594e-05, "loss": 1.183, "step": 738000 }, { "epoch": 20.72, "learning_rate": 9.284095803449372e-05, "loss": 1.171, "step": 738500 }, { "epoch": 20.74, "learning_rate": 9.270051122638152e-05, "loss": 1.1857, "step": 739000 }, { "epoch": 20.75, "learning_rate": 9.256006441826931e-05, "loss": 1.1882, "step": 739500 }, { "epoch": 20.77, "learning_rate": 9.241961761015709e-05, "loss": 1.1782, "step": 740000 }, { "epoch": 20.78, "learning_rate": 9.22791708020449e-05, "loss": 1.1772, "step": 740500 }, { "epoch": 20.79, "learning_rate": 9.21387239939327e-05, "loss": 1.1686, "step": 741000 }, { "epoch": 20.81, "learning_rate": 9.199827718582049e-05, "loss": 1.1965, "step": 741500 }, { "epoch": 20.82, "learning_rate": 9.185783037770827e-05, "loss": 1.1715, "step": 742000 }, { "epoch": 20.84, "learning_rate": 9.171738356959606e-05, "loss": 1.1701, "step": 742500 }, { "epoch": 20.85, "learning_rate": 9.157693676148386e-05, "loss": 1.1616, "step": 743000 }, { "epoch": 20.86, "learning_rate": 9.143648995337164e-05, "loss": 1.2022, "step": 743500 }, { "epoch": 20.88, "learning_rate": 9.129604314525945e-05, "loss": 1.1882, "step": 744000 }, { "epoch": 20.89, "learning_rate": 9.115559633714724e-05, "loss": 1.1873, "step": 744500 }, { "epoch": 20.91, "learning_rate": 9.101514952903504e-05, "loss": 1.1965, "step": 745000 }, { "epoch": 20.92, "learning_rate": 9.087470272092282e-05, "loss": 1.2093, "step": 745500 }, { "epoch": 20.93, "learning_rate": 9.073425591281061e-05, "loss": 1.1754, "step": 746000 }, { "epoch": 20.95, "learning_rate": 9.05938091046984e-05, "loss": 1.1968, "step": 746500 }, { "epoch": 20.96, "learning_rate": 9.045336229658619e-05, "loss": 1.1894, "step": 747000 }, { "epoch": 20.98, "learning_rate": 9.0312915488474e-05, "loss": 1.1938, "step": 747500 }, { "epoch": 20.99, "learning_rate": 9.017246868036179e-05, "loss": 1.1966, "step": 748000 }, { "epoch": 21.0, "learning_rate": 9.003202187224957e-05, "loss": 1.175, "step": 748500 }, { "epoch": 21.02, "learning_rate": 8.989157506413737e-05, "loss": 1.1007, "step": 749000 }, { "epoch": 21.03, "learning_rate": 8.975112825602516e-05, "loss": 1.1129, "step": 749500 }, { "epoch": 21.05, "learning_rate": 8.961068144791294e-05, "loss": 1.1079, "step": 750000 }, { "epoch": 21.06, "learning_rate": 8.947023463980075e-05, "loss": 1.1114, "step": 750500 }, { "epoch": 21.08, "learning_rate": 8.932978783168854e-05, "loss": 1.1245, "step": 751000 }, { "epoch": 21.09, "learning_rate": 8.918934102357634e-05, "loss": 1.1261, "step": 751500 }, { "epoch": 21.1, "learning_rate": 8.904889421546412e-05, "loss": 1.1244, "step": 752000 }, { "epoch": 21.12, "learning_rate": 8.890844740735191e-05, "loss": 1.1137, "step": 752500 }, { "epoch": 21.13, "learning_rate": 8.876800059923971e-05, "loss": 1.1279, "step": 753000 }, { "epoch": 21.15, "learning_rate": 8.862755379112749e-05, "loss": 1.1199, "step": 753500 }, { "epoch": 21.16, "learning_rate": 8.84871069830153e-05, "loss": 1.124, "step": 754000 }, { "epoch": 21.17, "learning_rate": 8.834666017490309e-05, "loss": 1.0937, "step": 754500 }, { "epoch": 21.19, "learning_rate": 8.820621336679089e-05, "loss": 1.134, "step": 755000 }, { "epoch": 21.2, "learning_rate": 8.806576655867867e-05, "loss": 1.1194, "step": 755500 }, { "epoch": 21.22, "learning_rate": 8.792531975056646e-05, "loss": 1.1378, "step": 756000 }, { "epoch": 21.23, "learning_rate": 8.778487294245426e-05, "loss": 1.1242, "step": 756500 }, { "epoch": 21.24, "learning_rate": 8.764442613434204e-05, "loss": 1.1631, "step": 757000 }, { "epoch": 21.26, "learning_rate": 8.750397932622984e-05, "loss": 1.103, "step": 757500 }, { "epoch": 21.27, "learning_rate": 8.736353251811764e-05, "loss": 1.1169, "step": 758000 }, { "epoch": 21.29, "learning_rate": 8.722308571000542e-05, "loss": 1.1532, "step": 758500 }, { "epoch": 21.3, "learning_rate": 8.708263890189321e-05, "loss": 1.1296, "step": 759000 }, { "epoch": 21.31, "learning_rate": 8.694219209378101e-05, "loss": 1.1365, "step": 759500 }, { "epoch": 21.33, "learning_rate": 8.680174528566879e-05, "loss": 1.1171, "step": 760000 }, { "epoch": 21.34, "learning_rate": 8.666129847755658e-05, "loss": 1.1402, "step": 760500 }, { "epoch": 21.36, "learning_rate": 8.652085166944439e-05, "loss": 1.1393, "step": 761000 }, { "epoch": 21.37, "learning_rate": 8.638040486133219e-05, "loss": 1.1442, "step": 761500 }, { "epoch": 21.38, "learning_rate": 8.623995805321997e-05, "loss": 1.143, "step": 762000 }, { "epoch": 21.4, "learning_rate": 8.609951124510776e-05, "loss": 1.1264, "step": 762500 }, { "epoch": 21.41, "learning_rate": 8.595906443699556e-05, "loss": 1.1292, "step": 763000 }, { "epoch": 21.43, "learning_rate": 8.581861762888334e-05, "loss": 1.1087, "step": 763500 }, { "epoch": 21.44, "learning_rate": 8.567817082077115e-05, "loss": 1.1085, "step": 764000 }, { "epoch": 21.45, "learning_rate": 8.553772401265894e-05, "loss": 1.1242, "step": 764500 }, { "epoch": 21.47, "learning_rate": 8.539727720454672e-05, "loss": 1.1303, "step": 765000 }, { "epoch": 21.48, "learning_rate": 8.525683039643451e-05, "loss": 1.099, "step": 765500 }, { "epoch": 21.5, "learning_rate": 8.511638358832231e-05, "loss": 1.1538, "step": 766000 }, { "epoch": 21.51, "learning_rate": 8.497593678021009e-05, "loss": 1.1517, "step": 766500 }, { "epoch": 21.52, "learning_rate": 8.483548997209788e-05, "loss": 1.124, "step": 767000 }, { "epoch": 21.54, "learning_rate": 8.469504316398569e-05, "loss": 1.1305, "step": 767500 }, { "epoch": 21.55, "learning_rate": 8.455459635587349e-05, "loss": 1.1466, "step": 768000 }, { "epoch": 21.57, "learning_rate": 8.441414954776127e-05, "loss": 1.1146, "step": 768500 }, { "epoch": 21.58, "learning_rate": 8.427370273964906e-05, "loss": 1.122, "step": 769000 }, { "epoch": 21.59, "learning_rate": 8.413325593153686e-05, "loss": 1.1454, "step": 769500 }, { "epoch": 21.61, "learning_rate": 8.399280912342464e-05, "loss": 1.16, "step": 770000 }, { "epoch": 21.62, "learning_rate": 8.385236231531243e-05, "loss": 1.1593, "step": 770500 }, { "epoch": 21.64, "learning_rate": 8.371191550720024e-05, "loss": 1.1463, "step": 771000 }, { "epoch": 21.65, "learning_rate": 8.357146869908803e-05, "loss": 1.1309, "step": 771500 }, { "epoch": 21.66, "learning_rate": 8.343102189097582e-05, "loss": 1.1496, "step": 772000 }, { "epoch": 21.68, "learning_rate": 8.329057508286361e-05, "loss": 1.1591, "step": 772500 }, { "epoch": 21.69, "learning_rate": 8.31501282747514e-05, "loss": 1.105, "step": 773000 }, { "epoch": 21.71, "learning_rate": 8.300968146663919e-05, "loss": 1.1542, "step": 773500 }, { "epoch": 21.72, "learning_rate": 8.286923465852698e-05, "loss": 1.17, "step": 774000 }, { "epoch": 21.73, "learning_rate": 8.272878785041479e-05, "loss": 1.1318, "step": 774500 }, { "epoch": 21.75, "learning_rate": 8.258834104230257e-05, "loss": 1.1279, "step": 775000 }, { "epoch": 21.76, "learning_rate": 8.244789423419036e-05, "loss": 1.136, "step": 775500 }, { "epoch": 21.78, "learning_rate": 8.230744742607816e-05, "loss": 1.1214, "step": 776000 }, { "epoch": 21.79, "learning_rate": 8.216700061796594e-05, "loss": 1.1429, "step": 776500 }, { "epoch": 21.8, "learning_rate": 8.202655380985373e-05, "loss": 1.1407, "step": 777000 }, { "epoch": 21.82, "learning_rate": 8.188610700174154e-05, "loss": 1.1617, "step": 777500 }, { "epoch": 21.83, "learning_rate": 8.174566019362934e-05, "loss": 1.1193, "step": 778000 }, { "epoch": 21.85, "learning_rate": 8.160521338551712e-05, "loss": 1.1362, "step": 778500 }, { "epoch": 21.86, "learning_rate": 8.146476657740491e-05, "loss": 1.1325, "step": 779000 }, { "epoch": 21.87, "learning_rate": 8.13243197692927e-05, "loss": 1.1055, "step": 779500 }, { "epoch": 21.89, "learning_rate": 8.118387296118049e-05, "loss": 1.1304, "step": 780000 }, { "epoch": 21.9, "learning_rate": 8.104342615306828e-05, "loss": 1.1545, "step": 780500 }, { "epoch": 21.92, "learning_rate": 8.090297934495609e-05, "loss": 1.1518, "step": 781000 }, { "epoch": 21.93, "learning_rate": 8.076253253684388e-05, "loss": 1.1682, "step": 781500 }, { "epoch": 21.95, "learning_rate": 8.062208572873166e-05, "loss": 1.1433, "step": 782000 }, { "epoch": 21.96, "learning_rate": 8.048163892061946e-05, "loss": 1.1372, "step": 782500 }, { "epoch": 21.97, "learning_rate": 8.034119211250724e-05, "loss": 1.1321, "step": 783000 }, { "epoch": 21.99, "learning_rate": 8.020074530439503e-05, "loss": 1.1505, "step": 783500 }, { "epoch": 22.0, "learning_rate": 8.006029849628283e-05, "loss": 1.1292, "step": 784000 }, { "epoch": 22.02, "learning_rate": 7.991985168817064e-05, "loss": 1.0754, "step": 784500 }, { "epoch": 22.03, "learning_rate": 7.977940488005842e-05, "loss": 1.0455, "step": 785000 }, { "epoch": 22.04, "learning_rate": 7.963895807194621e-05, "loss": 1.0852, "step": 785500 }, { "epoch": 22.06, "learning_rate": 7.9498511263834e-05, "loss": 1.0497, "step": 786000 }, { "epoch": 22.07, "learning_rate": 7.935806445572179e-05, "loss": 1.1157, "step": 786500 }, { "epoch": 22.09, "learning_rate": 7.921761764760958e-05, "loss": 1.085, "step": 787000 }, { "epoch": 22.1, "learning_rate": 7.907717083949739e-05, "loss": 1.0713, "step": 787500 }, { "epoch": 22.11, "learning_rate": 7.893672403138518e-05, "loss": 1.1042, "step": 788000 }, { "epoch": 22.13, "learning_rate": 7.879627722327297e-05, "loss": 1.1009, "step": 788500 }, { "epoch": 22.14, "learning_rate": 7.865583041516076e-05, "loss": 1.0994, "step": 789000 }, { "epoch": 22.16, "learning_rate": 7.851538360704855e-05, "loss": 1.081, "step": 789500 }, { "epoch": 22.17, "learning_rate": 7.837493679893634e-05, "loss": 1.0888, "step": 790000 }, { "epoch": 22.18, "learning_rate": 7.823448999082413e-05, "loss": 1.1001, "step": 790500 }, { "epoch": 22.2, "learning_rate": 7.809404318271194e-05, "loss": 1.0861, "step": 791000 }, { "epoch": 22.21, "learning_rate": 7.795359637459972e-05, "loss": 1.0728, "step": 791500 }, { "epoch": 22.23, "learning_rate": 7.781314956648751e-05, "loss": 1.0877, "step": 792000 }, { "epoch": 22.24, "learning_rate": 7.767270275837531e-05, "loss": 1.1109, "step": 792500 }, { "epoch": 22.25, "learning_rate": 7.753225595026309e-05, "loss": 1.0825, "step": 793000 }, { "epoch": 22.27, "learning_rate": 7.739180914215088e-05, "loss": 1.0772, "step": 793500 }, { "epoch": 22.28, "learning_rate": 7.725136233403868e-05, "loss": 1.114, "step": 794000 }, { "epoch": 22.3, "learning_rate": 7.711091552592649e-05, "loss": 1.0956, "step": 794500 }, { "epoch": 22.31, "learning_rate": 7.697046871781427e-05, "loss": 1.0918, "step": 795000 }, { "epoch": 22.32, "learning_rate": 7.683002190970206e-05, "loss": 1.099, "step": 795500 }, { "epoch": 22.34, "learning_rate": 7.668957510158986e-05, "loss": 1.0841, "step": 796000 }, { "epoch": 22.35, "learning_rate": 7.654912829347764e-05, "loss": 1.0937, "step": 796500 }, { "epoch": 22.37, "learning_rate": 7.640868148536543e-05, "loss": 1.0962, "step": 797000 }, { "epoch": 22.38, "learning_rate": 7.626823467725323e-05, "loss": 1.1158, "step": 797500 }, { "epoch": 22.39, "learning_rate": 7.612778786914103e-05, "loss": 1.1044, "step": 798000 }, { "epoch": 22.41, "learning_rate": 7.598734106102881e-05, "loss": 1.0872, "step": 798500 }, { "epoch": 22.42, "learning_rate": 7.584689425291661e-05, "loss": 1.1139, "step": 799000 }, { "epoch": 22.44, "learning_rate": 7.57064474448044e-05, "loss": 1.0982, "step": 799500 }, { "epoch": 22.45, "learning_rate": 7.556600063669218e-05, "loss": 1.0748, "step": 800000 }, { "epoch": 22.46, "learning_rate": 7.542555382857998e-05, "loss": 1.0869, "step": 800500 }, { "epoch": 22.48, "learning_rate": 7.528510702046779e-05, "loss": 1.1141, "step": 801000 }, { "epoch": 22.49, "learning_rate": 7.514466021235557e-05, "loss": 1.0838, "step": 801500 }, { "epoch": 22.51, "learning_rate": 7.500421340424336e-05, "loss": 1.0729, "step": 802000 }, { "epoch": 22.52, "learning_rate": 7.486376659613116e-05, "loss": 1.0518, "step": 802500 }, { "epoch": 22.53, "learning_rate": 7.472331978801894e-05, "loss": 1.0645, "step": 803000 }, { "epoch": 22.55, "learning_rate": 7.458287297990675e-05, "loss": 1.0993, "step": 803500 }, { "epoch": 22.56, "learning_rate": 7.444242617179453e-05, "loss": 1.0817, "step": 804000 }, { "epoch": 22.58, "learning_rate": 7.430197936368232e-05, "loss": 1.1001, "step": 804500 }, { "epoch": 22.59, "learning_rate": 7.416153255557012e-05, "loss": 1.0691, "step": 805000 }, { "epoch": 22.6, "learning_rate": 7.402108574745791e-05, "loss": 1.0951, "step": 805500 }, { "epoch": 22.62, "learning_rate": 7.38806389393457e-05, "loss": 1.1136, "step": 806000 }, { "epoch": 22.63, "learning_rate": 7.374019213123348e-05, "loss": 1.0895, "step": 806500 }, { "epoch": 22.65, "learning_rate": 7.359974532312129e-05, "loss": 1.0736, "step": 807000 }, { "epoch": 22.66, "learning_rate": 7.345929851500907e-05, "loss": 1.1013, "step": 807500 }, { "epoch": 22.67, "learning_rate": 7.331885170689687e-05, "loss": 1.07, "step": 808000 }, { "epoch": 22.69, "learning_rate": 7.317840489878466e-05, "loss": 1.0795, "step": 808500 }, { "epoch": 22.7, "learning_rate": 7.303795809067246e-05, "loss": 1.0871, "step": 809000 }, { "epoch": 22.72, "learning_rate": 7.289751128256024e-05, "loss": 1.074, "step": 809500 }, { "epoch": 22.73, "learning_rate": 7.275706447444803e-05, "loss": 1.0948, "step": 810000 }, { "epoch": 22.74, "learning_rate": 7.261661766633583e-05, "loss": 1.1008, "step": 810500 }, { "epoch": 22.76, "learning_rate": 7.247617085822362e-05, "loss": 1.1233, "step": 811000 }, { "epoch": 22.77, "learning_rate": 7.233572405011142e-05, "loss": 1.0846, "step": 811500 }, { "epoch": 22.79, "learning_rate": 7.219527724199921e-05, "loss": 1.1133, "step": 812000 }, { "epoch": 22.8, "learning_rate": 7.2054830433887e-05, "loss": 1.1052, "step": 812500 }, { "epoch": 22.81, "learning_rate": 7.191438362577479e-05, "loss": 1.0933, "step": 813000 }, { "epoch": 22.83, "learning_rate": 7.177393681766258e-05, "loss": 1.0879, "step": 813500 }, { "epoch": 22.84, "learning_rate": 7.163349000955037e-05, "loss": 1.0886, "step": 814000 }, { "epoch": 22.86, "learning_rate": 7.149304320143817e-05, "loss": 1.0757, "step": 814500 }, { "epoch": 22.87, "learning_rate": 7.135259639332596e-05, "loss": 1.0979, "step": 815000 }, { "epoch": 22.89, "learning_rate": 7.121214958521376e-05, "loss": 1.1325, "step": 815500 }, { "epoch": 22.9, "learning_rate": 7.107170277710155e-05, "loss": 1.1131, "step": 816000 }, { "epoch": 22.91, "learning_rate": 7.093125596898933e-05, "loss": 1.107, "step": 816500 }, { "epoch": 22.93, "learning_rate": 7.079080916087714e-05, "loss": 1.0754, "step": 817000 }, { "epoch": 22.94, "learning_rate": 7.065036235276492e-05, "loss": 1.1159, "step": 817500 }, { "epoch": 22.96, "learning_rate": 7.050991554465272e-05, "loss": 1.1028, "step": 818000 }, { "epoch": 22.97, "learning_rate": 7.03694687365405e-05, "loss": 1.1204, "step": 818500 }, { "epoch": 22.98, "learning_rate": 7.02290219284283e-05, "loss": 1.0885, "step": 819000 }, { "epoch": 23.0, "learning_rate": 7.008857512031609e-05, "loss": 1.0953, "step": 819500 }, { "epoch": 23.01, "learning_rate": 6.994812831220388e-05, "loss": 1.056, "step": 820000 }, { "epoch": 23.03, "learning_rate": 6.980768150409168e-05, "loss": 1.031, "step": 820500 }, { "epoch": 23.04, "learning_rate": 6.966723469597947e-05, "loss": 1.0594, "step": 821000 }, { "epoch": 23.05, "learning_rate": 6.952678788786726e-05, "loss": 0.9976, "step": 821500 }, { "epoch": 23.07, "learning_rate": 6.938634107975505e-05, "loss": 1.0471, "step": 822000 }, { "epoch": 23.08, "learning_rate": 6.924589427164285e-05, "loss": 1.0296, "step": 822500 }, { "epoch": 23.1, "learning_rate": 6.910544746353063e-05, "loss": 1.0404, "step": 823000 }, { "epoch": 23.11, "learning_rate": 6.896500065541843e-05, "loss": 1.0662, "step": 823500 }, { "epoch": 23.12, "learning_rate": 6.882455384730622e-05, "loss": 1.0507, "step": 824000 }, { "epoch": 23.14, "learning_rate": 6.868410703919402e-05, "loss": 1.0617, "step": 824500 }, { "epoch": 23.15, "learning_rate": 6.854366023108181e-05, "loss": 1.0324, "step": 825000 }, { "epoch": 23.17, "learning_rate": 6.840321342296961e-05, "loss": 1.0515, "step": 825500 }, { "epoch": 23.18, "learning_rate": 6.82627666148574e-05, "loss": 1.051, "step": 826000 }, { "epoch": 23.19, "learning_rate": 6.812231980674518e-05, "loss": 1.0416, "step": 826500 }, { "epoch": 23.21, "learning_rate": 6.798187299863298e-05, "loss": 1.0278, "step": 827000 }, { "epoch": 23.22, "learning_rate": 6.784142619052077e-05, "loss": 1.0625, "step": 827500 }, { "epoch": 23.24, "learning_rate": 6.770097938240857e-05, "loss": 1.0301, "step": 828000 }, { "epoch": 23.25, "learning_rate": 6.756053257429635e-05, "loss": 1.0468, "step": 828500 }, { "epoch": 23.26, "learning_rate": 6.742008576618415e-05, "loss": 1.0368, "step": 829000 }, { "epoch": 23.28, "learning_rate": 6.727963895807194e-05, "loss": 1.0442, "step": 829500 }, { "epoch": 23.29, "learning_rate": 6.713919214995973e-05, "loss": 1.039, "step": 830000 }, { "epoch": 23.31, "learning_rate": 6.699874534184752e-05, "loss": 1.0589, "step": 830500 }, { "epoch": 23.32, "learning_rate": 6.685829853373532e-05, "loss": 1.0398, "step": 831000 }, { "epoch": 23.33, "learning_rate": 6.671785172562311e-05, "loss": 1.0442, "step": 831500 }, { "epoch": 23.35, "learning_rate": 6.65774049175109e-05, "loss": 1.0707, "step": 832000 }, { "epoch": 23.36, "learning_rate": 6.64369581093987e-05, "loss": 1.0625, "step": 832500 }, { "epoch": 23.38, "learning_rate": 6.629651130128648e-05, "loss": 1.0501, "step": 833000 }, { "epoch": 23.39, "learning_rate": 6.615606449317428e-05, "loss": 1.084, "step": 833500 }, { "epoch": 23.4, "learning_rate": 6.601561768506207e-05, "loss": 1.0412, "step": 834000 }, { "epoch": 23.42, "learning_rate": 6.587517087694987e-05, "loss": 1.0433, "step": 834500 }, { "epoch": 23.43, "learning_rate": 6.573472406883766e-05, "loss": 1.0554, "step": 835000 }, { "epoch": 23.45, "learning_rate": 6.559427726072546e-05, "loss": 1.0543, "step": 835500 }, { "epoch": 23.46, "learning_rate": 6.545383045261324e-05, "loss": 1.0653, "step": 836000 }, { "epoch": 23.47, "learning_rate": 6.531338364450103e-05, "loss": 1.0521, "step": 836500 }, { "epoch": 23.49, "learning_rate": 6.517293683638883e-05, "loss": 1.0521, "step": 837000 }, { "epoch": 23.5, "learning_rate": 6.503249002827662e-05, "loss": 1.0654, "step": 837500 }, { "epoch": 23.52, "learning_rate": 6.489204322016441e-05, "loss": 1.0661, "step": 838000 }, { "epoch": 23.53, "learning_rate": 6.47515964120522e-05, "loss": 1.0238, "step": 838500 }, { "epoch": 23.54, "learning_rate": 6.461114960394e-05, "loss": 1.0643, "step": 839000 }, { "epoch": 23.56, "learning_rate": 6.447070279582778e-05, "loss": 1.0537, "step": 839500 }, { "epoch": 23.57, "learning_rate": 6.433025598771558e-05, "loss": 1.0452, "step": 840000 }, { "epoch": 23.59, "learning_rate": 6.418980917960337e-05, "loss": 1.0785, "step": 840500 }, { "epoch": 23.6, "learning_rate": 6.404936237149117e-05, "loss": 1.0739, "step": 841000 }, { "epoch": 23.61, "learning_rate": 6.390891556337896e-05, "loss": 1.0694, "step": 841500 }, { "epoch": 23.63, "learning_rate": 6.376846875526674e-05, "loss": 1.0649, "step": 842000 }, { "epoch": 23.64, "learning_rate": 6.362802194715455e-05, "loss": 1.0585, "step": 842500 }, { "epoch": 23.66, "learning_rate": 6.348757513904233e-05, "loss": 1.0764, "step": 843000 }, { "epoch": 23.67, "learning_rate": 6.334712833093013e-05, "loss": 1.0815, "step": 843500 }, { "epoch": 23.68, "learning_rate": 6.320668152281792e-05, "loss": 1.0748, "step": 844000 }, { "epoch": 23.7, "learning_rate": 6.306623471470572e-05, "loss": 1.0502, "step": 844500 }, { "epoch": 23.71, "learning_rate": 6.29257879065935e-05, "loss": 1.053, "step": 845000 }, { "epoch": 23.73, "learning_rate": 6.278534109848129e-05, "loss": 1.0697, "step": 845500 }, { "epoch": 23.74, "learning_rate": 6.264489429036909e-05, "loss": 1.0811, "step": 846000 }, { "epoch": 23.76, "learning_rate": 6.250444748225688e-05, "loss": 1.0412, "step": 846500 }, { "epoch": 23.77, "learning_rate": 6.236400067414467e-05, "loss": 1.0698, "step": 847000 }, { "epoch": 23.78, "learning_rate": 6.222355386603247e-05, "loss": 1.0591, "step": 847500 }, { "epoch": 23.8, "learning_rate": 6.208310705792026e-05, "loss": 1.0589, "step": 848000 }, { "epoch": 23.81, "learning_rate": 6.194266024980804e-05, "loss": 1.0547, "step": 848500 }, { "epoch": 23.83, "learning_rate": 6.180221344169585e-05, "loss": 1.0766, "step": 849000 }, { "epoch": 23.84, "learning_rate": 6.166176663358363e-05, "loss": 1.0689, "step": 849500 }, { "epoch": 23.85, "learning_rate": 6.152131982547143e-05, "loss": 1.0286, "step": 850000 }, { "epoch": 23.87, "learning_rate": 6.138087301735922e-05, "loss": 1.0654, "step": 850500 }, { "epoch": 23.88, "learning_rate": 6.124042620924702e-05, "loss": 1.0763, "step": 851000 }, { "epoch": 23.9, "learning_rate": 6.109997940113481e-05, "loss": 1.0731, "step": 851500 }, { "epoch": 23.91, "learning_rate": 6.095953259302259e-05, "loss": 1.0472, "step": 852000 }, { "epoch": 23.92, "learning_rate": 6.081908578491039e-05, "loss": 1.0806, "step": 852500 }, { "epoch": 23.94, "learning_rate": 6.067863897679818e-05, "loss": 1.0616, "step": 853000 }, { "epoch": 23.95, "learning_rate": 6.0538192168685975e-05, "loss": 1.0509, "step": 853500 }, { "epoch": 23.97, "learning_rate": 6.039774536057376e-05, "loss": 1.0808, "step": 854000 }, { "epoch": 23.98, "learning_rate": 6.0257298552461564e-05, "loss": 1.0587, "step": 854500 }, { "epoch": 23.99, "learning_rate": 6.011685174434935e-05, "loss": 1.0787, "step": 855000 }, { "epoch": 24.01, "learning_rate": 5.997640493623714e-05, "loss": 1.0337, "step": 855500 }, { "epoch": 24.02, "learning_rate": 5.983595812812494e-05, "loss": 1.0066, "step": 856000 }, { "epoch": 24.04, "learning_rate": 5.969551132001273e-05, "loss": 1.0121, "step": 856500 }, { "epoch": 24.05, "learning_rate": 5.9555064511900516e-05, "loss": 1.0268, "step": 857000 }, { "epoch": 24.06, "learning_rate": 5.941461770378832e-05, "loss": 1.0055, "step": 857500 }, { "epoch": 24.08, "learning_rate": 5.9274170895676105e-05, "loss": 1.0119, "step": 858000 }, { "epoch": 24.09, "learning_rate": 5.91337240875639e-05, "loss": 1.0203, "step": 858500 }, { "epoch": 24.11, "learning_rate": 5.899327727945169e-05, "loss": 1.0067, "step": 859000 }, { "epoch": 24.12, "learning_rate": 5.885283047133948e-05, "loss": 1.0304, "step": 859500 }, { "epoch": 24.13, "learning_rate": 5.8712383663227276e-05, "loss": 1.0314, "step": 860000 }, { "epoch": 24.15, "learning_rate": 5.8571936855115064e-05, "loss": 1.002, "step": 860500 }, { "epoch": 24.16, "learning_rate": 5.8431490047002865e-05, "loss": 0.9908, "step": 861000 }, { "epoch": 24.18, "learning_rate": 5.829104323889065e-05, "loss": 1.0277, "step": 861500 }, { "epoch": 24.19, "learning_rate": 5.815059643077844e-05, "loss": 1.0328, "step": 862000 }, { "epoch": 24.2, "learning_rate": 5.801014962266624e-05, "loss": 1.0052, "step": 862500 }, { "epoch": 24.22, "learning_rate": 5.786970281455403e-05, "loss": 0.9812, "step": 863000 }, { "epoch": 24.23, "learning_rate": 5.7729256006441824e-05, "loss": 0.9925, "step": 863500 }, { "epoch": 24.25, "learning_rate": 5.758880919832961e-05, "loss": 1.0073, "step": 864000 }, { "epoch": 24.26, "learning_rate": 5.7448362390217406e-05, "loss": 0.9962, "step": 864500 }, { "epoch": 24.27, "learning_rate": 5.73079155821052e-05, "loss": 0.9965, "step": 865000 }, { "epoch": 24.29, "learning_rate": 5.716746877399299e-05, "loss": 1.0207, "step": 865500 }, { "epoch": 24.3, "learning_rate": 5.702702196588079e-05, "loss": 1.037, "step": 866000 }, { "epoch": 24.32, "learning_rate": 5.688657515776858e-05, "loss": 1.031, "step": 866500 }, { "epoch": 24.33, "learning_rate": 5.6746128349656365e-05, "loss": 1.0097, "step": 867000 }, { "epoch": 24.34, "learning_rate": 5.6605681541544166e-05, "loss": 1.0315, "step": 867500 }, { "epoch": 24.36, "learning_rate": 5.6465234733431954e-05, "loss": 1.0114, "step": 868000 }, { "epoch": 24.37, "learning_rate": 5.632478792531975e-05, "loss": 1.0141, "step": 868500 }, { "epoch": 24.39, "learning_rate": 5.6184341117207536e-05, "loss": 1.003, "step": 869000 }, { "epoch": 24.4, "learning_rate": 5.604389430909533e-05, "loss": 0.999, "step": 869500 }, { "epoch": 24.41, "learning_rate": 5.5903447500983125e-05, "loss": 1.026, "step": 870000 }, { "epoch": 24.43, "learning_rate": 5.576300069287091e-05, "loss": 0.9967, "step": 870500 }, { "epoch": 24.44, "learning_rate": 5.5622553884758714e-05, "loss": 1.0358, "step": 871000 }, { "epoch": 24.46, "learning_rate": 5.54821070766465e-05, "loss": 1.0213, "step": 871500 }, { "epoch": 24.47, "learning_rate": 5.534166026853429e-05, "loss": 1.0055, "step": 872000 }, { "epoch": 24.48, "learning_rate": 5.5201213460422084e-05, "loss": 1.0138, "step": 872500 }, { "epoch": 24.5, "learning_rate": 5.506076665230988e-05, "loss": 0.9862, "step": 873000 }, { "epoch": 24.51, "learning_rate": 5.4920319844197666e-05, "loss": 1.0179, "step": 873500 }, { "epoch": 24.53, "learning_rate": 5.477987303608546e-05, "loss": 1.0121, "step": 874000 }, { "epoch": 24.54, "learning_rate": 5.4639426227973255e-05, "loss": 0.9966, "step": 874500 }, { "epoch": 24.55, "learning_rate": 5.449897941986105e-05, "loss": 1.0033, "step": 875000 }, { "epoch": 24.57, "learning_rate": 5.435853261174884e-05, "loss": 1.0233, "step": 875500 }, { "epoch": 24.58, "learning_rate": 5.421808580363664e-05, "loss": 1.0081, "step": 876000 }, { "epoch": 24.6, "learning_rate": 5.4077638995524426e-05, "loss": 1.0259, "step": 876500 }, { "epoch": 24.61, "learning_rate": 5.3937192187412213e-05, "loss": 1.0035, "step": 877000 }, { "epoch": 24.63, "learning_rate": 5.379674537930001e-05, "loss": 1.0148, "step": 877500 }, { "epoch": 24.64, "learning_rate": 5.36562985711878e-05, "loss": 1.0296, "step": 878000 }, { "epoch": 24.65, "learning_rate": 5.351585176307559e-05, "loss": 1.0188, "step": 878500 }, { "epoch": 24.67, "learning_rate": 5.3375404954963385e-05, "loss": 1.035, "step": 879000 }, { "epoch": 24.68, "learning_rate": 5.323495814685118e-05, "loss": 1.0216, "step": 879500 }, { "epoch": 24.7, "learning_rate": 5.3094511338738973e-05, "loss": 1.0217, "step": 880000 }, { "epoch": 24.71, "learning_rate": 5.295406453062676e-05, "loss": 1.0079, "step": 880500 }, { "epoch": 24.72, "learning_rate": 5.281361772251456e-05, "loss": 0.9966, "step": 881000 }, { "epoch": 24.74, "learning_rate": 5.267317091440235e-05, "loss": 1.0, "step": 881500 }, { "epoch": 24.75, "learning_rate": 5.253272410629014e-05, "loss": 1.0185, "step": 882000 }, { "epoch": 24.77, "learning_rate": 5.2392277298177925e-05, "loss": 1.0232, "step": 882500 }, { "epoch": 24.78, "learning_rate": 5.225183049006573e-05, "loss": 1.0223, "step": 883000 }, { "epoch": 24.79, "learning_rate": 5.2111383681953514e-05, "loss": 1.0066, "step": 883500 }, { "epoch": 24.81, "learning_rate": 5.197093687384131e-05, "loss": 1.0333, "step": 884000 }, { "epoch": 24.82, "learning_rate": 5.18304900657291e-05, "loss": 1.0348, "step": 884500 }, { "epoch": 24.84, "learning_rate": 5.16900432576169e-05, "loss": 1.0362, "step": 885000 }, { "epoch": 24.85, "learning_rate": 5.1549596449504685e-05, "loss": 1.0099, "step": 885500 }, { "epoch": 24.86, "learning_rate": 5.140914964139247e-05, "loss": 1.0146, "step": 886000 }, { "epoch": 24.88, "learning_rate": 5.1268702833280274e-05, "loss": 1.0462, "step": 886500 }, { "epoch": 24.89, "learning_rate": 5.112825602516806e-05, "loss": 1.0164, "step": 887000 }, { "epoch": 24.91, "learning_rate": 5.098780921705585e-05, "loss": 1.0112, "step": 887500 }, { "epoch": 24.92, "learning_rate": 5.084736240894365e-05, "loss": 1.0161, "step": 888000 }, { "epoch": 24.93, "learning_rate": 5.070691560083144e-05, "loss": 1.0426, "step": 888500 }, { "epoch": 24.95, "learning_rate": 5.056646879271923e-05, "loss": 0.9918, "step": 889000 }, { "epoch": 24.96, "learning_rate": 5.042602198460703e-05, "loss": 1.023, "step": 889500 }, { "epoch": 24.98, "learning_rate": 5.028557517649482e-05, "loss": 1.0051, "step": 890000 }, { "epoch": 24.99, "learning_rate": 5.014512836838261e-05, "loss": 1.0501, "step": 890500 }, { "epoch": 25.0, "learning_rate": 5.00046815602704e-05, "loss": 1.0258, "step": 891000 }, { "epoch": 25.02, "learning_rate": 4.98642347521582e-05, "loss": 0.9781, "step": 891500 }, { "epoch": 25.03, "learning_rate": 4.9723787944045986e-05, "loss": 0.9605, "step": 892000 }, { "epoch": 25.05, "learning_rate": 4.9583341135933774e-05, "loss": 0.9655, "step": 892500 }, { "epoch": 25.06, "learning_rate": 4.9442894327821575e-05, "loss": 0.964, "step": 893000 }, { "epoch": 25.07, "learning_rate": 4.930244751970936e-05, "loss": 0.9666, "step": 893500 }, { "epoch": 25.09, "learning_rate": 4.916200071159716e-05, "loss": 0.9809, "step": 894000 }, { "epoch": 25.1, "learning_rate": 4.902155390348495e-05, "loss": 0.9729, "step": 894500 }, { "epoch": 25.12, "learning_rate": 4.888110709537274e-05, "loss": 0.9895, "step": 895000 }, { "epoch": 25.13, "learning_rate": 4.8740660287260534e-05, "loss": 0.9598, "step": 895500 }, { "epoch": 25.14, "learning_rate": 4.860021347914832e-05, "loss": 0.95, "step": 896000 }, { "epoch": 25.16, "learning_rate": 4.845976667103612e-05, "loss": 0.9933, "step": 896500 }, { "epoch": 25.17, "learning_rate": 4.831931986292391e-05, "loss": 0.9686, "step": 897000 }, { "epoch": 25.19, "learning_rate": 4.81788730548117e-05, "loss": 0.9934, "step": 897500 }, { "epoch": 25.2, "learning_rate": 4.80384262466995e-05, "loss": 0.9792, "step": 898000 }, { "epoch": 25.21, "learning_rate": 4.789797943858729e-05, "loss": 0.9899, "step": 898500 }, { "epoch": 25.23, "learning_rate": 4.775753263047508e-05, "loss": 0.9809, "step": 899000 }, { "epoch": 25.24, "learning_rate": 4.7617085822362876e-05, "loss": 0.9835, "step": 899500 }, { "epoch": 25.26, "learning_rate": 4.7476639014250664e-05, "loss": 0.9338, "step": 900000 }, { "epoch": 25.27, "learning_rate": 4.733619220613846e-05, "loss": 0.9526, "step": 900500 }, { "epoch": 25.28, "learning_rate": 4.7195745398026246e-05, "loss": 0.9714, "step": 901000 }, { "epoch": 25.3, "learning_rate": 4.705529858991405e-05, "loss": 0.9842, "step": 901500 }, { "epoch": 25.31, "learning_rate": 4.6914851781801835e-05, "loss": 0.979, "step": 902000 }, { "epoch": 25.33, "learning_rate": 4.677440497368962e-05, "loss": 0.9746, "step": 902500 }, { "epoch": 25.34, "learning_rate": 4.6633958165577424e-05, "loss": 0.9716, "step": 903000 }, { "epoch": 25.35, "learning_rate": 4.649351135746521e-05, "loss": 0.9827, "step": 903500 }, { "epoch": 25.37, "learning_rate": 4.6353064549353006e-05, "loss": 0.994, "step": 904000 }, { "epoch": 25.38, "learning_rate": 4.6212617741240794e-05, "loss": 0.981, "step": 904500 }, { "epoch": 25.4, "learning_rate": 4.607217093312859e-05, "loss": 0.9842, "step": 905000 }, { "epoch": 25.41, "learning_rate": 4.593172412501638e-05, "loss": 0.979, "step": 905500 }, { "epoch": 25.42, "learning_rate": 4.579127731690417e-05, "loss": 0.9942, "step": 906000 }, { "epoch": 25.44, "learning_rate": 4.565083050879197e-05, "loss": 0.9978, "step": 906500 }, { "epoch": 25.45, "learning_rate": 4.551038370067976e-05, "loss": 1.0006, "step": 907000 }, { "epoch": 25.47, "learning_rate": 4.536993689256755e-05, "loss": 1.0112, "step": 907500 }, { "epoch": 25.48, "learning_rate": 4.522949008445535e-05, "loss": 0.9642, "step": 908000 }, { "epoch": 25.49, "learning_rate": 4.5089043276343136e-05, "loss": 0.9905, "step": 908500 }, { "epoch": 25.51, "learning_rate": 4.4948596468230924e-05, "loss": 0.986, "step": 909000 }, { "epoch": 25.52, "learning_rate": 4.480814966011872e-05, "loss": 0.9829, "step": 909500 }, { "epoch": 25.54, "learning_rate": 4.466770285200651e-05, "loss": 0.9783, "step": 910000 }, { "epoch": 25.55, "learning_rate": 4.452725604389431e-05, "loss": 0.983, "step": 910500 }, { "epoch": 25.57, "learning_rate": 4.4386809235782095e-05, "loss": 0.972, "step": 911000 }, { "epoch": 25.58, "learning_rate": 4.4246362427669896e-05, "loss": 0.9854, "step": 911500 }, { "epoch": 25.59, "learning_rate": 4.4105915619557684e-05, "loss": 0.981, "step": 912000 }, { "epoch": 25.61, "learning_rate": 4.396546881144547e-05, "loss": 0.9816, "step": 912500 }, { "epoch": 25.62, "learning_rate": 4.382502200333327e-05, "loss": 0.9745, "step": 913000 }, { "epoch": 25.64, "learning_rate": 4.368457519522106e-05, "loss": 0.9589, "step": 913500 }, { "epoch": 25.65, "learning_rate": 4.354412838710885e-05, "loss": 0.9854, "step": 914000 }, { "epoch": 25.66, "learning_rate": 4.340368157899664e-05, "loss": 0.9811, "step": 914500 }, { "epoch": 25.68, "learning_rate": 4.326323477088444e-05, "loss": 0.9934, "step": 915000 }, { "epoch": 25.69, "learning_rate": 4.312278796277223e-05, "loss": 0.9691, "step": 915500 }, { "epoch": 25.71, "learning_rate": 4.298234115466002e-05, "loss": 0.9757, "step": 916000 }, { "epoch": 25.72, "learning_rate": 4.284189434654782e-05, "loss": 0.98, "step": 916500 }, { "epoch": 25.73, "learning_rate": 4.270144753843561e-05, "loss": 0.9761, "step": 917000 }, { "epoch": 25.75, "learning_rate": 4.2561000730323396e-05, "loss": 0.9808, "step": 917500 }, { "epoch": 25.76, "learning_rate": 4.2420553922211184e-05, "loss": 0.9752, "step": 918000 }, { "epoch": 25.78, "learning_rate": 4.2280107114098985e-05, "loss": 1.0108, "step": 918500 }, { "epoch": 25.79, "learning_rate": 4.213966030598677e-05, "loss": 0.9959, "step": 919000 }, { "epoch": 25.8, "learning_rate": 4.199921349787457e-05, "loss": 0.9715, "step": 919500 }, { "epoch": 25.82, "learning_rate": 4.185876668976236e-05, "loss": 0.9801, "step": 920000 }, { "epoch": 25.83, "learning_rate": 4.1718319881650156e-05, "loss": 0.9798, "step": 920500 }, { "epoch": 25.85, "learning_rate": 4.1577873073537944e-05, "loss": 1.0103, "step": 921000 }, { "epoch": 25.86, "learning_rate": 4.143742626542574e-05, "loss": 0.982, "step": 921500 }, { "epoch": 25.87, "learning_rate": 4.129697945731353e-05, "loss": 0.9832, "step": 922000 }, { "epoch": 25.89, "learning_rate": 4.115653264920132e-05, "loss": 0.9876, "step": 922500 }, { "epoch": 25.9, "learning_rate": 4.101608584108911e-05, "loss": 0.9796, "step": 923000 }, { "epoch": 25.92, "learning_rate": 4.087563903297691e-05, "loss": 0.988, "step": 923500 }, { "epoch": 25.93, "learning_rate": 4.07351922248647e-05, "loss": 0.9728, "step": 924000 }, { "epoch": 25.94, "learning_rate": 4.059474541675249e-05, "loss": 0.9854, "step": 924500 }, { "epoch": 25.96, "learning_rate": 4.0454298608640286e-05, "loss": 0.9798, "step": 925000 }, { "epoch": 25.97, "learning_rate": 4.031385180052808e-05, "loss": 0.9965, "step": 925500 }, { "epoch": 25.99, "learning_rate": 4.017340499241587e-05, "loss": 0.978, "step": 926000 }, { "epoch": 26.0, "learning_rate": 4.003295818430366e-05, "loss": 0.978, "step": 926500 }, { "epoch": 26.01, "learning_rate": 3.989251137619146e-05, "loss": 0.9435, "step": 927000 }, { "epoch": 26.03, "learning_rate": 3.9752064568079244e-05, "loss": 0.9353, "step": 927500 }, { "epoch": 26.04, "learning_rate": 3.961161775996703e-05, "loss": 0.9308, "step": 928000 }, { "epoch": 26.06, "learning_rate": 3.9471170951854833e-05, "loss": 0.9444, "step": 928500 }, { "epoch": 26.07, "learning_rate": 3.933072414374262e-05, "loss": 0.9623, "step": 929000 }, { "epoch": 26.08, "learning_rate": 3.9190277335630416e-05, "loss": 0.9253, "step": 929500 }, { "epoch": 26.1, "learning_rate": 3.904983052751821e-05, "loss": 0.9352, "step": 930000 }, { "epoch": 26.11, "learning_rate": 3.8909383719406e-05, "loss": 0.9643, "step": 930500 }, { "epoch": 26.13, "learning_rate": 3.876893691129379e-05, "loss": 0.9441, "step": 931000 }, { "epoch": 26.14, "learning_rate": 3.862849010318159e-05, "loss": 0.9533, "step": 931500 }, { "epoch": 26.15, "learning_rate": 3.848804329506938e-05, "loss": 0.9373, "step": 932000 }, { "epoch": 26.17, "learning_rate": 3.834759648695717e-05, "loss": 0.9354, "step": 932500 }, { "epoch": 26.18, "learning_rate": 3.8207149678844957e-05, "loss": 0.9522, "step": 933000 }, { "epoch": 26.2, "learning_rate": 3.806670287073276e-05, "loss": 0.9477, "step": 933500 }, { "epoch": 26.21, "learning_rate": 3.7926256062620545e-05, "loss": 0.9361, "step": 934000 }, { "epoch": 26.22, "learning_rate": 3.778580925450834e-05, "loss": 0.9419, "step": 934500 }, { "epoch": 26.24, "learning_rate": 3.7645362446396134e-05, "loss": 0.968, "step": 935000 }, { "epoch": 26.25, "learning_rate": 3.750491563828392e-05, "loss": 0.9233, "step": 935500 }, { "epoch": 26.27, "learning_rate": 3.7364468830171717e-05, "loss": 0.9514, "step": 936000 }, { "epoch": 26.28, "learning_rate": 3.722402202205951e-05, "loss": 0.9475, "step": 936500 }, { "epoch": 26.29, "learning_rate": 3.7083575213947305e-05, "loss": 0.9377, "step": 937000 }, { "epoch": 26.31, "learning_rate": 3.694312840583509e-05, "loss": 0.9704, "step": 937500 }, { "epoch": 26.32, "learning_rate": 3.680268159772289e-05, "loss": 0.925, "step": 938000 }, { "epoch": 26.34, "learning_rate": 3.6662234789610675e-05, "loss": 0.9333, "step": 938500 }, { "epoch": 26.35, "learning_rate": 3.652178798149847e-05, "loss": 0.9438, "step": 939000 }, { "epoch": 26.36, "learning_rate": 3.6381341173386264e-05, "loss": 0.9884, "step": 939500 }, { "epoch": 26.38, "learning_rate": 3.624089436527405e-05, "loss": 0.9539, "step": 940000 }, { "epoch": 26.39, "learning_rate": 3.6100447557161846e-05, "loss": 0.9565, "step": 940500 }, { "epoch": 26.41, "learning_rate": 3.596000074904964e-05, "loss": 0.9604, "step": 941000 }, { "epoch": 26.42, "learning_rate": 3.5819553940937435e-05, "loss": 0.9424, "step": 941500 }, { "epoch": 26.44, "learning_rate": 3.567910713282522e-05, "loss": 0.9411, "step": 942000 }, { "epoch": 26.45, "learning_rate": 3.553866032471302e-05, "loss": 0.963, "step": 942500 }, { "epoch": 26.46, "learning_rate": 3.539821351660081e-05, "loss": 0.9385, "step": 943000 }, { "epoch": 26.48, "learning_rate": 3.52577667084886e-05, "loss": 0.9585, "step": 943500 }, { "epoch": 26.49, "learning_rate": 3.5117319900376394e-05, "loss": 0.9489, "step": 944000 }, { "epoch": 26.51, "learning_rate": 3.497687309226418e-05, "loss": 0.9572, "step": 944500 }, { "epoch": 26.52, "learning_rate": 3.4836426284151976e-05, "loss": 0.9138, "step": 945000 }, { "epoch": 26.53, "learning_rate": 3.469597947603977e-05, "loss": 0.9485, "step": 945500 }, { "epoch": 26.55, "learning_rate": 3.4555532667927565e-05, "loss": 0.9519, "step": 946000 }, { "epoch": 26.56, "learning_rate": 3.441508585981536e-05, "loss": 0.9356, "step": 946500 }, { "epoch": 26.58, "learning_rate": 3.427463905170315e-05, "loss": 0.9257, "step": 947000 }, { "epoch": 26.59, "learning_rate": 3.413419224359094e-05, "loss": 0.9313, "step": 947500 }, { "epoch": 26.6, "learning_rate": 3.3993745435478736e-05, "loss": 0.9591, "step": 948000 }, { "epoch": 26.62, "learning_rate": 3.3853298627366524e-05, "loss": 0.9424, "step": 948500 }, { "epoch": 26.63, "learning_rate": 3.371285181925432e-05, "loss": 0.9564, "step": 949000 }, { "epoch": 26.65, "learning_rate": 3.3572405011142106e-05, "loss": 0.9548, "step": 949500 }, { "epoch": 26.66, "learning_rate": 3.34319582030299e-05, "loss": 0.9363, "step": 950000 }, { "epoch": 26.67, "learning_rate": 3.3291511394917695e-05, "loss": 0.9518, "step": 950500 }, { "epoch": 26.69, "learning_rate": 3.315106458680549e-05, "loss": 0.9612, "step": 951000 }, { "epoch": 26.7, "learning_rate": 3.3010617778693284e-05, "loss": 0.953, "step": 951500 }, { "epoch": 26.72, "learning_rate": 3.287017097058107e-05, "loss": 0.9675, "step": 952000 }, { "epoch": 26.73, "learning_rate": 3.2729724162468866e-05, "loss": 0.9442, "step": 952500 }, { "epoch": 26.74, "learning_rate": 3.258927735435666e-05, "loss": 0.946, "step": 953000 }, { "epoch": 26.76, "learning_rate": 3.244883054624445e-05, "loss": 0.9465, "step": 953500 }, { "epoch": 26.77, "learning_rate": 3.230838373813224e-05, "loss": 0.949, "step": 954000 }, { "epoch": 26.79, "learning_rate": 3.216793693002003e-05, "loss": 0.9622, "step": 954500 }, { "epoch": 26.8, "learning_rate": 3.2027490121907825e-05, "loss": 0.9415, "step": 955000 }, { "epoch": 26.81, "learning_rate": 3.188704331379562e-05, "loss": 0.9294, "step": 955500 }, { "epoch": 26.83, "learning_rate": 3.1746596505683414e-05, "loss": 0.9346, "step": 956000 }, { "epoch": 26.84, "learning_rate": 3.160614969757121e-05, "loss": 0.9471, "step": 956500 }, { "epoch": 26.86, "learning_rate": 3.1465702889458996e-05, "loss": 0.9375, "step": 957000 }, { "epoch": 26.87, "learning_rate": 3.132525608134679e-05, "loss": 0.9421, "step": 957500 }, { "epoch": 26.88, "learning_rate": 3.118480927323458e-05, "loss": 0.9358, "step": 958000 }, { "epoch": 26.9, "learning_rate": 3.104436246512237e-05, "loss": 0.9515, "step": 958500 }, { "epoch": 26.91, "learning_rate": 3.090391565701017e-05, "loss": 0.9672, "step": 959000 }, { "epoch": 26.93, "learning_rate": 3.0763468848897955e-05, "loss": 0.9548, "step": 959500 }, { "epoch": 26.94, "learning_rate": 3.062302204078575e-05, "loss": 0.9608, "step": 960000 }, { "epoch": 26.95, "learning_rate": 3.048257523267354e-05, "loss": 0.938, "step": 960500 }, { "epoch": 26.97, "learning_rate": 3.0342128424561335e-05, "loss": 0.9375, "step": 961000 }, { "epoch": 26.98, "learning_rate": 3.020168161644913e-05, "loss": 0.926, "step": 961500 }, { "epoch": 27.0, "learning_rate": 3.006123480833692e-05, "loss": 0.9601, "step": 962000 }, { "epoch": 27.01, "learning_rate": 2.992078800022471e-05, "loss": 0.906, "step": 962500 }, { "epoch": 27.02, "learning_rate": 2.9780341192112503e-05, "loss": 0.9176, "step": 963000 }, { "epoch": 27.04, "learning_rate": 2.9639894384000297e-05, "loss": 0.914, "step": 963500 }, { "epoch": 27.05, "learning_rate": 2.949944757588809e-05, "loss": 0.8998, "step": 964000 }, { "epoch": 27.07, "learning_rate": 2.9359000767775883e-05, "loss": 0.9115, "step": 964500 }, { "epoch": 27.08, "learning_rate": 2.9218553959663674e-05, "loss": 0.9167, "step": 965000 }, { "epoch": 27.09, "learning_rate": 2.9078107151551465e-05, "loss": 0.9245, "step": 965500 }, { "epoch": 27.11, "learning_rate": 2.893766034343926e-05, "loss": 0.9306, "step": 966000 }, { "epoch": 27.12, "learning_rate": 2.8797213535327054e-05, "loss": 0.8957, "step": 966500 }, { "epoch": 27.14, "learning_rate": 2.865676672721484e-05, "loss": 0.907, "step": 967000 }, { "epoch": 27.15, "learning_rate": 2.8516319919102636e-05, "loss": 0.9172, "step": 967500 }, { "epoch": 27.16, "learning_rate": 2.8375873110990427e-05, "loss": 0.9171, "step": 968000 }, { "epoch": 27.18, "learning_rate": 2.823542630287822e-05, "loss": 0.9068, "step": 968500 }, { "epoch": 27.19, "learning_rate": 2.8094979494766016e-05, "loss": 0.8993, "step": 969000 }, { "epoch": 27.21, "learning_rate": 2.7954532686653804e-05, "loss": 0.9355, "step": 969500 }, { "epoch": 27.22, "learning_rate": 2.7814085878541598e-05, "loss": 0.9208, "step": 970000 }, { "epoch": 27.23, "learning_rate": 2.767363907042939e-05, "loss": 0.9365, "step": 970500 }, { "epoch": 27.25, "learning_rate": 2.7533192262317184e-05, "loss": 0.9097, "step": 971000 }, { "epoch": 27.26, "learning_rate": 2.7392745454204978e-05, "loss": 0.9277, "step": 971500 }, { "epoch": 27.28, "learning_rate": 2.7252298646092766e-05, "loss": 0.9186, "step": 972000 }, { "epoch": 27.29, "learning_rate": 2.711185183798056e-05, "loss": 0.9201, "step": 972500 }, { "epoch": 27.3, "learning_rate": 2.697140502986835e-05, "loss": 0.9255, "step": 973000 }, { "epoch": 27.32, "learning_rate": 2.6830958221756146e-05, "loss": 0.9401, "step": 973500 }, { "epoch": 27.33, "learning_rate": 2.6690511413643933e-05, "loss": 0.9133, "step": 974000 }, { "epoch": 27.35, "learning_rate": 2.6550064605531728e-05, "loss": 0.9175, "step": 974500 }, { "epoch": 27.36, "learning_rate": 2.6409617797419522e-05, "loss": 0.9265, "step": 975000 }, { "epoch": 27.38, "learning_rate": 2.6269170989307313e-05, "loss": 0.938, "step": 975500 }, { "epoch": 27.39, "learning_rate": 2.6128724181195108e-05, "loss": 0.9301, "step": 976000 }, { "epoch": 27.4, "learning_rate": 2.5988277373082896e-05, "loss": 0.8971, "step": 976500 }, { "epoch": 27.42, "learning_rate": 2.584783056497069e-05, "loss": 0.9441, "step": 977000 }, { "epoch": 27.43, "learning_rate": 2.5707383756858484e-05, "loss": 0.9362, "step": 977500 }, { "epoch": 27.45, "learning_rate": 2.5566936948746276e-05, "loss": 0.9083, "step": 978000 }, { "epoch": 27.46, "learning_rate": 2.542649014063407e-05, "loss": 0.9082, "step": 978500 }, { "epoch": 27.47, "learning_rate": 2.5286043332521858e-05, "loss": 0.9421, "step": 979000 }, { "epoch": 27.49, "learning_rate": 2.5145596524409652e-05, "loss": 0.8992, "step": 979500 }, { "epoch": 27.5, "learning_rate": 2.5005149716297447e-05, "loss": 0.9328, "step": 980000 }, { "epoch": 27.52, "learning_rate": 2.4864702908185238e-05, "loss": 0.9284, "step": 980500 }, { "epoch": 27.53, "learning_rate": 2.4724256100073032e-05, "loss": 0.9122, "step": 981000 }, { "epoch": 27.54, "learning_rate": 2.458380929196082e-05, "loss": 0.8997, "step": 981500 }, { "epoch": 27.56, "learning_rate": 2.4443362483848614e-05, "loss": 0.9328, "step": 982000 }, { "epoch": 27.57, "learning_rate": 2.430291567573641e-05, "loss": 0.9093, "step": 982500 }, { "epoch": 27.59, "learning_rate": 2.41624688676242e-05, "loss": 0.9023, "step": 983000 }, { "epoch": 27.6, "learning_rate": 2.4022022059511994e-05, "loss": 0.9187, "step": 983500 }, { "epoch": 27.61, "learning_rate": 2.3881575251399782e-05, "loss": 0.9128, "step": 984000 }, { "epoch": 27.63, "learning_rate": 2.3741128443287577e-05, "loss": 0.9219, "step": 984500 }, { "epoch": 27.64, "learning_rate": 2.360068163517537e-05, "loss": 0.9225, "step": 985000 }, { "epoch": 27.66, "learning_rate": 2.3460234827063162e-05, "loss": 0.9106, "step": 985500 }, { "epoch": 27.67, "learning_rate": 2.3319788018950957e-05, "loss": 0.9105, "step": 986000 }, { "epoch": 27.68, "learning_rate": 2.3179341210838744e-05, "loss": 0.9277, "step": 986500 }, { "epoch": 27.7, "learning_rate": 2.303889440272654e-05, "loss": 0.9037, "step": 987000 }, { "epoch": 27.71, "learning_rate": 2.2898447594614333e-05, "loss": 0.9158, "step": 987500 }, { "epoch": 27.73, "learning_rate": 2.2758000786502124e-05, "loss": 0.9387, "step": 988000 }, { "epoch": 27.74, "learning_rate": 2.261755397838992e-05, "loss": 0.9273, "step": 988500 }, { "epoch": 27.75, "learning_rate": 2.2477107170277706e-05, "loss": 0.9185, "step": 989000 }, { "epoch": 27.77, "learning_rate": 2.23366603621655e-05, "loss": 0.91, "step": 989500 }, { "epoch": 27.78, "learning_rate": 2.2196213554053292e-05, "loss": 0.9192, "step": 990000 }, { "epoch": 27.8, "learning_rate": 2.2055766745941086e-05, "loss": 0.901, "step": 990500 }, { "epoch": 27.81, "learning_rate": 2.1915319937828877e-05, "loss": 0.9072, "step": 991000 }, { "epoch": 27.82, "learning_rate": 2.177487312971667e-05, "loss": 0.9137, "step": 991500 }, { "epoch": 27.84, "learning_rate": 2.1634426321604463e-05, "loss": 0.9501, "step": 992000 }, { "epoch": 27.85, "learning_rate": 2.1493979513492254e-05, "loss": 0.9243, "step": 992500 }, { "epoch": 27.87, "learning_rate": 2.135353270538005e-05, "loss": 0.9278, "step": 993000 }, { "epoch": 27.88, "learning_rate": 2.121308589726784e-05, "loss": 0.8987, "step": 993500 }, { "epoch": 27.89, "learning_rate": 2.107263908915563e-05, "loss": 0.9121, "step": 994000 }, { "epoch": 27.91, "learning_rate": 2.0932192281043425e-05, "loss": 0.9219, "step": 994500 }, { "epoch": 27.92, "learning_rate": 2.0791745472931216e-05, "loss": 0.921, "step": 995000 }, { "epoch": 27.94, "learning_rate": 2.0651298664819007e-05, "loss": 0.9194, "step": 995500 }, { "epoch": 27.95, "learning_rate": 2.0510851856706802e-05, "loss": 0.9205, "step": 996000 }, { "epoch": 27.96, "learning_rate": 2.0370405048594593e-05, "loss": 0.9412, "step": 996500 }, { "epoch": 27.98, "learning_rate": 2.0229958240482387e-05, "loss": 0.9151, "step": 997000 }, { "epoch": 27.99, "learning_rate": 2.008951143237018e-05, "loss": 0.8942, "step": 997500 }, { "epoch": 28.01, "learning_rate": 1.994906462425797e-05, "loss": 0.8877, "step": 998000 }, { "epoch": 28.02, "learning_rate": 1.9808617816145764e-05, "loss": 0.9026, "step": 998500 }, { "epoch": 28.03, "learning_rate": 1.9668171008033555e-05, "loss": 0.8988, "step": 999000 }, { "epoch": 28.05, "learning_rate": 1.952772419992135e-05, "loss": 0.8773, "step": 999500 }, { "epoch": 28.06, "learning_rate": 1.938727739180914e-05, "loss": 0.8875, "step": 1000000 }, { "epoch": 28.08, "learning_rate": 1.9246830583696932e-05, "loss": 0.8946, "step": 1000500 }, { "epoch": 28.09, "learning_rate": 1.9106383775584726e-05, "loss": 0.8824, "step": 1001000 }, { "epoch": 28.1, "learning_rate": 1.8965936967472517e-05, "loss": 0.914, "step": 1001500 }, { "epoch": 28.12, "learning_rate": 1.8825490159360312e-05, "loss": 0.9184, "step": 1002000 }, { "epoch": 28.13, "learning_rate": 1.8685043351248103e-05, "loss": 0.9002, "step": 1002500 }, { "epoch": 28.15, "learning_rate": 1.8544596543135894e-05, "loss": 0.8737, "step": 1003000 }, { "epoch": 28.16, "learning_rate": 1.840414973502369e-05, "loss": 0.8976, "step": 1003500 }, { "epoch": 28.17, "learning_rate": 1.826370292691148e-05, "loss": 0.8865, "step": 1004000 }, { "epoch": 28.19, "learning_rate": 1.812325611879927e-05, "loss": 0.8947, "step": 1004500 }, { "epoch": 28.2, "learning_rate": 1.7982809310687065e-05, "loss": 0.8757, "step": 1005000 }, { "epoch": 28.22, "learning_rate": 1.7842362502574856e-05, "loss": 0.8997, "step": 1005500 }, { "epoch": 28.23, "learning_rate": 1.770191569446265e-05, "loss": 0.9052, "step": 1006000 }, { "epoch": 28.25, "learning_rate": 1.756146888635044e-05, "loss": 0.8833, "step": 1006500 }, { "epoch": 28.26, "learning_rate": 1.7421022078238233e-05, "loss": 0.9094, "step": 1007000 }, { "epoch": 28.27, "learning_rate": 1.7280575270126027e-05, "loss": 0.888, "step": 1007500 }, { "epoch": 28.29, "learning_rate": 1.7140128462013818e-05, "loss": 0.8907, "step": 1008000 }, { "epoch": 28.3, "learning_rate": 1.6999681653901613e-05, "loss": 0.8982, "step": 1008500 }, { "epoch": 28.32, "learning_rate": 1.6859234845789404e-05, "loss": 0.8964, "step": 1009000 }, { "epoch": 28.33, "learning_rate": 1.6718788037677195e-05, "loss": 0.8914, "step": 1009500 }, { "epoch": 28.34, "learning_rate": 1.6578341229564986e-05, "loss": 0.8984, "step": 1010000 }, { "epoch": 28.36, "learning_rate": 1.643789442145278e-05, "loss": 0.916, "step": 1010500 }, { "epoch": 28.37, "learning_rate": 1.629744761334057e-05, "loss": 0.8848, "step": 1011000 }, { "epoch": 28.39, "learning_rate": 1.6157000805228366e-05, "loss": 0.9012, "step": 1011500 }, { "epoch": 28.4, "learning_rate": 1.6016553997116157e-05, "loss": 0.8999, "step": 1012000 }, { "epoch": 28.41, "learning_rate": 1.5876107189003948e-05, "loss": 0.8859, "step": 1012500 }, { "epoch": 28.43, "learning_rate": 1.5735660380891743e-05, "loss": 0.8975, "step": 1013000 }, { "epoch": 28.44, "learning_rate": 1.5595213572779534e-05, "loss": 0.8983, "step": 1013500 }, { "epoch": 28.46, "learning_rate": 1.5454766764667328e-05, "loss": 0.9061, "step": 1014000 }, { "epoch": 28.47, "learning_rate": 1.531431995655512e-05, "loss": 0.8942, "step": 1014500 }, { "epoch": 28.48, "learning_rate": 1.517387314844291e-05, "loss": 0.917, "step": 1015000 }, { "epoch": 28.5, "learning_rate": 1.5033426340330705e-05, "loss": 0.8868, "step": 1015500 }, { "epoch": 28.51, "learning_rate": 1.4892979532218497e-05, "loss": 0.8692, "step": 1016000 }, { "epoch": 28.53, "learning_rate": 1.4752532724106289e-05, "loss": 0.8927, "step": 1016500 }, { "epoch": 28.54, "learning_rate": 1.4612085915994081e-05, "loss": 0.9105, "step": 1017000 }, { "epoch": 28.55, "learning_rate": 1.4471639107881872e-05, "loss": 0.8974, "step": 1017500 }, { "epoch": 28.57, "learning_rate": 1.4331192299769665e-05, "loss": 0.9013, "step": 1018000 }, { "epoch": 28.58, "learning_rate": 1.419074549165746e-05, "loss": 0.8981, "step": 1018500 }, { "epoch": 28.6, "learning_rate": 1.405029868354525e-05, "loss": 0.871, "step": 1019000 }, { "epoch": 28.61, "learning_rate": 1.3909851875433044e-05, "loss": 0.8917, "step": 1019500 }, { "epoch": 28.62, "learning_rate": 1.3769405067320835e-05, "loss": 0.8872, "step": 1020000 }, { "epoch": 28.64, "learning_rate": 1.3628958259208627e-05, "loss": 0.8958, "step": 1020500 }, { "epoch": 28.65, "learning_rate": 1.3488511451096422e-05, "loss": 0.8783, "step": 1021000 }, { "epoch": 28.67, "learning_rate": 1.3348064642984213e-05, "loss": 0.8722, "step": 1021500 }, { "epoch": 28.68, "learning_rate": 1.3207617834872006e-05, "loss": 0.9058, "step": 1022000 }, { "epoch": 28.69, "learning_rate": 1.3067171026759797e-05, "loss": 0.9316, "step": 1022500 }, { "epoch": 28.71, "learning_rate": 1.292672421864759e-05, "loss": 0.8852, "step": 1023000 }, { "epoch": 28.72, "learning_rate": 1.2786277410535382e-05, "loss": 0.8857, "step": 1023500 }, { "epoch": 28.74, "learning_rate": 1.2645830602423175e-05, "loss": 0.893, "step": 1024000 }, { "epoch": 28.75, "learning_rate": 1.2505383794310968e-05, "loss": 0.8962, "step": 1024500 }, { "epoch": 28.76, "learning_rate": 1.2364936986198759e-05, "loss": 0.8671, "step": 1025000 }, { "epoch": 28.78, "learning_rate": 1.2224490178086552e-05, "loss": 0.8975, "step": 1025500 }, { "epoch": 28.79, "learning_rate": 1.2084043369974343e-05, "loss": 0.904, "step": 1026000 }, { "epoch": 28.81, "learning_rate": 1.1943596561862137e-05, "loss": 0.9048, "step": 1026500 }, { "epoch": 28.82, "learning_rate": 1.1803149753749928e-05, "loss": 0.9158, "step": 1027000 }, { "epoch": 28.83, "learning_rate": 1.1662702945637721e-05, "loss": 0.8749, "step": 1027500 }, { "epoch": 28.85, "learning_rate": 1.1522256137525514e-05, "loss": 0.8801, "step": 1028000 }, { "epoch": 28.86, "learning_rate": 1.1381809329413305e-05, "loss": 0.9151, "step": 1028500 }, { "epoch": 28.88, "learning_rate": 1.12413625213011e-05, "loss": 0.8973, "step": 1029000 }, { "epoch": 28.89, "learning_rate": 1.110091571318889e-05, "loss": 0.8863, "step": 1029500 }, { "epoch": 28.9, "learning_rate": 1.0960468905076683e-05, "loss": 0.8966, "step": 1030000 }, { "epoch": 28.92, "learning_rate": 1.0820022096964474e-05, "loss": 0.8778, "step": 1030500 }, { "epoch": 28.93, "learning_rate": 1.0679575288852267e-05, "loss": 0.8899, "step": 1031000 }, { "epoch": 28.95, "learning_rate": 1.0539128480740062e-05, "loss": 0.8851, "step": 1031500 }, { "epoch": 28.96, "learning_rate": 1.0398681672627853e-05, "loss": 0.8972, "step": 1032000 }, { "epoch": 28.97, "learning_rate": 1.0258234864515645e-05, "loss": 0.8887, "step": 1032500 }, { "epoch": 28.99, "learning_rate": 1.0117788056403437e-05, "loss": 0.9048, "step": 1033000 }, { "epoch": 29.0, "learning_rate": 9.97734124829123e-06, "loss": 0.8814, "step": 1033500 }, { "epoch": 29.02, "learning_rate": 9.83689444017902e-06, "loss": 0.878, "step": 1034000 }, { "epoch": 29.03, "learning_rate": 9.696447632066815e-06, "loss": 0.8769, "step": 1034500 }, { "epoch": 29.04, "learning_rate": 9.556000823954608e-06, "loss": 0.8792, "step": 1035000 }, { "epoch": 29.06, "learning_rate": 9.415554015842399e-06, "loss": 0.8968, "step": 1035500 }, { "epoch": 29.07, "learning_rate": 9.275107207730191e-06, "loss": 0.8679, "step": 1036000 }, { "epoch": 29.09, "learning_rate": 9.134660399617984e-06, "loss": 0.8861, "step": 1036500 }, { "epoch": 29.1, "learning_rate": 8.994213591505775e-06, "loss": 0.8787, "step": 1037000 }, { "epoch": 29.12, "learning_rate": 8.85376678339357e-06, "loss": 0.8827, "step": 1037500 }, { "epoch": 29.13, "learning_rate": 8.713319975281361e-06, "loss": 0.8814, "step": 1038000 }, { "epoch": 29.14, "learning_rate": 8.572873167169154e-06, "loss": 0.8912, "step": 1038500 }, { "epoch": 29.16, "learning_rate": 8.432426359056946e-06, "loss": 0.879, "step": 1039000 }, { "epoch": 29.17, "learning_rate": 8.291979550944737e-06, "loss": 0.8579, "step": 1039500 }, { "epoch": 29.19, "learning_rate": 8.15153274283253e-06, "loss": 0.8612, "step": 1040000 }, { "epoch": 29.2, "learning_rate": 8.011085934720323e-06, "loss": 0.8734, "step": 1040500 }, { "epoch": 29.21, "learning_rate": 7.870639126608116e-06, "loss": 0.8676, "step": 1041000 }, { "epoch": 29.23, "learning_rate": 7.730192318495909e-06, "loss": 0.8894, "step": 1041500 }, { "epoch": 29.24, "learning_rate": 7.5897455103837005e-06, "loss": 0.9045, "step": 1042000 }, { "epoch": 29.26, "learning_rate": 7.449298702271492e-06, "loss": 0.872, "step": 1042500 }, { "epoch": 29.27, "learning_rate": 7.308851894159285e-06, "loss": 0.8897, "step": 1043000 }, { "epoch": 29.28, "learning_rate": 7.168405086047077e-06, "loss": 0.867, "step": 1043500 }, { "epoch": 29.3, "learning_rate": 7.027958277934869e-06, "loss": 0.8667, "step": 1044000 }, { "epoch": 29.31, "learning_rate": 6.887511469822662e-06, "loss": 0.8703, "step": 1044500 }, { "epoch": 29.33, "learning_rate": 6.747064661710454e-06, "loss": 0.8975, "step": 1045000 }, { "epoch": 29.34, "learning_rate": 6.606617853598247e-06, "loss": 0.8692, "step": 1045500 }, { "epoch": 29.35, "learning_rate": 6.466171045486039e-06, "loss": 0.8732, "step": 1046000 }, { "epoch": 29.37, "learning_rate": 6.325724237373831e-06, "loss": 0.8776, "step": 1046500 }, { "epoch": 29.38, "learning_rate": 6.185277429261624e-06, "loss": 0.8845, "step": 1047000 }, { "epoch": 29.4, "learning_rate": 6.044830621149416e-06, "loss": 0.8785, "step": 1047500 }, { "epoch": 29.41, "learning_rate": 5.904383813037208e-06, "loss": 0.8749, "step": 1048000 }, { "epoch": 29.42, "learning_rate": 5.7639370049250015e-06, "loss": 0.8644, "step": 1048500 }, { "epoch": 29.44, "learning_rate": 5.623490196812793e-06, "loss": 0.8678, "step": 1049000 }, { "epoch": 29.45, "learning_rate": 5.483043388700586e-06, "loss": 0.8888, "step": 1049500 }, { "epoch": 29.47, "learning_rate": 5.342596580588378e-06, "loss": 0.8788, "step": 1050000 }, { "epoch": 29.48, "learning_rate": 5.20214977247617e-06, "loss": 0.8708, "step": 1050500 }, { "epoch": 29.49, "learning_rate": 5.061702964363963e-06, "loss": 0.8732, "step": 1051000 }, { "epoch": 29.51, "learning_rate": 4.921256156251755e-06, "loss": 0.8776, "step": 1051500 }, { "epoch": 29.52, "learning_rate": 4.7808093481395475e-06, "loss": 0.8775, "step": 1052000 }, { "epoch": 29.54, "learning_rate": 4.64036254002734e-06, "loss": 0.8747, "step": 1052500 }, { "epoch": 29.55, "learning_rate": 4.499915731915132e-06, "loss": 0.8606, "step": 1053000 }, { "epoch": 29.56, "learning_rate": 4.359468923802925e-06, "loss": 0.8943, "step": 1053500 }, { "epoch": 29.58, "learning_rate": 4.219022115690717e-06, "loss": 0.8663, "step": 1054000 }, { "epoch": 29.59, "learning_rate": 4.07857530757851e-06, "loss": 0.882, "step": 1054500 }, { "epoch": 29.61, "learning_rate": 3.9381284994663016e-06, "loss": 0.8743, "step": 1055000 }, { "epoch": 29.62, "learning_rate": 3.797681691354094e-06, "loss": 0.8854, "step": 1055500 }, { "epoch": 29.63, "learning_rate": 3.6572348832418867e-06, "loss": 0.862, "step": 1056000 }, { "epoch": 29.65, "learning_rate": 3.516788075129679e-06, "loss": 0.8515, "step": 1056500 }, { "epoch": 29.66, "learning_rate": 3.376341267017471e-06, "loss": 0.8682, "step": 1057000 }, { "epoch": 29.68, "learning_rate": 3.2358944589052637e-06, "loss": 0.8791, "step": 1057500 }, { "epoch": 29.69, "learning_rate": 3.095447650793056e-06, "loss": 0.8518, "step": 1058000 }, { "epoch": 29.7, "learning_rate": 2.9550008426808484e-06, "loss": 0.8799, "step": 1058500 }, { "epoch": 29.72, "learning_rate": 2.8145540345686408e-06, "loss": 0.8911, "step": 1059000 }, { "epoch": 29.73, "learning_rate": 2.674107226456433e-06, "loss": 0.8737, "step": 1059500 }, { "epoch": 29.75, "learning_rate": 2.5336604183442255e-06, "loss": 0.8882, "step": 1060000 }, { "epoch": 29.76, "learning_rate": 2.3932136102320183e-06, "loss": 0.8904, "step": 1060500 }, { "epoch": 29.77, "learning_rate": 2.25276680211981e-06, "loss": 0.881, "step": 1061000 }, { "epoch": 29.79, "learning_rate": 2.1123199940076025e-06, "loss": 0.9008, "step": 1061500 }, { "epoch": 29.8, "learning_rate": 1.9718731858953953e-06, "loss": 0.8672, "step": 1062000 }, { "epoch": 29.82, "learning_rate": 1.8314263777831874e-06, "loss": 0.898, "step": 1062500 }, { "epoch": 29.83, "learning_rate": 1.6909795696709798e-06, "loss": 0.8588, "step": 1063000 }, { "epoch": 29.84, "learning_rate": 1.5505327615587721e-06, "loss": 0.8846, "step": 1063500 }, { "epoch": 29.86, "learning_rate": 1.4100859534465647e-06, "loss": 0.8712, "step": 1064000 }, { "epoch": 29.87, "learning_rate": 1.2696391453343568e-06, "loss": 0.872, "step": 1064500 }, { "epoch": 29.89, "learning_rate": 1.1291923372221492e-06, "loss": 0.87, "step": 1065000 }, { "epoch": 29.9, "learning_rate": 9.887455291099415e-07, "loss": 0.8602, "step": 1065500 }, { "epoch": 29.91, "learning_rate": 8.48298720997734e-07, "loss": 0.8788, "step": 1066000 }, { "epoch": 29.93, "learning_rate": 7.078519128855263e-07, "loss": 0.8837, "step": 1066500 }, { "epoch": 29.94, "learning_rate": 5.674051047733188e-07, "loss": 0.8723, "step": 1067000 }, { "epoch": 29.96, "learning_rate": 4.269582966611112e-07, "loss": 0.8793, "step": 1067500 }, { "epoch": 29.97, "learning_rate": 2.8651148854890354e-07, "loss": 0.8865, "step": 1068000 }, { "epoch": 29.98, "learning_rate": 1.4606468043669594e-07, "loss": 0.8533, "step": 1068500 }, { "epoch": 30.0, "learning_rate": 5.617872324488305e-09, "loss": 0.8603, "step": 1069000 }, { "epoch": 30.0, "step": 1069020, "total_flos": 1.8687680352154907e+21, "train_loss": 1.5424620530493403, "train_runtime": 741000.9892, "train_samples_per_second": 23.083, "train_steps_per_second": 1.443 } ], "logging_steps": 500, "max_steps": 1069020, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 1.8687680352154907e+21, "train_batch_size": 4, "trial_name": null, "trial_params": null }