{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.999936517341346, "eval_steps": 500, "global_step": 70884, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 1493.0737256705327, "learning_rate": 1.4106361969248132e-09, "loss": 16.0844, "step": 1 }, { "epoch": 0.0, "grad_norm": 1668.6822325141973, "learning_rate": 7.053180984624066e-09, "loss": 16.2728, "step": 5 }, { "epoch": 0.0, "grad_norm": 1593.5609005055294, "learning_rate": 1.4106361969248132e-08, "loss": 15.8795, "step": 10 }, { "epoch": 0.0, "grad_norm": 1740.7994774605, "learning_rate": 2.11595429538722e-08, "loss": 16.7429, "step": 15 }, { "epoch": 0.0, "grad_norm": 1745.1855857808887, "learning_rate": 2.8212723938496263e-08, "loss": 16.1763, "step": 20 }, { "epoch": 0.0, "grad_norm": 1600.9559741761996, "learning_rate": 3.526590492312033e-08, "loss": 16.3725, "step": 25 }, { "epoch": 0.0, "grad_norm": 1617.3961352296326, "learning_rate": 4.23190859077444e-08, "loss": 15.4492, "step": 30 }, { "epoch": 0.0, "grad_norm": 1621.5530943892318, "learning_rate": 4.937226689236846e-08, "loss": 15.4405, "step": 35 }, { "epoch": 0.0, "grad_norm": 1325.9925021820668, "learning_rate": 5.6425447876992527e-08, "loss": 14.6386, "step": 40 }, { "epoch": 0.0, "grad_norm": 1327.0691324050445, "learning_rate": 6.34786288616166e-08, "loss": 13.4356, "step": 45 }, { "epoch": 0.0, "grad_norm": 1454.622792295747, "learning_rate": 7.053180984624066e-08, "loss": 13.2331, "step": 50 }, { "epoch": 0.0, "grad_norm": 1135.4587201610175, "learning_rate": 7.758499083086472e-08, "loss": 12.2934, "step": 55 }, { "epoch": 0.0, "grad_norm": 976.6086360192892, "learning_rate": 8.46381718154888e-08, "loss": 10.5764, "step": 60 }, { "epoch": 0.0, "grad_norm": 787.7464747036888, "learning_rate": 9.169135280011286e-08, "loss": 9.5626, "step": 65 }, { "epoch": 0.0, "grad_norm": 944.5961012828227, "learning_rate": 9.874453378473692e-08, "loss": 9.28, "step": 70 }, { "epoch": 0.0, "grad_norm": 876.3350246610729, "learning_rate": 1.0579771476936099e-07, "loss": 8.7555, "step": 75 }, { "epoch": 0.0, "grad_norm": 471.3586319835879, "learning_rate": 1.1285089575398505e-07, "loss": 7.3749, "step": 80 }, { "epoch": 0.0, "grad_norm": 539.2888488233264, "learning_rate": 1.1990407673860913e-07, "loss": 6.7965, "step": 85 }, { "epoch": 0.0, "grad_norm": 319.6686952446837, "learning_rate": 1.269572577232332e-07, "loss": 6.2039, "step": 90 }, { "epoch": 0.0, "grad_norm": 323.0983845688617, "learning_rate": 1.3401043870785725e-07, "loss": 5.902, "step": 95 }, { "epoch": 0.0, "grad_norm": 277.4164551652692, "learning_rate": 1.4106361969248132e-07, "loss": 5.6253, "step": 100 }, { "epoch": 0.0, "grad_norm": 211.89978719226133, "learning_rate": 1.481168006771054e-07, "loss": 5.4443, "step": 105 }, { "epoch": 0.0, "grad_norm": 278.59462836711947, "learning_rate": 1.5516998166172944e-07, "loss": 5.2659, "step": 110 }, { "epoch": 0.0, "grad_norm": 250.10995490045883, "learning_rate": 1.622231626463535e-07, "loss": 5.0685, "step": 115 }, { "epoch": 0.01, "grad_norm": 196.42141486889983, "learning_rate": 1.692763436309776e-07, "loss": 4.8159, "step": 120 }, { "epoch": 0.01, "grad_norm": 191.3540979266941, "learning_rate": 1.7632952461560164e-07, "loss": 4.6016, "step": 125 }, { "epoch": 0.01, "grad_norm": 198.3644471959955, "learning_rate": 1.8338270560022571e-07, "loss": 4.4773, "step": 130 }, { "epoch": 0.01, "grad_norm": 174.16448574781828, "learning_rate": 1.9043588658484976e-07, "loss": 4.3955, "step": 135 }, { "epoch": 0.01, "grad_norm": 185.01038252455538, "learning_rate": 1.9748906756947384e-07, "loss": 4.3058, "step": 140 }, { "epoch": 0.01, "grad_norm": 140.31424944177627, "learning_rate": 2.045422485540979e-07, "loss": 3.974, "step": 145 }, { "epoch": 0.01, "grad_norm": 142.34834095434633, "learning_rate": 2.1159542953872198e-07, "loss": 3.9014, "step": 150 }, { "epoch": 0.01, "grad_norm": 141.51462944025857, "learning_rate": 2.1864861052334603e-07, "loss": 3.7357, "step": 155 }, { "epoch": 0.01, "grad_norm": 112.06028834828933, "learning_rate": 2.257017915079701e-07, "loss": 3.6341, "step": 160 }, { "epoch": 0.01, "grad_norm": 111.50920364748472, "learning_rate": 2.3275497249259415e-07, "loss": 3.471, "step": 165 }, { "epoch": 0.01, "grad_norm": 90.71732178503606, "learning_rate": 2.3980815347721825e-07, "loss": 3.2521, "step": 170 }, { "epoch": 0.01, "grad_norm": 100.63799360618768, "learning_rate": 2.468613344618423e-07, "loss": 3.1855, "step": 175 }, { "epoch": 0.01, "grad_norm": 108.26725108354506, "learning_rate": 2.539145154464664e-07, "loss": 3.1287, "step": 180 }, { "epoch": 0.01, "grad_norm": 69.6341645301703, "learning_rate": 2.609676964310904e-07, "loss": 3.0425, "step": 185 }, { "epoch": 0.01, "grad_norm": 82.59536181881954, "learning_rate": 2.680208774157145e-07, "loss": 2.79, "step": 190 }, { "epoch": 0.01, "grad_norm": 60.62454649003572, "learning_rate": 2.7507405840033857e-07, "loss": 2.9082, "step": 195 }, { "epoch": 0.01, "grad_norm": 64.45850724694489, "learning_rate": 2.8212723938496265e-07, "loss": 2.8368, "step": 200 }, { "epoch": 0.01, "grad_norm": 55.92476953087811, "learning_rate": 2.8918042036958667e-07, "loss": 2.9402, "step": 205 }, { "epoch": 0.01, "grad_norm": 65.7903141986139, "learning_rate": 2.962336013542108e-07, "loss": 2.7063, "step": 210 }, { "epoch": 0.01, "grad_norm": 63.13230710643131, "learning_rate": 3.032867823388348e-07, "loss": 2.711, "step": 215 }, { "epoch": 0.01, "grad_norm": 58.0309123612463, "learning_rate": 3.103399633234589e-07, "loss": 2.5864, "step": 220 }, { "epoch": 0.01, "grad_norm": 51.934921917753016, "learning_rate": 3.1739314430808296e-07, "loss": 2.6546, "step": 225 }, { "epoch": 0.01, "grad_norm": 56.9400406309904, "learning_rate": 3.24446325292707e-07, "loss": 2.5507, "step": 230 }, { "epoch": 0.01, "grad_norm": 48.52809979625148, "learning_rate": 3.314995062773311e-07, "loss": 2.5385, "step": 235 }, { "epoch": 0.01, "grad_norm": 47.13451743977923, "learning_rate": 3.385526872619552e-07, "loss": 2.5494, "step": 240 }, { "epoch": 0.01, "grad_norm": 48.93552123234581, "learning_rate": 3.4560586824657926e-07, "loss": 2.4727, "step": 245 }, { "epoch": 0.01, "grad_norm": 38.741755532923, "learning_rate": 3.526590492312033e-07, "loss": 2.4113, "step": 250 }, { "epoch": 0.01, "grad_norm": 39.61271782523791, "learning_rate": 3.5971223021582736e-07, "loss": 2.4041, "step": 255 }, { "epoch": 0.01, "grad_norm": 38.984524568518786, "learning_rate": 3.6676541120045143e-07, "loss": 2.2889, "step": 260 }, { "epoch": 0.01, "grad_norm": 34.863973070307736, "learning_rate": 3.738185921850755e-07, "loss": 2.3173, "step": 265 }, { "epoch": 0.01, "grad_norm": 32.63886144618183, "learning_rate": 3.808717731696995e-07, "loss": 2.3947, "step": 270 }, { "epoch": 0.01, "grad_norm": 33.63961303197873, "learning_rate": 3.879249541543236e-07, "loss": 2.2098, "step": 275 }, { "epoch": 0.01, "grad_norm": 32.37629625616045, "learning_rate": 3.9497813513894767e-07, "loss": 2.3853, "step": 280 }, { "epoch": 0.01, "grad_norm": 32.870580045008275, "learning_rate": 4.020313161235718e-07, "loss": 2.2034, "step": 285 }, { "epoch": 0.01, "grad_norm": 36.17392923347136, "learning_rate": 4.090844971081958e-07, "loss": 2.2471, "step": 290 }, { "epoch": 0.01, "grad_norm": 35.86672938213747, "learning_rate": 4.161376780928199e-07, "loss": 2.2797, "step": 295 }, { "epoch": 0.01, "grad_norm": 31.491322137657292, "learning_rate": 4.2319085907744397e-07, "loss": 2.3222, "step": 300 }, { "epoch": 0.01, "grad_norm": 30.417845046914852, "learning_rate": 4.3024404006206804e-07, "loss": 2.2601, "step": 305 }, { "epoch": 0.01, "grad_norm": 26.230195546192633, "learning_rate": 4.3729722104669207e-07, "loss": 2.1369, "step": 310 }, { "epoch": 0.01, "grad_norm": 26.06903397162156, "learning_rate": 4.4435040203131614e-07, "loss": 2.1749, "step": 315 }, { "epoch": 0.01, "grad_norm": 35.515790618558846, "learning_rate": 4.514035830159402e-07, "loss": 2.131, "step": 320 }, { "epoch": 0.01, "grad_norm": 23.39959370215983, "learning_rate": 4.584567640005643e-07, "loss": 2.2326, "step": 325 }, { "epoch": 0.01, "grad_norm": 25.83729551385949, "learning_rate": 4.655099449851883e-07, "loss": 2.1105, "step": 330 }, { "epoch": 0.01, "grad_norm": 27.433628109552256, "learning_rate": 4.725631259698124e-07, "loss": 2.121, "step": 335 }, { "epoch": 0.01, "grad_norm": 23.869493383953355, "learning_rate": 4.796163069544365e-07, "loss": 2.0329, "step": 340 }, { "epoch": 0.01, "grad_norm": 21.20926380105703, "learning_rate": 4.866694879390605e-07, "loss": 2.1412, "step": 345 }, { "epoch": 0.01, "grad_norm": 20.23856754938487, "learning_rate": 4.937226689236846e-07, "loss": 2.0711, "step": 350 }, { "epoch": 0.02, "grad_norm": 24.5510550881969, "learning_rate": 5.007758499083087e-07, "loss": 1.9521, "step": 355 }, { "epoch": 0.02, "grad_norm": 23.308996549844295, "learning_rate": 5.078290308929328e-07, "loss": 2.1002, "step": 360 }, { "epoch": 0.02, "grad_norm": 21.650458319960855, "learning_rate": 5.148822118775568e-07, "loss": 2.0129, "step": 365 }, { "epoch": 0.02, "grad_norm": 22.35608125514246, "learning_rate": 5.219353928621808e-07, "loss": 1.9728, "step": 370 }, { "epoch": 0.02, "grad_norm": 19.391329162858245, "learning_rate": 5.28988573846805e-07, "loss": 2.0186, "step": 375 }, { "epoch": 0.02, "grad_norm": 18.913264467280225, "learning_rate": 5.36041754831429e-07, "loss": 2.072, "step": 380 }, { "epoch": 0.02, "grad_norm": 22.017640641423437, "learning_rate": 5.430949358160531e-07, "loss": 2.0241, "step": 385 }, { "epoch": 0.02, "grad_norm": 29.61053230838206, "learning_rate": 5.501481168006771e-07, "loss": 2.06, "step": 390 }, { "epoch": 0.02, "grad_norm": 18.66707782214414, "learning_rate": 5.572012977853012e-07, "loss": 1.9234, "step": 395 }, { "epoch": 0.02, "grad_norm": 19.56925408153988, "learning_rate": 5.642544787699253e-07, "loss": 1.9593, "step": 400 }, { "epoch": 0.02, "grad_norm": 21.541478346089974, "learning_rate": 5.713076597545494e-07, "loss": 2.0793, "step": 405 }, { "epoch": 0.02, "grad_norm": 19.50830085835403, "learning_rate": 5.783608407391733e-07, "loss": 1.9824, "step": 410 }, { "epoch": 0.02, "grad_norm": 17.980919852203144, "learning_rate": 5.854140217237975e-07, "loss": 1.962, "step": 415 }, { "epoch": 0.02, "grad_norm": 19.68117938946925, "learning_rate": 5.924672027084216e-07, "loss": 2.0047, "step": 420 }, { "epoch": 0.02, "grad_norm": 21.26570158613031, "learning_rate": 5.995203836930456e-07, "loss": 1.9533, "step": 425 }, { "epoch": 0.02, "grad_norm": 17.78662520957527, "learning_rate": 6.065735646776696e-07, "loss": 1.8809, "step": 430 }, { "epoch": 0.02, "grad_norm": 20.416830761222496, "learning_rate": 6.136267456622938e-07, "loss": 1.8558, "step": 435 }, { "epoch": 0.02, "grad_norm": 18.03954637757054, "learning_rate": 6.206799266469178e-07, "loss": 1.9291, "step": 440 }, { "epoch": 0.02, "grad_norm": 16.7112234084775, "learning_rate": 6.277331076315418e-07, "loss": 1.793, "step": 445 }, { "epoch": 0.02, "grad_norm": 17.82696832616069, "learning_rate": 6.347862886161659e-07, "loss": 1.9045, "step": 450 }, { "epoch": 0.02, "grad_norm": 17.126201478944438, "learning_rate": 6.4183946960079e-07, "loss": 1.8342, "step": 455 }, { "epoch": 0.02, "grad_norm": 23.293631330318448, "learning_rate": 6.48892650585414e-07, "loss": 1.903, "step": 460 }, { "epoch": 0.02, "grad_norm": 21.593676527550635, "learning_rate": 6.559458315700382e-07, "loss": 1.791, "step": 465 }, { "epoch": 0.02, "grad_norm": 16.786452970894214, "learning_rate": 6.629990125546622e-07, "loss": 1.8003, "step": 470 }, { "epoch": 0.02, "grad_norm": 22.804670739432435, "learning_rate": 6.700521935392864e-07, "loss": 1.8244, "step": 475 }, { "epoch": 0.02, "grad_norm": 17.48348527780603, "learning_rate": 6.771053745239104e-07, "loss": 2.0048, "step": 480 }, { "epoch": 0.02, "grad_norm": 16.40123583023564, "learning_rate": 6.841585555085344e-07, "loss": 1.8236, "step": 485 }, { "epoch": 0.02, "grad_norm": 16.173168434695988, "learning_rate": 6.912117364931585e-07, "loss": 1.8029, "step": 490 }, { "epoch": 0.02, "grad_norm": 19.208246288455292, "learning_rate": 6.982649174777825e-07, "loss": 1.8292, "step": 495 }, { "epoch": 0.02, "grad_norm": 15.55138615149806, "learning_rate": 7.053180984624066e-07, "loss": 1.7629, "step": 500 }, { "epoch": 0.02, "grad_norm": 17.549071943792157, "learning_rate": 7.123712794470307e-07, "loss": 1.7353, "step": 505 }, { "epoch": 0.02, "grad_norm": 20.19229022979663, "learning_rate": 7.194244604316547e-07, "loss": 1.8791, "step": 510 }, { "epoch": 0.02, "grad_norm": 15.990919931737333, "learning_rate": 7.264776414162788e-07, "loss": 1.7342, "step": 515 }, { "epoch": 0.02, "grad_norm": 18.196458223019654, "learning_rate": 7.335308224009029e-07, "loss": 1.8321, "step": 520 }, { "epoch": 0.02, "grad_norm": 16.491910452768337, "learning_rate": 7.405840033855269e-07, "loss": 1.7515, "step": 525 }, { "epoch": 0.02, "grad_norm": 18.409490880670386, "learning_rate": 7.47637184370151e-07, "loss": 1.7221, "step": 530 }, { "epoch": 0.02, "grad_norm": 17.21027525557651, "learning_rate": 7.54690365354775e-07, "loss": 1.7706, "step": 535 }, { "epoch": 0.02, "grad_norm": 18.462134586321856, "learning_rate": 7.61743546339399e-07, "loss": 1.7895, "step": 540 }, { "epoch": 0.02, "grad_norm": 18.51094643801263, "learning_rate": 7.687967273240232e-07, "loss": 1.7663, "step": 545 }, { "epoch": 0.02, "grad_norm": 18.31997025951962, "learning_rate": 7.758499083086472e-07, "loss": 1.7756, "step": 550 }, { "epoch": 0.02, "grad_norm": 15.645818406479359, "learning_rate": 7.829030892932714e-07, "loss": 1.7598, "step": 555 }, { "epoch": 0.02, "grad_norm": 20.36937660151297, "learning_rate": 7.899562702778953e-07, "loss": 1.7537, "step": 560 }, { "epoch": 0.02, "grad_norm": 16.716849406801753, "learning_rate": 7.970094512625194e-07, "loss": 1.7234, "step": 565 }, { "epoch": 0.02, "grad_norm": 22.677226207354845, "learning_rate": 8.040626322471436e-07, "loss": 1.789, "step": 570 }, { "epoch": 0.02, "grad_norm": 19.521214063483317, "learning_rate": 8.111158132317676e-07, "loss": 1.6659, "step": 575 }, { "epoch": 0.02, "grad_norm": 18.897013710329233, "learning_rate": 8.181689942163916e-07, "loss": 1.6887, "step": 580 }, { "epoch": 0.02, "grad_norm": 25.51822899243616, "learning_rate": 8.252221752010158e-07, "loss": 1.7394, "step": 585 }, { "epoch": 0.02, "grad_norm": 16.055805022967824, "learning_rate": 8.322753561856398e-07, "loss": 1.7094, "step": 590 }, { "epoch": 0.03, "grad_norm": 15.56903423819068, "learning_rate": 8.393285371702639e-07, "loss": 1.7717, "step": 595 }, { "epoch": 0.03, "grad_norm": 17.14916579452123, "learning_rate": 8.463817181548879e-07, "loss": 1.7228, "step": 600 }, { "epoch": 0.03, "grad_norm": 15.453570175592414, "learning_rate": 8.53434899139512e-07, "loss": 1.6956, "step": 605 }, { "epoch": 0.03, "grad_norm": 17.41331527875509, "learning_rate": 8.604880801241361e-07, "loss": 1.6637, "step": 610 }, { "epoch": 0.03, "grad_norm": 15.160251384145718, "learning_rate": 8.675412611087601e-07, "loss": 1.7307, "step": 615 }, { "epoch": 0.03, "grad_norm": 16.755458063023116, "learning_rate": 8.745944420933841e-07, "loss": 1.6759, "step": 620 }, { "epoch": 0.03, "grad_norm": 16.873437079778068, "learning_rate": 8.816476230780083e-07, "loss": 1.8, "step": 625 }, { "epoch": 0.03, "grad_norm": 14.670246364127115, "learning_rate": 8.887008040626323e-07, "loss": 1.6605, "step": 630 }, { "epoch": 0.03, "grad_norm": 13.697858705196312, "learning_rate": 8.957539850472564e-07, "loss": 1.637, "step": 635 }, { "epoch": 0.03, "grad_norm": 17.385126253131975, "learning_rate": 9.028071660318804e-07, "loss": 1.7152, "step": 640 }, { "epoch": 0.03, "grad_norm": 17.44914280911491, "learning_rate": 9.098603470165044e-07, "loss": 1.7758, "step": 645 }, { "epoch": 0.03, "grad_norm": 16.681771535276187, "learning_rate": 9.169135280011286e-07, "loss": 1.6362, "step": 650 }, { "epoch": 0.03, "grad_norm": 68.24376713762558, "learning_rate": 9.239667089857526e-07, "loss": 1.68, "step": 655 }, { "epoch": 0.03, "grad_norm": 16.15036407245028, "learning_rate": 9.310198899703766e-07, "loss": 1.6402, "step": 660 }, { "epoch": 0.03, "grad_norm": 25.189743493363206, "learning_rate": 9.380730709550008e-07, "loss": 1.6397, "step": 665 }, { "epoch": 0.03, "grad_norm": 16.042629468292205, "learning_rate": 9.451262519396248e-07, "loss": 1.6398, "step": 670 }, { "epoch": 0.03, "grad_norm": 14.995753481259305, "learning_rate": 9.52179432924249e-07, "loss": 1.647, "step": 675 }, { "epoch": 0.03, "grad_norm": 17.09703797004552, "learning_rate": 9.59232613908873e-07, "loss": 1.6328, "step": 680 }, { "epoch": 0.03, "grad_norm": 14.576412545883827, "learning_rate": 9.66285794893497e-07, "loss": 1.66, "step": 685 }, { "epoch": 0.03, "grad_norm": 15.316074398747404, "learning_rate": 9.73338975878121e-07, "loss": 1.5745, "step": 690 }, { "epoch": 0.03, "grad_norm": 15.177981255253448, "learning_rate": 9.80392156862745e-07, "loss": 1.6405, "step": 695 }, { "epoch": 0.03, "grad_norm": 17.350109267760402, "learning_rate": 9.87445337847369e-07, "loss": 1.6253, "step": 700 }, { "epoch": 0.03, "grad_norm": 14.709539765625115, "learning_rate": 9.944985188319933e-07, "loss": 1.6578, "step": 705 }, { "epoch": 0.03, "grad_norm": 14.576124272552956, "learning_rate": 1.0015516998166174e-06, "loss": 1.5545, "step": 710 }, { "epoch": 0.03, "grad_norm": 13.170981917972304, "learning_rate": 1.0086048808012416e-06, "loss": 1.6033, "step": 715 }, { "epoch": 0.03, "grad_norm": 18.280043822001478, "learning_rate": 1.0156580617858656e-06, "loss": 1.7083, "step": 720 }, { "epoch": 0.03, "grad_norm": 14.21071445705185, "learning_rate": 1.0227112427704894e-06, "loss": 1.5985, "step": 725 }, { "epoch": 0.03, "grad_norm": 17.11314449596276, "learning_rate": 1.0297644237551137e-06, "loss": 1.6236, "step": 730 }, { "epoch": 0.03, "grad_norm": 16.49088382137558, "learning_rate": 1.0368176047397377e-06, "loss": 1.6458, "step": 735 }, { "epoch": 0.03, "grad_norm": 23.477084210504625, "learning_rate": 1.0438707857243617e-06, "loss": 1.619, "step": 740 }, { "epoch": 0.03, "grad_norm": 18.294983555366425, "learning_rate": 1.050923966708986e-06, "loss": 1.604, "step": 745 }, { "epoch": 0.03, "grad_norm": 17.52057771612792, "learning_rate": 1.05797714769361e-06, "loss": 1.5461, "step": 750 }, { "epoch": 0.03, "grad_norm": 16.346423808447515, "learning_rate": 1.065030328678234e-06, "loss": 1.5353, "step": 755 }, { "epoch": 0.03, "grad_norm": 17.200436885611502, "learning_rate": 1.072083509662858e-06, "loss": 1.6506, "step": 760 }, { "epoch": 0.03, "grad_norm": 16.807350294042166, "learning_rate": 1.079136690647482e-06, "loss": 1.6094, "step": 765 }, { "epoch": 0.03, "grad_norm": 31.180522720043733, "learning_rate": 1.0861898716321062e-06, "loss": 1.6395, "step": 770 }, { "epoch": 0.03, "grad_norm": 21.33280205078513, "learning_rate": 1.0932430526167303e-06, "loss": 1.5827, "step": 775 }, { "epoch": 0.03, "grad_norm": 14.890135262047313, "learning_rate": 1.1002962336013543e-06, "loss": 1.5186, "step": 780 }, { "epoch": 0.03, "grad_norm": 14.326513851295944, "learning_rate": 1.1073494145859783e-06, "loss": 1.6546, "step": 785 }, { "epoch": 0.03, "grad_norm": 15.32273931917068, "learning_rate": 1.1144025955706023e-06, "loss": 1.5597, "step": 790 }, { "epoch": 0.03, "grad_norm": 17.047288821920386, "learning_rate": 1.1214557765552266e-06, "loss": 1.5502, "step": 795 }, { "epoch": 0.03, "grad_norm": 15.680953956325846, "learning_rate": 1.1285089575398506e-06, "loss": 1.6232, "step": 800 }, { "epoch": 0.03, "grad_norm": 28.445383445826906, "learning_rate": 1.1355621385244746e-06, "loss": 1.5552, "step": 805 }, { "epoch": 0.03, "grad_norm": 14.706228347694973, "learning_rate": 1.1426153195090988e-06, "loss": 1.5749, "step": 810 }, { "epoch": 0.03, "grad_norm": 16.150970886796813, "learning_rate": 1.1496685004937226e-06, "loss": 1.6283, "step": 815 }, { "epoch": 0.03, "grad_norm": 17.330211320075424, "learning_rate": 1.1567216814783467e-06, "loss": 1.5405, "step": 820 }, { "epoch": 0.03, "grad_norm": 13.595097796083746, "learning_rate": 1.163774862462971e-06, "loss": 1.531, "step": 825 }, { "epoch": 0.04, "grad_norm": 12.969918818831422, "learning_rate": 1.170828043447595e-06, "loss": 1.5029, "step": 830 }, { "epoch": 0.04, "grad_norm": 13.331658046415496, "learning_rate": 1.1778812244322192e-06, "loss": 1.4709, "step": 835 }, { "epoch": 0.04, "grad_norm": 20.983987382276847, "learning_rate": 1.1849344054168432e-06, "loss": 1.5851, "step": 840 }, { "epoch": 0.04, "grad_norm": 17.32997600480952, "learning_rate": 1.1919875864014672e-06, "loss": 1.5011, "step": 845 }, { "epoch": 0.04, "grad_norm": 13.608800626612172, "learning_rate": 1.1990407673860912e-06, "loss": 1.5744, "step": 850 }, { "epoch": 0.04, "grad_norm": 17.43423848123491, "learning_rate": 1.2060939483707152e-06, "loss": 1.5913, "step": 855 }, { "epoch": 0.04, "grad_norm": 12.563368927957066, "learning_rate": 1.2131471293553393e-06, "loss": 1.4959, "step": 860 }, { "epoch": 0.04, "grad_norm": 16.962098801441446, "learning_rate": 1.2202003103399635e-06, "loss": 1.5536, "step": 865 }, { "epoch": 0.04, "grad_norm": 14.663938218494293, "learning_rate": 1.2272534913245875e-06, "loss": 1.5721, "step": 870 }, { "epoch": 0.04, "grad_norm": 15.808277124221602, "learning_rate": 1.2343066723092115e-06, "loss": 1.6077, "step": 875 }, { "epoch": 0.04, "grad_norm": 17.660652928445412, "learning_rate": 1.2413598532938356e-06, "loss": 1.5393, "step": 880 }, { "epoch": 0.04, "grad_norm": 16.406825454097827, "learning_rate": 1.2484130342784596e-06, "loss": 1.5348, "step": 885 }, { "epoch": 0.04, "grad_norm": 12.965892407297877, "learning_rate": 1.2554662152630836e-06, "loss": 1.5528, "step": 890 }, { "epoch": 0.04, "grad_norm": 21.573662392243765, "learning_rate": 1.2625193962477078e-06, "loss": 1.5669, "step": 895 }, { "epoch": 0.04, "grad_norm": 23.675814715786238, "learning_rate": 1.2695725772323319e-06, "loss": 1.5172, "step": 900 }, { "epoch": 0.04, "grad_norm": 20.171504982437398, "learning_rate": 1.2766257582169559e-06, "loss": 1.5242, "step": 905 }, { "epoch": 0.04, "grad_norm": 13.92744254846148, "learning_rate": 1.28367893920158e-06, "loss": 1.5454, "step": 910 }, { "epoch": 0.04, "grad_norm": 13.538907137463239, "learning_rate": 1.2907321201862041e-06, "loss": 1.5368, "step": 915 }, { "epoch": 0.04, "grad_norm": 18.720558770000086, "learning_rate": 1.297785301170828e-06, "loss": 1.5307, "step": 920 }, { "epoch": 0.04, "grad_norm": 20.74935766220052, "learning_rate": 1.3048384821554522e-06, "loss": 1.5091, "step": 925 }, { "epoch": 0.04, "grad_norm": 18.55288877610659, "learning_rate": 1.3118916631400764e-06, "loss": 1.5422, "step": 930 }, { "epoch": 0.04, "grad_norm": 25.569895521724, "learning_rate": 1.3189448441247004e-06, "loss": 1.5712, "step": 935 }, { "epoch": 0.04, "grad_norm": 15.420460603450163, "learning_rate": 1.3259980251093244e-06, "loss": 1.4793, "step": 940 }, { "epoch": 0.04, "grad_norm": 20.78721483805772, "learning_rate": 1.3330512060939485e-06, "loss": 1.5645, "step": 945 }, { "epoch": 0.04, "grad_norm": 15.791878511339808, "learning_rate": 1.3401043870785727e-06, "loss": 1.5616, "step": 950 }, { "epoch": 0.04, "grad_norm": 15.202414812578114, "learning_rate": 1.3471575680631965e-06, "loss": 1.4889, "step": 955 }, { "epoch": 0.04, "grad_norm": 18.27629934592644, "learning_rate": 1.3542107490478207e-06, "loss": 1.5141, "step": 960 }, { "epoch": 0.04, "grad_norm": 32.4150671492386, "learning_rate": 1.3612639300324448e-06, "loss": 1.5331, "step": 965 }, { "epoch": 0.04, "grad_norm": 13.093751622915649, "learning_rate": 1.3683171110170688e-06, "loss": 1.4729, "step": 970 }, { "epoch": 0.04, "grad_norm": 13.958939198385906, "learning_rate": 1.3753702920016928e-06, "loss": 1.5687, "step": 975 }, { "epoch": 0.04, "grad_norm": 30.95918343385252, "learning_rate": 1.382423472986317e-06, "loss": 1.5015, "step": 980 }, { "epoch": 0.04, "grad_norm": 25.006202414226543, "learning_rate": 1.3894766539709409e-06, "loss": 1.513, "step": 985 }, { "epoch": 0.04, "grad_norm": 29.915355187277456, "learning_rate": 1.396529834955565e-06, "loss": 1.493, "step": 990 }, { "epoch": 0.04, "grad_norm": 19.19724693772254, "learning_rate": 1.403583015940189e-06, "loss": 1.6291, "step": 995 }, { "epoch": 0.04, "grad_norm": 15.419354806927592, "learning_rate": 1.4106361969248131e-06, "loss": 1.5671, "step": 1000 }, { "epoch": 0.04, "grad_norm": 13.511881848270505, "learning_rate": 1.4176893779094371e-06, "loss": 1.5093, "step": 1005 }, { "epoch": 0.04, "grad_norm": 19.15809744907682, "learning_rate": 1.4247425588940614e-06, "loss": 1.4915, "step": 1010 }, { "epoch": 0.04, "grad_norm": 14.73348327692445, "learning_rate": 1.4317957398786856e-06, "loss": 1.4952, "step": 1015 }, { "epoch": 0.04, "grad_norm": 14.140021739161664, "learning_rate": 1.4388489208633094e-06, "loss": 1.5194, "step": 1020 }, { "epoch": 0.04, "grad_norm": 15.432519692489358, "learning_rate": 1.4459021018479337e-06, "loss": 1.5706, "step": 1025 }, { "epoch": 0.04, "grad_norm": 14.332989363553716, "learning_rate": 1.4529552828325577e-06, "loss": 1.5046, "step": 1030 }, { "epoch": 0.04, "grad_norm": 16.583952967506214, "learning_rate": 1.4600084638171815e-06, "loss": 1.4802, "step": 1035 }, { "epoch": 0.04, "grad_norm": 15.459657012064126, "learning_rate": 1.4670616448018057e-06, "loss": 1.4653, "step": 1040 }, { "epoch": 0.04, "grad_norm": 17.122154121342444, "learning_rate": 1.47411482578643e-06, "loss": 1.5526, "step": 1045 }, { "epoch": 0.04, "grad_norm": 14.329292660217874, "learning_rate": 1.4811680067710538e-06, "loss": 1.4963, "step": 1050 }, { "epoch": 0.04, "grad_norm": 15.230617845206329, "learning_rate": 1.488221187755678e-06, "loss": 1.4018, "step": 1055 }, { "epoch": 0.04, "grad_norm": 15.873361842831489, "learning_rate": 1.495274368740302e-06, "loss": 1.4458, "step": 1060 }, { "epoch": 0.05, "grad_norm": 14.782794216072803, "learning_rate": 1.502327549724926e-06, "loss": 1.591, "step": 1065 }, { "epoch": 0.05, "grad_norm": 14.206349777440574, "learning_rate": 1.50938073070955e-06, "loss": 1.4889, "step": 1070 }, { "epoch": 0.05, "grad_norm": 16.225104279659387, "learning_rate": 1.5164339116941743e-06, "loss": 1.512, "step": 1075 }, { "epoch": 0.05, "grad_norm": 15.346180509521528, "learning_rate": 1.523487092678798e-06, "loss": 1.4694, "step": 1080 }, { "epoch": 0.05, "grad_norm": 13.063404822419741, "learning_rate": 1.5305402736634223e-06, "loss": 1.4155, "step": 1085 }, { "epoch": 0.05, "grad_norm": 20.455007784432723, "learning_rate": 1.5375934546480464e-06, "loss": 1.4692, "step": 1090 }, { "epoch": 0.05, "grad_norm": 21.57413377412271, "learning_rate": 1.5446466356326706e-06, "loss": 1.5055, "step": 1095 }, { "epoch": 0.05, "grad_norm": 17.85600466250613, "learning_rate": 1.5516998166172944e-06, "loss": 1.4905, "step": 1100 }, { "epoch": 0.05, "grad_norm": 14.199361241676119, "learning_rate": 1.5587529976019186e-06, "loss": 1.4647, "step": 1105 }, { "epoch": 0.05, "grad_norm": 12.771270353435733, "learning_rate": 1.5658061785865429e-06, "loss": 1.4676, "step": 1110 }, { "epoch": 0.05, "grad_norm": 11.749034314939355, "learning_rate": 1.5728593595711667e-06, "loss": 1.4262, "step": 1115 }, { "epoch": 0.05, "grad_norm": 13.00053305157364, "learning_rate": 1.5799125405557907e-06, "loss": 1.4795, "step": 1120 }, { "epoch": 0.05, "grad_norm": 14.003896234045099, "learning_rate": 1.586965721540415e-06, "loss": 1.4993, "step": 1125 }, { "epoch": 0.05, "grad_norm": 14.095279458152357, "learning_rate": 1.5940189025250387e-06, "loss": 1.4814, "step": 1130 }, { "epoch": 0.05, "grad_norm": 12.543162407942619, "learning_rate": 1.601072083509663e-06, "loss": 1.4315, "step": 1135 }, { "epoch": 0.05, "grad_norm": 14.42311101247395, "learning_rate": 1.6081252644942872e-06, "loss": 1.4234, "step": 1140 }, { "epoch": 0.05, "grad_norm": 14.350351773931624, "learning_rate": 1.615178445478911e-06, "loss": 1.4529, "step": 1145 }, { "epoch": 0.05, "grad_norm": 13.06791995678334, "learning_rate": 1.6222316264635352e-06, "loss": 1.4306, "step": 1150 }, { "epoch": 0.05, "grad_norm": 16.044063353747298, "learning_rate": 1.6292848074481593e-06, "loss": 1.4614, "step": 1155 }, { "epoch": 0.05, "grad_norm": 12.965247686511777, "learning_rate": 1.6363379884327833e-06, "loss": 1.4448, "step": 1160 }, { "epoch": 0.05, "grad_norm": 16.39413251454674, "learning_rate": 1.6433911694174073e-06, "loss": 1.4909, "step": 1165 }, { "epoch": 0.05, "grad_norm": 19.501289202231618, "learning_rate": 1.6504443504020315e-06, "loss": 1.4759, "step": 1170 }, { "epoch": 0.05, "grad_norm": 17.004850610466043, "learning_rate": 1.6574975313866556e-06, "loss": 1.4527, "step": 1175 }, { "epoch": 0.05, "grad_norm": 21.049825858253758, "learning_rate": 1.6645507123712796e-06, "loss": 1.4794, "step": 1180 }, { "epoch": 0.05, "grad_norm": 17.371088915938834, "learning_rate": 1.6716038933559036e-06, "loss": 1.5064, "step": 1185 }, { "epoch": 0.05, "grad_norm": 19.919662542711166, "learning_rate": 1.6786570743405278e-06, "loss": 1.5015, "step": 1190 }, { "epoch": 0.05, "grad_norm": 16.06116754997933, "learning_rate": 1.6857102553251516e-06, "loss": 1.4229, "step": 1195 }, { "epoch": 0.05, "grad_norm": 15.006772543757613, "learning_rate": 1.6927634363097759e-06, "loss": 1.4358, "step": 1200 }, { "epoch": 0.05, "grad_norm": 22.28120385613161, "learning_rate": 1.6998166172944001e-06, "loss": 1.4693, "step": 1205 }, { "epoch": 0.05, "grad_norm": 13.285364585101334, "learning_rate": 1.706869798279024e-06, "loss": 1.4717, "step": 1210 }, { "epoch": 0.05, "grad_norm": 15.389444883981302, "learning_rate": 1.713922979263648e-06, "loss": 1.4671, "step": 1215 }, { "epoch": 0.05, "grad_norm": 18.43444710155935, "learning_rate": 1.7209761602482722e-06, "loss": 1.4491, "step": 1220 }, { "epoch": 0.05, "grad_norm": 13.553744604035002, "learning_rate": 1.728029341232896e-06, "loss": 1.4807, "step": 1225 }, { "epoch": 0.05, "grad_norm": 13.255376552689096, "learning_rate": 1.7350825222175202e-06, "loss": 1.4971, "step": 1230 }, { "epoch": 0.05, "grad_norm": 13.162027337280843, "learning_rate": 1.7421357032021445e-06, "loss": 1.4039, "step": 1235 }, { "epoch": 0.05, "grad_norm": 13.735772683725203, "learning_rate": 1.7491888841867683e-06, "loss": 1.4397, "step": 1240 }, { "epoch": 0.05, "grad_norm": 16.31259459324704, "learning_rate": 1.7562420651713925e-06, "loss": 1.4632, "step": 1245 }, { "epoch": 0.05, "grad_norm": 11.860225896706122, "learning_rate": 1.7632952461560165e-06, "loss": 1.4312, "step": 1250 }, { "epoch": 0.05, "grad_norm": 16.60544134913032, "learning_rate": 1.7703484271406407e-06, "loss": 1.4046, "step": 1255 }, { "epoch": 0.05, "grad_norm": 14.664553111453099, "learning_rate": 1.7774016081252646e-06, "loss": 1.4493, "step": 1260 }, { "epoch": 0.05, "grad_norm": 24.980869760313446, "learning_rate": 1.7844547891098888e-06, "loss": 1.4209, "step": 1265 }, { "epoch": 0.05, "grad_norm": 17.49625044017745, "learning_rate": 1.7915079700945128e-06, "loss": 1.4141, "step": 1270 }, { "epoch": 0.05, "grad_norm": 40.34521596546542, "learning_rate": 1.7985611510791368e-06, "loss": 1.4496, "step": 1275 }, { "epoch": 0.05, "grad_norm": 22.73153569950643, "learning_rate": 1.8056143320637609e-06, "loss": 1.4257, "step": 1280 }, { "epoch": 0.05, "grad_norm": 25.069577277635705, "learning_rate": 1.812667513048385e-06, "loss": 1.4042, "step": 1285 }, { "epoch": 0.05, "grad_norm": 14.071015188265285, "learning_rate": 1.819720694033009e-06, "loss": 1.3545, "step": 1290 }, { "epoch": 0.05, "grad_norm": 27.667539679244467, "learning_rate": 1.8267738750176331e-06, "loss": 1.4392, "step": 1295 }, { "epoch": 0.06, "grad_norm": 28.486727021793588, "learning_rate": 1.8338270560022571e-06, "loss": 1.4715, "step": 1300 }, { "epoch": 0.06, "grad_norm": 18.27418774106074, "learning_rate": 1.8408802369868812e-06, "loss": 1.4184, "step": 1305 }, { "epoch": 0.06, "grad_norm": 18.594453745449353, "learning_rate": 1.8479334179715052e-06, "loss": 1.4119, "step": 1310 }, { "epoch": 0.06, "grad_norm": 21.381590391450835, "learning_rate": 1.8549865989561294e-06, "loss": 1.4217, "step": 1315 }, { "epoch": 0.06, "grad_norm": 13.413120286400547, "learning_rate": 1.8620397799407532e-06, "loss": 1.4436, "step": 1320 }, { "epoch": 0.06, "grad_norm": 12.893693175352412, "learning_rate": 1.8690929609253775e-06, "loss": 1.4022, "step": 1325 }, { "epoch": 0.06, "grad_norm": 13.493176005244209, "learning_rate": 1.8761461419100017e-06, "loss": 1.4129, "step": 1330 }, { "epoch": 0.06, "grad_norm": 12.326519977003366, "learning_rate": 1.8831993228946257e-06, "loss": 1.418, "step": 1335 }, { "epoch": 0.06, "grad_norm": 13.237437459256197, "learning_rate": 1.8902525038792495e-06, "loss": 1.3908, "step": 1340 }, { "epoch": 0.06, "grad_norm": 12.147333913625673, "learning_rate": 1.8973056848638738e-06, "loss": 1.4188, "step": 1345 }, { "epoch": 0.06, "grad_norm": 15.48135737731555, "learning_rate": 1.904358865848498e-06, "loss": 1.4249, "step": 1350 }, { "epoch": 0.06, "grad_norm": 22.271710806477344, "learning_rate": 1.911412046833122e-06, "loss": 1.4079, "step": 1355 }, { "epoch": 0.06, "grad_norm": 15.87900935508388, "learning_rate": 1.918465227817746e-06, "loss": 1.4289, "step": 1360 }, { "epoch": 0.06, "grad_norm": 16.74854488831577, "learning_rate": 1.92551840880237e-06, "loss": 1.4398, "step": 1365 }, { "epoch": 0.06, "grad_norm": 14.158899131153982, "learning_rate": 1.932571589786994e-06, "loss": 1.436, "step": 1370 }, { "epoch": 0.06, "grad_norm": 12.39229688205433, "learning_rate": 1.939624770771618e-06, "loss": 1.4569, "step": 1375 }, { "epoch": 0.06, "grad_norm": 12.25788023716984, "learning_rate": 1.946677951756242e-06, "loss": 1.3673, "step": 1380 }, { "epoch": 0.06, "grad_norm": 16.551985889523067, "learning_rate": 1.953731132740866e-06, "loss": 1.4129, "step": 1385 }, { "epoch": 0.06, "grad_norm": 11.019618245577336, "learning_rate": 1.96078431372549e-06, "loss": 1.3786, "step": 1390 }, { "epoch": 0.06, "grad_norm": 15.958370211529191, "learning_rate": 1.9678374947101146e-06, "loss": 1.428, "step": 1395 }, { "epoch": 0.06, "grad_norm": 11.942414051256707, "learning_rate": 1.974890675694738e-06, "loss": 1.3626, "step": 1400 }, { "epoch": 0.06, "grad_norm": 16.110012624743174, "learning_rate": 1.9819438566793627e-06, "loss": 1.4725, "step": 1405 }, { "epoch": 0.06, "grad_norm": 19.496193291138677, "learning_rate": 1.9889970376639867e-06, "loss": 1.3902, "step": 1410 }, { "epoch": 0.06, "grad_norm": 22.957177884901764, "learning_rate": 1.9960502186486107e-06, "loss": 1.3647, "step": 1415 }, { "epoch": 0.06, "grad_norm": 23.97029717591676, "learning_rate": 2.0031033996332347e-06, "loss": 1.4448, "step": 1420 }, { "epoch": 0.06, "grad_norm": 15.661277134514194, "learning_rate": 2.0101565806178587e-06, "loss": 1.38, "step": 1425 }, { "epoch": 0.06, "grad_norm": 16.466348958740856, "learning_rate": 2.017209761602483e-06, "loss": 1.4105, "step": 1430 }, { "epoch": 0.06, "grad_norm": 14.18943404010148, "learning_rate": 2.0242629425871068e-06, "loss": 1.4121, "step": 1435 }, { "epoch": 0.06, "grad_norm": 26.197151032062003, "learning_rate": 2.0313161235717312e-06, "loss": 1.419, "step": 1440 }, { "epoch": 0.06, "grad_norm": 27.08878236179872, "learning_rate": 2.0383693045563552e-06, "loss": 1.4033, "step": 1445 }, { "epoch": 0.06, "grad_norm": 27.89064350358611, "learning_rate": 2.045422485540979e-06, "loss": 1.4637, "step": 1450 }, { "epoch": 0.06, "grad_norm": 15.70613178958977, "learning_rate": 2.0524756665256033e-06, "loss": 1.353, "step": 1455 }, { "epoch": 0.06, "grad_norm": 12.791535916409925, "learning_rate": 2.0595288475102273e-06, "loss": 1.3793, "step": 1460 }, { "epoch": 0.06, "grad_norm": 16.147748419636127, "learning_rate": 2.0665820284948513e-06, "loss": 1.4328, "step": 1465 }, { "epoch": 0.06, "grad_norm": 49.261388594284156, "learning_rate": 2.0736352094794754e-06, "loss": 1.4484, "step": 1470 }, { "epoch": 0.06, "grad_norm": 63.17069815777682, "learning_rate": 2.0806883904640994e-06, "loss": 1.3834, "step": 1475 }, { "epoch": 0.06, "grad_norm": 15.663593522273047, "learning_rate": 2.0877415714487234e-06, "loss": 1.4638, "step": 1480 }, { "epoch": 0.06, "grad_norm": 75.92609631954323, "learning_rate": 2.0947947524333474e-06, "loss": 1.3994, "step": 1485 }, { "epoch": 0.06, "grad_norm": 22.106485004542424, "learning_rate": 2.101847933417972e-06, "loss": 1.3998, "step": 1490 }, { "epoch": 0.06, "grad_norm": 40.84734254107766, "learning_rate": 2.108901114402596e-06, "loss": 1.448, "step": 1495 }, { "epoch": 0.06, "grad_norm": 39.78835295369607, "learning_rate": 2.11595429538722e-06, "loss": 1.3962, "step": 1500 }, { "epoch": 0.06, "grad_norm": 29.183531521251187, "learning_rate": 2.123007476371844e-06, "loss": 1.4144, "step": 1505 }, { "epoch": 0.06, "grad_norm": 36.42115195566105, "learning_rate": 2.130060657356468e-06, "loss": 1.432, "step": 1510 }, { "epoch": 0.06, "grad_norm": 16.0577774070433, "learning_rate": 2.137113838341092e-06, "loss": 1.3785, "step": 1515 }, { "epoch": 0.06, "grad_norm": 29.85453094374671, "learning_rate": 2.144167019325716e-06, "loss": 1.4085, "step": 1520 }, { "epoch": 0.06, "grad_norm": 12.714264755030822, "learning_rate": 2.1512202003103404e-06, "loss": 1.4052, "step": 1525 }, { "epoch": 0.06, "grad_norm": 27.39846878215763, "learning_rate": 2.158273381294964e-06, "loss": 1.4022, "step": 1530 }, { "epoch": 0.06, "grad_norm": 12.085106952588045, "learning_rate": 2.1653265622795885e-06, "loss": 1.433, "step": 1535 }, { "epoch": 0.07, "grad_norm": 20.702911786029883, "learning_rate": 2.1723797432642125e-06, "loss": 1.405, "step": 1540 }, { "epoch": 0.07, "grad_norm": 12.21316478545504, "learning_rate": 2.179432924248836e-06, "loss": 1.4159, "step": 1545 }, { "epoch": 0.07, "grad_norm": 12.891125178606297, "learning_rate": 2.1864861052334605e-06, "loss": 1.399, "step": 1550 }, { "epoch": 0.07, "grad_norm": 18.408338062001597, "learning_rate": 2.1935392862180846e-06, "loss": 1.4339, "step": 1555 }, { "epoch": 0.07, "grad_norm": 13.287379305314193, "learning_rate": 2.2005924672027086e-06, "loss": 1.3584, "step": 1560 }, { "epoch": 0.07, "grad_norm": 18.05706675877235, "learning_rate": 2.2076456481873326e-06, "loss": 1.4022, "step": 1565 }, { "epoch": 0.07, "grad_norm": 26.829991457932888, "learning_rate": 2.2146988291719566e-06, "loss": 1.4012, "step": 1570 }, { "epoch": 0.07, "grad_norm": 13.6522840404752, "learning_rate": 2.221752010156581e-06, "loss": 1.3991, "step": 1575 }, { "epoch": 0.07, "grad_norm": 11.827408295721925, "learning_rate": 2.2288051911412047e-06, "loss": 1.3692, "step": 1580 }, { "epoch": 0.07, "grad_norm": 17.130862768303327, "learning_rate": 2.235858372125829e-06, "loss": 1.4243, "step": 1585 }, { "epoch": 0.07, "grad_norm": 17.080414871071657, "learning_rate": 2.242911553110453e-06, "loss": 1.3906, "step": 1590 }, { "epoch": 0.07, "grad_norm": 14.720817904904198, "learning_rate": 2.249964734095077e-06, "loss": 1.3741, "step": 1595 }, { "epoch": 0.07, "grad_norm": 12.169883267513889, "learning_rate": 2.257017915079701e-06, "loss": 1.3741, "step": 1600 }, { "epoch": 0.07, "grad_norm": 10.592925311905242, "learning_rate": 2.264071096064325e-06, "loss": 1.3667, "step": 1605 }, { "epoch": 0.07, "grad_norm": 15.32664364869199, "learning_rate": 2.2711242770489492e-06, "loss": 1.3673, "step": 1610 }, { "epoch": 0.07, "grad_norm": 13.983222923205547, "learning_rate": 2.2781774580335732e-06, "loss": 1.3391, "step": 1615 }, { "epoch": 0.07, "grad_norm": 14.084498525731727, "learning_rate": 2.2852306390181977e-06, "loss": 1.358, "step": 1620 }, { "epoch": 0.07, "grad_norm": 15.721355413481922, "learning_rate": 2.2922838200028213e-06, "loss": 1.4439, "step": 1625 }, { "epoch": 0.07, "grad_norm": 13.22726316745244, "learning_rate": 2.2993370009874453e-06, "loss": 1.3166, "step": 1630 }, { "epoch": 0.07, "grad_norm": 16.448353076993822, "learning_rate": 2.3063901819720697e-06, "loss": 1.4671, "step": 1635 }, { "epoch": 0.07, "grad_norm": 16.21732388378683, "learning_rate": 2.3134433629566933e-06, "loss": 1.3832, "step": 1640 }, { "epoch": 0.07, "grad_norm": 16.162029900989616, "learning_rate": 2.3204965439413178e-06, "loss": 1.3298, "step": 1645 }, { "epoch": 0.07, "grad_norm": 20.131760362607487, "learning_rate": 2.327549724925942e-06, "loss": 1.3596, "step": 1650 }, { "epoch": 0.07, "grad_norm": 17.75664903033911, "learning_rate": 2.334602905910566e-06, "loss": 1.3084, "step": 1655 }, { "epoch": 0.07, "grad_norm": 15.599873670259976, "learning_rate": 2.34165608689519e-06, "loss": 1.3819, "step": 1660 }, { "epoch": 0.07, "grad_norm": 18.33043534627968, "learning_rate": 2.348709267879814e-06, "loss": 1.3836, "step": 1665 }, { "epoch": 0.07, "grad_norm": 12.114395708736549, "learning_rate": 2.3557624488644383e-06, "loss": 1.3784, "step": 1670 }, { "epoch": 0.07, "grad_norm": 14.223743901122075, "learning_rate": 2.362815629849062e-06, "loss": 1.3816, "step": 1675 }, { "epoch": 0.07, "grad_norm": 15.026634719268413, "learning_rate": 2.3698688108336864e-06, "loss": 1.3273, "step": 1680 }, { "epoch": 0.07, "grad_norm": 18.017701591744665, "learning_rate": 2.3769219918183104e-06, "loss": 1.3369, "step": 1685 }, { "epoch": 0.07, "grad_norm": 13.683754087387241, "learning_rate": 2.3839751728029344e-06, "loss": 1.409, "step": 1690 }, { "epoch": 0.07, "grad_norm": 15.348704753527892, "learning_rate": 2.3910283537875584e-06, "loss": 1.4118, "step": 1695 }, { "epoch": 0.07, "grad_norm": 11.542889735095187, "learning_rate": 2.3980815347721824e-06, "loss": 1.3366, "step": 1700 }, { "epoch": 0.07, "grad_norm": 11.025686891831606, "learning_rate": 2.4051347157568065e-06, "loss": 1.3229, "step": 1705 }, { "epoch": 0.07, "grad_norm": 11.533083554714429, "learning_rate": 2.4121878967414305e-06, "loss": 1.3833, "step": 1710 }, { "epoch": 0.07, "grad_norm": 15.737349718281207, "learning_rate": 2.4192410777260545e-06, "loss": 1.3506, "step": 1715 }, { "epoch": 0.07, "grad_norm": 12.545444844779153, "learning_rate": 2.4262942587106785e-06, "loss": 1.4374, "step": 1720 }, { "epoch": 0.07, "grad_norm": 11.782696376867012, "learning_rate": 2.4333474396953025e-06, "loss": 1.3368, "step": 1725 }, { "epoch": 0.07, "grad_norm": 23.511542513936423, "learning_rate": 2.440400620679927e-06, "loss": 1.3859, "step": 1730 }, { "epoch": 0.07, "grad_norm": 22.21156240631286, "learning_rate": 2.447453801664551e-06, "loss": 1.4004, "step": 1735 }, { "epoch": 0.07, "grad_norm": 12.81637150409835, "learning_rate": 2.454506982649175e-06, "loss": 1.3957, "step": 1740 }, { "epoch": 0.07, "grad_norm": 12.369915233483516, "learning_rate": 2.461560163633799e-06, "loss": 1.3727, "step": 1745 }, { "epoch": 0.07, "grad_norm": 15.648441458739722, "learning_rate": 2.468613344618423e-06, "loss": 1.363, "step": 1750 }, { "epoch": 0.07, "grad_norm": 10.931682600920412, "learning_rate": 2.475666525603047e-06, "loss": 1.3587, "step": 1755 }, { "epoch": 0.07, "grad_norm": 34.330012053919155, "learning_rate": 2.482719706587671e-06, "loss": 1.3958, "step": 1760 }, { "epoch": 0.07, "grad_norm": 11.735310404626148, "learning_rate": 2.4897728875722956e-06, "loss": 1.3612, "step": 1765 }, { "epoch": 0.07, "grad_norm": 16.384813410643346, "learning_rate": 2.496826068556919e-06, "loss": 1.3112, "step": 1770 }, { "epoch": 0.08, "grad_norm": 26.838655170379702, "learning_rate": 2.5038792495415436e-06, "loss": 1.3532, "step": 1775 }, { "epoch": 0.08, "grad_norm": 13.706171587310045, "learning_rate": 2.510932430526167e-06, "loss": 1.391, "step": 1780 }, { "epoch": 0.08, "grad_norm": 16.126954660108268, "learning_rate": 2.5179856115107916e-06, "loss": 1.3312, "step": 1785 }, { "epoch": 0.08, "grad_norm": 18.9804836218818, "learning_rate": 2.5250387924954157e-06, "loss": 1.3977, "step": 1790 }, { "epoch": 0.08, "grad_norm": 22.558145099953244, "learning_rate": 2.5320919734800397e-06, "loss": 1.3647, "step": 1795 }, { "epoch": 0.08, "grad_norm": 31.61058690933841, "learning_rate": 2.5391451544646637e-06, "loss": 1.3761, "step": 1800 }, { "epoch": 0.08, "grad_norm": 30.24686401792589, "learning_rate": 2.546198335449288e-06, "loss": 1.3049, "step": 1805 }, { "epoch": 0.08, "grad_norm": 11.651728714815397, "learning_rate": 2.5532515164339118e-06, "loss": 1.3646, "step": 1810 }, { "epoch": 0.08, "grad_norm": 12.507949081652326, "learning_rate": 2.5603046974185358e-06, "loss": 1.3453, "step": 1815 }, { "epoch": 0.08, "grad_norm": 12.895282000896792, "learning_rate": 2.56735787840316e-06, "loss": 1.399, "step": 1820 }, { "epoch": 0.08, "grad_norm": 15.897609064572269, "learning_rate": 2.5744110593877842e-06, "loss": 1.3668, "step": 1825 }, { "epoch": 0.08, "grad_norm": 13.37757192102203, "learning_rate": 2.5814642403724083e-06, "loss": 1.2878, "step": 1830 }, { "epoch": 0.08, "grad_norm": 16.8907097254796, "learning_rate": 2.5885174213570323e-06, "loss": 1.3834, "step": 1835 }, { "epoch": 0.08, "grad_norm": 14.168138053426295, "learning_rate": 2.595570602341656e-06, "loss": 1.3573, "step": 1840 }, { "epoch": 0.08, "grad_norm": 16.758859909730333, "learning_rate": 2.6026237833262803e-06, "loss": 1.3796, "step": 1845 }, { "epoch": 0.08, "grad_norm": 46.55914050794329, "learning_rate": 2.6096769643109043e-06, "loss": 1.4054, "step": 1850 }, { "epoch": 0.08, "grad_norm": 11.077422825170535, "learning_rate": 2.6167301452955284e-06, "loss": 1.3622, "step": 1855 }, { "epoch": 0.08, "grad_norm": 24.461754902624616, "learning_rate": 2.623783326280153e-06, "loss": 1.4014, "step": 1860 }, { "epoch": 0.08, "grad_norm": 11.819181630091125, "learning_rate": 2.630836507264777e-06, "loss": 1.3178, "step": 1865 }, { "epoch": 0.08, "grad_norm": 12.989631124341651, "learning_rate": 2.637889688249401e-06, "loss": 1.394, "step": 1870 }, { "epoch": 0.08, "grad_norm": 63.08141055617478, "learning_rate": 2.6449428692340245e-06, "loss": 1.3432, "step": 1875 }, { "epoch": 0.08, "grad_norm": 95.68367095251443, "learning_rate": 2.651996050218649e-06, "loss": 1.3665, "step": 1880 }, { "epoch": 0.08, "grad_norm": 58.67303782481335, "learning_rate": 2.659049231203273e-06, "loss": 1.3322, "step": 1885 }, { "epoch": 0.08, "grad_norm": 10.602893765265211, "learning_rate": 2.666102412187897e-06, "loss": 1.3146, "step": 1890 }, { "epoch": 0.08, "grad_norm": 18.2506181142999, "learning_rate": 2.673155593172521e-06, "loss": 1.3438, "step": 1895 }, { "epoch": 0.08, "grad_norm": 20.76463669688865, "learning_rate": 2.6802087741571454e-06, "loss": 1.3194, "step": 1900 }, { "epoch": 0.08, "grad_norm": 29.568610153820664, "learning_rate": 2.687261955141769e-06, "loss": 1.3281, "step": 1905 }, { "epoch": 0.08, "grad_norm": 11.562512088861933, "learning_rate": 2.694315136126393e-06, "loss": 1.4566, "step": 1910 }, { "epoch": 0.08, "grad_norm": 14.227205242969235, "learning_rate": 2.701368317111017e-06, "loss": 1.2923, "step": 1915 }, { "epoch": 0.08, "grad_norm": 17.867047764220658, "learning_rate": 2.7084214980956415e-06, "loss": 1.3746, "step": 1920 }, { "epoch": 0.08, "grad_norm": 14.276227086261787, "learning_rate": 2.7154746790802655e-06, "loss": 1.3505, "step": 1925 }, { "epoch": 0.08, "grad_norm": 11.823383199296096, "learning_rate": 2.7225278600648895e-06, "loss": 1.3455, "step": 1930 }, { "epoch": 0.08, "grad_norm": 14.504931851462494, "learning_rate": 2.729581041049514e-06, "loss": 1.3695, "step": 1935 }, { "epoch": 0.08, "grad_norm": 11.210047329054863, "learning_rate": 2.7366342220341376e-06, "loss": 1.3894, "step": 1940 }, { "epoch": 0.08, "grad_norm": 11.882538088996677, "learning_rate": 2.7436874030187616e-06, "loss": 1.4123, "step": 1945 }, { "epoch": 0.08, "grad_norm": 11.991949556805977, "learning_rate": 2.7507405840033856e-06, "loss": 1.3439, "step": 1950 }, { "epoch": 0.08, "grad_norm": 22.073250901092063, "learning_rate": 2.75779376498801e-06, "loss": 1.3315, "step": 1955 }, { "epoch": 0.08, "grad_norm": 22.564408712583464, "learning_rate": 2.764846945972634e-06, "loss": 1.3305, "step": 1960 }, { "epoch": 0.08, "grad_norm": 10.597199108169024, "learning_rate": 2.771900126957258e-06, "loss": 1.3554, "step": 1965 }, { "epoch": 0.08, "grad_norm": 17.32623367921614, "learning_rate": 2.7789533079418817e-06, "loss": 1.3102, "step": 1970 }, { "epoch": 0.08, "grad_norm": 11.800012276730401, "learning_rate": 2.786006488926506e-06, "loss": 1.3767, "step": 1975 }, { "epoch": 0.08, "grad_norm": 14.087888765565582, "learning_rate": 2.79305966991113e-06, "loss": 1.3371, "step": 1980 }, { "epoch": 0.08, "grad_norm": 24.082486116092745, "learning_rate": 2.800112850895754e-06, "loss": 1.3409, "step": 1985 }, { "epoch": 0.08, "grad_norm": 21.58854975477442, "learning_rate": 2.807166031880378e-06, "loss": 1.3658, "step": 1990 }, { "epoch": 0.08, "grad_norm": 10.939736646976643, "learning_rate": 2.8142192128650027e-06, "loss": 1.3868, "step": 1995 }, { "epoch": 0.08, "grad_norm": 15.184029861709678, "learning_rate": 2.8212723938496263e-06, "loss": 1.3823, "step": 2000 }, { "epoch": 0.08, "grad_norm": 12.331610968276019, "learning_rate": 2.8283255748342503e-06, "loss": 1.3486, "step": 2005 }, { "epoch": 0.09, "grad_norm": 11.841051214282952, "learning_rate": 2.8353787558188743e-06, "loss": 1.3259, "step": 2010 }, { "epoch": 0.09, "grad_norm": 10.349318652539013, "learning_rate": 2.8424319368034987e-06, "loss": 1.312, "step": 2015 }, { "epoch": 0.09, "grad_norm": 12.757183957633567, "learning_rate": 2.8494851177881228e-06, "loss": 1.3936, "step": 2020 }, { "epoch": 0.09, "grad_norm": 13.332244529140562, "learning_rate": 2.8565382987727468e-06, "loss": 1.3405, "step": 2025 }, { "epoch": 0.09, "grad_norm": 16.58824888325681, "learning_rate": 2.8635914797573712e-06, "loss": 1.3458, "step": 2030 }, { "epoch": 0.09, "grad_norm": 11.159526982596624, "learning_rate": 2.870644660741995e-06, "loss": 1.3629, "step": 2035 }, { "epoch": 0.09, "grad_norm": 12.955794904060317, "learning_rate": 2.877697841726619e-06, "loss": 1.3312, "step": 2040 }, { "epoch": 0.09, "grad_norm": 26.59938857728824, "learning_rate": 2.884751022711243e-06, "loss": 1.3191, "step": 2045 }, { "epoch": 0.09, "grad_norm": 14.677332217292815, "learning_rate": 2.8918042036958673e-06, "loss": 1.3026, "step": 2050 }, { "epoch": 0.09, "grad_norm": 12.252022834666661, "learning_rate": 2.8988573846804913e-06, "loss": 1.3127, "step": 2055 }, { "epoch": 0.09, "grad_norm": 12.27122412233228, "learning_rate": 2.9059105656651154e-06, "loss": 1.3196, "step": 2060 }, { "epoch": 0.09, "grad_norm": 11.728472202693842, "learning_rate": 2.912963746649739e-06, "loss": 1.3256, "step": 2065 }, { "epoch": 0.09, "grad_norm": 11.852605127202235, "learning_rate": 2.920016927634363e-06, "loss": 1.3346, "step": 2070 }, { "epoch": 0.09, "grad_norm": 11.759742743483143, "learning_rate": 2.9270701086189874e-06, "loss": 1.3265, "step": 2075 }, { "epoch": 0.09, "grad_norm": 15.968779214939827, "learning_rate": 2.9341232896036114e-06, "loss": 1.3381, "step": 2080 }, { "epoch": 0.09, "grad_norm": 11.78630714297429, "learning_rate": 2.9411764705882355e-06, "loss": 1.3242, "step": 2085 }, { "epoch": 0.09, "grad_norm": 11.027645849501496, "learning_rate": 2.94822965157286e-06, "loss": 1.3256, "step": 2090 }, { "epoch": 0.09, "grad_norm": 22.951803114724818, "learning_rate": 2.955282832557484e-06, "loss": 1.3203, "step": 2095 }, { "epoch": 0.09, "grad_norm": 11.702937636560858, "learning_rate": 2.9623360135421075e-06, "loss": 1.3017, "step": 2100 }, { "epoch": 0.09, "grad_norm": 21.60012649866337, "learning_rate": 2.9693891945267315e-06, "loss": 1.3347, "step": 2105 }, { "epoch": 0.09, "grad_norm": 14.825619993648441, "learning_rate": 2.976442375511356e-06, "loss": 1.3325, "step": 2110 }, { "epoch": 0.09, "grad_norm": 11.108079068771541, "learning_rate": 2.98349555649598e-06, "loss": 1.2901, "step": 2115 }, { "epoch": 0.09, "grad_norm": 13.716657113286335, "learning_rate": 2.990548737480604e-06, "loss": 1.3074, "step": 2120 }, { "epoch": 0.09, "grad_norm": 21.610418128877384, "learning_rate": 2.9976019184652285e-06, "loss": 1.3635, "step": 2125 }, { "epoch": 0.09, "grad_norm": 24.76809179113239, "learning_rate": 3.004655099449852e-06, "loss": 1.3328, "step": 2130 }, { "epoch": 0.09, "grad_norm": 20.945111195759797, "learning_rate": 3.011708280434476e-06, "loss": 1.3319, "step": 2135 }, { "epoch": 0.09, "grad_norm": 14.009822169288428, "learning_rate": 3.0187614614191e-06, "loss": 1.3325, "step": 2140 }, { "epoch": 0.09, "grad_norm": 12.687229067901399, "learning_rate": 3.0258146424037246e-06, "loss": 1.3766, "step": 2145 }, { "epoch": 0.09, "grad_norm": 16.42831294095289, "learning_rate": 3.0328678233883486e-06, "loss": 1.3923, "step": 2150 }, { "epoch": 0.09, "grad_norm": 11.283394824788045, "learning_rate": 3.0399210043729726e-06, "loss": 1.3352, "step": 2155 }, { "epoch": 0.09, "grad_norm": 23.440699492833733, "learning_rate": 3.046974185357596e-06, "loss": 1.3345, "step": 2160 }, { "epoch": 0.09, "grad_norm": 12.996365943900992, "learning_rate": 3.0540273663422202e-06, "loss": 1.3508, "step": 2165 }, { "epoch": 0.09, "grad_norm": 15.47001623803198, "learning_rate": 3.0610805473268447e-06, "loss": 1.3227, "step": 2170 }, { "epoch": 0.09, "grad_norm": 14.84822049653563, "learning_rate": 3.0681337283114687e-06, "loss": 1.3236, "step": 2175 }, { "epoch": 0.09, "grad_norm": 37.11621833214017, "learning_rate": 3.0751869092960927e-06, "loss": 1.3245, "step": 2180 }, { "epoch": 0.09, "grad_norm": 11.987006755001374, "learning_rate": 3.082240090280717e-06, "loss": 1.3151, "step": 2185 }, { "epoch": 0.09, "grad_norm": 16.023382053687076, "learning_rate": 3.089293271265341e-06, "loss": 1.3297, "step": 2190 }, { "epoch": 0.09, "grad_norm": 19.444832333451078, "learning_rate": 3.0963464522499648e-06, "loss": 1.2886, "step": 2195 }, { "epoch": 0.09, "grad_norm": 36.22815938531628, "learning_rate": 3.103399633234589e-06, "loss": 1.3297, "step": 2200 }, { "epoch": 0.09, "grad_norm": 12.813592227335402, "learning_rate": 3.1104528142192132e-06, "loss": 1.2639, "step": 2205 }, { "epoch": 0.09, "grad_norm": 18.004689886867, "learning_rate": 3.1175059952038373e-06, "loss": 1.3373, "step": 2210 }, { "epoch": 0.09, "grad_norm": 21.642658462844512, "learning_rate": 3.1245591761884613e-06, "loss": 1.2647, "step": 2215 }, { "epoch": 0.09, "grad_norm": 13.879065452649801, "learning_rate": 3.1316123571730857e-06, "loss": 1.2782, "step": 2220 }, { "epoch": 0.09, "grad_norm": 10.136567194354573, "learning_rate": 3.1386655381577093e-06, "loss": 1.2772, "step": 2225 }, { "epoch": 0.09, "grad_norm": 11.41394232869268, "learning_rate": 3.1457187191423333e-06, "loss": 1.3092, "step": 2230 }, { "epoch": 0.09, "grad_norm": 13.892433684124184, "learning_rate": 3.1527719001269574e-06, "loss": 1.3778, "step": 2235 }, { "epoch": 0.09, "grad_norm": 16.115420760888988, "learning_rate": 3.1598250811115814e-06, "loss": 1.3333, "step": 2240 }, { "epoch": 0.1, "grad_norm": 18.224038237090316, "learning_rate": 3.166878262096206e-06, "loss": 1.3445, "step": 2245 }, { "epoch": 0.1, "grad_norm": 27.409227556066273, "learning_rate": 3.17393144308083e-06, "loss": 1.3295, "step": 2250 }, { "epoch": 0.1, "grad_norm": 41.98539016712513, "learning_rate": 3.180984624065454e-06, "loss": 1.3305, "step": 2255 }, { "epoch": 0.1, "grad_norm": 15.958212387725098, "learning_rate": 3.1880378050500775e-06, "loss": 1.3386, "step": 2260 }, { "epoch": 0.1, "grad_norm": 11.036202828711685, "learning_rate": 3.195090986034702e-06, "loss": 1.3773, "step": 2265 }, { "epoch": 0.1, "grad_norm": 25.47410159501302, "learning_rate": 3.202144167019326e-06, "loss": 1.2733, "step": 2270 }, { "epoch": 0.1, "grad_norm": 15.787436738839402, "learning_rate": 3.20919734800395e-06, "loss": 1.2663, "step": 2275 }, { "epoch": 0.1, "grad_norm": 21.62668559821898, "learning_rate": 3.2162505289885744e-06, "loss": 1.307, "step": 2280 }, { "epoch": 0.1, "grad_norm": 15.521918095272499, "learning_rate": 3.2233037099731984e-06, "loss": 1.3043, "step": 2285 }, { "epoch": 0.1, "grad_norm": 11.8136363287776, "learning_rate": 3.230356890957822e-06, "loss": 1.2959, "step": 2290 }, { "epoch": 0.1, "grad_norm": 13.647769776866582, "learning_rate": 3.237410071942446e-06, "loss": 1.3262, "step": 2295 }, { "epoch": 0.1, "grad_norm": 10.097590129920224, "learning_rate": 3.2444632529270705e-06, "loss": 1.3142, "step": 2300 }, { "epoch": 0.1, "grad_norm": 27.07091015193857, "learning_rate": 3.2515164339116945e-06, "loss": 1.318, "step": 2305 }, { "epoch": 0.1, "grad_norm": 12.830005880702661, "learning_rate": 3.2585696148963185e-06, "loss": 1.3984, "step": 2310 }, { "epoch": 0.1, "grad_norm": 17.729141083621435, "learning_rate": 3.265622795880943e-06, "loss": 1.3169, "step": 2315 }, { "epoch": 0.1, "grad_norm": 25.65087307063273, "learning_rate": 3.2726759768655666e-06, "loss": 1.3101, "step": 2320 }, { "epoch": 0.1, "grad_norm": 13.50130264357366, "learning_rate": 3.2797291578501906e-06, "loss": 1.3188, "step": 2325 }, { "epoch": 0.1, "grad_norm": 22.058211371622484, "learning_rate": 3.2867823388348146e-06, "loss": 1.315, "step": 2330 }, { "epoch": 0.1, "grad_norm": 26.44583527393522, "learning_rate": 3.2938355198194386e-06, "loss": 1.2715, "step": 2335 }, { "epoch": 0.1, "grad_norm": 26.53474816416927, "learning_rate": 3.300888700804063e-06, "loss": 1.294, "step": 2340 }, { "epoch": 0.1, "grad_norm": 14.482297997858216, "learning_rate": 3.307941881788687e-06, "loss": 1.2977, "step": 2345 }, { "epoch": 0.1, "grad_norm": 40.74591393337569, "learning_rate": 3.314995062773311e-06, "loss": 1.3364, "step": 2350 }, { "epoch": 0.1, "grad_norm": 23.502221090287733, "learning_rate": 3.3220482437579347e-06, "loss": 1.3135, "step": 2355 }, { "epoch": 0.1, "grad_norm": 12.981683970958356, "learning_rate": 3.329101424742559e-06, "loss": 1.312, "step": 2360 }, { "epoch": 0.1, "grad_norm": 13.800464338505064, "learning_rate": 3.336154605727183e-06, "loss": 1.2884, "step": 2365 }, { "epoch": 0.1, "grad_norm": 11.931513536142512, "learning_rate": 3.343207786711807e-06, "loss": 1.2874, "step": 2370 }, { "epoch": 0.1, "grad_norm": 12.909096968031482, "learning_rate": 3.3502609676964317e-06, "loss": 1.3575, "step": 2375 }, { "epoch": 0.1, "grad_norm": 12.609320976043588, "learning_rate": 3.3573141486810557e-06, "loss": 1.3592, "step": 2380 }, { "epoch": 0.1, "grad_norm": 14.072613831161481, "learning_rate": 3.3643673296656793e-06, "loss": 1.2885, "step": 2385 }, { "epoch": 0.1, "grad_norm": 12.6865970695013, "learning_rate": 3.3714205106503033e-06, "loss": 1.3017, "step": 2390 }, { "epoch": 0.1, "grad_norm": 12.351657609639986, "learning_rate": 3.3784736916349277e-06, "loss": 1.3181, "step": 2395 }, { "epoch": 0.1, "grad_norm": 13.534039631385772, "learning_rate": 3.3855268726195518e-06, "loss": 1.296, "step": 2400 }, { "epoch": 0.1, "grad_norm": 12.80918629498463, "learning_rate": 3.3925800536041758e-06, "loss": 1.3042, "step": 2405 }, { "epoch": 0.1, "grad_norm": 10.621829378155411, "learning_rate": 3.3996332345888002e-06, "loss": 1.2943, "step": 2410 }, { "epoch": 0.1, "grad_norm": 16.243774254738863, "learning_rate": 3.4066864155734242e-06, "loss": 1.3059, "step": 2415 }, { "epoch": 0.1, "grad_norm": 18.9337711654213, "learning_rate": 3.413739596558048e-06, "loss": 1.3214, "step": 2420 }, { "epoch": 0.1, "grad_norm": 14.242545930087541, "learning_rate": 3.420792777542672e-06, "loss": 1.302, "step": 2425 }, { "epoch": 0.1, "grad_norm": 18.95223396111189, "learning_rate": 3.427845958527296e-06, "loss": 1.2684, "step": 2430 }, { "epoch": 0.1, "grad_norm": 11.658892738145807, "learning_rate": 3.4348991395119203e-06, "loss": 1.333, "step": 2435 }, { "epoch": 0.1, "grad_norm": 15.000832226226128, "learning_rate": 3.4419523204965444e-06, "loss": 1.2924, "step": 2440 }, { "epoch": 0.1, "grad_norm": 19.810483633571383, "learning_rate": 3.4490055014811684e-06, "loss": 1.2569, "step": 2445 }, { "epoch": 0.1, "grad_norm": 13.1723050506315, "learning_rate": 3.456058682465792e-06, "loss": 1.2777, "step": 2450 }, { "epoch": 0.1, "grad_norm": 19.440211606303848, "learning_rate": 3.4631118634504164e-06, "loss": 1.3192, "step": 2455 }, { "epoch": 0.1, "grad_norm": 10.661384646268813, "learning_rate": 3.4701650444350404e-06, "loss": 1.2841, "step": 2460 }, { "epoch": 0.1, "grad_norm": 22.661837522337574, "learning_rate": 3.4772182254196645e-06, "loss": 1.2775, "step": 2465 }, { "epoch": 0.1, "grad_norm": 40.77966132645562, "learning_rate": 3.484271406404289e-06, "loss": 1.3205, "step": 2470 }, { "epoch": 0.1, "grad_norm": 32.84843369664207, "learning_rate": 3.491324587388913e-06, "loss": 1.3226, "step": 2475 }, { "epoch": 0.1, "grad_norm": 15.842673711941524, "learning_rate": 3.4983777683735365e-06, "loss": 1.321, "step": 2480 }, { "epoch": 0.11, "grad_norm": 11.240344484235214, "learning_rate": 3.5054309493581605e-06, "loss": 1.2861, "step": 2485 }, { "epoch": 0.11, "grad_norm": 21.947691749994437, "learning_rate": 3.512484130342785e-06, "loss": 1.2759, "step": 2490 }, { "epoch": 0.11, "grad_norm": 33.778062904136135, "learning_rate": 3.519537311327409e-06, "loss": 1.2348, "step": 2495 }, { "epoch": 0.11, "grad_norm": 23.287923457877422, "learning_rate": 3.526590492312033e-06, "loss": 1.2993, "step": 2500 }, { "epoch": 0.11, "grad_norm": 14.17228249699254, "learning_rate": 3.533643673296657e-06, "loss": 1.2981, "step": 2505 }, { "epoch": 0.11, "grad_norm": 12.469848850375767, "learning_rate": 3.5406968542812815e-06, "loss": 1.3183, "step": 2510 }, { "epoch": 0.11, "grad_norm": 10.146219051893173, "learning_rate": 3.547750035265905e-06, "loss": 1.2639, "step": 2515 }, { "epoch": 0.11, "grad_norm": 23.930408084241922, "learning_rate": 3.554803216250529e-06, "loss": 1.2769, "step": 2520 }, { "epoch": 0.11, "grad_norm": 35.88907171960186, "learning_rate": 3.561856397235153e-06, "loss": 1.2934, "step": 2525 }, { "epoch": 0.11, "grad_norm": 29.89872823061288, "learning_rate": 3.5689095782197776e-06, "loss": 1.3344, "step": 2530 }, { "epoch": 0.11, "grad_norm": 23.836740683850383, "learning_rate": 3.5759627592044016e-06, "loss": 1.3004, "step": 2535 }, { "epoch": 0.11, "grad_norm": 18.70401344073436, "learning_rate": 3.5830159401890256e-06, "loss": 1.3062, "step": 2540 }, { "epoch": 0.11, "grad_norm": 20.62922016464355, "learning_rate": 3.5900691211736492e-06, "loss": 1.2808, "step": 2545 }, { "epoch": 0.11, "grad_norm": 52.97337032778942, "learning_rate": 3.5971223021582737e-06, "loss": 1.2735, "step": 2550 }, { "epoch": 0.11, "grad_norm": 97.69711334654265, "learning_rate": 3.6041754831428977e-06, "loss": 1.3837, "step": 2555 }, { "epoch": 0.11, "grad_norm": 177.5056324394591, "learning_rate": 3.6112286641275217e-06, "loss": 1.4126, "step": 2560 }, { "epoch": 0.11, "grad_norm": 20.872006882419626, "learning_rate": 3.618281845112146e-06, "loss": 1.3315, "step": 2565 }, { "epoch": 0.11, "grad_norm": 39.71345828288374, "learning_rate": 3.62533502609677e-06, "loss": 1.3122, "step": 2570 }, { "epoch": 0.11, "grad_norm": 17.058967582104138, "learning_rate": 3.632388207081394e-06, "loss": 1.3473, "step": 2575 }, { "epoch": 0.11, "grad_norm": 36.7107874365188, "learning_rate": 3.639441388066018e-06, "loss": 1.3599, "step": 2580 }, { "epoch": 0.11, "grad_norm": 29.22607155225058, "learning_rate": 3.6464945690506422e-06, "loss": 1.3297, "step": 2585 }, { "epoch": 0.11, "grad_norm": 20.53354404909482, "learning_rate": 3.6535477500352663e-06, "loss": 1.2778, "step": 2590 }, { "epoch": 0.11, "grad_norm": 9.86006827561748, "learning_rate": 3.6606009310198903e-06, "loss": 1.3521, "step": 2595 }, { "epoch": 0.11, "grad_norm": 11.948199895854632, "learning_rate": 3.6676541120045143e-06, "loss": 1.2731, "step": 2600 }, { "epoch": 0.11, "grad_norm": 9.71692402370183, "learning_rate": 3.6747072929891387e-06, "loss": 1.298, "step": 2605 }, { "epoch": 0.11, "grad_norm": 10.062536899430931, "learning_rate": 3.6817604739737623e-06, "loss": 1.3149, "step": 2610 }, { "epoch": 0.11, "grad_norm": 10.102253390657898, "learning_rate": 3.6888136549583864e-06, "loss": 1.274, "step": 2615 }, { "epoch": 0.11, "grad_norm": 11.642812977658556, "learning_rate": 3.6958668359430104e-06, "loss": 1.265, "step": 2620 }, { "epoch": 0.11, "grad_norm": 10.625803262954769, "learning_rate": 3.702920016927635e-06, "loss": 1.3035, "step": 2625 }, { "epoch": 0.11, "grad_norm": 16.030825998116676, "learning_rate": 3.709973197912259e-06, "loss": 1.2392, "step": 2630 }, { "epoch": 0.11, "grad_norm": 13.259740106001445, "learning_rate": 3.717026378896883e-06, "loss": 1.3278, "step": 2635 }, { "epoch": 0.11, "grad_norm": 11.769848126676907, "learning_rate": 3.7240795598815065e-06, "loss": 1.3199, "step": 2640 }, { "epoch": 0.11, "grad_norm": 14.339422438110708, "learning_rate": 3.731132740866131e-06, "loss": 1.3215, "step": 2645 }, { "epoch": 0.11, "grad_norm": 13.364269568645252, "learning_rate": 3.738185921850755e-06, "loss": 1.2795, "step": 2650 }, { "epoch": 0.11, "grad_norm": 11.673151505722714, "learning_rate": 3.745239102835379e-06, "loss": 1.2879, "step": 2655 }, { "epoch": 0.11, "grad_norm": 10.561347279035587, "learning_rate": 3.7522922838200034e-06, "loss": 1.2966, "step": 2660 }, { "epoch": 0.11, "grad_norm": 10.317004878154123, "learning_rate": 3.7593454648046274e-06, "loss": 1.2809, "step": 2665 }, { "epoch": 0.11, "grad_norm": 10.269771080603558, "learning_rate": 3.7663986457892514e-06, "loss": 1.2365, "step": 2670 }, { "epoch": 0.11, "grad_norm": 18.41904430908563, "learning_rate": 3.773451826773875e-06, "loss": 1.2937, "step": 2675 }, { "epoch": 0.11, "grad_norm": 26.903715456061647, "learning_rate": 3.780505007758499e-06, "loss": 1.3293, "step": 2680 }, { "epoch": 0.11, "grad_norm": 34.5838086355772, "learning_rate": 3.7875581887431235e-06, "loss": 1.273, "step": 2685 }, { "epoch": 0.11, "grad_norm": 16.564786127157202, "learning_rate": 3.7946113697277475e-06, "loss": 1.3264, "step": 2690 }, { "epoch": 0.11, "grad_norm": 15.016589345921918, "learning_rate": 3.8016645507123715e-06, "loss": 1.3289, "step": 2695 }, { "epoch": 0.11, "grad_norm": 16.225391620897856, "learning_rate": 3.808717731696996e-06, "loss": 1.3281, "step": 2700 }, { "epoch": 0.11, "grad_norm": 9.170812110441807, "learning_rate": 3.81577091268162e-06, "loss": 1.2689, "step": 2705 }, { "epoch": 0.11, "grad_norm": 13.85476329953772, "learning_rate": 3.822824093666244e-06, "loss": 1.2498, "step": 2710 }, { "epoch": 0.11, "grad_norm": 10.980331687023341, "learning_rate": 3.829877274650868e-06, "loss": 1.3877, "step": 2715 }, { "epoch": 0.12, "grad_norm": 18.9311116074833, "learning_rate": 3.836930455635492e-06, "loss": 1.2732, "step": 2720 }, { "epoch": 0.12, "grad_norm": 16.65648536273734, "learning_rate": 3.843983636620116e-06, "loss": 1.327, "step": 2725 }, { "epoch": 0.12, "grad_norm": 13.658055479106222, "learning_rate": 3.85103681760474e-06, "loss": 1.2391, "step": 2730 }, { "epoch": 0.12, "grad_norm": 15.267489207192607, "learning_rate": 3.8580899985893646e-06, "loss": 1.2683, "step": 2735 }, { "epoch": 0.12, "grad_norm": 11.675434205406328, "learning_rate": 3.865143179573988e-06, "loss": 1.3038, "step": 2740 }, { "epoch": 0.12, "grad_norm": 43.07447486043644, "learning_rate": 3.872196360558612e-06, "loss": 1.2872, "step": 2745 }, { "epoch": 0.12, "grad_norm": 21.09540935857782, "learning_rate": 3.879249541543236e-06, "loss": 1.284, "step": 2750 }, { "epoch": 0.12, "grad_norm": 13.329659232647545, "learning_rate": 3.886302722527861e-06, "loss": 1.3234, "step": 2755 }, { "epoch": 0.12, "grad_norm": 10.4422946502081, "learning_rate": 3.893355903512484e-06, "loss": 1.2493, "step": 2760 }, { "epoch": 0.12, "grad_norm": 18.457155038757037, "learning_rate": 3.900409084497109e-06, "loss": 1.3149, "step": 2765 }, { "epoch": 0.12, "grad_norm": 19.066991949591987, "learning_rate": 3.907462265481732e-06, "loss": 1.3173, "step": 2770 }, { "epoch": 0.12, "grad_norm": 59.33881414335634, "learning_rate": 3.914515446466357e-06, "loss": 1.2631, "step": 2775 }, { "epoch": 0.12, "grad_norm": 21.26059385079084, "learning_rate": 3.92156862745098e-06, "loss": 1.2353, "step": 2780 }, { "epoch": 0.12, "grad_norm": 11.063165722995702, "learning_rate": 3.928621808435605e-06, "loss": 1.2735, "step": 2785 }, { "epoch": 0.12, "grad_norm": 9.797982570435053, "learning_rate": 3.935674989420229e-06, "loss": 1.3306, "step": 2790 }, { "epoch": 0.12, "grad_norm": 10.183148780642624, "learning_rate": 3.942728170404853e-06, "loss": 1.3298, "step": 2795 }, { "epoch": 0.12, "grad_norm": 10.016617200365182, "learning_rate": 3.949781351389476e-06, "loss": 1.2983, "step": 2800 }, { "epoch": 0.12, "grad_norm": 11.836752394258982, "learning_rate": 3.956834532374101e-06, "loss": 1.3208, "step": 2805 }, { "epoch": 0.12, "grad_norm": 13.785214669426912, "learning_rate": 3.963887713358725e-06, "loss": 1.288, "step": 2810 }, { "epoch": 0.12, "grad_norm": 21.25549862823478, "learning_rate": 3.970940894343349e-06, "loss": 1.2841, "step": 2815 }, { "epoch": 0.12, "grad_norm": 22.864212812801505, "learning_rate": 3.977994075327973e-06, "loss": 1.2843, "step": 2820 }, { "epoch": 0.12, "grad_norm": 17.604115953866582, "learning_rate": 3.985047256312598e-06, "loss": 1.2703, "step": 2825 }, { "epoch": 0.12, "grad_norm": 12.906829408716234, "learning_rate": 3.992100437297221e-06, "loss": 1.344, "step": 2830 }, { "epoch": 0.12, "grad_norm": 14.549679676413833, "learning_rate": 3.999153618281845e-06, "loss": 1.2821, "step": 2835 }, { "epoch": 0.12, "grad_norm": 9.740129414659062, "learning_rate": 4.0062067992664694e-06, "loss": 1.2387, "step": 2840 }, { "epoch": 0.12, "grad_norm": 29.495403707854127, "learning_rate": 4.013259980251094e-06, "loss": 1.2691, "step": 2845 }, { "epoch": 0.12, "grad_norm": 20.795329749765493, "learning_rate": 4.0203131612357175e-06, "loss": 1.2712, "step": 2850 }, { "epoch": 0.12, "grad_norm": 16.695114485986377, "learning_rate": 4.027366342220342e-06, "loss": 1.3347, "step": 2855 }, { "epoch": 0.12, "grad_norm": 21.058774070795085, "learning_rate": 4.034419523204966e-06, "loss": 1.3013, "step": 2860 }, { "epoch": 0.12, "grad_norm": 14.711121636674607, "learning_rate": 4.04147270418959e-06, "loss": 1.2703, "step": 2865 }, { "epoch": 0.12, "grad_norm": 11.891345574399109, "learning_rate": 4.0485258851742136e-06, "loss": 1.2876, "step": 2870 }, { "epoch": 0.12, "grad_norm": 11.003490723314501, "learning_rate": 4.055579066158838e-06, "loss": 1.2898, "step": 2875 }, { "epoch": 0.12, "grad_norm": 10.492300588020184, "learning_rate": 4.0626322471434624e-06, "loss": 1.2768, "step": 2880 }, { "epoch": 0.12, "grad_norm": 21.064936218751935, "learning_rate": 4.069685428128086e-06, "loss": 1.2826, "step": 2885 }, { "epoch": 0.12, "grad_norm": 30.262012568820364, "learning_rate": 4.0767386091127105e-06, "loss": 1.2122, "step": 2890 }, { "epoch": 0.12, "grad_norm": 12.1170416281707, "learning_rate": 4.083791790097334e-06, "loss": 1.3159, "step": 2895 }, { "epoch": 0.12, "grad_norm": 14.470922649541901, "learning_rate": 4.090844971081958e-06, "loss": 1.2255, "step": 2900 }, { "epoch": 0.12, "grad_norm": 92.94573686231264, "learning_rate": 4.097898152066582e-06, "loss": 1.3357, "step": 2905 }, { "epoch": 0.12, "grad_norm": 57.55677343074897, "learning_rate": 4.1049513330512066e-06, "loss": 1.3017, "step": 2910 }, { "epoch": 0.12, "grad_norm": 35.54071332346504, "learning_rate": 4.11200451403583e-06, "loss": 1.3637, "step": 2915 }, { "epoch": 0.12, "grad_norm": 22.551752557418194, "learning_rate": 4.119057695020455e-06, "loss": 1.321, "step": 2920 }, { "epoch": 0.12, "grad_norm": 27.835512236537443, "learning_rate": 4.126110876005079e-06, "loss": 1.2858, "step": 2925 }, { "epoch": 0.12, "grad_norm": 67.08799625233122, "learning_rate": 4.133164056989703e-06, "loss": 1.3391, "step": 2930 }, { "epoch": 0.12, "grad_norm": 80.82188934335447, "learning_rate": 4.140217237974326e-06, "loss": 1.2762, "step": 2935 }, { "epoch": 0.12, "grad_norm": 23.595367431257326, "learning_rate": 4.147270418958951e-06, "loss": 1.3076, "step": 2940 }, { "epoch": 0.12, "grad_norm": 17.486191032838914, "learning_rate": 4.154323599943575e-06, "loss": 1.3045, "step": 2945 }, { "epoch": 0.12, "grad_norm": 47.41869802391103, "learning_rate": 4.161376780928199e-06, "loss": 1.2969, "step": 2950 }, { "epoch": 0.13, "grad_norm": 43.55408239434461, "learning_rate": 4.168429961912823e-06, "loss": 1.3257, "step": 2955 }, { "epoch": 0.13, "grad_norm": 55.65334416941587, "learning_rate": 4.175483142897447e-06, "loss": 1.3091, "step": 2960 }, { "epoch": 0.13, "grad_norm": 42.206429191583105, "learning_rate": 4.182536323882071e-06, "loss": 1.3263, "step": 2965 }, { "epoch": 0.13, "grad_norm": 29.374198431314877, "learning_rate": 4.189589504866695e-06, "loss": 1.2673, "step": 2970 }, { "epoch": 0.13, "grad_norm": 39.630515590905446, "learning_rate": 4.196642685851319e-06, "loss": 1.2646, "step": 2975 }, { "epoch": 0.13, "grad_norm": 31.923398379906896, "learning_rate": 4.203695866835944e-06, "loss": 1.2679, "step": 2980 }, { "epoch": 0.13, "grad_norm": 16.24286810361188, "learning_rate": 4.210749047820567e-06, "loss": 1.2238, "step": 2985 }, { "epoch": 0.13, "grad_norm": 10.078710885293793, "learning_rate": 4.217802228805192e-06, "loss": 1.282, "step": 2990 }, { "epoch": 0.13, "grad_norm": 12.636571569090822, "learning_rate": 4.224855409789815e-06, "loss": 1.2593, "step": 2995 }, { "epoch": 0.13, "grad_norm": 12.233324328700752, "learning_rate": 4.23190859077444e-06, "loss": 1.2586, "step": 3000 }, { "epoch": 0.13, "grad_norm": 11.301653033520429, "learning_rate": 4.238961771759063e-06, "loss": 1.2428, "step": 3005 }, { "epoch": 0.13, "grad_norm": 11.465812370291173, "learning_rate": 4.246014952743688e-06, "loss": 1.2813, "step": 3010 }, { "epoch": 0.13, "grad_norm": 12.080936940122832, "learning_rate": 4.253068133728312e-06, "loss": 1.2739, "step": 3015 }, { "epoch": 0.13, "grad_norm": 11.422997949175425, "learning_rate": 4.260121314712936e-06, "loss": 1.2907, "step": 3020 }, { "epoch": 0.13, "grad_norm": 17.159826171454963, "learning_rate": 4.2671744956975595e-06, "loss": 1.3148, "step": 3025 }, { "epoch": 0.13, "grad_norm": 14.376910478041388, "learning_rate": 4.274227676682184e-06, "loss": 1.3304, "step": 3030 }, { "epoch": 0.13, "grad_norm": 10.327628521308107, "learning_rate": 4.281280857666808e-06, "loss": 1.2522, "step": 3035 }, { "epoch": 0.13, "grad_norm": 11.969648129784302, "learning_rate": 4.288334038651432e-06, "loss": 1.3042, "step": 3040 }, { "epoch": 0.13, "grad_norm": 11.076411629526614, "learning_rate": 4.295387219636056e-06, "loss": 1.3039, "step": 3045 }, { "epoch": 0.13, "grad_norm": 10.21469773823197, "learning_rate": 4.302440400620681e-06, "loss": 1.2547, "step": 3050 }, { "epoch": 0.13, "grad_norm": 9.881882420196233, "learning_rate": 4.3094935816053045e-06, "loss": 1.2522, "step": 3055 }, { "epoch": 0.13, "grad_norm": 17.573570285439917, "learning_rate": 4.316546762589928e-06, "loss": 1.2581, "step": 3060 }, { "epoch": 0.13, "grad_norm": 10.65540407408589, "learning_rate": 4.3235999435745525e-06, "loss": 1.3096, "step": 3065 }, { "epoch": 0.13, "grad_norm": 12.143615295859695, "learning_rate": 4.330653124559177e-06, "loss": 1.2808, "step": 3070 }, { "epoch": 0.13, "grad_norm": 10.781906250128856, "learning_rate": 4.3377063055438005e-06, "loss": 1.1971, "step": 3075 }, { "epoch": 0.13, "grad_norm": 9.198103716610406, "learning_rate": 4.344759486528425e-06, "loss": 1.196, "step": 3080 }, { "epoch": 0.13, "grad_norm": 11.132663311125032, "learning_rate": 4.351812667513049e-06, "loss": 1.2969, "step": 3085 }, { "epoch": 0.13, "grad_norm": 16.392770585766172, "learning_rate": 4.358865848497672e-06, "loss": 1.2861, "step": 3090 }, { "epoch": 0.13, "grad_norm": 15.90090090073315, "learning_rate": 4.365919029482297e-06, "loss": 1.2451, "step": 3095 }, { "epoch": 0.13, "grad_norm": 23.32261999386539, "learning_rate": 4.372972210466921e-06, "loss": 1.2464, "step": 3100 }, { "epoch": 0.13, "grad_norm": 11.003312527261658, "learning_rate": 4.380025391451545e-06, "loss": 1.3055, "step": 3105 }, { "epoch": 0.13, "grad_norm": 9.888227524955175, "learning_rate": 4.387078572436169e-06, "loss": 1.3067, "step": 3110 }, { "epoch": 0.13, "grad_norm": 12.514075282076226, "learning_rate": 4.3941317534207936e-06, "loss": 1.2589, "step": 3115 }, { "epoch": 0.13, "grad_norm": 12.639588585325548, "learning_rate": 4.401184934405417e-06, "loss": 1.2855, "step": 3120 }, { "epoch": 0.13, "grad_norm": 11.186294494085404, "learning_rate": 4.408238115390041e-06, "loss": 1.2921, "step": 3125 }, { "epoch": 0.13, "grad_norm": 24.52326846217789, "learning_rate": 4.415291296374665e-06, "loss": 1.2579, "step": 3130 }, { "epoch": 0.13, "grad_norm": 19.329861694298764, "learning_rate": 4.42234447735929e-06, "loss": 1.2616, "step": 3135 }, { "epoch": 0.13, "grad_norm": 15.993999159722897, "learning_rate": 4.429397658343913e-06, "loss": 1.3041, "step": 3140 }, { "epoch": 0.13, "grad_norm": 11.158261046071381, "learning_rate": 4.436450839328538e-06, "loss": 1.2986, "step": 3145 }, { "epoch": 0.13, "grad_norm": 12.027813476577078, "learning_rate": 4.443504020313162e-06, "loss": 1.3188, "step": 3150 }, { "epoch": 0.13, "grad_norm": 13.649258720805259, "learning_rate": 4.450557201297786e-06, "loss": 1.2454, "step": 3155 }, { "epoch": 0.13, "grad_norm": 10.438215584608244, "learning_rate": 4.457610382282409e-06, "loss": 1.3254, "step": 3160 }, { "epoch": 0.13, "grad_norm": 8.7496660816618, "learning_rate": 4.464663563267034e-06, "loss": 1.2546, "step": 3165 }, { "epoch": 0.13, "grad_norm": 9.496524193277665, "learning_rate": 4.471716744251658e-06, "loss": 1.2651, "step": 3170 }, { "epoch": 0.13, "grad_norm": 9.72691437805028, "learning_rate": 4.478769925236282e-06, "loss": 1.2721, "step": 3175 }, { "epoch": 0.13, "grad_norm": 12.14529539156146, "learning_rate": 4.485823106220906e-06, "loss": 1.2601, "step": 3180 }, { "epoch": 0.13, "grad_norm": 16.457732292094185, "learning_rate": 4.49287628720553e-06, "loss": 1.3189, "step": 3185 }, { "epoch": 0.14, "grad_norm": 16.650494838167653, "learning_rate": 4.499929468190154e-06, "loss": 1.2754, "step": 3190 }, { "epoch": 0.14, "grad_norm": 11.323631987087946, "learning_rate": 4.506982649174778e-06, "loss": 1.2467, "step": 3195 }, { "epoch": 0.14, "grad_norm": 13.011830811647577, "learning_rate": 4.514035830159402e-06, "loss": 1.2685, "step": 3200 }, { "epoch": 0.14, "grad_norm": 10.424442499264432, "learning_rate": 4.521089011144027e-06, "loss": 1.2704, "step": 3205 }, { "epoch": 0.14, "grad_norm": 15.371961437359987, "learning_rate": 4.52814219212865e-06, "loss": 1.3226, "step": 3210 }, { "epoch": 0.14, "grad_norm": 10.835651916357772, "learning_rate": 4.535195373113275e-06, "loss": 1.3091, "step": 3215 }, { "epoch": 0.14, "grad_norm": 17.41319274073559, "learning_rate": 4.5422485540978984e-06, "loss": 1.2568, "step": 3220 }, { "epoch": 0.14, "grad_norm": 12.246287636513452, "learning_rate": 4.549301735082523e-06, "loss": 1.2485, "step": 3225 }, { "epoch": 0.14, "grad_norm": 10.570100016693516, "learning_rate": 4.5563549160671465e-06, "loss": 1.2255, "step": 3230 }, { "epoch": 0.14, "grad_norm": 9.59834089852237, "learning_rate": 4.563408097051771e-06, "loss": 1.2537, "step": 3235 }, { "epoch": 0.14, "grad_norm": 13.954268303022719, "learning_rate": 4.570461278036395e-06, "loss": 1.2545, "step": 3240 }, { "epoch": 0.14, "grad_norm": 14.10277506496896, "learning_rate": 4.577514459021019e-06, "loss": 1.2341, "step": 3245 }, { "epoch": 0.14, "grad_norm": 43.27968295823003, "learning_rate": 4.5845676400056426e-06, "loss": 1.2698, "step": 3250 }, { "epoch": 0.14, "grad_norm": 29.975467048193586, "learning_rate": 4.591620820990267e-06, "loss": 1.2689, "step": 3255 }, { "epoch": 0.14, "grad_norm": 9.975237616706838, "learning_rate": 4.598674001974891e-06, "loss": 1.2746, "step": 3260 }, { "epoch": 0.14, "grad_norm": 10.665770893676218, "learning_rate": 4.605727182959515e-06, "loss": 1.2147, "step": 3265 }, { "epoch": 0.14, "grad_norm": 12.42988049029319, "learning_rate": 4.6127803639441395e-06, "loss": 1.2523, "step": 3270 }, { "epoch": 0.14, "grad_norm": 12.018191519156783, "learning_rate": 4.619833544928763e-06, "loss": 1.2749, "step": 3275 }, { "epoch": 0.14, "grad_norm": 10.441603665958906, "learning_rate": 4.626886725913387e-06, "loss": 1.2419, "step": 3280 }, { "epoch": 0.14, "grad_norm": 51.78100777693077, "learning_rate": 4.633939906898011e-06, "loss": 1.2423, "step": 3285 }, { "epoch": 0.14, "grad_norm": 45.86870898692082, "learning_rate": 4.6409930878826356e-06, "loss": 1.2786, "step": 3290 }, { "epoch": 0.14, "grad_norm": 17.147759385681542, "learning_rate": 4.648046268867259e-06, "loss": 1.2626, "step": 3295 }, { "epoch": 0.14, "grad_norm": 10.82689736554392, "learning_rate": 4.655099449851884e-06, "loss": 1.2384, "step": 3300 }, { "epoch": 0.14, "grad_norm": 10.455156457105254, "learning_rate": 4.662152630836508e-06, "loss": 1.2676, "step": 3305 }, { "epoch": 0.14, "grad_norm": 12.82375637626874, "learning_rate": 4.669205811821132e-06, "loss": 1.2364, "step": 3310 }, { "epoch": 0.14, "grad_norm": 21.085889861009786, "learning_rate": 4.676258992805755e-06, "loss": 1.2543, "step": 3315 }, { "epoch": 0.14, "grad_norm": 13.272611268477121, "learning_rate": 4.68331217379038e-06, "loss": 1.2722, "step": 3320 }, { "epoch": 0.14, "grad_norm": 11.911231828036165, "learning_rate": 4.690365354775004e-06, "loss": 1.2736, "step": 3325 }, { "epoch": 0.14, "grad_norm": 16.693666764497696, "learning_rate": 4.697418535759628e-06, "loss": 1.1832, "step": 3330 }, { "epoch": 0.14, "grad_norm": 23.968076222558604, "learning_rate": 4.704471716744252e-06, "loss": 1.2391, "step": 3335 }, { "epoch": 0.14, "grad_norm": 11.985398361901478, "learning_rate": 4.711524897728877e-06, "loss": 1.3609, "step": 3340 }, { "epoch": 0.14, "grad_norm": 18.990054681752294, "learning_rate": 4.7185780787135e-06, "loss": 1.2265, "step": 3345 }, { "epoch": 0.14, "grad_norm": 15.1148727401013, "learning_rate": 4.725631259698124e-06, "loss": 1.2943, "step": 3350 }, { "epoch": 0.14, "grad_norm": 16.22872713748695, "learning_rate": 4.732684440682748e-06, "loss": 1.2198, "step": 3355 }, { "epoch": 0.14, "grad_norm": 10.445795893427473, "learning_rate": 4.739737621667373e-06, "loss": 1.2157, "step": 3360 }, { "epoch": 0.14, "grad_norm": 12.162977383487279, "learning_rate": 4.746790802651996e-06, "loss": 1.2655, "step": 3365 }, { "epoch": 0.14, "grad_norm": 24.696746213762975, "learning_rate": 4.753843983636621e-06, "loss": 1.2587, "step": 3370 }, { "epoch": 0.14, "grad_norm": 28.10235490138781, "learning_rate": 4.760897164621244e-06, "loss": 1.2917, "step": 3375 }, { "epoch": 0.14, "grad_norm": 20.756286209291872, "learning_rate": 4.767950345605869e-06, "loss": 1.2959, "step": 3380 }, { "epoch": 0.14, "grad_norm": 16.20353117004509, "learning_rate": 4.775003526590492e-06, "loss": 1.2195, "step": 3385 }, { "epoch": 0.14, "grad_norm": 13.189508691118403, "learning_rate": 4.782056707575117e-06, "loss": 1.2833, "step": 3390 }, { "epoch": 0.14, "grad_norm": 9.428530737443314, "learning_rate": 4.789109888559741e-06, "loss": 1.2794, "step": 3395 }, { "epoch": 0.14, "grad_norm": 13.66301189906629, "learning_rate": 4.796163069544365e-06, "loss": 1.2871, "step": 3400 }, { "epoch": 0.14, "grad_norm": 18.450614951492735, "learning_rate": 4.803216250528989e-06, "loss": 1.2987, "step": 3405 }, { "epoch": 0.14, "grad_norm": 9.246211335901181, "learning_rate": 4.810269431513613e-06, "loss": 1.223, "step": 3410 }, { "epoch": 0.14, "grad_norm": 13.113485988685436, "learning_rate": 4.817322612498237e-06, "loss": 1.2844, "step": 3415 }, { "epoch": 0.14, "grad_norm": 12.292734527639393, "learning_rate": 4.824375793482861e-06, "loss": 1.2387, "step": 3420 }, { "epoch": 0.14, "grad_norm": 47.78148910178047, "learning_rate": 4.831428974467485e-06, "loss": 1.3424, "step": 3425 }, { "epoch": 0.15, "grad_norm": 13.890277669697383, "learning_rate": 4.838482155452109e-06, "loss": 1.2322, "step": 3430 }, { "epoch": 0.15, "grad_norm": 17.933300497127828, "learning_rate": 4.8455353364367335e-06, "loss": 1.2728, "step": 3435 }, { "epoch": 0.15, "grad_norm": 22.84721691773643, "learning_rate": 4.852588517421357e-06, "loss": 1.2291, "step": 3440 }, { "epoch": 0.15, "grad_norm": 30.554845364799714, "learning_rate": 4.8596416984059815e-06, "loss": 1.2025, "step": 3445 }, { "epoch": 0.15, "grad_norm": 28.508765751777105, "learning_rate": 4.866694879390605e-06, "loss": 1.238, "step": 3450 }, { "epoch": 0.15, "grad_norm": 49.73294767090246, "learning_rate": 4.8737480603752295e-06, "loss": 1.2829, "step": 3455 }, { "epoch": 0.15, "grad_norm": 39.587702658575665, "learning_rate": 4.880801241359854e-06, "loss": 1.2542, "step": 3460 }, { "epoch": 0.15, "grad_norm": 39.1003622377763, "learning_rate": 4.887854422344478e-06, "loss": 1.249, "step": 3465 }, { "epoch": 0.15, "grad_norm": 27.845414136751447, "learning_rate": 4.894907603329102e-06, "loss": 1.2086, "step": 3470 }, { "epoch": 0.15, "grad_norm": 30.498720085476286, "learning_rate": 4.901960784313726e-06, "loss": 1.2771, "step": 3475 }, { "epoch": 0.15, "grad_norm": 14.786855760143926, "learning_rate": 4.90901396529835e-06, "loss": 1.2083, "step": 3480 }, { "epoch": 0.15, "grad_norm": 36.24396094525753, "learning_rate": 4.916067146282974e-06, "loss": 1.2578, "step": 3485 }, { "epoch": 0.15, "grad_norm": 11.951722024434854, "learning_rate": 4.923120327267598e-06, "loss": 1.2938, "step": 3490 }, { "epoch": 0.15, "grad_norm": 28.62541316775776, "learning_rate": 4.9301735082522226e-06, "loss": 1.2567, "step": 3495 }, { "epoch": 0.15, "grad_norm": 11.409404146967361, "learning_rate": 4.937226689236846e-06, "loss": 1.2897, "step": 3500 }, { "epoch": 0.15, "grad_norm": 9.438793537906815, "learning_rate": 4.94427987022147e-06, "loss": 1.2747, "step": 3505 }, { "epoch": 0.15, "grad_norm": 12.473561091047692, "learning_rate": 4.951333051206094e-06, "loss": 1.2367, "step": 3510 }, { "epoch": 0.15, "grad_norm": 14.08078614053848, "learning_rate": 4.958386232190719e-06, "loss": 1.2957, "step": 3515 }, { "epoch": 0.15, "grad_norm": 12.04613897016436, "learning_rate": 4.965439413175342e-06, "loss": 1.2551, "step": 3520 }, { "epoch": 0.15, "grad_norm": 25.4806371035209, "learning_rate": 4.972492594159967e-06, "loss": 1.278, "step": 3525 }, { "epoch": 0.15, "grad_norm": 14.982113715913048, "learning_rate": 4.979545775144591e-06, "loss": 1.251, "step": 3530 }, { "epoch": 0.15, "grad_norm": 19.33016544355097, "learning_rate": 4.986598956129215e-06, "loss": 1.3304, "step": 3535 }, { "epoch": 0.15, "grad_norm": 20.201567875703628, "learning_rate": 4.993652137113838e-06, "loss": 1.2802, "step": 3540 }, { "epoch": 0.15, "grad_norm": 28.618573898180685, "learning_rate": 5.000705318098464e-06, "loss": 1.2785, "step": 3545 }, { "epoch": 0.15, "grad_norm": 16.972373426929096, "learning_rate": 5.007758499083087e-06, "loss": 1.26, "step": 3550 }, { "epoch": 0.15, "grad_norm": 15.659160389774978, "learning_rate": 5.014811680067711e-06, "loss": 1.2654, "step": 3555 }, { "epoch": 0.15, "grad_norm": 20.954522437050688, "learning_rate": 5.021864861052334e-06, "loss": 1.2917, "step": 3560 }, { "epoch": 0.15, "grad_norm": 19.99441827169673, "learning_rate": 5.028918042036959e-06, "loss": 1.2494, "step": 3565 }, { "epoch": 0.15, "grad_norm": 10.753998533601283, "learning_rate": 5.035971223021583e-06, "loss": 1.2595, "step": 3570 }, { "epoch": 0.15, "grad_norm": 9.763756611272813, "learning_rate": 5.043024404006207e-06, "loss": 1.2143, "step": 3575 }, { "epoch": 0.15, "grad_norm": 27.84702459003064, "learning_rate": 5.050077584990831e-06, "loss": 1.2716, "step": 3580 }, { "epoch": 0.15, "grad_norm": 15.125008774044057, "learning_rate": 5.057130765975456e-06, "loss": 1.3131, "step": 3585 }, { "epoch": 0.15, "grad_norm": 12.479878126083905, "learning_rate": 5.064183946960079e-06, "loss": 1.2692, "step": 3590 }, { "epoch": 0.15, "grad_norm": 8.550931355429048, "learning_rate": 5.071237127944704e-06, "loss": 1.169, "step": 3595 }, { "epoch": 0.15, "grad_norm": 28.478250059292655, "learning_rate": 5.0782903089293274e-06, "loss": 1.2417, "step": 3600 }, { "epoch": 0.15, "grad_norm": 33.8453585371968, "learning_rate": 5.085343489913952e-06, "loss": 1.2972, "step": 3605 }, { "epoch": 0.15, "grad_norm": 29.07758781376888, "learning_rate": 5.092396670898576e-06, "loss": 1.2244, "step": 3610 }, { "epoch": 0.15, "grad_norm": 26.208980724520302, "learning_rate": 5.099449851883199e-06, "loss": 1.2853, "step": 3615 }, { "epoch": 0.15, "grad_norm": 14.281973497746348, "learning_rate": 5.1065030328678235e-06, "loss": 1.2211, "step": 3620 }, { "epoch": 0.15, "grad_norm": 10.128605749862677, "learning_rate": 5.113556213852447e-06, "loss": 1.2624, "step": 3625 }, { "epoch": 0.15, "grad_norm": 14.276090059754628, "learning_rate": 5.1206093948370716e-06, "loss": 1.3062, "step": 3630 }, { "epoch": 0.15, "grad_norm": 27.184153460761387, "learning_rate": 5.127662575821696e-06, "loss": 1.2807, "step": 3635 }, { "epoch": 0.15, "grad_norm": 12.81039743907487, "learning_rate": 5.13471575680632e-06, "loss": 1.2522, "step": 3640 }, { "epoch": 0.15, "grad_norm": 12.300584431242333, "learning_rate": 5.141768937790944e-06, "loss": 1.292, "step": 3645 }, { "epoch": 0.15, "grad_norm": 19.248704558390312, "learning_rate": 5.1488221187755685e-06, "loss": 1.2602, "step": 3650 }, { "epoch": 0.15, "grad_norm": 19.57079862561798, "learning_rate": 5.155875299760192e-06, "loss": 1.2518, "step": 3655 }, { "epoch": 0.15, "grad_norm": 23.026987515970035, "learning_rate": 5.1629284807448165e-06, "loss": 1.2566, "step": 3660 }, { "epoch": 0.16, "grad_norm": 53.242706156792124, "learning_rate": 5.169981661729441e-06, "loss": 1.2915, "step": 3665 }, { "epoch": 0.16, "grad_norm": 22.11577869661808, "learning_rate": 5.1770348427140646e-06, "loss": 1.2245, "step": 3670 }, { "epoch": 0.16, "grad_norm": 33.852659895231234, "learning_rate": 5.184088023698689e-06, "loss": 1.3023, "step": 3675 }, { "epoch": 0.16, "grad_norm": 14.268584865926686, "learning_rate": 5.191141204683312e-06, "loss": 1.2568, "step": 3680 }, { "epoch": 0.16, "grad_norm": 24.821961553526766, "learning_rate": 5.198194385667936e-06, "loss": 1.2488, "step": 3685 }, { "epoch": 0.16, "grad_norm": 24.550111315927886, "learning_rate": 5.205247566652561e-06, "loss": 1.2473, "step": 3690 }, { "epoch": 0.16, "grad_norm": 36.929398031390676, "learning_rate": 5.212300747637184e-06, "loss": 1.3003, "step": 3695 }, { "epoch": 0.16, "grad_norm": 18.268144552737915, "learning_rate": 5.219353928621809e-06, "loss": 1.2627, "step": 3700 }, { "epoch": 0.16, "grad_norm": 8.96899890985399, "learning_rate": 5.226407109606433e-06, "loss": 1.245, "step": 3705 }, { "epoch": 0.16, "grad_norm": 19.226320022504392, "learning_rate": 5.233460290591057e-06, "loss": 1.2526, "step": 3710 }, { "epoch": 0.16, "grad_norm": 20.51427221258509, "learning_rate": 5.240513471575681e-06, "loss": 1.2423, "step": 3715 }, { "epoch": 0.16, "grad_norm": 13.481946559437045, "learning_rate": 5.247566652560306e-06, "loss": 1.2731, "step": 3720 }, { "epoch": 0.16, "grad_norm": 12.848281322736836, "learning_rate": 5.254619833544929e-06, "loss": 1.2452, "step": 3725 }, { "epoch": 0.16, "grad_norm": 11.668540879766415, "learning_rate": 5.261673014529554e-06, "loss": 1.2329, "step": 3730 }, { "epoch": 0.16, "grad_norm": 12.922959391456253, "learning_rate": 5.268726195514178e-06, "loss": 1.2924, "step": 3735 }, { "epoch": 0.16, "grad_norm": 16.70242172595704, "learning_rate": 5.275779376498802e-06, "loss": 1.2388, "step": 3740 }, { "epoch": 0.16, "grad_norm": 41.14582247683044, "learning_rate": 5.282832557483425e-06, "loss": 1.2864, "step": 3745 }, { "epoch": 0.16, "grad_norm": 18.062823082268306, "learning_rate": 5.289885738468049e-06, "loss": 1.2721, "step": 3750 }, { "epoch": 0.16, "grad_norm": 12.554832773331313, "learning_rate": 5.296938919452673e-06, "loss": 1.2511, "step": 3755 }, { "epoch": 0.16, "grad_norm": 14.359285159621118, "learning_rate": 5.303992100437298e-06, "loss": 1.2218, "step": 3760 }, { "epoch": 0.16, "grad_norm": 10.732104143396588, "learning_rate": 5.311045281421921e-06, "loss": 1.2697, "step": 3765 }, { "epoch": 0.16, "grad_norm": 37.23806119058716, "learning_rate": 5.318098462406546e-06, "loss": 1.2736, "step": 3770 }, { "epoch": 0.16, "grad_norm": 13.693945486538784, "learning_rate": 5.3251516433911694e-06, "loss": 1.2675, "step": 3775 }, { "epoch": 0.16, "grad_norm": 34.828089864169726, "learning_rate": 5.332204824375794e-06, "loss": 1.2769, "step": 3780 }, { "epoch": 0.16, "grad_norm": 11.89472386477093, "learning_rate": 5.339258005360418e-06, "loss": 1.2025, "step": 3785 }, { "epoch": 0.16, "grad_norm": 14.172766677676536, "learning_rate": 5.346311186345042e-06, "loss": 1.2306, "step": 3790 }, { "epoch": 0.16, "grad_norm": 13.04400025314236, "learning_rate": 5.353364367329666e-06, "loss": 1.2336, "step": 3795 }, { "epoch": 0.16, "grad_norm": 10.921863451743317, "learning_rate": 5.360417548314291e-06, "loss": 1.193, "step": 3800 }, { "epoch": 0.16, "grad_norm": 14.260150370917506, "learning_rate": 5.367470729298914e-06, "loss": 1.2395, "step": 3805 }, { "epoch": 0.16, "grad_norm": 9.316570987730108, "learning_rate": 5.374523910283538e-06, "loss": 1.238, "step": 3810 }, { "epoch": 0.16, "grad_norm": 12.98288944969821, "learning_rate": 5.381577091268162e-06, "loss": 1.2943, "step": 3815 }, { "epoch": 0.16, "grad_norm": 16.302114164743593, "learning_rate": 5.388630272252786e-06, "loss": 1.2426, "step": 3820 }, { "epoch": 0.16, "grad_norm": 17.591483688383303, "learning_rate": 5.3956834532374105e-06, "loss": 1.2775, "step": 3825 }, { "epoch": 0.16, "grad_norm": 16.87300482551865, "learning_rate": 5.402736634222034e-06, "loss": 1.2676, "step": 3830 }, { "epoch": 0.16, "grad_norm": 24.844493245385017, "learning_rate": 5.4097898152066585e-06, "loss": 1.2578, "step": 3835 }, { "epoch": 0.16, "grad_norm": 8.493827131088455, "learning_rate": 5.416842996191283e-06, "loss": 1.2441, "step": 3840 }, { "epoch": 0.16, "grad_norm": 18.862839267695904, "learning_rate": 5.423896177175907e-06, "loss": 1.2904, "step": 3845 }, { "epoch": 0.16, "grad_norm": 19.51445164871292, "learning_rate": 5.430949358160531e-06, "loss": 1.2498, "step": 3850 }, { "epoch": 0.16, "grad_norm": 15.788641397206522, "learning_rate": 5.4380025391451555e-06, "loss": 1.2382, "step": 3855 }, { "epoch": 0.16, "grad_norm": 42.66853067803099, "learning_rate": 5.445055720129779e-06, "loss": 1.2885, "step": 3860 }, { "epoch": 0.16, "grad_norm": 39.38940734172004, "learning_rate": 5.4521089011144035e-06, "loss": 1.2558, "step": 3865 }, { "epoch": 0.16, "grad_norm": 35.30926800047287, "learning_rate": 5.459162082099028e-06, "loss": 1.2539, "step": 3870 }, { "epoch": 0.16, "grad_norm": 27.121868433439538, "learning_rate": 5.466215263083651e-06, "loss": 1.3113, "step": 3875 }, { "epoch": 0.16, "grad_norm": 12.508865159697022, "learning_rate": 5.473268444068275e-06, "loss": 1.2632, "step": 3880 }, { "epoch": 0.16, "grad_norm": 29.946751869951267, "learning_rate": 5.480321625052899e-06, "loss": 1.2403, "step": 3885 }, { "epoch": 0.16, "grad_norm": 19.98567733859749, "learning_rate": 5.487374806037523e-06, "loss": 1.2952, "step": 3890 }, { "epoch": 0.16, "grad_norm": 19.969233454032903, "learning_rate": 5.494427987022148e-06, "loss": 1.2286, "step": 3895 }, { "epoch": 0.17, "grad_norm": 13.200495931920694, "learning_rate": 5.501481168006771e-06, "loss": 1.2731, "step": 3900 }, { "epoch": 0.17, "grad_norm": 16.287753658655994, "learning_rate": 5.508534348991396e-06, "loss": 1.2445, "step": 3905 }, { "epoch": 0.17, "grad_norm": 12.579639814833723, "learning_rate": 5.51558752997602e-06, "loss": 1.2881, "step": 3910 }, { "epoch": 0.17, "grad_norm": 15.895564369578244, "learning_rate": 5.522640710960644e-06, "loss": 1.2609, "step": 3915 }, { "epoch": 0.17, "grad_norm": 18.593585199742005, "learning_rate": 5.529693891945268e-06, "loss": 1.258, "step": 3920 }, { "epoch": 0.17, "grad_norm": 24.154349961486478, "learning_rate": 5.536747072929893e-06, "loss": 1.2697, "step": 3925 }, { "epoch": 0.17, "grad_norm": 17.826512647308057, "learning_rate": 5.543800253914516e-06, "loss": 1.2619, "step": 3930 }, { "epoch": 0.17, "grad_norm": 29.347512545346877, "learning_rate": 5.55085343489914e-06, "loss": 1.2648, "step": 3935 }, { "epoch": 0.17, "grad_norm": 12.323806259722165, "learning_rate": 5.557906615883763e-06, "loss": 1.289, "step": 3940 }, { "epoch": 0.17, "grad_norm": 38.02571334385203, "learning_rate": 5.564959796868388e-06, "loss": 1.2503, "step": 3945 }, { "epoch": 0.17, "grad_norm": 32.545735306795066, "learning_rate": 5.572012977853012e-06, "loss": 1.2366, "step": 3950 }, { "epoch": 0.17, "grad_norm": 43.70304004193697, "learning_rate": 5.579066158837636e-06, "loss": 1.3085, "step": 3955 }, { "epoch": 0.17, "grad_norm": 14.908420084401357, "learning_rate": 5.58611933982226e-06, "loss": 1.2645, "step": 3960 }, { "epoch": 0.17, "grad_norm": 14.127772130455492, "learning_rate": 5.593172520806884e-06, "loss": 1.2205, "step": 3965 }, { "epoch": 0.17, "grad_norm": 18.196720356489546, "learning_rate": 5.600225701791508e-06, "loss": 1.2641, "step": 3970 }, { "epoch": 0.17, "grad_norm": 51.99348947062528, "learning_rate": 5.607278882776133e-06, "loss": 1.2814, "step": 3975 }, { "epoch": 0.17, "grad_norm": 9.450848090893036, "learning_rate": 5.614332063760756e-06, "loss": 1.2949, "step": 3980 }, { "epoch": 0.17, "grad_norm": 15.347372539755218, "learning_rate": 5.621385244745381e-06, "loss": 1.267, "step": 3985 }, { "epoch": 0.17, "grad_norm": 11.067400551415359, "learning_rate": 5.628438425730005e-06, "loss": 1.2383, "step": 3990 }, { "epoch": 0.17, "grad_norm": 24.396665192217117, "learning_rate": 5.635491606714629e-06, "loss": 1.2776, "step": 3995 }, { "epoch": 0.17, "grad_norm": 37.38209595993001, "learning_rate": 5.6425447876992525e-06, "loss": 1.234, "step": 4000 }, { "epoch": 0.17, "grad_norm": 33.86989983375221, "learning_rate": 5.649597968683876e-06, "loss": 1.2029, "step": 4005 }, { "epoch": 0.17, "grad_norm": 40.47299111083213, "learning_rate": 5.6566511496685005e-06, "loss": 1.2872, "step": 4010 }, { "epoch": 0.17, "grad_norm": 21.74925283256198, "learning_rate": 5.663704330653125e-06, "loss": 1.2281, "step": 4015 }, { "epoch": 0.17, "grad_norm": 18.054164494760023, "learning_rate": 5.670757511637749e-06, "loss": 1.2325, "step": 4020 }, { "epoch": 0.17, "grad_norm": 28.96586779119221, "learning_rate": 5.677810692622373e-06, "loss": 1.2334, "step": 4025 }, { "epoch": 0.17, "grad_norm": 15.218959428611983, "learning_rate": 5.6848638736069975e-06, "loss": 1.2272, "step": 4030 }, { "epoch": 0.17, "grad_norm": 43.124976304431065, "learning_rate": 5.691917054591621e-06, "loss": 1.25, "step": 4035 }, { "epoch": 0.17, "grad_norm": 93.30468133380172, "learning_rate": 5.6989702355762455e-06, "loss": 1.2803, "step": 4040 }, { "epoch": 0.17, "grad_norm": 102.43333571715009, "learning_rate": 5.70602341656087e-06, "loss": 1.2331, "step": 4045 }, { "epoch": 0.17, "grad_norm": 112.03418005270605, "learning_rate": 5.7130765975454936e-06, "loss": 1.2503, "step": 4050 }, { "epoch": 0.17, "grad_norm": 103.91390832861327, "learning_rate": 5.720129778530118e-06, "loss": 1.2606, "step": 4055 }, { "epoch": 0.17, "grad_norm": 16.093811751761326, "learning_rate": 5.7271829595147425e-06, "loss": 1.283, "step": 4060 }, { "epoch": 0.17, "grad_norm": 26.93172767941601, "learning_rate": 5.734236140499365e-06, "loss": 1.2921, "step": 4065 }, { "epoch": 0.17, "grad_norm": 15.177941944522155, "learning_rate": 5.74128932148399e-06, "loss": 1.2806, "step": 4070 }, { "epoch": 0.17, "grad_norm": 29.185596883098917, "learning_rate": 5.748342502468613e-06, "loss": 1.2417, "step": 4075 }, { "epoch": 0.17, "grad_norm": 12.051744305337033, "learning_rate": 5.755395683453238e-06, "loss": 1.2265, "step": 4080 }, { "epoch": 0.17, "grad_norm": 14.057221593617186, "learning_rate": 5.762448864437862e-06, "loss": 1.2887, "step": 4085 }, { "epoch": 0.17, "grad_norm": 16.714811304989844, "learning_rate": 5.769502045422486e-06, "loss": 1.2208, "step": 4090 }, { "epoch": 0.17, "grad_norm": 32.512722645252836, "learning_rate": 5.77655522640711e-06, "loss": 1.2873, "step": 4095 }, { "epoch": 0.17, "grad_norm": 26.843848577592496, "learning_rate": 5.783608407391735e-06, "loss": 1.278, "step": 4100 }, { "epoch": 0.17, "grad_norm": 15.46005842074122, "learning_rate": 5.790661588376358e-06, "loss": 1.2269, "step": 4105 }, { "epoch": 0.17, "grad_norm": 9.109716322565651, "learning_rate": 5.797714769360983e-06, "loss": 1.2814, "step": 4110 }, { "epoch": 0.17, "grad_norm": 7.4481152441406975, "learning_rate": 5.804767950345607e-06, "loss": 1.2517, "step": 4115 }, { "epoch": 0.17, "grad_norm": 18.327012496458657, "learning_rate": 5.811821131330231e-06, "loss": 1.2537, "step": 4120 }, { "epoch": 0.17, "grad_norm": 10.017276109128982, "learning_rate": 5.818874312314855e-06, "loss": 1.2466, "step": 4125 }, { "epoch": 0.17, "grad_norm": 23.34561879987263, "learning_rate": 5.825927493299478e-06, "loss": 1.2587, "step": 4130 }, { "epoch": 0.18, "grad_norm": 11.53598448137646, "learning_rate": 5.832980674284102e-06, "loss": 1.2579, "step": 4135 }, { "epoch": 0.18, "grad_norm": 17.188914641970314, "learning_rate": 5.840033855268726e-06, "loss": 1.2466, "step": 4140 }, { "epoch": 0.18, "grad_norm": 15.657768210567315, "learning_rate": 5.84708703625335e-06, "loss": 1.261, "step": 4145 }, { "epoch": 0.18, "grad_norm": 11.617412272378381, "learning_rate": 5.854140217237975e-06, "loss": 1.2028, "step": 4150 }, { "epoch": 0.18, "grad_norm": 11.837707382428771, "learning_rate": 5.8611933982225984e-06, "loss": 1.2658, "step": 4155 }, { "epoch": 0.18, "grad_norm": 9.968075865608485, "learning_rate": 5.868246579207223e-06, "loss": 1.23, "step": 4160 }, { "epoch": 0.18, "grad_norm": 15.627901389233413, "learning_rate": 5.875299760191847e-06, "loss": 1.2327, "step": 4165 }, { "epoch": 0.18, "grad_norm": 26.98579082603569, "learning_rate": 5.882352941176471e-06, "loss": 1.2567, "step": 4170 }, { "epoch": 0.18, "grad_norm": 25.929086318887546, "learning_rate": 5.889406122161095e-06, "loss": 1.332, "step": 4175 }, { "epoch": 0.18, "grad_norm": 37.879129805501314, "learning_rate": 5.89645930314572e-06, "loss": 1.2862, "step": 4180 }, { "epoch": 0.18, "grad_norm": 42.23786240661063, "learning_rate": 5.903512484130343e-06, "loss": 1.2187, "step": 4185 }, { "epoch": 0.18, "grad_norm": 18.567635063094574, "learning_rate": 5.910565665114968e-06, "loss": 1.3477, "step": 4190 }, { "epoch": 0.18, "grad_norm": 12.925037559476838, "learning_rate": 5.917618846099591e-06, "loss": 1.2657, "step": 4195 }, { "epoch": 0.18, "grad_norm": 10.755363814871327, "learning_rate": 5.924672027084215e-06, "loss": 1.2147, "step": 4200 }, { "epoch": 0.18, "grad_norm": 21.013765422404685, "learning_rate": 5.9317252080688395e-06, "loss": 1.2226, "step": 4205 }, { "epoch": 0.18, "grad_norm": 51.29638914020649, "learning_rate": 5.938778389053463e-06, "loss": 1.297, "step": 4210 }, { "epoch": 0.18, "grad_norm": 57.20210681239939, "learning_rate": 5.9458315700380875e-06, "loss": 1.2776, "step": 4215 }, { "epoch": 0.18, "grad_norm": 48.442410441464226, "learning_rate": 5.952884751022712e-06, "loss": 1.2401, "step": 4220 }, { "epoch": 0.18, "grad_norm": 53.06027896405559, "learning_rate": 5.9599379320073356e-06, "loss": 1.2872, "step": 4225 }, { "epoch": 0.18, "grad_norm": 64.11701466539499, "learning_rate": 5.96699111299196e-06, "loss": 1.2644, "step": 4230 }, { "epoch": 0.18, "grad_norm": 105.4405397871233, "learning_rate": 5.9740442939765845e-06, "loss": 1.3301, "step": 4235 }, { "epoch": 0.18, "grad_norm": 42.761016692177606, "learning_rate": 5.981097474961208e-06, "loss": 1.2817, "step": 4240 }, { "epoch": 0.18, "grad_norm": 132.0515501036898, "learning_rate": 5.9881506559458325e-06, "loss": 1.311, "step": 4245 }, { "epoch": 0.18, "grad_norm": 61.23783719416133, "learning_rate": 5.995203836930457e-06, "loss": 1.2831, "step": 4250 }, { "epoch": 0.18, "grad_norm": 28.3842841786828, "learning_rate": 6.00225701791508e-06, "loss": 1.3447, "step": 4255 }, { "epoch": 0.18, "grad_norm": 34.50311445783356, "learning_rate": 6.009310198899704e-06, "loss": 1.2465, "step": 4260 }, { "epoch": 0.18, "grad_norm": 17.35310049489662, "learning_rate": 6.016363379884328e-06, "loss": 1.2437, "step": 4265 }, { "epoch": 0.18, "grad_norm": 76.98519172566743, "learning_rate": 6.023416560868952e-06, "loss": 1.2907, "step": 4270 }, { "epoch": 0.18, "grad_norm": 19.65288762857618, "learning_rate": 6.030469741853577e-06, "loss": 1.2327, "step": 4275 }, { "epoch": 0.18, "grad_norm": 32.78279868866156, "learning_rate": 6.0375229228382e-06, "loss": 1.2968, "step": 4280 }, { "epoch": 0.18, "grad_norm": 25.930535333766777, "learning_rate": 6.044576103822825e-06, "loss": 1.2477, "step": 4285 }, { "epoch": 0.18, "grad_norm": 22.053943243412213, "learning_rate": 6.051629284807449e-06, "loss": 1.2264, "step": 4290 }, { "epoch": 0.18, "grad_norm": 20.71260152586023, "learning_rate": 6.058682465792073e-06, "loss": 1.2098, "step": 4295 }, { "epoch": 0.18, "grad_norm": 18.252220329487802, "learning_rate": 6.065735646776697e-06, "loss": 1.2694, "step": 4300 }, { "epoch": 0.18, "grad_norm": 11.463870529926188, "learning_rate": 6.072788827761321e-06, "loss": 1.2522, "step": 4305 }, { "epoch": 0.18, "grad_norm": 15.845249553515464, "learning_rate": 6.079842008745945e-06, "loss": 1.2397, "step": 4310 }, { "epoch": 0.18, "grad_norm": 28.731247425252732, "learning_rate": 6.08689518973057e-06, "loss": 1.2727, "step": 4315 }, { "epoch": 0.18, "grad_norm": 8.861653343680853, "learning_rate": 6.093948370715192e-06, "loss": 1.2236, "step": 4320 }, { "epoch": 0.18, "grad_norm": 22.009147926480956, "learning_rate": 6.101001551699817e-06, "loss": 1.3193, "step": 4325 }, { "epoch": 0.18, "grad_norm": 15.038702926390313, "learning_rate": 6.1080547326844404e-06, "loss": 1.2023, "step": 4330 }, { "epoch": 0.18, "grad_norm": 22.729923867980226, "learning_rate": 6.115107913669065e-06, "loss": 1.2688, "step": 4335 }, { "epoch": 0.18, "grad_norm": 20.138365001346727, "learning_rate": 6.122161094653689e-06, "loss": 1.2214, "step": 4340 }, { "epoch": 0.18, "grad_norm": 23.344159254135658, "learning_rate": 6.129214275638313e-06, "loss": 1.2622, "step": 4345 }, { "epoch": 0.18, "grad_norm": 21.288581889146414, "learning_rate": 6.136267456622937e-06, "loss": 1.2784, "step": 4350 }, { "epoch": 0.18, "grad_norm": 20.081998317456286, "learning_rate": 6.143320637607562e-06, "loss": 1.2332, "step": 4355 }, { "epoch": 0.18, "grad_norm": 30.968987023163816, "learning_rate": 6.150373818592185e-06, "loss": 1.25, "step": 4360 }, { "epoch": 0.18, "grad_norm": 12.037895046922486, "learning_rate": 6.15742699957681e-06, "loss": 1.208, "step": 4365 }, { "epoch": 0.18, "grad_norm": 16.508743796682314, "learning_rate": 6.164480180561434e-06, "loss": 1.2668, "step": 4370 }, { "epoch": 0.19, "grad_norm": 18.205816434345365, "learning_rate": 6.171533361546058e-06, "loss": 1.2226, "step": 4375 }, { "epoch": 0.19, "grad_norm": 8.685875478301838, "learning_rate": 6.178586542530682e-06, "loss": 1.269, "step": 4380 }, { "epoch": 0.19, "grad_norm": 20.116569000450617, "learning_rate": 6.185639723515305e-06, "loss": 1.262, "step": 4385 }, { "epoch": 0.19, "grad_norm": 15.502824477099253, "learning_rate": 6.1926929044999295e-06, "loss": 1.2214, "step": 4390 }, { "epoch": 0.19, "grad_norm": 7.737765036321906, "learning_rate": 6.199746085484554e-06, "loss": 1.1868, "step": 4395 }, { "epoch": 0.19, "grad_norm": 27.895069088164966, "learning_rate": 6.206799266469178e-06, "loss": 1.1919, "step": 4400 }, { "epoch": 0.19, "grad_norm": 26.63082356931384, "learning_rate": 6.213852447453802e-06, "loss": 1.2798, "step": 4405 }, { "epoch": 0.19, "grad_norm": 16.70896586815328, "learning_rate": 6.2209056284384265e-06, "loss": 1.2699, "step": 4410 }, { "epoch": 0.19, "grad_norm": 8.802451489350744, "learning_rate": 6.22795880942305e-06, "loss": 1.2491, "step": 4415 }, { "epoch": 0.19, "grad_norm": 11.398986693480657, "learning_rate": 6.2350119904076745e-06, "loss": 1.2717, "step": 4420 }, { "epoch": 0.19, "grad_norm": 8.426557816658939, "learning_rate": 6.242065171392299e-06, "loss": 1.2773, "step": 4425 }, { "epoch": 0.19, "grad_norm": 10.62247766700659, "learning_rate": 6.2491183523769226e-06, "loss": 1.2359, "step": 4430 }, { "epoch": 0.19, "grad_norm": 14.395373937470046, "learning_rate": 6.256171533361547e-06, "loss": 1.212, "step": 4435 }, { "epoch": 0.19, "grad_norm": 10.204125230316254, "learning_rate": 6.2632247143461715e-06, "loss": 1.2624, "step": 4440 }, { "epoch": 0.19, "grad_norm": 19.63742095193836, "learning_rate": 6.270277895330795e-06, "loss": 1.253, "step": 4445 }, { "epoch": 0.19, "grad_norm": 22.14414972404637, "learning_rate": 6.277331076315419e-06, "loss": 1.2406, "step": 4450 }, { "epoch": 0.19, "grad_norm": 9.421229613948904, "learning_rate": 6.284384257300042e-06, "loss": 1.2789, "step": 4455 }, { "epoch": 0.19, "grad_norm": 9.687595585259304, "learning_rate": 6.291437438284667e-06, "loss": 1.2064, "step": 4460 }, { "epoch": 0.19, "grad_norm": 12.25114093177178, "learning_rate": 6.298490619269291e-06, "loss": 1.2548, "step": 4465 }, { "epoch": 0.19, "grad_norm": 17.01156957125535, "learning_rate": 6.305543800253915e-06, "loss": 1.2434, "step": 4470 }, { "epoch": 0.19, "grad_norm": 15.118558932084882, "learning_rate": 6.312596981238539e-06, "loss": 1.2618, "step": 4475 }, { "epoch": 0.19, "grad_norm": 15.110716073860914, "learning_rate": 6.319650162223163e-06, "loss": 1.2546, "step": 4480 }, { "epoch": 0.19, "grad_norm": 10.070031692809758, "learning_rate": 6.326703343207787e-06, "loss": 1.2033, "step": 4485 }, { "epoch": 0.19, "grad_norm": 31.72662568896546, "learning_rate": 6.333756524192412e-06, "loss": 1.2307, "step": 4490 }, { "epoch": 0.19, "grad_norm": 21.302550119851436, "learning_rate": 6.340809705177035e-06, "loss": 1.2006, "step": 4495 }, { "epoch": 0.19, "grad_norm": 17.83523842445794, "learning_rate": 6.34786288616166e-06, "loss": 1.2525, "step": 4500 }, { "epoch": 0.19, "grad_norm": 10.850077169953218, "learning_rate": 6.354916067146284e-06, "loss": 1.2732, "step": 4505 }, { "epoch": 0.19, "grad_norm": 10.170526122594419, "learning_rate": 6.361969248130908e-06, "loss": 1.2551, "step": 4510 }, { "epoch": 0.19, "grad_norm": 20.306824503737438, "learning_rate": 6.369022429115531e-06, "loss": 1.2374, "step": 4515 }, { "epoch": 0.19, "grad_norm": 20.42985922621295, "learning_rate": 6.376075610100155e-06, "loss": 1.2819, "step": 4520 }, { "epoch": 0.19, "grad_norm": 12.818664414829446, "learning_rate": 6.383128791084779e-06, "loss": 1.2594, "step": 4525 }, { "epoch": 0.19, "grad_norm": 10.589621421728435, "learning_rate": 6.390181972069404e-06, "loss": 1.2265, "step": 4530 }, { "epoch": 0.19, "grad_norm": 9.080515844986637, "learning_rate": 6.3972351530540274e-06, "loss": 1.2599, "step": 4535 }, { "epoch": 0.19, "grad_norm": 31.567375012082916, "learning_rate": 6.404288334038652e-06, "loss": 1.2733, "step": 4540 }, { "epoch": 0.19, "grad_norm": 23.8849475472864, "learning_rate": 6.411341515023276e-06, "loss": 1.2053, "step": 4545 }, { "epoch": 0.19, "grad_norm": 26.215994132172288, "learning_rate": 6.4183946960079e-06, "loss": 1.2129, "step": 4550 }, { "epoch": 0.19, "grad_norm": 8.03711583044702, "learning_rate": 6.425447876992524e-06, "loss": 1.2505, "step": 4555 }, { "epoch": 0.19, "grad_norm": 18.32762863686287, "learning_rate": 6.432501057977149e-06, "loss": 1.1944, "step": 4560 }, { "epoch": 0.19, "grad_norm": 18.11227662469492, "learning_rate": 6.439554238961772e-06, "loss": 1.2329, "step": 4565 }, { "epoch": 0.19, "grad_norm": 12.141301098976342, "learning_rate": 6.446607419946397e-06, "loss": 1.2284, "step": 4570 }, { "epoch": 0.19, "grad_norm": 10.574675606298673, "learning_rate": 6.45366060093102e-06, "loss": 1.1957, "step": 4575 }, { "epoch": 0.19, "grad_norm": 21.162667594146992, "learning_rate": 6.460713781915644e-06, "loss": 1.2674, "step": 4580 }, { "epoch": 0.19, "grad_norm": 9.597960415367137, "learning_rate": 6.4677669629002685e-06, "loss": 1.1878, "step": 4585 }, { "epoch": 0.19, "grad_norm": 17.41587230228707, "learning_rate": 6.474820143884892e-06, "loss": 1.2607, "step": 4590 }, { "epoch": 0.19, "grad_norm": 18.275329721851886, "learning_rate": 6.4818733248695165e-06, "loss": 1.2465, "step": 4595 }, { "epoch": 0.19, "grad_norm": 16.4181329184499, "learning_rate": 6.488926505854141e-06, "loss": 1.2292, "step": 4600 }, { "epoch": 0.19, "grad_norm": 8.186165091829702, "learning_rate": 6.4959796868387646e-06, "loss": 1.2496, "step": 4605 }, { "epoch": 0.2, "grad_norm": 15.7939122458535, "learning_rate": 6.503032867823389e-06, "loss": 1.2421, "step": 4610 }, { "epoch": 0.2, "grad_norm": 9.062086815233554, "learning_rate": 6.5100860488080135e-06, "loss": 1.2602, "step": 4615 }, { "epoch": 0.2, "grad_norm": 42.912857207803484, "learning_rate": 6.517139229792637e-06, "loss": 1.2236, "step": 4620 }, { "epoch": 0.2, "grad_norm": 17.971370881200183, "learning_rate": 6.5241924107772615e-06, "loss": 1.2416, "step": 4625 }, { "epoch": 0.2, "grad_norm": 16.229527833171424, "learning_rate": 6.531245591761886e-06, "loss": 1.2604, "step": 4630 }, { "epoch": 0.2, "grad_norm": 32.07525245323547, "learning_rate": 6.5382987727465095e-06, "loss": 1.2442, "step": 4635 }, { "epoch": 0.2, "grad_norm": 26.722295172677985, "learning_rate": 6.545351953731133e-06, "loss": 1.2481, "step": 4640 }, { "epoch": 0.2, "grad_norm": 58.8620086264494, "learning_rate": 6.552405134715757e-06, "loss": 1.2551, "step": 4645 }, { "epoch": 0.2, "grad_norm": 27.105148956024465, "learning_rate": 6.559458315700381e-06, "loss": 1.1924, "step": 4650 }, { "epoch": 0.2, "grad_norm": 37.3996716839102, "learning_rate": 6.566511496685005e-06, "loss": 1.2396, "step": 4655 }, { "epoch": 0.2, "grad_norm": 13.385584996283885, "learning_rate": 6.573564677669629e-06, "loss": 1.2529, "step": 4660 }, { "epoch": 0.2, "grad_norm": 47.27767529483241, "learning_rate": 6.580617858654254e-06, "loss": 1.2648, "step": 4665 }, { "epoch": 0.2, "grad_norm": 16.111911533829485, "learning_rate": 6.587671039638877e-06, "loss": 1.2372, "step": 4670 }, { "epoch": 0.2, "grad_norm": 15.580053924808482, "learning_rate": 6.594724220623502e-06, "loss": 1.2551, "step": 4675 }, { "epoch": 0.2, "grad_norm": 11.07039795529478, "learning_rate": 6.601777401608126e-06, "loss": 1.2241, "step": 4680 }, { "epoch": 0.2, "grad_norm": 9.487965893517133, "learning_rate": 6.60883058259275e-06, "loss": 1.2567, "step": 4685 }, { "epoch": 0.2, "grad_norm": 33.2559515027743, "learning_rate": 6.615883763577374e-06, "loss": 1.2927, "step": 4690 }, { "epoch": 0.2, "grad_norm": 11.423399354802449, "learning_rate": 6.622936944561999e-06, "loss": 1.3167, "step": 4695 }, { "epoch": 0.2, "grad_norm": 15.371956106241857, "learning_rate": 6.629990125546622e-06, "loss": 1.2472, "step": 4700 }, { "epoch": 0.2, "grad_norm": 15.159259831697371, "learning_rate": 6.637043306531246e-06, "loss": 1.2318, "step": 4705 }, { "epoch": 0.2, "grad_norm": 10.773569273743618, "learning_rate": 6.6440964875158694e-06, "loss": 1.2548, "step": 4710 }, { "epoch": 0.2, "grad_norm": 11.07173662577065, "learning_rate": 6.651149668500494e-06, "loss": 1.2435, "step": 4715 }, { "epoch": 0.2, "grad_norm": 8.558139234649046, "learning_rate": 6.658202849485118e-06, "loss": 1.2273, "step": 4720 }, { "epoch": 0.2, "grad_norm": 13.609711847821076, "learning_rate": 6.665256030469742e-06, "loss": 1.2342, "step": 4725 }, { "epoch": 0.2, "grad_norm": 17.724996011129768, "learning_rate": 6.672309211454366e-06, "loss": 1.2351, "step": 4730 }, { "epoch": 0.2, "grad_norm": 28.492096353961507, "learning_rate": 6.679362392438991e-06, "loss": 1.2502, "step": 4735 }, { "epoch": 0.2, "grad_norm": 26.32408910441213, "learning_rate": 6.686415573423614e-06, "loss": 1.2265, "step": 4740 }, { "epoch": 0.2, "grad_norm": 28.482098451695897, "learning_rate": 6.693468754408239e-06, "loss": 1.207, "step": 4745 }, { "epoch": 0.2, "grad_norm": 11.893845252990738, "learning_rate": 6.700521935392863e-06, "loss": 1.2437, "step": 4750 }, { "epoch": 0.2, "grad_norm": 13.485924479755573, "learning_rate": 6.707575116377487e-06, "loss": 1.2426, "step": 4755 }, { "epoch": 0.2, "grad_norm": 22.93788744863069, "learning_rate": 6.714628297362111e-06, "loss": 1.2499, "step": 4760 }, { "epoch": 0.2, "grad_norm": 14.739103812664775, "learning_rate": 6.721681478346736e-06, "loss": 1.2205, "step": 4765 }, { "epoch": 0.2, "grad_norm": 11.269578899710321, "learning_rate": 6.7287346593313585e-06, "loss": 1.2172, "step": 4770 }, { "epoch": 0.2, "grad_norm": 21.467756816152093, "learning_rate": 6.735787840315983e-06, "loss": 1.2394, "step": 4775 }, { "epoch": 0.2, "grad_norm": 8.38554829800853, "learning_rate": 6.742841021300607e-06, "loss": 1.2815, "step": 4780 }, { "epoch": 0.2, "grad_norm": 8.939605925465878, "learning_rate": 6.749894202285231e-06, "loss": 1.2115, "step": 4785 }, { "epoch": 0.2, "grad_norm": 16.987850108768736, "learning_rate": 6.7569473832698555e-06, "loss": 1.2877, "step": 4790 }, { "epoch": 0.2, "grad_norm": 18.723757123880965, "learning_rate": 6.764000564254479e-06, "loss": 1.2158, "step": 4795 }, { "epoch": 0.2, "grad_norm": 23.979174431867218, "learning_rate": 6.7710537452391035e-06, "loss": 1.2259, "step": 4800 }, { "epoch": 0.2, "grad_norm": 16.884194215475357, "learning_rate": 6.778106926223728e-06, "loss": 1.2614, "step": 4805 }, { "epoch": 0.2, "grad_norm": 23.695292180317153, "learning_rate": 6.7851601072083516e-06, "loss": 1.246, "step": 4810 }, { "epoch": 0.2, "grad_norm": 8.64286558091429, "learning_rate": 6.792213288192976e-06, "loss": 1.2334, "step": 4815 }, { "epoch": 0.2, "grad_norm": 15.659020871876756, "learning_rate": 6.7992664691776004e-06, "loss": 1.291, "step": 4820 }, { "epoch": 0.2, "grad_norm": 31.832973137697884, "learning_rate": 6.806319650162224e-06, "loss": 1.2274, "step": 4825 }, { "epoch": 0.2, "grad_norm": 11.279745241859139, "learning_rate": 6.8133728311468485e-06, "loss": 1.2535, "step": 4830 }, { "epoch": 0.2, "grad_norm": 15.281529448929419, "learning_rate": 6.820426012131471e-06, "loss": 1.2132, "step": 4835 }, { "epoch": 0.2, "grad_norm": 14.303496407960964, "learning_rate": 6.827479193116096e-06, "loss": 1.2138, "step": 4840 }, { "epoch": 0.21, "grad_norm": 13.414439438900157, "learning_rate": 6.834532374100719e-06, "loss": 1.2321, "step": 4845 }, { "epoch": 0.21, "grad_norm": 8.958303837167044, "learning_rate": 6.841585555085344e-06, "loss": 1.211, "step": 4850 }, { "epoch": 0.21, "grad_norm": 12.024877483435045, "learning_rate": 6.848638736069968e-06, "loss": 1.2443, "step": 4855 }, { "epoch": 0.21, "grad_norm": 15.751619488887554, "learning_rate": 6.855691917054592e-06, "loss": 1.2187, "step": 4860 }, { "epoch": 0.21, "grad_norm": 11.00348990973335, "learning_rate": 6.862745098039216e-06, "loss": 1.1981, "step": 4865 }, { "epoch": 0.21, "grad_norm": 21.61449142807839, "learning_rate": 6.869798279023841e-06, "loss": 1.2252, "step": 4870 }, { "epoch": 0.21, "grad_norm": 14.811329482264858, "learning_rate": 6.876851460008464e-06, "loss": 1.2429, "step": 4875 }, { "epoch": 0.21, "grad_norm": 11.402305332264874, "learning_rate": 6.883904640993089e-06, "loss": 1.2804, "step": 4880 }, { "epoch": 0.21, "grad_norm": 38.30895830986675, "learning_rate": 6.890957821977713e-06, "loss": 1.2292, "step": 4885 }, { "epoch": 0.21, "grad_norm": 28.730711036505173, "learning_rate": 6.898011002962337e-06, "loss": 1.2022, "step": 4890 }, { "epoch": 0.21, "grad_norm": 16.76296723662178, "learning_rate": 6.90506418394696e-06, "loss": 1.2084, "step": 4895 }, { "epoch": 0.21, "grad_norm": 44.36626252904021, "learning_rate": 6.912117364931584e-06, "loss": 1.2366, "step": 4900 }, { "epoch": 0.21, "grad_norm": 12.203170313920902, "learning_rate": 6.919170545916208e-06, "loss": 1.2631, "step": 4905 }, { "epoch": 0.21, "grad_norm": 14.336109267660797, "learning_rate": 6.926223726900833e-06, "loss": 1.23, "step": 4910 }, { "epoch": 0.21, "grad_norm": 19.385169329304997, "learning_rate": 6.9332769078854564e-06, "loss": 1.2368, "step": 4915 }, { "epoch": 0.21, "grad_norm": 24.41686949163609, "learning_rate": 6.940330088870081e-06, "loss": 1.2557, "step": 4920 }, { "epoch": 0.21, "grad_norm": 18.654871942404327, "learning_rate": 6.947383269854705e-06, "loss": 1.2092, "step": 4925 }, { "epoch": 0.21, "grad_norm": 10.219187386260392, "learning_rate": 6.954436450839329e-06, "loss": 1.2369, "step": 4930 }, { "epoch": 0.21, "grad_norm": 15.045082138446046, "learning_rate": 6.961489631823953e-06, "loss": 1.2341, "step": 4935 }, { "epoch": 0.21, "grad_norm": 9.615703407655676, "learning_rate": 6.968542812808578e-06, "loss": 1.2507, "step": 4940 }, { "epoch": 0.21, "grad_norm": 7.6145670914862915, "learning_rate": 6.975595993793201e-06, "loss": 1.258, "step": 4945 }, { "epoch": 0.21, "grad_norm": 7.66544246130067, "learning_rate": 6.982649174777826e-06, "loss": 1.2604, "step": 4950 }, { "epoch": 0.21, "grad_norm": 34.324393318617716, "learning_rate": 6.98970235576245e-06, "loss": 1.2708, "step": 4955 }, { "epoch": 0.21, "grad_norm": 37.89415729417008, "learning_rate": 6.996755536747073e-06, "loss": 1.2206, "step": 4960 }, { "epoch": 0.21, "grad_norm": 37.586419068795685, "learning_rate": 7.0038087177316975e-06, "loss": 1.2375, "step": 4965 }, { "epoch": 0.21, "grad_norm": 41.293465323629775, "learning_rate": 7.010861898716321e-06, "loss": 1.2625, "step": 4970 }, { "epoch": 0.21, "grad_norm": 27.21765098464862, "learning_rate": 7.0179150797009455e-06, "loss": 1.1929, "step": 4975 }, { "epoch": 0.21, "grad_norm": 9.305652818541972, "learning_rate": 7.02496826068557e-06, "loss": 1.2309, "step": 4980 }, { "epoch": 0.21, "grad_norm": 28.64509065057073, "learning_rate": 7.0320214416701936e-06, "loss": 1.2249, "step": 4985 }, { "epoch": 0.21, "grad_norm": 85.12393967497654, "learning_rate": 7.039074622654818e-06, "loss": 1.2481, "step": 4990 }, { "epoch": 0.21, "grad_norm": 135.8987539009934, "learning_rate": 7.0461278036394425e-06, "loss": 1.2196, "step": 4995 }, { "epoch": 0.21, "grad_norm": 208.07194196806995, "learning_rate": 7.053180984624066e-06, "loss": 1.274, "step": 5000 }, { "epoch": 0.21, "grad_norm": 71.16760199887554, "learning_rate": 7.0602341656086905e-06, "loss": 1.3879, "step": 5005 }, { "epoch": 0.21, "grad_norm": 22.447640267720217, "learning_rate": 7.067287346593314e-06, "loss": 1.3144, "step": 5010 }, { "epoch": 0.21, "grad_norm": 31.382670180597444, "learning_rate": 7.0743405275779385e-06, "loss": 1.2637, "step": 5015 }, { "epoch": 0.21, "grad_norm": 45.52636450521939, "learning_rate": 7.081393708562563e-06, "loss": 1.3118, "step": 5020 }, { "epoch": 0.21, "grad_norm": 43.957279036532334, "learning_rate": 7.088446889547186e-06, "loss": 1.3008, "step": 5025 }, { "epoch": 0.21, "grad_norm": 43.739346603212894, "learning_rate": 7.09550007053181e-06, "loss": 1.2719, "step": 5030 }, { "epoch": 0.21, "grad_norm": 30.362015467775393, "learning_rate": 7.102553251516434e-06, "loss": 1.2414, "step": 5035 }, { "epoch": 0.21, "grad_norm": 26.810874193829417, "learning_rate": 7.109606432501058e-06, "loss": 1.2776, "step": 5040 }, { "epoch": 0.21, "grad_norm": 16.04965719279758, "learning_rate": 7.116659613485683e-06, "loss": 1.2267, "step": 5045 }, { "epoch": 0.21, "grad_norm": 22.832321649941303, "learning_rate": 7.123712794470306e-06, "loss": 1.2251, "step": 5050 }, { "epoch": 0.21, "grad_norm": 12.52005669267623, "learning_rate": 7.130765975454931e-06, "loss": 1.2555, "step": 5055 }, { "epoch": 0.21, "grad_norm": 24.340708031011626, "learning_rate": 7.137819156439555e-06, "loss": 1.2791, "step": 5060 }, { "epoch": 0.21, "grad_norm": 18.919712129577587, "learning_rate": 7.144872337424179e-06, "loss": 1.2454, "step": 5065 }, { "epoch": 0.21, "grad_norm": 12.191742788980482, "learning_rate": 7.151925518408803e-06, "loss": 1.2504, "step": 5070 }, { "epoch": 0.21, "grad_norm": 7.2707869372775065, "learning_rate": 7.158978699393428e-06, "loss": 1.2128, "step": 5075 }, { "epoch": 0.21, "grad_norm": 9.483696456508929, "learning_rate": 7.166031880378051e-06, "loss": 1.2503, "step": 5080 }, { "epoch": 0.22, "grad_norm": 6.904851570847637, "learning_rate": 7.173085061362676e-06, "loss": 1.2405, "step": 5085 }, { "epoch": 0.22, "grad_norm": 10.27068299831643, "learning_rate": 7.1801382423472984e-06, "loss": 1.2255, "step": 5090 }, { "epoch": 0.22, "grad_norm": 12.086551940068498, "learning_rate": 7.187191423331923e-06, "loss": 1.197, "step": 5095 }, { "epoch": 0.22, "grad_norm": 30.351386199272618, "learning_rate": 7.194244604316547e-06, "loss": 1.2506, "step": 5100 }, { "epoch": 0.22, "grad_norm": 25.136033625238245, "learning_rate": 7.201297785301171e-06, "loss": 1.2367, "step": 5105 }, { "epoch": 0.22, "grad_norm": 22.41315672808275, "learning_rate": 7.208350966285795e-06, "loss": 1.1984, "step": 5110 }, { "epoch": 0.22, "grad_norm": 28.215296752301334, "learning_rate": 7.21540414727042e-06, "loss": 1.2647, "step": 5115 }, { "epoch": 0.22, "grad_norm": 35.28103881347845, "learning_rate": 7.222457328255043e-06, "loss": 1.2398, "step": 5120 }, { "epoch": 0.22, "grad_norm": 27.378602433879518, "learning_rate": 7.229510509239668e-06, "loss": 1.231, "step": 5125 }, { "epoch": 0.22, "grad_norm": 18.7609425202486, "learning_rate": 7.236563690224292e-06, "loss": 1.2306, "step": 5130 }, { "epoch": 0.22, "grad_norm": 9.048799249449326, "learning_rate": 7.243616871208916e-06, "loss": 1.226, "step": 5135 }, { "epoch": 0.22, "grad_norm": 19.697886494754847, "learning_rate": 7.25067005219354e-06, "loss": 1.2333, "step": 5140 }, { "epoch": 0.22, "grad_norm": 11.943073511017728, "learning_rate": 7.257723233178165e-06, "loss": 1.2357, "step": 5145 }, { "epoch": 0.22, "grad_norm": 17.29181708016029, "learning_rate": 7.264776414162788e-06, "loss": 1.2389, "step": 5150 }, { "epoch": 0.22, "grad_norm": 9.651865809048662, "learning_rate": 7.271829595147412e-06, "loss": 1.2218, "step": 5155 }, { "epoch": 0.22, "grad_norm": 8.64169189621745, "learning_rate": 7.278882776132036e-06, "loss": 1.2158, "step": 5160 }, { "epoch": 0.22, "grad_norm": 11.11774091818124, "learning_rate": 7.28593595711666e-06, "loss": 1.2415, "step": 5165 }, { "epoch": 0.22, "grad_norm": 10.771656199240232, "learning_rate": 7.2929891381012845e-06, "loss": 1.2472, "step": 5170 }, { "epoch": 0.22, "grad_norm": 15.540803359450853, "learning_rate": 7.300042319085908e-06, "loss": 1.31, "step": 5175 }, { "epoch": 0.22, "grad_norm": 16.61472642623873, "learning_rate": 7.3070955000705325e-06, "loss": 1.3246, "step": 5180 }, { "epoch": 0.22, "grad_norm": 8.1878758336833, "learning_rate": 7.314148681055156e-06, "loss": 1.2258, "step": 5185 }, { "epoch": 0.22, "grad_norm": 13.221182429738512, "learning_rate": 7.3212018620397806e-06, "loss": 1.2575, "step": 5190 }, { "epoch": 0.22, "grad_norm": 22.50872224561072, "learning_rate": 7.328255043024405e-06, "loss": 1.2197, "step": 5195 }, { "epoch": 0.22, "grad_norm": 8.6994195201989, "learning_rate": 7.335308224009029e-06, "loss": 1.234, "step": 5200 }, { "epoch": 0.22, "grad_norm": 10.94860403481735, "learning_rate": 7.342361404993653e-06, "loss": 1.2017, "step": 5205 }, { "epoch": 0.22, "grad_norm": 11.60764621923028, "learning_rate": 7.3494145859782775e-06, "loss": 1.1956, "step": 5210 }, { "epoch": 0.22, "grad_norm": 12.868630296089705, "learning_rate": 7.3564677669629e-06, "loss": 1.2232, "step": 5215 }, { "epoch": 0.22, "grad_norm": 19.60381910504052, "learning_rate": 7.363520947947525e-06, "loss": 1.25, "step": 5220 }, { "epoch": 0.22, "grad_norm": 28.170648681732985, "learning_rate": 7.370574128932148e-06, "loss": 1.2503, "step": 5225 }, { "epoch": 0.22, "grad_norm": 7.710517283293103, "learning_rate": 7.377627309916773e-06, "loss": 1.2602, "step": 5230 }, { "epoch": 0.22, "grad_norm": 11.156412950775163, "learning_rate": 7.384680490901397e-06, "loss": 1.2194, "step": 5235 }, { "epoch": 0.22, "grad_norm": 7.346549065589771, "learning_rate": 7.391733671886021e-06, "loss": 1.2053, "step": 5240 }, { "epoch": 0.22, "grad_norm": 11.05275653516866, "learning_rate": 7.398786852870645e-06, "loss": 1.2435, "step": 5245 }, { "epoch": 0.22, "grad_norm": 14.780765152837011, "learning_rate": 7.40584003385527e-06, "loss": 1.2374, "step": 5250 }, { "epoch": 0.22, "grad_norm": 7.578334485018415, "learning_rate": 7.412893214839893e-06, "loss": 1.1733, "step": 5255 }, { "epoch": 0.22, "grad_norm": 8.089542001474847, "learning_rate": 7.419946395824518e-06, "loss": 1.2053, "step": 5260 }, { "epoch": 0.22, "grad_norm": 13.055737575437313, "learning_rate": 7.426999576809142e-06, "loss": 1.2428, "step": 5265 }, { "epoch": 0.22, "grad_norm": 33.28259990502187, "learning_rate": 7.434052757793766e-06, "loss": 1.1782, "step": 5270 }, { "epoch": 0.22, "grad_norm": 15.629775398546094, "learning_rate": 7.44110593877839e-06, "loss": 1.327, "step": 5275 }, { "epoch": 0.22, "grad_norm": 21.091615298178418, "learning_rate": 7.448159119763013e-06, "loss": 1.2721, "step": 5280 }, { "epoch": 0.22, "grad_norm": 59.36783809856298, "learning_rate": 7.455212300747637e-06, "loss": 1.2424, "step": 5285 }, { "epoch": 0.22, "grad_norm": 20.788459245771993, "learning_rate": 7.462265481732262e-06, "loss": 1.2114, "step": 5290 }, { "epoch": 0.22, "grad_norm": 25.50948987670773, "learning_rate": 7.469318662716885e-06, "loss": 1.2384, "step": 5295 }, { "epoch": 0.22, "grad_norm": 10.722448159244014, "learning_rate": 7.47637184370151e-06, "loss": 1.2698, "step": 5300 }, { "epoch": 0.22, "grad_norm": 15.375770571889714, "learning_rate": 7.483425024686134e-06, "loss": 1.241, "step": 5305 }, { "epoch": 0.22, "grad_norm": 13.258407829634491, "learning_rate": 7.490478205670758e-06, "loss": 1.271, "step": 5310 }, { "epoch": 0.22, "grad_norm": 27.29466633067146, "learning_rate": 7.497531386655382e-06, "loss": 1.2247, "step": 5315 }, { "epoch": 0.23, "grad_norm": 27.39793150373791, "learning_rate": 7.504584567640007e-06, "loss": 1.2847, "step": 5320 }, { "epoch": 0.23, "grad_norm": 14.625293576187255, "learning_rate": 7.51163774862463e-06, "loss": 1.2409, "step": 5325 }, { "epoch": 0.23, "grad_norm": 10.499825077190442, "learning_rate": 7.518690929609255e-06, "loss": 1.2379, "step": 5330 }, { "epoch": 0.23, "grad_norm": 44.09292451305061, "learning_rate": 7.525744110593879e-06, "loss": 1.284, "step": 5335 }, { "epoch": 0.23, "grad_norm": 21.285965713590198, "learning_rate": 7.532797291578503e-06, "loss": 1.2467, "step": 5340 }, { "epoch": 0.23, "grad_norm": 14.644627220773152, "learning_rate": 7.5398504725631265e-06, "loss": 1.1858, "step": 5345 }, { "epoch": 0.23, "grad_norm": 19.663194191382065, "learning_rate": 7.54690365354775e-06, "loss": 1.2171, "step": 5350 }, { "epoch": 0.23, "grad_norm": 28.902397205479353, "learning_rate": 7.5539568345323745e-06, "loss": 1.2271, "step": 5355 }, { "epoch": 0.23, "grad_norm": 18.945930006474786, "learning_rate": 7.561010015516998e-06, "loss": 1.2322, "step": 5360 }, { "epoch": 0.23, "grad_norm": 19.414902357142513, "learning_rate": 7.5680631965016226e-06, "loss": 1.2074, "step": 5365 }, { "epoch": 0.23, "grad_norm": 11.475661376914047, "learning_rate": 7.575116377486247e-06, "loss": 1.2363, "step": 5370 }, { "epoch": 0.23, "grad_norm": 8.999917219708243, "learning_rate": 7.582169558470871e-06, "loss": 1.2628, "step": 5375 }, { "epoch": 0.23, "grad_norm": 38.849038339519595, "learning_rate": 7.589222739455495e-06, "loss": 1.2485, "step": 5380 }, { "epoch": 0.23, "grad_norm": 32.841423621602274, "learning_rate": 7.5962759204401195e-06, "loss": 1.2107, "step": 5385 }, { "epoch": 0.23, "grad_norm": 35.69942150166393, "learning_rate": 7.603329101424743e-06, "loss": 1.2589, "step": 5390 }, { "epoch": 0.23, "grad_norm": 29.696559181952644, "learning_rate": 7.6103822824093675e-06, "loss": 1.2622, "step": 5395 }, { "epoch": 0.23, "grad_norm": 24.542615685281255, "learning_rate": 7.617435463393992e-06, "loss": 1.2789, "step": 5400 }, { "epoch": 0.23, "grad_norm": 37.03208988309521, "learning_rate": 7.624488644378616e-06, "loss": 1.2489, "step": 5405 }, { "epoch": 0.23, "grad_norm": 30.78302835451295, "learning_rate": 7.63154182536324e-06, "loss": 1.246, "step": 5410 }, { "epoch": 0.23, "grad_norm": 17.69008865206168, "learning_rate": 7.638595006347863e-06, "loss": 1.2748, "step": 5415 }, { "epoch": 0.23, "grad_norm": 20.61441193591923, "learning_rate": 7.645648187332488e-06, "loss": 1.2625, "step": 5420 }, { "epoch": 0.23, "grad_norm": 10.080083707736156, "learning_rate": 7.652701368317112e-06, "loss": 1.2568, "step": 5425 }, { "epoch": 0.23, "grad_norm": 14.197557034230073, "learning_rate": 7.659754549301735e-06, "loss": 1.2328, "step": 5430 }, { "epoch": 0.23, "grad_norm": 17.757497801017163, "learning_rate": 7.66680773028636e-06, "loss": 1.2327, "step": 5435 }, { "epoch": 0.23, "grad_norm": 8.237937606222026, "learning_rate": 7.673860911270984e-06, "loss": 1.2932, "step": 5440 }, { "epoch": 0.23, "grad_norm": 26.596692719642242, "learning_rate": 7.680914092255608e-06, "loss": 1.2155, "step": 5445 }, { "epoch": 0.23, "grad_norm": 29.475003336477855, "learning_rate": 7.687967273240231e-06, "loss": 1.2566, "step": 5450 }, { "epoch": 0.23, "grad_norm": 47.35898381503337, "learning_rate": 7.695020454224857e-06, "loss": 1.2041, "step": 5455 }, { "epoch": 0.23, "grad_norm": 41.63328925060982, "learning_rate": 7.70207363520948e-06, "loss": 1.199, "step": 5460 }, { "epoch": 0.23, "grad_norm": 48.10794058752825, "learning_rate": 7.709126816194104e-06, "loss": 1.226, "step": 5465 }, { "epoch": 0.23, "grad_norm": 7.496571253075856, "learning_rate": 7.716179997178729e-06, "loss": 1.1894, "step": 5470 }, { "epoch": 0.23, "grad_norm": 24.74008078473553, "learning_rate": 7.723233178163351e-06, "loss": 1.2284, "step": 5475 }, { "epoch": 0.23, "grad_norm": 13.09872035218063, "learning_rate": 7.730286359147976e-06, "loss": 1.292, "step": 5480 }, { "epoch": 0.23, "grad_norm": 7.937516307018183, "learning_rate": 7.7373395401326e-06, "loss": 1.2219, "step": 5485 }, { "epoch": 0.23, "grad_norm": 15.708157872424051, "learning_rate": 7.744392721117224e-06, "loss": 1.2542, "step": 5490 }, { "epoch": 0.23, "grad_norm": 32.82070351482054, "learning_rate": 7.751445902101849e-06, "loss": 1.2413, "step": 5495 }, { "epoch": 0.23, "grad_norm": 45.352652386867184, "learning_rate": 7.758499083086472e-06, "loss": 1.2461, "step": 5500 }, { "epoch": 0.23, "grad_norm": 23.682529662802693, "learning_rate": 7.765552264071096e-06, "loss": 1.2513, "step": 5505 }, { "epoch": 0.23, "grad_norm": 13.026179777053374, "learning_rate": 7.772605445055721e-06, "loss": 1.2086, "step": 5510 }, { "epoch": 0.23, "grad_norm": 20.451632989280416, "learning_rate": 7.779658626040345e-06, "loss": 1.2593, "step": 5515 }, { "epoch": 0.23, "grad_norm": 8.645583127940919, "learning_rate": 7.786711807024968e-06, "loss": 1.2267, "step": 5520 }, { "epoch": 0.23, "grad_norm": 18.17246112408486, "learning_rate": 7.793764988009594e-06, "loss": 1.2759, "step": 5525 }, { "epoch": 0.23, "grad_norm": 57.88720436051201, "learning_rate": 7.800818168994217e-06, "loss": 1.2603, "step": 5530 }, { "epoch": 0.23, "grad_norm": 32.99982230022284, "learning_rate": 7.807871349978841e-06, "loss": 1.2291, "step": 5535 }, { "epoch": 0.23, "grad_norm": 9.907954453742963, "learning_rate": 7.814924530963465e-06, "loss": 1.2425, "step": 5540 }, { "epoch": 0.23, "grad_norm": 9.755041842092039, "learning_rate": 7.821977711948088e-06, "loss": 1.2465, "step": 5545 }, { "epoch": 0.23, "grad_norm": 11.108776012805647, "learning_rate": 7.829030892932713e-06, "loss": 1.1827, "step": 5550 }, { "epoch": 0.24, "grad_norm": 8.051574743245526, "learning_rate": 7.836084073917337e-06, "loss": 1.2208, "step": 5555 }, { "epoch": 0.24, "grad_norm": 8.29578232029804, "learning_rate": 7.84313725490196e-06, "loss": 1.2522, "step": 5560 }, { "epoch": 0.24, "grad_norm": 8.437022803264277, "learning_rate": 7.850190435886586e-06, "loss": 1.2787, "step": 5565 }, { "epoch": 0.24, "grad_norm": 7.813191051935494, "learning_rate": 7.85724361687121e-06, "loss": 1.23, "step": 5570 }, { "epoch": 0.24, "grad_norm": 20.982176454917052, "learning_rate": 7.864296797855833e-06, "loss": 1.21, "step": 5575 }, { "epoch": 0.24, "grad_norm": 38.91391833811104, "learning_rate": 7.871349978840458e-06, "loss": 1.2275, "step": 5580 }, { "epoch": 0.24, "grad_norm": 25.652714703934958, "learning_rate": 7.878403159825082e-06, "loss": 1.2645, "step": 5585 }, { "epoch": 0.24, "grad_norm": 12.586967924272614, "learning_rate": 7.885456340809706e-06, "loss": 1.2101, "step": 5590 }, { "epoch": 0.24, "grad_norm": 14.759683978221286, "learning_rate": 7.892509521794331e-06, "loss": 1.2695, "step": 5595 }, { "epoch": 0.24, "grad_norm": 14.594071720655114, "learning_rate": 7.899562702778953e-06, "loss": 1.2081, "step": 5600 }, { "epoch": 0.24, "grad_norm": 17.557672653758722, "learning_rate": 7.906615883763578e-06, "loss": 1.2413, "step": 5605 }, { "epoch": 0.24, "grad_norm": 10.303184324542405, "learning_rate": 7.913669064748202e-06, "loss": 1.2112, "step": 5610 }, { "epoch": 0.24, "grad_norm": 16.093487008474828, "learning_rate": 7.920722245732825e-06, "loss": 1.2247, "step": 5615 }, { "epoch": 0.24, "grad_norm": 24.14998033564898, "learning_rate": 7.92777542671745e-06, "loss": 1.2365, "step": 5620 }, { "epoch": 0.24, "grad_norm": 57.246181883266686, "learning_rate": 7.934828607702074e-06, "loss": 1.256, "step": 5625 }, { "epoch": 0.24, "grad_norm": 11.939623103628394, "learning_rate": 7.941881788686698e-06, "loss": 1.2494, "step": 5630 }, { "epoch": 0.24, "grad_norm": 26.782811993135432, "learning_rate": 7.948934969671323e-06, "loss": 1.2393, "step": 5635 }, { "epoch": 0.24, "grad_norm": 80.56850881605354, "learning_rate": 7.955988150655947e-06, "loss": 1.2408, "step": 5640 }, { "epoch": 0.24, "grad_norm": 81.83284909740311, "learning_rate": 7.96304133164057e-06, "loss": 1.2026, "step": 5645 }, { "epoch": 0.24, "grad_norm": 9.815992929128662, "learning_rate": 7.970094512625196e-06, "loss": 1.2516, "step": 5650 }, { "epoch": 0.24, "grad_norm": 9.051358772845925, "learning_rate": 7.97714769360982e-06, "loss": 1.2117, "step": 5655 }, { "epoch": 0.24, "grad_norm": 13.070032008142176, "learning_rate": 7.984200874594443e-06, "loss": 1.2234, "step": 5660 }, { "epoch": 0.24, "grad_norm": 26.233579984973666, "learning_rate": 7.991254055579066e-06, "loss": 1.235, "step": 5665 }, { "epoch": 0.24, "grad_norm": 9.280753878087175, "learning_rate": 7.99830723656369e-06, "loss": 1.2167, "step": 5670 }, { "epoch": 0.24, "grad_norm": 10.019445593955554, "learning_rate": 8.005360417548315e-06, "loss": 1.2258, "step": 5675 }, { "epoch": 0.24, "grad_norm": 22.261336225693217, "learning_rate": 8.012413598532939e-06, "loss": 1.272, "step": 5680 }, { "epoch": 0.24, "grad_norm": 13.891613852460367, "learning_rate": 8.019466779517562e-06, "loss": 1.2047, "step": 5685 }, { "epoch": 0.24, "grad_norm": 11.411480507776263, "learning_rate": 8.026519960502188e-06, "loss": 1.2797, "step": 5690 }, { "epoch": 0.24, "grad_norm": 35.70797685198074, "learning_rate": 8.033573141486811e-06, "loss": 1.2431, "step": 5695 }, { "epoch": 0.24, "grad_norm": 11.769754151648415, "learning_rate": 8.040626322471435e-06, "loss": 1.2125, "step": 5700 }, { "epoch": 0.24, "grad_norm": 11.950516163892479, "learning_rate": 8.04767950345606e-06, "loss": 1.2535, "step": 5705 }, { "epoch": 0.24, "grad_norm": 14.365075035102075, "learning_rate": 8.054732684440684e-06, "loss": 1.2078, "step": 5710 }, { "epoch": 0.24, "grad_norm": 13.740482274832386, "learning_rate": 8.061785865425307e-06, "loss": 1.2612, "step": 5715 }, { "epoch": 0.24, "grad_norm": 9.74218161219491, "learning_rate": 8.068839046409933e-06, "loss": 1.2607, "step": 5720 }, { "epoch": 0.24, "grad_norm": 10.590193525435973, "learning_rate": 8.075892227394556e-06, "loss": 1.2207, "step": 5725 }, { "epoch": 0.24, "grad_norm": 10.94878125209523, "learning_rate": 8.08294540837918e-06, "loss": 1.2702, "step": 5730 }, { "epoch": 0.24, "grad_norm": 14.180769665936394, "learning_rate": 8.089998589363804e-06, "loss": 1.2464, "step": 5735 }, { "epoch": 0.24, "grad_norm": 9.462500006829307, "learning_rate": 8.097051770348427e-06, "loss": 1.2221, "step": 5740 }, { "epoch": 0.24, "grad_norm": 11.413536579040953, "learning_rate": 8.104104951333052e-06, "loss": 1.2738, "step": 5745 }, { "epoch": 0.24, "grad_norm": 12.131398238975681, "learning_rate": 8.111158132317676e-06, "loss": 1.2675, "step": 5750 }, { "epoch": 0.24, "grad_norm": 9.714857970314563, "learning_rate": 8.1182113133023e-06, "loss": 1.2363, "step": 5755 }, { "epoch": 0.24, "grad_norm": 18.14180250016712, "learning_rate": 8.125264494286925e-06, "loss": 1.221, "step": 5760 }, { "epoch": 0.24, "grad_norm": 13.905825663762378, "learning_rate": 8.132317675271548e-06, "loss": 1.1805, "step": 5765 }, { "epoch": 0.24, "grad_norm": 7.844830230607571, "learning_rate": 8.139370856256172e-06, "loss": 1.2156, "step": 5770 }, { "epoch": 0.24, "grad_norm": 8.475546565266745, "learning_rate": 8.146424037240797e-06, "loss": 1.253, "step": 5775 }, { "epoch": 0.24, "grad_norm": 11.619127623301695, "learning_rate": 8.153477218225421e-06, "loss": 1.2056, "step": 5780 }, { "epoch": 0.24, "grad_norm": 17.777988691308142, "learning_rate": 8.160530399210045e-06, "loss": 1.2465, "step": 5785 }, { "epoch": 0.25, "grad_norm": 10.56102143403454, "learning_rate": 8.167583580194668e-06, "loss": 1.2206, "step": 5790 }, { "epoch": 0.25, "grad_norm": 9.398107937329991, "learning_rate": 8.174636761179292e-06, "loss": 1.297, "step": 5795 }, { "epoch": 0.25, "grad_norm": 12.610804066433388, "learning_rate": 8.181689942163915e-06, "loss": 1.2474, "step": 5800 }, { "epoch": 0.25, "grad_norm": 12.748263168704009, "learning_rate": 8.18874312314854e-06, "loss": 1.1945, "step": 5805 }, { "epoch": 0.25, "grad_norm": 22.716600253053812, "learning_rate": 8.195796304133164e-06, "loss": 1.2332, "step": 5810 }, { "epoch": 0.25, "grad_norm": 10.54240303361471, "learning_rate": 8.202849485117788e-06, "loss": 1.2739, "step": 5815 }, { "epoch": 0.25, "grad_norm": 11.835208832050242, "learning_rate": 8.209902666102413e-06, "loss": 1.2132, "step": 5820 }, { "epoch": 0.25, "grad_norm": 24.55848121281398, "learning_rate": 8.216955847087037e-06, "loss": 1.254, "step": 5825 }, { "epoch": 0.25, "grad_norm": 28.473766456855103, "learning_rate": 8.22400902807166e-06, "loss": 1.2187, "step": 5830 }, { "epoch": 0.25, "grad_norm": 6.971680188358672, "learning_rate": 8.231062209056286e-06, "loss": 1.2146, "step": 5835 }, { "epoch": 0.25, "grad_norm": 29.133127126204222, "learning_rate": 8.23811539004091e-06, "loss": 1.2497, "step": 5840 }, { "epoch": 0.25, "grad_norm": 28.868645770345797, "learning_rate": 8.245168571025533e-06, "loss": 1.2448, "step": 5845 }, { "epoch": 0.25, "grad_norm": 8.03794233105637, "learning_rate": 8.252221752010158e-06, "loss": 1.2285, "step": 5850 }, { "epoch": 0.25, "grad_norm": 29.153293390917938, "learning_rate": 8.25927493299478e-06, "loss": 1.2408, "step": 5855 }, { "epoch": 0.25, "grad_norm": 16.821871459501633, "learning_rate": 8.266328113979405e-06, "loss": 1.251, "step": 5860 }, { "epoch": 0.25, "grad_norm": 23.098621548804456, "learning_rate": 8.273381294964029e-06, "loss": 1.2557, "step": 5865 }, { "epoch": 0.25, "grad_norm": 33.754388048352, "learning_rate": 8.280434475948653e-06, "loss": 1.2415, "step": 5870 }, { "epoch": 0.25, "grad_norm": 17.433105404568604, "learning_rate": 8.287487656933278e-06, "loss": 1.2526, "step": 5875 }, { "epoch": 0.25, "grad_norm": 9.112171234106306, "learning_rate": 8.294540837917901e-06, "loss": 1.2705, "step": 5880 }, { "epoch": 0.25, "grad_norm": 27.981100316425948, "learning_rate": 8.301594018902525e-06, "loss": 1.2355, "step": 5885 }, { "epoch": 0.25, "grad_norm": 16.9682915748727, "learning_rate": 8.30864719988715e-06, "loss": 1.2916, "step": 5890 }, { "epoch": 0.25, "grad_norm": 9.561662484568016, "learning_rate": 8.315700380871774e-06, "loss": 1.2448, "step": 5895 }, { "epoch": 0.25, "grad_norm": 13.83498002391689, "learning_rate": 8.322753561856397e-06, "loss": 1.2385, "step": 5900 }, { "epoch": 0.25, "grad_norm": 41.10703286431987, "learning_rate": 8.329806742841023e-06, "loss": 1.2532, "step": 5905 }, { "epoch": 0.25, "grad_norm": 41.24506108790113, "learning_rate": 8.336859923825646e-06, "loss": 1.2591, "step": 5910 }, { "epoch": 0.25, "grad_norm": 11.094347359313005, "learning_rate": 8.34391310481027e-06, "loss": 1.1914, "step": 5915 }, { "epoch": 0.25, "grad_norm": 10.222070668505308, "learning_rate": 8.350966285794894e-06, "loss": 1.2535, "step": 5920 }, { "epoch": 0.25, "grad_norm": 40.26337514147059, "learning_rate": 8.358019466779517e-06, "loss": 1.2681, "step": 5925 }, { "epoch": 0.25, "grad_norm": 10.333942163534006, "learning_rate": 8.365072647764142e-06, "loss": 1.2403, "step": 5930 }, { "epoch": 0.25, "grad_norm": 32.37180920346308, "learning_rate": 8.372125828748766e-06, "loss": 1.2256, "step": 5935 }, { "epoch": 0.25, "grad_norm": 8.203347877396613, "learning_rate": 8.37917900973339e-06, "loss": 1.2219, "step": 5940 }, { "epoch": 0.25, "grad_norm": 14.855367688307293, "learning_rate": 8.386232190718015e-06, "loss": 1.2552, "step": 5945 }, { "epoch": 0.25, "grad_norm": 31.131371072859213, "learning_rate": 8.393285371702639e-06, "loss": 1.233, "step": 5950 }, { "epoch": 0.25, "grad_norm": 21.566081614895563, "learning_rate": 8.400338552687262e-06, "loss": 1.217, "step": 5955 }, { "epoch": 0.25, "grad_norm": 24.159453225969166, "learning_rate": 8.407391733671887e-06, "loss": 1.2386, "step": 5960 }, { "epoch": 0.25, "grad_norm": 33.781014459154264, "learning_rate": 8.414444914656511e-06, "loss": 1.2681, "step": 5965 }, { "epoch": 0.25, "grad_norm": 51.31754001607082, "learning_rate": 8.421498095641135e-06, "loss": 1.2287, "step": 5970 }, { "epoch": 0.25, "grad_norm": 14.985695551876963, "learning_rate": 8.42855127662576e-06, "loss": 1.2531, "step": 5975 }, { "epoch": 0.25, "grad_norm": 9.859082281561756, "learning_rate": 8.435604457610384e-06, "loss": 1.2726, "step": 5980 }, { "epoch": 0.25, "grad_norm": 23.738347826502846, "learning_rate": 8.442657638595007e-06, "loss": 1.3014, "step": 5985 }, { "epoch": 0.25, "grad_norm": 80.91375473200746, "learning_rate": 8.44971081957963e-06, "loss": 1.2805, "step": 5990 }, { "epoch": 0.25, "grad_norm": 75.49345948735171, "learning_rate": 8.456764000564254e-06, "loss": 1.2187, "step": 5995 }, { "epoch": 0.25, "grad_norm": 10.493516296256855, "learning_rate": 8.46381718154888e-06, "loss": 1.2316, "step": 6000 }, { "epoch": 0.25, "grad_norm": 13.678058973313023, "learning_rate": 8.470870362533503e-06, "loss": 1.2581, "step": 6005 }, { "epoch": 0.25, "grad_norm": 36.77265560264806, "learning_rate": 8.477923543518127e-06, "loss": 1.2469, "step": 6010 }, { "epoch": 0.25, "grad_norm": 40.824198800378696, "learning_rate": 8.484976724502752e-06, "loss": 1.2639, "step": 6015 }, { "epoch": 0.25, "grad_norm": 22.046645380727732, "learning_rate": 8.492029905487376e-06, "loss": 1.2586, "step": 6020 }, { "epoch": 0.25, "grad_norm": 13.701584889580545, "learning_rate": 8.499083086472e-06, "loss": 1.2471, "step": 6025 }, { "epoch": 0.26, "grad_norm": 38.999248208946106, "learning_rate": 8.506136267456625e-06, "loss": 1.2623, "step": 6030 }, { "epoch": 0.26, "grad_norm": 27.593393111353738, "learning_rate": 8.513189448441248e-06, "loss": 1.2096, "step": 6035 }, { "epoch": 0.26, "grad_norm": 24.224788728771827, "learning_rate": 8.520242629425872e-06, "loss": 1.2188, "step": 6040 }, { "epoch": 0.26, "grad_norm": 12.235996778586854, "learning_rate": 8.527295810410497e-06, "loss": 1.2542, "step": 6045 }, { "epoch": 0.26, "grad_norm": 29.151698753961035, "learning_rate": 8.534348991395119e-06, "loss": 1.2384, "step": 6050 }, { "epoch": 0.26, "grad_norm": 34.17689264705203, "learning_rate": 8.541402172379744e-06, "loss": 1.2338, "step": 6055 }, { "epoch": 0.26, "grad_norm": 7.745395446213094, "learning_rate": 8.548455353364368e-06, "loss": 1.2153, "step": 6060 }, { "epoch": 0.26, "grad_norm": 32.48340329821858, "learning_rate": 8.555508534348991e-06, "loss": 1.2329, "step": 6065 }, { "epoch": 0.26, "grad_norm": 51.215089575102965, "learning_rate": 8.562561715333617e-06, "loss": 1.2319, "step": 6070 }, { "epoch": 0.26, "grad_norm": 24.584438140553477, "learning_rate": 8.56961489631824e-06, "loss": 1.2332, "step": 6075 }, { "epoch": 0.26, "grad_norm": 11.967200680729418, "learning_rate": 8.576668077302864e-06, "loss": 1.254, "step": 6080 }, { "epoch": 0.26, "grad_norm": 38.11061044214363, "learning_rate": 8.58372125828749e-06, "loss": 1.2552, "step": 6085 }, { "epoch": 0.26, "grad_norm": 54.682670316095404, "learning_rate": 8.590774439272113e-06, "loss": 1.2329, "step": 6090 }, { "epoch": 0.26, "grad_norm": 14.26925850042772, "learning_rate": 8.597827620256736e-06, "loss": 1.2312, "step": 6095 }, { "epoch": 0.26, "grad_norm": 18.7128259255319, "learning_rate": 8.604880801241362e-06, "loss": 1.2597, "step": 6100 }, { "epoch": 0.26, "grad_norm": 12.500827664986156, "learning_rate": 8.611933982225985e-06, "loss": 1.2203, "step": 6105 }, { "epoch": 0.26, "grad_norm": 6.919881059791322, "learning_rate": 8.618987163210609e-06, "loss": 1.2393, "step": 6110 }, { "epoch": 0.26, "grad_norm": 21.491534830197576, "learning_rate": 8.626040344195233e-06, "loss": 1.223, "step": 6115 }, { "epoch": 0.26, "grad_norm": 13.02363809501699, "learning_rate": 8.633093525179856e-06, "loss": 1.2377, "step": 6120 }, { "epoch": 0.26, "grad_norm": 16.420006385311563, "learning_rate": 8.640146706164481e-06, "loss": 1.2609, "step": 6125 }, { "epoch": 0.26, "grad_norm": 20.282643488172756, "learning_rate": 8.647199887149105e-06, "loss": 1.2317, "step": 6130 }, { "epoch": 0.26, "grad_norm": 8.959388297459531, "learning_rate": 8.654253068133729e-06, "loss": 1.1877, "step": 6135 }, { "epoch": 0.26, "grad_norm": 13.17308093008255, "learning_rate": 8.661306249118354e-06, "loss": 1.2313, "step": 6140 }, { "epoch": 0.26, "grad_norm": 9.94757800224602, "learning_rate": 8.668359430102977e-06, "loss": 1.282, "step": 6145 }, { "epoch": 0.26, "grad_norm": 21.881167665155324, "learning_rate": 8.675412611087601e-06, "loss": 1.213, "step": 6150 }, { "epoch": 0.26, "grad_norm": 9.648062439059323, "learning_rate": 8.682465792072225e-06, "loss": 1.2579, "step": 6155 }, { "epoch": 0.26, "grad_norm": 8.385133852783163, "learning_rate": 8.68951897305685e-06, "loss": 1.1988, "step": 6160 }, { "epoch": 0.26, "grad_norm": 9.878996002885085, "learning_rate": 8.696572154041474e-06, "loss": 1.2769, "step": 6165 }, { "epoch": 0.26, "grad_norm": 24.433149853698712, "learning_rate": 8.703625335026097e-06, "loss": 1.2139, "step": 6170 }, { "epoch": 0.26, "grad_norm": 8.331256218984516, "learning_rate": 8.71067851601072e-06, "loss": 1.2681, "step": 6175 }, { "epoch": 0.26, "grad_norm": 14.511879388422308, "learning_rate": 8.717731696995344e-06, "loss": 1.2452, "step": 6180 }, { "epoch": 0.26, "grad_norm": 17.812720243099903, "learning_rate": 8.72478487797997e-06, "loss": 1.229, "step": 6185 }, { "epoch": 0.26, "grad_norm": 16.770009248289686, "learning_rate": 8.731838058964593e-06, "loss": 1.247, "step": 6190 }, { "epoch": 0.26, "grad_norm": 57.829209743218975, "learning_rate": 8.738891239949217e-06, "loss": 1.3041, "step": 6195 }, { "epoch": 0.26, "grad_norm": 99.97774675141999, "learning_rate": 8.745944420933842e-06, "loss": 1.1974, "step": 6200 }, { "epoch": 0.26, "grad_norm": 54.45871800251744, "learning_rate": 8.752997601918466e-06, "loss": 1.2489, "step": 6205 }, { "epoch": 0.26, "grad_norm": 25.189597602922817, "learning_rate": 8.76005078290309e-06, "loss": 1.2836, "step": 6210 }, { "epoch": 0.26, "grad_norm": 20.13169430046746, "learning_rate": 8.767103963887715e-06, "loss": 1.2246, "step": 6215 }, { "epoch": 0.26, "grad_norm": 73.49461140527075, "learning_rate": 8.774157144872338e-06, "loss": 1.2681, "step": 6220 }, { "epoch": 0.26, "grad_norm": 7.636753707659049, "learning_rate": 8.781210325856962e-06, "loss": 1.2503, "step": 6225 }, { "epoch": 0.26, "grad_norm": 13.662445957353112, "learning_rate": 8.788263506841587e-06, "loss": 1.1943, "step": 6230 }, { "epoch": 0.26, "grad_norm": 12.197452669599823, "learning_rate": 8.79531668782621e-06, "loss": 1.2352, "step": 6235 }, { "epoch": 0.26, "grad_norm": 19.297528218532758, "learning_rate": 8.802369868810834e-06, "loss": 1.2723, "step": 6240 }, { "epoch": 0.26, "grad_norm": 20.29766624538617, "learning_rate": 8.809423049795458e-06, "loss": 1.287, "step": 6245 }, { "epoch": 0.26, "grad_norm": 9.211322098722844, "learning_rate": 8.816476230780082e-06, "loss": 1.2602, "step": 6250 }, { "epoch": 0.26, "grad_norm": 41.46066578954416, "learning_rate": 8.823529411764707e-06, "loss": 1.2413, "step": 6255 }, { "epoch": 0.26, "grad_norm": 8.084909680587566, "learning_rate": 8.83058259274933e-06, "loss": 1.2291, "step": 6260 }, { "epoch": 0.27, "grad_norm": 18.44768238199645, "learning_rate": 8.837635773733954e-06, "loss": 1.2437, "step": 6265 }, { "epoch": 0.27, "grad_norm": 10.24746135596397, "learning_rate": 8.84468895471858e-06, "loss": 1.2853, "step": 6270 }, { "epoch": 0.27, "grad_norm": 14.996544223196262, "learning_rate": 8.851742135703203e-06, "loss": 1.2332, "step": 6275 }, { "epoch": 0.27, "grad_norm": 11.337923811698634, "learning_rate": 8.858795316687826e-06, "loss": 1.2634, "step": 6280 }, { "epoch": 0.27, "grad_norm": 26.86281771116737, "learning_rate": 8.865848497672452e-06, "loss": 1.2536, "step": 6285 }, { "epoch": 0.27, "grad_norm": 13.650889930117474, "learning_rate": 8.872901678657075e-06, "loss": 1.237, "step": 6290 }, { "epoch": 0.27, "grad_norm": 15.607836824309675, "learning_rate": 8.879954859641699e-06, "loss": 1.2362, "step": 6295 }, { "epoch": 0.27, "grad_norm": 16.340547701232726, "learning_rate": 8.887008040626324e-06, "loss": 1.231, "step": 6300 }, { "epoch": 0.27, "grad_norm": 10.975445501193548, "learning_rate": 8.894061221610946e-06, "loss": 1.2056, "step": 6305 }, { "epoch": 0.27, "grad_norm": 9.705302544141029, "learning_rate": 8.901114402595571e-06, "loss": 1.2842, "step": 6310 }, { "epoch": 0.27, "grad_norm": 12.47465157592828, "learning_rate": 8.908167583580195e-06, "loss": 1.1873, "step": 6315 }, { "epoch": 0.27, "grad_norm": 8.636478594612388, "learning_rate": 8.915220764564819e-06, "loss": 1.3166, "step": 6320 }, { "epoch": 0.27, "grad_norm": 12.77143050385025, "learning_rate": 8.922273945549444e-06, "loss": 1.2491, "step": 6325 }, { "epoch": 0.27, "grad_norm": 12.06744959236745, "learning_rate": 8.929327126534068e-06, "loss": 1.2689, "step": 6330 }, { "epoch": 0.27, "grad_norm": 12.405857344209824, "learning_rate": 8.936380307518691e-06, "loss": 1.2329, "step": 6335 }, { "epoch": 0.27, "grad_norm": 37.69327845881934, "learning_rate": 8.943433488503316e-06, "loss": 1.2392, "step": 6340 }, { "epoch": 0.27, "grad_norm": 24.45051235208879, "learning_rate": 8.95048666948794e-06, "loss": 1.3396, "step": 6345 }, { "epoch": 0.27, "grad_norm": 13.613444741263459, "learning_rate": 8.957539850472564e-06, "loss": 1.2743, "step": 6350 }, { "epoch": 0.27, "grad_norm": 22.628575415059263, "learning_rate": 8.964593031457189e-06, "loss": 1.2392, "step": 6355 }, { "epoch": 0.27, "grad_norm": 11.993092754354002, "learning_rate": 8.971646212441813e-06, "loss": 1.2434, "step": 6360 }, { "epoch": 0.27, "grad_norm": 10.008685196231513, "learning_rate": 8.978699393426436e-06, "loss": 1.2424, "step": 6365 }, { "epoch": 0.27, "grad_norm": 22.11892486800647, "learning_rate": 8.98575257441106e-06, "loss": 1.2522, "step": 6370 }, { "epoch": 0.27, "grad_norm": 16.695346758875903, "learning_rate": 8.992805755395683e-06, "loss": 1.2533, "step": 6375 }, { "epoch": 0.27, "grad_norm": 20.153682954502635, "learning_rate": 8.999858936380309e-06, "loss": 1.2763, "step": 6380 }, { "epoch": 0.27, "grad_norm": 29.32547480642583, "learning_rate": 9.006912117364932e-06, "loss": 1.2429, "step": 6385 }, { "epoch": 0.27, "grad_norm": 56.28021316586544, "learning_rate": 9.013965298349556e-06, "loss": 1.2663, "step": 6390 }, { "epoch": 0.27, "grad_norm": 16.2498082014171, "learning_rate": 9.021018479334181e-06, "loss": 1.2698, "step": 6395 }, { "epoch": 0.27, "grad_norm": 12.103811062616357, "learning_rate": 9.028071660318805e-06, "loss": 1.2241, "step": 6400 }, { "epoch": 0.27, "grad_norm": 22.002297844588433, "learning_rate": 9.035124841303428e-06, "loss": 1.2484, "step": 6405 }, { "epoch": 0.27, "grad_norm": 45.690699292059094, "learning_rate": 9.042178022288054e-06, "loss": 1.2834, "step": 6410 }, { "epoch": 0.27, "grad_norm": 24.57463607698267, "learning_rate": 9.049231203272677e-06, "loss": 1.2319, "step": 6415 }, { "epoch": 0.27, "grad_norm": 9.18168321404906, "learning_rate": 9.0562843842573e-06, "loss": 1.2147, "step": 6420 }, { "epoch": 0.27, "grad_norm": 20.25467353498429, "learning_rate": 9.063337565241926e-06, "loss": 1.2572, "step": 6425 }, { "epoch": 0.27, "grad_norm": 19.442951622927744, "learning_rate": 9.07039074622655e-06, "loss": 1.2408, "step": 6430 }, { "epoch": 0.27, "grad_norm": 12.571936240030306, "learning_rate": 9.077443927211173e-06, "loss": 1.228, "step": 6435 }, { "epoch": 0.27, "grad_norm": 10.258119211497204, "learning_rate": 9.084497108195797e-06, "loss": 1.2615, "step": 6440 }, { "epoch": 0.27, "grad_norm": 8.547620224390553, "learning_rate": 9.09155028918042e-06, "loss": 1.2115, "step": 6445 }, { "epoch": 0.27, "grad_norm": 8.738589655965189, "learning_rate": 9.098603470165046e-06, "loss": 1.2314, "step": 6450 }, { "epoch": 0.27, "grad_norm": 12.209345287249919, "learning_rate": 9.10565665114967e-06, "loss": 1.2512, "step": 6455 }, { "epoch": 0.27, "grad_norm": 8.23917268726962, "learning_rate": 9.112709832134293e-06, "loss": 1.2604, "step": 6460 }, { "epoch": 0.27, "grad_norm": 8.885138688466684, "learning_rate": 9.119763013118918e-06, "loss": 1.1988, "step": 6465 }, { "epoch": 0.27, "grad_norm": 20.387843405529964, "learning_rate": 9.126816194103542e-06, "loss": 1.2722, "step": 6470 }, { "epoch": 0.27, "grad_norm": 8.078784511863649, "learning_rate": 9.133869375088165e-06, "loss": 1.2903, "step": 6475 }, { "epoch": 0.27, "grad_norm": 40.183169101161546, "learning_rate": 9.14092255607279e-06, "loss": 1.2208, "step": 6480 }, { "epoch": 0.27, "grad_norm": 19.525544665540576, "learning_rate": 9.147975737057414e-06, "loss": 1.2695, "step": 6485 }, { "epoch": 0.27, "grad_norm": 8.910153420949271, "learning_rate": 9.155028918042038e-06, "loss": 1.21, "step": 6490 }, { "epoch": 0.27, "grad_norm": 39.21260531852875, "learning_rate": 9.162082099026662e-06, "loss": 1.2776, "step": 6495 }, { "epoch": 0.28, "grad_norm": 15.92815337795953, "learning_rate": 9.169135280011285e-06, "loss": 1.2124, "step": 6500 }, { "epoch": 0.28, "grad_norm": 26.03417346214362, "learning_rate": 9.176188460995909e-06, "loss": 1.2389, "step": 6505 }, { "epoch": 0.28, "grad_norm": 99.72050497541237, "learning_rate": 9.183241641980534e-06, "loss": 1.243, "step": 6510 }, { "epoch": 0.28, "grad_norm": 46.92661924822619, "learning_rate": 9.190294822965158e-06, "loss": 1.2116, "step": 6515 }, { "epoch": 0.28, "grad_norm": 13.603076940015615, "learning_rate": 9.197348003949781e-06, "loss": 1.2536, "step": 6520 }, { "epoch": 0.28, "grad_norm": 40.0576205211251, "learning_rate": 9.204401184934406e-06, "loss": 1.2216, "step": 6525 }, { "epoch": 0.28, "grad_norm": 10.887788606086115, "learning_rate": 9.21145436591903e-06, "loss": 1.2492, "step": 6530 }, { "epoch": 0.28, "grad_norm": 20.05273742866196, "learning_rate": 9.218507546903654e-06, "loss": 1.2406, "step": 6535 }, { "epoch": 0.28, "grad_norm": 22.823627519107824, "learning_rate": 9.225560727888279e-06, "loss": 1.2376, "step": 6540 }, { "epoch": 0.28, "grad_norm": 13.041483215640545, "learning_rate": 9.232613908872903e-06, "loss": 1.269, "step": 6545 }, { "epoch": 0.28, "grad_norm": 34.13540556399942, "learning_rate": 9.239667089857526e-06, "loss": 1.2383, "step": 6550 }, { "epoch": 0.28, "grad_norm": 18.320458508306157, "learning_rate": 9.246720270842151e-06, "loss": 1.2276, "step": 6555 }, { "epoch": 0.28, "grad_norm": 7.5330072443733656, "learning_rate": 9.253773451826773e-06, "loss": 1.2875, "step": 6560 }, { "epoch": 0.28, "grad_norm": 9.27703167706361, "learning_rate": 9.260826632811399e-06, "loss": 1.2494, "step": 6565 }, { "epoch": 0.28, "grad_norm": 14.531046672259434, "learning_rate": 9.267879813796022e-06, "loss": 1.2304, "step": 6570 }, { "epoch": 0.28, "grad_norm": 15.248786361276295, "learning_rate": 9.274932994780646e-06, "loss": 1.2394, "step": 6575 }, { "epoch": 0.28, "grad_norm": 27.714492410526507, "learning_rate": 9.281986175765271e-06, "loss": 1.2613, "step": 6580 }, { "epoch": 0.28, "grad_norm": 19.51501165108399, "learning_rate": 9.289039356749895e-06, "loss": 1.2815, "step": 6585 }, { "epoch": 0.28, "grad_norm": 25.436359038800738, "learning_rate": 9.296092537734518e-06, "loss": 1.2282, "step": 6590 }, { "epoch": 0.28, "grad_norm": 43.23269701575712, "learning_rate": 9.303145718719144e-06, "loss": 1.2262, "step": 6595 }, { "epoch": 0.28, "grad_norm": 21.101540281939712, "learning_rate": 9.310198899703767e-06, "loss": 1.2463, "step": 6600 }, { "epoch": 0.28, "grad_norm": 34.49422984799425, "learning_rate": 9.31725208068839e-06, "loss": 1.2238, "step": 6605 }, { "epoch": 0.28, "grad_norm": 33.850133561648185, "learning_rate": 9.324305261673016e-06, "loss": 1.2259, "step": 6610 }, { "epoch": 0.28, "grad_norm": 10.220414096605742, "learning_rate": 9.33135844265764e-06, "loss": 1.26, "step": 6615 }, { "epoch": 0.28, "grad_norm": 10.9514579009943, "learning_rate": 9.338411623642263e-06, "loss": 1.2266, "step": 6620 }, { "epoch": 0.28, "grad_norm": 11.419869255961212, "learning_rate": 9.345464804626887e-06, "loss": 1.2271, "step": 6625 }, { "epoch": 0.28, "grad_norm": 9.086626967310414, "learning_rate": 9.35251798561151e-06, "loss": 1.226, "step": 6630 }, { "epoch": 0.28, "grad_norm": 11.506884833581381, "learning_rate": 9.359571166596136e-06, "loss": 1.2283, "step": 6635 }, { "epoch": 0.28, "grad_norm": 7.91163949574279, "learning_rate": 9.36662434758076e-06, "loss": 1.2172, "step": 6640 }, { "epoch": 0.28, "grad_norm": 12.803823993791173, "learning_rate": 9.373677528565383e-06, "loss": 1.2227, "step": 6645 }, { "epoch": 0.28, "grad_norm": 32.0430165125269, "learning_rate": 9.380730709550008e-06, "loss": 1.2504, "step": 6650 }, { "epoch": 0.28, "grad_norm": 44.66390322780334, "learning_rate": 9.387783890534632e-06, "loss": 1.252, "step": 6655 }, { "epoch": 0.28, "grad_norm": 97.49006407157172, "learning_rate": 9.394837071519255e-06, "loss": 1.2577, "step": 6660 }, { "epoch": 0.28, "grad_norm": 62.61288477401787, "learning_rate": 9.40189025250388e-06, "loss": 1.2364, "step": 6665 }, { "epoch": 0.28, "grad_norm": 25.024011085526986, "learning_rate": 9.408943433488504e-06, "loss": 1.3067, "step": 6670 }, { "epoch": 0.28, "grad_norm": 69.01029904561533, "learning_rate": 9.415996614473128e-06, "loss": 1.3036, "step": 6675 }, { "epoch": 0.28, "grad_norm": 25.084797032255793, "learning_rate": 9.423049795457753e-06, "loss": 1.2838, "step": 6680 }, { "epoch": 0.28, "grad_norm": 19.60869619954398, "learning_rate": 9.430102976442377e-06, "loss": 1.2856, "step": 6685 }, { "epoch": 0.28, "grad_norm": 11.290648170649316, "learning_rate": 9.437156157427e-06, "loss": 1.2291, "step": 6690 }, { "epoch": 0.28, "grad_norm": 15.222827943551566, "learning_rate": 9.444209338411624e-06, "loss": 1.2397, "step": 6695 }, { "epoch": 0.28, "grad_norm": 9.133795897181006, "learning_rate": 9.451262519396248e-06, "loss": 1.2603, "step": 6700 }, { "epoch": 0.28, "grad_norm": 7.681778696999336, "learning_rate": 9.458315700380873e-06, "loss": 1.2379, "step": 6705 }, { "epoch": 0.28, "grad_norm": 10.5031107852674, "learning_rate": 9.465368881365497e-06, "loss": 1.2541, "step": 6710 }, { "epoch": 0.28, "grad_norm": 14.839698285967577, "learning_rate": 9.47242206235012e-06, "loss": 1.2514, "step": 6715 }, { "epoch": 0.28, "grad_norm": 8.061437442300123, "learning_rate": 9.479475243334745e-06, "loss": 1.2607, "step": 6720 }, { "epoch": 0.28, "grad_norm": 7.432480326890641, "learning_rate": 9.486528424319369e-06, "loss": 1.2898, "step": 6725 }, { "epoch": 0.28, "grad_norm": 26.456548063118635, "learning_rate": 9.493581605303993e-06, "loss": 1.2432, "step": 6730 }, { "epoch": 0.29, "grad_norm": 36.9215479602442, "learning_rate": 9.500634786288618e-06, "loss": 1.2933, "step": 6735 }, { "epoch": 0.29, "grad_norm": 27.30788433252015, "learning_rate": 9.507687967273242e-06, "loss": 1.2425, "step": 6740 }, { "epoch": 0.29, "grad_norm": 8.314027848854106, "learning_rate": 9.514741148257865e-06, "loss": 1.2306, "step": 6745 }, { "epoch": 0.29, "grad_norm": 21.18878334837518, "learning_rate": 9.521794329242489e-06, "loss": 1.2251, "step": 6750 }, { "epoch": 0.29, "grad_norm": 9.69161743226239, "learning_rate": 9.528847510227112e-06, "loss": 1.2971, "step": 6755 }, { "epoch": 0.29, "grad_norm": 18.398504422452305, "learning_rate": 9.535900691211738e-06, "loss": 1.2514, "step": 6760 }, { "epoch": 0.29, "grad_norm": 25.03698505170366, "learning_rate": 9.542953872196361e-06, "loss": 1.2264, "step": 6765 }, { "epoch": 0.29, "grad_norm": 24.830659858829385, "learning_rate": 9.550007053180985e-06, "loss": 1.2423, "step": 6770 }, { "epoch": 0.29, "grad_norm": 11.721940880879972, "learning_rate": 9.55706023416561e-06, "loss": 1.2323, "step": 6775 }, { "epoch": 0.29, "grad_norm": 17.333025291327758, "learning_rate": 9.564113415150234e-06, "loss": 1.2237, "step": 6780 }, { "epoch": 0.29, "grad_norm": 13.070519039905475, "learning_rate": 9.571166596134857e-06, "loss": 1.2337, "step": 6785 }, { "epoch": 0.29, "grad_norm": 9.17048396112688, "learning_rate": 9.578219777119483e-06, "loss": 1.2148, "step": 6790 }, { "epoch": 0.29, "grad_norm": 33.68604246681523, "learning_rate": 9.585272958104106e-06, "loss": 1.2104, "step": 6795 }, { "epoch": 0.29, "grad_norm": 27.06048167618181, "learning_rate": 9.59232613908873e-06, "loss": 1.2669, "step": 6800 }, { "epoch": 0.29, "grad_norm": 51.53117728762783, "learning_rate": 9.599379320073355e-06, "loss": 1.2563, "step": 6805 }, { "epoch": 0.29, "grad_norm": 25.84955784474549, "learning_rate": 9.606432501057979e-06, "loss": 1.2569, "step": 6810 }, { "epoch": 0.29, "grad_norm": 10.361937831104246, "learning_rate": 9.613485682042602e-06, "loss": 1.2629, "step": 6815 }, { "epoch": 0.29, "grad_norm": 40.58280208568006, "learning_rate": 9.620538863027226e-06, "loss": 1.2569, "step": 6820 }, { "epoch": 0.29, "grad_norm": 9.147141882700138, "learning_rate": 9.62759204401185e-06, "loss": 1.2467, "step": 6825 }, { "epoch": 0.29, "grad_norm": 11.276443836964162, "learning_rate": 9.634645224996475e-06, "loss": 1.227, "step": 6830 }, { "epoch": 0.29, "grad_norm": 38.07613603311421, "learning_rate": 9.641698405981098e-06, "loss": 1.2474, "step": 6835 }, { "epoch": 0.29, "grad_norm": 17.687385407167028, "learning_rate": 9.648751586965722e-06, "loss": 1.2672, "step": 6840 }, { "epoch": 0.29, "grad_norm": 10.298038992959794, "learning_rate": 9.655804767950346e-06, "loss": 1.2562, "step": 6845 }, { "epoch": 0.29, "grad_norm": 9.810686984475629, "learning_rate": 9.66285794893497e-06, "loss": 1.2536, "step": 6850 }, { "epoch": 0.29, "grad_norm": 8.555461508206447, "learning_rate": 9.669911129919594e-06, "loss": 1.2054, "step": 6855 }, { "epoch": 0.29, "grad_norm": 25.506303203520943, "learning_rate": 9.676964310904218e-06, "loss": 1.2778, "step": 6860 }, { "epoch": 0.29, "grad_norm": 50.132857462998636, "learning_rate": 9.684017491888843e-06, "loss": 1.2548, "step": 6865 }, { "epoch": 0.29, "grad_norm": 84.18993846114763, "learning_rate": 9.691070672873467e-06, "loss": 1.2724, "step": 6870 }, { "epoch": 0.29, "grad_norm": 48.67721445772831, "learning_rate": 9.69812385385809e-06, "loss": 1.2141, "step": 6875 }, { "epoch": 0.29, "grad_norm": 15.120591634256032, "learning_rate": 9.705177034842714e-06, "loss": 1.2837, "step": 6880 }, { "epoch": 0.29, "grad_norm": 18.91538361254449, "learning_rate": 9.712230215827338e-06, "loss": 1.2584, "step": 6885 }, { "epoch": 0.29, "grad_norm": 8.264430773104124, "learning_rate": 9.719283396811963e-06, "loss": 1.226, "step": 6890 }, { "epoch": 0.29, "grad_norm": 37.88382063845113, "learning_rate": 9.726336577796587e-06, "loss": 1.2702, "step": 6895 }, { "epoch": 0.29, "grad_norm": 19.170282497488508, "learning_rate": 9.73338975878121e-06, "loss": 1.3045, "step": 6900 }, { "epoch": 0.29, "grad_norm": 29.493663417607124, "learning_rate": 9.740442939765835e-06, "loss": 1.2242, "step": 6905 }, { "epoch": 0.29, "grad_norm": 13.667086924220484, "learning_rate": 9.747496120750459e-06, "loss": 1.2449, "step": 6910 }, { "epoch": 0.29, "grad_norm": 68.56140151164675, "learning_rate": 9.754549301735083e-06, "loss": 1.2189, "step": 6915 }, { "epoch": 0.29, "grad_norm": 29.96503198593971, "learning_rate": 9.761602482719708e-06, "loss": 1.2171, "step": 6920 }, { "epoch": 0.29, "grad_norm": 8.155145918203711, "learning_rate": 9.768655663704332e-06, "loss": 1.2007, "step": 6925 }, { "epoch": 0.29, "grad_norm": 7.511859808137694, "learning_rate": 9.775708844688955e-06, "loss": 1.2025, "step": 6930 }, { "epoch": 0.29, "grad_norm": 20.271716498027313, "learning_rate": 9.78276202567358e-06, "loss": 1.2113, "step": 6935 }, { "epoch": 0.29, "grad_norm": 8.139895279217205, "learning_rate": 9.789815206658204e-06, "loss": 1.2641, "step": 6940 }, { "epoch": 0.29, "grad_norm": 40.850804477581036, "learning_rate": 9.796868387642828e-06, "loss": 1.2336, "step": 6945 }, { "epoch": 0.29, "grad_norm": 30.815248966199405, "learning_rate": 9.803921568627451e-06, "loss": 1.2198, "step": 6950 }, { "epoch": 0.29, "grad_norm": 18.216597164118205, "learning_rate": 9.810974749612075e-06, "loss": 1.2363, "step": 6955 }, { "epoch": 0.29, "grad_norm": 32.232897698220725, "learning_rate": 9.8180279305967e-06, "loss": 1.2591, "step": 6960 }, { "epoch": 0.29, "grad_norm": 13.372140403537237, "learning_rate": 9.825081111581324e-06, "loss": 1.2617, "step": 6965 }, { "epoch": 0.29, "grad_norm": 27.69909019104548, "learning_rate": 9.832134292565947e-06, "loss": 1.2243, "step": 6970 }, { "epoch": 0.3, "grad_norm": 10.394667773664086, "learning_rate": 9.839187473550573e-06, "loss": 1.2555, "step": 6975 }, { "epoch": 0.3, "grad_norm": 8.234862274671976, "learning_rate": 9.846240654535196e-06, "loss": 1.2151, "step": 6980 }, { "epoch": 0.3, "grad_norm": 8.849116326056897, "learning_rate": 9.85329383551982e-06, "loss": 1.2567, "step": 6985 }, { "epoch": 0.3, "grad_norm": 13.259288433082045, "learning_rate": 9.860347016504445e-06, "loss": 1.262, "step": 6990 }, { "epoch": 0.3, "grad_norm": 18.285717134687246, "learning_rate": 9.867400197489069e-06, "loss": 1.2321, "step": 6995 }, { "epoch": 0.3, "grad_norm": 9.882948609293603, "learning_rate": 9.874453378473692e-06, "loss": 1.244, "step": 7000 }, { "epoch": 0.3, "grad_norm": 18.44038198421586, "learning_rate": 9.881506559458318e-06, "loss": 1.2736, "step": 7005 }, { "epoch": 0.3, "grad_norm": 8.523441913220164, "learning_rate": 9.88855974044294e-06, "loss": 1.2524, "step": 7010 }, { "epoch": 0.3, "grad_norm": 13.735473518728714, "learning_rate": 9.895612921427565e-06, "loss": 1.2293, "step": 7015 }, { "epoch": 0.3, "grad_norm": 26.071146100547974, "learning_rate": 9.902666102412188e-06, "loss": 1.2245, "step": 7020 }, { "epoch": 0.3, "grad_norm": 10.670824286415508, "learning_rate": 9.909719283396812e-06, "loss": 1.2643, "step": 7025 }, { "epoch": 0.3, "grad_norm": 9.696896219798152, "learning_rate": 9.916772464381437e-06, "loss": 1.2103, "step": 7030 }, { "epoch": 0.3, "grad_norm": 10.637212164904719, "learning_rate": 9.923825645366061e-06, "loss": 1.2176, "step": 7035 }, { "epoch": 0.3, "grad_norm": 23.71924414263371, "learning_rate": 9.930878826350684e-06, "loss": 1.2756, "step": 7040 }, { "epoch": 0.3, "grad_norm": 9.989907121354511, "learning_rate": 9.93793200733531e-06, "loss": 1.2984, "step": 7045 }, { "epoch": 0.3, "grad_norm": 16.661405452546745, "learning_rate": 9.944985188319933e-06, "loss": 1.2339, "step": 7050 }, { "epoch": 0.3, "grad_norm": 8.547809511768321, "learning_rate": 9.952038369304557e-06, "loss": 1.2349, "step": 7055 }, { "epoch": 0.3, "grad_norm": 8.34676097748933, "learning_rate": 9.959091550289182e-06, "loss": 1.2088, "step": 7060 }, { "epoch": 0.3, "grad_norm": 14.030020061469806, "learning_rate": 9.966144731273806e-06, "loss": 1.2487, "step": 7065 }, { "epoch": 0.3, "grad_norm": 33.3367943143273, "learning_rate": 9.97319791225843e-06, "loss": 1.1999, "step": 7070 }, { "epoch": 0.3, "grad_norm": 27.162683049854188, "learning_rate": 9.980251093243053e-06, "loss": 1.2396, "step": 7075 }, { "epoch": 0.3, "grad_norm": 12.318519946093282, "learning_rate": 9.987304274227677e-06, "loss": 1.2475, "step": 7080 }, { "epoch": 0.3, "grad_norm": 55.24829758647621, "learning_rate": 9.994357455212302e-06, "loss": 1.2372, "step": 7085 }, { "epoch": 0.3, "grad_norm": 23.118377040536743, "learning_rate": 9.999999993937295e-06, "loss": 1.2352, "step": 7090 }, { "epoch": 0.3, "grad_norm": 10.567172442993417, "learning_rate": 9.999999781742606e-06, "loss": 1.2776, "step": 7095 }, { "epoch": 0.3, "grad_norm": 28.114851435665756, "learning_rate": 9.999999266412656e-06, "loss": 1.2927, "step": 7100 }, { "epoch": 0.3, "grad_norm": 24.013488713382067, "learning_rate": 9.99999844794748e-06, "loss": 1.2322, "step": 7105 }, { "epoch": 0.3, "grad_norm": 9.806948014231962, "learning_rate": 9.999997326347127e-06, "loss": 1.2195, "step": 7110 }, { "epoch": 0.3, "grad_norm": 11.482976873118554, "learning_rate": 9.999995901611661e-06, "loss": 1.2068, "step": 7115 }, { "epoch": 0.3, "grad_norm": 31.26450098534542, "learning_rate": 9.999994173741175e-06, "loss": 1.251, "step": 7120 }, { "epoch": 0.3, "grad_norm": 14.927637286752303, "learning_rate": 9.999992142735769e-06, "loss": 1.1983, "step": 7125 }, { "epoch": 0.3, "grad_norm": 21.77653752243219, "learning_rate": 9.999989808595565e-06, "loss": 1.2704, "step": 7130 }, { "epoch": 0.3, "grad_norm": 29.279723439352683, "learning_rate": 9.999987171320711e-06, "loss": 1.2205, "step": 7135 }, { "epoch": 0.3, "grad_norm": 29.929199323968046, "learning_rate": 9.99998423091136e-06, "loss": 1.2348, "step": 7140 }, { "epoch": 0.3, "grad_norm": 70.22256448811632, "learning_rate": 9.999980987367693e-06, "loss": 1.2314, "step": 7145 }, { "epoch": 0.3, "grad_norm": 95.94222776095704, "learning_rate": 9.999977440689907e-06, "loss": 1.2624, "step": 7150 }, { "epoch": 0.3, "grad_norm": 87.03008705004844, "learning_rate": 9.999973590878217e-06, "loss": 1.304, "step": 7155 }, { "epoch": 0.3, "grad_norm": 65.70503905454055, "learning_rate": 9.999969437932857e-06, "loss": 1.2935, "step": 7160 }, { "epoch": 0.3, "grad_norm": 58.053975816463925, "learning_rate": 9.999964981854076e-06, "loss": 1.5134, "step": 7165 }, { "epoch": 0.3, "grad_norm": 39.86573643074013, "learning_rate": 9.999960222642147e-06, "loss": 1.3608, "step": 7170 }, { "epoch": 0.3, "grad_norm": 30.920617348566125, "learning_rate": 9.999955160297358e-06, "loss": 1.347, "step": 7175 }, { "epoch": 0.3, "grad_norm": 24.510902413567575, "learning_rate": 9.999949794820015e-06, "loss": 1.2376, "step": 7180 }, { "epoch": 0.3, "grad_norm": 45.94314202948226, "learning_rate": 9.999944126210443e-06, "loss": 1.2942, "step": 7185 }, { "epoch": 0.3, "grad_norm": 40.64924461179615, "learning_rate": 9.999938154468988e-06, "loss": 1.3315, "step": 7190 }, { "epoch": 0.3, "grad_norm": 17.441152486298893, "learning_rate": 9.99993187959601e-06, "loss": 1.3502, "step": 7195 }, { "epoch": 0.3, "grad_norm": 70.72250142919358, "learning_rate": 9.999925301591889e-06, "loss": 1.2887, "step": 7200 }, { "epoch": 0.3, "grad_norm": 85.83398730521338, "learning_rate": 9.999918420457026e-06, "loss": 1.3308, "step": 7205 }, { "epoch": 0.31, "grad_norm": 30.257110268519835, "learning_rate": 9.999911236191835e-06, "loss": 1.265, "step": 7210 }, { "epoch": 0.31, "grad_norm": 16.828095931794895, "learning_rate": 9.999903748796755e-06, "loss": 1.2408, "step": 7215 }, { "epoch": 0.31, "grad_norm": 15.21232869967946, "learning_rate": 9.99989595827224e-06, "loss": 1.2553, "step": 7220 }, { "epoch": 0.31, "grad_norm": 31.846923253910173, "learning_rate": 9.999887864618757e-06, "loss": 1.266, "step": 7225 }, { "epoch": 0.31, "grad_norm": 40.571439949018696, "learning_rate": 9.999879467836802e-06, "loss": 1.2926, "step": 7230 }, { "epoch": 0.31, "grad_norm": 79.6937695566078, "learning_rate": 9.999870767926882e-06, "loss": 1.3066, "step": 7235 }, { "epoch": 0.31, "grad_norm": 57.81566886552214, "learning_rate": 9.999861764889527e-06, "loss": 1.2911, "step": 7240 }, { "epoch": 0.31, "grad_norm": 97.85933247019047, "learning_rate": 9.99985245872528e-06, "loss": 1.257, "step": 7245 }, { "epoch": 0.31, "grad_norm": 58.23713217890289, "learning_rate": 9.999842849434704e-06, "loss": 1.2629, "step": 7250 }, { "epoch": 0.31, "grad_norm": 50.65625672662297, "learning_rate": 9.999832937018385e-06, "loss": 1.2899, "step": 7255 }, { "epoch": 0.31, "grad_norm": 34.63620857818771, "learning_rate": 9.999822721476924e-06, "loss": 1.2584, "step": 7260 }, { "epoch": 0.31, "grad_norm": 59.04136237806621, "learning_rate": 9.999812202810936e-06, "loss": 1.4571, "step": 7265 }, { "epoch": 0.31, "grad_norm": 40.152131744650944, "learning_rate": 9.999801381021064e-06, "loss": 1.48, "step": 7270 }, { "epoch": 0.31, "grad_norm": 167.66359659883003, "learning_rate": 9.99979025610796e-06, "loss": 1.5042, "step": 7275 }, { "epoch": 0.31, "grad_norm": 9.792388454188853, "learning_rate": 9.999778828072301e-06, "loss": 1.37, "step": 7280 }, { "epoch": 0.31, "grad_norm": 63.373408611411264, "learning_rate": 9.999767096914779e-06, "loss": 1.3056, "step": 7285 }, { "epoch": 0.31, "grad_norm": 10.42701285172961, "learning_rate": 9.999755062636105e-06, "loss": 1.4225, "step": 7290 }, { "epoch": 0.31, "grad_norm": 26.463401732432942, "learning_rate": 9.999742725237007e-06, "loss": 1.3252, "step": 7295 }, { "epoch": 0.31, "grad_norm": 15.546278624607, "learning_rate": 9.999730084718237e-06, "loss": 1.2829, "step": 7300 }, { "epoch": 0.31, "grad_norm": 16.439375097900484, "learning_rate": 9.999717141080561e-06, "loss": 1.3155, "step": 7305 }, { "epoch": 0.31, "grad_norm": 23.437065581949568, "learning_rate": 9.999703894324758e-06, "loss": 1.2805, "step": 7310 }, { "epoch": 0.31, "grad_norm": 19.46991340240531, "learning_rate": 9.999690344451636e-06, "loss": 1.245, "step": 7315 }, { "epoch": 0.31, "grad_norm": 23.870439760675467, "learning_rate": 9.999676491462017e-06, "loss": 1.31, "step": 7320 }, { "epoch": 0.31, "grad_norm": 9.004614503633006, "learning_rate": 9.999662335356739e-06, "loss": 1.2967, "step": 7325 }, { "epoch": 0.31, "grad_norm": 10.86470357855451, "learning_rate": 9.99964787613666e-06, "loss": 1.2743, "step": 7330 }, { "epoch": 0.31, "grad_norm": 8.191662355901187, "learning_rate": 9.999633113802658e-06, "loss": 1.3193, "step": 7335 }, { "epoch": 0.31, "grad_norm": 16.873228836838706, "learning_rate": 9.999618048355626e-06, "loss": 1.301, "step": 7340 }, { "epoch": 0.31, "grad_norm": 38.18032122958042, "learning_rate": 9.999602679796479e-06, "loss": 1.2095, "step": 7345 }, { "epoch": 0.31, "grad_norm": 16.96525312990652, "learning_rate": 9.999587008126149e-06, "loss": 1.1979, "step": 7350 }, { "epoch": 0.31, "grad_norm": 40.087840048579, "learning_rate": 9.999571033345584e-06, "loss": 1.2875, "step": 7355 }, { "epoch": 0.31, "grad_norm": 17.576188453115833, "learning_rate": 9.999554755455754e-06, "loss": 1.2333, "step": 7360 }, { "epoch": 0.31, "grad_norm": 7.562209904492508, "learning_rate": 9.999538174457647e-06, "loss": 1.2304, "step": 7365 }, { "epoch": 0.31, "grad_norm": 33.890982837458935, "learning_rate": 9.999521290352268e-06, "loss": 1.255, "step": 7370 }, { "epoch": 0.31, "grad_norm": 33.44197930525397, "learning_rate": 9.999504103140638e-06, "loss": 1.284, "step": 7375 }, { "epoch": 0.31, "grad_norm": 13.273275265151184, "learning_rate": 9.9994866128238e-06, "loss": 1.2505, "step": 7380 }, { "epoch": 0.31, "grad_norm": 8.328347103549904, "learning_rate": 9.999468819402818e-06, "loss": 1.31, "step": 7385 }, { "epoch": 0.31, "grad_norm": 36.641863082702564, "learning_rate": 9.999450722878766e-06, "loss": 1.2526, "step": 7390 }, { "epoch": 0.31, "grad_norm": 20.00053943239536, "learning_rate": 9.999432323252742e-06, "loss": 1.2419, "step": 7395 }, { "epoch": 0.31, "grad_norm": 12.422848996931565, "learning_rate": 9.999413620525865e-06, "loss": 1.2642, "step": 7400 }, { "epoch": 0.31, "grad_norm": 44.88626897977361, "learning_rate": 9.999394614699265e-06, "loss": 1.2653, "step": 7405 }, { "epoch": 0.31, "grad_norm": 8.444568888006865, "learning_rate": 9.999375305774096e-06, "loss": 1.2753, "step": 7410 }, { "epoch": 0.31, "grad_norm": 22.598004780732158, "learning_rate": 9.999355693751529e-06, "loss": 1.223, "step": 7415 }, { "epoch": 0.31, "grad_norm": 15.225997803410483, "learning_rate": 9.99933577863275e-06, "loss": 1.2429, "step": 7420 }, { "epoch": 0.31, "grad_norm": 31.59763094890729, "learning_rate": 9.999315560418972e-06, "loss": 1.2213, "step": 7425 }, { "epoch": 0.31, "grad_norm": 45.27902333938972, "learning_rate": 9.999295039111417e-06, "loss": 1.2645, "step": 7430 }, { "epoch": 0.31, "grad_norm": 16.295561266548244, "learning_rate": 9.999274214711327e-06, "loss": 1.2021, "step": 7435 }, { "epoch": 0.31, "grad_norm": 9.520466979433348, "learning_rate": 9.99925308721997e-06, "loss": 1.2561, "step": 7440 }, { "epoch": 0.32, "grad_norm": 26.513562267392324, "learning_rate": 9.999231656638623e-06, "loss": 1.2444, "step": 7445 }, { "epoch": 0.32, "grad_norm": 22.962308581453126, "learning_rate": 9.999209922968587e-06, "loss": 1.2082, "step": 7450 }, { "epoch": 0.32, "grad_norm": 12.499960610839619, "learning_rate": 9.999187886211177e-06, "loss": 1.2607, "step": 7455 }, { "epoch": 0.32, "grad_norm": 19.160465456910075, "learning_rate": 9.999165546367734e-06, "loss": 1.2092, "step": 7460 }, { "epoch": 0.32, "grad_norm": 15.42316580287671, "learning_rate": 9.999142903439608e-06, "loss": 1.2618, "step": 7465 }, { "epoch": 0.32, "grad_norm": 14.162399948110279, "learning_rate": 9.999119957428172e-06, "loss": 1.262, "step": 7470 }, { "epoch": 0.32, "grad_norm": 15.732472202398695, "learning_rate": 9.99909670833482e-06, "loss": 1.262, "step": 7475 }, { "epoch": 0.32, "grad_norm": 33.46859983735862, "learning_rate": 9.99907315616096e-06, "loss": 1.2333, "step": 7480 }, { "epoch": 0.32, "grad_norm": 68.6028957957856, "learning_rate": 9.999049300908018e-06, "loss": 1.3109, "step": 7485 }, { "epoch": 0.32, "grad_norm": 35.18566845440983, "learning_rate": 9.999025142577443e-06, "loss": 1.2191, "step": 7490 }, { "epoch": 0.32, "grad_norm": 22.031380148984464, "learning_rate": 9.999000681170701e-06, "loss": 1.2188, "step": 7495 }, { "epoch": 0.32, "grad_norm": 8.583683859556693, "learning_rate": 9.99897591668927e-06, "loss": 1.2341, "step": 7500 }, { "epoch": 0.32, "grad_norm": 7.399575555971131, "learning_rate": 9.998950849134655e-06, "loss": 1.2638, "step": 7505 }, { "epoch": 0.32, "grad_norm": 8.681108162959296, "learning_rate": 9.998925478508375e-06, "loss": 1.2617, "step": 7510 }, { "epoch": 0.32, "grad_norm": 26.73927786581154, "learning_rate": 9.998899804811967e-06, "loss": 1.23, "step": 7515 }, { "epoch": 0.32, "grad_norm": 7.357775940658529, "learning_rate": 9.99887382804699e-06, "loss": 1.2488, "step": 7520 }, { "epoch": 0.32, "grad_norm": 8.655827071683177, "learning_rate": 9.998847548215018e-06, "loss": 1.2971, "step": 7525 }, { "epoch": 0.32, "grad_norm": 26.12113740009643, "learning_rate": 9.998820965317642e-06, "loss": 1.2357, "step": 7530 }, { "epoch": 0.32, "grad_norm": 34.18938653487381, "learning_rate": 9.998794079356476e-06, "loss": 1.2019, "step": 7535 }, { "epoch": 0.32, "grad_norm": 29.794034792221073, "learning_rate": 9.99876689033315e-06, "loss": 1.2942, "step": 7540 }, { "epoch": 0.32, "grad_norm": 37.92169362942798, "learning_rate": 9.998739398249311e-06, "loss": 1.2463, "step": 7545 }, { "epoch": 0.32, "grad_norm": 41.69638564952318, "learning_rate": 9.998711603106625e-06, "loss": 1.3201, "step": 7550 }, { "epoch": 0.32, "grad_norm": 21.89090509019165, "learning_rate": 9.998683504906782e-06, "loss": 1.2507, "step": 7555 }, { "epoch": 0.32, "grad_norm": 11.62595707000461, "learning_rate": 9.99865510365148e-06, "loss": 1.2628, "step": 7560 }, { "epoch": 0.32, "grad_norm": 10.22148233153354, "learning_rate": 9.998626399342444e-06, "loss": 1.2531, "step": 7565 }, { "epoch": 0.32, "grad_norm": 27.631231675971897, "learning_rate": 9.998597391981413e-06, "loss": 1.1924, "step": 7570 }, { "epoch": 0.32, "grad_norm": 17.810600021103443, "learning_rate": 9.998568081570146e-06, "loss": 1.2559, "step": 7575 }, { "epoch": 0.32, "grad_norm": 13.322156600940671, "learning_rate": 9.99853846811042e-06, "loss": 1.1985, "step": 7580 }, { "epoch": 0.32, "grad_norm": 11.057831224628954, "learning_rate": 9.998508551604031e-06, "loss": 1.2874, "step": 7585 }, { "epoch": 0.32, "grad_norm": 16.836707416794145, "learning_rate": 9.998478332052793e-06, "loss": 1.2336, "step": 7590 }, { "epoch": 0.32, "grad_norm": 7.913961140074016, "learning_rate": 9.998447809458533e-06, "loss": 1.2407, "step": 7595 }, { "epoch": 0.32, "grad_norm": 12.185825281546505, "learning_rate": 9.99841698382311e-06, "loss": 1.189, "step": 7600 }, { "epoch": 0.32, "grad_norm": 7.03689758383576, "learning_rate": 9.998385855148387e-06, "loss": 1.2225, "step": 7605 }, { "epoch": 0.32, "grad_norm": 7.959434158162542, "learning_rate": 9.998354423436253e-06, "loss": 1.2405, "step": 7610 }, { "epoch": 0.32, "grad_norm": 9.437909546331637, "learning_rate": 9.998322688688614e-06, "loss": 1.207, "step": 7615 }, { "epoch": 0.32, "grad_norm": 7.984877873962511, "learning_rate": 9.998290650907391e-06, "loss": 1.3317, "step": 7620 }, { "epoch": 0.32, "grad_norm": 7.750241980292008, "learning_rate": 9.998258310094532e-06, "loss": 1.2167, "step": 7625 }, { "epoch": 0.32, "grad_norm": 6.79780561274728, "learning_rate": 9.998225666251993e-06, "loss": 1.2334, "step": 7630 }, { "epoch": 0.32, "grad_norm": 16.21279698053489, "learning_rate": 9.998192719381753e-06, "loss": 1.285, "step": 7635 }, { "epoch": 0.32, "grad_norm": 10.791513369959016, "learning_rate": 9.998159469485814e-06, "loss": 1.2154, "step": 7640 }, { "epoch": 0.32, "grad_norm": 11.51909210772539, "learning_rate": 9.998125916566189e-06, "loss": 1.2389, "step": 7645 }, { "epoch": 0.32, "grad_norm": 14.751255139905355, "learning_rate": 9.99809206062491e-06, "loss": 1.2967, "step": 7650 }, { "epoch": 0.32, "grad_norm": 13.810064123766226, "learning_rate": 9.998057901664032e-06, "loss": 1.2417, "step": 7655 }, { "epoch": 0.32, "grad_norm": 7.120699512188413, "learning_rate": 9.998023439685627e-06, "loss": 1.277, "step": 7660 }, { "epoch": 0.32, "grad_norm": 19.53438067734915, "learning_rate": 9.997988674691782e-06, "loss": 1.2577, "step": 7665 }, { "epoch": 0.32, "grad_norm": 12.689096741715723, "learning_rate": 9.997953606684605e-06, "loss": 1.1871, "step": 7670 }, { "epoch": 0.32, "grad_norm": 16.15410418789472, "learning_rate": 9.997918235666223e-06, "loss": 1.2454, "step": 7675 }, { "epoch": 0.33, "grad_norm": 7.6757528872797325, "learning_rate": 9.997882561638782e-06, "loss": 1.2168, "step": 7680 }, { "epoch": 0.33, "grad_norm": 19.133715988011993, "learning_rate": 9.997846584604443e-06, "loss": 1.1902, "step": 7685 }, { "epoch": 0.33, "grad_norm": 9.679947110622978, "learning_rate": 9.997810304565386e-06, "loss": 1.2614, "step": 7690 }, { "epoch": 0.33, "grad_norm": 23.004587892593033, "learning_rate": 9.997773721523812e-06, "loss": 1.2344, "step": 7695 }, { "epoch": 0.33, "grad_norm": 14.67682874432702, "learning_rate": 9.99773683548194e-06, "loss": 1.2305, "step": 7700 }, { "epoch": 0.33, "grad_norm": 18.400094449302642, "learning_rate": 9.997699646442003e-06, "loss": 1.2357, "step": 7705 }, { "epoch": 0.33, "grad_norm": 12.49392548640079, "learning_rate": 9.997662154406258e-06, "loss": 1.2657, "step": 7710 }, { "epoch": 0.33, "grad_norm": 9.736775222487934, "learning_rate": 9.997624359376979e-06, "loss": 1.2515, "step": 7715 }, { "epoch": 0.33, "grad_norm": 41.4303926787153, "learning_rate": 9.997586261356453e-06, "loss": 1.2132, "step": 7720 }, { "epoch": 0.33, "grad_norm": 35.184748895730245, "learning_rate": 9.997547860346996e-06, "loss": 1.221, "step": 7725 }, { "epoch": 0.33, "grad_norm": 10.457499390191062, "learning_rate": 9.997509156350932e-06, "loss": 1.2248, "step": 7730 }, { "epoch": 0.33, "grad_norm": 10.796548136163155, "learning_rate": 9.997470149370608e-06, "loss": 1.2587, "step": 7735 }, { "epoch": 0.33, "grad_norm": 9.546352192140306, "learning_rate": 9.99743083940839e-06, "loss": 1.2349, "step": 7740 }, { "epoch": 0.33, "grad_norm": 20.295144935838564, "learning_rate": 9.99739122646666e-06, "loss": 1.1899, "step": 7745 }, { "epoch": 0.33, "grad_norm": 53.787368747896565, "learning_rate": 9.997351310547821e-06, "loss": 1.2089, "step": 7750 }, { "epoch": 0.33, "grad_norm": 40.22827895925454, "learning_rate": 9.997311091654291e-06, "loss": 1.2261, "step": 7755 }, { "epoch": 0.33, "grad_norm": 10.179337496482024, "learning_rate": 9.997270569788511e-06, "loss": 1.228, "step": 7760 }, { "epoch": 0.33, "grad_norm": 7.965323017882522, "learning_rate": 9.997229744952937e-06, "loss": 1.223, "step": 7765 }, { "epoch": 0.33, "grad_norm": 13.503594652631213, "learning_rate": 9.99718861715004e-06, "loss": 1.2266, "step": 7770 }, { "epoch": 0.33, "grad_norm": 41.67889440230999, "learning_rate": 9.99714718638232e-06, "loss": 1.2406, "step": 7775 }, { "epoch": 0.33, "grad_norm": 23.892405951309872, "learning_rate": 9.997105452652284e-06, "loss": 1.2496, "step": 7780 }, { "epoch": 0.33, "grad_norm": 63.42668530241874, "learning_rate": 9.997063415962464e-06, "loss": 1.2372, "step": 7785 }, { "epoch": 0.33, "grad_norm": 14.343590782148018, "learning_rate": 9.99702107631541e-06, "loss": 1.2494, "step": 7790 }, { "epoch": 0.33, "grad_norm": 7.999735564065194, "learning_rate": 9.996978433713687e-06, "loss": 1.2866, "step": 7795 }, { "epoch": 0.33, "grad_norm": 19.826309999008483, "learning_rate": 9.99693548815988e-06, "loss": 1.2461, "step": 7800 }, { "epoch": 0.33, "grad_norm": 19.750264485329023, "learning_rate": 9.996892239656592e-06, "loss": 1.2406, "step": 7805 }, { "epoch": 0.33, "grad_norm": 19.688627552278987, "learning_rate": 9.996848688206448e-06, "loss": 1.23, "step": 7810 }, { "epoch": 0.33, "grad_norm": 9.938342338364622, "learning_rate": 9.996804833812086e-06, "loss": 1.239, "step": 7815 }, { "epoch": 0.33, "grad_norm": 38.06803747943537, "learning_rate": 9.996760676476168e-06, "loss": 1.2259, "step": 7820 }, { "epoch": 0.33, "grad_norm": 13.5285251319783, "learning_rate": 9.996716216201366e-06, "loss": 1.2307, "step": 7825 }, { "epoch": 0.33, "grad_norm": 8.94350367479322, "learning_rate": 9.996671452990377e-06, "loss": 1.2223, "step": 7830 }, { "epoch": 0.33, "grad_norm": 8.698492823605658, "learning_rate": 9.996626386845918e-06, "loss": 1.272, "step": 7835 }, { "epoch": 0.33, "grad_norm": 30.432536493106333, "learning_rate": 9.996581017770719e-06, "loss": 1.2543, "step": 7840 }, { "epoch": 0.33, "grad_norm": 27.97645657966109, "learning_rate": 9.99653534576753e-06, "loss": 1.2245, "step": 7845 }, { "epoch": 0.33, "grad_norm": 84.9303648895969, "learning_rate": 9.996489370839122e-06, "loss": 1.2887, "step": 7850 }, { "epoch": 0.33, "grad_norm": 101.70790412364232, "learning_rate": 9.996443092988279e-06, "loss": 1.2718, "step": 7855 }, { "epoch": 0.33, "grad_norm": 74.92552930292959, "learning_rate": 9.99639651221781e-06, "loss": 1.2506, "step": 7860 }, { "epoch": 0.33, "grad_norm": 14.316343216911543, "learning_rate": 9.99634962853054e-06, "loss": 1.2369, "step": 7865 }, { "epoch": 0.33, "grad_norm": 24.512540844754735, "learning_rate": 9.996302441929306e-06, "loss": 1.235, "step": 7870 }, { "epoch": 0.33, "grad_norm": 26.647148749658335, "learning_rate": 9.996254952416974e-06, "loss": 1.2123, "step": 7875 }, { "epoch": 0.33, "grad_norm": 12.573684349444694, "learning_rate": 9.996207159996421e-06, "loss": 1.2301, "step": 7880 }, { "epoch": 0.33, "grad_norm": 9.08187471953139, "learning_rate": 9.996159064670542e-06, "loss": 1.2543, "step": 7885 }, { "epoch": 0.33, "grad_norm": 32.892617578869405, "learning_rate": 9.99611066644226e-06, "loss": 1.2378, "step": 7890 }, { "epoch": 0.33, "grad_norm": 17.88049211720188, "learning_rate": 9.996061965314502e-06, "loss": 1.2068, "step": 7895 }, { "epoch": 0.33, "grad_norm": 21.080132073202652, "learning_rate": 9.996012961290223e-06, "loss": 1.2323, "step": 7900 }, { "epoch": 0.33, "grad_norm": 7.198564632839893, "learning_rate": 9.995963654372396e-06, "loss": 1.1807, "step": 7905 }, { "epoch": 0.33, "grad_norm": 13.01419295448973, "learning_rate": 9.995914044564008e-06, "loss": 1.2347, "step": 7910 }, { "epoch": 0.33, "grad_norm": 29.497398275690852, "learning_rate": 9.995864131868066e-06, "loss": 1.2257, "step": 7915 }, { "epoch": 0.34, "grad_norm": 33.43508479103748, "learning_rate": 9.995813916287602e-06, "loss": 1.224, "step": 7920 }, { "epoch": 0.34, "grad_norm": 9.52845846510623, "learning_rate": 9.995763397825651e-06, "loss": 1.2573, "step": 7925 }, { "epoch": 0.34, "grad_norm": 12.311686554471457, "learning_rate": 9.995712576485284e-06, "loss": 1.2407, "step": 7930 }, { "epoch": 0.34, "grad_norm": 19.7741420666964, "learning_rate": 9.995661452269578e-06, "loss": 1.2345, "step": 7935 }, { "epoch": 0.34, "grad_norm": 40.743896167918365, "learning_rate": 9.995610025181633e-06, "loss": 1.2415, "step": 7940 }, { "epoch": 0.34, "grad_norm": 25.302561478764066, "learning_rate": 9.995558295224565e-06, "loss": 1.2255, "step": 7945 }, { "epoch": 0.34, "grad_norm": 15.209111688247388, "learning_rate": 9.995506262401516e-06, "loss": 1.3095, "step": 7950 }, { "epoch": 0.34, "grad_norm": 40.61841968723452, "learning_rate": 9.995453926715635e-06, "loss": 1.2154, "step": 7955 }, { "epoch": 0.34, "grad_norm": 17.519057812630436, "learning_rate": 9.995401288170097e-06, "loss": 1.2436, "step": 7960 }, { "epoch": 0.34, "grad_norm": 9.248821001314777, "learning_rate": 9.995348346768094e-06, "loss": 1.1808, "step": 7965 }, { "epoch": 0.34, "grad_norm": 28.084413250987303, "learning_rate": 9.995295102512837e-06, "loss": 1.2527, "step": 7970 }, { "epoch": 0.34, "grad_norm": 7.836964539629219, "learning_rate": 9.99524155540755e-06, "loss": 1.2256, "step": 7975 }, { "epoch": 0.34, "grad_norm": 8.832120488861237, "learning_rate": 9.99518770545548e-06, "loss": 1.241, "step": 7980 }, { "epoch": 0.34, "grad_norm": 19.17927685882691, "learning_rate": 9.995133552659895e-06, "loss": 1.2742, "step": 7985 }, { "epoch": 0.34, "grad_norm": 15.558874917828359, "learning_rate": 9.995079097024076e-06, "loss": 1.2293, "step": 7990 }, { "epoch": 0.34, "grad_norm": 9.357318209950067, "learning_rate": 9.995024338551324e-06, "loss": 1.2094, "step": 7995 }, { "epoch": 0.34, "grad_norm": 63.034609308669275, "learning_rate": 9.994969277244961e-06, "loss": 1.2543, "step": 8000 }, { "epoch": 0.34, "grad_norm": 20.097186013491743, "learning_rate": 9.994913913108324e-06, "loss": 1.2389, "step": 8005 }, { "epoch": 0.34, "grad_norm": 25.499347342565777, "learning_rate": 9.994858246144769e-06, "loss": 1.2486, "step": 8010 }, { "epoch": 0.34, "grad_norm": 80.8675075405855, "learning_rate": 9.99480227635767e-06, "loss": 1.2531, "step": 8015 }, { "epoch": 0.34, "grad_norm": 81.82780626138764, "learning_rate": 9.994746003750423e-06, "loss": 1.2832, "step": 8020 }, { "epoch": 0.34, "grad_norm": 71.51347532104299, "learning_rate": 9.994689428326438e-06, "loss": 1.2762, "step": 8025 }, { "epoch": 0.34, "grad_norm": 94.67499035587554, "learning_rate": 9.994632550089147e-06, "loss": 1.2469, "step": 8030 }, { "epoch": 0.34, "grad_norm": 54.31907865475567, "learning_rate": 9.994575369041995e-06, "loss": 1.3078, "step": 8035 }, { "epoch": 0.34, "grad_norm": 17.760119873262017, "learning_rate": 9.99451788518845e-06, "loss": 1.2339, "step": 8040 }, { "epoch": 0.34, "grad_norm": 14.417887412121337, "learning_rate": 9.994460098531998e-06, "loss": 1.2521, "step": 8045 }, { "epoch": 0.34, "grad_norm": 17.271854470515674, "learning_rate": 9.994402009076144e-06, "loss": 1.2073, "step": 8050 }, { "epoch": 0.34, "grad_norm": 9.833875269160144, "learning_rate": 9.994343616824404e-06, "loss": 1.2184, "step": 8055 }, { "epoch": 0.34, "grad_norm": 12.445511082474821, "learning_rate": 9.994284921780323e-06, "loss": 1.1979, "step": 8060 }, { "epoch": 0.34, "grad_norm": 20.19483393932203, "learning_rate": 9.994225923947459e-06, "loss": 1.2111, "step": 8065 }, { "epoch": 0.34, "grad_norm": 16.2307867214878, "learning_rate": 9.99416662332939e-06, "loss": 1.2233, "step": 8070 }, { "epoch": 0.34, "grad_norm": 11.346104187003878, "learning_rate": 9.994107019929707e-06, "loss": 1.1857, "step": 8075 }, { "epoch": 0.34, "grad_norm": 16.273289567960617, "learning_rate": 9.994047113752025e-06, "loss": 1.1966, "step": 8080 }, { "epoch": 0.34, "grad_norm": 8.741119461132842, "learning_rate": 9.993986904799979e-06, "loss": 1.2129, "step": 8085 }, { "epoch": 0.34, "grad_norm": 19.90653974587955, "learning_rate": 9.993926393077215e-06, "loss": 1.2397, "step": 8090 }, { "epoch": 0.34, "grad_norm": 8.282798225737693, "learning_rate": 9.993865578587407e-06, "loss": 1.2103, "step": 8095 }, { "epoch": 0.34, "grad_norm": 21.24182674626366, "learning_rate": 9.993804461334238e-06, "loss": 1.1706, "step": 8100 }, { "epoch": 0.34, "grad_norm": 7.099869396552126, "learning_rate": 9.993743041321413e-06, "loss": 1.1946, "step": 8105 }, { "epoch": 0.34, "grad_norm": 17.8129641406443, "learning_rate": 9.993681318552658e-06, "loss": 1.244, "step": 8110 }, { "epoch": 0.34, "grad_norm": 10.37912288019264, "learning_rate": 9.993619293031713e-06, "loss": 1.194, "step": 8115 }, { "epoch": 0.34, "grad_norm": 6.201576798859058, "learning_rate": 9.99355696476234e-06, "loss": 1.2422, "step": 8120 }, { "epoch": 0.34, "grad_norm": 34.121740170383944, "learning_rate": 9.993494333748317e-06, "loss": 1.2311, "step": 8125 }, { "epoch": 0.34, "grad_norm": 23.12775019310754, "learning_rate": 9.993431399993442e-06, "loss": 1.2201, "step": 8130 }, { "epoch": 0.34, "grad_norm": 19.871244630607208, "learning_rate": 9.993368163501529e-06, "loss": 1.1514, "step": 8135 }, { "epoch": 0.34, "grad_norm": 37.6939611663003, "learning_rate": 9.993304624276413e-06, "loss": 1.1888, "step": 8140 }, { "epoch": 0.34, "grad_norm": 31.48436670499356, "learning_rate": 9.993240782321946e-06, "loss": 1.2134, "step": 8145 }, { "epoch": 0.34, "grad_norm": 12.000254758597286, "learning_rate": 9.993176637641999e-06, "loss": 1.2127, "step": 8150 }, { "epoch": 0.35, "grad_norm": 16.115236506025425, "learning_rate": 9.99311219024046e-06, "loss": 1.2451, "step": 8155 }, { "epoch": 0.35, "grad_norm": 26.95413463578466, "learning_rate": 9.993047440121236e-06, "loss": 1.1916, "step": 8160 }, { "epoch": 0.35, "grad_norm": 11.110935308606182, "learning_rate": 9.992982387288254e-06, "loss": 1.2311, "step": 8165 }, { "epoch": 0.35, "grad_norm": 7.544790635763175, "learning_rate": 9.992917031745457e-06, "loss": 1.2719, "step": 8170 }, { "epoch": 0.35, "grad_norm": 17.279036810480022, "learning_rate": 9.992851373496808e-06, "loss": 1.1894, "step": 8175 }, { "epoch": 0.35, "grad_norm": 10.789545001376956, "learning_rate": 9.992785412546287e-06, "loss": 1.2384, "step": 8180 }, { "epoch": 0.35, "grad_norm": 17.02853229262546, "learning_rate": 9.992719148897891e-06, "loss": 1.1855, "step": 8185 }, { "epoch": 0.35, "grad_norm": 36.68130672151206, "learning_rate": 9.992652582555643e-06, "loss": 1.2494, "step": 8190 }, { "epoch": 0.35, "grad_norm": 40.171663041857016, "learning_rate": 9.992585713523572e-06, "loss": 1.2288, "step": 8195 }, { "epoch": 0.35, "grad_norm": 78.18049083936823, "learning_rate": 9.992518541805737e-06, "loss": 1.2246, "step": 8200 }, { "epoch": 0.35, "grad_norm": 11.209966268972378, "learning_rate": 9.992451067406208e-06, "loss": 1.2702, "step": 8205 }, { "epoch": 0.35, "grad_norm": 8.833679272598818, "learning_rate": 9.992383290329077e-06, "loss": 1.2214, "step": 8210 }, { "epoch": 0.35, "grad_norm": 6.623403413698795, "learning_rate": 9.992315210578454e-06, "loss": 1.1954, "step": 8215 }, { "epoch": 0.35, "grad_norm": 8.918535797297617, "learning_rate": 9.992246828158463e-06, "loss": 1.2429, "step": 8220 }, { "epoch": 0.35, "grad_norm": 6.954019510201876, "learning_rate": 9.992178143073252e-06, "loss": 1.1988, "step": 8225 }, { "epoch": 0.35, "grad_norm": 24.854442925528698, "learning_rate": 9.992109155326986e-06, "loss": 1.215, "step": 8230 }, { "epoch": 0.35, "grad_norm": 22.89155681525993, "learning_rate": 9.992039864923849e-06, "loss": 1.2506, "step": 8235 }, { "epoch": 0.35, "grad_norm": 12.781146440290884, "learning_rate": 9.991970271868035e-06, "loss": 1.1868, "step": 8240 }, { "epoch": 0.35, "grad_norm": 28.355165901493866, "learning_rate": 9.99190037616377e-06, "loss": 1.2444, "step": 8245 }, { "epoch": 0.35, "grad_norm": 30.469499153565415, "learning_rate": 9.991830177815289e-06, "loss": 1.2607, "step": 8250 }, { "epoch": 0.35, "grad_norm": 33.17248520538483, "learning_rate": 9.99175967682685e-06, "loss": 1.2455, "step": 8255 }, { "epoch": 0.35, "grad_norm": 13.863038883572782, "learning_rate": 9.991688873202723e-06, "loss": 1.1944, "step": 8260 }, { "epoch": 0.35, "grad_norm": 12.309650321051647, "learning_rate": 9.991617766947204e-06, "loss": 1.2785, "step": 8265 }, { "epoch": 0.35, "grad_norm": 16.524302448390912, "learning_rate": 9.991546358064603e-06, "loss": 1.2365, "step": 8270 }, { "epoch": 0.35, "grad_norm": 19.728092271346227, "learning_rate": 9.991474646559249e-06, "loss": 1.2437, "step": 8275 }, { "epoch": 0.35, "grad_norm": 59.56060337780727, "learning_rate": 9.991402632435491e-06, "loss": 1.2216, "step": 8280 }, { "epoch": 0.35, "grad_norm": 9.849854878122546, "learning_rate": 9.991330315697693e-06, "loss": 1.2205, "step": 8285 }, { "epoch": 0.35, "grad_norm": 13.29344131643094, "learning_rate": 9.991257696350242e-06, "loss": 1.2128, "step": 8290 }, { "epoch": 0.35, "grad_norm": 15.632113433715542, "learning_rate": 9.991184774397537e-06, "loss": 1.2315, "step": 8295 }, { "epoch": 0.35, "grad_norm": 11.313737404668368, "learning_rate": 9.991111549844004e-06, "loss": 1.2292, "step": 8300 }, { "epoch": 0.35, "grad_norm": 7.953074968542056, "learning_rate": 9.991038022694078e-06, "loss": 1.1862, "step": 8305 }, { "epoch": 0.35, "grad_norm": 11.73974789227461, "learning_rate": 9.990964192952218e-06, "loss": 1.2926, "step": 8310 }, { "epoch": 0.35, "grad_norm": 10.908010535863633, "learning_rate": 9.990890060622899e-06, "loss": 1.1757, "step": 8315 }, { "epoch": 0.35, "grad_norm": 16.05993049729101, "learning_rate": 9.99081562571062e-06, "loss": 1.2032, "step": 8320 }, { "epoch": 0.35, "grad_norm": 30.869534435483125, "learning_rate": 9.990740888219887e-06, "loss": 1.278, "step": 8325 }, { "epoch": 0.35, "grad_norm": 6.363204753364154, "learning_rate": 9.990665848155237e-06, "loss": 1.1965, "step": 8330 }, { "epoch": 0.35, "grad_norm": 11.506123380321823, "learning_rate": 9.990590505521216e-06, "loss": 1.1889, "step": 8335 }, { "epoch": 0.35, "grad_norm": 12.322006757393218, "learning_rate": 9.990514860322393e-06, "loss": 1.2443, "step": 8340 }, { "epoch": 0.35, "grad_norm": 14.952070178029388, "learning_rate": 9.990438912563355e-06, "loss": 1.2335, "step": 8345 }, { "epoch": 0.35, "grad_norm": 14.990567241234745, "learning_rate": 9.990362662248705e-06, "loss": 1.2414, "step": 8350 }, { "epoch": 0.35, "grad_norm": 13.105396826159597, "learning_rate": 9.990286109383067e-06, "loss": 1.2282, "step": 8355 }, { "epoch": 0.35, "grad_norm": 12.70209767655266, "learning_rate": 9.990209253971082e-06, "loss": 1.2345, "step": 8360 }, { "epoch": 0.35, "grad_norm": 12.36698546637828, "learning_rate": 9.990132096017407e-06, "loss": 1.2684, "step": 8365 }, { "epoch": 0.35, "grad_norm": 13.583087022136393, "learning_rate": 9.990054635526722e-06, "loss": 1.2101, "step": 8370 }, { "epoch": 0.35, "grad_norm": 11.895110158292534, "learning_rate": 9.989976872503725e-06, "loss": 1.2425, "step": 8375 }, { "epoch": 0.35, "grad_norm": 8.050796838688562, "learning_rate": 9.989898806953127e-06, "loss": 1.2478, "step": 8380 }, { "epoch": 0.35, "grad_norm": 7.869059538645431, "learning_rate": 9.989820438879664e-06, "loss": 1.2392, "step": 8385 }, { "epoch": 0.36, "grad_norm": 9.896020847728801, "learning_rate": 9.989741768288086e-06, "loss": 1.1862, "step": 8390 }, { "epoch": 0.36, "grad_norm": 45.703456980106715, "learning_rate": 9.989662795183162e-06, "loss": 1.242, "step": 8395 }, { "epoch": 0.36, "grad_norm": 9.579828700821357, "learning_rate": 9.98958351956968e-06, "loss": 1.1891, "step": 8400 }, { "epoch": 0.36, "grad_norm": 8.481324903792737, "learning_rate": 9.989503941452445e-06, "loss": 1.21, "step": 8405 }, { "epoch": 0.36, "grad_norm": 11.583278438929653, "learning_rate": 9.989424060836283e-06, "loss": 1.2205, "step": 8410 }, { "epoch": 0.36, "grad_norm": 7.376906541576698, "learning_rate": 9.98934387772604e-06, "loss": 1.1888, "step": 8415 }, { "epoch": 0.36, "grad_norm": 10.57658585583126, "learning_rate": 9.989263392126573e-06, "loss": 1.2269, "step": 8420 }, { "epoch": 0.36, "grad_norm": 8.275333470481575, "learning_rate": 9.989182604042762e-06, "loss": 1.242, "step": 8425 }, { "epoch": 0.36, "grad_norm": 14.11764500076583, "learning_rate": 9.989101513479506e-06, "loss": 1.2365, "step": 8430 }, { "epoch": 0.36, "grad_norm": 9.452450309467196, "learning_rate": 9.98902012044172e-06, "loss": 1.2123, "step": 8435 }, { "epoch": 0.36, "grad_norm": 16.149069983795446, "learning_rate": 9.988938424934343e-06, "loss": 1.2663, "step": 8440 }, { "epoch": 0.36, "grad_norm": 7.659271865325053, "learning_rate": 9.988856426962324e-06, "loss": 1.2146, "step": 8445 }, { "epoch": 0.36, "grad_norm": 8.20109071245686, "learning_rate": 9.988774126530634e-06, "loss": 1.2251, "step": 8450 }, { "epoch": 0.36, "grad_norm": 11.207621549082202, "learning_rate": 9.988691523644262e-06, "loss": 1.2343, "step": 8455 }, { "epoch": 0.36, "grad_norm": 8.82954071691952, "learning_rate": 9.98860861830822e-06, "loss": 1.2121, "step": 8460 }, { "epoch": 0.36, "grad_norm": 6.376664299695347, "learning_rate": 9.988525410527531e-06, "loss": 1.1781, "step": 8465 }, { "epoch": 0.36, "grad_norm": 49.39244173055254, "learning_rate": 9.988441900307242e-06, "loss": 1.2269, "step": 8470 }, { "epoch": 0.36, "grad_norm": 50.8504410856827, "learning_rate": 9.988358087652412e-06, "loss": 1.2337, "step": 8475 }, { "epoch": 0.36, "grad_norm": 39.10687162104513, "learning_rate": 9.988273972568126e-06, "loss": 1.1752, "step": 8480 }, { "epoch": 0.36, "grad_norm": 67.31066961645065, "learning_rate": 9.988189555059483e-06, "loss": 1.2588, "step": 8485 }, { "epoch": 0.36, "grad_norm": 14.69441032988168, "learning_rate": 9.988104835131598e-06, "loss": 1.1815, "step": 8490 }, { "epoch": 0.36, "grad_norm": 33.467110310779006, "learning_rate": 9.988019812789613e-06, "loss": 1.2216, "step": 8495 }, { "epoch": 0.36, "grad_norm": 14.671270848207165, "learning_rate": 9.987934488038678e-06, "loss": 1.1768, "step": 8500 }, { "epoch": 0.36, "grad_norm": 28.150762478072988, "learning_rate": 9.987848860883968e-06, "loss": 1.2599, "step": 8505 }, { "epoch": 0.36, "grad_norm": 74.5275694909008, "learning_rate": 9.987762931330673e-06, "loss": 1.2056, "step": 8510 }, { "epoch": 0.36, "grad_norm": 30.507403480430057, "learning_rate": 9.987676699384004e-06, "loss": 1.2353, "step": 8515 }, { "epoch": 0.36, "grad_norm": 23.543567868742304, "learning_rate": 9.987590165049188e-06, "loss": 1.2098, "step": 8520 }, { "epoch": 0.36, "grad_norm": 25.656673732893854, "learning_rate": 9.987503328331472e-06, "loss": 1.2859, "step": 8525 }, { "epoch": 0.36, "grad_norm": 28.959458074427264, "learning_rate": 9.987416189236121e-06, "loss": 1.1597, "step": 8530 }, { "epoch": 0.36, "grad_norm": 43.86085421227326, "learning_rate": 9.987328747768416e-06, "loss": 1.2405, "step": 8535 }, { "epoch": 0.36, "grad_norm": 32.72468333360079, "learning_rate": 9.98724100393366e-06, "loss": 1.2069, "step": 8540 }, { "epoch": 0.36, "grad_norm": 12.995011286255298, "learning_rate": 9.987152957737172e-06, "loss": 1.2345, "step": 8545 }, { "epoch": 0.36, "grad_norm": 73.06658047429889, "learning_rate": 9.987064609184291e-06, "loss": 1.2452, "step": 8550 }, { "epoch": 0.36, "grad_norm": 24.817118783827976, "learning_rate": 9.986975958280373e-06, "loss": 1.2164, "step": 8555 }, { "epoch": 0.36, "grad_norm": 14.990043689200686, "learning_rate": 9.986887005030792e-06, "loss": 1.2518, "step": 8560 }, { "epoch": 0.36, "grad_norm": 31.572278664978654, "learning_rate": 9.986797749440943e-06, "loss": 1.2592, "step": 8565 }, { "epoch": 0.36, "grad_norm": 19.435482013691885, "learning_rate": 9.986708191516232e-06, "loss": 1.2424, "step": 8570 }, { "epoch": 0.36, "grad_norm": 63.693461961295036, "learning_rate": 9.986618331262095e-06, "loss": 1.2578, "step": 8575 }, { "epoch": 0.36, "grad_norm": 31.446843658547774, "learning_rate": 9.986528168683975e-06, "loss": 1.1853, "step": 8580 }, { "epoch": 0.36, "grad_norm": 12.301533764234394, "learning_rate": 9.986437703787342e-06, "loss": 1.1933, "step": 8585 }, { "epoch": 0.36, "grad_norm": 19.8460881160074, "learning_rate": 9.986346936577678e-06, "loss": 1.2295, "step": 8590 }, { "epoch": 0.36, "grad_norm": 7.55608879962847, "learning_rate": 9.986255867060489e-06, "loss": 1.1702, "step": 8595 }, { "epoch": 0.36, "grad_norm": 44.75645688198931, "learning_rate": 9.98616449524129e-06, "loss": 1.2348, "step": 8600 }, { "epoch": 0.36, "grad_norm": 26.859487581688636, "learning_rate": 9.986072821125629e-06, "loss": 1.2364, "step": 8605 }, { "epoch": 0.36, "grad_norm": 16.91839599023379, "learning_rate": 9.985980844719059e-06, "loss": 1.2198, "step": 8610 }, { "epoch": 0.36, "grad_norm": 26.15389996024574, "learning_rate": 9.985888566027156e-06, "loss": 1.2076, "step": 8615 }, { "epoch": 0.36, "grad_norm": 14.085079912611075, "learning_rate": 9.985795985055516e-06, "loss": 1.1763, "step": 8620 }, { "epoch": 0.37, "grad_norm": 33.792171021963675, "learning_rate": 9.985703101809752e-06, "loss": 1.2248, "step": 8625 }, { "epoch": 0.37, "grad_norm": 7.3260248214026396, "learning_rate": 9.985609916295493e-06, "loss": 1.2355, "step": 8630 }, { "epoch": 0.37, "grad_norm": 10.311401163652134, "learning_rate": 9.985516428518393e-06, "loss": 1.2196, "step": 8635 }, { "epoch": 0.37, "grad_norm": 11.378826713087722, "learning_rate": 9.985422638484116e-06, "loss": 1.2499, "step": 8640 }, { "epoch": 0.37, "grad_norm": 10.541024734387541, "learning_rate": 9.98532854619835e-06, "loss": 1.2503, "step": 8645 }, { "epoch": 0.37, "grad_norm": 10.773454926388277, "learning_rate": 9.985234151666797e-06, "loss": 1.2345, "step": 8650 }, { "epoch": 0.37, "grad_norm": 6.599773351956539, "learning_rate": 9.985139454895186e-06, "loss": 1.2246, "step": 8655 }, { "epoch": 0.37, "grad_norm": 6.615899718378288, "learning_rate": 9.98504445588925e-06, "loss": 1.1834, "step": 8660 }, { "epoch": 0.37, "grad_norm": 12.15254060666548, "learning_rate": 9.984949154654756e-06, "loss": 1.2356, "step": 8665 }, { "epoch": 0.37, "grad_norm": 14.55878618843663, "learning_rate": 9.984853551197476e-06, "loss": 1.2019, "step": 8670 }, { "epoch": 0.37, "grad_norm": 13.53312672145288, "learning_rate": 9.984757645523208e-06, "loss": 1.2075, "step": 8675 }, { "epoch": 0.37, "grad_norm": 14.028101438814463, "learning_rate": 9.984661437637768e-06, "loss": 1.1971, "step": 8680 }, { "epoch": 0.37, "grad_norm": 7.962681134293541, "learning_rate": 9.984564927546989e-06, "loss": 1.2292, "step": 8685 }, { "epoch": 0.37, "grad_norm": 7.105873883293662, "learning_rate": 9.98446811525672e-06, "loss": 1.2133, "step": 8690 }, { "epoch": 0.37, "grad_norm": 34.975382958953205, "learning_rate": 9.98437100077283e-06, "loss": 1.2186, "step": 8695 }, { "epoch": 0.37, "grad_norm": 11.37087709348676, "learning_rate": 9.984273584101211e-06, "loss": 1.1987, "step": 8700 }, { "epoch": 0.37, "grad_norm": 71.08804182456454, "learning_rate": 9.984175865247766e-06, "loss": 1.1986, "step": 8705 }, { "epoch": 0.37, "grad_norm": 52.10398569690377, "learning_rate": 9.984077844218416e-06, "loss": 1.2244, "step": 8710 }, { "epoch": 0.37, "grad_norm": 37.81548927949398, "learning_rate": 9.983979521019111e-06, "loss": 1.2653, "step": 8715 }, { "epoch": 0.37, "grad_norm": 12.85074773890059, "learning_rate": 9.983880895655806e-06, "loss": 1.2603, "step": 8720 }, { "epoch": 0.37, "grad_norm": 32.64581515056232, "learning_rate": 9.983781968134485e-06, "loss": 1.242, "step": 8725 }, { "epoch": 0.37, "grad_norm": 17.099230723476186, "learning_rate": 9.983682738461143e-06, "loss": 1.2303, "step": 8730 }, { "epoch": 0.37, "grad_norm": 36.264254645909595, "learning_rate": 9.983583206641796e-06, "loss": 1.2389, "step": 8735 }, { "epoch": 0.37, "grad_norm": 45.86121139741487, "learning_rate": 9.98348337268248e-06, "loss": 1.2036, "step": 8740 }, { "epoch": 0.37, "grad_norm": 7.538096106752206, "learning_rate": 9.983383236589243e-06, "loss": 1.2565, "step": 8745 }, { "epoch": 0.37, "grad_norm": 33.679354719636336, "learning_rate": 9.983282798368162e-06, "loss": 1.2806, "step": 8750 }, { "epoch": 0.37, "grad_norm": 20.910192795381583, "learning_rate": 9.983182058025323e-06, "loss": 1.2322, "step": 8755 }, { "epoch": 0.37, "grad_norm": 30.370012964051725, "learning_rate": 9.983081015566835e-06, "loss": 1.2055, "step": 8760 }, { "epoch": 0.37, "grad_norm": 29.424674522331955, "learning_rate": 9.982979670998823e-06, "loss": 1.2074, "step": 8765 }, { "epoch": 0.37, "grad_norm": 32.41331231991534, "learning_rate": 9.982878024327432e-06, "loss": 1.2511, "step": 8770 }, { "epoch": 0.37, "grad_norm": 46.05780744909376, "learning_rate": 9.982776075558822e-06, "loss": 1.2172, "step": 8775 }, { "epoch": 0.37, "grad_norm": 74.16296669242696, "learning_rate": 9.982673824699178e-06, "loss": 1.245, "step": 8780 }, { "epoch": 0.37, "grad_norm": 45.764843173008884, "learning_rate": 9.982571271754696e-06, "loss": 1.2429, "step": 8785 }, { "epoch": 0.37, "grad_norm": 53.24053470535527, "learning_rate": 9.982468416731595e-06, "loss": 1.2552, "step": 8790 }, { "epoch": 0.37, "grad_norm": 27.331446935935848, "learning_rate": 9.982365259636109e-06, "loss": 1.2329, "step": 8795 }, { "epoch": 0.37, "grad_norm": 21.411223037335787, "learning_rate": 9.982261800474493e-06, "loss": 1.2223, "step": 8800 }, { "epoch": 0.37, "grad_norm": 17.64017648011196, "learning_rate": 9.98215803925302e-06, "loss": 1.2085, "step": 8805 }, { "epoch": 0.37, "grad_norm": 8.656967648553069, "learning_rate": 9.982053975977983e-06, "loss": 1.1984, "step": 8810 }, { "epoch": 0.37, "grad_norm": 6.252972648379149, "learning_rate": 9.981949610655685e-06, "loss": 1.2275, "step": 8815 }, { "epoch": 0.37, "grad_norm": 6.496446996490132, "learning_rate": 9.98184494329246e-06, "loss": 1.1854, "step": 8820 }, { "epoch": 0.37, "grad_norm": 11.380020725748448, "learning_rate": 9.981739973894649e-06, "loss": 1.2188, "step": 8825 }, { "epoch": 0.37, "grad_norm": 13.801606564301279, "learning_rate": 9.981634702468619e-06, "loss": 1.179, "step": 8830 }, { "epoch": 0.37, "grad_norm": 10.634426159659236, "learning_rate": 9.981529129020748e-06, "loss": 1.2104, "step": 8835 }, { "epoch": 0.37, "grad_norm": 6.448842060217998, "learning_rate": 9.981423253557441e-06, "loss": 1.2178, "step": 8840 }, { "epoch": 0.37, "grad_norm": 8.91483111537622, "learning_rate": 9.981317076085116e-06, "loss": 1.1853, "step": 8845 }, { "epoch": 0.37, "grad_norm": 7.894164928940903, "learning_rate": 9.981210596610209e-06, "loss": 1.2524, "step": 8850 }, { "epoch": 0.37, "grad_norm": 14.56533418155501, "learning_rate": 9.981103815139177e-06, "loss": 1.2328, "step": 8855 }, { "epoch": 0.37, "grad_norm": 23.480236730572294, "learning_rate": 9.980996731678492e-06, "loss": 1.2229, "step": 8860 }, { "epoch": 0.38, "grad_norm": 8.096746190639573, "learning_rate": 9.980889346234647e-06, "loss": 1.2436, "step": 8865 }, { "epoch": 0.38, "grad_norm": 10.167888811625206, "learning_rate": 9.980781658814153e-06, "loss": 1.1952, "step": 8870 }, { "epoch": 0.38, "grad_norm": 6.15643863428365, "learning_rate": 9.98067366942354e-06, "loss": 1.2394, "step": 8875 }, { "epoch": 0.38, "grad_norm": 7.635171481534623, "learning_rate": 9.980565378069351e-06, "loss": 1.2384, "step": 8880 }, { "epoch": 0.38, "grad_norm": 7.9089943342825455, "learning_rate": 9.980456784758155e-06, "loss": 1.1857, "step": 8885 }, { "epoch": 0.38, "grad_norm": 7.2258435060369575, "learning_rate": 9.980347889496535e-06, "loss": 1.227, "step": 8890 }, { "epoch": 0.38, "grad_norm": 7.606807741332121, "learning_rate": 9.980238692291092e-06, "loss": 1.2276, "step": 8895 }, { "epoch": 0.38, "grad_norm": 24.85900410879738, "learning_rate": 9.980129193148447e-06, "loss": 1.2489, "step": 8900 }, { "epoch": 0.38, "grad_norm": 12.428713981368675, "learning_rate": 9.980019392075237e-06, "loss": 1.1592, "step": 8905 }, { "epoch": 0.38, "grad_norm": 7.980839871296275, "learning_rate": 9.979909289078122e-06, "loss": 1.1661, "step": 8910 }, { "epoch": 0.38, "grad_norm": 13.313186641896175, "learning_rate": 9.979798884163777e-06, "loss": 1.2035, "step": 8915 }, { "epoch": 0.38, "grad_norm": 8.548352876330076, "learning_rate": 9.979688177338891e-06, "loss": 1.2039, "step": 8920 }, { "epoch": 0.38, "grad_norm": 8.12293553689391, "learning_rate": 9.97957716861018e-06, "loss": 1.2454, "step": 8925 }, { "epoch": 0.38, "grad_norm": 8.097587309196077, "learning_rate": 9.979465857984374e-06, "loss": 1.1947, "step": 8930 }, { "epoch": 0.38, "grad_norm": 6.829882468003256, "learning_rate": 9.97935424546822e-06, "loss": 1.2133, "step": 8935 }, { "epoch": 0.38, "grad_norm": 23.637873643294103, "learning_rate": 9.979242331068486e-06, "loss": 1.2483, "step": 8940 }, { "epoch": 0.38, "grad_norm": 8.449244133554773, "learning_rate": 9.979130114791956e-06, "loss": 1.2092, "step": 8945 }, { "epoch": 0.38, "grad_norm": 7.49330264158639, "learning_rate": 9.979017596645433e-06, "loss": 1.2071, "step": 8950 }, { "epoch": 0.38, "grad_norm": 16.026036396204606, "learning_rate": 9.97890477663574e-06, "loss": 1.1998, "step": 8955 }, { "epoch": 0.38, "grad_norm": 38.581175690273106, "learning_rate": 9.978791654769715e-06, "loss": 1.202, "step": 8960 }, { "epoch": 0.38, "grad_norm": 16.265219455304024, "learning_rate": 9.97867823105422e-06, "loss": 1.2264, "step": 8965 }, { "epoch": 0.38, "grad_norm": 34.11472475520768, "learning_rate": 9.978564505496127e-06, "loss": 1.2313, "step": 8970 }, { "epoch": 0.38, "grad_norm": 15.337839150110696, "learning_rate": 9.978450478102332e-06, "loss": 1.2064, "step": 8975 }, { "epoch": 0.38, "grad_norm": 18.227860649309974, "learning_rate": 9.97833614887975e-06, "loss": 1.2129, "step": 8980 }, { "epoch": 0.38, "grad_norm": 38.56136195651849, "learning_rate": 9.978221517835313e-06, "loss": 1.2209, "step": 8985 }, { "epoch": 0.38, "grad_norm": 8.615944511583525, "learning_rate": 9.978106584975968e-06, "loss": 1.2103, "step": 8990 }, { "epoch": 0.38, "grad_norm": 7.0190235325719375, "learning_rate": 9.977991350308685e-06, "loss": 1.1965, "step": 8995 }, { "epoch": 0.38, "grad_norm": 17.377226855422318, "learning_rate": 9.977875813840448e-06, "loss": 1.2246, "step": 9000 }, { "epoch": 0.38, "grad_norm": 6.4866811491150065, "learning_rate": 9.977759975578263e-06, "loss": 1.2314, "step": 9005 }, { "epoch": 0.38, "grad_norm": 30.561186684380058, "learning_rate": 9.977643835529154e-06, "loss": 1.1982, "step": 9010 }, { "epoch": 0.38, "grad_norm": 8.614377286199833, "learning_rate": 9.977527393700161e-06, "loss": 1.1769, "step": 9015 }, { "epoch": 0.38, "grad_norm": 13.648626162771638, "learning_rate": 9.977410650098345e-06, "loss": 1.2375, "step": 9020 }, { "epoch": 0.38, "grad_norm": 6.88166927988688, "learning_rate": 9.977293604730783e-06, "loss": 1.1966, "step": 9025 }, { "epoch": 0.38, "grad_norm": 6.355959328176522, "learning_rate": 9.977176257604569e-06, "loss": 1.2331, "step": 9030 }, { "epoch": 0.38, "grad_norm": 6.856032834593801, "learning_rate": 9.97705860872682e-06, "loss": 1.1486, "step": 9035 }, { "epoch": 0.38, "grad_norm": 35.17041070558963, "learning_rate": 9.97694065810467e-06, "loss": 1.1925, "step": 9040 }, { "epoch": 0.38, "grad_norm": 51.191372284353925, "learning_rate": 9.976822405745266e-06, "loss": 1.2622, "step": 9045 }, { "epoch": 0.38, "grad_norm": 44.731476925687744, "learning_rate": 9.976703851655777e-06, "loss": 1.2073, "step": 9050 }, { "epoch": 0.38, "grad_norm": 51.585680710726216, "learning_rate": 9.976584995843397e-06, "loss": 1.1665, "step": 9055 }, { "epoch": 0.38, "grad_norm": 74.17018898861281, "learning_rate": 9.976465838315326e-06, "loss": 1.2279, "step": 9060 }, { "epoch": 0.38, "grad_norm": 15.071347112116781, "learning_rate": 9.97634637907879e-06, "loss": 1.2556, "step": 9065 }, { "epoch": 0.38, "grad_norm": 15.740017956468256, "learning_rate": 9.976226618141032e-06, "loss": 1.216, "step": 9070 }, { "epoch": 0.38, "grad_norm": 24.379319275867136, "learning_rate": 9.97610655550931e-06, "loss": 1.2112, "step": 9075 }, { "epoch": 0.38, "grad_norm": 18.377575647372183, "learning_rate": 9.975986191190907e-06, "loss": 1.2016, "step": 9080 }, { "epoch": 0.38, "grad_norm": 12.094704476883008, "learning_rate": 9.975865525193117e-06, "loss": 1.2667, "step": 9085 }, { "epoch": 0.38, "grad_norm": 21.242836312515045, "learning_rate": 9.97574455752326e-06, "loss": 1.2677, "step": 9090 }, { "epoch": 0.38, "grad_norm": 17.88504325575899, "learning_rate": 9.975623288188663e-06, "loss": 1.2623, "step": 9095 }, { "epoch": 0.39, "grad_norm": 13.534679209993314, "learning_rate": 9.975501717196685e-06, "loss": 1.2321, "step": 9100 }, { "epoch": 0.39, "grad_norm": 30.52300065728163, "learning_rate": 9.975379844554693e-06, "loss": 1.233, "step": 9105 }, { "epoch": 0.39, "grad_norm": 38.665283899913156, "learning_rate": 9.975257670270075e-06, "loss": 1.2354, "step": 9110 }, { "epoch": 0.39, "grad_norm": 36.6414471320579, "learning_rate": 9.975135194350242e-06, "loss": 1.2214, "step": 9115 }, { "epoch": 0.39, "grad_norm": 32.84228839088985, "learning_rate": 9.975012416802616e-06, "loss": 1.2103, "step": 9120 }, { "epoch": 0.39, "grad_norm": 15.323335299056941, "learning_rate": 9.974889337634641e-06, "loss": 1.245, "step": 9125 }, { "epoch": 0.39, "grad_norm": 9.934028539857001, "learning_rate": 9.97476595685378e-06, "loss": 1.1905, "step": 9130 }, { "epoch": 0.39, "grad_norm": 15.674077430134009, "learning_rate": 9.974642274467514e-06, "loss": 1.2332, "step": 9135 }, { "epoch": 0.39, "grad_norm": 8.956130481943227, "learning_rate": 9.974518290483337e-06, "loss": 1.2256, "step": 9140 }, { "epoch": 0.39, "grad_norm": 10.718214599864837, "learning_rate": 9.974394004908772e-06, "loss": 1.1681, "step": 9145 }, { "epoch": 0.39, "grad_norm": 22.08091334930836, "learning_rate": 9.97426941775135e-06, "loss": 1.2137, "step": 9150 }, { "epoch": 0.39, "grad_norm": 25.003739736335447, "learning_rate": 9.974144529018624e-06, "loss": 1.2542, "step": 9155 }, { "epoch": 0.39, "grad_norm": 10.06498327456631, "learning_rate": 9.974019338718169e-06, "loss": 1.1996, "step": 9160 }, { "epoch": 0.39, "grad_norm": 6.305698985940414, "learning_rate": 9.973893846857571e-06, "loss": 1.2505, "step": 9165 }, { "epoch": 0.39, "grad_norm": 32.528895746257916, "learning_rate": 9.973768053444442e-06, "loss": 1.2005, "step": 9170 }, { "epoch": 0.39, "grad_norm": 8.681747307156694, "learning_rate": 9.973641958486406e-06, "loss": 1.2146, "step": 9175 }, { "epoch": 0.39, "grad_norm": 43.466986428302825, "learning_rate": 9.97351556199111e-06, "loss": 1.244, "step": 9180 }, { "epoch": 0.39, "grad_norm": 24.838146302228044, "learning_rate": 9.973388863966212e-06, "loss": 1.1969, "step": 9185 }, { "epoch": 0.39, "grad_norm": 18.117779674843764, "learning_rate": 9.9732618644194e-06, "loss": 1.2609, "step": 9190 }, { "epoch": 0.39, "grad_norm": 7.313913530776297, "learning_rate": 9.973134563358368e-06, "loss": 1.1797, "step": 9195 }, { "epoch": 0.39, "grad_norm": 7.817488959664177, "learning_rate": 9.973006960790838e-06, "loss": 1.1973, "step": 9200 }, { "epoch": 0.39, "grad_norm": 15.319552373347777, "learning_rate": 9.972879056724543e-06, "loss": 1.2126, "step": 9205 }, { "epoch": 0.39, "grad_norm": 9.863014477728541, "learning_rate": 9.97275085116724e-06, "loss": 1.2109, "step": 9210 }, { "epoch": 0.39, "grad_norm": 9.692998717379215, "learning_rate": 9.972622344126698e-06, "loss": 1.201, "step": 9215 }, { "epoch": 0.39, "grad_norm": 13.32763675096636, "learning_rate": 9.972493535610714e-06, "loss": 1.2042, "step": 9220 }, { "epoch": 0.39, "grad_norm": 16.761512287758755, "learning_rate": 9.972364425627093e-06, "loss": 1.2137, "step": 9225 }, { "epoch": 0.39, "grad_norm": 24.78386503786613, "learning_rate": 9.972235014183663e-06, "loss": 1.2106, "step": 9230 }, { "epoch": 0.39, "grad_norm": 10.916391574474108, "learning_rate": 9.97210530128827e-06, "loss": 1.2147, "step": 9235 }, { "epoch": 0.39, "grad_norm": 17.10123306781741, "learning_rate": 9.97197528694878e-06, "loss": 1.244, "step": 9240 }, { "epoch": 0.39, "grad_norm": 19.835980329009985, "learning_rate": 9.971844971173073e-06, "loss": 1.2327, "step": 9245 }, { "epoch": 0.39, "grad_norm": 36.644335544164925, "learning_rate": 9.97171435396905e-06, "loss": 1.1874, "step": 9250 }, { "epoch": 0.39, "grad_norm": 34.14895463364265, "learning_rate": 9.97158343534463e-06, "loss": 1.1941, "step": 9255 }, { "epoch": 0.39, "grad_norm": 14.07805329527793, "learning_rate": 9.97145221530775e-06, "loss": 1.1773, "step": 9260 }, { "epoch": 0.39, "grad_norm": 8.123960321358524, "learning_rate": 9.971320693866368e-06, "loss": 1.244, "step": 9265 }, { "epoch": 0.39, "grad_norm": 36.284677104994806, "learning_rate": 9.971188871028455e-06, "loss": 1.1978, "step": 9270 }, { "epoch": 0.39, "grad_norm": 23.821124025631523, "learning_rate": 9.971056746802004e-06, "loss": 1.2352, "step": 9275 }, { "epoch": 0.39, "grad_norm": 36.58726253370013, "learning_rate": 9.970924321195024e-06, "loss": 1.2086, "step": 9280 }, { "epoch": 0.39, "grad_norm": 27.682805783908606, "learning_rate": 9.970791594215547e-06, "loss": 1.2619, "step": 9285 }, { "epoch": 0.39, "grad_norm": 10.743290347808301, "learning_rate": 9.970658565871615e-06, "loss": 1.2008, "step": 9290 }, { "epoch": 0.39, "grad_norm": 8.412098045778697, "learning_rate": 9.970525236171297e-06, "loss": 1.2233, "step": 9295 }, { "epoch": 0.39, "grad_norm": 25.903932901089096, "learning_rate": 9.970391605122676e-06, "loss": 1.2391, "step": 9300 }, { "epoch": 0.39, "grad_norm": 7.922382831878737, "learning_rate": 9.970257672733852e-06, "loss": 1.216, "step": 9305 }, { "epoch": 0.39, "grad_norm": 15.014446775158502, "learning_rate": 9.970123439012946e-06, "loss": 1.1972, "step": 9310 }, { "epoch": 0.39, "grad_norm": 18.875799486524393, "learning_rate": 9.969988903968095e-06, "loss": 1.2416, "step": 9315 }, { "epoch": 0.39, "grad_norm": 14.636377736334097, "learning_rate": 9.969854067607456e-06, "loss": 1.226, "step": 9320 }, { "epoch": 0.39, "grad_norm": 12.341771794662598, "learning_rate": 9.969718929939205e-06, "loss": 1.2059, "step": 9325 }, { "epoch": 0.39, "grad_norm": 8.616163269366373, "learning_rate": 9.969583490971535e-06, "loss": 1.2015, "step": 9330 }, { "epoch": 0.4, "grad_norm": 10.245377853271005, "learning_rate": 9.969447750712655e-06, "loss": 1.2215, "step": 9335 }, { "epoch": 0.4, "grad_norm": 16.460400561065228, "learning_rate": 9.969311709170796e-06, "loss": 1.2048, "step": 9340 }, { "epoch": 0.4, "grad_norm": 13.400660646223276, "learning_rate": 9.969175366354204e-06, "loss": 1.2148, "step": 9345 }, { "epoch": 0.4, "grad_norm": 15.307860109074186, "learning_rate": 9.96903872227115e-06, "loss": 1.2117, "step": 9350 }, { "epoch": 0.4, "grad_norm": 11.560237957166326, "learning_rate": 9.968901776929913e-06, "loss": 1.2413, "step": 9355 }, { "epoch": 0.4, "grad_norm": 12.224681080050976, "learning_rate": 9.968764530338799e-06, "loss": 1.1956, "step": 9360 }, { "epoch": 0.4, "grad_norm": 92.6125976287972, "learning_rate": 9.968626982506127e-06, "loss": 1.2209, "step": 9365 }, { "epoch": 0.4, "grad_norm": 48.374082869780395, "learning_rate": 9.968489133440235e-06, "loss": 1.2498, "step": 9370 }, { "epoch": 0.4, "grad_norm": 51.47521895669246, "learning_rate": 9.968350983149484e-06, "loss": 1.228, "step": 9375 }, { "epoch": 0.4, "grad_norm": 39.49127084087763, "learning_rate": 9.968212531642247e-06, "loss": 1.1937, "step": 9380 }, { "epoch": 0.4, "grad_norm": 71.86835692271305, "learning_rate": 9.968073778926919e-06, "loss": 1.2229, "step": 9385 }, { "epoch": 0.4, "grad_norm": 47.10702971907258, "learning_rate": 9.967934725011911e-06, "loss": 1.2573, "step": 9390 }, { "epoch": 0.4, "grad_norm": 61.89525407565561, "learning_rate": 9.967795369905654e-06, "loss": 1.2283, "step": 9395 }, { "epoch": 0.4, "grad_norm": 8.992972884755579, "learning_rate": 9.967655713616598e-06, "loss": 1.2561, "step": 9400 }, { "epoch": 0.4, "grad_norm": 16.67991763705181, "learning_rate": 9.967515756153207e-06, "loss": 1.2726, "step": 9405 }, { "epoch": 0.4, "grad_norm": 8.497996936958751, "learning_rate": 9.967375497523969e-06, "loss": 1.2193, "step": 9410 }, { "epoch": 0.4, "grad_norm": 25.499726306243453, "learning_rate": 9.967234937737387e-06, "loss": 1.2312, "step": 9415 }, { "epoch": 0.4, "grad_norm": 7.283297652763751, "learning_rate": 9.967094076801982e-06, "loss": 1.1984, "step": 9420 }, { "epoch": 0.4, "grad_norm": 6.781712508453821, "learning_rate": 9.966952914726294e-06, "loss": 1.2253, "step": 9425 }, { "epoch": 0.4, "grad_norm": 15.504400914115953, "learning_rate": 9.96681145151888e-06, "loss": 1.2019, "step": 9430 }, { "epoch": 0.4, "grad_norm": 17.823813006269976, "learning_rate": 9.966669687188319e-06, "loss": 1.187, "step": 9435 }, { "epoch": 0.4, "grad_norm": 59.731765636649634, "learning_rate": 9.966527621743206e-06, "loss": 1.2216, "step": 9440 }, { "epoch": 0.4, "grad_norm": 23.48687006658772, "learning_rate": 9.96638525519215e-06, "loss": 1.222, "step": 9445 }, { "epoch": 0.4, "grad_norm": 36.304092430688456, "learning_rate": 9.966242587543787e-06, "loss": 1.2238, "step": 9450 }, { "epoch": 0.4, "grad_norm": 16.035660789309322, "learning_rate": 9.966099618806764e-06, "loss": 1.2561, "step": 9455 }, { "epoch": 0.4, "grad_norm": 133.60400532452636, "learning_rate": 9.96595634898975e-06, "loss": 1.5928, "step": 9460 }, { "epoch": 0.4, "grad_norm": 1246.3639950308245, "learning_rate": 9.96581277810143e-06, "loss": 3.5181, "step": 9465 }, { "epoch": 0.4, "grad_norm": 1564.101183261985, "learning_rate": 9.965668906150508e-06, "loss": 13.1298, "step": 9470 }, { "epoch": 0.4, "grad_norm": 648.1757424480604, "learning_rate": 9.965524733145705e-06, "loss": 31.2339, "step": 9475 }, { "epoch": 0.4, "grad_norm": 435.7401949488054, "learning_rate": 9.965380259095767e-06, "loss": 22.4813, "step": 9480 }, { "epoch": 0.4, "grad_norm": 207.38204420943876, "learning_rate": 9.965235484009449e-06, "loss": 15.5626, "step": 9485 }, { "epoch": 0.4, "grad_norm": 131.22620060772448, "learning_rate": 9.965090407895529e-06, "loss": 13.1922, "step": 9490 }, { "epoch": 0.4, "grad_norm": 170.61189718333307, "learning_rate": 9.964945030762803e-06, "loss": 10.9917, "step": 9495 }, { "epoch": 0.4, "grad_norm": 154.81543956380625, "learning_rate": 9.964799352620084e-06, "loss": 9.926, "step": 9500 }, { "epoch": 0.4, "grad_norm": 41.39714851341611, "learning_rate": 9.964653373476206e-06, "loss": 9.0876, "step": 9505 }, { "epoch": 0.4, "grad_norm": 45.46295761377743, "learning_rate": 9.964507093340015e-06, "loss": 8.4916, "step": 9510 }, { "epoch": 0.4, "grad_norm": 45.630639893960875, "learning_rate": 9.964360512220384e-06, "loss": 8.2392, "step": 9515 }, { "epoch": 0.4, "grad_norm": 28.34539297347392, "learning_rate": 9.964213630126198e-06, "loss": 7.8186, "step": 9520 }, { "epoch": 0.4, "grad_norm": 51.916435761534224, "learning_rate": 9.964066447066363e-06, "loss": 7.7061, "step": 9525 }, { "epoch": 0.4, "grad_norm": 37.494201471517435, "learning_rate": 9.963918963049802e-06, "loss": 7.4171, "step": 9530 }, { "epoch": 0.4, "grad_norm": 23.851659564101183, "learning_rate": 9.963771178085453e-06, "loss": 7.3364, "step": 9535 }, { "epoch": 0.4, "grad_norm": 40.157847317787926, "learning_rate": 9.963623092182282e-06, "loss": 7.1963, "step": 9540 }, { "epoch": 0.4, "grad_norm": 25.138830161265012, "learning_rate": 9.963474705349261e-06, "loss": 6.9885, "step": 9545 }, { "epoch": 0.4, "grad_norm": 89.29539425932496, "learning_rate": 9.963326017595391e-06, "loss": 6.9163, "step": 9550 }, { "epoch": 0.4, "grad_norm": 42.64818735240843, "learning_rate": 9.963177028929684e-06, "loss": 6.9437, "step": 9555 }, { "epoch": 0.4, "grad_norm": 35.26096380157238, "learning_rate": 9.963027739361173e-06, "loss": 6.8422, "step": 9560 }, { "epoch": 0.4, "grad_norm": 251.2796965724704, "learning_rate": 9.96287814889891e-06, "loss": 6.8281, "step": 9565 }, { "epoch": 0.41, "grad_norm": 130.01734608880602, "learning_rate": 9.962728257551962e-06, "loss": 6.8942, "step": 9570 }, { "epoch": 0.41, "grad_norm": 54.406652980621814, "learning_rate": 9.962578065329418e-06, "loss": 6.6734, "step": 9575 }, { "epoch": 0.41, "grad_norm": 31.478822197942467, "learning_rate": 9.962427572240386e-06, "loss": 6.6376, "step": 9580 }, { "epoch": 0.41, "grad_norm": 19.134982938702933, "learning_rate": 9.962276778293985e-06, "loss": 6.5207, "step": 9585 }, { "epoch": 0.41, "grad_norm": 75.0329054930933, "learning_rate": 9.96212568349936e-06, "loss": 6.5194, "step": 9590 }, { "epoch": 0.41, "grad_norm": 69.17552242907516, "learning_rate": 9.961974287865673e-06, "loss": 6.4638, "step": 9595 }, { "epoch": 0.41, "grad_norm": 26.344162018491446, "learning_rate": 9.961822591402099e-06, "loss": 6.4512, "step": 9600 }, { "epoch": 0.41, "grad_norm": 64.74865605741786, "learning_rate": 9.961670594117838e-06, "loss": 6.4404, "step": 9605 }, { "epoch": 0.41, "grad_norm": 133.72320086340795, "learning_rate": 9.961518296022103e-06, "loss": 6.367, "step": 9610 }, { "epoch": 0.41, "grad_norm": 85.25682159143611, "learning_rate": 9.96136569712413e-06, "loss": 6.3148, "step": 9615 }, { "epoch": 0.41, "grad_norm": 76.04932912541877, "learning_rate": 9.961212797433168e-06, "loss": 6.3086, "step": 9620 }, { "epoch": 0.41, "grad_norm": 47.48107198192422, "learning_rate": 9.961059596958487e-06, "loss": 6.0329, "step": 9625 }, { "epoch": 0.41, "grad_norm": 560.7905988781308, "learning_rate": 9.960906095709378e-06, "loss": 7.3474, "step": 9630 }, { "epoch": 0.41, "grad_norm": 107.42656906943576, "learning_rate": 9.960752293695142e-06, "loss": 8.1501, "step": 9635 }, { "epoch": 0.41, "grad_norm": 140.85335787901693, "learning_rate": 9.96059819092511e-06, "loss": 7.6273, "step": 9640 }, { "epoch": 0.41, "grad_norm": 76.80491095184355, "learning_rate": 9.96044378740862e-06, "loss": 7.2934, "step": 9645 }, { "epoch": 0.41, "grad_norm": 88.51053018950759, "learning_rate": 9.960289083155035e-06, "loss": 7.1166, "step": 9650 }, { "epoch": 0.41, "grad_norm": 86.82587076796945, "learning_rate": 9.960134078173734e-06, "loss": 6.9439, "step": 9655 }, { "epoch": 0.41, "grad_norm": 80.49749753416499, "learning_rate": 9.959978772474113e-06, "loss": 6.7618, "step": 9660 }, { "epoch": 0.41, "grad_norm": 82.71371750955271, "learning_rate": 9.959823166065592e-06, "loss": 6.6756, "step": 9665 }, { "epoch": 0.41, "grad_norm": 44.079068344126675, "learning_rate": 9.9596672589576e-06, "loss": 6.5916, "step": 9670 }, { "epoch": 0.41, "grad_norm": 34.73313355427132, "learning_rate": 9.95951105115959e-06, "loss": 6.5831, "step": 9675 }, { "epoch": 0.41, "grad_norm": 34.90671996650152, "learning_rate": 9.959354542681037e-06, "loss": 6.3289, "step": 9680 }, { "epoch": 0.41, "grad_norm": 32.08593072464212, "learning_rate": 9.959197733531425e-06, "loss": 6.5721, "step": 9685 }, { "epoch": 0.41, "grad_norm": 81.5408683228983, "learning_rate": 9.959040623720261e-06, "loss": 6.4341, "step": 9690 }, { "epoch": 0.41, "grad_norm": 18.9408354844983, "learning_rate": 9.958883213257074e-06, "loss": 6.3347, "step": 9695 }, { "epoch": 0.41, "grad_norm": 18.890746090474316, "learning_rate": 9.958725502151402e-06, "loss": 6.2115, "step": 9700 }, { "epoch": 0.41, "grad_norm": 18.216768757550874, "learning_rate": 9.95856749041281e-06, "loss": 6.2049, "step": 9705 }, { "epoch": 0.41, "grad_norm": 26.134445645789967, "learning_rate": 9.958409178050878e-06, "loss": 6.134, "step": 9710 }, { "epoch": 0.41, "grad_norm": 27.01987844611143, "learning_rate": 9.958250565075202e-06, "loss": 6.1552, "step": 9715 }, { "epoch": 0.41, "grad_norm": 29.999863123436292, "learning_rate": 9.958091651495399e-06, "loss": 6.0328, "step": 9720 }, { "epoch": 0.41, "grad_norm": 31.27345738732662, "learning_rate": 9.957932437321104e-06, "loss": 5.9652, "step": 9725 }, { "epoch": 0.41, "grad_norm": 42.5058427296302, "learning_rate": 9.95777292256197e-06, "loss": 5.8758, "step": 9730 }, { "epoch": 0.41, "grad_norm": 57.62779698359081, "learning_rate": 9.957613107227666e-06, "loss": 5.8712, "step": 9735 }, { "epoch": 0.41, "grad_norm": 52.777051997228845, "learning_rate": 9.957452991327882e-06, "loss": 5.7757, "step": 9740 }, { "epoch": 0.41, "grad_norm": 166.2712541342479, "learning_rate": 9.957292574872329e-06, "loss": 5.8184, "step": 9745 }, { "epoch": 0.41, "grad_norm": 82.29460628475253, "learning_rate": 9.957131857870728e-06, "loss": 6.0833, "step": 9750 }, { "epoch": 0.41, "grad_norm": 38.91824140406657, "learning_rate": 9.956970840332823e-06, "loss": 5.8787, "step": 9755 }, { "epoch": 0.41, "grad_norm": 72.19171063211087, "learning_rate": 9.956809522268378e-06, "loss": 5.7328, "step": 9760 }, { "epoch": 0.41, "grad_norm": 125.33977216827597, "learning_rate": 9.95664790368717e-06, "loss": 5.5346, "step": 9765 }, { "epoch": 0.41, "grad_norm": 42.511155139928945, "learning_rate": 9.956485984599003e-06, "loss": 5.4775, "step": 9770 }, { "epoch": 0.41, "grad_norm": 92.39804911136169, "learning_rate": 9.95632376501369e-06, "loss": 5.3655, "step": 9775 }, { "epoch": 0.41, "grad_norm": 270.82508199954304, "learning_rate": 9.956161244941067e-06, "loss": 5.1565, "step": 9780 }, { "epoch": 0.41, "grad_norm": 105.35039052278564, "learning_rate": 9.955998424390985e-06, "loss": 5.1373, "step": 9785 }, { "epoch": 0.41, "grad_norm": 230.6278558606637, "learning_rate": 9.955835303373318e-06, "loss": 4.9518, "step": 9790 }, { "epoch": 0.41, "grad_norm": 57.15879498637197, "learning_rate": 9.955671881897954e-06, "loss": 4.8838, "step": 9795 }, { "epoch": 0.41, "grad_norm": 62.03105504704029, "learning_rate": 9.9555081599748e-06, "loss": 4.4759, "step": 9800 }, { "epoch": 0.41, "grad_norm": 164.88866486764255, "learning_rate": 9.955344137613785e-06, "loss": 4.0557, "step": 9805 }, { "epoch": 0.42, "grad_norm": 76.10018504744276, "learning_rate": 9.955179814824852e-06, "loss": 3.7505, "step": 9810 }, { "epoch": 0.42, "grad_norm": 123.94440081408953, "learning_rate": 9.95501519161796e-06, "loss": 3.4826, "step": 9815 }, { "epoch": 0.42, "grad_norm": 23.852795754122074, "learning_rate": 9.954850268003095e-06, "loss": 3.0169, "step": 9820 }, { "epoch": 0.42, "grad_norm": 58.00109644180814, "learning_rate": 9.954685043990252e-06, "loss": 2.7229, "step": 9825 }, { "epoch": 0.42, "grad_norm": 45.564132396663126, "learning_rate": 9.954519519589449e-06, "loss": 2.4982, "step": 9830 }, { "epoch": 0.42, "grad_norm": 89.10698141972962, "learning_rate": 9.954353694810722e-06, "loss": 2.4265, "step": 9835 }, { "epoch": 0.42, "grad_norm": 13.89724085652203, "learning_rate": 9.954187569664124e-06, "loss": 2.2177, "step": 9840 }, { "epoch": 0.42, "grad_norm": 16.38689386273016, "learning_rate": 9.954021144159726e-06, "loss": 2.1623, "step": 9845 }, { "epoch": 0.42, "grad_norm": 64.54936960834726, "learning_rate": 9.95385441830762e-06, "loss": 1.8974, "step": 9850 }, { "epoch": 0.42, "grad_norm": 26.561861795726873, "learning_rate": 9.953687392117913e-06, "loss": 1.7854, "step": 9855 }, { "epoch": 0.42, "grad_norm": 32.04597088306472, "learning_rate": 9.95352006560073e-06, "loss": 1.9033, "step": 9860 }, { "epoch": 0.42, "grad_norm": 33.74923644329989, "learning_rate": 9.953352438766217e-06, "loss": 1.9048, "step": 9865 }, { "epoch": 0.42, "grad_norm": 88.22758671179918, "learning_rate": 9.953184511624535e-06, "loss": 1.8914, "step": 9870 }, { "epoch": 0.42, "grad_norm": 48.07428025503431, "learning_rate": 9.953016284185867e-06, "loss": 1.847, "step": 9875 }, { "epoch": 0.42, "grad_norm": 27.518829521750558, "learning_rate": 9.952847756460411e-06, "loss": 1.6421, "step": 9880 }, { "epoch": 0.42, "grad_norm": 70.77074742148697, "learning_rate": 9.952678928458384e-06, "loss": 1.6483, "step": 9885 }, { "epoch": 0.42, "grad_norm": 22.329833525195394, "learning_rate": 9.952509800190023e-06, "loss": 1.5452, "step": 9890 }, { "epoch": 0.42, "grad_norm": 22.527894812873893, "learning_rate": 9.952340371665581e-06, "loss": 1.5635, "step": 9895 }, { "epoch": 0.42, "grad_norm": 30.679732319240273, "learning_rate": 9.952170642895331e-06, "loss": 1.5152, "step": 9900 }, { "epoch": 0.42, "grad_norm": 15.910460973885947, "learning_rate": 9.95200061388956e-06, "loss": 1.4791, "step": 9905 }, { "epoch": 0.42, "grad_norm": 15.673746860152601, "learning_rate": 9.951830284658579e-06, "loss": 1.4556, "step": 9910 }, { "epoch": 0.42, "grad_norm": 20.242012716298134, "learning_rate": 9.951659655212713e-06, "loss": 1.5105, "step": 9915 }, { "epoch": 0.42, "grad_norm": 10.390724594801506, "learning_rate": 9.951488725562309e-06, "loss": 1.4504, "step": 9920 }, { "epoch": 0.42, "grad_norm": 8.077366181486358, "learning_rate": 9.951317495717729e-06, "loss": 1.4336, "step": 9925 }, { "epoch": 0.42, "grad_norm": 8.71516293169589, "learning_rate": 9.951145965689353e-06, "loss": 1.3927, "step": 9930 }, { "epoch": 0.42, "grad_norm": 12.01714917542507, "learning_rate": 9.950974135487581e-06, "loss": 1.3778, "step": 9935 }, { "epoch": 0.42, "grad_norm": 18.000653886509415, "learning_rate": 9.950802005122832e-06, "loss": 1.3947, "step": 9940 }, { "epoch": 0.42, "grad_norm": 13.401114901020838, "learning_rate": 9.950629574605539e-06, "loss": 1.365, "step": 9945 }, { "epoch": 0.42, "grad_norm": 7.594615964471209, "learning_rate": 9.95045684394616e-06, "loss": 1.2829, "step": 9950 }, { "epoch": 0.42, "grad_norm": 9.144439761709952, "learning_rate": 9.950283813155162e-06, "loss": 1.3727, "step": 9955 }, { "epoch": 0.42, "grad_norm": 7.608225881956869, "learning_rate": 9.950110482243037e-06, "loss": 1.3639, "step": 9960 }, { "epoch": 0.42, "grad_norm": 7.918362963941835, "learning_rate": 9.949936851220297e-06, "loss": 1.4023, "step": 9965 }, { "epoch": 0.42, "grad_norm": 6.838873323810942, "learning_rate": 9.949762920097466e-06, "loss": 1.2959, "step": 9970 }, { "epoch": 0.42, "grad_norm": 7.2778845793516975, "learning_rate": 9.949588688885088e-06, "loss": 1.3927, "step": 9975 }, { "epoch": 0.42, "grad_norm": 6.547618869449657, "learning_rate": 9.949414157593726e-06, "loss": 1.3341, "step": 9980 }, { "epoch": 0.42, "grad_norm": 11.497332697543262, "learning_rate": 9.949239326233967e-06, "loss": 1.3462, "step": 9985 }, { "epoch": 0.42, "grad_norm": 6.97726774216965, "learning_rate": 9.949064194816403e-06, "loss": 1.3584, "step": 9990 }, { "epoch": 0.42, "grad_norm": 9.34222557050974, "learning_rate": 9.948888763351655e-06, "loss": 1.3885, "step": 9995 }, { "epoch": 0.42, "grad_norm": 14.622687670045263, "learning_rate": 9.948713031850359e-06, "loss": 1.363, "step": 10000 }, { "epoch": 0.42, "grad_norm": 23.099097828415495, "learning_rate": 9.948537000323168e-06, "loss": 1.2767, "step": 10005 }, { "epoch": 0.42, "grad_norm": 10.171415356852316, "learning_rate": 9.948360668780757e-06, "loss": 1.3293, "step": 10010 }, { "epoch": 0.42, "grad_norm": 6.526871210026702, "learning_rate": 9.948184037233812e-06, "loss": 1.3387, "step": 10015 }, { "epoch": 0.42, "grad_norm": 10.064163367669915, "learning_rate": 9.948007105693047e-06, "loss": 1.31, "step": 10020 }, { "epoch": 0.42, "grad_norm": 14.252667667761068, "learning_rate": 9.947829874169183e-06, "loss": 1.3373, "step": 10025 }, { "epoch": 0.42, "grad_norm": 18.30752951354965, "learning_rate": 9.947652342672969e-06, "loss": 1.2854, "step": 10030 }, { "epoch": 0.42, "grad_norm": 16.352958613582345, "learning_rate": 9.94747451121517e-06, "loss": 1.3213, "step": 10035 }, { "epoch": 0.42, "grad_norm": 12.29479058175808, "learning_rate": 9.947296379806561e-06, "loss": 1.2825, "step": 10040 }, { "epoch": 0.43, "grad_norm": 13.01481681442109, "learning_rate": 9.947117948457948e-06, "loss": 1.2957, "step": 10045 }, { "epoch": 0.43, "grad_norm": 7.300700150619039, "learning_rate": 9.946939217180143e-06, "loss": 1.2922, "step": 10050 }, { "epoch": 0.43, "grad_norm": 20.127238513405484, "learning_rate": 9.946760185983988e-06, "loss": 1.2786, "step": 10055 }, { "epoch": 0.43, "grad_norm": 16.126504988198615, "learning_rate": 9.946580854880332e-06, "loss": 1.2778, "step": 10060 }, { "epoch": 0.43, "grad_norm": 23.495416520452515, "learning_rate": 9.94640122388005e-06, "loss": 1.2576, "step": 10065 }, { "epoch": 0.43, "grad_norm": 26.79450746953665, "learning_rate": 9.946221292994033e-06, "loss": 1.2895, "step": 10070 }, { "epoch": 0.43, "grad_norm": 12.657243521737724, "learning_rate": 9.946041062233188e-06, "loss": 1.2628, "step": 10075 }, { "epoch": 0.43, "grad_norm": 18.45985948516765, "learning_rate": 9.945860531608442e-06, "loss": 1.2894, "step": 10080 }, { "epoch": 0.43, "grad_norm": 23.609942320006592, "learning_rate": 9.945679701130739e-06, "loss": 1.2995, "step": 10085 }, { "epoch": 0.43, "grad_norm": 11.477147046333467, "learning_rate": 9.945498570811045e-06, "loss": 1.2799, "step": 10090 }, { "epoch": 0.43, "grad_norm": 14.371002370930244, "learning_rate": 9.945317140660342e-06, "loss": 1.33, "step": 10095 }, { "epoch": 0.43, "grad_norm": 7.251357492721981, "learning_rate": 9.945135410689626e-06, "loss": 1.2775, "step": 10100 }, { "epoch": 0.43, "grad_norm": 7.784609324227422, "learning_rate": 9.944953380909917e-06, "loss": 1.2835, "step": 10105 }, { "epoch": 0.43, "grad_norm": 7.5387534223853185, "learning_rate": 9.944771051332249e-06, "loss": 1.2862, "step": 10110 }, { "epoch": 0.43, "grad_norm": 6.798370850984581, "learning_rate": 9.944588421967677e-06, "loss": 1.2451, "step": 10115 }, { "epoch": 0.43, "grad_norm": 6.34992074620122, "learning_rate": 9.944405492827277e-06, "loss": 1.2479, "step": 10120 }, { "epoch": 0.43, "grad_norm": 11.835843580870858, "learning_rate": 9.944222263922135e-06, "loss": 1.2436, "step": 10125 }, { "epoch": 0.43, "grad_norm": 20.53673234569956, "learning_rate": 9.944038735263359e-06, "loss": 1.2684, "step": 10130 }, { "epoch": 0.43, "grad_norm": 18.354472401129364, "learning_rate": 9.943854906862078e-06, "loss": 1.293, "step": 10135 }, { "epoch": 0.43, "grad_norm": 12.749836491121126, "learning_rate": 9.943670778729438e-06, "loss": 1.2302, "step": 10140 }, { "epoch": 0.43, "grad_norm": 7.777341395366744, "learning_rate": 9.943486350876599e-06, "loss": 1.249, "step": 10145 }, { "epoch": 0.43, "grad_norm": 11.779285606737107, "learning_rate": 9.943301623314745e-06, "loss": 1.2554, "step": 10150 }, { "epoch": 0.43, "grad_norm": 25.19685136713185, "learning_rate": 9.943116596055076e-06, "loss": 1.2424, "step": 10155 }, { "epoch": 0.43, "grad_norm": 8.531134343319577, "learning_rate": 9.942931269108807e-06, "loss": 1.2701, "step": 10160 }, { "epoch": 0.43, "grad_norm": 22.493845189413534, "learning_rate": 9.942745642487174e-06, "loss": 1.2611, "step": 10165 }, { "epoch": 0.43, "grad_norm": 9.022938427879689, "learning_rate": 9.942559716201432e-06, "loss": 1.2111, "step": 10170 }, { "epoch": 0.43, "grad_norm": 5.996480938568168, "learning_rate": 9.942373490262856e-06, "loss": 1.2027, "step": 10175 }, { "epoch": 0.43, "grad_norm": 6.126233348452894, "learning_rate": 9.94218696468273e-06, "loss": 1.196, "step": 10180 }, { "epoch": 0.43, "grad_norm": 7.269249382801538, "learning_rate": 9.942000139472368e-06, "loss": 1.2359, "step": 10185 }, { "epoch": 0.43, "grad_norm": 5.743907405383896, "learning_rate": 9.941813014643093e-06, "loss": 1.2303, "step": 10190 }, { "epoch": 0.43, "grad_norm": 20.409102476243174, "learning_rate": 9.941625590206254e-06, "loss": 1.2467, "step": 10195 }, { "epoch": 0.43, "grad_norm": 18.379766589318194, "learning_rate": 9.94143786617321e-06, "loss": 1.2728, "step": 10200 }, { "epoch": 0.43, "grad_norm": 29.223060156588254, "learning_rate": 9.941249842555344e-06, "loss": 1.2467, "step": 10205 }, { "epoch": 0.43, "grad_norm": 14.571588802146547, "learning_rate": 9.941061519364054e-06, "loss": 1.2114, "step": 10210 }, { "epoch": 0.43, "grad_norm": 6.506940018892727, "learning_rate": 9.94087289661076e-06, "loss": 1.2424, "step": 10215 }, { "epoch": 0.43, "grad_norm": 14.040622817211133, "learning_rate": 9.940683974306896e-06, "loss": 1.2287, "step": 10220 }, { "epoch": 0.43, "grad_norm": 7.128572303196038, "learning_rate": 9.940494752463917e-06, "loss": 1.2209, "step": 10225 }, { "epoch": 0.43, "grad_norm": 12.880363564274072, "learning_rate": 9.940305231093291e-06, "loss": 1.2293, "step": 10230 }, { "epoch": 0.43, "grad_norm": 7.2771416985494914, "learning_rate": 9.940115410206512e-06, "loss": 1.2518, "step": 10235 }, { "epoch": 0.43, "grad_norm": 6.486797815515412, "learning_rate": 9.939925289815087e-06, "loss": 1.2335, "step": 10240 }, { "epoch": 0.43, "grad_norm": 14.069423242264826, "learning_rate": 9.939734869930544e-06, "loss": 1.2441, "step": 10245 }, { "epoch": 0.43, "grad_norm": 13.42878969194725, "learning_rate": 9.939544150564425e-06, "loss": 1.2234, "step": 10250 }, { "epoch": 0.43, "grad_norm": 8.924472816865137, "learning_rate": 9.939353131728295e-06, "loss": 1.2169, "step": 10255 }, { "epoch": 0.43, "grad_norm": 31.972436499585463, "learning_rate": 9.939161813433733e-06, "loss": 1.2647, "step": 10260 }, { "epoch": 0.43, "grad_norm": 31.627258030667768, "learning_rate": 9.93897019569234e-06, "loss": 1.2107, "step": 10265 }, { "epoch": 0.43, "grad_norm": 54.57097622714503, "learning_rate": 9.938778278515733e-06, "loss": 1.2562, "step": 10270 }, { "epoch": 0.43, "grad_norm": 21.665738941858674, "learning_rate": 9.938586061915545e-06, "loss": 1.211, "step": 10275 }, { "epoch": 0.44, "grad_norm": 10.0504571677781, "learning_rate": 9.938393545903431e-06, "loss": 1.1989, "step": 10280 }, { "epoch": 0.44, "grad_norm": 6.359550985591298, "learning_rate": 9.938200730491064e-06, "loss": 1.212, "step": 10285 }, { "epoch": 0.44, "grad_norm": 12.139990522598348, "learning_rate": 9.938007615690133e-06, "loss": 1.2356, "step": 10290 }, { "epoch": 0.44, "grad_norm": 9.328557260145226, "learning_rate": 9.937814201512343e-06, "loss": 1.2207, "step": 10295 }, { "epoch": 0.44, "grad_norm": 6.353832565514606, "learning_rate": 9.937620487969426e-06, "loss": 1.2234, "step": 10300 }, { "epoch": 0.44, "grad_norm": 10.055326277381491, "learning_rate": 9.93742647507312e-06, "loss": 1.1901, "step": 10305 }, { "epoch": 0.44, "grad_norm": 9.51592292175932, "learning_rate": 9.937232162835194e-06, "loss": 1.2267, "step": 10310 }, { "epoch": 0.44, "grad_norm": 6.588113392157487, "learning_rate": 9.93703755126742e-06, "loss": 1.1771, "step": 10315 }, { "epoch": 0.44, "grad_norm": 8.226208415684464, "learning_rate": 9.936842640381607e-06, "loss": 1.1875, "step": 10320 }, { "epoch": 0.44, "grad_norm": 10.603391632253025, "learning_rate": 9.936647430189563e-06, "loss": 1.2562, "step": 10325 }, { "epoch": 0.44, "grad_norm": 7.950184690178073, "learning_rate": 9.93645192070313e-06, "loss": 1.1953, "step": 10330 }, { "epoch": 0.44, "grad_norm": 9.765367393192978, "learning_rate": 9.936256111934155e-06, "loss": 1.2267, "step": 10335 }, { "epoch": 0.44, "grad_norm": 5.4761024012374735, "learning_rate": 9.936060003894514e-06, "loss": 1.2096, "step": 10340 }, { "epoch": 0.44, "grad_norm": 6.081180503907223, "learning_rate": 9.935863596596094e-06, "loss": 1.2242, "step": 10345 }, { "epoch": 0.44, "grad_norm": 7.787545341346834, "learning_rate": 9.935666890050802e-06, "loss": 1.2399, "step": 10350 }, { "epoch": 0.44, "grad_norm": 6.032648729682437, "learning_rate": 9.935469884270566e-06, "loss": 1.2055, "step": 10355 }, { "epoch": 0.44, "grad_norm": 9.62949883737289, "learning_rate": 9.935272579267328e-06, "loss": 1.1796, "step": 10360 }, { "epoch": 0.44, "grad_norm": 5.5222261312219745, "learning_rate": 9.935074975053053e-06, "loss": 1.2745, "step": 10365 }, { "epoch": 0.44, "grad_norm": 8.163099772861981, "learning_rate": 9.934877071639716e-06, "loss": 1.2093, "step": 10370 }, { "epoch": 0.44, "grad_norm": 6.613481196334059, "learning_rate": 9.93467886903932e-06, "loss": 1.2463, "step": 10375 }, { "epoch": 0.44, "grad_norm": 8.931685390895252, "learning_rate": 9.93448036726388e-06, "loss": 1.2018, "step": 10380 }, { "epoch": 0.44, "grad_norm": 7.975094818793018, "learning_rate": 9.934281566325431e-06, "loss": 1.19, "step": 10385 }, { "epoch": 0.44, "grad_norm": 5.298937810576603, "learning_rate": 9.934082466236024e-06, "loss": 1.2056, "step": 10390 }, { "epoch": 0.44, "grad_norm": 7.161477414177748, "learning_rate": 9.93388306700773e-06, "loss": 1.2265, "step": 10395 }, { "epoch": 0.44, "grad_norm": 10.388915051137957, "learning_rate": 9.93368336865264e-06, "loss": 1.1905, "step": 10400 }, { "epoch": 0.44, "grad_norm": 9.427422972855142, "learning_rate": 9.93348337118286e-06, "loss": 1.1469, "step": 10405 }, { "epoch": 0.44, "grad_norm": 7.127322135473031, "learning_rate": 9.933283074610516e-06, "loss": 1.1783, "step": 10410 }, { "epoch": 0.44, "grad_norm": 6.070302630294536, "learning_rate": 9.933082478947749e-06, "loss": 1.2091, "step": 10415 }, { "epoch": 0.44, "grad_norm": 5.167061128334721, "learning_rate": 9.932881584206722e-06, "loss": 1.2058, "step": 10420 }, { "epoch": 0.44, "grad_norm": 5.671128985805976, "learning_rate": 9.932680390399616e-06, "loss": 1.2503, "step": 10425 }, { "epoch": 0.44, "grad_norm": 9.428469323767652, "learning_rate": 9.932478897538628e-06, "loss": 1.2091, "step": 10430 }, { "epoch": 0.44, "grad_norm": 9.027737668250325, "learning_rate": 9.932277105635973e-06, "loss": 1.2004, "step": 10435 }, { "epoch": 0.44, "grad_norm": 17.639585985612662, "learning_rate": 9.932075014703886e-06, "loss": 1.2108, "step": 10440 }, { "epoch": 0.44, "grad_norm": 12.846812907766918, "learning_rate": 9.931872624754618e-06, "loss": 1.1871, "step": 10445 }, { "epoch": 0.44, "grad_norm": 8.769758325038678, "learning_rate": 9.93166993580044e-06, "loss": 1.2511, "step": 10450 }, { "epoch": 0.44, "grad_norm": 7.353791888476142, "learning_rate": 9.93146694785364e-06, "loss": 1.215, "step": 10455 }, { "epoch": 0.44, "grad_norm": 16.6760296090444, "learning_rate": 9.931263660926527e-06, "loss": 1.2557, "step": 10460 }, { "epoch": 0.44, "grad_norm": 14.307014761297724, "learning_rate": 9.931060075031423e-06, "loss": 1.2008, "step": 10465 }, { "epoch": 0.44, "grad_norm": 12.395314637904972, "learning_rate": 9.93085619018067e-06, "loss": 1.1386, "step": 10470 }, { "epoch": 0.44, "grad_norm": 5.765312547763097, "learning_rate": 9.930652006386632e-06, "loss": 1.1823, "step": 10475 }, { "epoch": 0.44, "grad_norm": 7.7062486490976685, "learning_rate": 9.930447523661686e-06, "loss": 1.2302, "step": 10480 }, { "epoch": 0.44, "grad_norm": 5.317431111849158, "learning_rate": 9.93024274201823e-06, "loss": 1.2074, "step": 10485 }, { "epoch": 0.44, "grad_norm": 6.294735669461464, "learning_rate": 9.930037661468679e-06, "loss": 1.2016, "step": 10490 }, { "epoch": 0.44, "grad_norm": 11.501356204117338, "learning_rate": 9.929832282025466e-06, "loss": 1.225, "step": 10495 }, { "epoch": 0.44, "grad_norm": 11.941532626605468, "learning_rate": 9.929626603701044e-06, "loss": 1.2284, "step": 10500 }, { "epoch": 0.44, "grad_norm": 17.715653674861617, "learning_rate": 9.929420626507882e-06, "loss": 1.1705, "step": 10505 }, { "epoch": 0.44, "grad_norm": 17.15148177217384, "learning_rate": 9.929214350458465e-06, "loss": 1.2225, "step": 10510 }, { "epoch": 0.45, "grad_norm": 23.90702080577724, "learning_rate": 9.929007775565305e-06, "loss": 1.1634, "step": 10515 }, { "epoch": 0.45, "grad_norm": 30.177090621342753, "learning_rate": 9.928800901840921e-06, "loss": 1.1539, "step": 10520 }, { "epoch": 0.45, "grad_norm": 17.44135285936784, "learning_rate": 9.928593729297855e-06, "loss": 1.1366, "step": 10525 }, { "epoch": 0.45, "grad_norm": 7.270926953351559, "learning_rate": 9.92838625794867e-06, "loss": 1.1712, "step": 10530 }, { "epoch": 0.45, "grad_norm": 6.348831852629494, "learning_rate": 9.928178487805945e-06, "loss": 1.2401, "step": 10535 }, { "epoch": 0.45, "grad_norm": 22.577816872892846, "learning_rate": 9.927970418882272e-06, "loss": 1.2159, "step": 10540 }, { "epoch": 0.45, "grad_norm": 8.183281260052668, "learning_rate": 9.927762051190268e-06, "loss": 1.2373, "step": 10545 }, { "epoch": 0.45, "grad_norm": 6.7125380455308825, "learning_rate": 9.927553384742567e-06, "loss": 1.2083, "step": 10550 }, { "epoch": 0.45, "grad_norm": 9.94323068571043, "learning_rate": 9.927344419551822e-06, "loss": 1.1979, "step": 10555 }, { "epoch": 0.45, "grad_norm": 10.586780828445535, "learning_rate": 9.927135155630694e-06, "loss": 1.1675, "step": 10560 }, { "epoch": 0.45, "grad_norm": 8.978746246588358, "learning_rate": 9.926925592991876e-06, "loss": 1.2019, "step": 10565 }, { "epoch": 0.45, "grad_norm": 11.632834658864674, "learning_rate": 9.926715731648073e-06, "loss": 1.2366, "step": 10570 }, { "epoch": 0.45, "grad_norm": 14.174405240556075, "learning_rate": 9.926505571612008e-06, "loss": 1.1868, "step": 10575 }, { "epoch": 0.45, "grad_norm": 6.3040202484705175, "learning_rate": 9.92629511289642e-06, "loss": 1.2072, "step": 10580 }, { "epoch": 0.45, "grad_norm": 7.341495925088784, "learning_rate": 9.926084355514073e-06, "loss": 1.202, "step": 10585 }, { "epoch": 0.45, "grad_norm": 28.351874817946406, "learning_rate": 9.92587329947774e-06, "loss": 1.1906, "step": 10590 }, { "epoch": 0.45, "grad_norm": 25.798690096809228, "learning_rate": 9.925661944800218e-06, "loss": 1.1903, "step": 10595 }, { "epoch": 0.45, "grad_norm": 6.5458717369179285, "learning_rate": 9.92545029149432e-06, "loss": 1.2071, "step": 10600 }, { "epoch": 0.45, "grad_norm": 7.424439651233213, "learning_rate": 9.925238339572883e-06, "loss": 1.1945, "step": 10605 }, { "epoch": 0.45, "grad_norm": 5.717217661607357, "learning_rate": 9.92502608904875e-06, "loss": 1.1704, "step": 10610 }, { "epoch": 0.45, "grad_norm": 11.751766054459633, "learning_rate": 9.924813539934794e-06, "loss": 1.1939, "step": 10615 }, { "epoch": 0.45, "grad_norm": 6.462968047747415, "learning_rate": 9.924600692243898e-06, "loss": 1.1689, "step": 10620 }, { "epoch": 0.45, "grad_norm": 5.607152682053208, "learning_rate": 9.92438754598897e-06, "loss": 1.2046, "step": 10625 }, { "epoch": 0.45, "grad_norm": 7.2681757986446645, "learning_rate": 9.924174101182928e-06, "loss": 1.1635, "step": 10630 }, { "epoch": 0.45, "grad_norm": 5.920607135261278, "learning_rate": 9.923960357838717e-06, "loss": 1.1499, "step": 10635 }, { "epoch": 0.45, "grad_norm": 8.156130633446434, "learning_rate": 9.923746315969292e-06, "loss": 1.194, "step": 10640 }, { "epoch": 0.45, "grad_norm": 16.3057165190141, "learning_rate": 9.923531975587633e-06, "loss": 1.1468, "step": 10645 }, { "epoch": 0.45, "grad_norm": 8.44207288541131, "learning_rate": 9.923317336706732e-06, "loss": 1.1572, "step": 10650 }, { "epoch": 0.45, "grad_norm": 5.491729437834684, "learning_rate": 9.923102399339602e-06, "loss": 1.1538, "step": 10655 }, { "epoch": 0.45, "grad_norm": 11.769953007994715, "learning_rate": 9.922887163499276e-06, "loss": 1.2177, "step": 10660 }, { "epoch": 0.45, "grad_norm": 19.14152021932703, "learning_rate": 9.922671629198802e-06, "loss": 1.2153, "step": 10665 }, { "epoch": 0.45, "grad_norm": 9.978992801364809, "learning_rate": 9.922455796451249e-06, "loss": 1.2067, "step": 10670 }, { "epoch": 0.45, "grad_norm": 12.661404526341018, "learning_rate": 9.922239665269697e-06, "loss": 1.1566, "step": 10675 }, { "epoch": 0.45, "grad_norm": 7.327352607288402, "learning_rate": 9.922023235667255e-06, "loss": 1.1975, "step": 10680 }, { "epoch": 0.45, "grad_norm": 11.745619312070527, "learning_rate": 9.921806507657044e-06, "loss": 1.2059, "step": 10685 }, { "epoch": 0.45, "grad_norm": 7.176168882922202, "learning_rate": 9.921589481252201e-06, "loss": 1.1482, "step": 10690 }, { "epoch": 0.45, "grad_norm": 9.78473586808955, "learning_rate": 9.921372156465884e-06, "loss": 1.1564, "step": 10695 }, { "epoch": 0.45, "grad_norm": 6.819308712859828, "learning_rate": 9.92115453331127e-06, "loss": 1.1899, "step": 10700 }, { "epoch": 0.45, "grad_norm": 6.4444198557490315, "learning_rate": 9.920936611801552e-06, "loss": 1.1829, "step": 10705 }, { "epoch": 0.45, "grad_norm": 6.0103124452863135, "learning_rate": 9.920718391949943e-06, "loss": 1.1114, "step": 10710 }, { "epoch": 0.45, "grad_norm": 7.218072493475162, "learning_rate": 9.920499873769671e-06, "loss": 1.1903, "step": 10715 }, { "epoch": 0.45, "grad_norm": 5.811183074814596, "learning_rate": 9.920281057273988e-06, "loss": 1.1242, "step": 10720 }, { "epoch": 0.45, "grad_norm": 6.6195642267455765, "learning_rate": 9.920061942476156e-06, "loss": 1.1823, "step": 10725 }, { "epoch": 0.45, "grad_norm": 6.882686024666247, "learning_rate": 9.91984252938946e-06, "loss": 1.1829, "step": 10730 }, { "epoch": 0.45, "grad_norm": 7.687098526835065, "learning_rate": 9.919622818027204e-06, "loss": 1.1504, "step": 10735 }, { "epoch": 0.45, "grad_norm": 9.901209944181845, "learning_rate": 9.91940280840271e-06, "loss": 1.1802, "step": 10740 }, { "epoch": 0.45, "grad_norm": 6.741967202730921, "learning_rate": 9.919182500529311e-06, "loss": 1.1603, "step": 10745 }, { "epoch": 0.45, "grad_norm": 8.49314460818403, "learning_rate": 9.91896189442037e-06, "loss": 1.1385, "step": 10750 }, { "epoch": 0.46, "grad_norm": 5.846613960624861, "learning_rate": 9.918740990089256e-06, "loss": 1.1914, "step": 10755 }, { "epoch": 0.46, "grad_norm": 14.628152978231483, "learning_rate": 9.918519787549366e-06, "loss": 1.1483, "step": 10760 }, { "epoch": 0.46, "grad_norm": 6.360298756997522, "learning_rate": 9.918298286814108e-06, "loss": 1.1382, "step": 10765 }, { "epoch": 0.46, "grad_norm": 17.051419942281832, "learning_rate": 9.918076487896912e-06, "loss": 1.1592, "step": 10770 }, { "epoch": 0.46, "grad_norm": 17.184741407663875, "learning_rate": 9.917854390811227e-06, "loss": 1.1841, "step": 10775 }, { "epoch": 0.46, "grad_norm": 20.95540204270835, "learning_rate": 9.917631995570516e-06, "loss": 1.1757, "step": 10780 }, { "epoch": 0.46, "grad_norm": 5.5651707387832205, "learning_rate": 9.917409302188261e-06, "loss": 1.1859, "step": 10785 }, { "epoch": 0.46, "grad_norm": 6.32757135428432, "learning_rate": 9.917186310677967e-06, "loss": 1.1413, "step": 10790 }, { "epoch": 0.46, "grad_norm": 13.345522140302892, "learning_rate": 9.916963021053148e-06, "loss": 1.2138, "step": 10795 }, { "epoch": 0.46, "grad_norm": 12.546032560435066, "learning_rate": 9.916739433327347e-06, "loss": 1.1614, "step": 10800 }, { "epoch": 0.46, "grad_norm": 15.037771854499969, "learning_rate": 9.916515547514116e-06, "loss": 1.1657, "step": 10805 }, { "epoch": 0.46, "grad_norm": 12.962530625932569, "learning_rate": 9.916291363627028e-06, "loss": 1.1548, "step": 10810 }, { "epoch": 0.46, "grad_norm": 8.733858126523737, "learning_rate": 9.916066881679678e-06, "loss": 1.2221, "step": 10815 }, { "epoch": 0.46, "grad_norm": 6.4197775228783485, "learning_rate": 9.915842101685672e-06, "loss": 1.2215, "step": 10820 }, { "epoch": 0.46, "grad_norm": 6.446367708194145, "learning_rate": 9.915617023658642e-06, "loss": 1.1865, "step": 10825 }, { "epoch": 0.46, "grad_norm": 5.909688736237768, "learning_rate": 9.915391647612228e-06, "loss": 1.1873, "step": 10830 }, { "epoch": 0.46, "grad_norm": 8.560801320774795, "learning_rate": 9.9151659735601e-06, "loss": 1.116, "step": 10835 }, { "epoch": 0.46, "grad_norm": 10.92035753231308, "learning_rate": 9.914940001515937e-06, "loss": 1.131, "step": 10840 }, { "epoch": 0.46, "grad_norm": 7.47640382614778, "learning_rate": 9.914713731493437e-06, "loss": 1.1332, "step": 10845 }, { "epoch": 0.46, "grad_norm": 13.442824328039446, "learning_rate": 9.914487163506323e-06, "loss": 1.1487, "step": 10850 }, { "epoch": 0.46, "grad_norm": 10.219963387438971, "learning_rate": 9.914260297568326e-06, "loss": 1.1919, "step": 10855 }, { "epoch": 0.46, "grad_norm": 17.830512400940798, "learning_rate": 9.914033133693204e-06, "loss": 1.1713, "step": 10860 }, { "epoch": 0.46, "grad_norm": 8.115427526870187, "learning_rate": 9.913805671894727e-06, "loss": 1.1565, "step": 10865 }, { "epoch": 0.46, "grad_norm": 6.363191331256874, "learning_rate": 9.913577912186688e-06, "loss": 1.1689, "step": 10870 }, { "epoch": 0.46, "grad_norm": 6.708693374997952, "learning_rate": 9.91334985458289e-06, "loss": 1.1921, "step": 10875 }, { "epoch": 0.46, "grad_norm": 11.043603686528792, "learning_rate": 9.913121499097165e-06, "loss": 1.1896, "step": 10880 }, { "epoch": 0.46, "grad_norm": 7.127183021152288, "learning_rate": 9.912892845743356e-06, "loss": 1.156, "step": 10885 }, { "epoch": 0.46, "grad_norm": 9.784181488898342, "learning_rate": 9.912663894535325e-06, "loss": 1.1585, "step": 10890 }, { "epoch": 0.46, "grad_norm": 5.946782241883635, "learning_rate": 9.912434645486953e-06, "loss": 1.1534, "step": 10895 }, { "epoch": 0.46, "grad_norm": 8.684903220320171, "learning_rate": 9.912205098612138e-06, "loss": 1.1408, "step": 10900 }, { "epoch": 0.46, "grad_norm": 6.512343867443584, "learning_rate": 9.911975253924797e-06, "loss": 1.1632, "step": 10905 }, { "epoch": 0.46, "grad_norm": 6.717541036271599, "learning_rate": 9.911745111438867e-06, "loss": 1.1869, "step": 10910 }, { "epoch": 0.46, "grad_norm": 5.509526494615323, "learning_rate": 9.911514671168296e-06, "loss": 1.148, "step": 10915 }, { "epoch": 0.46, "grad_norm": 7.182359008080275, "learning_rate": 9.911283933127061e-06, "loss": 1.1491, "step": 10920 }, { "epoch": 0.46, "grad_norm": 7.7066101472575665, "learning_rate": 9.911052897329145e-06, "loss": 1.1815, "step": 10925 }, { "epoch": 0.46, "grad_norm": 7.857949064757787, "learning_rate": 9.91082156378856e-06, "loss": 1.1518, "step": 10930 }, { "epoch": 0.46, "grad_norm": 5.724963982439529, "learning_rate": 9.910589932519327e-06, "loss": 1.1351, "step": 10935 }, { "epoch": 0.46, "grad_norm": 7.159897620900517, "learning_rate": 9.910358003535491e-06, "loss": 1.1423, "step": 10940 }, { "epoch": 0.46, "grad_norm": 9.45715717713818, "learning_rate": 9.910125776851114e-06, "loss": 1.131, "step": 10945 }, { "epoch": 0.46, "grad_norm": 6.8028978521572725, "learning_rate": 9.909893252480273e-06, "loss": 1.1664, "step": 10950 }, { "epoch": 0.46, "grad_norm": 14.87835802927879, "learning_rate": 9.90966043043707e-06, "loss": 1.204, "step": 10955 }, { "epoch": 0.46, "grad_norm": 8.652741016209589, "learning_rate": 9.909427310735615e-06, "loss": 1.159, "step": 10960 }, { "epoch": 0.46, "grad_norm": 7.679135125939183, "learning_rate": 9.909193893390043e-06, "loss": 1.1371, "step": 10965 }, { "epoch": 0.46, "grad_norm": 9.593510965900318, "learning_rate": 9.908960178414505e-06, "loss": 1.1884, "step": 10970 }, { "epoch": 0.46, "grad_norm": 8.342203549577611, "learning_rate": 9.908726165823172e-06, "loss": 1.1862, "step": 10975 }, { "epoch": 0.46, "grad_norm": 5.65191266709501, "learning_rate": 9.908491855630232e-06, "loss": 1.133, "step": 10980 }, { "epoch": 0.46, "grad_norm": 5.986985423965624, "learning_rate": 9.90825724784989e-06, "loss": 1.1273, "step": 10985 }, { "epoch": 0.47, "grad_norm": 6.8708933815736755, "learning_rate": 9.908022342496365e-06, "loss": 1.1642, "step": 10990 }, { "epoch": 0.47, "grad_norm": 6.22685427868934, "learning_rate": 9.907787139583904e-06, "loss": 1.1549, "step": 10995 }, { "epoch": 0.47, "grad_norm": 5.662857809878343, "learning_rate": 9.907551639126766e-06, "loss": 1.186, "step": 11000 }, { "epoch": 0.47, "grad_norm": 12.706587813035455, "learning_rate": 9.907315841139226e-06, "loss": 1.2093, "step": 11005 }, { "epoch": 0.47, "grad_norm": 23.324541045987278, "learning_rate": 9.907079745635583e-06, "loss": 1.1803, "step": 11010 }, { "epoch": 0.47, "grad_norm": 7.8620193130054945, "learning_rate": 9.90684335263015e-06, "loss": 1.1758, "step": 11015 }, { "epoch": 0.47, "grad_norm": 28.110493558181762, "learning_rate": 9.906606662137257e-06, "loss": 1.1587, "step": 11020 }, { "epoch": 0.47, "grad_norm": 20.75697424936364, "learning_rate": 9.906369674171257e-06, "loss": 1.1616, "step": 11025 }, { "epoch": 0.47, "grad_norm": 13.74740157103678, "learning_rate": 9.906132388746512e-06, "loss": 1.1667, "step": 11030 }, { "epoch": 0.47, "grad_norm": 16.221646397570733, "learning_rate": 9.905894805877413e-06, "loss": 1.1574, "step": 11035 }, { "epoch": 0.47, "grad_norm": 10.315429426999328, "learning_rate": 9.905656925578365e-06, "loss": 1.1335, "step": 11040 }, { "epoch": 0.47, "grad_norm": 5.289812467695272, "learning_rate": 9.905418747863786e-06, "loss": 1.1973, "step": 11045 }, { "epoch": 0.47, "grad_norm": 7.691144253246272, "learning_rate": 9.905180272748119e-06, "loss": 1.2163, "step": 11050 }, { "epoch": 0.47, "grad_norm": 11.125900649933367, "learning_rate": 9.90494150024582e-06, "loss": 1.1528, "step": 11055 }, { "epoch": 0.47, "grad_norm": 6.967571187709722, "learning_rate": 9.904702430371365e-06, "loss": 1.129, "step": 11060 }, { "epoch": 0.47, "grad_norm": 6.232701180420777, "learning_rate": 9.904463063139249e-06, "loss": 1.1288, "step": 11065 }, { "epoch": 0.47, "grad_norm": 7.9823375151125235, "learning_rate": 9.904223398563983e-06, "loss": 1.1561, "step": 11070 }, { "epoch": 0.47, "grad_norm": 7.328654473530816, "learning_rate": 9.903983436660101e-06, "loss": 1.1787, "step": 11075 }, { "epoch": 0.47, "grad_norm": 21.760572758974128, "learning_rate": 9.903743177442146e-06, "loss": 1.2078, "step": 11080 }, { "epoch": 0.47, "grad_norm": 10.31790366894579, "learning_rate": 9.903502620924688e-06, "loss": 1.2201, "step": 11085 }, { "epoch": 0.47, "grad_norm": 24.48203400676599, "learning_rate": 9.90326176712231e-06, "loss": 1.1926, "step": 11090 }, { "epoch": 0.47, "grad_norm": 17.618875344588776, "learning_rate": 9.903020616049613e-06, "loss": 1.2167, "step": 11095 }, { "epoch": 0.47, "grad_norm": 17.325643244351472, "learning_rate": 9.90277916772122e-06, "loss": 1.1709, "step": 11100 }, { "epoch": 0.47, "grad_norm": 6.934175497898546, "learning_rate": 9.902537422151766e-06, "loss": 1.2131, "step": 11105 }, { "epoch": 0.47, "grad_norm": 9.16973069180265, "learning_rate": 9.90229537935591e-06, "loss": 1.1607, "step": 11110 }, { "epoch": 0.47, "grad_norm": 12.288594806946907, "learning_rate": 9.902053039348325e-06, "loss": 1.1726, "step": 11115 }, { "epoch": 0.47, "grad_norm": 12.854571230860627, "learning_rate": 9.901810402143703e-06, "loss": 1.1653, "step": 11120 }, { "epoch": 0.47, "grad_norm": 6.151482995214919, "learning_rate": 9.901567467756757e-06, "loss": 1.1681, "step": 11125 }, { "epoch": 0.47, "grad_norm": 5.632971060096517, "learning_rate": 9.90132423620221e-06, "loss": 1.1442, "step": 11130 }, { "epoch": 0.47, "grad_norm": 7.215110069184859, "learning_rate": 9.901080707494814e-06, "loss": 1.0976, "step": 11135 }, { "epoch": 0.47, "grad_norm": 10.286433333904665, "learning_rate": 9.900836881649332e-06, "loss": 1.1856, "step": 11140 }, { "epoch": 0.47, "grad_norm": 12.377274914922964, "learning_rate": 9.900592758680544e-06, "loss": 1.2035, "step": 11145 }, { "epoch": 0.47, "grad_norm": 10.379698830333417, "learning_rate": 9.900348338603256e-06, "loss": 1.1318, "step": 11150 }, { "epoch": 0.47, "grad_norm": 9.058571170997721, "learning_rate": 9.900103621432278e-06, "loss": 1.1988, "step": 11155 }, { "epoch": 0.47, "grad_norm": 13.483186687397655, "learning_rate": 9.899858607182454e-06, "loss": 1.1307, "step": 11160 }, { "epoch": 0.47, "grad_norm": 15.372143287190866, "learning_rate": 9.899613295868635e-06, "loss": 1.1436, "step": 11165 }, { "epoch": 0.47, "grad_norm": 15.441760553757844, "learning_rate": 9.899367687505695e-06, "loss": 1.1663, "step": 11170 }, { "epoch": 0.47, "grad_norm": 6.067452984345042, "learning_rate": 9.899121782108521e-06, "loss": 1.1389, "step": 11175 }, { "epoch": 0.47, "grad_norm": 7.493878741153034, "learning_rate": 9.898875579692027e-06, "loss": 1.1334, "step": 11180 }, { "epoch": 0.47, "grad_norm": 6.877351187180319, "learning_rate": 9.898629080271135e-06, "loss": 1.1579, "step": 11185 }, { "epoch": 0.47, "grad_norm": 9.306766372634094, "learning_rate": 9.898382283860792e-06, "loss": 1.1591, "step": 11190 }, { "epoch": 0.47, "grad_norm": 13.997116383859714, "learning_rate": 9.89813519047596e-06, "loss": 1.1434, "step": 11195 }, { "epoch": 0.47, "grad_norm": 7.456764815120626, "learning_rate": 9.897887800131618e-06, "loss": 1.2191, "step": 11200 }, { "epoch": 0.47, "grad_norm": 6.380024381576704, "learning_rate": 9.897640112842769e-06, "loss": 1.1698, "step": 11205 }, { "epoch": 0.47, "grad_norm": 6.062180917583801, "learning_rate": 9.897392128624422e-06, "loss": 1.1533, "step": 11210 }, { "epoch": 0.47, "grad_norm": 9.372322311934546, "learning_rate": 9.897143847491619e-06, "loss": 1.1595, "step": 11215 }, { "epoch": 0.47, "grad_norm": 8.137540645494184, "learning_rate": 9.896895269459408e-06, "loss": 1.1745, "step": 11220 }, { "epoch": 0.48, "grad_norm": 14.488009026628987, "learning_rate": 9.896646394542861e-06, "loss": 1.1979, "step": 11225 }, { "epoch": 0.48, "grad_norm": 21.479331352304104, "learning_rate": 9.896397222757069e-06, "loss": 1.1748, "step": 11230 }, { "epoch": 0.48, "grad_norm": 32.17705539395102, "learning_rate": 9.896147754117133e-06, "loss": 1.1763, "step": 11235 }, { "epoch": 0.48, "grad_norm": 29.641023561579832, "learning_rate": 9.895897988638184e-06, "loss": 1.1612, "step": 11240 }, { "epoch": 0.48, "grad_norm": 12.618612499955207, "learning_rate": 9.895647926335359e-06, "loss": 1.1143, "step": 11245 }, { "epoch": 0.48, "grad_norm": 24.552763013483638, "learning_rate": 9.895397567223821e-06, "loss": 1.1828, "step": 11250 }, { "epoch": 0.48, "grad_norm": 12.827877276813723, "learning_rate": 9.895146911318749e-06, "loss": 1.1868, "step": 11255 }, { "epoch": 0.48, "grad_norm": 8.126715833569445, "learning_rate": 9.89489595863534e-06, "loss": 1.1495, "step": 11260 }, { "epoch": 0.48, "grad_norm": 20.218207112675735, "learning_rate": 9.894644709188805e-06, "loss": 1.1768, "step": 11265 }, { "epoch": 0.48, "grad_norm": 10.921473798859457, "learning_rate": 9.894393162994381e-06, "loss": 1.199, "step": 11270 }, { "epoch": 0.48, "grad_norm": 5.33166151881928, "learning_rate": 9.894141320067316e-06, "loss": 1.1707, "step": 11275 }, { "epoch": 0.48, "grad_norm": 7.565354823955448, "learning_rate": 9.893889180422878e-06, "loss": 1.164, "step": 11280 }, { "epoch": 0.48, "grad_norm": 7.776125898509952, "learning_rate": 9.893636744076357e-06, "loss": 1.198, "step": 11285 }, { "epoch": 0.48, "grad_norm": 8.152371316351982, "learning_rate": 9.89338401104305e-06, "loss": 1.1801, "step": 11290 }, { "epoch": 0.48, "grad_norm": 10.126109015757814, "learning_rate": 9.893130981338289e-06, "loss": 1.1661, "step": 11295 }, { "epoch": 0.48, "grad_norm": 14.083493515860624, "learning_rate": 9.892877654977407e-06, "loss": 1.1731, "step": 11300 }, { "epoch": 0.48, "grad_norm": 10.998715081522729, "learning_rate": 9.892624031975766e-06, "loss": 1.1692, "step": 11305 }, { "epoch": 0.48, "grad_norm": 5.411370045012572, "learning_rate": 9.89237011234874e-06, "loss": 1.1651, "step": 11310 }, { "epoch": 0.48, "grad_norm": 5.578768938585467, "learning_rate": 9.89211589611173e-06, "loss": 1.1566, "step": 11315 }, { "epoch": 0.48, "grad_norm": 6.316568316732369, "learning_rate": 9.89186138328014e-06, "loss": 1.105, "step": 11320 }, { "epoch": 0.48, "grad_norm": 5.548768655491145, "learning_rate": 9.891606573869403e-06, "loss": 1.1269, "step": 11325 }, { "epoch": 0.48, "grad_norm": 6.0706108911607455, "learning_rate": 9.89135146789497e-06, "loss": 1.1464, "step": 11330 }, { "epoch": 0.48, "grad_norm": 5.419681955203033, "learning_rate": 9.891096065372304e-06, "loss": 1.1455, "step": 11335 }, { "epoch": 0.48, "grad_norm": 5.676930979505991, "learning_rate": 9.890840366316892e-06, "loss": 1.2126, "step": 11340 }, { "epoch": 0.48, "grad_norm": 9.52047652324343, "learning_rate": 9.890584370744233e-06, "loss": 1.1998, "step": 11345 }, { "epoch": 0.48, "grad_norm": 12.455724561809516, "learning_rate": 9.890328078669851e-06, "loss": 1.1378, "step": 11350 }, { "epoch": 0.48, "grad_norm": 8.426712602811074, "learning_rate": 9.890071490109283e-06, "loss": 1.1505, "step": 11355 }, { "epoch": 0.48, "grad_norm": 6.674302587106533, "learning_rate": 9.889814605078084e-06, "loss": 1.1526, "step": 11360 }, { "epoch": 0.48, "grad_norm": 5.134097221101347, "learning_rate": 9.889557423591829e-06, "loss": 1.1458, "step": 11365 }, { "epoch": 0.48, "grad_norm": 5.898009761601114, "learning_rate": 9.889299945666108e-06, "loss": 1.2127, "step": 11370 }, { "epoch": 0.48, "grad_norm": 9.632360731331874, "learning_rate": 9.889042171316537e-06, "loss": 1.1626, "step": 11375 }, { "epoch": 0.48, "grad_norm": 5.307125513946716, "learning_rate": 9.888784100558739e-06, "loss": 1.1575, "step": 11380 }, { "epoch": 0.48, "grad_norm": 6.228412416269242, "learning_rate": 9.88852573340836e-06, "loss": 1.1368, "step": 11385 }, { "epoch": 0.48, "grad_norm": 5.802860959891278, "learning_rate": 9.888267069881066e-06, "loss": 1.1398, "step": 11390 }, { "epoch": 0.48, "grad_norm": 12.361479226155447, "learning_rate": 9.88800810999254e-06, "loss": 1.1186, "step": 11395 }, { "epoch": 0.48, "grad_norm": 7.43506792761793, "learning_rate": 9.887748853758477e-06, "loss": 1.1355, "step": 11400 }, { "epoch": 0.48, "grad_norm": 5.477860747686588, "learning_rate": 9.887489301194602e-06, "loss": 1.1583, "step": 11405 }, { "epoch": 0.48, "grad_norm": 11.60647595615046, "learning_rate": 9.887229452316644e-06, "loss": 1.1852, "step": 11410 }, { "epoch": 0.48, "grad_norm": 6.555330057575714, "learning_rate": 9.886969307140363e-06, "loss": 1.1814, "step": 11415 }, { "epoch": 0.48, "grad_norm": 6.367462869352004, "learning_rate": 9.886708865681526e-06, "loss": 1.1296, "step": 11420 }, { "epoch": 0.48, "grad_norm": 11.8952387272428, "learning_rate": 9.886448127955925e-06, "loss": 1.1599, "step": 11425 }, { "epoch": 0.48, "grad_norm": 8.796066164176459, "learning_rate": 9.886187093979369e-06, "loss": 1.1442, "step": 11430 }, { "epoch": 0.48, "grad_norm": 6.384124771531093, "learning_rate": 9.88592576376768e-06, "loss": 1.1124, "step": 11435 }, { "epoch": 0.48, "grad_norm": 5.365682983872836, "learning_rate": 9.885664137336706e-06, "loss": 1.1528, "step": 11440 }, { "epoch": 0.48, "grad_norm": 6.466847423576245, "learning_rate": 9.885402214702306e-06, "loss": 1.1681, "step": 11445 }, { "epoch": 0.48, "grad_norm": 5.85172158396155, "learning_rate": 9.88513999588036e-06, "loss": 1.1054, "step": 11450 }, { "epoch": 0.48, "grad_norm": 5.954694471587122, "learning_rate": 9.884877480886765e-06, "loss": 1.1429, "step": 11455 }, { "epoch": 0.49, "grad_norm": 7.109127539501378, "learning_rate": 9.884614669737439e-06, "loss": 1.2066, "step": 11460 }, { "epoch": 0.49, "grad_norm": 8.70820384291903, "learning_rate": 9.88435156244831e-06, "loss": 1.1445, "step": 11465 }, { "epoch": 0.49, "grad_norm": 7.487129277691837, "learning_rate": 9.884088159035336e-06, "loss": 1.1642, "step": 11470 }, { "epoch": 0.49, "grad_norm": 6.258581288924794, "learning_rate": 9.883824459514484e-06, "loss": 1.1408, "step": 11475 }, { "epoch": 0.49, "grad_norm": 7.318640774823393, "learning_rate": 9.883560463901738e-06, "loss": 1.1729, "step": 11480 }, { "epoch": 0.49, "grad_norm": 6.708089259801228, "learning_rate": 9.883296172213108e-06, "loss": 1.1813, "step": 11485 }, { "epoch": 0.49, "grad_norm": 11.455045503263648, "learning_rate": 9.883031584464616e-06, "loss": 1.174, "step": 11490 }, { "epoch": 0.49, "grad_norm": 9.396411284344936, "learning_rate": 9.882766700672301e-06, "loss": 1.1069, "step": 11495 }, { "epoch": 0.49, "grad_norm": 10.535704924614947, "learning_rate": 9.882501520852223e-06, "loss": 1.2267, "step": 11500 }, { "epoch": 0.49, "grad_norm": 9.242712743571305, "learning_rate": 9.882236045020462e-06, "loss": 1.1192, "step": 11505 }, { "epoch": 0.49, "grad_norm": 18.20033342471653, "learning_rate": 9.881970273193109e-06, "loss": 1.1573, "step": 11510 }, { "epoch": 0.49, "grad_norm": 8.817606417347065, "learning_rate": 9.88170420538628e-06, "loss": 1.1386, "step": 11515 }, { "epoch": 0.49, "grad_norm": 6.848578061698391, "learning_rate": 9.881437841616103e-06, "loss": 1.1566, "step": 11520 }, { "epoch": 0.49, "grad_norm": 9.89601129169758, "learning_rate": 9.88117118189873e-06, "loss": 1.1164, "step": 11525 }, { "epoch": 0.49, "grad_norm": 13.32008606488712, "learning_rate": 9.880904226250325e-06, "loss": 1.1142, "step": 11530 }, { "epoch": 0.49, "grad_norm": 9.772024052164713, "learning_rate": 9.880636974687074e-06, "loss": 1.1134, "step": 11535 }, { "epoch": 0.49, "grad_norm": 7.853995128338482, "learning_rate": 9.88036942722518e-06, "loss": 1.133, "step": 11540 }, { "epoch": 0.49, "grad_norm": 8.69844691158747, "learning_rate": 9.880101583880863e-06, "loss": 1.1452, "step": 11545 }, { "epoch": 0.49, "grad_norm": 7.587696727294008, "learning_rate": 9.879833444670361e-06, "loss": 1.192, "step": 11550 }, { "epoch": 0.49, "grad_norm": 7.417324970434418, "learning_rate": 9.879565009609933e-06, "loss": 1.1693, "step": 11555 }, { "epoch": 0.49, "grad_norm": 19.74729653004002, "learning_rate": 9.87929627871585e-06, "loss": 1.1348, "step": 11560 }, { "epoch": 0.49, "grad_norm": 17.75267370519572, "learning_rate": 9.879027252004408e-06, "loss": 1.1499, "step": 11565 }, { "epoch": 0.49, "grad_norm": 7.246657708584769, "learning_rate": 9.878757929491915e-06, "loss": 1.1304, "step": 11570 }, { "epoch": 0.49, "grad_norm": 7.807281544727722, "learning_rate": 9.878488311194698e-06, "loss": 1.1753, "step": 11575 }, { "epoch": 0.49, "grad_norm": 8.60747522287729, "learning_rate": 9.878218397129106e-06, "loss": 1.1186, "step": 11580 }, { "epoch": 0.49, "grad_norm": 8.474045394785346, "learning_rate": 9.877948187311501e-06, "loss": 1.162, "step": 11585 }, { "epoch": 0.49, "grad_norm": 7.7293988831557625, "learning_rate": 9.877677681758267e-06, "loss": 1.1007, "step": 11590 }, { "epoch": 0.49, "grad_norm": 6.72239460734512, "learning_rate": 9.877406880485803e-06, "loss": 1.1875, "step": 11595 }, { "epoch": 0.49, "grad_norm": 10.430059083077545, "learning_rate": 9.877135783510526e-06, "loss": 1.1778, "step": 11600 }, { "epoch": 0.49, "grad_norm": 15.11322880660984, "learning_rate": 9.876864390848871e-06, "loss": 1.1018, "step": 11605 }, { "epoch": 0.49, "grad_norm": 8.953724958813943, "learning_rate": 9.876592702517294e-06, "loss": 1.1063, "step": 11610 }, { "epoch": 0.49, "grad_norm": 7.047768168085878, "learning_rate": 9.876320718532268e-06, "loss": 1.1484, "step": 11615 }, { "epoch": 0.49, "grad_norm": 8.633786295620178, "learning_rate": 9.876048438910279e-06, "loss": 1.128, "step": 11620 }, { "epoch": 0.49, "grad_norm": 7.325124078791975, "learning_rate": 9.875775863667835e-06, "loss": 1.1114, "step": 11625 }, { "epoch": 0.49, "grad_norm": 9.767935984740749, "learning_rate": 9.875502992821464e-06, "loss": 1.145, "step": 11630 }, { "epoch": 0.49, "grad_norm": 7.112306088496393, "learning_rate": 9.875229826387706e-06, "loss": 1.1438, "step": 11635 }, { "epoch": 0.49, "grad_norm": 8.120903752262135, "learning_rate": 9.874956364383127e-06, "loss": 1.1587, "step": 11640 }, { "epoch": 0.49, "grad_norm": 4.986091808075715, "learning_rate": 9.874682606824301e-06, "loss": 1.131, "step": 11645 }, { "epoch": 0.49, "grad_norm": 22.406463114043778, "learning_rate": 9.874408553727828e-06, "loss": 1.1337, "step": 11650 }, { "epoch": 0.49, "grad_norm": 18.712704924972094, "learning_rate": 9.874134205110325e-06, "loss": 1.1075, "step": 11655 }, { "epoch": 0.49, "grad_norm": 25.079555102890428, "learning_rate": 9.87385956098842e-06, "loss": 1.166, "step": 11660 }, { "epoch": 0.49, "grad_norm": 5.954483214737107, "learning_rate": 9.873584621378765e-06, "loss": 1.1655, "step": 11665 }, { "epoch": 0.49, "grad_norm": 7.097962197693542, "learning_rate": 9.873309386298031e-06, "loss": 1.1765, "step": 11670 }, { "epoch": 0.49, "grad_norm": 5.677980010760047, "learning_rate": 9.873033855762903e-06, "loss": 1.1326, "step": 11675 }, { "epoch": 0.49, "grad_norm": 19.441939212176987, "learning_rate": 9.872758029790088e-06, "loss": 1.1481, "step": 11680 }, { "epoch": 0.49, "grad_norm": 20.724179910793424, "learning_rate": 9.872481908396305e-06, "loss": 1.1623, "step": 11685 }, { "epoch": 0.49, "grad_norm": 27.182337162688494, "learning_rate": 9.872205491598299e-06, "loss": 1.1807, "step": 11690 }, { "epoch": 0.49, "grad_norm": 18.04984628427394, "learning_rate": 9.871928779412822e-06, "loss": 1.1557, "step": 11695 }, { "epoch": 0.5, "grad_norm": 12.077281823470901, "learning_rate": 9.871651771856656e-06, "loss": 1.1259, "step": 11700 }, { "epoch": 0.5, "grad_norm": 9.989097544793694, "learning_rate": 9.871374468946593e-06, "loss": 1.1845, "step": 11705 }, { "epoch": 0.5, "grad_norm": 9.182163676999648, "learning_rate": 9.871096870699446e-06, "loss": 1.2276, "step": 11710 }, { "epoch": 0.5, "grad_norm": 15.305742794562596, "learning_rate": 9.870818977132042e-06, "loss": 1.1558, "step": 11715 }, { "epoch": 0.5, "grad_norm": 6.286994580972955, "learning_rate": 9.870540788261232e-06, "loss": 1.1256, "step": 11720 }, { "epoch": 0.5, "grad_norm": 6.976057077609181, "learning_rate": 9.87026230410388e-06, "loss": 1.151, "step": 11725 }, { "epoch": 0.5, "grad_norm": 8.290155160380621, "learning_rate": 9.869983524676872e-06, "loss": 1.112, "step": 11730 }, { "epoch": 0.5, "grad_norm": 15.640665605558757, "learning_rate": 9.869704449997106e-06, "loss": 1.1689, "step": 11735 }, { "epoch": 0.5, "grad_norm": 19.726793316037796, "learning_rate": 9.869425080081504e-06, "loss": 1.1257, "step": 11740 }, { "epoch": 0.5, "grad_norm": 14.918565976829093, "learning_rate": 9.869145414947002e-06, "loss": 1.1265, "step": 11745 }, { "epoch": 0.5, "grad_norm": 24.131782175561387, "learning_rate": 9.868865454610557e-06, "loss": 1.1579, "step": 11750 }, { "epoch": 0.5, "grad_norm": 7.8976802694399035, "learning_rate": 9.868585199089141e-06, "loss": 1.1422, "step": 11755 }, { "epoch": 0.5, "grad_norm": 7.921775189693692, "learning_rate": 9.868304648399744e-06, "loss": 1.172, "step": 11760 }, { "epoch": 0.5, "grad_norm": 9.36851777741143, "learning_rate": 9.86802380255938e-06, "loss": 1.1871, "step": 11765 }, { "epoch": 0.5, "grad_norm": 16.40339172531344, "learning_rate": 9.867742661585069e-06, "loss": 1.1623, "step": 11770 }, { "epoch": 0.5, "grad_norm": 10.176582100990586, "learning_rate": 9.86746122549386e-06, "loss": 1.1564, "step": 11775 }, { "epoch": 0.5, "grad_norm": 8.080872944075164, "learning_rate": 9.867179494302812e-06, "loss": 1.1331, "step": 11780 }, { "epoch": 0.5, "grad_norm": 8.506834876776587, "learning_rate": 9.866897468029011e-06, "loss": 1.1192, "step": 11785 }, { "epoch": 0.5, "grad_norm": 10.992554052031743, "learning_rate": 9.866615146689551e-06, "loss": 1.1307, "step": 11790 }, { "epoch": 0.5, "grad_norm": 6.137574257507623, "learning_rate": 9.86633253030155e-06, "loss": 1.1357, "step": 11795 }, { "epoch": 0.5, "grad_norm": 6.130393457328972, "learning_rate": 9.866049618882142e-06, "loss": 1.1271, "step": 11800 }, { "epoch": 0.5, "grad_norm": 8.689287447938648, "learning_rate": 9.865766412448479e-06, "loss": 1.1208, "step": 11805 }, { "epoch": 0.5, "grad_norm": 5.993566818309528, "learning_rate": 9.865482911017731e-06, "loss": 1.1143, "step": 11810 }, { "epoch": 0.5, "grad_norm": 9.610987656025143, "learning_rate": 9.865199114607085e-06, "loss": 1.1449, "step": 11815 }, { "epoch": 0.5, "grad_norm": 9.86202202993207, "learning_rate": 9.86491502323375e-06, "loss": 1.1436, "step": 11820 }, { "epoch": 0.5, "grad_norm": 5.824515678162103, "learning_rate": 9.864630636914945e-06, "loss": 1.1821, "step": 11825 }, { "epoch": 0.5, "grad_norm": 6.742485790665893, "learning_rate": 9.864345955667914e-06, "loss": 1.135, "step": 11830 }, { "epoch": 0.5, "grad_norm": 7.738242649379986, "learning_rate": 9.864060979509917e-06, "loss": 1.1031, "step": 11835 }, { "epoch": 0.5, "grad_norm": 22.016315223740655, "learning_rate": 9.86377570845823e-06, "loss": 1.1167, "step": 11840 }, { "epoch": 0.5, "grad_norm": 20.104340436977523, "learning_rate": 9.863490142530148e-06, "loss": 1.1509, "step": 11845 }, { "epoch": 0.5, "grad_norm": 11.793524747206087, "learning_rate": 9.863204281742986e-06, "loss": 1.1273, "step": 11850 }, { "epoch": 0.5, "grad_norm": 6.055343078022386, "learning_rate": 9.862918126114072e-06, "loss": 1.1877, "step": 11855 }, { "epoch": 0.5, "grad_norm": 8.114627805232248, "learning_rate": 9.862631675660756e-06, "loss": 1.1714, "step": 11860 }, { "epoch": 0.5, "grad_norm": 6.36176240385175, "learning_rate": 9.862344930400404e-06, "loss": 1.1365, "step": 11865 }, { "epoch": 0.5, "grad_norm": 9.119053552631945, "learning_rate": 9.862057890350404e-06, "loss": 1.1911, "step": 11870 }, { "epoch": 0.5, "grad_norm": 7.5555333501647866, "learning_rate": 9.861770555528155e-06, "loss": 1.112, "step": 11875 }, { "epoch": 0.5, "grad_norm": 5.8162057066325525, "learning_rate": 9.861482925951078e-06, "loss": 1.1905, "step": 11880 }, { "epoch": 0.5, "grad_norm": 7.992003210854348, "learning_rate": 9.86119500163661e-06, "loss": 1.1283, "step": 11885 }, { "epoch": 0.5, "grad_norm": 5.889743370342024, "learning_rate": 9.86090678260221e-06, "loss": 1.1337, "step": 11890 }, { "epoch": 0.5, "grad_norm": 6.803355038794472, "learning_rate": 9.860618268865348e-06, "loss": 1.1648, "step": 11895 }, { "epoch": 0.5, "grad_norm": 5.987428176412822, "learning_rate": 9.860329460443519e-06, "loss": 1.1388, "step": 11900 }, { "epoch": 0.5, "grad_norm": 10.44307176779172, "learning_rate": 9.860040357354232e-06, "loss": 1.1044, "step": 11905 }, { "epoch": 0.5, "grad_norm": 6.738452427336835, "learning_rate": 9.859750959615012e-06, "loss": 1.1741, "step": 11910 }, { "epoch": 0.5, "grad_norm": 8.93914151574783, "learning_rate": 9.859461267243407e-06, "loss": 1.2008, "step": 11915 }, { "epoch": 0.5, "grad_norm": 14.973737260555566, "learning_rate": 9.859171280256981e-06, "loss": 1.1892, "step": 11920 }, { "epoch": 0.5, "grad_norm": 15.26786668503323, "learning_rate": 9.85888099867331e-06, "loss": 1.1432, "step": 11925 }, { "epoch": 0.5, "grad_norm": 8.213308210695427, "learning_rate": 9.858590422509999e-06, "loss": 1.1755, "step": 11930 }, { "epoch": 0.51, "grad_norm": 5.4987460503540895, "learning_rate": 9.858299551784662e-06, "loss": 1.1261, "step": 11935 }, { "epoch": 0.51, "grad_norm": 6.231781553337773, "learning_rate": 9.858008386514933e-06, "loss": 1.1421, "step": 11940 }, { "epoch": 0.51, "grad_norm": 11.494777253475009, "learning_rate": 9.857716926718465e-06, "loss": 1.0886, "step": 11945 }, { "epoch": 0.51, "grad_norm": 6.847364984317209, "learning_rate": 9.85742517241293e-06, "loss": 1.154, "step": 11950 }, { "epoch": 0.51, "grad_norm": 5.527729858172997, "learning_rate": 9.857133123616013e-06, "loss": 1.1518, "step": 11955 }, { "epoch": 0.51, "grad_norm": 8.682313498314514, "learning_rate": 9.856840780345423e-06, "loss": 1.197, "step": 11960 }, { "epoch": 0.51, "grad_norm": 7.54415687857973, "learning_rate": 9.856548142618882e-06, "loss": 1.1779, "step": 11965 }, { "epoch": 0.51, "grad_norm": 14.561235190352997, "learning_rate": 9.856255210454134e-06, "loss": 1.1911, "step": 11970 }, { "epoch": 0.51, "grad_norm": 6.241505704266924, "learning_rate": 9.855961983868936e-06, "loss": 1.1173, "step": 11975 }, { "epoch": 0.51, "grad_norm": 7.585499314172027, "learning_rate": 9.855668462881068e-06, "loss": 1.1293, "step": 11980 }, { "epoch": 0.51, "grad_norm": 5.90281683007015, "learning_rate": 9.855374647508322e-06, "loss": 1.1305, "step": 11985 }, { "epoch": 0.51, "grad_norm": 9.18070927433015, "learning_rate": 9.855080537768515e-06, "loss": 1.1195, "step": 11990 }, { "epoch": 0.51, "grad_norm": 6.334953659829334, "learning_rate": 9.854786133679474e-06, "loss": 1.1617, "step": 11995 }, { "epoch": 0.51, "grad_norm": 9.425613823444337, "learning_rate": 9.854491435259051e-06, "loss": 1.1605, "step": 12000 }, { "epoch": 0.51, "grad_norm": 7.043819698721772, "learning_rate": 9.854196442525113e-06, "loss": 1.1403, "step": 12005 }, { "epoch": 0.51, "grad_norm": 6.840243726585245, "learning_rate": 9.85390115549554e-06, "loss": 1.1572, "step": 12010 }, { "epoch": 0.51, "grad_norm": 9.902078776899268, "learning_rate": 9.853605574188241e-06, "loss": 1.1378, "step": 12015 }, { "epoch": 0.51, "grad_norm": 5.4409833323953505, "learning_rate": 9.85330969862113e-06, "loss": 1.1054, "step": 12020 }, { "epoch": 0.51, "grad_norm": 5.569106348711084, "learning_rate": 9.85301352881215e-06, "loss": 1.1288, "step": 12025 }, { "epoch": 0.51, "grad_norm": 11.641174835617154, "learning_rate": 9.852717064779253e-06, "loss": 1.1175, "step": 12030 }, { "epoch": 0.51, "grad_norm": 11.205088887888452, "learning_rate": 9.852420306540416e-06, "loss": 1.1288, "step": 12035 }, { "epoch": 0.51, "grad_norm": 5.499418937192942, "learning_rate": 9.852123254113628e-06, "loss": 1.1452, "step": 12040 }, { "epoch": 0.51, "grad_norm": 7.598682355484569, "learning_rate": 9.851825907516901e-06, "loss": 1.1074, "step": 12045 }, { "epoch": 0.51, "grad_norm": 7.358570349103616, "learning_rate": 9.85152826676826e-06, "loss": 1.1532, "step": 12050 }, { "epoch": 0.51, "grad_norm": 7.60412566758258, "learning_rate": 9.85123033188575e-06, "loss": 1.1361, "step": 12055 }, { "epoch": 0.51, "grad_norm": 7.030084181433548, "learning_rate": 9.850932102887435e-06, "loss": 1.1439, "step": 12060 }, { "epoch": 0.51, "grad_norm": 6.751578427274406, "learning_rate": 9.850633579791395e-06, "loss": 1.1975, "step": 12065 }, { "epoch": 0.51, "grad_norm": 10.70896980875338, "learning_rate": 9.85033476261573e-06, "loss": 1.134, "step": 12070 }, { "epoch": 0.51, "grad_norm": 14.877170969086212, "learning_rate": 9.850035651378556e-06, "loss": 1.1283, "step": 12075 }, { "epoch": 0.51, "grad_norm": 7.75355538586889, "learning_rate": 9.849736246098006e-06, "loss": 1.1375, "step": 12080 }, { "epoch": 0.51, "grad_norm": 10.401902740850932, "learning_rate": 9.849436546792232e-06, "loss": 1.112, "step": 12085 }, { "epoch": 0.51, "grad_norm": 8.490671748975146, "learning_rate": 9.849136553479406e-06, "loss": 1.2094, "step": 12090 }, { "epoch": 0.51, "grad_norm": 13.213197699095515, "learning_rate": 9.848836266177715e-06, "loss": 1.159, "step": 12095 }, { "epoch": 0.51, "grad_norm": 13.130490153881233, "learning_rate": 9.848535684905362e-06, "loss": 1.1072, "step": 12100 }, { "epoch": 0.51, "grad_norm": 6.175600104181329, "learning_rate": 9.848234809680573e-06, "loss": 1.1511, "step": 12105 }, { "epoch": 0.51, "grad_norm": 7.363976292832451, "learning_rate": 9.847933640521588e-06, "loss": 1.1702, "step": 12110 }, { "epoch": 0.51, "grad_norm": 6.123969685033667, "learning_rate": 9.847632177446666e-06, "loss": 1.1188, "step": 12115 }, { "epoch": 0.51, "grad_norm": 7.306763945286356, "learning_rate": 9.847330420474086e-06, "loss": 1.1382, "step": 12120 }, { "epoch": 0.51, "grad_norm": 10.023152029184388, "learning_rate": 9.84702836962214e-06, "loss": 1.1379, "step": 12125 }, { "epoch": 0.51, "grad_norm": 5.908879001583405, "learning_rate": 9.846726024909141e-06, "loss": 1.0826, "step": 12130 }, { "epoch": 0.51, "grad_norm": 10.461184144598489, "learning_rate": 9.84642338635342e-06, "loss": 1.14, "step": 12135 }, { "epoch": 0.51, "grad_norm": 6.656567574368935, "learning_rate": 9.846120453973324e-06, "loss": 1.16, "step": 12140 }, { "epoch": 0.51, "grad_norm": 6.545006166182492, "learning_rate": 9.84581722778722e-06, "loss": 1.1671, "step": 12145 }, { "epoch": 0.51, "grad_norm": 6.954054299452032, "learning_rate": 9.845513707813492e-06, "loss": 1.1496, "step": 12150 }, { "epoch": 0.51, "grad_norm": 8.675309374480587, "learning_rate": 9.845209894070542e-06, "loss": 1.1485, "step": 12155 }, { "epoch": 0.51, "grad_norm": 11.551780199834928, "learning_rate": 9.844905786576785e-06, "loss": 1.1599, "step": 12160 }, { "epoch": 0.51, "grad_norm": 10.382406762670646, "learning_rate": 9.844601385350663e-06, "loss": 1.141, "step": 12165 }, { "epoch": 0.52, "grad_norm": 11.548914499956982, "learning_rate": 9.84429669041063e-06, "loss": 1.1339, "step": 12170 }, { "epoch": 0.52, "grad_norm": 12.219673744661028, "learning_rate": 9.843991701775157e-06, "loss": 1.1126, "step": 12175 }, { "epoch": 0.52, "grad_norm": 18.084839847096443, "learning_rate": 9.843686419462735e-06, "loss": 1.1076, "step": 12180 }, { "epoch": 0.52, "grad_norm": 5.595470266242617, "learning_rate": 9.843380843491873e-06, "loss": 1.2094, "step": 12185 }, { "epoch": 0.52, "grad_norm": 6.273411631819693, "learning_rate": 9.843074973881098e-06, "loss": 1.0957, "step": 12190 }, { "epoch": 0.52, "grad_norm": 11.8993901549616, "learning_rate": 9.842768810648953e-06, "loss": 1.1489, "step": 12195 }, { "epoch": 0.52, "grad_norm": 7.101486783709928, "learning_rate": 9.842462353813999e-06, "loss": 1.1784, "step": 12200 }, { "epoch": 0.52, "grad_norm": 11.08392377893216, "learning_rate": 9.842155603394817e-06, "loss": 1.1808, "step": 12205 }, { "epoch": 0.52, "grad_norm": 17.361090798023238, "learning_rate": 9.841848559410004e-06, "loss": 1.1442, "step": 12210 }, { "epoch": 0.52, "grad_norm": 11.627036436334981, "learning_rate": 9.841541221878174e-06, "loss": 1.1487, "step": 12215 }, { "epoch": 0.52, "grad_norm": 10.14019284124057, "learning_rate": 9.841233590817963e-06, "loss": 1.1025, "step": 12220 }, { "epoch": 0.52, "grad_norm": 8.612791421299756, "learning_rate": 9.840925666248018e-06, "loss": 1.1266, "step": 12225 }, { "epoch": 0.52, "grad_norm": 7.726548685526764, "learning_rate": 9.84061744818701e-06, "loss": 1.1427, "step": 12230 }, { "epoch": 0.52, "grad_norm": 9.314003498907114, "learning_rate": 9.840308936653624e-06, "loss": 1.145, "step": 12235 }, { "epoch": 0.52, "grad_norm": 6.567299277357848, "learning_rate": 9.840000131666564e-06, "loss": 1.1142, "step": 12240 }, { "epoch": 0.52, "grad_norm": 10.518454294916243, "learning_rate": 9.839691033244556e-06, "loss": 1.1624, "step": 12245 }, { "epoch": 0.52, "grad_norm": 8.855018387313464, "learning_rate": 9.839381641406334e-06, "loss": 1.1292, "step": 12250 }, { "epoch": 0.52, "grad_norm": 7.742207974064404, "learning_rate": 9.839071956170658e-06, "loss": 1.1543, "step": 12255 }, { "epoch": 0.52, "grad_norm": 7.838812325933963, "learning_rate": 9.838761977556304e-06, "loss": 1.1783, "step": 12260 }, { "epoch": 0.52, "grad_norm": 7.890962090327382, "learning_rate": 9.838451705582062e-06, "loss": 1.1565, "step": 12265 }, { "epoch": 0.52, "grad_norm": 10.19375103386591, "learning_rate": 9.838141140266749e-06, "loss": 1.137, "step": 12270 }, { "epoch": 0.52, "grad_norm": 5.889923065665591, "learning_rate": 9.837830281629186e-06, "loss": 1.0944, "step": 12275 }, { "epoch": 0.52, "grad_norm": 8.170740026527037, "learning_rate": 9.837519129688227e-06, "loss": 1.1337, "step": 12280 }, { "epoch": 0.52, "grad_norm": 14.290042217647155, "learning_rate": 9.837207684462729e-06, "loss": 1.1765, "step": 12285 }, { "epoch": 0.52, "grad_norm": 15.132686526221493, "learning_rate": 9.836895945971581e-06, "loss": 1.157, "step": 12290 }, { "epoch": 0.52, "grad_norm": 13.92011040772267, "learning_rate": 9.836583914233677e-06, "loss": 1.1129, "step": 12295 }, { "epoch": 0.52, "grad_norm": 16.96459814968213, "learning_rate": 9.836271589267938e-06, "loss": 1.1892, "step": 12300 }, { "epoch": 0.52, "grad_norm": 14.478207855501797, "learning_rate": 9.835958971093296e-06, "loss": 1.1556, "step": 12305 }, { "epoch": 0.52, "grad_norm": 7.210182340022875, "learning_rate": 9.83564605972871e-06, "loss": 1.1311, "step": 12310 }, { "epoch": 0.52, "grad_norm": 6.717631978597978, "learning_rate": 9.835332855193145e-06, "loss": 1.1124, "step": 12315 }, { "epoch": 0.52, "grad_norm": 4.960093758316839, "learning_rate": 9.835019357505592e-06, "loss": 1.146, "step": 12320 }, { "epoch": 0.52, "grad_norm": 6.015175594719165, "learning_rate": 9.834705566685058e-06, "loss": 1.1082, "step": 12325 }, { "epoch": 0.52, "grad_norm": 5.36279854848031, "learning_rate": 9.834391482750566e-06, "loss": 1.1079, "step": 12330 }, { "epoch": 0.52, "grad_norm": 7.009252765865015, "learning_rate": 9.834077105721159e-06, "loss": 1.1276, "step": 12335 }, { "epoch": 0.52, "grad_norm": 8.048365631873782, "learning_rate": 9.833762435615897e-06, "loss": 1.1155, "step": 12340 }, { "epoch": 0.52, "grad_norm": 5.785986992471564, "learning_rate": 9.833447472453855e-06, "loss": 1.1139, "step": 12345 }, { "epoch": 0.52, "grad_norm": 7.911923101542478, "learning_rate": 9.83313221625413e-06, "loss": 1.1512, "step": 12350 }, { "epoch": 0.52, "grad_norm": 23.79428146410804, "learning_rate": 9.832816667035838e-06, "loss": 1.1757, "step": 12355 }, { "epoch": 0.52, "grad_norm": 7.239342441896353, "learning_rate": 9.832500824818105e-06, "loss": 1.1321, "step": 12360 }, { "epoch": 0.52, "grad_norm": 14.58435822384286, "learning_rate": 9.832184689620082e-06, "loss": 1.1473, "step": 12365 }, { "epoch": 0.52, "grad_norm": 13.56254222195042, "learning_rate": 9.831868261460935e-06, "loss": 1.1171, "step": 12370 }, { "epoch": 0.52, "grad_norm": 8.202953054801172, "learning_rate": 9.831551540359848e-06, "loss": 1.0989, "step": 12375 }, { "epoch": 0.52, "grad_norm": 14.997296884312815, "learning_rate": 9.831234526336022e-06, "loss": 1.161, "step": 12380 }, { "epoch": 0.52, "grad_norm": 5.967783318989335, "learning_rate": 9.830917219408678e-06, "loss": 1.1303, "step": 12385 }, { "epoch": 0.52, "grad_norm": 11.072364252202307, "learning_rate": 9.830599619597055e-06, "loss": 1.1275, "step": 12390 }, { "epoch": 0.52, "grad_norm": 7.005212657771845, "learning_rate": 9.830281726920403e-06, "loss": 1.1175, "step": 12395 }, { "epoch": 0.52, "grad_norm": 12.244484428979382, "learning_rate": 9.829963541398e-06, "loss": 1.156, "step": 12400 }, { "epoch": 0.53, "grad_norm": 9.140462559704638, "learning_rate": 9.829645063049134e-06, "loss": 1.116, "step": 12405 }, { "epoch": 0.53, "grad_norm": 5.5892189571569455, "learning_rate": 9.829326291893114e-06, "loss": 1.1308, "step": 12410 }, { "epoch": 0.53, "grad_norm": 5.395369945832596, "learning_rate": 9.829007227949266e-06, "loss": 1.1662, "step": 12415 }, { "epoch": 0.53, "grad_norm": 7.342506794748691, "learning_rate": 9.828687871236935e-06, "loss": 1.1115, "step": 12420 }, { "epoch": 0.53, "grad_norm": 5.434145580962522, "learning_rate": 9.82836822177548e-06, "loss": 1.1159, "step": 12425 }, { "epoch": 0.53, "grad_norm": 8.50773849951274, "learning_rate": 9.828048279584284e-06, "loss": 1.1084, "step": 12430 }, { "epoch": 0.53, "grad_norm": 12.79929312415913, "learning_rate": 9.82772804468274e-06, "loss": 1.1001, "step": 12435 }, { "epoch": 0.53, "grad_norm": 12.916878234370095, "learning_rate": 9.827407517090268e-06, "loss": 1.1864, "step": 12440 }, { "epoch": 0.53, "grad_norm": 10.624640186750703, "learning_rate": 9.827086696826295e-06, "loss": 1.1754, "step": 12445 }, { "epoch": 0.53, "grad_norm": 5.983018743222077, "learning_rate": 9.826765583910277e-06, "loss": 1.1326, "step": 12450 }, { "epoch": 0.53, "grad_norm": 6.773415605407802, "learning_rate": 9.826444178361678e-06, "loss": 1.1252, "step": 12455 }, { "epoch": 0.53, "grad_norm": 7.64346583503369, "learning_rate": 9.826122480199984e-06, "loss": 1.1121, "step": 12460 }, { "epoch": 0.53, "grad_norm": 6.806255887153926, "learning_rate": 9.825800489444702e-06, "loss": 1.1332, "step": 12465 }, { "epoch": 0.53, "grad_norm": 5.946886018001433, "learning_rate": 9.825478206115348e-06, "loss": 1.1263, "step": 12470 }, { "epoch": 0.53, "grad_norm": 5.7102504435850525, "learning_rate": 9.825155630231468e-06, "loss": 1.1914, "step": 12475 }, { "epoch": 0.53, "grad_norm": 6.187177547860948, "learning_rate": 9.824832761812612e-06, "loss": 1.1005, "step": 12480 }, { "epoch": 0.53, "grad_norm": 7.932640658112496, "learning_rate": 9.82450960087836e-06, "loss": 1.0953, "step": 12485 }, { "epoch": 0.53, "grad_norm": 6.713554701617836, "learning_rate": 9.824186147448302e-06, "loss": 1.1101, "step": 12490 }, { "epoch": 0.53, "grad_norm": 11.940642765514276, "learning_rate": 9.823862401542047e-06, "loss": 1.1848, "step": 12495 }, { "epoch": 0.53, "grad_norm": 12.374947051131372, "learning_rate": 9.823538363179224e-06, "loss": 1.125, "step": 12500 }, { "epoch": 0.53, "grad_norm": 25.063050467263594, "learning_rate": 9.823214032379477e-06, "loss": 1.1641, "step": 12505 }, { "epoch": 0.53, "grad_norm": 31.946356147034052, "learning_rate": 9.822889409162472e-06, "loss": 1.1359, "step": 12510 }, { "epoch": 0.53, "grad_norm": 7.352240288543752, "learning_rate": 9.822564493547888e-06, "loss": 1.1469, "step": 12515 }, { "epoch": 0.53, "grad_norm": 9.341985165460384, "learning_rate": 9.822239285555426e-06, "loss": 1.1558, "step": 12520 }, { "epoch": 0.53, "grad_norm": 13.90649560814533, "learning_rate": 9.821913785204798e-06, "loss": 1.2014, "step": 12525 }, { "epoch": 0.53, "grad_norm": 6.377792204778581, "learning_rate": 9.82158799251574e-06, "loss": 1.097, "step": 12530 }, { "epoch": 0.53, "grad_norm": 9.663508185237124, "learning_rate": 9.821261907508007e-06, "loss": 1.1191, "step": 12535 }, { "epoch": 0.53, "grad_norm": 6.410816308626841, "learning_rate": 9.820935530201364e-06, "loss": 1.1672, "step": 12540 }, { "epoch": 0.53, "grad_norm": 6.703330757894308, "learning_rate": 9.8206088606156e-06, "loss": 1.1126, "step": 12545 }, { "epoch": 0.53, "grad_norm": 5.376797202098381, "learning_rate": 9.820281898770522e-06, "loss": 1.0941, "step": 12550 }, { "epoch": 0.53, "grad_norm": 9.020811120181008, "learning_rate": 9.819954644685951e-06, "loss": 1.1362, "step": 12555 }, { "epoch": 0.53, "grad_norm": 14.561177397605544, "learning_rate": 9.819627098381727e-06, "loss": 1.1461, "step": 12560 }, { "epoch": 0.53, "grad_norm": 7.375564514096008, "learning_rate": 9.819299259877708e-06, "loss": 1.1366, "step": 12565 }, { "epoch": 0.53, "grad_norm": 9.46221781201574, "learning_rate": 9.818971129193772e-06, "loss": 1.1309, "step": 12570 }, { "epoch": 0.53, "grad_norm": 5.227433319273252, "learning_rate": 9.81864270634981e-06, "loss": 1.1424, "step": 12575 }, { "epoch": 0.53, "grad_norm": 6.015066391850717, "learning_rate": 9.818313991365736e-06, "loss": 1.1272, "step": 12580 }, { "epoch": 0.53, "grad_norm": 5.431547869025431, "learning_rate": 9.817984984261476e-06, "loss": 1.1126, "step": 12585 }, { "epoch": 0.53, "grad_norm": 14.452244129505589, "learning_rate": 9.81765568505698e-06, "loss": 1.1295, "step": 12590 }, { "epoch": 0.53, "grad_norm": 16.044731059430934, "learning_rate": 9.817326093772208e-06, "loss": 1.1428, "step": 12595 }, { "epoch": 0.53, "grad_norm": 12.009431959670444, "learning_rate": 9.816996210427147e-06, "loss": 1.0916, "step": 12600 }, { "epoch": 0.53, "grad_norm": 5.532064237578303, "learning_rate": 9.816666035041793e-06, "loss": 1.1325, "step": 12605 }, { "epoch": 0.53, "grad_norm": 8.229898720306615, "learning_rate": 9.816335567636165e-06, "loss": 1.1359, "step": 12610 }, { "epoch": 0.53, "grad_norm": 10.480896011148356, "learning_rate": 9.816004808230302e-06, "loss": 1.1097, "step": 12615 }, { "epoch": 0.53, "grad_norm": 11.08466030790317, "learning_rate": 9.815673756844251e-06, "loss": 1.1308, "step": 12620 }, { "epoch": 0.53, "grad_norm": 5.434118290289443, "learning_rate": 9.815342413498085e-06, "loss": 1.1649, "step": 12625 }, { "epoch": 0.53, "grad_norm": 14.253357449063092, "learning_rate": 9.815010778211894e-06, "loss": 1.1472, "step": 12630 }, { "epoch": 0.53, "grad_norm": 5.003635887936105, "learning_rate": 9.814678851005782e-06, "loss": 1.1347, "step": 12635 }, { "epoch": 0.53, "grad_norm": 22.983628003413013, "learning_rate": 9.814346631899871e-06, "loss": 1.1425, "step": 12640 }, { "epoch": 0.54, "grad_norm": 30.646669162736565, "learning_rate": 9.814014120914307e-06, "loss": 1.1457, "step": 12645 }, { "epoch": 0.54, "grad_norm": 21.95596516313099, "learning_rate": 9.813681318069247e-06, "loss": 1.1085, "step": 12650 }, { "epoch": 0.54, "grad_norm": 23.921706235280478, "learning_rate": 9.813348223384866e-06, "loss": 1.1443, "step": 12655 }, { "epoch": 0.54, "grad_norm": 24.963542697151166, "learning_rate": 9.813014836881363e-06, "loss": 1.1096, "step": 12660 }, { "epoch": 0.54, "grad_norm": 55.33834627454908, "learning_rate": 9.812681158578947e-06, "loss": 1.1325, "step": 12665 }, { "epoch": 0.54, "grad_norm": 34.10999318071417, "learning_rate": 9.812347188497848e-06, "loss": 1.1356, "step": 12670 }, { "epoch": 0.54, "grad_norm": 44.40637943552744, "learning_rate": 9.812012926658315e-06, "loss": 1.183, "step": 12675 }, { "epoch": 0.54, "grad_norm": 32.96691171243745, "learning_rate": 9.811678373080611e-06, "loss": 1.1082, "step": 12680 }, { "epoch": 0.54, "grad_norm": 11.224975241150997, "learning_rate": 9.811343527785023e-06, "loss": 1.1141, "step": 12685 }, { "epoch": 0.54, "grad_norm": 11.394199433038377, "learning_rate": 9.811008390791845e-06, "loss": 1.1469, "step": 12690 }, { "epoch": 0.54, "grad_norm": 17.29075550459264, "learning_rate": 9.810672962121403e-06, "loss": 1.145, "step": 12695 }, { "epoch": 0.54, "grad_norm": 22.42269663005383, "learning_rate": 9.81033724179403e-06, "loss": 1.1455, "step": 12700 }, { "epoch": 0.54, "grad_norm": 21.35283491140994, "learning_rate": 9.810001229830078e-06, "loss": 1.1254, "step": 12705 }, { "epoch": 0.54, "grad_norm": 14.17781485465181, "learning_rate": 9.809664926249918e-06, "loss": 1.145, "step": 12710 }, { "epoch": 0.54, "grad_norm": 6.730954399864765, "learning_rate": 9.809328331073943e-06, "loss": 1.1819, "step": 12715 }, { "epoch": 0.54, "grad_norm": 12.30549736617908, "learning_rate": 9.808991444322558e-06, "loss": 1.1581, "step": 12720 }, { "epoch": 0.54, "grad_norm": 9.926665877801275, "learning_rate": 9.808654266016186e-06, "loss": 1.1038, "step": 12725 }, { "epoch": 0.54, "grad_norm": 10.135075584557665, "learning_rate": 9.80831679617527e-06, "loss": 1.1031, "step": 12730 }, { "epoch": 0.54, "grad_norm": 6.136098592509309, "learning_rate": 9.80797903482027e-06, "loss": 1.1215, "step": 12735 }, { "epoch": 0.54, "grad_norm": 6.901920819566608, "learning_rate": 9.807640981971664e-06, "loss": 1.1156, "step": 12740 }, { "epoch": 0.54, "grad_norm": 8.920824264615367, "learning_rate": 9.807302637649947e-06, "loss": 1.1256, "step": 12745 }, { "epoch": 0.54, "grad_norm": 16.526178949933264, "learning_rate": 9.806964001875632e-06, "loss": 1.1082, "step": 12750 }, { "epoch": 0.54, "grad_norm": 5.221653671846322, "learning_rate": 9.806625074669248e-06, "loss": 1.1521, "step": 12755 }, { "epoch": 0.54, "grad_norm": 10.624469505400643, "learning_rate": 9.806285856051345e-06, "loss": 1.1303, "step": 12760 }, { "epoch": 0.54, "grad_norm": 23.123082479810204, "learning_rate": 9.805946346042486e-06, "loss": 1.1037, "step": 12765 }, { "epoch": 0.54, "grad_norm": 10.680466429914798, "learning_rate": 9.805606544663257e-06, "loss": 1.1262, "step": 12770 }, { "epoch": 0.54, "grad_norm": 5.556054906504793, "learning_rate": 9.80526645193426e-06, "loss": 1.1625, "step": 12775 }, { "epoch": 0.54, "grad_norm": 7.657764965405082, "learning_rate": 9.804926067876112e-06, "loss": 1.1915, "step": 12780 }, { "epoch": 0.54, "grad_norm": 6.42309815476824, "learning_rate": 9.804585392509448e-06, "loss": 1.1129, "step": 12785 }, { "epoch": 0.54, "grad_norm": 7.267310833703389, "learning_rate": 9.804244425854927e-06, "loss": 1.1317, "step": 12790 }, { "epoch": 0.54, "grad_norm": 14.011457840467049, "learning_rate": 9.803903167933216e-06, "loss": 1.1128, "step": 12795 }, { "epoch": 0.54, "grad_norm": 16.280056218264157, "learning_rate": 9.803561618765007e-06, "loss": 1.1294, "step": 12800 }, { "epoch": 0.54, "grad_norm": 12.124777958454295, "learning_rate": 9.803219778371005e-06, "loss": 1.1371, "step": 12805 }, { "epoch": 0.54, "grad_norm": 30.772978134808493, "learning_rate": 9.802877646771939e-06, "loss": 1.1246, "step": 12810 }, { "epoch": 0.54, "grad_norm": 10.177471773456658, "learning_rate": 9.802535223988544e-06, "loss": 1.1362, "step": 12815 }, { "epoch": 0.54, "grad_norm": 15.673892195663568, "learning_rate": 9.802192510041588e-06, "loss": 1.1163, "step": 12820 }, { "epoch": 0.54, "grad_norm": 19.536658912570903, "learning_rate": 9.801849504951846e-06, "loss": 1.1069, "step": 12825 }, { "epoch": 0.54, "grad_norm": 17.10915844199704, "learning_rate": 9.80150620874011e-06, "loss": 1.1453, "step": 12830 }, { "epoch": 0.54, "grad_norm": 13.435559174247235, "learning_rate": 9.801162621427199e-06, "loss": 1.1161, "step": 12835 }, { "epoch": 0.54, "grad_norm": 7.029011371702906, "learning_rate": 9.800818743033939e-06, "loss": 1.1586, "step": 12840 }, { "epoch": 0.54, "grad_norm": 5.447254382637652, "learning_rate": 9.80047457358118e-06, "loss": 1.1156, "step": 12845 }, { "epoch": 0.54, "grad_norm": 11.16029474577938, "learning_rate": 9.800130113089788e-06, "loss": 1.2196, "step": 12850 }, { "epoch": 0.54, "grad_norm": 6.7279993904063184, "learning_rate": 9.799785361580646e-06, "loss": 1.1116, "step": 12855 }, { "epoch": 0.54, "grad_norm": 5.524184367358572, "learning_rate": 9.799440319074656e-06, "loss": 1.1308, "step": 12860 }, { "epoch": 0.54, "grad_norm": 7.511117821800435, "learning_rate": 9.799094985592736e-06, "loss": 1.118, "step": 12865 }, { "epoch": 0.54, "grad_norm": 11.719451076107806, "learning_rate": 9.798749361155825e-06, "loss": 1.1574, "step": 12870 }, { "epoch": 0.54, "grad_norm": 7.147397647563752, "learning_rate": 9.798403445784874e-06, "loss": 1.0919, "step": 12875 }, { "epoch": 0.55, "grad_norm": 6.02630171898831, "learning_rate": 9.798057239500857e-06, "loss": 1.1158, "step": 12880 }, { "epoch": 0.55, "grad_norm": 21.51259520835566, "learning_rate": 9.797710742324762e-06, "loss": 1.1634, "step": 12885 }, { "epoch": 0.55, "grad_norm": 20.52457904564015, "learning_rate": 9.797363954277597e-06, "loss": 1.1102, "step": 12890 }, { "epoch": 0.55, "grad_norm": 7.405605907789512, "learning_rate": 9.797016875380387e-06, "loss": 1.0867, "step": 12895 }, { "epoch": 0.55, "grad_norm": 10.751986469586543, "learning_rate": 9.796669505654174e-06, "loss": 1.1125, "step": 12900 }, { "epoch": 0.55, "grad_norm": 14.914563903998776, "learning_rate": 9.796321845120019e-06, "loss": 1.1791, "step": 12905 }, { "epoch": 0.55, "grad_norm": 6.078160319987566, "learning_rate": 9.795973893798995e-06, "loss": 1.1318, "step": 12910 }, { "epoch": 0.55, "grad_norm": 7.398663014267725, "learning_rate": 9.795625651712205e-06, "loss": 1.1231, "step": 12915 }, { "epoch": 0.55, "grad_norm": 9.264465644945457, "learning_rate": 9.795277118880755e-06, "loss": 1.1441, "step": 12920 }, { "epoch": 0.55, "grad_norm": 5.928179448521267, "learning_rate": 9.794928295325779e-06, "loss": 1.1839, "step": 12925 }, { "epoch": 0.55, "grad_norm": 10.980306187907086, "learning_rate": 9.794579181068424e-06, "loss": 1.1098, "step": 12930 }, { "epoch": 0.55, "grad_norm": 7.425153845735667, "learning_rate": 9.794229776129855e-06, "loss": 1.1161, "step": 12935 }, { "epoch": 0.55, "grad_norm": 11.590078288626902, "learning_rate": 9.79388008053126e-06, "loss": 1.1725, "step": 12940 }, { "epoch": 0.55, "grad_norm": 23.779202647418575, "learning_rate": 9.793530094293834e-06, "loss": 1.1069, "step": 12945 }, { "epoch": 0.55, "grad_norm": 11.318809585200905, "learning_rate": 9.793179817438797e-06, "loss": 1.15, "step": 12950 }, { "epoch": 0.55, "grad_norm": 24.039171889246116, "learning_rate": 9.79282924998739e-06, "loss": 1.0864, "step": 12955 }, { "epoch": 0.55, "grad_norm": 10.701881120168887, "learning_rate": 9.79247839196086e-06, "loss": 1.1382, "step": 12960 }, { "epoch": 0.55, "grad_norm": 20.868832837071682, "learning_rate": 9.792127243380485e-06, "loss": 1.1006, "step": 12965 }, { "epoch": 0.55, "grad_norm": 9.805720085328945, "learning_rate": 9.79177580426755e-06, "loss": 1.114, "step": 12970 }, { "epoch": 0.55, "grad_norm": 20.943371067699374, "learning_rate": 9.79142407464336e-06, "loss": 1.1025, "step": 12975 }, { "epoch": 0.55, "grad_norm": 25.874461585041974, "learning_rate": 9.791072054529245e-06, "loss": 1.1405, "step": 12980 }, { "epoch": 0.55, "grad_norm": 20.662401985754244, "learning_rate": 9.790719743946542e-06, "loss": 1.1386, "step": 12985 }, { "epoch": 0.55, "grad_norm": 18.911195988444344, "learning_rate": 9.790367142916613e-06, "loss": 1.1325, "step": 12990 }, { "epoch": 0.55, "grad_norm": 6.043732067918676, "learning_rate": 9.790014251460835e-06, "loss": 1.1395, "step": 12995 }, { "epoch": 0.55, "grad_norm": 6.912730063709543, "learning_rate": 9.789661069600603e-06, "loss": 1.1412, "step": 13000 }, { "epoch": 0.55, "grad_norm": 11.143892789975729, "learning_rate": 9.789307597357327e-06, "loss": 1.1014, "step": 13005 }, { "epoch": 0.55, "grad_norm": 24.767448383009672, "learning_rate": 9.788953834752439e-06, "loss": 1.1729, "step": 13010 }, { "epoch": 0.55, "grad_norm": 17.37375828521619, "learning_rate": 9.788599781807387e-06, "loss": 1.1142, "step": 13015 }, { "epoch": 0.55, "grad_norm": 14.446506230426834, "learning_rate": 9.788245438543635e-06, "loss": 1.1723, "step": 13020 }, { "epoch": 0.55, "grad_norm": 12.718297894103685, "learning_rate": 9.787890804982667e-06, "loss": 1.1274, "step": 13025 }, { "epoch": 0.55, "grad_norm": 16.281808194011177, "learning_rate": 9.787535881145983e-06, "loss": 1.1229, "step": 13030 }, { "epoch": 0.55, "grad_norm": 16.231563175521178, "learning_rate": 9.7871806670551e-06, "loss": 1.1112, "step": 13035 }, { "epoch": 0.55, "grad_norm": 9.332706893444458, "learning_rate": 9.786825162731554e-06, "loss": 1.133, "step": 13040 }, { "epoch": 0.55, "grad_norm": 13.073314528986915, "learning_rate": 9.7864693681969e-06, "loss": 1.1103, "step": 13045 }, { "epoch": 0.55, "grad_norm": 10.191234904647198, "learning_rate": 9.786113283472706e-06, "loss": 1.1365, "step": 13050 }, { "epoch": 0.55, "grad_norm": 9.688970311284944, "learning_rate": 9.785756908580562e-06, "loss": 1.1674, "step": 13055 }, { "epoch": 0.55, "grad_norm": 10.66700676991886, "learning_rate": 9.785400243542072e-06, "loss": 1.1107, "step": 13060 }, { "epoch": 0.55, "grad_norm": 7.482845963252206, "learning_rate": 9.785043288378863e-06, "loss": 1.1549, "step": 13065 }, { "epoch": 0.55, "grad_norm": 6.2982069199988775, "learning_rate": 9.784686043112574e-06, "loss": 1.1173, "step": 13070 }, { "epoch": 0.55, "grad_norm": 20.93924534870786, "learning_rate": 9.784328507764861e-06, "loss": 1.1022, "step": 13075 }, { "epoch": 0.55, "grad_norm": 17.482783700983667, "learning_rate": 9.783970682357407e-06, "loss": 1.107, "step": 13080 }, { "epoch": 0.55, "grad_norm": 26.29068808237692, "learning_rate": 9.783612566911901e-06, "loss": 1.1432, "step": 13085 }, { "epoch": 0.55, "grad_norm": 18.0251665510324, "learning_rate": 9.783254161450056e-06, "loss": 1.2032, "step": 13090 }, { "epoch": 0.55, "grad_norm": 13.233875236490901, "learning_rate": 9.782895465993601e-06, "loss": 1.1393, "step": 13095 }, { "epoch": 0.55, "grad_norm": 10.454995385907694, "learning_rate": 9.782536480564282e-06, "loss": 1.1571, "step": 13100 }, { "epoch": 0.55, "grad_norm": 5.030320811867446, "learning_rate": 9.782177205183863e-06, "loss": 1.0999, "step": 13105 }, { "epoch": 0.55, "grad_norm": 4.734900954082516, "learning_rate": 9.781817639874126e-06, "loss": 1.1176, "step": 13110 }, { "epoch": 0.56, "grad_norm": 22.133119933221828, "learning_rate": 9.781457784656872e-06, "loss": 1.0956, "step": 13115 }, { "epoch": 0.56, "grad_norm": 9.938707646888012, "learning_rate": 9.781097639553916e-06, "loss": 1.1223, "step": 13120 }, { "epoch": 0.56, "grad_norm": 6.564486410330966, "learning_rate": 9.780737204587094e-06, "loss": 1.1954, "step": 13125 }, { "epoch": 0.56, "grad_norm": 10.985861891688922, "learning_rate": 9.780376479778257e-06, "loss": 1.1281, "step": 13130 }, { "epoch": 0.56, "grad_norm": 6.705715360507454, "learning_rate": 9.780015465149275e-06, "loss": 1.0931, "step": 13135 }, { "epoch": 0.56, "grad_norm": 16.620647171367157, "learning_rate": 9.779654160722035e-06, "loss": 1.1835, "step": 13140 }, { "epoch": 0.56, "grad_norm": 8.299144146136616, "learning_rate": 9.779292566518442e-06, "loss": 1.1564, "step": 13145 }, { "epoch": 0.56, "grad_norm": 9.521385037124467, "learning_rate": 9.778930682560419e-06, "loss": 1.1982, "step": 13150 }, { "epoch": 0.56, "grad_norm": 6.840960460428551, "learning_rate": 9.778568508869906e-06, "loss": 1.1115, "step": 13155 }, { "epoch": 0.56, "grad_norm": 5.6274360915454, "learning_rate": 9.77820604546886e-06, "loss": 1.1553, "step": 13160 }, { "epoch": 0.56, "grad_norm": 13.768737399090917, "learning_rate": 9.777843292379255e-06, "loss": 1.1328, "step": 13165 }, { "epoch": 0.56, "grad_norm": 10.133797486111874, "learning_rate": 9.777480249623086e-06, "loss": 1.1522, "step": 13170 }, { "epoch": 0.56, "grad_norm": 13.654277072752937, "learning_rate": 9.777116917222362e-06, "loss": 1.1358, "step": 13175 }, { "epoch": 0.56, "grad_norm": 5.167302637129885, "learning_rate": 9.776753295199111e-06, "loss": 1.1703, "step": 13180 }, { "epoch": 0.56, "grad_norm": 21.932925745156684, "learning_rate": 9.776389383575377e-06, "loss": 1.1098, "step": 13185 }, { "epoch": 0.56, "grad_norm": 25.755926387523015, "learning_rate": 9.776025182373225e-06, "loss": 1.1554, "step": 13190 }, { "epoch": 0.56, "grad_norm": 12.18752070214331, "learning_rate": 9.775660691614733e-06, "loss": 1.1828, "step": 13195 }, { "epoch": 0.56, "grad_norm": 6.4710004089662485, "learning_rate": 9.775295911322002e-06, "loss": 1.1657, "step": 13200 }, { "epoch": 0.56, "grad_norm": 5.907298713227695, "learning_rate": 9.774930841517146e-06, "loss": 1.1235, "step": 13205 }, { "epoch": 0.56, "grad_norm": 27.11503848014546, "learning_rate": 9.774565482222297e-06, "loss": 1.117, "step": 13210 }, { "epoch": 0.56, "grad_norm": 8.551029589771035, "learning_rate": 9.774199833459606e-06, "loss": 1.1581, "step": 13215 }, { "epoch": 0.56, "grad_norm": 5.828455747367782, "learning_rate": 9.773833895251243e-06, "loss": 1.1336, "step": 13220 }, { "epoch": 0.56, "grad_norm": 8.552269523706661, "learning_rate": 9.773467667619394e-06, "loss": 1.1383, "step": 13225 }, { "epoch": 0.56, "grad_norm": 6.5264486516209255, "learning_rate": 9.77310115058626e-06, "loss": 1.1657, "step": 13230 }, { "epoch": 0.56, "grad_norm": 8.143082185385055, "learning_rate": 9.772734344174062e-06, "loss": 1.0869, "step": 13235 }, { "epoch": 0.56, "grad_norm": 6.501204261538233, "learning_rate": 9.77236724840504e-06, "loss": 1.1671, "step": 13240 }, { "epoch": 0.56, "grad_norm": 17.787340675653407, "learning_rate": 9.77199986330145e-06, "loss": 1.0893, "step": 13245 }, { "epoch": 0.56, "grad_norm": 7.627285149178578, "learning_rate": 9.771632188885563e-06, "loss": 1.1145, "step": 13250 }, { "epoch": 0.56, "grad_norm": 9.613557491982418, "learning_rate": 9.771264225179673e-06, "loss": 1.0989, "step": 13255 }, { "epoch": 0.56, "grad_norm": 5.458179462400088, "learning_rate": 9.770895972206087e-06, "loss": 1.1087, "step": 13260 }, { "epoch": 0.56, "grad_norm": 5.599539106617368, "learning_rate": 9.77052742998713e-06, "loss": 1.1729, "step": 13265 }, { "epoch": 0.56, "grad_norm": 11.427568675308853, "learning_rate": 9.77015859854515e-06, "loss": 1.1244, "step": 13270 }, { "epoch": 0.56, "grad_norm": 7.338757436962799, "learning_rate": 9.769789477902503e-06, "loss": 1.1314, "step": 13275 }, { "epoch": 0.56, "grad_norm": 6.127939920679577, "learning_rate": 9.76942006808157e-06, "loss": 1.1432, "step": 13280 }, { "epoch": 0.56, "grad_norm": 17.482971661011053, "learning_rate": 9.769050369104747e-06, "loss": 1.1435, "step": 13285 }, { "epoch": 0.56, "grad_norm": 18.968380951063157, "learning_rate": 9.76868038099445e-06, "loss": 1.1223, "step": 13290 }, { "epoch": 0.56, "grad_norm": 11.851121365538491, "learning_rate": 9.768310103773105e-06, "loss": 1.1565, "step": 13295 }, { "epoch": 0.56, "grad_norm": 18.52619269070551, "learning_rate": 9.767939537463164e-06, "loss": 1.0886, "step": 13300 }, { "epoch": 0.56, "grad_norm": 7.914681046230499, "learning_rate": 9.767568682087094e-06, "loss": 1.1477, "step": 13305 }, { "epoch": 0.56, "grad_norm": 11.957620866819147, "learning_rate": 9.767197537667379e-06, "loss": 1.1418, "step": 13310 }, { "epoch": 0.56, "grad_norm": 15.458989591754985, "learning_rate": 9.766826104226519e-06, "loss": 1.1442, "step": 13315 }, { "epoch": 0.56, "grad_norm": 12.85429724644223, "learning_rate": 9.766454381787034e-06, "loss": 1.1263, "step": 13320 }, { "epoch": 0.56, "grad_norm": 4.9052956788645385, "learning_rate": 9.766082370371458e-06, "loss": 1.088, "step": 13325 }, { "epoch": 0.56, "grad_norm": 8.644506708146974, "learning_rate": 9.76571007000235e-06, "loss": 1.1216, "step": 13330 }, { "epoch": 0.56, "grad_norm": 11.936463125365135, "learning_rate": 9.765337480702276e-06, "loss": 1.152, "step": 13335 }, { "epoch": 0.56, "grad_norm": 10.375067115709802, "learning_rate": 9.764964602493828e-06, "loss": 1.1596, "step": 13340 }, { "epoch": 0.56, "grad_norm": 6.902950180571504, "learning_rate": 9.76459143539961e-06, "loss": 1.1224, "step": 13345 }, { "epoch": 0.56, "grad_norm": 21.837736280134347, "learning_rate": 9.76421797944225e-06, "loss": 1.1504, "step": 13350 }, { "epoch": 0.57, "grad_norm": 16.842568521064024, "learning_rate": 9.763844234644386e-06, "loss": 1.1049, "step": 13355 }, { "epoch": 0.57, "grad_norm": 18.12746969766832, "learning_rate": 9.76347020102868e-06, "loss": 1.1119, "step": 13360 }, { "epoch": 0.57, "grad_norm": 5.935129818403512, "learning_rate": 9.763095878617806e-06, "loss": 1.1298, "step": 13365 }, { "epoch": 0.57, "grad_norm": 6.539806839753402, "learning_rate": 9.76272126743446e-06, "loss": 1.1265, "step": 13370 }, { "epoch": 0.57, "grad_norm": 6.547612250773259, "learning_rate": 9.76234636750135e-06, "loss": 1.1188, "step": 13375 }, { "epoch": 0.57, "grad_norm": 7.68767556630829, "learning_rate": 9.76197117884121e-06, "loss": 1.0998, "step": 13380 }, { "epoch": 0.57, "grad_norm": 6.448630061504575, "learning_rate": 9.761595701476784e-06, "loss": 1.1226, "step": 13385 }, { "epoch": 0.57, "grad_norm": 5.38702777729219, "learning_rate": 9.761219935430836e-06, "loss": 1.1193, "step": 13390 }, { "epoch": 0.57, "grad_norm": 5.590565912070996, "learning_rate": 9.760843880726148e-06, "loss": 1.1309, "step": 13395 }, { "epoch": 0.57, "grad_norm": 7.055441084633275, "learning_rate": 9.760467537385519e-06, "loss": 1.1085, "step": 13400 }, { "epoch": 0.57, "grad_norm": 7.93073082795177, "learning_rate": 9.760090905431767e-06, "loss": 1.1238, "step": 13405 }, { "epoch": 0.57, "grad_norm": 6.027074855740675, "learning_rate": 9.759713984887723e-06, "loss": 1.0785, "step": 13410 }, { "epoch": 0.57, "grad_norm": 5.404823610158473, "learning_rate": 9.75933677577624e-06, "loss": 1.1467, "step": 13415 }, { "epoch": 0.57, "grad_norm": 6.184722880962163, "learning_rate": 9.758959278120189e-06, "loss": 1.0982, "step": 13420 }, { "epoch": 0.57, "grad_norm": 13.452705144229121, "learning_rate": 9.758581491942455e-06, "loss": 1.1098, "step": 13425 }, { "epoch": 0.57, "grad_norm": 15.831042783525902, "learning_rate": 9.75820341726594e-06, "loss": 1.1453, "step": 13430 }, { "epoch": 0.57, "grad_norm": 4.9233019978285775, "learning_rate": 9.757825054113569e-06, "loss": 1.1134, "step": 13435 }, { "epoch": 0.57, "grad_norm": 15.175120159721672, "learning_rate": 9.75744640250828e-06, "loss": 1.1236, "step": 13440 }, { "epoch": 0.57, "grad_norm": 18.157207065832747, "learning_rate": 9.757067462473026e-06, "loss": 1.1399, "step": 13445 }, { "epoch": 0.57, "grad_norm": 14.618263639784244, "learning_rate": 9.756688234030788e-06, "loss": 1.1042, "step": 13450 }, { "epoch": 0.57, "grad_norm": 15.872953737147169, "learning_rate": 9.756308717204552e-06, "loss": 1.1474, "step": 13455 }, { "epoch": 0.57, "grad_norm": 6.4409305621497275, "learning_rate": 9.75592891201733e-06, "loss": 1.154, "step": 13460 }, { "epoch": 0.57, "grad_norm": 37.656149382181226, "learning_rate": 9.755548818492145e-06, "loss": 1.1332, "step": 13465 }, { "epoch": 0.57, "grad_norm": 39.410070890251866, "learning_rate": 9.755168436652044e-06, "loss": 1.1329, "step": 13470 }, { "epoch": 0.57, "grad_norm": 12.494051807223958, "learning_rate": 9.754787766520087e-06, "loss": 1.1539, "step": 13475 }, { "epoch": 0.57, "grad_norm": 6.13011391022796, "learning_rate": 9.754406808119355e-06, "loss": 1.1663, "step": 13480 }, { "epoch": 0.57, "grad_norm": 11.419113045490374, "learning_rate": 9.754025561472942e-06, "loss": 1.1087, "step": 13485 }, { "epoch": 0.57, "grad_norm": 9.516755129262156, "learning_rate": 9.753644026603961e-06, "loss": 1.189, "step": 13490 }, { "epoch": 0.57, "grad_norm": 12.978766271770928, "learning_rate": 9.753262203535547e-06, "loss": 1.1421, "step": 13495 }, { "epoch": 0.57, "grad_norm": 11.117541691549466, "learning_rate": 9.752880092290846e-06, "loss": 1.1256, "step": 13500 }, { "epoch": 0.57, "grad_norm": 5.252353779560716, "learning_rate": 9.752497692893024e-06, "loss": 1.1152, "step": 13505 }, { "epoch": 0.57, "grad_norm": 11.319861335587225, "learning_rate": 9.752115005365268e-06, "loss": 1.1269, "step": 13510 }, { "epoch": 0.57, "grad_norm": 13.993503207907033, "learning_rate": 9.751732029730776e-06, "loss": 1.0822, "step": 13515 }, { "epoch": 0.57, "grad_norm": 5.9736893549726755, "learning_rate": 9.751348766012768e-06, "loss": 1.1072, "step": 13520 }, { "epoch": 0.57, "grad_norm": 6.278543704694994, "learning_rate": 9.75096521423448e-06, "loss": 1.0968, "step": 13525 }, { "epoch": 0.57, "grad_norm": 5.823957093385041, "learning_rate": 9.750581374419165e-06, "loss": 1.1128, "step": 13530 }, { "epoch": 0.57, "grad_norm": 8.354038431585671, "learning_rate": 9.750197246590094e-06, "loss": 1.1535, "step": 13535 }, { "epoch": 0.57, "grad_norm": 8.392417723197982, "learning_rate": 9.749812830770557e-06, "loss": 1.0951, "step": 13540 }, { "epoch": 0.57, "grad_norm": 7.583628493567195, "learning_rate": 9.74942812698386e-06, "loss": 1.1295, "step": 13545 }, { "epoch": 0.57, "grad_norm": 7.242990049423247, "learning_rate": 9.749043135253325e-06, "loss": 1.1074, "step": 13550 }, { "epoch": 0.57, "grad_norm": 18.01123885900048, "learning_rate": 9.748657855602291e-06, "loss": 1.1773, "step": 13555 }, { "epoch": 0.57, "grad_norm": 22.042223974575627, "learning_rate": 9.748272288054121e-06, "loss": 1.192, "step": 13560 }, { "epoch": 0.57, "grad_norm": 14.879311750750942, "learning_rate": 9.747886432632188e-06, "loss": 1.1903, "step": 13565 }, { "epoch": 0.57, "grad_norm": 6.711097179570518, "learning_rate": 9.747500289359888e-06, "loss": 1.144, "step": 13570 }, { "epoch": 0.57, "grad_norm": 23.60807442866413, "learning_rate": 9.747113858260627e-06, "loss": 1.1923, "step": 13575 }, { "epoch": 0.57, "grad_norm": 32.66858165124962, "learning_rate": 9.746727139357836e-06, "loss": 1.1754, "step": 13580 }, { "epoch": 0.57, "grad_norm": 20.49191445348783, "learning_rate": 9.746340132674963e-06, "loss": 1.197, "step": 13585 }, { "epoch": 0.58, "grad_norm": 13.596808091652, "learning_rate": 9.745952838235466e-06, "loss": 1.1538, "step": 13590 }, { "epoch": 0.58, "grad_norm": 10.961680188320575, "learning_rate": 9.74556525606283e-06, "loss": 1.1269, "step": 13595 }, { "epoch": 0.58, "grad_norm": 11.699277531139078, "learning_rate": 9.74517738618055e-06, "loss": 1.1891, "step": 13600 }, { "epoch": 0.58, "grad_norm": 10.596301814481997, "learning_rate": 9.744789228612143e-06, "loss": 1.1343, "step": 13605 }, { "epoch": 0.58, "grad_norm": 10.07379404370688, "learning_rate": 9.74440078338114e-06, "loss": 1.1586, "step": 13610 }, { "epoch": 0.58, "grad_norm": 15.745282290035206, "learning_rate": 9.744012050511093e-06, "loss": 1.0977, "step": 13615 }, { "epoch": 0.58, "grad_norm": 7.093819211165586, "learning_rate": 9.74362303002557e-06, "loss": 1.1253, "step": 13620 }, { "epoch": 0.58, "grad_norm": 8.86902471573825, "learning_rate": 9.743233721948155e-06, "loss": 1.1165, "step": 13625 }, { "epoch": 0.58, "grad_norm": 8.982300055530532, "learning_rate": 9.742844126302451e-06, "loss": 1.1519, "step": 13630 }, { "epoch": 0.58, "grad_norm": 10.238724259179179, "learning_rate": 9.74245424311208e-06, "loss": 1.1186, "step": 13635 }, { "epoch": 0.58, "grad_norm": 5.663496552845421, "learning_rate": 9.742064072400676e-06, "loss": 1.0901, "step": 13640 }, { "epoch": 0.58, "grad_norm": 7.083149295452875, "learning_rate": 9.741673614191896e-06, "loss": 1.1641, "step": 13645 }, { "epoch": 0.58, "grad_norm": 8.079652347188938, "learning_rate": 9.74128286850941e-06, "loss": 1.1024, "step": 13650 }, { "epoch": 0.58, "grad_norm": 5.683752446150179, "learning_rate": 9.74089183537691e-06, "loss": 1.1076, "step": 13655 }, { "epoch": 0.58, "grad_norm": 21.919906620793522, "learning_rate": 9.740500514818105e-06, "loss": 1.1159, "step": 13660 }, { "epoch": 0.58, "grad_norm": 16.774229118180383, "learning_rate": 9.740108906856716e-06, "loss": 1.1621, "step": 13665 }, { "epoch": 0.58, "grad_norm": 15.083800645690784, "learning_rate": 9.739717011516488e-06, "loss": 1.1314, "step": 13670 }, { "epoch": 0.58, "grad_norm": 11.218406244385575, "learning_rate": 9.739324828821177e-06, "loss": 1.1417, "step": 13675 }, { "epoch": 0.58, "grad_norm": 10.438724003076803, "learning_rate": 9.738932358794561e-06, "loss": 1.1099, "step": 13680 }, { "epoch": 0.58, "grad_norm": 7.825192852736801, "learning_rate": 9.738539601460437e-06, "loss": 1.1449, "step": 13685 }, { "epoch": 0.58, "grad_norm": 13.254767362012407, "learning_rate": 9.738146556842616e-06, "loss": 1.1387, "step": 13690 }, { "epoch": 0.58, "grad_norm": 12.341334327589971, "learning_rate": 9.737753224964924e-06, "loss": 1.1537, "step": 13695 }, { "epoch": 0.58, "grad_norm": 6.57147756649199, "learning_rate": 9.737359605851209e-06, "loss": 1.1073, "step": 13700 }, { "epoch": 0.58, "grad_norm": 30.182589884308637, "learning_rate": 9.736965699525335e-06, "loss": 1.1537, "step": 13705 }, { "epoch": 0.58, "grad_norm": 84.36950066110069, "learning_rate": 9.736571506011186e-06, "loss": 1.1251, "step": 13710 }, { "epoch": 0.58, "grad_norm": 85.08634303108919, "learning_rate": 9.736177025332655e-06, "loss": 1.1921, "step": 13715 }, { "epoch": 0.58, "grad_norm": 56.43510058129571, "learning_rate": 9.735782257513666e-06, "loss": 1.1423, "step": 13720 }, { "epoch": 0.58, "grad_norm": 46.89654876156769, "learning_rate": 9.735387202578146e-06, "loss": 1.1496, "step": 13725 }, { "epoch": 0.58, "grad_norm": 12.981475427424796, "learning_rate": 9.73499186055005e-06, "loss": 1.1316, "step": 13730 }, { "epoch": 0.58, "grad_norm": 9.249150020614929, "learning_rate": 9.734596231453344e-06, "loss": 1.1715, "step": 13735 }, { "epoch": 0.58, "grad_norm": 7.96237764776874, "learning_rate": 9.734200315312015e-06, "loss": 1.0843, "step": 13740 }, { "epoch": 0.58, "grad_norm": 16.453782232823215, "learning_rate": 9.733804112150065e-06, "loss": 1.1192, "step": 13745 }, { "epoch": 0.58, "grad_norm": 9.457717433038631, "learning_rate": 9.733407621991516e-06, "loss": 1.1513, "step": 13750 }, { "epoch": 0.58, "grad_norm": 9.563850450972106, "learning_rate": 9.733010844860405e-06, "loss": 1.1506, "step": 13755 }, { "epoch": 0.58, "grad_norm": 7.7193049880769715, "learning_rate": 9.73261378078079e-06, "loss": 1.1181, "step": 13760 }, { "epoch": 0.58, "grad_norm": 11.184904037318445, "learning_rate": 9.73221642977674e-06, "loss": 1.1236, "step": 13765 }, { "epoch": 0.58, "grad_norm": 15.912619313513849, "learning_rate": 9.731818791872347e-06, "loss": 1.1639, "step": 13770 }, { "epoch": 0.58, "grad_norm": 7.133080421518732, "learning_rate": 9.73142086709172e-06, "loss": 1.1199, "step": 13775 }, { "epoch": 0.58, "grad_norm": 7.340388594522803, "learning_rate": 9.731022655458981e-06, "loss": 1.0936, "step": 13780 }, { "epoch": 0.58, "grad_norm": 9.278513456854574, "learning_rate": 9.730624156998277e-06, "loss": 1.1234, "step": 13785 }, { "epoch": 0.58, "grad_norm": 6.500570873388791, "learning_rate": 9.730225371733762e-06, "loss": 1.1168, "step": 13790 }, { "epoch": 0.58, "grad_norm": 5.250782373989602, "learning_rate": 9.729826299689617e-06, "loss": 1.1311, "step": 13795 }, { "epoch": 0.58, "grad_norm": 5.741684382842332, "learning_rate": 9.729426940890037e-06, "loss": 1.1329, "step": 13800 }, { "epoch": 0.58, "grad_norm": 7.4175202822844435, "learning_rate": 9.72902729535923e-06, "loss": 1.1001, "step": 13805 }, { "epoch": 0.58, "grad_norm": 5.272043537203692, "learning_rate": 9.72862736312143e-06, "loss": 1.1515, "step": 13810 }, { "epoch": 0.58, "grad_norm": 14.496071095936655, "learning_rate": 9.72822714420088e-06, "loss": 1.1328, "step": 13815 }, { "epoch": 0.58, "grad_norm": 6.016957578389495, "learning_rate": 9.727826638621847e-06, "loss": 1.0869, "step": 13820 }, { "epoch": 0.59, "grad_norm": 6.763331240721669, "learning_rate": 9.72742584640861e-06, "loss": 1.1273, "step": 13825 }, { "epoch": 0.59, "grad_norm": 20.149762669917695, "learning_rate": 9.727024767585469e-06, "loss": 1.1247, "step": 13830 }, { "epoch": 0.59, "grad_norm": 15.028645592929125, "learning_rate": 9.726623402176741e-06, "loss": 1.1096, "step": 13835 }, { "epoch": 0.59, "grad_norm": 5.254216583524284, "learning_rate": 9.726221750206759e-06, "loss": 1.1012, "step": 13840 }, { "epoch": 0.59, "grad_norm": 12.932555915952687, "learning_rate": 9.725819811699874e-06, "loss": 1.0829, "step": 13845 }, { "epoch": 0.59, "grad_norm": 18.495150180801897, "learning_rate": 9.725417586680454e-06, "loss": 1.1774, "step": 13850 }, { "epoch": 0.59, "grad_norm": 13.736696370952199, "learning_rate": 9.725015075172883e-06, "loss": 1.0882, "step": 13855 }, { "epoch": 0.59, "grad_norm": 14.387044403058038, "learning_rate": 9.724612277201569e-06, "loss": 1.1382, "step": 13860 }, { "epoch": 0.59, "grad_norm": 5.636671771889029, "learning_rate": 9.724209192790928e-06, "loss": 1.1354, "step": 13865 }, { "epoch": 0.59, "grad_norm": 5.586172667041027, "learning_rate": 9.7238058219654e-06, "loss": 1.1096, "step": 13870 }, { "epoch": 0.59, "grad_norm": 9.586335870123596, "learning_rate": 9.723402164749438e-06, "loss": 1.1567, "step": 13875 }, { "epoch": 0.59, "grad_norm": 11.408673890467808, "learning_rate": 9.722998221167517e-06, "loss": 1.1554, "step": 13880 }, { "epoch": 0.59, "grad_norm": 10.564909108022746, "learning_rate": 9.722593991244125e-06, "loss": 1.1875, "step": 13885 }, { "epoch": 0.59, "grad_norm": 5.454590478153151, "learning_rate": 9.722189475003771e-06, "loss": 1.0963, "step": 13890 }, { "epoch": 0.59, "grad_norm": 5.029927090426701, "learning_rate": 9.721784672470978e-06, "loss": 1.1357, "step": 13895 }, { "epoch": 0.59, "grad_norm": 9.891036632495453, "learning_rate": 9.72137958367029e-06, "loss": 1.1459, "step": 13900 }, { "epoch": 0.59, "grad_norm": 9.365981627250617, "learning_rate": 9.720974208626264e-06, "loss": 1.1083, "step": 13905 }, { "epoch": 0.59, "grad_norm": 5.8215572344329685, "learning_rate": 9.720568547363479e-06, "loss": 1.1109, "step": 13910 }, { "epoch": 0.59, "grad_norm": 8.248447010576974, "learning_rate": 9.720162599906526e-06, "loss": 1.1341, "step": 13915 }, { "epoch": 0.59, "grad_norm": 10.525695339655307, "learning_rate": 9.71975636628002e-06, "loss": 1.0999, "step": 13920 }, { "epoch": 0.59, "grad_norm": 5.3392416167837915, "learning_rate": 9.719349846508587e-06, "loss": 1.1087, "step": 13925 }, { "epoch": 0.59, "grad_norm": 21.205709958927134, "learning_rate": 9.718943040616875e-06, "loss": 1.1688, "step": 13930 }, { "epoch": 0.59, "grad_norm": 29.453578570488713, "learning_rate": 9.718535948629546e-06, "loss": 1.1688, "step": 13935 }, { "epoch": 0.59, "grad_norm": 7.778574829443633, "learning_rate": 9.71812857057128e-06, "loss": 1.1433, "step": 13940 }, { "epoch": 0.59, "grad_norm": 12.220735777567683, "learning_rate": 9.717720906466779e-06, "loss": 1.1192, "step": 13945 }, { "epoch": 0.59, "grad_norm": 6.563226486213903, "learning_rate": 9.717312956340756e-06, "loss": 1.1463, "step": 13950 }, { "epoch": 0.59, "grad_norm": 5.515330457974046, "learning_rate": 9.716904720217942e-06, "loss": 1.1598, "step": 13955 }, { "epoch": 0.59, "grad_norm": 6.854047314048814, "learning_rate": 9.716496198123088e-06, "loss": 1.126, "step": 13960 }, { "epoch": 0.59, "grad_norm": 6.019166990613669, "learning_rate": 9.716087390080966e-06, "loss": 1.0897, "step": 13965 }, { "epoch": 0.59, "grad_norm": 9.789855726925458, "learning_rate": 9.715678296116354e-06, "loss": 1.1387, "step": 13970 }, { "epoch": 0.59, "grad_norm": 8.132125621767042, "learning_rate": 9.71526891625406e-06, "loss": 1.1022, "step": 13975 }, { "epoch": 0.59, "grad_norm": 5.427187535674997, "learning_rate": 9.714859250518898e-06, "loss": 1.1444, "step": 13980 }, { "epoch": 0.59, "grad_norm": 35.65294469608465, "learning_rate": 9.714449298935708e-06, "loss": 1.1232, "step": 13985 }, { "epoch": 0.59, "grad_norm": 18.61637138231272, "learning_rate": 9.714039061529346e-06, "loss": 1.1059, "step": 13990 }, { "epoch": 0.59, "grad_norm": 6.714080450971223, "learning_rate": 9.713628538324681e-06, "loss": 1.1204, "step": 13995 }, { "epoch": 0.59, "grad_norm": 8.32495006374814, "learning_rate": 9.713217729346602e-06, "loss": 1.1245, "step": 14000 }, { "epoch": 0.59, "grad_norm": 5.8906853219804525, "learning_rate": 9.712806634620013e-06, "loss": 1.119, "step": 14005 }, { "epoch": 0.59, "grad_norm": 5.8371994359884, "learning_rate": 9.712395254169842e-06, "loss": 1.1242, "step": 14010 }, { "epoch": 0.59, "grad_norm": 7.6348799615958685, "learning_rate": 9.711983588021027e-06, "loss": 1.0972, "step": 14015 }, { "epoch": 0.59, "grad_norm": 7.464835712508369, "learning_rate": 9.711571636198528e-06, "loss": 1.1374, "step": 14020 }, { "epoch": 0.59, "grad_norm": 5.8760804537559626, "learning_rate": 9.711159398727318e-06, "loss": 1.111, "step": 14025 }, { "epoch": 0.59, "grad_norm": 16.603816509164826, "learning_rate": 9.71074687563239e-06, "loss": 1.1311, "step": 14030 }, { "epoch": 0.59, "grad_norm": 13.885629283636852, "learning_rate": 9.710334066938756e-06, "loss": 1.0825, "step": 14035 }, { "epoch": 0.59, "grad_norm": 7.597870646280369, "learning_rate": 9.709920972671442e-06, "loss": 1.0968, "step": 14040 }, { "epoch": 0.59, "grad_norm": 10.385967033254175, "learning_rate": 9.709507592855494e-06, "loss": 1.1209, "step": 14045 }, { "epoch": 0.59, "grad_norm": 5.284912747176475, "learning_rate": 9.70909392751597e-06, "loss": 1.1417, "step": 14050 }, { "epoch": 0.59, "grad_norm": 9.030024940957162, "learning_rate": 9.708679976677955e-06, "loss": 1.1091, "step": 14055 }, { "epoch": 0.6, "grad_norm": 7.692313777138527, "learning_rate": 9.708265740366541e-06, "loss": 1.184, "step": 14060 }, { "epoch": 0.6, "grad_norm": 7.096693910857474, "learning_rate": 9.707851218606845e-06, "loss": 1.1261, "step": 14065 }, { "epoch": 0.6, "grad_norm": 8.063844375083768, "learning_rate": 9.707436411423997e-06, "loss": 1.1421, "step": 14070 }, { "epoch": 0.6, "grad_norm": 17.11067654018697, "learning_rate": 9.707021318843147e-06, "loss": 1.1354, "step": 14075 }, { "epoch": 0.6, "grad_norm": 20.637254197598722, "learning_rate": 9.706605940889456e-06, "loss": 1.1227, "step": 14080 }, { "epoch": 0.6, "grad_norm": 18.57758018515164, "learning_rate": 9.706190277588112e-06, "loss": 1.1679, "step": 14085 }, { "epoch": 0.6, "grad_norm": 17.011518395593868, "learning_rate": 9.705774328964315e-06, "loss": 1.1249, "step": 14090 }, { "epoch": 0.6, "grad_norm": 6.259547528540654, "learning_rate": 9.705358095043282e-06, "loss": 1.1685, "step": 14095 }, { "epoch": 0.6, "grad_norm": 7.804697857419432, "learning_rate": 9.704941575850246e-06, "loss": 1.0849, "step": 14100 }, { "epoch": 0.6, "grad_norm": 7.311651499996418, "learning_rate": 9.704524771410461e-06, "loss": 1.1242, "step": 14105 }, { "epoch": 0.6, "grad_norm": 6.630908308786798, "learning_rate": 9.704107681749199e-06, "loss": 1.1246, "step": 14110 }, { "epoch": 0.6, "grad_norm": 19.456640518105218, "learning_rate": 9.703690306891744e-06, "loss": 1.1094, "step": 14115 }, { "epoch": 0.6, "grad_norm": 16.315665765220558, "learning_rate": 9.7032726468634e-06, "loss": 1.1668, "step": 14120 }, { "epoch": 0.6, "grad_norm": 17.123573267526094, "learning_rate": 9.70285470168949e-06, "loss": 1.1391, "step": 14125 }, { "epoch": 0.6, "grad_norm": 28.912282344415782, "learning_rate": 9.70243647139535e-06, "loss": 1.1259, "step": 14130 }, { "epoch": 0.6, "grad_norm": 43.18446506558985, "learning_rate": 9.702017956006343e-06, "loss": 1.1615, "step": 14135 }, { "epoch": 0.6, "grad_norm": 17.102065114051854, "learning_rate": 9.701599155547832e-06, "loss": 1.124, "step": 14140 }, { "epoch": 0.6, "grad_norm": 20.82897825568837, "learning_rate": 9.701180070045217e-06, "loss": 1.1466, "step": 14145 }, { "epoch": 0.6, "grad_norm": 14.43507501127827, "learning_rate": 9.700760699523902e-06, "loss": 1.1053, "step": 14150 }, { "epoch": 0.6, "grad_norm": 23.000105978468245, "learning_rate": 9.700341044009311e-06, "loss": 1.1295, "step": 14155 }, { "epoch": 0.6, "grad_norm": 10.197616835755062, "learning_rate": 9.699921103526888e-06, "loss": 1.1048, "step": 14160 }, { "epoch": 0.6, "grad_norm": 10.870948441850969, "learning_rate": 9.699500878102093e-06, "loss": 1.092, "step": 14165 }, { "epoch": 0.6, "grad_norm": 10.392302297617999, "learning_rate": 9.699080367760401e-06, "loss": 1.1945, "step": 14170 }, { "epoch": 0.6, "grad_norm": 6.33064846082759, "learning_rate": 9.69865957252731e-06, "loss": 1.0801, "step": 14175 }, { "epoch": 0.6, "grad_norm": 6.406694537179667, "learning_rate": 9.698238492428329e-06, "loss": 1.1227, "step": 14180 }, { "epoch": 0.6, "grad_norm": 6.656183095272076, "learning_rate": 9.697817127488986e-06, "loss": 1.1013, "step": 14185 }, { "epoch": 0.6, "grad_norm": 17.706762423709147, "learning_rate": 9.69739547773483e-06, "loss": 1.1546, "step": 14190 }, { "epoch": 0.6, "grad_norm": 12.15549954656361, "learning_rate": 9.696973543191421e-06, "loss": 1.092, "step": 14195 }, { "epoch": 0.6, "grad_norm": 13.291328539456801, "learning_rate": 9.696551323884344e-06, "loss": 1.131, "step": 14200 }, { "epoch": 0.6, "grad_norm": 5.09770951178777, "learning_rate": 9.696128819839192e-06, "loss": 1.1339, "step": 14205 }, { "epoch": 0.6, "grad_norm": 5.401555679099213, "learning_rate": 9.695706031081585e-06, "loss": 1.1072, "step": 14210 }, { "epoch": 0.6, "grad_norm": 7.154129426934878, "learning_rate": 9.695282957637152e-06, "loss": 1.1523, "step": 14215 }, { "epoch": 0.6, "grad_norm": 12.555466433147776, "learning_rate": 9.694859599531542e-06, "loss": 1.1253, "step": 14220 }, { "epoch": 0.6, "grad_norm": 16.532846358518643, "learning_rate": 9.694435956790425e-06, "loss": 1.1669, "step": 14225 }, { "epoch": 0.6, "grad_norm": 9.787662137130075, "learning_rate": 9.694012029439484e-06, "loss": 1.1046, "step": 14230 }, { "epoch": 0.6, "grad_norm": 12.321373558046599, "learning_rate": 9.693587817504419e-06, "loss": 1.1241, "step": 14235 }, { "epoch": 0.6, "grad_norm": 14.177500473313764, "learning_rate": 9.693163321010951e-06, "loss": 1.1157, "step": 14240 }, { "epoch": 0.6, "grad_norm": 17.424638170835365, "learning_rate": 9.692738539984817e-06, "loss": 1.1354, "step": 14245 }, { "epoch": 0.6, "grad_norm": 16.129220300041492, "learning_rate": 9.692313474451766e-06, "loss": 1.1508, "step": 14250 }, { "epoch": 0.6, "grad_norm": 29.534047927514592, "learning_rate": 9.691888124437571e-06, "loss": 1.1256, "step": 14255 }, { "epoch": 0.6, "grad_norm": 35.34617874476829, "learning_rate": 9.69146248996802e-06, "loss": 1.1364, "step": 14260 }, { "epoch": 0.6, "grad_norm": 25.769318004468367, "learning_rate": 9.691036571068916e-06, "loss": 1.1262, "step": 14265 }, { "epoch": 0.6, "grad_norm": 10.156003738279631, "learning_rate": 9.690610367766084e-06, "loss": 1.141, "step": 14270 }, { "epoch": 0.6, "grad_norm": 7.55976996682998, "learning_rate": 9.690183880085361e-06, "loss": 1.1031, "step": 14275 }, { "epoch": 0.6, "grad_norm": 6.2408477756647365, "learning_rate": 9.689757108052606e-06, "loss": 1.1307, "step": 14280 }, { "epoch": 0.6, "grad_norm": 10.524267369930584, "learning_rate": 9.68933005169369e-06, "loss": 1.1655, "step": 14285 }, { "epoch": 0.6, "grad_norm": 13.195124963742375, "learning_rate": 9.688902711034506e-06, "loss": 1.1158, "step": 14290 }, { "epoch": 0.6, "grad_norm": 24.639025098053352, "learning_rate": 9.688475086100964e-06, "loss": 1.1024, "step": 14295 }, { "epoch": 0.61, "grad_norm": 11.106642013701407, "learning_rate": 9.688047176918987e-06, "loss": 1.1598, "step": 14300 }, { "epoch": 0.61, "grad_norm": 9.017831339967914, "learning_rate": 9.68761898351452e-06, "loss": 1.1444, "step": 14305 }, { "epoch": 0.61, "grad_norm": 5.450636171679976, "learning_rate": 9.68719050591352e-06, "loss": 1.0963, "step": 14310 }, { "epoch": 0.61, "grad_norm": 10.592425950734231, "learning_rate": 9.686761744141966e-06, "loss": 1.1545, "step": 14315 }, { "epoch": 0.61, "grad_norm": 7.609845210528615, "learning_rate": 9.686332698225853e-06, "loss": 1.1346, "step": 14320 }, { "epoch": 0.61, "grad_norm": 7.691612394219117, "learning_rate": 9.685903368191193e-06, "loss": 1.1315, "step": 14325 }, { "epoch": 0.61, "grad_norm": 12.80123532064526, "learning_rate": 9.685473754064017e-06, "loss": 1.1288, "step": 14330 }, { "epoch": 0.61, "grad_norm": 15.765324860390736, "learning_rate": 9.685043855870364e-06, "loss": 1.1452, "step": 14335 }, { "epoch": 0.61, "grad_norm": 18.75024884075784, "learning_rate": 9.684613673636307e-06, "loss": 1.1537, "step": 14340 }, { "epoch": 0.61, "grad_norm": 8.879513503633115, "learning_rate": 9.684183207387918e-06, "loss": 1.1086, "step": 14345 }, { "epoch": 0.61, "grad_norm": 15.055767729742575, "learning_rate": 9.6837524571513e-06, "loss": 1.1712, "step": 14350 }, { "epoch": 0.61, "grad_norm": 7.522534053725067, "learning_rate": 9.683321422952568e-06, "loss": 1.0963, "step": 14355 }, { "epoch": 0.61, "grad_norm": 11.96653752963125, "learning_rate": 9.682890104817853e-06, "loss": 1.0918, "step": 14360 }, { "epoch": 0.61, "grad_norm": 11.786773311136603, "learning_rate": 9.682458502773305e-06, "loss": 1.1482, "step": 14365 }, { "epoch": 0.61, "grad_norm": 6.271167665895201, "learning_rate": 9.68202661684509e-06, "loss": 1.0533, "step": 14370 }, { "epoch": 0.61, "grad_norm": 10.127188815553549, "learning_rate": 9.681594447059393e-06, "loss": 1.1342, "step": 14375 }, { "epoch": 0.61, "grad_norm": 10.124285529409141, "learning_rate": 9.681161993442416e-06, "loss": 1.1517, "step": 14380 }, { "epoch": 0.61, "grad_norm": 8.347945330177689, "learning_rate": 9.680729256020373e-06, "loss": 1.1027, "step": 14385 }, { "epoch": 0.61, "grad_norm": 5.423364445638174, "learning_rate": 9.680296234819505e-06, "loss": 1.0798, "step": 14390 }, { "epoch": 0.61, "grad_norm": 11.457764193921415, "learning_rate": 9.679862929866062e-06, "loss": 1.1257, "step": 14395 }, { "epoch": 0.61, "grad_norm": 9.505127413413769, "learning_rate": 9.679429341186315e-06, "loss": 1.1142, "step": 14400 }, { "epoch": 0.61, "grad_norm": 16.20710828996411, "learning_rate": 9.67899546880655e-06, "loss": 1.0509, "step": 14405 }, { "epoch": 0.61, "grad_norm": 15.762825368671312, "learning_rate": 9.678561312753073e-06, "loss": 1.1079, "step": 14410 }, { "epoch": 0.61, "grad_norm": 5.586218987889025, "learning_rate": 9.678126873052204e-06, "loss": 1.122, "step": 14415 }, { "epoch": 0.61, "grad_norm": 10.992598271387472, "learning_rate": 9.677692149730282e-06, "loss": 1.1441, "step": 14420 }, { "epoch": 0.61, "grad_norm": 5.249406138669678, "learning_rate": 9.677257142813665e-06, "loss": 1.0739, "step": 14425 }, { "epoch": 0.61, "grad_norm": 4.975653255007732, "learning_rate": 9.676821852328724e-06, "loss": 1.1518, "step": 14430 }, { "epoch": 0.61, "grad_norm": 15.296017980394742, "learning_rate": 9.676386278301851e-06, "loss": 1.1016, "step": 14435 }, { "epoch": 0.61, "grad_norm": 6.978794255748017, "learning_rate": 9.675950420759453e-06, "loss": 1.1133, "step": 14440 }, { "epoch": 0.61, "grad_norm": 5.093122422684749, "learning_rate": 9.675514279727953e-06, "loss": 1.117, "step": 14445 }, { "epoch": 0.61, "grad_norm": 6.0733795065361, "learning_rate": 9.675077855233794e-06, "loss": 1.1169, "step": 14450 }, { "epoch": 0.61, "grad_norm": 5.453056572996434, "learning_rate": 9.674641147303437e-06, "loss": 1.1268, "step": 14455 }, { "epoch": 0.61, "grad_norm": 6.266373519269233, "learning_rate": 9.674204155963356e-06, "loss": 1.1457, "step": 14460 }, { "epoch": 0.61, "grad_norm": 7.2647054288458115, "learning_rate": 9.673766881240045e-06, "loss": 1.1204, "step": 14465 }, { "epoch": 0.61, "grad_norm": 9.759027051071511, "learning_rate": 9.673329323160015e-06, "loss": 1.1407, "step": 14470 }, { "epoch": 0.61, "grad_norm": 15.897602252911188, "learning_rate": 9.672891481749796e-06, "loss": 1.1194, "step": 14475 }, { "epoch": 0.61, "grad_norm": 8.052546497909548, "learning_rate": 9.67245335703593e-06, "loss": 1.1268, "step": 14480 }, { "epoch": 0.61, "grad_norm": 12.411893677328415, "learning_rate": 9.672014949044977e-06, "loss": 1.1185, "step": 14485 }, { "epoch": 0.61, "grad_norm": 5.5893558155293, "learning_rate": 9.671576257803524e-06, "loss": 1.0789, "step": 14490 }, { "epoch": 0.61, "grad_norm": 12.26452835241954, "learning_rate": 9.67113728333816e-06, "loss": 1.158, "step": 14495 }, { "epoch": 0.61, "grad_norm": 6.822106493314099, "learning_rate": 9.670698025675502e-06, "loss": 1.1699, "step": 14500 }, { "epoch": 0.61, "grad_norm": 6.284019134420179, "learning_rate": 9.670258484842182e-06, "loss": 1.1387, "step": 14505 }, { "epoch": 0.61, "grad_norm": 6.3509592926517655, "learning_rate": 9.669818660864847e-06, "loss": 1.0986, "step": 14510 }, { "epoch": 0.61, "grad_norm": 8.471410795592737, "learning_rate": 9.669378553770162e-06, "loss": 1.108, "step": 14515 }, { "epoch": 0.61, "grad_norm": 13.913658595388489, "learning_rate": 9.668938163584807e-06, "loss": 1.1193, "step": 14520 }, { "epoch": 0.61, "grad_norm": 9.916414957990833, "learning_rate": 9.668497490335487e-06, "loss": 1.1139, "step": 14525 }, { "epoch": 0.61, "grad_norm": 5.577167360695154, "learning_rate": 9.668056534048913e-06, "loss": 1.1515, "step": 14530 }, { "epoch": 0.62, "grad_norm": 5.922769047859126, "learning_rate": 9.667615294751824e-06, "loss": 1.14, "step": 14535 }, { "epoch": 0.62, "grad_norm": 14.535203927215703, "learning_rate": 9.667173772470967e-06, "loss": 1.2139, "step": 14540 }, { "epoch": 0.62, "grad_norm": 36.48363028442636, "learning_rate": 9.666731967233113e-06, "loss": 1.1349, "step": 14545 }, { "epoch": 0.62, "grad_norm": 21.0262985066971, "learning_rate": 9.666289879065046e-06, "loss": 1.1071, "step": 14550 }, { "epoch": 0.62, "grad_norm": 58.844424374469526, "learning_rate": 9.665847507993567e-06, "loss": 1.0956, "step": 14555 }, { "epoch": 0.62, "grad_norm": 7.52443592324907, "learning_rate": 9.665404854045497e-06, "loss": 1.1442, "step": 14560 }, { "epoch": 0.62, "grad_norm": 34.869846029476804, "learning_rate": 9.664961917247675e-06, "loss": 1.154, "step": 14565 }, { "epoch": 0.62, "grad_norm": 27.194417909613236, "learning_rate": 9.664518697626952e-06, "loss": 1.171, "step": 14570 }, { "epoch": 0.62, "grad_norm": 13.706891969274936, "learning_rate": 9.664075195210203e-06, "loss": 1.1137, "step": 14575 }, { "epoch": 0.62, "grad_norm": 36.71351842453812, "learning_rate": 9.66363141002431e-06, "loss": 1.0981, "step": 14580 }, { "epoch": 0.62, "grad_norm": 18.19588738492687, "learning_rate": 9.663187342096182e-06, "loss": 1.1632, "step": 14585 }, { "epoch": 0.62, "grad_norm": 25.530544913332296, "learning_rate": 9.662742991452742e-06, "loss": 1.1066, "step": 14590 }, { "epoch": 0.62, "grad_norm": 21.76289279151669, "learning_rate": 9.66229835812093e-06, "loss": 1.1201, "step": 14595 }, { "epoch": 0.62, "grad_norm": 11.227374121904603, "learning_rate": 9.6618534421277e-06, "loss": 1.1448, "step": 14600 }, { "epoch": 0.62, "grad_norm": 12.131144471095478, "learning_rate": 9.66140824350003e-06, "loss": 1.1151, "step": 14605 }, { "epoch": 0.62, "grad_norm": 11.706943321549794, "learning_rate": 9.660962762264908e-06, "loss": 1.1375, "step": 14610 }, { "epoch": 0.62, "grad_norm": 10.292501325497955, "learning_rate": 9.66051699844934e-06, "loss": 1.1493, "step": 14615 }, { "epoch": 0.62, "grad_norm": 10.38464317476439, "learning_rate": 9.660070952080358e-06, "loss": 1.1564, "step": 14620 }, { "epoch": 0.62, "grad_norm": 6.85675488204554, "learning_rate": 9.659624623185e-06, "loss": 1.1196, "step": 14625 }, { "epoch": 0.62, "grad_norm": 7.2106366247174645, "learning_rate": 9.659178011790326e-06, "loss": 1.1316, "step": 14630 }, { "epoch": 0.62, "grad_norm": 5.121924784323083, "learning_rate": 9.658731117923415e-06, "loss": 1.1276, "step": 14635 }, { "epoch": 0.62, "grad_norm": 5.295109594330357, "learning_rate": 9.658283941611356e-06, "loss": 1.1206, "step": 14640 }, { "epoch": 0.62, "grad_norm": 6.992896623594316, "learning_rate": 9.657836482881265e-06, "loss": 1.1453, "step": 14645 }, { "epoch": 0.62, "grad_norm": 10.104412657694432, "learning_rate": 9.657388741760268e-06, "loss": 1.1414, "step": 14650 }, { "epoch": 0.62, "grad_norm": 14.853921095894423, "learning_rate": 9.65694071827551e-06, "loss": 1.1506, "step": 14655 }, { "epoch": 0.62, "grad_norm": 9.312774107907364, "learning_rate": 9.656492412454153e-06, "loss": 1.1465, "step": 14660 }, { "epoch": 0.62, "grad_norm": 6.436309054999819, "learning_rate": 9.656043824323379e-06, "loss": 1.1072, "step": 14665 }, { "epoch": 0.62, "grad_norm": 6.624628936860415, "learning_rate": 9.655594953910382e-06, "loss": 1.1126, "step": 14670 }, { "epoch": 0.62, "grad_norm": 14.378255020995566, "learning_rate": 9.655145801242376e-06, "loss": 1.1495, "step": 14675 }, { "epoch": 0.62, "grad_norm": 6.236898550021181, "learning_rate": 9.654696366346591e-06, "loss": 1.0908, "step": 14680 }, { "epoch": 0.62, "grad_norm": 7.240160620415107, "learning_rate": 9.654246649250279e-06, "loss": 1.1509, "step": 14685 }, { "epoch": 0.62, "grad_norm": 8.412423079542824, "learning_rate": 9.6537966499807e-06, "loss": 1.1125, "step": 14690 }, { "epoch": 0.62, "grad_norm": 15.974520412331332, "learning_rate": 9.65334636856514e-06, "loss": 1.1504, "step": 14695 }, { "epoch": 0.62, "grad_norm": 7.404924957271204, "learning_rate": 9.652895805030896e-06, "loss": 1.1567, "step": 14700 }, { "epoch": 0.62, "grad_norm": 31.70569891795151, "learning_rate": 9.652444959405284e-06, "loss": 1.1181, "step": 14705 }, { "epoch": 0.62, "grad_norm": 33.13321061852332, "learning_rate": 9.651993831715638e-06, "loss": 1.1329, "step": 14710 }, { "epoch": 0.62, "grad_norm": 22.37197901400863, "learning_rate": 9.651542421989311e-06, "loss": 1.1608, "step": 14715 }, { "epoch": 0.62, "grad_norm": 28.934620271939934, "learning_rate": 9.651090730253668e-06, "loss": 1.1543, "step": 14720 }, { "epoch": 0.62, "grad_norm": 10.170921356817367, "learning_rate": 9.650638756536092e-06, "loss": 1.1518, "step": 14725 }, { "epoch": 0.62, "grad_norm": 8.659704770912452, "learning_rate": 9.65018650086399e-06, "loss": 1.1423, "step": 14730 }, { "epoch": 0.62, "grad_norm": 7.627436737501537, "learning_rate": 9.649733963264775e-06, "loss": 1.1088, "step": 14735 }, { "epoch": 0.62, "grad_norm": 20.15416503207731, "learning_rate": 9.649281143765888e-06, "loss": 1.1543, "step": 14740 }, { "epoch": 0.62, "grad_norm": 11.099105736261631, "learning_rate": 9.64882804239478e-06, "loss": 1.1466, "step": 14745 }, { "epoch": 0.62, "grad_norm": 8.821298000599661, "learning_rate": 9.648374659178922e-06, "loss": 1.1475, "step": 14750 }, { "epoch": 0.62, "grad_norm": 18.5733303838369, "learning_rate": 9.6479209941458e-06, "loss": 1.1471, "step": 14755 }, { "epoch": 0.62, "grad_norm": 18.973824251446448, "learning_rate": 9.647467047322918e-06, "loss": 1.1101, "step": 14760 }, { "epoch": 0.62, "grad_norm": 16.425630115141452, "learning_rate": 9.6470128187378e-06, "loss": 1.1206, "step": 14765 }, { "epoch": 0.63, "grad_norm": 14.660184417370282, "learning_rate": 9.646558308417983e-06, "loss": 1.127, "step": 14770 }, { "epoch": 0.63, "grad_norm": 18.32203882018408, "learning_rate": 9.646103516391021e-06, "loss": 1.0729, "step": 14775 }, { "epoch": 0.63, "grad_norm": 8.301171455064672, "learning_rate": 9.64564844268449e-06, "loss": 1.1493, "step": 14780 }, { "epoch": 0.63, "grad_norm": 6.48203082744321, "learning_rate": 9.645193087325978e-06, "loss": 1.0993, "step": 14785 }, { "epoch": 0.63, "grad_norm": 13.764486995440448, "learning_rate": 9.644737450343092e-06, "loss": 1.1028, "step": 14790 }, { "epoch": 0.63, "grad_norm": 10.638575846552063, "learning_rate": 9.644281531763456e-06, "loss": 1.097, "step": 14795 }, { "epoch": 0.63, "grad_norm": 7.766919340157793, "learning_rate": 9.64382533161471e-06, "loss": 1.1222, "step": 14800 }, { "epoch": 0.63, "grad_norm": 10.07352528372201, "learning_rate": 9.643368849924513e-06, "loss": 1.1223, "step": 14805 }, { "epoch": 0.63, "grad_norm": 6.41311685997666, "learning_rate": 9.64291208672054e-06, "loss": 1.0925, "step": 14810 }, { "epoch": 0.63, "grad_norm": 10.52337127878568, "learning_rate": 9.642455042030483e-06, "loss": 1.1027, "step": 14815 }, { "epoch": 0.63, "grad_norm": 20.750230322413955, "learning_rate": 9.641997715882054e-06, "loss": 1.1207, "step": 14820 }, { "epoch": 0.63, "grad_norm": 14.045843276737358, "learning_rate": 9.641540108302975e-06, "loss": 1.0781, "step": 14825 }, { "epoch": 0.63, "grad_norm": 15.980347740082388, "learning_rate": 9.641082219320991e-06, "loss": 1.1319, "step": 14830 }, { "epoch": 0.63, "grad_norm": 13.18362011388887, "learning_rate": 9.640624048963864e-06, "loss": 1.1315, "step": 14835 }, { "epoch": 0.63, "grad_norm": 5.814234861088137, "learning_rate": 9.64016559725937e-06, "loss": 1.1083, "step": 14840 }, { "epoch": 0.63, "grad_norm": 13.05604326331034, "learning_rate": 9.639706864235304e-06, "loss": 1.1311, "step": 14845 }, { "epoch": 0.63, "grad_norm": 6.546098093676746, "learning_rate": 9.639247849919476e-06, "loss": 1.1095, "step": 14850 }, { "epoch": 0.63, "grad_norm": 9.727428382748387, "learning_rate": 9.638788554339717e-06, "loss": 1.0946, "step": 14855 }, { "epoch": 0.63, "grad_norm": 21.40400777595905, "learning_rate": 9.638328977523874e-06, "loss": 1.149, "step": 14860 }, { "epoch": 0.63, "grad_norm": 25.56823675908026, "learning_rate": 9.637869119499804e-06, "loss": 1.1223, "step": 14865 }, { "epoch": 0.63, "grad_norm": 20.095678010055522, "learning_rate": 9.637408980295394e-06, "loss": 1.1234, "step": 14870 }, { "epoch": 0.63, "grad_norm": 46.0051044355793, "learning_rate": 9.636948559938537e-06, "loss": 1.1208, "step": 14875 }, { "epoch": 0.63, "grad_norm": 14.386593610310534, "learning_rate": 9.636487858457146e-06, "loss": 1.1485, "step": 14880 }, { "epoch": 0.63, "grad_norm": 10.780819395625295, "learning_rate": 9.636026875879154e-06, "loss": 1.1785, "step": 14885 }, { "epoch": 0.63, "grad_norm": 9.371711799097483, "learning_rate": 9.635565612232508e-06, "loss": 1.1254, "step": 14890 }, { "epoch": 0.63, "grad_norm": 5.981367267676593, "learning_rate": 9.635104067545173e-06, "loss": 1.1234, "step": 14895 }, { "epoch": 0.63, "grad_norm": 13.77327259153108, "learning_rate": 9.634642241845134e-06, "loss": 1.1011, "step": 14900 }, { "epoch": 0.63, "grad_norm": 21.445395018488565, "learning_rate": 9.634180135160385e-06, "loss": 1.1342, "step": 14905 }, { "epoch": 0.63, "grad_norm": 26.339476639671517, "learning_rate": 9.633717747518946e-06, "loss": 1.1879, "step": 14910 }, { "epoch": 0.63, "grad_norm": 46.58196426876102, "learning_rate": 9.63325507894885e-06, "loss": 1.2517, "step": 14915 }, { "epoch": 0.63, "grad_norm": 42.15122592569484, "learning_rate": 9.632792129478144e-06, "loss": 1.1565, "step": 14920 }, { "epoch": 0.63, "grad_norm": 16.894451350487213, "learning_rate": 9.632328899134897e-06, "loss": 1.1536, "step": 14925 }, { "epoch": 0.63, "grad_norm": 33.16456657300665, "learning_rate": 9.631865387947196e-06, "loss": 1.1525, "step": 14930 }, { "epoch": 0.63, "grad_norm": 21.98475192767441, "learning_rate": 9.631401595943139e-06, "loss": 1.1515, "step": 14935 }, { "epoch": 0.63, "grad_norm": 26.819245939364354, "learning_rate": 9.630937523150848e-06, "loss": 1.1326, "step": 14940 }, { "epoch": 0.63, "grad_norm": 25.454359725082117, "learning_rate": 9.630473169598452e-06, "loss": 1.1537, "step": 14945 }, { "epoch": 0.63, "grad_norm": 30.190462371778626, "learning_rate": 9.63000853531411e-06, "loss": 1.1479, "step": 14950 }, { "epoch": 0.63, "grad_norm": 44.79817296059923, "learning_rate": 9.629543620325988e-06, "loss": 1.1309, "step": 14955 }, { "epoch": 0.63, "grad_norm": 36.18920488552786, "learning_rate": 9.629078424662273e-06, "loss": 1.1651, "step": 14960 }, { "epoch": 0.63, "grad_norm": 5.481515417629619, "learning_rate": 9.628612948351168e-06, "loss": 1.1416, "step": 14965 }, { "epoch": 0.63, "grad_norm": 34.70285757368889, "learning_rate": 9.628147191420893e-06, "loss": 1.1099, "step": 14970 }, { "epoch": 0.63, "grad_norm": 44.07771473792773, "learning_rate": 9.627681153899689e-06, "loss": 1.1651, "step": 14975 }, { "epoch": 0.63, "grad_norm": 21.236827398173254, "learning_rate": 9.627214835815805e-06, "loss": 1.0993, "step": 14980 }, { "epoch": 0.63, "grad_norm": 16.80799620798502, "learning_rate": 9.626748237197518e-06, "loss": 1.1662, "step": 14985 }, { "epoch": 0.63, "grad_norm": 6.047128242105549, "learning_rate": 9.626281358073113e-06, "loss": 1.1341, "step": 14990 }, { "epoch": 0.63, "grad_norm": 8.836468761643587, "learning_rate": 9.625814198470897e-06, "loss": 1.1315, "step": 14995 }, { "epoch": 0.63, "grad_norm": 8.627893008953913, "learning_rate": 9.625346758419191e-06, "loss": 1.114, "step": 15000 }, { "epoch": 0.64, "grad_norm": 13.330496846937193, "learning_rate": 9.624879037946335e-06, "loss": 1.1373, "step": 15005 }, { "epoch": 0.64, "grad_norm": 21.802550959539268, "learning_rate": 9.624411037080687e-06, "loss": 1.1129, "step": 15010 }, { "epoch": 0.64, "grad_norm": 13.928081298992451, "learning_rate": 9.623942755850619e-06, "loss": 1.0807, "step": 15015 }, { "epoch": 0.64, "grad_norm": 6.989717071840893, "learning_rate": 9.623474194284524e-06, "loss": 1.1188, "step": 15020 }, { "epoch": 0.64, "grad_norm": 10.530098397527423, "learning_rate": 9.623005352410805e-06, "loss": 1.1367, "step": 15025 }, { "epoch": 0.64, "grad_norm": 6.7048373067148725, "learning_rate": 9.62253623025789e-06, "loss": 1.1486, "step": 15030 }, { "epoch": 0.64, "grad_norm": 9.246290567489709, "learning_rate": 9.622066827854218e-06, "loss": 1.1514, "step": 15035 }, { "epoch": 0.64, "grad_norm": 12.862912887518295, "learning_rate": 9.62159714522825e-06, "loss": 1.127, "step": 15040 }, { "epoch": 0.64, "grad_norm": 8.246636272270521, "learning_rate": 9.621127182408461e-06, "loss": 1.1616, "step": 15045 }, { "epoch": 0.64, "grad_norm": 7.3660907840974446, "learning_rate": 9.620656939423343e-06, "loss": 1.1167, "step": 15050 }, { "epoch": 0.64, "grad_norm": 6.1388773365299265, "learning_rate": 9.620186416301404e-06, "loss": 1.1218, "step": 15055 }, { "epoch": 0.64, "grad_norm": 8.368396681648354, "learning_rate": 9.619715613071173e-06, "loss": 1.1014, "step": 15060 }, { "epoch": 0.64, "grad_norm": 8.622130786971702, "learning_rate": 9.619244529761191e-06, "loss": 1.1116, "step": 15065 }, { "epoch": 0.64, "grad_norm": 5.700741596223363, "learning_rate": 9.618773166400022e-06, "loss": 1.1046, "step": 15070 }, { "epoch": 0.64, "grad_norm": 5.334937953106167, "learning_rate": 9.618301523016238e-06, "loss": 1.1224, "step": 15075 }, { "epoch": 0.64, "grad_norm": 5.412167580151939, "learning_rate": 9.61782959963844e-06, "loss": 1.1225, "step": 15080 }, { "epoch": 0.64, "grad_norm": 5.381815950702849, "learning_rate": 9.617357396295231e-06, "loss": 1.1475, "step": 15085 }, { "epoch": 0.64, "grad_norm": 5.607805687189267, "learning_rate": 9.616884913015247e-06, "loss": 1.1468, "step": 15090 }, { "epoch": 0.64, "grad_norm": 5.31491405956735, "learning_rate": 9.616412149827131e-06, "loss": 1.1441, "step": 15095 }, { "epoch": 0.64, "grad_norm": 21.56853357776938, "learning_rate": 9.615939106759542e-06, "loss": 1.1621, "step": 15100 }, { "epoch": 0.64, "grad_norm": 35.003379998015944, "learning_rate": 9.615465783841163e-06, "loss": 1.0939, "step": 15105 }, { "epoch": 0.64, "grad_norm": 14.105672351267819, "learning_rate": 9.614992181100686e-06, "loss": 1.1313, "step": 15110 }, { "epoch": 0.64, "grad_norm": 17.003286142133955, "learning_rate": 9.61451829856683e-06, "loss": 1.1361, "step": 15115 }, { "epoch": 0.64, "grad_norm": 6.321819577415538, "learning_rate": 9.614044136268321e-06, "loss": 1.1386, "step": 15120 }, { "epoch": 0.64, "grad_norm": 7.925335012066343, "learning_rate": 9.613569694233908e-06, "loss": 1.118, "step": 15125 }, { "epoch": 0.64, "grad_norm": 5.741195814966175, "learning_rate": 9.613094972492353e-06, "loss": 1.1181, "step": 15130 }, { "epoch": 0.64, "grad_norm": 5.450759668474747, "learning_rate": 9.61261997107244e-06, "loss": 1.1353, "step": 15135 }, { "epoch": 0.64, "grad_norm": 8.709871208971862, "learning_rate": 9.612144690002965e-06, "loss": 1.1289, "step": 15140 }, { "epoch": 0.64, "grad_norm": 6.668294653667959, "learning_rate": 9.61166912931274e-06, "loss": 1.0972, "step": 15145 }, { "epoch": 0.64, "grad_norm": 7.458141394285696, "learning_rate": 9.611193289030603e-06, "loss": 1.0752, "step": 15150 }, { "epoch": 0.64, "grad_norm": 9.871885733259331, "learning_rate": 9.610717169185398e-06, "loss": 1.1394, "step": 15155 }, { "epoch": 0.64, "grad_norm": 9.356954333625659, "learning_rate": 9.610240769805992e-06, "loss": 1.122, "step": 15160 }, { "epoch": 0.64, "grad_norm": 5.593192755699136, "learning_rate": 9.60976409092127e-06, "loss": 1.0903, "step": 15165 }, { "epoch": 0.64, "grad_norm": 6.608453637198863, "learning_rate": 9.609287132560128e-06, "loss": 1.1088, "step": 15170 }, { "epoch": 0.64, "grad_norm": 6.969575739114598, "learning_rate": 9.608809894751483e-06, "loss": 1.0988, "step": 15175 }, { "epoch": 0.64, "grad_norm": 9.107995819643573, "learning_rate": 9.608332377524272e-06, "loss": 1.098, "step": 15180 }, { "epoch": 0.64, "grad_norm": 10.229953142990972, "learning_rate": 9.607854580907442e-06, "loss": 1.1688, "step": 15185 }, { "epoch": 0.64, "grad_norm": 6.552428134196232, "learning_rate": 9.607376504929963e-06, "loss": 1.088, "step": 15190 }, { "epoch": 0.64, "grad_norm": 8.564121504278818, "learning_rate": 9.606898149620819e-06, "loss": 1.1601, "step": 15195 }, { "epoch": 0.64, "grad_norm": 6.294144000050321, "learning_rate": 9.606419515009007e-06, "loss": 1.1437, "step": 15200 }, { "epoch": 0.64, "grad_norm": 6.855905131825997, "learning_rate": 9.605940601123548e-06, "loss": 1.0986, "step": 15205 }, { "epoch": 0.64, "grad_norm": 6.739582811248398, "learning_rate": 9.605461407993482e-06, "loss": 1.0751, "step": 15210 }, { "epoch": 0.64, "grad_norm": 6.631573384150668, "learning_rate": 9.604981935647854e-06, "loss": 1.1277, "step": 15215 }, { "epoch": 0.64, "grad_norm": 7.480553947910073, "learning_rate": 9.604502184115735e-06, "loss": 1.1576, "step": 15220 }, { "epoch": 0.64, "grad_norm": 7.3106903851644764, "learning_rate": 9.604022153426212e-06, "loss": 1.1743, "step": 15225 }, { "epoch": 0.64, "grad_norm": 26.499021769743585, "learning_rate": 9.603541843608389e-06, "loss": 1.1499, "step": 15230 }, { "epoch": 0.64, "grad_norm": 24.156117228150563, "learning_rate": 9.603061254691382e-06, "loss": 1.1301, "step": 15235 }, { "epoch": 0.64, "grad_norm": 27.54954903843597, "learning_rate": 9.602580386704332e-06, "loss": 1.1537, "step": 15240 }, { "epoch": 0.65, "grad_norm": 41.92336775416203, "learning_rate": 9.602099239676388e-06, "loss": 1.1213, "step": 15245 }, { "epoch": 0.65, "grad_norm": 39.66492236584202, "learning_rate": 9.601617813636726e-06, "loss": 1.1499, "step": 15250 }, { "epoch": 0.65, "grad_norm": 20.84220989989977, "learning_rate": 9.601136108614529e-06, "loss": 1.0993, "step": 15255 }, { "epoch": 0.65, "grad_norm": 10.38712579463707, "learning_rate": 9.600654124639001e-06, "loss": 1.1428, "step": 15260 }, { "epoch": 0.65, "grad_norm": 7.110201153788826, "learning_rate": 9.600171861739368e-06, "loss": 1.1241, "step": 15265 }, { "epoch": 0.65, "grad_norm": 16.55737513511684, "learning_rate": 9.599689319944863e-06, "loss": 1.1512, "step": 15270 }, { "epoch": 0.65, "grad_norm": 19.05739551041232, "learning_rate": 9.599206499284746e-06, "loss": 1.1165, "step": 15275 }, { "epoch": 0.65, "grad_norm": 8.975587719188491, "learning_rate": 9.598723399788285e-06, "loss": 1.146, "step": 15280 }, { "epoch": 0.65, "grad_norm": 13.90430246967029, "learning_rate": 9.598240021484769e-06, "loss": 1.1598, "step": 15285 }, { "epoch": 0.65, "grad_norm": 14.496261326306803, "learning_rate": 9.597756364403507e-06, "loss": 1.1223, "step": 15290 }, { "epoch": 0.65, "grad_norm": 13.361462360200242, "learning_rate": 9.597272428573818e-06, "loss": 1.1687, "step": 15295 }, { "epoch": 0.65, "grad_norm": 6.951477644937146, "learning_rate": 9.596788214025046e-06, "loss": 1.1478, "step": 15300 }, { "epoch": 0.65, "grad_norm": 5.952930515222706, "learning_rate": 9.596303720786542e-06, "loss": 1.1217, "step": 15305 }, { "epoch": 0.65, "grad_norm": 5.302537352729694, "learning_rate": 9.595818948887683e-06, "loss": 1.13, "step": 15310 }, { "epoch": 0.65, "grad_norm": 9.75155589663738, "learning_rate": 9.59533389835786e-06, "loss": 1.1172, "step": 15315 }, { "epoch": 0.65, "grad_norm": 9.409933249584224, "learning_rate": 9.594848569226477e-06, "loss": 1.163, "step": 15320 }, { "epoch": 0.65, "grad_norm": 32.95505167114163, "learning_rate": 9.59436296152296e-06, "loss": 1.1477, "step": 15325 }, { "epoch": 0.65, "grad_norm": 30.137321109503546, "learning_rate": 9.59387707527675e-06, "loss": 1.1104, "step": 15330 }, { "epoch": 0.65, "grad_norm": 13.139967014584997, "learning_rate": 9.593390910517306e-06, "loss": 1.1372, "step": 15335 }, { "epoch": 0.65, "grad_norm": 11.578959372276609, "learning_rate": 9.5929044672741e-06, "loss": 1.134, "step": 15340 }, { "epoch": 0.65, "grad_norm": 11.620349952741133, "learning_rate": 9.592417745576625e-06, "loss": 1.0904, "step": 15345 }, { "epoch": 0.65, "grad_norm": 15.005242597791108, "learning_rate": 9.59193074545439e-06, "loss": 1.1307, "step": 15350 }, { "epoch": 0.65, "grad_norm": 11.910151880964161, "learning_rate": 9.591443466936921e-06, "loss": 1.1156, "step": 15355 }, { "epoch": 0.65, "grad_norm": 5.237347235567157, "learning_rate": 9.590955910053756e-06, "loss": 1.1365, "step": 15360 }, { "epoch": 0.65, "grad_norm": 5.942465973419977, "learning_rate": 9.59046807483446e-06, "loss": 1.0889, "step": 15365 }, { "epoch": 0.65, "grad_norm": 7.094992492945449, "learning_rate": 9.589979961308606e-06, "loss": 1.0906, "step": 15370 }, { "epoch": 0.65, "grad_norm": 5.519965628845473, "learning_rate": 9.589491569505786e-06, "loss": 1.1386, "step": 15375 }, { "epoch": 0.65, "grad_norm": 5.826222461543118, "learning_rate": 9.589002899455611e-06, "loss": 1.117, "step": 15380 }, { "epoch": 0.65, "grad_norm": 5.421505559972395, "learning_rate": 9.588513951187709e-06, "loss": 1.1214, "step": 15385 }, { "epoch": 0.65, "grad_norm": 8.681384302708578, "learning_rate": 9.588024724731721e-06, "loss": 1.1148, "step": 15390 }, { "epoch": 0.65, "grad_norm": 11.7074944230244, "learning_rate": 9.587535220117309e-06, "loss": 1.1289, "step": 15395 }, { "epoch": 0.65, "grad_norm": 7.640409633458371, "learning_rate": 9.587045437374149e-06, "loss": 1.0997, "step": 15400 }, { "epoch": 0.65, "grad_norm": 6.4491849647084525, "learning_rate": 9.586555376531935e-06, "loss": 1.1295, "step": 15405 }, { "epoch": 0.65, "grad_norm": 30.828365701952105, "learning_rate": 9.58606503762038e-06, "loss": 1.1218, "step": 15410 }, { "epoch": 0.65, "grad_norm": 28.877471025102423, "learning_rate": 9.58557442066921e-06, "loss": 1.1131, "step": 15415 }, { "epoch": 0.65, "grad_norm": 8.32310077638982, "learning_rate": 9.58508352570817e-06, "loss": 1.1105, "step": 15420 }, { "epoch": 0.65, "grad_norm": 26.048816125512946, "learning_rate": 9.584592352767021e-06, "loss": 1.1031, "step": 15425 }, { "epoch": 0.65, "grad_norm": 22.648669070359446, "learning_rate": 9.584100901875544e-06, "loss": 1.1388, "step": 15430 }, { "epoch": 0.65, "grad_norm": 5.709365412817161, "learning_rate": 9.583609173063531e-06, "loss": 1.0632, "step": 15435 }, { "epoch": 0.65, "grad_norm": 14.504446695258476, "learning_rate": 9.583117166360796e-06, "loss": 1.1326, "step": 15440 }, { "epoch": 0.65, "grad_norm": 23.930724401242085, "learning_rate": 9.582624881797166e-06, "loss": 1.1092, "step": 15445 }, { "epoch": 0.65, "grad_norm": 6.572371547572622, "learning_rate": 9.582132319402489e-06, "loss": 1.107, "step": 15450 }, { "epoch": 0.65, "grad_norm": 8.015536040581603, "learning_rate": 9.581639479206627e-06, "loss": 1.1566, "step": 15455 }, { "epoch": 0.65, "grad_norm": 13.490313232809362, "learning_rate": 9.581146361239457e-06, "loss": 1.119, "step": 15460 }, { "epoch": 0.65, "grad_norm": 38.448176265161806, "learning_rate": 9.580652965530878e-06, "loss": 1.1208, "step": 15465 }, { "epoch": 0.65, "grad_norm": 47.74830792797482, "learning_rate": 9.580159292110802e-06, "loss": 1.1239, "step": 15470 }, { "epoch": 0.65, "grad_norm": 26.011696593388926, "learning_rate": 9.579665341009161e-06, "loss": 1.1341, "step": 15475 }, { "epoch": 0.66, "grad_norm": 7.986201059912118, "learning_rate": 9.5791711122559e-06, "loss": 1.0977, "step": 15480 }, { "epoch": 0.66, "grad_norm": 6.02722953640949, "learning_rate": 9.578676605880983e-06, "loss": 1.1241, "step": 15485 }, { "epoch": 0.66, "grad_norm": 8.694007453047524, "learning_rate": 9.578181821914389e-06, "loss": 1.129, "step": 15490 }, { "epoch": 0.66, "grad_norm": 6.395570151734314, "learning_rate": 9.577686760386118e-06, "loss": 1.1279, "step": 15495 }, { "epoch": 0.66, "grad_norm": 11.100312053589473, "learning_rate": 9.577191421326181e-06, "loss": 1.0861, "step": 15500 }, { "epoch": 0.66, "grad_norm": 9.341903044806589, "learning_rate": 9.576695804764611e-06, "loss": 1.0621, "step": 15505 }, { "epoch": 0.66, "grad_norm": 23.190702477762677, "learning_rate": 9.576199910731457e-06, "loss": 1.1588, "step": 15510 }, { "epoch": 0.66, "grad_norm": 23.639729053832877, "learning_rate": 9.57570373925678e-06, "loss": 1.1282, "step": 15515 }, { "epoch": 0.66, "grad_norm": 17.305840873138052, "learning_rate": 9.575207290370667e-06, "loss": 1.1465, "step": 15520 }, { "epoch": 0.66, "grad_norm": 32.69940676134057, "learning_rate": 9.574710564103208e-06, "loss": 1.1625, "step": 15525 }, { "epoch": 0.66, "grad_norm": 37.39367118463734, "learning_rate": 9.574213560484526e-06, "loss": 1.1548, "step": 15530 }, { "epoch": 0.66, "grad_norm": 29.88323507569578, "learning_rate": 9.573716279544749e-06, "loss": 1.0891, "step": 15535 }, { "epoch": 0.66, "grad_norm": 30.463372908170502, "learning_rate": 9.573218721314028e-06, "loss": 1.1628, "step": 15540 }, { "epoch": 0.66, "grad_norm": 31.169487999370382, "learning_rate": 9.572720885822523e-06, "loss": 1.1217, "step": 15545 }, { "epoch": 0.66, "grad_norm": 7.6138819420453805, "learning_rate": 9.572222773100423e-06, "loss": 1.1429, "step": 15550 }, { "epoch": 0.66, "grad_norm": 8.1102214560028, "learning_rate": 9.571724383177923e-06, "loss": 1.1188, "step": 15555 }, { "epoch": 0.66, "grad_norm": 8.633020345327878, "learning_rate": 9.571225716085239e-06, "loss": 1.1449, "step": 15560 }, { "epoch": 0.66, "grad_norm": 5.6029530390775735, "learning_rate": 9.570726771852608e-06, "loss": 1.0976, "step": 15565 }, { "epoch": 0.66, "grad_norm": 5.368538144798845, "learning_rate": 9.570227550510274e-06, "loss": 1.1194, "step": 15570 }, { "epoch": 0.66, "grad_norm": 12.955013860094212, "learning_rate": 9.569728052088505e-06, "loss": 1.1142, "step": 15575 }, { "epoch": 0.66, "grad_norm": 5.799660654824613, "learning_rate": 9.569228276617587e-06, "loss": 1.1305, "step": 15580 }, { "epoch": 0.66, "grad_norm": 6.6834533740776925, "learning_rate": 9.568728224127817e-06, "loss": 1.1182, "step": 15585 }, { "epoch": 0.66, "grad_norm": 8.783924326567748, "learning_rate": 9.56822789464951e-06, "loss": 1.1301, "step": 15590 }, { "epoch": 0.66, "grad_norm": 8.054448725862445, "learning_rate": 9.567727288213005e-06, "loss": 1.0734, "step": 15595 }, { "epoch": 0.66, "grad_norm": 5.443051879102491, "learning_rate": 9.567226404848648e-06, "loss": 1.1024, "step": 15600 }, { "epoch": 0.66, "grad_norm": 11.629438032018662, "learning_rate": 9.566725244586807e-06, "loss": 1.0917, "step": 15605 }, { "epoch": 0.66, "grad_norm": 5.231888706468046, "learning_rate": 9.566223807457867e-06, "loss": 1.0969, "step": 15610 }, { "epoch": 0.66, "grad_norm": 5.790613630056143, "learning_rate": 9.565722093492226e-06, "loss": 1.1092, "step": 15615 }, { "epoch": 0.66, "grad_norm": 6.649892906466696, "learning_rate": 9.565220102720306e-06, "loss": 1.1045, "step": 15620 }, { "epoch": 0.66, "grad_norm": 9.83614325693871, "learning_rate": 9.564717835172536e-06, "loss": 1.0735, "step": 15625 }, { "epoch": 0.66, "grad_norm": 13.741316253963543, "learning_rate": 9.564215290879371e-06, "loss": 1.15, "step": 15630 }, { "epoch": 0.66, "grad_norm": 10.543531557768679, "learning_rate": 9.563712469871277e-06, "loss": 1.0852, "step": 15635 }, { "epoch": 0.66, "grad_norm": 16.545885733273415, "learning_rate": 9.563209372178738e-06, "loss": 1.1104, "step": 15640 }, { "epoch": 0.66, "grad_norm": 16.379386872751233, "learning_rate": 9.562705997832259e-06, "loss": 1.1113, "step": 15645 }, { "epoch": 0.66, "grad_norm": 14.40211055238993, "learning_rate": 9.562202346862352e-06, "loss": 1.1412, "step": 15650 }, { "epoch": 0.66, "grad_norm": 13.807773616994584, "learning_rate": 9.561698419299555e-06, "loss": 1.1149, "step": 15655 }, { "epoch": 0.66, "grad_norm": 14.39871623794586, "learning_rate": 9.56119421517442e-06, "loss": 1.1371, "step": 15660 }, { "epoch": 0.66, "grad_norm": 13.291224197745935, "learning_rate": 9.560689734517517e-06, "loss": 1.0982, "step": 15665 }, { "epoch": 0.66, "grad_norm": 9.707853349257634, "learning_rate": 9.560184977359429e-06, "loss": 1.1654, "step": 15670 }, { "epoch": 0.66, "grad_norm": 7.95705431277963, "learning_rate": 9.559679943730757e-06, "loss": 1.108, "step": 15675 }, { "epoch": 0.66, "grad_norm": 6.318265773283196, "learning_rate": 9.559174633662123e-06, "loss": 1.0886, "step": 15680 }, { "epoch": 0.66, "grad_norm": 9.238269887472416, "learning_rate": 9.558669047184157e-06, "loss": 1.0963, "step": 15685 }, { "epoch": 0.66, "grad_norm": 9.118318181593873, "learning_rate": 9.558163184327519e-06, "loss": 1.1661, "step": 15690 }, { "epoch": 0.66, "grad_norm": 7.576816564619521, "learning_rate": 9.55765704512287e-06, "loss": 1.097, "step": 15695 }, { "epoch": 0.66, "grad_norm": 18.722824564847656, "learning_rate": 9.557150629600901e-06, "loss": 1.1214, "step": 15700 }, { "epoch": 0.66, "grad_norm": 12.425301804278783, "learning_rate": 9.556643937792313e-06, "loss": 1.097, "step": 15705 }, { "epoch": 0.66, "grad_norm": 6.88683107008038, "learning_rate": 9.556136969727825e-06, "loss": 1.1574, "step": 15710 }, { "epoch": 0.67, "grad_norm": 5.418389644282192, "learning_rate": 9.555629725438172e-06, "loss": 1.1009, "step": 15715 }, { "epoch": 0.67, "grad_norm": 9.832660233585669, "learning_rate": 9.555122204954109e-06, "loss": 1.1345, "step": 15720 }, { "epoch": 0.67, "grad_norm": 11.003568247307738, "learning_rate": 9.554614408306405e-06, "loss": 1.0998, "step": 15725 }, { "epoch": 0.67, "grad_norm": 13.71699216321345, "learning_rate": 9.554106335525843e-06, "loss": 1.1264, "step": 15730 }, { "epoch": 0.67, "grad_norm": 10.291658488293862, "learning_rate": 9.55359798664323e-06, "loss": 1.125, "step": 15735 }, { "epoch": 0.67, "grad_norm": 10.86309031595583, "learning_rate": 9.553089361689382e-06, "loss": 1.102, "step": 15740 }, { "epoch": 0.67, "grad_norm": 6.669922414982565, "learning_rate": 9.552580460695141e-06, "loss": 1.13, "step": 15745 }, { "epoch": 0.67, "grad_norm": 5.947133682212815, "learning_rate": 9.552071283691355e-06, "loss": 1.0998, "step": 15750 }, { "epoch": 0.67, "grad_norm": 12.20076178027709, "learning_rate": 9.551561830708895e-06, "loss": 1.1301, "step": 15755 }, { "epoch": 0.67, "grad_norm": 10.68342801957456, "learning_rate": 9.55105210177865e-06, "loss": 1.1394, "step": 15760 }, { "epoch": 0.67, "grad_norm": 8.222756752599814, "learning_rate": 9.55054209693152e-06, "loss": 1.0735, "step": 15765 }, { "epoch": 0.67, "grad_norm": 8.186772023727826, "learning_rate": 9.550031816198429e-06, "loss": 1.0959, "step": 15770 }, { "epoch": 0.67, "grad_norm": 7.3105167317237685, "learning_rate": 9.54952125961031e-06, "loss": 1.1537, "step": 15775 }, { "epoch": 0.67, "grad_norm": 14.938936955446723, "learning_rate": 9.549010427198118e-06, "loss": 1.1429, "step": 15780 }, { "epoch": 0.67, "grad_norm": 16.576650004438832, "learning_rate": 9.548499318992824e-06, "loss": 1.1108, "step": 15785 }, { "epoch": 0.67, "grad_norm": 6.816720702593366, "learning_rate": 9.547987935025412e-06, "loss": 1.0916, "step": 15790 }, { "epoch": 0.67, "grad_norm": 6.261444769111539, "learning_rate": 9.54747627532689e-06, "loss": 1.1293, "step": 15795 }, { "epoch": 0.67, "grad_norm": 5.129132085123604, "learning_rate": 9.546964339928276e-06, "loss": 1.1087, "step": 15800 }, { "epoch": 0.67, "grad_norm": 13.167944209636138, "learning_rate": 9.546452128860607e-06, "loss": 1.1325, "step": 15805 }, { "epoch": 0.67, "grad_norm": 14.30170678593782, "learning_rate": 9.54593964215494e-06, "loss": 1.1487, "step": 15810 }, { "epoch": 0.67, "grad_norm": 15.31473977237239, "learning_rate": 9.545426879842341e-06, "loss": 1.1125, "step": 15815 }, { "epoch": 0.67, "grad_norm": 13.344238010450255, "learning_rate": 9.544913841953898e-06, "loss": 1.1277, "step": 15820 }, { "epoch": 0.67, "grad_norm": 6.851476086789551, "learning_rate": 9.544400528520718e-06, "loss": 1.1238, "step": 15825 }, { "epoch": 0.67, "grad_norm": 10.344716489051912, "learning_rate": 9.543886939573921e-06, "loss": 1.1089, "step": 15830 }, { "epoch": 0.67, "grad_norm": 5.706495578276909, "learning_rate": 9.543373075144642e-06, "loss": 1.1363, "step": 15835 }, { "epoch": 0.67, "grad_norm": 5.833524880990271, "learning_rate": 9.542858935264036e-06, "loss": 1.103, "step": 15840 }, { "epoch": 0.67, "grad_norm": 13.888900647368413, "learning_rate": 9.542344519963274e-06, "loss": 1.1431, "step": 15845 }, { "epoch": 0.67, "grad_norm": 24.72540705356045, "learning_rate": 9.541829829273546e-06, "loss": 1.1353, "step": 15850 }, { "epoch": 0.67, "grad_norm": 10.60610919904562, "learning_rate": 9.541314863226052e-06, "loss": 1.1313, "step": 15855 }, { "epoch": 0.67, "grad_norm": 16.89472331801454, "learning_rate": 9.540799621852014e-06, "loss": 1.1158, "step": 15860 }, { "epoch": 0.67, "grad_norm": 5.72988600930111, "learning_rate": 9.540284105182671e-06, "loss": 1.1197, "step": 15865 }, { "epoch": 0.67, "grad_norm": 19.191752374280185, "learning_rate": 9.539768313249276e-06, "loss": 1.1337, "step": 15870 }, { "epoch": 0.67, "grad_norm": 6.856539691834326, "learning_rate": 9.5392522460831e-06, "loss": 1.0902, "step": 15875 }, { "epoch": 0.67, "grad_norm": 9.920787210253751, "learning_rate": 9.538735903715435e-06, "loss": 1.1644, "step": 15880 }, { "epoch": 0.67, "grad_norm": 19.515779275107036, "learning_rate": 9.538219286177579e-06, "loss": 1.0936, "step": 15885 }, { "epoch": 0.67, "grad_norm": 8.302226564565773, "learning_rate": 9.537702393500854e-06, "loss": 1.1388, "step": 15890 }, { "epoch": 0.67, "grad_norm": 43.789120777383566, "learning_rate": 9.537185225716601e-06, "loss": 1.1483, "step": 15895 }, { "epoch": 0.67, "grad_norm": 35.37208805690864, "learning_rate": 9.536667782856172e-06, "loss": 1.1654, "step": 15900 }, { "epoch": 0.67, "grad_norm": 14.85696555679927, "learning_rate": 9.536150064950937e-06, "loss": 1.184, "step": 15905 }, { "epoch": 0.67, "grad_norm": 6.0762966819038065, "learning_rate": 9.535632072032288e-06, "loss": 1.1384, "step": 15910 }, { "epoch": 0.67, "grad_norm": 20.23928215278144, "learning_rate": 9.535113804131625e-06, "loss": 1.1511, "step": 15915 }, { "epoch": 0.67, "grad_norm": 9.104557315697825, "learning_rate": 9.53459526128037e-06, "loss": 1.1009, "step": 15920 }, { "epoch": 0.67, "grad_norm": 15.795062846814455, "learning_rate": 9.534076443509963e-06, "loss": 1.1241, "step": 15925 }, { "epoch": 0.67, "grad_norm": 6.636920453176176, "learning_rate": 9.533557350851857e-06, "loss": 1.1081, "step": 15930 }, { "epoch": 0.67, "grad_norm": 6.241516908927179, "learning_rate": 9.533037983337521e-06, "loss": 1.0959, "step": 15935 }, { "epoch": 0.67, "grad_norm": 13.778451139155578, "learning_rate": 9.532518340998444e-06, "loss": 1.1441, "step": 15940 }, { "epoch": 0.67, "grad_norm": 15.62020461296476, "learning_rate": 9.531998423866133e-06, "loss": 1.0973, "step": 15945 }, { "epoch": 0.68, "grad_norm": 20.267337819458096, "learning_rate": 9.531478231972107e-06, "loss": 1.1004, "step": 15950 }, { "epoch": 0.68, "grad_norm": 21.857553320454397, "learning_rate": 9.530957765347903e-06, "loss": 1.1235, "step": 15955 }, { "epoch": 0.68, "grad_norm": 12.091841783402426, "learning_rate": 9.530437024025077e-06, "loss": 1.1269, "step": 15960 }, { "epoch": 0.68, "grad_norm": 7.705531629037003, "learning_rate": 9.529916008035199e-06, "loss": 1.1122, "step": 15965 }, { "epoch": 0.68, "grad_norm": 8.588748077856957, "learning_rate": 9.529394717409855e-06, "loss": 1.1093, "step": 15970 }, { "epoch": 0.68, "grad_norm": 6.094558818256664, "learning_rate": 9.52887315218065e-06, "loss": 1.0989, "step": 15975 }, { "epoch": 0.68, "grad_norm": 5.787451083155929, "learning_rate": 9.52835131237921e-06, "loss": 1.1398, "step": 15980 }, { "epoch": 0.68, "grad_norm": 7.1548537208422305, "learning_rate": 9.527829198037167e-06, "loss": 1.1331, "step": 15985 }, { "epoch": 0.68, "grad_norm": 8.652170694507138, "learning_rate": 9.527306809186176e-06, "loss": 1.0993, "step": 15990 }, { "epoch": 0.68, "grad_norm": 5.655357942185252, "learning_rate": 9.52678414585791e-06, "loss": 1.1247, "step": 15995 }, { "epoch": 0.68, "grad_norm": 5.386303646992126, "learning_rate": 9.526261208084054e-06, "loss": 1.1226, "step": 16000 }, { "epoch": 0.68, "grad_norm": 7.6778458086669845, "learning_rate": 9.525737995896313e-06, "loss": 1.0901, "step": 16005 }, { "epoch": 0.68, "grad_norm": 9.39788114004018, "learning_rate": 9.52521450932641e-06, "loss": 1.1147, "step": 16010 }, { "epoch": 0.68, "grad_norm": 17.652087015538783, "learning_rate": 9.52469074840608e-06, "loss": 1.1446, "step": 16015 }, { "epoch": 0.68, "grad_norm": 10.40592830103076, "learning_rate": 9.524166713167077e-06, "loss": 1.1256, "step": 16020 }, { "epoch": 0.68, "grad_norm": 17.01522461655715, "learning_rate": 9.523642403641176e-06, "loss": 1.1371, "step": 16025 }, { "epoch": 0.68, "grad_norm": 8.246350472239362, "learning_rate": 9.523117819860158e-06, "loss": 1.1202, "step": 16030 }, { "epoch": 0.68, "grad_norm": 11.297322699968756, "learning_rate": 9.52259296185583e-06, "loss": 1.0879, "step": 16035 }, { "epoch": 0.68, "grad_norm": 10.013068269723783, "learning_rate": 9.522067829660013e-06, "loss": 1.125, "step": 16040 }, { "epoch": 0.68, "grad_norm": 7.293790337648549, "learning_rate": 9.521542423304544e-06, "loss": 1.159, "step": 16045 }, { "epoch": 0.68, "grad_norm": 4.835919261424686, "learning_rate": 9.521016742821275e-06, "loss": 1.1087, "step": 16050 }, { "epoch": 0.68, "grad_norm": 11.975019618669092, "learning_rate": 9.52049078824208e-06, "loss": 1.0601, "step": 16055 }, { "epoch": 0.68, "grad_norm": 26.809077836318586, "learning_rate": 9.519964559598842e-06, "loss": 1.1079, "step": 16060 }, { "epoch": 0.68, "grad_norm": 10.223704884661332, "learning_rate": 9.519438056923468e-06, "loss": 1.0978, "step": 16065 }, { "epoch": 0.68, "grad_norm": 5.007418574741625, "learning_rate": 9.518911280247877e-06, "loss": 1.0972, "step": 16070 }, { "epoch": 0.68, "grad_norm": 5.307951209339337, "learning_rate": 9.518384229604004e-06, "loss": 1.1494, "step": 16075 }, { "epoch": 0.68, "grad_norm": 12.847877619887234, "learning_rate": 9.517856905023808e-06, "loss": 1.094, "step": 16080 }, { "epoch": 0.68, "grad_norm": 21.596597576292055, "learning_rate": 9.517329306539254e-06, "loss": 1.1117, "step": 16085 }, { "epoch": 0.68, "grad_norm": 7.248205504346232, "learning_rate": 9.51680143418233e-06, "loss": 1.1208, "step": 16090 }, { "epoch": 0.68, "grad_norm": 5.96613183549509, "learning_rate": 9.516273287985041e-06, "loss": 1.1585, "step": 16095 }, { "epoch": 0.68, "grad_norm": 9.801891804621334, "learning_rate": 9.515744867979404e-06, "loss": 1.0901, "step": 16100 }, { "epoch": 0.68, "grad_norm": 11.049608898358061, "learning_rate": 9.515216174197457e-06, "loss": 1.0984, "step": 16105 }, { "epoch": 0.68, "grad_norm": 5.075499506984404, "learning_rate": 9.514687206671255e-06, "loss": 1.139, "step": 16110 }, { "epoch": 0.68, "grad_norm": 10.316105228669224, "learning_rate": 9.514157965432866e-06, "loss": 1.0923, "step": 16115 }, { "epoch": 0.68, "grad_norm": 5.499657914771534, "learning_rate": 9.513628450514376e-06, "loss": 1.1219, "step": 16120 }, { "epoch": 0.68, "grad_norm": 9.68355911771093, "learning_rate": 9.513098661947888e-06, "loss": 1.1414, "step": 16125 }, { "epoch": 0.68, "grad_norm": 12.640527817201807, "learning_rate": 9.51256859976552e-06, "loss": 1.13, "step": 16130 }, { "epoch": 0.68, "grad_norm": 16.602723941317617, "learning_rate": 9.512038263999414e-06, "loss": 1.1081, "step": 16135 }, { "epoch": 0.68, "grad_norm": 19.779127002067398, "learning_rate": 9.511507654681717e-06, "loss": 1.1269, "step": 16140 }, { "epoch": 0.68, "grad_norm": 17.327055552687504, "learning_rate": 9.5109767718446e-06, "loss": 1.1417, "step": 16145 }, { "epoch": 0.68, "grad_norm": 12.872922848971763, "learning_rate": 9.510445615520248e-06, "loss": 1.1774, "step": 16150 }, { "epoch": 0.68, "grad_norm": 28.237574261276997, "learning_rate": 9.509914185740864e-06, "loss": 1.1181, "step": 16155 }, { "epoch": 0.68, "grad_norm": 6.998291608488835, "learning_rate": 9.50938248253867e-06, "loss": 1.1581, "step": 16160 }, { "epoch": 0.68, "grad_norm": 17.765806747822108, "learning_rate": 9.508850505945896e-06, "loss": 1.1223, "step": 16165 }, { "epoch": 0.68, "grad_norm": 12.429989143848161, "learning_rate": 9.508318255994798e-06, "loss": 1.1074, "step": 16170 }, { "epoch": 0.68, "grad_norm": 6.564872981137462, "learning_rate": 9.507785732717645e-06, "loss": 1.1088, "step": 16175 }, { "epoch": 0.68, "grad_norm": 17.90562616962865, "learning_rate": 9.50725293614672e-06, "loss": 1.1405, "step": 16180 }, { "epoch": 0.68, "grad_norm": 16.709698958734737, "learning_rate": 9.506719866314326e-06, "loss": 1.0864, "step": 16185 }, { "epoch": 0.69, "grad_norm": 12.107245754363326, "learning_rate": 9.506186523252781e-06, "loss": 1.0789, "step": 16190 }, { "epoch": 0.69, "grad_norm": 10.172998808660411, "learning_rate": 9.505652906994421e-06, "loss": 1.225, "step": 16195 }, { "epoch": 0.69, "grad_norm": 12.204090149073378, "learning_rate": 9.505119017571599e-06, "loss": 1.1731, "step": 16200 }, { "epoch": 0.69, "grad_norm": 13.789001715457545, "learning_rate": 9.504584855016678e-06, "loss": 1.0912, "step": 16205 }, { "epoch": 0.69, "grad_norm": 12.282588972993546, "learning_rate": 9.504050419362047e-06, "loss": 1.1168, "step": 16210 }, { "epoch": 0.69, "grad_norm": 26.93422669138498, "learning_rate": 9.503515710640108e-06, "loss": 1.1653, "step": 16215 }, { "epoch": 0.69, "grad_norm": 12.774993639459181, "learning_rate": 9.502980728883275e-06, "loss": 1.0825, "step": 16220 }, { "epoch": 0.69, "grad_norm": 6.722513696576831, "learning_rate": 9.502445474123984e-06, "loss": 1.1287, "step": 16225 }, { "epoch": 0.69, "grad_norm": 6.518843001585743, "learning_rate": 9.501909946394688e-06, "loss": 1.1302, "step": 16230 }, { "epoch": 0.69, "grad_norm": 15.861134296712128, "learning_rate": 9.501374145727854e-06, "loss": 1.113, "step": 16235 }, { "epoch": 0.69, "grad_norm": 5.522280441281781, "learning_rate": 9.500838072155961e-06, "loss": 1.1006, "step": 16240 }, { "epoch": 0.69, "grad_norm": 11.687416219228599, "learning_rate": 9.500301725711516e-06, "loss": 1.1507, "step": 16245 }, { "epoch": 0.69, "grad_norm": 5.217268767273105, "learning_rate": 9.499765106427033e-06, "loss": 1.1127, "step": 16250 }, { "epoch": 0.69, "grad_norm": 9.184309992267938, "learning_rate": 9.499228214335045e-06, "loss": 1.1159, "step": 16255 }, { "epoch": 0.69, "grad_norm": 19.033486016390004, "learning_rate": 9.498691049468103e-06, "loss": 1.0839, "step": 16260 }, { "epoch": 0.69, "grad_norm": 16.854178720427843, "learning_rate": 9.498153611858776e-06, "loss": 1.074, "step": 16265 }, { "epoch": 0.69, "grad_norm": 23.058516645518235, "learning_rate": 9.497615901539642e-06, "loss": 1.1786, "step": 16270 }, { "epoch": 0.69, "grad_norm": 8.234404255321616, "learning_rate": 9.497077918543309e-06, "loss": 1.1098, "step": 16275 }, { "epoch": 0.69, "grad_norm": 7.644911172226839, "learning_rate": 9.496539662902384e-06, "loss": 1.1383, "step": 16280 }, { "epoch": 0.69, "grad_norm": 8.895257087064357, "learning_rate": 9.496001134649507e-06, "loss": 1.1289, "step": 16285 }, { "epoch": 0.69, "grad_norm": 7.890853218819522, "learning_rate": 9.495462333817323e-06, "loss": 1.1314, "step": 16290 }, { "epoch": 0.69, "grad_norm": 14.88983867536514, "learning_rate": 9.4949232604385e-06, "loss": 1.1254, "step": 16295 }, { "epoch": 0.69, "grad_norm": 45.02490898857594, "learning_rate": 9.494383914545719e-06, "loss": 1.1536, "step": 16300 }, { "epoch": 0.69, "grad_norm": 15.21717036655647, "learning_rate": 9.493844296171681e-06, "loss": 1.1455, "step": 16305 }, { "epoch": 0.69, "grad_norm": 29.87505299491644, "learning_rate": 9.493304405349101e-06, "loss": 1.1207, "step": 16310 }, { "epoch": 0.69, "grad_norm": 29.983671380398928, "learning_rate": 9.49276424211071e-06, "loss": 1.133, "step": 16315 }, { "epoch": 0.69, "grad_norm": 20.735292581448892, "learning_rate": 9.492223806489256e-06, "loss": 1.1071, "step": 16320 }, { "epoch": 0.69, "grad_norm": 11.83409718003557, "learning_rate": 9.491683098517505e-06, "loss": 1.1569, "step": 16325 }, { "epoch": 0.69, "grad_norm": 6.172079042977844, "learning_rate": 9.49114211822824e-06, "loss": 1.09, "step": 16330 }, { "epoch": 0.69, "grad_norm": 5.763372494395667, "learning_rate": 9.490600865654255e-06, "loss": 1.1123, "step": 16335 }, { "epoch": 0.69, "grad_norm": 5.56089879060707, "learning_rate": 9.490059340828369e-06, "loss": 1.1648, "step": 16340 }, { "epoch": 0.69, "grad_norm": 14.45202987057126, "learning_rate": 9.489517543783412e-06, "loss": 1.103, "step": 16345 }, { "epoch": 0.69, "grad_norm": 10.019185491297037, "learning_rate": 9.488975474552226e-06, "loss": 1.081, "step": 16350 }, { "epoch": 0.69, "grad_norm": 11.742327704659386, "learning_rate": 9.488433133167683e-06, "loss": 1.1387, "step": 16355 }, { "epoch": 0.69, "grad_norm": 11.401143502026688, "learning_rate": 9.487890519662662e-06, "loss": 1.1229, "step": 16360 }, { "epoch": 0.69, "grad_norm": 6.89927105312118, "learning_rate": 9.487347634070055e-06, "loss": 1.0955, "step": 16365 }, { "epoch": 0.69, "grad_norm": 12.008315941857413, "learning_rate": 9.48680447642278e-06, "loss": 1.1261, "step": 16370 }, { "epoch": 0.69, "grad_norm": 7.744313717021116, "learning_rate": 9.486261046753767e-06, "loss": 1.122, "step": 16375 }, { "epoch": 0.69, "grad_norm": 10.758601921147918, "learning_rate": 9.485717345095959e-06, "loss": 1.1122, "step": 16380 }, { "epoch": 0.69, "grad_norm": 6.707997637719105, "learning_rate": 9.485173371482323e-06, "loss": 1.1099, "step": 16385 }, { "epoch": 0.69, "grad_norm": 13.043130417480645, "learning_rate": 9.484629125945837e-06, "loss": 1.1429, "step": 16390 }, { "epoch": 0.69, "grad_norm": 10.65680356110051, "learning_rate": 9.484084608519497e-06, "loss": 1.1278, "step": 16395 }, { "epoch": 0.69, "grad_norm": 6.398807630176025, "learning_rate": 9.483539819236316e-06, "loss": 1.1294, "step": 16400 }, { "epoch": 0.69, "grad_norm": 20.79442772954482, "learning_rate": 9.482994758129322e-06, "loss": 1.111, "step": 16405 }, { "epoch": 0.69, "grad_norm": 16.319576719853313, "learning_rate": 9.48244942523156e-06, "loss": 1.1385, "step": 16410 }, { "epoch": 0.69, "grad_norm": 26.400663066036593, "learning_rate": 9.481903820576093e-06, "loss": 1.1272, "step": 16415 }, { "epoch": 0.69, "grad_norm": 13.146974735838572, "learning_rate": 9.481357944196001e-06, "loss": 1.1244, "step": 16420 }, { "epoch": 0.7, "grad_norm": 10.770150240753477, "learning_rate": 9.480811796124376e-06, "loss": 1.1168, "step": 16425 }, { "epoch": 0.7, "grad_norm": 21.510290694212912, "learning_rate": 9.48026537639433e-06, "loss": 1.1411, "step": 16430 }, { "epoch": 0.7, "grad_norm": 5.124776639888739, "learning_rate": 9.479718685038991e-06, "loss": 1.1448, "step": 16435 }, { "epoch": 0.7, "grad_norm": 19.330327500848504, "learning_rate": 9.479171722091504e-06, "loss": 1.1232, "step": 16440 }, { "epoch": 0.7, "grad_norm": 12.162637567554073, "learning_rate": 9.478624487585031e-06, "loss": 1.1045, "step": 16445 }, { "epoch": 0.7, "grad_norm": 7.40534662028943, "learning_rate": 9.478076981552746e-06, "loss": 1.1079, "step": 16450 }, { "epoch": 0.7, "grad_norm": 12.273655272335528, "learning_rate": 9.477529204027844e-06, "loss": 1.1047, "step": 16455 }, { "epoch": 0.7, "grad_norm": 19.68001402078473, "learning_rate": 9.476981155043537e-06, "loss": 1.1248, "step": 16460 }, { "epoch": 0.7, "grad_norm": 21.380072068327515, "learning_rate": 9.476432834633048e-06, "loss": 1.1198, "step": 16465 }, { "epoch": 0.7, "grad_norm": 34.85628614751562, "learning_rate": 9.475884242829623e-06, "loss": 1.1098, "step": 16470 }, { "epoch": 0.7, "grad_norm": 47.94743515292792, "learning_rate": 9.47533537966652e-06, "loss": 1.1085, "step": 16475 }, { "epoch": 0.7, "grad_norm": 20.81681702072176, "learning_rate": 9.474786245177015e-06, "loss": 1.1517, "step": 16480 }, { "epoch": 0.7, "grad_norm": 10.068269343963848, "learning_rate": 9.474236839394402e-06, "loss": 1.1808, "step": 16485 }, { "epoch": 0.7, "grad_norm": 21.19243581628494, "learning_rate": 9.473687162351987e-06, "loss": 1.1239, "step": 16490 }, { "epoch": 0.7, "grad_norm": 37.21512502656769, "learning_rate": 9.473137214083099e-06, "loss": 1.1444, "step": 16495 }, { "epoch": 0.7, "grad_norm": 7.356721661213098, "learning_rate": 9.472586994621077e-06, "loss": 1.1335, "step": 16500 }, { "epoch": 0.7, "grad_norm": 9.130062442997964, "learning_rate": 9.47203650399928e-06, "loss": 1.1571, "step": 16505 }, { "epoch": 0.7, "grad_norm": 7.896186773646387, "learning_rate": 9.471485742251083e-06, "loss": 1.1137, "step": 16510 }, { "epoch": 0.7, "grad_norm": 10.551283986598412, "learning_rate": 9.470934709409876e-06, "loss": 1.1337, "step": 16515 }, { "epoch": 0.7, "grad_norm": 7.7003160427665, "learning_rate": 9.470383405509067e-06, "loss": 1.1181, "step": 16520 }, { "epoch": 0.7, "grad_norm": 16.27370780684043, "learning_rate": 9.469831830582082e-06, "loss": 1.1203, "step": 16525 }, { "epoch": 0.7, "grad_norm": 5.450951092994064, "learning_rate": 9.469279984662356e-06, "loss": 1.1223, "step": 16530 }, { "epoch": 0.7, "grad_norm": 8.826104983490803, "learning_rate": 9.468727867783352e-06, "loss": 1.1338, "step": 16535 }, { "epoch": 0.7, "grad_norm": 11.548589140974116, "learning_rate": 9.468175479978538e-06, "loss": 1.0937, "step": 16540 }, { "epoch": 0.7, "grad_norm": 10.04772186126436, "learning_rate": 9.467622821281408e-06, "loss": 1.0864, "step": 16545 }, { "epoch": 0.7, "grad_norm": 13.61626104485777, "learning_rate": 9.467069891725466e-06, "loss": 1.1257, "step": 16550 }, { "epoch": 0.7, "grad_norm": 6.4844180416513435, "learning_rate": 9.466516691344235e-06, "loss": 1.1204, "step": 16555 }, { "epoch": 0.7, "grad_norm": 28.168245753658315, "learning_rate": 9.465963220171252e-06, "loss": 1.1052, "step": 16560 }, { "epoch": 0.7, "grad_norm": 32.393992640015774, "learning_rate": 9.465409478240074e-06, "loss": 1.1046, "step": 16565 }, { "epoch": 0.7, "grad_norm": 11.053725853513852, "learning_rate": 9.464855465584275e-06, "loss": 1.1247, "step": 16570 }, { "epoch": 0.7, "grad_norm": 16.38971028809713, "learning_rate": 9.464301182237439e-06, "loss": 1.1117, "step": 16575 }, { "epoch": 0.7, "grad_norm": 17.61020730135982, "learning_rate": 9.463746628233173e-06, "loss": 1.1814, "step": 16580 }, { "epoch": 0.7, "grad_norm": 21.44109326803698, "learning_rate": 9.463191803605097e-06, "loss": 1.1135, "step": 16585 }, { "epoch": 0.7, "grad_norm": 7.426235470967014, "learning_rate": 9.462636708386849e-06, "loss": 1.119, "step": 16590 }, { "epoch": 0.7, "grad_norm": 7.947325330805191, "learning_rate": 9.462081342612081e-06, "loss": 1.0905, "step": 16595 }, { "epoch": 0.7, "grad_norm": 6.528238906675508, "learning_rate": 9.461525706314467e-06, "loss": 1.1249, "step": 16600 }, { "epoch": 0.7, "grad_norm": 18.805288994820586, "learning_rate": 9.46096979952769e-06, "loss": 1.1099, "step": 16605 }, { "epoch": 0.7, "grad_norm": 23.33570587767011, "learning_rate": 9.460413622285454e-06, "loss": 1.1433, "step": 16610 }, { "epoch": 0.7, "grad_norm": 14.004866042143698, "learning_rate": 9.45985717462148e-06, "loss": 1.0779, "step": 16615 }, { "epoch": 0.7, "grad_norm": 14.242876809876437, "learning_rate": 9.4593004565695e-06, "loss": 1.1287, "step": 16620 }, { "epoch": 0.7, "grad_norm": 5.65946635603587, "learning_rate": 9.458743468163271e-06, "loss": 1.1391, "step": 16625 }, { "epoch": 0.7, "grad_norm": 8.191679028924941, "learning_rate": 9.458186209436559e-06, "loss": 1.1183, "step": 16630 }, { "epoch": 0.7, "grad_norm": 8.594517614370647, "learning_rate": 9.457628680423148e-06, "loss": 1.1034, "step": 16635 }, { "epoch": 0.7, "grad_norm": 5.6666821311089075, "learning_rate": 9.45707088115684e-06, "loss": 1.1435, "step": 16640 }, { "epoch": 0.7, "grad_norm": 7.787138239729822, "learning_rate": 9.456512811671454e-06, "loss": 1.1315, "step": 16645 }, { "epoch": 0.7, "grad_norm": 5.824007676561451, "learning_rate": 9.455954472000824e-06, "loss": 1.1229, "step": 16650 }, { "epoch": 0.7, "grad_norm": 9.70450859969771, "learning_rate": 9.4553958621788e-06, "loss": 1.1387, "step": 16655 }, { "epoch": 0.71, "grad_norm": 5.183771705613105, "learning_rate": 9.454836982239248e-06, "loss": 1.0918, "step": 16660 }, { "epoch": 0.71, "grad_norm": 12.68487848217025, "learning_rate": 9.454277832216053e-06, "loss": 1.1459, "step": 16665 }, { "epoch": 0.71, "grad_norm": 12.588762678610511, "learning_rate": 9.453718412143111e-06, "loss": 1.1686, "step": 16670 }, { "epoch": 0.71, "grad_norm": 5.95041425199994, "learning_rate": 9.453158722054342e-06, "loss": 1.1775, "step": 16675 }, { "epoch": 0.71, "grad_norm": 7.389140281785481, "learning_rate": 9.452598761983676e-06, "loss": 1.0824, "step": 16680 }, { "epoch": 0.71, "grad_norm": 6.457371902774636, "learning_rate": 9.452038531965064e-06, "loss": 1.1244, "step": 16685 }, { "epoch": 0.71, "grad_norm": 5.138594175909104, "learning_rate": 9.45147803203247e-06, "loss": 1.1502, "step": 16690 }, { "epoch": 0.71, "grad_norm": 15.73782408581993, "learning_rate": 9.450917262219873e-06, "loss": 1.1096, "step": 16695 }, { "epoch": 0.71, "grad_norm": 48.15487289315489, "learning_rate": 9.450356222561275e-06, "loss": 1.1158, "step": 16700 }, { "epoch": 0.71, "grad_norm": 71.0660940825832, "learning_rate": 9.449794913090688e-06, "loss": 1.1074, "step": 16705 }, { "epoch": 0.71, "grad_norm": 37.908447686872755, "learning_rate": 9.449233333842142e-06, "loss": 1.1157, "step": 16710 }, { "epoch": 0.71, "grad_norm": 48.95623884410956, "learning_rate": 9.448671484849686e-06, "loss": 1.1215, "step": 16715 }, { "epoch": 0.71, "grad_norm": 26.511995987652156, "learning_rate": 9.44810936614738e-06, "loss": 1.1472, "step": 16720 }, { "epoch": 0.71, "grad_norm": 24.35007005317823, "learning_rate": 9.447546977769306e-06, "loss": 1.1275, "step": 16725 }, { "epoch": 0.71, "grad_norm": 19.500741273390563, "learning_rate": 9.44698431974956e-06, "loss": 1.0949, "step": 16730 }, { "epoch": 0.71, "grad_norm": 19.970507472491803, "learning_rate": 9.446421392122252e-06, "loss": 1.1443, "step": 16735 }, { "epoch": 0.71, "grad_norm": 14.051456844397624, "learning_rate": 9.445858194921513e-06, "loss": 1.083, "step": 16740 }, { "epoch": 0.71, "grad_norm": 9.514466356345627, "learning_rate": 9.445294728181488e-06, "loss": 1.173, "step": 16745 }, { "epoch": 0.71, "grad_norm": 9.583482026424033, "learning_rate": 9.444730991936339e-06, "loss": 1.1785, "step": 16750 }, { "epoch": 0.71, "grad_norm": 16.056715779376827, "learning_rate": 9.444166986220241e-06, "loss": 1.105, "step": 16755 }, { "epoch": 0.71, "grad_norm": 11.80940019974729, "learning_rate": 9.44360271106739e-06, "loss": 1.1694, "step": 16760 }, { "epoch": 0.71, "grad_norm": 22.361662859930433, "learning_rate": 9.443038166511995e-06, "loss": 1.1116, "step": 16765 }, { "epoch": 0.71, "grad_norm": 9.027221559233777, "learning_rate": 9.442473352588284e-06, "loss": 1.103, "step": 16770 }, { "epoch": 0.71, "grad_norm": 9.836040408668314, "learning_rate": 9.441908269330497e-06, "loss": 1.1322, "step": 16775 }, { "epoch": 0.71, "grad_norm": 6.303328143306504, "learning_rate": 9.441342916772897e-06, "loss": 1.1345, "step": 16780 }, { "epoch": 0.71, "grad_norm": 5.021272221926106, "learning_rate": 9.44077729494976e-06, "loss": 1.0654, "step": 16785 }, { "epoch": 0.71, "grad_norm": 6.995398075099797, "learning_rate": 9.440211403895375e-06, "loss": 1.1068, "step": 16790 }, { "epoch": 0.71, "grad_norm": 5.521030212433786, "learning_rate": 9.439645243644052e-06, "loss": 1.1246, "step": 16795 }, { "epoch": 0.71, "grad_norm": 9.464672992246477, "learning_rate": 9.439078814230116e-06, "loss": 1.1399, "step": 16800 }, { "epoch": 0.71, "grad_norm": 6.968872254572704, "learning_rate": 9.438512115687906e-06, "loss": 1.128, "step": 16805 }, { "epoch": 0.71, "grad_norm": 5.658854728003837, "learning_rate": 9.43794514805178e-06, "loss": 1.0866, "step": 16810 }, { "epoch": 0.71, "grad_norm": 5.91804251262899, "learning_rate": 9.437377911356113e-06, "loss": 1.154, "step": 16815 }, { "epoch": 0.71, "grad_norm": 7.433954831086169, "learning_rate": 9.436810405635295e-06, "loss": 1.1803, "step": 16820 }, { "epoch": 0.71, "grad_norm": 17.669302545018628, "learning_rate": 9.436242630923729e-06, "loss": 1.1257, "step": 16825 }, { "epoch": 0.71, "grad_norm": 7.6086021840869105, "learning_rate": 9.43567458725584e-06, "loss": 1.0987, "step": 16830 }, { "epoch": 0.71, "grad_norm": 28.14374126918958, "learning_rate": 9.435106274666068e-06, "loss": 1.1591, "step": 16835 }, { "epoch": 0.71, "grad_norm": 8.222061371987685, "learning_rate": 9.434537693188866e-06, "loss": 1.1114, "step": 16840 }, { "epoch": 0.71, "grad_norm": 7.979414582554136, "learning_rate": 9.433968842858707e-06, "loss": 1.1037, "step": 16845 }, { "epoch": 0.71, "grad_norm": 15.03706353610534, "learning_rate": 9.433399723710075e-06, "loss": 1.0911, "step": 16850 }, { "epoch": 0.71, "grad_norm": 14.901984005001061, "learning_rate": 9.432830335777478e-06, "loss": 1.1511, "step": 16855 }, { "epoch": 0.71, "grad_norm": 6.1973574333662365, "learning_rate": 9.432260679095437e-06, "loss": 1.147, "step": 16860 }, { "epoch": 0.71, "grad_norm": 10.799997034857435, "learning_rate": 9.431690753698484e-06, "loss": 1.0888, "step": 16865 }, { "epoch": 0.71, "grad_norm": 6.515932725361904, "learning_rate": 9.431120559621177e-06, "loss": 1.1235, "step": 16870 }, { "epoch": 0.71, "grad_norm": 5.396228285215444, "learning_rate": 9.430550096898081e-06, "loss": 1.1252, "step": 16875 }, { "epoch": 0.71, "grad_norm": 6.736459841290592, "learning_rate": 9.429979365563784e-06, "loss": 1.0846, "step": 16880 }, { "epoch": 0.71, "grad_norm": 8.170423620966293, "learning_rate": 9.429408365652885e-06, "loss": 1.1327, "step": 16885 }, { "epoch": 0.71, "grad_norm": 15.019682647948079, "learning_rate": 9.428837097200007e-06, "loss": 1.1283, "step": 16890 }, { "epoch": 0.72, "grad_norm": 19.109949796023205, "learning_rate": 9.42826556023978e-06, "loss": 1.1327, "step": 16895 }, { "epoch": 0.72, "grad_norm": 11.792807691435836, "learning_rate": 9.427693754806855e-06, "loss": 1.1405, "step": 16900 }, { "epoch": 0.72, "grad_norm": 7.830178430312812, "learning_rate": 9.427121680935903e-06, "loss": 1.1026, "step": 16905 }, { "epoch": 0.72, "grad_norm": 5.2772761218963415, "learning_rate": 9.4265493386616e-06, "loss": 1.102, "step": 16910 }, { "epoch": 0.72, "grad_norm": 8.513334085513613, "learning_rate": 9.425976728018652e-06, "loss": 1.1461, "step": 16915 }, { "epoch": 0.72, "grad_norm": 15.461304275406098, "learning_rate": 9.425403849041772e-06, "loss": 1.1177, "step": 16920 }, { "epoch": 0.72, "grad_norm": 11.172519127997129, "learning_rate": 9.424830701765691e-06, "loss": 1.1218, "step": 16925 }, { "epoch": 0.72, "grad_norm": 10.780778037333757, "learning_rate": 9.424257286225158e-06, "loss": 1.103, "step": 16930 }, { "epoch": 0.72, "grad_norm": 7.074888927969338, "learning_rate": 9.42368360245494e-06, "loss": 1.1047, "step": 16935 }, { "epoch": 0.72, "grad_norm": 5.343478713355483, "learning_rate": 9.423109650489813e-06, "loss": 1.112, "step": 16940 }, { "epoch": 0.72, "grad_norm": 8.18160662537546, "learning_rate": 9.422535430364578e-06, "loss": 1.1071, "step": 16945 }, { "epoch": 0.72, "grad_norm": 5.727614405773898, "learning_rate": 9.421960942114047e-06, "loss": 1.1492, "step": 16950 }, { "epoch": 0.72, "grad_norm": 5.975550544799563, "learning_rate": 9.42138618577305e-06, "loss": 1.0887, "step": 16955 }, { "epoch": 0.72, "grad_norm": 6.165521010744902, "learning_rate": 9.42081116137643e-06, "loss": 1.1371, "step": 16960 }, { "epoch": 0.72, "grad_norm": 5.9756960727466275, "learning_rate": 9.420235868959053e-06, "loss": 1.102, "step": 16965 }, { "epoch": 0.72, "grad_norm": 7.288050139755605, "learning_rate": 9.419660308555794e-06, "loss": 1.125, "step": 16970 }, { "epoch": 0.72, "grad_norm": 22.46363958680743, "learning_rate": 9.41908448020155e-06, "loss": 1.1041, "step": 16975 }, { "epoch": 0.72, "grad_norm": 25.92286580198727, "learning_rate": 9.41850838393123e-06, "loss": 1.0745, "step": 16980 }, { "epoch": 0.72, "grad_norm": 31.478701769149943, "learning_rate": 9.417932019779764e-06, "loss": 1.1298, "step": 16985 }, { "epoch": 0.72, "grad_norm": 49.98478516291867, "learning_rate": 9.417355387782093e-06, "loss": 1.2096, "step": 16990 }, { "epoch": 0.72, "grad_norm": 18.777311914589585, "learning_rate": 9.416778487973175e-06, "loss": 1.1411, "step": 16995 }, { "epoch": 0.72, "grad_norm": 29.088862736629796, "learning_rate": 9.416201320387987e-06, "loss": 1.1457, "step": 17000 }, { "epoch": 0.72, "grad_norm": 14.911032623941088, "learning_rate": 9.415623885061523e-06, "loss": 1.0973, "step": 17005 }, { "epoch": 0.72, "grad_norm": 9.592416651664726, "learning_rate": 9.415046182028786e-06, "loss": 1.1031, "step": 17010 }, { "epoch": 0.72, "grad_norm": 24.22813894139801, "learning_rate": 9.414468211324807e-06, "loss": 1.1858, "step": 17015 }, { "epoch": 0.72, "grad_norm": 9.970733707924163, "learning_rate": 9.413889972984624e-06, "loss": 1.1443, "step": 17020 }, { "epoch": 0.72, "grad_norm": 6.495194174545278, "learning_rate": 9.413311467043291e-06, "loss": 1.1401, "step": 17025 }, { "epoch": 0.72, "grad_norm": 10.941363346822824, "learning_rate": 9.412732693535886e-06, "loss": 1.0958, "step": 17030 }, { "epoch": 0.72, "grad_norm": 5.263242700227079, "learning_rate": 9.412153652497495e-06, "loss": 1.1256, "step": 17035 }, { "epoch": 0.72, "grad_norm": 8.335565848080336, "learning_rate": 9.411574343963224e-06, "loss": 1.0734, "step": 17040 }, { "epoch": 0.72, "grad_norm": 13.382657427490711, "learning_rate": 9.410994767968195e-06, "loss": 1.1308, "step": 17045 }, { "epoch": 0.72, "grad_norm": 24.033479591859525, "learning_rate": 9.410414924547547e-06, "loss": 1.1264, "step": 17050 }, { "epoch": 0.72, "grad_norm": 21.630015037907583, "learning_rate": 9.409834813736433e-06, "loss": 1.1463, "step": 17055 }, { "epoch": 0.72, "grad_norm": 9.037300580388155, "learning_rate": 9.409254435570026e-06, "loss": 1.1246, "step": 17060 }, { "epoch": 0.72, "grad_norm": 10.793201911770602, "learning_rate": 9.408673790083507e-06, "loss": 1.1093, "step": 17065 }, { "epoch": 0.72, "grad_norm": 10.380811723294608, "learning_rate": 9.408092877312083e-06, "loss": 1.1116, "step": 17070 }, { "epoch": 0.72, "grad_norm": 5.33713237365468, "learning_rate": 9.407511697290974e-06, "loss": 1.1354, "step": 17075 }, { "epoch": 0.72, "grad_norm": 5.693570574742182, "learning_rate": 9.406930250055412e-06, "loss": 1.0813, "step": 17080 }, { "epoch": 0.72, "grad_norm": 5.41622611707326, "learning_rate": 9.406348535640652e-06, "loss": 1.0948, "step": 17085 }, { "epoch": 0.72, "grad_norm": 6.456705795607186, "learning_rate": 9.405766554081958e-06, "loss": 1.0801, "step": 17090 }, { "epoch": 0.72, "grad_norm": 6.4960135527114495, "learning_rate": 9.405184305414616e-06, "loss": 1.1138, "step": 17095 }, { "epoch": 0.72, "grad_norm": 6.288026182079762, "learning_rate": 9.404601789673925e-06, "loss": 1.0832, "step": 17100 }, { "epoch": 0.72, "grad_norm": 19.3479543569613, "learning_rate": 9.404019006895203e-06, "loss": 1.107, "step": 17105 }, { "epoch": 0.72, "grad_norm": 7.842926222082708, "learning_rate": 9.403435957113777e-06, "loss": 1.1164, "step": 17110 }, { "epoch": 0.72, "grad_norm": 13.281561361795422, "learning_rate": 9.402852640365004e-06, "loss": 1.0897, "step": 17115 }, { "epoch": 0.72, "grad_norm": 8.077612566222394, "learning_rate": 9.402269056684243e-06, "loss": 1.1498, "step": 17120 }, { "epoch": 0.72, "grad_norm": 5.166434646711995, "learning_rate": 9.401685206106876e-06, "loss": 1.1154, "step": 17125 }, { "epoch": 0.72, "grad_norm": 20.958865281871816, "learning_rate": 9.401101088668302e-06, "loss": 1.0815, "step": 17130 }, { "epoch": 0.73, "grad_norm": 10.18135045122717, "learning_rate": 9.400516704403932e-06, "loss": 1.12, "step": 17135 }, { "epoch": 0.73, "grad_norm": 10.568880727957094, "learning_rate": 9.399932053349197e-06, "loss": 1.1508, "step": 17140 }, { "epoch": 0.73, "grad_norm": 6.885155559948981, "learning_rate": 9.399347135539541e-06, "loss": 1.1253, "step": 17145 }, { "epoch": 0.73, "grad_norm": 14.876977894790413, "learning_rate": 9.398761951010426e-06, "loss": 1.1455, "step": 17150 }, { "epoch": 0.73, "grad_norm": 7.3827275308788245, "learning_rate": 9.398176499797332e-06, "loss": 1.1025, "step": 17155 }, { "epoch": 0.73, "grad_norm": 6.951926088984365, "learning_rate": 9.397590781935752e-06, "loss": 1.1129, "step": 17160 }, { "epoch": 0.73, "grad_norm": 7.206918830206302, "learning_rate": 9.397004797461197e-06, "loss": 1.0866, "step": 17165 }, { "epoch": 0.73, "grad_norm": 13.939961323895474, "learning_rate": 9.396418546409192e-06, "loss": 1.086, "step": 17170 }, { "epoch": 0.73, "grad_norm": 17.98587503767636, "learning_rate": 9.395832028815282e-06, "loss": 1.0882, "step": 17175 }, { "epoch": 0.73, "grad_norm": 8.494169701184303, "learning_rate": 9.395245244715023e-06, "loss": 1.0921, "step": 17180 }, { "epoch": 0.73, "grad_norm": 10.881569315129182, "learning_rate": 9.394658194143992e-06, "loss": 1.1037, "step": 17185 }, { "epoch": 0.73, "grad_norm": 8.78607005751547, "learning_rate": 9.39407087713778e-06, "loss": 1.0849, "step": 17190 }, { "epoch": 0.73, "grad_norm": 8.58776841781562, "learning_rate": 9.393483293731993e-06, "loss": 1.1106, "step": 17195 }, { "epoch": 0.73, "grad_norm": 7.688776652978134, "learning_rate": 9.392895443962256e-06, "loss": 1.1113, "step": 17200 }, { "epoch": 0.73, "grad_norm": 16.680057695339766, "learning_rate": 9.392307327864208e-06, "loss": 1.0985, "step": 17205 }, { "epoch": 0.73, "grad_norm": 6.334391613909822, "learning_rate": 9.391718945473503e-06, "loss": 1.1252, "step": 17210 }, { "epoch": 0.73, "grad_norm": 5.2719185549215375, "learning_rate": 9.391130296825816e-06, "loss": 1.1098, "step": 17215 }, { "epoch": 0.73, "grad_norm": 5.852469179256973, "learning_rate": 9.390541381956834e-06, "loss": 1.1391, "step": 17220 }, { "epoch": 0.73, "grad_norm": 6.0455342614041525, "learning_rate": 9.38995220090226e-06, "loss": 1.1411, "step": 17225 }, { "epoch": 0.73, "grad_norm": 7.0591929612376525, "learning_rate": 9.389362753697816e-06, "loss": 1.1087, "step": 17230 }, { "epoch": 0.73, "grad_norm": 6.763499201327044, "learning_rate": 9.388773040379235e-06, "loss": 1.0955, "step": 17235 }, { "epoch": 0.73, "grad_norm": 7.7384206899086205, "learning_rate": 9.388183060982275e-06, "loss": 1.0961, "step": 17240 }, { "epoch": 0.73, "grad_norm": 7.225344644349165, "learning_rate": 9.387592815542701e-06, "loss": 1.1043, "step": 17245 }, { "epoch": 0.73, "grad_norm": 6.081547305535541, "learning_rate": 9.387002304096299e-06, "loss": 1.0947, "step": 17250 }, { "epoch": 0.73, "grad_norm": 5.5018757772530344, "learning_rate": 9.38641152667887e-06, "loss": 1.0955, "step": 17255 }, { "epoch": 0.73, "grad_norm": 6.018645712561101, "learning_rate": 9.38582048332623e-06, "loss": 1.12, "step": 17260 }, { "epoch": 0.73, "grad_norm": 6.032871693566749, "learning_rate": 9.38522917407421e-06, "loss": 1.1433, "step": 17265 }, { "epoch": 0.73, "grad_norm": 5.085426516684288, "learning_rate": 9.384637598958665e-06, "loss": 1.1228, "step": 17270 }, { "epoch": 0.73, "grad_norm": 16.940379431112994, "learning_rate": 9.384045758015458e-06, "loss": 1.1136, "step": 17275 }, { "epoch": 0.73, "grad_norm": 10.867936870634642, "learning_rate": 9.383453651280473e-06, "loss": 1.1184, "step": 17280 }, { "epoch": 0.73, "grad_norm": 15.159912773918336, "learning_rate": 9.382861278789601e-06, "loss": 1.0968, "step": 17285 }, { "epoch": 0.73, "grad_norm": 35.23415371528219, "learning_rate": 9.38226864057876e-06, "loss": 1.1321, "step": 17290 }, { "epoch": 0.73, "grad_norm": 38.455798061691674, "learning_rate": 9.381675736683882e-06, "loss": 1.1038, "step": 17295 }, { "epoch": 0.73, "grad_norm": 22.97976332771787, "learning_rate": 9.38108256714091e-06, "loss": 1.1248, "step": 17300 }, { "epoch": 0.73, "grad_norm": 16.54808353760375, "learning_rate": 9.380489131985806e-06, "loss": 1.0636, "step": 17305 }, { "epoch": 0.73, "grad_norm": 7.500960986525851, "learning_rate": 9.37989543125455e-06, "loss": 1.1391, "step": 17310 }, { "epoch": 0.73, "grad_norm": 7.254658336402659, "learning_rate": 9.379301464983132e-06, "loss": 1.0996, "step": 17315 }, { "epoch": 0.73, "grad_norm": 7.8062352509720485, "learning_rate": 9.37870723320757e-06, "loss": 1.0848, "step": 17320 }, { "epoch": 0.73, "grad_norm": 5.58053408673204, "learning_rate": 9.378112735963884e-06, "loss": 1.0942, "step": 17325 }, { "epoch": 0.73, "grad_norm": 11.5728432405758, "learning_rate": 9.377517973288122e-06, "loss": 1.1486, "step": 17330 }, { "epoch": 0.73, "grad_norm": 5.758099755696685, "learning_rate": 9.376922945216336e-06, "loss": 1.0788, "step": 17335 }, { "epoch": 0.73, "grad_norm": 7.961547409784049, "learning_rate": 9.376327651784607e-06, "loss": 1.1187, "step": 17340 }, { "epoch": 0.73, "grad_norm": 10.382997081280067, "learning_rate": 9.375732093029022e-06, "loss": 1.1194, "step": 17345 }, { "epoch": 0.73, "grad_norm": 8.939893616718557, "learning_rate": 9.37513626898569e-06, "loss": 1.0756, "step": 17350 }, { "epoch": 0.73, "grad_norm": 6.144261339286091, "learning_rate": 9.374540179690732e-06, "loss": 1.1073, "step": 17355 }, { "epoch": 0.73, "grad_norm": 5.522202673309658, "learning_rate": 9.373943825180291e-06, "loss": 1.1012, "step": 17360 }, { "epoch": 0.73, "grad_norm": 6.270819616283949, "learning_rate": 9.373347205490519e-06, "loss": 1.1631, "step": 17365 }, { "epoch": 0.74, "grad_norm": 5.940002302468172, "learning_rate": 9.372750320657586e-06, "loss": 1.0831, "step": 17370 }, { "epoch": 0.74, "grad_norm": 7.116239465877265, "learning_rate": 9.372153170717684e-06, "loss": 1.093, "step": 17375 }, { "epoch": 0.74, "grad_norm": 10.224456581099487, "learning_rate": 9.371555755707014e-06, "loss": 1.1437, "step": 17380 }, { "epoch": 0.74, "grad_norm": 12.929234697607665, "learning_rate": 9.370958075661793e-06, "loss": 1.1664, "step": 17385 }, { "epoch": 0.74, "grad_norm": 9.412018440039608, "learning_rate": 9.37036013061826e-06, "loss": 1.0812, "step": 17390 }, { "epoch": 0.74, "grad_norm": 27.757568484336996, "learning_rate": 9.369761920612666e-06, "loss": 1.1752, "step": 17395 }, { "epoch": 0.74, "grad_norm": 6.993272958203807, "learning_rate": 9.369163445681278e-06, "loss": 1.1116, "step": 17400 }, { "epoch": 0.74, "grad_norm": 6.819832161701608, "learning_rate": 9.368564705860382e-06, "loss": 1.0893, "step": 17405 }, { "epoch": 0.74, "grad_norm": 16.23664331522571, "learning_rate": 9.367965701186273e-06, "loss": 1.059, "step": 17410 }, { "epoch": 0.74, "grad_norm": 8.786421369373763, "learning_rate": 9.367366431695271e-06, "loss": 1.1032, "step": 17415 }, { "epoch": 0.74, "grad_norm": 12.881198429582792, "learning_rate": 9.366766897423707e-06, "loss": 1.0745, "step": 17420 }, { "epoch": 0.74, "grad_norm": 6.6695823337627775, "learning_rate": 9.366167098407929e-06, "loss": 1.1006, "step": 17425 }, { "epoch": 0.74, "grad_norm": 5.397347096500923, "learning_rate": 9.365567034684299e-06, "loss": 1.0793, "step": 17430 }, { "epoch": 0.74, "grad_norm": 5.504096159984868, "learning_rate": 9.3649667062892e-06, "loss": 1.1459, "step": 17435 }, { "epoch": 0.74, "grad_norm": 38.57755971082172, "learning_rate": 9.364366113259027e-06, "loss": 1.072, "step": 17440 }, { "epoch": 0.74, "grad_norm": 26.433593794628287, "learning_rate": 9.363765255630192e-06, "loss": 1.1172, "step": 17445 }, { "epoch": 0.74, "grad_norm": 27.051505783097266, "learning_rate": 9.363164133439125e-06, "loss": 1.1307, "step": 17450 }, { "epoch": 0.74, "grad_norm": 5.177600258122624, "learning_rate": 9.362562746722266e-06, "loss": 1.0892, "step": 17455 }, { "epoch": 0.74, "grad_norm": 8.911785509530484, "learning_rate": 9.36196109551608e-06, "loss": 1.1751, "step": 17460 }, { "epoch": 0.74, "grad_norm": 6.142733250197846, "learning_rate": 9.36135917985704e-06, "loss": 1.1293, "step": 17465 }, { "epoch": 0.74, "grad_norm": 4.599561485892141, "learning_rate": 9.360756999781641e-06, "loss": 1.0728, "step": 17470 }, { "epoch": 0.74, "grad_norm": 8.748536093258558, "learning_rate": 9.360154555326389e-06, "loss": 1.1313, "step": 17475 }, { "epoch": 0.74, "grad_norm": 16.18103476033407, "learning_rate": 9.359551846527811e-06, "loss": 1.0639, "step": 17480 }, { "epoch": 0.74, "grad_norm": 7.837274662600126, "learning_rate": 9.358948873422446e-06, "loss": 1.0918, "step": 17485 }, { "epoch": 0.74, "grad_norm": 7.6687705979683525, "learning_rate": 9.358345636046848e-06, "loss": 1.0925, "step": 17490 }, { "epoch": 0.74, "grad_norm": 21.562897690698215, "learning_rate": 9.357742134437595e-06, "loss": 1.1433, "step": 17495 }, { "epoch": 0.74, "grad_norm": 20.13977736018128, "learning_rate": 9.35713836863127e-06, "loss": 1.1734, "step": 17500 }, { "epoch": 0.74, "grad_norm": 8.160711819186727, "learning_rate": 9.356534338664483e-06, "loss": 1.102, "step": 17505 }, { "epoch": 0.74, "grad_norm": 31.946127381726082, "learning_rate": 9.35593004457385e-06, "loss": 1.1448, "step": 17510 }, { "epoch": 0.74, "grad_norm": 24.62622025068199, "learning_rate": 9.35532548639601e-06, "loss": 1.0676, "step": 17515 }, { "epoch": 0.74, "grad_norm": 16.42863546105984, "learning_rate": 9.354720664167614e-06, "loss": 1.1279, "step": 17520 }, { "epoch": 0.74, "grad_norm": 26.444107761655193, "learning_rate": 9.354115577925332e-06, "loss": 1.1216, "step": 17525 }, { "epoch": 0.74, "grad_norm": 14.4372949897646, "learning_rate": 9.353510227705846e-06, "loss": 1.1043, "step": 17530 }, { "epoch": 0.74, "grad_norm": 6.547363155591738, "learning_rate": 9.35290461354586e-06, "loss": 1.1228, "step": 17535 }, { "epoch": 0.74, "grad_norm": 10.442737868382967, "learning_rate": 9.35229873548209e-06, "loss": 1.1339, "step": 17540 }, { "epoch": 0.74, "grad_norm": 15.48346025871906, "learning_rate": 9.351692593551265e-06, "loss": 1.1451, "step": 17545 }, { "epoch": 0.74, "grad_norm": 13.996469509651037, "learning_rate": 9.35108618779014e-06, "loss": 1.1276, "step": 17550 }, { "epoch": 0.74, "grad_norm": 9.49324712392557, "learning_rate": 9.350479518235473e-06, "loss": 1.1509, "step": 17555 }, { "epoch": 0.74, "grad_norm": 6.181695831946108, "learning_rate": 9.349872584924048e-06, "loss": 1.0905, "step": 17560 }, { "epoch": 0.74, "grad_norm": 10.978894884206056, "learning_rate": 9.349265387892663e-06, "loss": 1.0913, "step": 17565 }, { "epoch": 0.74, "grad_norm": 8.62675790500558, "learning_rate": 9.348657927178129e-06, "loss": 1.1052, "step": 17570 }, { "epoch": 0.74, "grad_norm": 6.1643408333638945, "learning_rate": 9.348050202817273e-06, "loss": 1.108, "step": 17575 }, { "epoch": 0.74, "grad_norm": 5.285029094815165, "learning_rate": 9.34744221484694e-06, "loss": 1.0962, "step": 17580 }, { "epoch": 0.74, "grad_norm": 13.100019988748052, "learning_rate": 9.346833963303993e-06, "loss": 1.1107, "step": 17585 }, { "epoch": 0.74, "grad_norm": 10.649098406277306, "learning_rate": 9.346225448225306e-06, "loss": 1.1542, "step": 17590 }, { "epoch": 0.74, "grad_norm": 21.37435265346111, "learning_rate": 9.345616669647773e-06, "loss": 1.1085, "step": 17595 }, { "epoch": 0.74, "grad_norm": 10.18876984518517, "learning_rate": 9.3450076276083e-06, "loss": 1.0728, "step": 17600 }, { "epoch": 0.75, "grad_norm": 20.201192777929748, "learning_rate": 9.344398322143816e-06, "loss": 1.0933, "step": 17605 }, { "epoch": 0.75, "grad_norm": 12.445424417338074, "learning_rate": 9.343788753291256e-06, "loss": 1.1315, "step": 17610 }, { "epoch": 0.75, "grad_norm": 12.36200115962045, "learning_rate": 9.34317892108758e-06, "loss": 1.0948, "step": 17615 }, { "epoch": 0.75, "grad_norm": 18.17910100725324, "learning_rate": 9.342568825569758e-06, "loss": 1.0926, "step": 17620 }, { "epoch": 0.75, "grad_norm": 12.473700386629618, "learning_rate": 9.34195846677478e-06, "loss": 1.0816, "step": 17625 }, { "epoch": 0.75, "grad_norm": 12.48303123125485, "learning_rate": 9.34134784473965e-06, "loss": 1.1206, "step": 17630 }, { "epoch": 0.75, "grad_norm": 9.097180306245573, "learning_rate": 9.34073695950139e-06, "loss": 1.1322, "step": 17635 }, { "epoch": 0.75, "grad_norm": 10.525758247727076, "learning_rate": 9.340125811097031e-06, "loss": 1.0828, "step": 17640 }, { "epoch": 0.75, "grad_norm": 5.6009010325028505, "learning_rate": 9.339514399563631e-06, "loss": 1.0958, "step": 17645 }, { "epoch": 0.75, "grad_norm": 7.640477077965488, "learning_rate": 9.338902724938254e-06, "loss": 1.1042, "step": 17650 }, { "epoch": 0.75, "grad_norm": 13.994414103562079, "learning_rate": 9.338290787257986e-06, "loss": 1.0807, "step": 17655 }, { "epoch": 0.75, "grad_norm": 17.560956580459454, "learning_rate": 9.337678586559926e-06, "loss": 1.1153, "step": 17660 }, { "epoch": 0.75, "grad_norm": 19.149023371553817, "learning_rate": 9.337066122881192e-06, "loss": 1.1285, "step": 17665 }, { "epoch": 0.75, "grad_norm": 6.840414915856152, "learning_rate": 9.33645339625891e-06, "loss": 1.1475, "step": 17670 }, { "epoch": 0.75, "grad_norm": 9.318343283167946, "learning_rate": 9.335840406730236e-06, "loss": 1.0679, "step": 17675 }, { "epoch": 0.75, "grad_norm": 15.886471567354363, "learning_rate": 9.33522715433233e-06, "loss": 1.0834, "step": 17680 }, { "epoch": 0.75, "grad_norm": 6.1570218387264255, "learning_rate": 9.334613639102369e-06, "loss": 1.1588, "step": 17685 }, { "epoch": 0.75, "grad_norm": 9.496101410888, "learning_rate": 9.333999861077551e-06, "loss": 1.1213, "step": 17690 }, { "epoch": 0.75, "grad_norm": 6.4879162131471455, "learning_rate": 9.33338582029509e-06, "loss": 1.0902, "step": 17695 }, { "epoch": 0.75, "grad_norm": 9.163005891091226, "learning_rate": 9.33277151679221e-06, "loss": 1.1084, "step": 17700 }, { "epoch": 0.75, "grad_norm": 8.280386367844475, "learning_rate": 9.332156950606156e-06, "loss": 1.1438, "step": 17705 }, { "epoch": 0.75, "grad_norm": 21.097335957884365, "learning_rate": 9.331542121774185e-06, "loss": 1.1196, "step": 17710 }, { "epoch": 0.75, "grad_norm": 9.706026362108982, "learning_rate": 9.330927030333576e-06, "loss": 1.0956, "step": 17715 }, { "epoch": 0.75, "grad_norm": 11.424335749711798, "learning_rate": 9.330311676321617e-06, "loss": 1.0882, "step": 17720 }, { "epoch": 0.75, "grad_norm": 11.46983326743464, "learning_rate": 9.329696059775617e-06, "loss": 1.1302, "step": 17725 }, { "epoch": 0.75, "grad_norm": 6.2976372954129625, "learning_rate": 9.3290801807329e-06, "loss": 1.142, "step": 17730 }, { "epoch": 0.75, "grad_norm": 12.858724317071061, "learning_rate": 9.3284640392308e-06, "loss": 1.1238, "step": 17735 }, { "epoch": 0.75, "grad_norm": 6.370489362463976, "learning_rate": 9.32784763530668e-06, "loss": 1.087, "step": 17740 }, { "epoch": 0.75, "grad_norm": 12.886489401922693, "learning_rate": 9.327230968997902e-06, "loss": 1.0841, "step": 17745 }, { "epoch": 0.75, "grad_norm": 11.61986652593081, "learning_rate": 9.326614040341856e-06, "loss": 1.1004, "step": 17750 }, { "epoch": 0.75, "grad_norm": 6.961581315637526, "learning_rate": 9.32599684937595e-06, "loss": 1.0827, "step": 17755 }, { "epoch": 0.75, "grad_norm": 8.36029046294683, "learning_rate": 9.325379396137592e-06, "loss": 1.1208, "step": 17760 }, { "epoch": 0.75, "grad_norm": 5.653540400390449, "learning_rate": 9.324761680664225e-06, "loss": 1.1047, "step": 17765 }, { "epoch": 0.75, "grad_norm": 5.167477681863815, "learning_rate": 9.324143702993297e-06, "loss": 1.1157, "step": 17770 }, { "epoch": 0.75, "grad_norm": 13.181209543313301, "learning_rate": 9.323525463162272e-06, "loss": 1.1029, "step": 17775 }, { "epoch": 0.75, "grad_norm": 6.453272784212921, "learning_rate": 9.322906961208635e-06, "loss": 1.0982, "step": 17780 }, { "epoch": 0.75, "grad_norm": 5.783406016091666, "learning_rate": 9.32228819716988e-06, "loss": 1.0925, "step": 17785 }, { "epoch": 0.75, "grad_norm": 5.707581107944481, "learning_rate": 9.321669171083524e-06, "loss": 1.1077, "step": 17790 }, { "epoch": 0.75, "grad_norm": 5.24513728558436, "learning_rate": 9.321049882987099e-06, "loss": 1.0983, "step": 17795 }, { "epoch": 0.75, "grad_norm": 7.114739383565758, "learning_rate": 9.320430332918145e-06, "loss": 1.0697, "step": 17800 }, { "epoch": 0.75, "grad_norm": 6.696646599122937, "learning_rate": 9.319810520914227e-06, "loss": 1.1071, "step": 17805 }, { "epoch": 0.75, "grad_norm": 5.325697501887738, "learning_rate": 9.31919044701292e-06, "loss": 1.0609, "step": 17810 }, { "epoch": 0.75, "grad_norm": 6.564186081772088, "learning_rate": 9.318570111251822e-06, "loss": 1.1163, "step": 17815 }, { "epoch": 0.75, "grad_norm": 9.896498195909624, "learning_rate": 9.317949513668535e-06, "loss": 1.1309, "step": 17820 }, { "epoch": 0.75, "grad_norm": 7.935313062509824, "learning_rate": 9.31732865430069e-06, "loss": 1.0803, "step": 17825 }, { "epoch": 0.75, "grad_norm": 6.371963286186434, "learning_rate": 9.316707533185926e-06, "loss": 1.1124, "step": 17830 }, { "epoch": 0.75, "grad_norm": 5.1389184696570345, "learning_rate": 9.316086150361898e-06, "loss": 1.0989, "step": 17835 }, { "epoch": 0.76, "grad_norm": 9.254101640906628, "learning_rate": 9.315464505866283e-06, "loss": 1.1019, "step": 17840 }, { "epoch": 0.76, "grad_norm": 7.617964891678169, "learning_rate": 9.314842599736765e-06, "loss": 1.0296, "step": 17845 }, { "epoch": 0.76, "grad_norm": 8.994548316031661, "learning_rate": 9.31422043201105e-06, "loss": 1.0999, "step": 17850 }, { "epoch": 0.76, "grad_norm": 8.884209118718427, "learning_rate": 9.313598002726858e-06, "loss": 1.1303, "step": 17855 }, { "epoch": 0.76, "grad_norm": 43.291399748229495, "learning_rate": 9.312975311921925e-06, "loss": 1.1126, "step": 17860 }, { "epoch": 0.76, "grad_norm": 20.692757428848292, "learning_rate": 9.312352359634002e-06, "loss": 1.1431, "step": 17865 }, { "epoch": 0.76, "grad_norm": 13.025039825363798, "learning_rate": 9.311729145900861e-06, "loss": 1.1445, "step": 17870 }, { "epoch": 0.76, "grad_norm": 12.413295935876926, "learning_rate": 9.31110567076028e-06, "loss": 1.113, "step": 17875 }, { "epoch": 0.76, "grad_norm": 10.707804017583332, "learning_rate": 9.310481934250061e-06, "loss": 1.0626, "step": 17880 }, { "epoch": 0.76, "grad_norm": 7.335946387506712, "learning_rate": 9.309857936408021e-06, "loss": 1.08, "step": 17885 }, { "epoch": 0.76, "grad_norm": 5.608756107676717, "learning_rate": 9.309233677271989e-06, "loss": 1.0948, "step": 17890 }, { "epoch": 0.76, "grad_norm": 7.91177670711054, "learning_rate": 9.308609156879811e-06, "loss": 1.0963, "step": 17895 }, { "epoch": 0.76, "grad_norm": 8.115951076738714, "learning_rate": 9.307984375269352e-06, "loss": 1.0847, "step": 17900 }, { "epoch": 0.76, "grad_norm": 8.004888216317523, "learning_rate": 9.30735933247849e-06, "loss": 1.1112, "step": 17905 }, { "epoch": 0.76, "grad_norm": 22.769500361730856, "learning_rate": 9.306734028545119e-06, "loss": 1.1411, "step": 17910 }, { "epoch": 0.76, "grad_norm": 9.246741595201495, "learning_rate": 9.306108463507151e-06, "loss": 1.165, "step": 17915 }, { "epoch": 0.76, "grad_norm": 60.33464362540984, "learning_rate": 9.30548263740251e-06, "loss": 1.0808, "step": 17920 }, { "epoch": 0.76, "grad_norm": 43.69616070366895, "learning_rate": 9.30485655026914e-06, "loss": 1.1266, "step": 17925 }, { "epoch": 0.76, "grad_norm": 10.321737392385257, "learning_rate": 9.304230202144994e-06, "loss": 1.0959, "step": 17930 }, { "epoch": 0.76, "grad_norm": 43.41819720045734, "learning_rate": 9.303603593068053e-06, "loss": 1.1409, "step": 17935 }, { "epoch": 0.76, "grad_norm": 24.649802928595093, "learning_rate": 9.302976723076302e-06, "loss": 1.1148, "step": 17940 }, { "epoch": 0.76, "grad_norm": 18.029746468154173, "learning_rate": 9.302349592207749e-06, "loss": 1.1472, "step": 17945 }, { "epoch": 0.76, "grad_norm": 14.193913968429644, "learning_rate": 9.30172220050041e-06, "loss": 1.0816, "step": 17950 }, { "epoch": 0.76, "grad_norm": 9.508118285996746, "learning_rate": 9.301094547992326e-06, "loss": 1.1818, "step": 17955 }, { "epoch": 0.76, "grad_norm": 20.527532773215444, "learning_rate": 9.30046663472155e-06, "loss": 1.0734, "step": 17960 }, { "epoch": 0.76, "grad_norm": 18.413200169623018, "learning_rate": 9.299838460726147e-06, "loss": 1.0533, "step": 17965 }, { "epoch": 0.76, "grad_norm": 14.572408066102856, "learning_rate": 9.299210026044206e-06, "loss": 1.1383, "step": 17970 }, { "epoch": 0.76, "grad_norm": 27.10646904666803, "learning_rate": 9.298581330713825e-06, "loss": 1.148, "step": 17975 }, { "epoch": 0.76, "grad_norm": 15.448729598628704, "learning_rate": 9.297952374773117e-06, "loss": 1.1288, "step": 17980 }, { "epoch": 0.76, "grad_norm": 19.19352992645366, "learning_rate": 9.29732315826022e-06, "loss": 1.1217, "step": 17985 }, { "epoch": 0.76, "grad_norm": 13.857559087635702, "learning_rate": 9.296693681213278e-06, "loss": 1.1038, "step": 17990 }, { "epoch": 0.76, "grad_norm": 21.89320667055709, "learning_rate": 9.296063943670451e-06, "loss": 1.1167, "step": 17995 }, { "epoch": 0.76, "grad_norm": 12.428263262811615, "learning_rate": 9.295433945669925e-06, "loss": 1.1003, "step": 18000 }, { "epoch": 0.76, "grad_norm": 7.786520722827463, "learning_rate": 9.29480368724989e-06, "loss": 1.1053, "step": 18005 }, { "epoch": 0.76, "grad_norm": 14.621900385283961, "learning_rate": 9.294173168448557e-06, "loss": 1.0974, "step": 18010 }, { "epoch": 0.76, "grad_norm": 7.872821012468063, "learning_rate": 9.293542389304156e-06, "loss": 1.138, "step": 18015 }, { "epoch": 0.76, "grad_norm": 18.62667057385184, "learning_rate": 9.292911349854925e-06, "loss": 1.0886, "step": 18020 }, { "epoch": 0.76, "grad_norm": 16.427205921395075, "learning_rate": 9.292280050139127e-06, "loss": 1.0571, "step": 18025 }, { "epoch": 0.76, "grad_norm": 26.118521970745363, "learning_rate": 9.29164849019503e-06, "loss": 1.0896, "step": 18030 }, { "epoch": 0.76, "grad_norm": 5.594387110319337, "learning_rate": 9.291016670060927e-06, "loss": 1.064, "step": 18035 }, { "epoch": 0.76, "grad_norm": 18.444047145454256, "learning_rate": 9.290384589775124e-06, "loss": 1.0505, "step": 18040 }, { "epoch": 0.76, "grad_norm": 15.13698466135544, "learning_rate": 9.28975224937594e-06, "loss": 1.1057, "step": 18045 }, { "epoch": 0.76, "grad_norm": 7.143583582507327, "learning_rate": 9.289119648901713e-06, "loss": 1.0773, "step": 18050 }, { "epoch": 0.76, "grad_norm": 15.324132017195472, "learning_rate": 9.288486788390796e-06, "loss": 1.1165, "step": 18055 }, { "epoch": 0.76, "grad_norm": 19.686030221150965, "learning_rate": 9.287853667881558e-06, "loss": 1.1165, "step": 18060 }, { "epoch": 0.76, "grad_norm": 7.856307777689082, "learning_rate": 9.287220287412382e-06, "loss": 1.1115, "step": 18065 }, { "epoch": 0.76, "grad_norm": 5.529049466086411, "learning_rate": 9.286586647021667e-06, "loss": 1.1069, "step": 18070 }, { "epoch": 0.76, "grad_norm": 21.14855222012083, "learning_rate": 9.285952746747832e-06, "loss": 1.1055, "step": 18075 }, { "epoch": 0.77, "grad_norm": 14.488448627417618, "learning_rate": 9.285318586629307e-06, "loss": 1.1114, "step": 18080 }, { "epoch": 0.77, "grad_norm": 11.68412109571187, "learning_rate": 9.284684166704536e-06, "loss": 1.115, "step": 18085 }, { "epoch": 0.77, "grad_norm": 5.678861619744061, "learning_rate": 9.284049487011986e-06, "loss": 1.1153, "step": 18090 }, { "epoch": 0.77, "grad_norm": 12.751745063264902, "learning_rate": 9.283414547590136e-06, "loss": 1.1135, "step": 18095 }, { "epoch": 0.77, "grad_norm": 5.728169038157912, "learning_rate": 9.28277934847748e-06, "loss": 1.0642, "step": 18100 }, { "epoch": 0.77, "grad_norm": 13.563350595731961, "learning_rate": 9.282143889712525e-06, "loss": 1.1345, "step": 18105 }, { "epoch": 0.77, "grad_norm": 10.644341445123432, "learning_rate": 9.281508171333799e-06, "loss": 1.0736, "step": 18110 }, { "epoch": 0.77, "grad_norm": 6.201505795645917, "learning_rate": 9.280872193379846e-06, "loss": 1.1169, "step": 18115 }, { "epoch": 0.77, "grad_norm": 12.743236804679773, "learning_rate": 9.280235955889223e-06, "loss": 1.125, "step": 18120 }, { "epoch": 0.77, "grad_norm": 17.444599243677327, "learning_rate": 9.2795994589005e-06, "loss": 1.138, "step": 18125 }, { "epoch": 0.77, "grad_norm": 23.029678620204297, "learning_rate": 9.278962702452268e-06, "loss": 1.1331, "step": 18130 }, { "epoch": 0.77, "grad_norm": 8.891904219305596, "learning_rate": 9.27832568658313e-06, "loss": 1.1102, "step": 18135 }, { "epoch": 0.77, "grad_norm": 11.6712913782314, "learning_rate": 9.27768841133171e-06, "loss": 1.1113, "step": 18140 }, { "epoch": 0.77, "grad_norm": 19.658027741518705, "learning_rate": 9.27705087673664e-06, "loss": 1.118, "step": 18145 }, { "epoch": 0.77, "grad_norm": 7.549807127756542, "learning_rate": 9.276413082836577e-06, "loss": 1.118, "step": 18150 }, { "epoch": 0.77, "grad_norm": 20.480539715359487, "learning_rate": 9.275775029670182e-06, "loss": 1.1275, "step": 18155 }, { "epoch": 0.77, "grad_norm": 12.764272951596997, "learning_rate": 9.275136717276143e-06, "loss": 1.1518, "step": 18160 }, { "epoch": 0.77, "grad_norm": 6.756798278971463, "learning_rate": 9.274498145693159e-06, "loss": 1.0684, "step": 18165 }, { "epoch": 0.77, "grad_norm": 32.97820281601068, "learning_rate": 9.27385931495994e-06, "loss": 1.1132, "step": 18170 }, { "epoch": 0.77, "grad_norm": 28.049341545594245, "learning_rate": 9.273220225115222e-06, "loss": 1.1441, "step": 18175 }, { "epoch": 0.77, "grad_norm": 53.46882825394066, "learning_rate": 9.27258087619775e-06, "loss": 1.1038, "step": 18180 }, { "epoch": 0.77, "grad_norm": 51.659466178457414, "learning_rate": 9.271941268246284e-06, "loss": 1.1818, "step": 18185 }, { "epoch": 0.77, "grad_norm": 32.20468389648628, "learning_rate": 9.2713014012996e-06, "loss": 1.1615, "step": 18190 }, { "epoch": 0.77, "grad_norm": 25.027750831389525, "learning_rate": 9.270661275396496e-06, "loss": 1.1027, "step": 18195 }, { "epoch": 0.77, "grad_norm": 11.19228217314939, "learning_rate": 9.270020890575778e-06, "loss": 1.182, "step": 18200 }, { "epoch": 0.77, "grad_norm": 11.256715054942326, "learning_rate": 9.269380246876272e-06, "loss": 1.0932, "step": 18205 }, { "epoch": 0.77, "grad_norm": 12.695021259289096, "learning_rate": 9.268739344336816e-06, "loss": 1.0704, "step": 18210 }, { "epoch": 0.77, "grad_norm": 8.134439070614352, "learning_rate": 9.268098182996267e-06, "loss": 1.0791, "step": 18215 }, { "epoch": 0.77, "grad_norm": 6.478176467403107, "learning_rate": 9.267456762893499e-06, "loss": 1.1099, "step": 18220 }, { "epoch": 0.77, "grad_norm": 7.418045680315594, "learning_rate": 9.266815084067398e-06, "loss": 1.1563, "step": 18225 }, { "epoch": 0.77, "grad_norm": 7.349754864637389, "learning_rate": 9.266173146556865e-06, "loss": 1.1304, "step": 18230 }, { "epoch": 0.77, "grad_norm": 7.6400881312963085, "learning_rate": 9.265530950400821e-06, "loss": 1.1084, "step": 18235 }, { "epoch": 0.77, "grad_norm": 7.626035085964305, "learning_rate": 9.2648884956382e-06, "loss": 1.1198, "step": 18240 }, { "epoch": 0.77, "grad_norm": 5.461797984128544, "learning_rate": 9.264245782307953e-06, "loss": 1.1128, "step": 18245 }, { "epoch": 0.77, "grad_norm": 12.192555207213774, "learning_rate": 9.263602810449046e-06, "loss": 1.1169, "step": 18250 }, { "epoch": 0.77, "grad_norm": 8.466099159104695, "learning_rate": 9.262959580100457e-06, "loss": 1.1208, "step": 18255 }, { "epoch": 0.77, "grad_norm": 16.593427194263676, "learning_rate": 9.262316091301187e-06, "loss": 1.131, "step": 18260 }, { "epoch": 0.77, "grad_norm": 8.11305565293234, "learning_rate": 9.261672344090247e-06, "loss": 1.1044, "step": 18265 }, { "epoch": 0.77, "grad_norm": 19.64817614237251, "learning_rate": 9.261028338506666e-06, "loss": 1.094, "step": 18270 }, { "epoch": 0.77, "grad_norm": 7.723357452339682, "learning_rate": 9.260384074589487e-06, "loss": 1.153, "step": 18275 }, { "epoch": 0.77, "grad_norm": 6.534030160676935, "learning_rate": 9.259739552377773e-06, "loss": 1.0862, "step": 18280 }, { "epoch": 0.77, "grad_norm": 6.441695749826908, "learning_rate": 9.259094771910597e-06, "loss": 1.095, "step": 18285 }, { "epoch": 0.77, "grad_norm": 7.146424724688408, "learning_rate": 9.258449733227049e-06, "loss": 1.0893, "step": 18290 }, { "epoch": 0.77, "grad_norm": 8.34937160389928, "learning_rate": 9.257804436366239e-06, "loss": 1.0733, "step": 18295 }, { "epoch": 0.77, "grad_norm": 14.743914833467466, "learning_rate": 9.257158881367287e-06, "loss": 1.0894, "step": 18300 }, { "epoch": 0.77, "grad_norm": 9.70492924811333, "learning_rate": 9.256513068269333e-06, "loss": 1.0656, "step": 18305 }, { "epoch": 0.77, "grad_norm": 9.972679783154874, "learning_rate": 9.255866997111527e-06, "loss": 1.0959, "step": 18310 }, { "epoch": 0.78, "grad_norm": 20.10908837704645, "learning_rate": 9.255220667933044e-06, "loss": 1.1272, "step": 18315 }, { "epoch": 0.78, "grad_norm": 15.791800090881644, "learning_rate": 9.254574080773065e-06, "loss": 1.1172, "step": 18320 }, { "epoch": 0.78, "grad_norm": 20.716794303522093, "learning_rate": 9.25392723567079e-06, "loss": 1.1157, "step": 18325 }, { "epoch": 0.78, "grad_norm": 15.876879265665632, "learning_rate": 9.253280132665438e-06, "loss": 1.129, "step": 18330 }, { "epoch": 0.78, "grad_norm": 11.140608566368778, "learning_rate": 9.252632771796241e-06, "loss": 1.0718, "step": 18335 }, { "epoch": 0.78, "grad_norm": 7.32721571880933, "learning_rate": 9.251985153102445e-06, "loss": 1.0714, "step": 18340 }, { "epoch": 0.78, "grad_norm": 21.412824484047892, "learning_rate": 9.251337276623315e-06, "loss": 1.1693, "step": 18345 }, { "epoch": 0.78, "grad_norm": 13.576899680911131, "learning_rate": 9.250689142398128e-06, "loss": 1.1573, "step": 18350 }, { "epoch": 0.78, "grad_norm": 20.368065654779944, "learning_rate": 9.250040750466177e-06, "loss": 1.0656, "step": 18355 }, { "epoch": 0.78, "grad_norm": 20.024988350800516, "learning_rate": 9.249392100866777e-06, "loss": 1.1204, "step": 18360 }, { "epoch": 0.78, "grad_norm": 6.315975053603287, "learning_rate": 9.24874319363925e-06, "loss": 1.1216, "step": 18365 }, { "epoch": 0.78, "grad_norm": 5.061537153000435, "learning_rate": 9.248094028822938e-06, "loss": 1.0548, "step": 18370 }, { "epoch": 0.78, "grad_norm": 4.866519384445859, "learning_rate": 9.247444606457199e-06, "loss": 1.0836, "step": 18375 }, { "epoch": 0.78, "grad_norm": 8.840992088596085, "learning_rate": 9.246794926581404e-06, "loss": 1.1002, "step": 18380 }, { "epoch": 0.78, "grad_norm": 7.125959813886627, "learning_rate": 9.246144989234942e-06, "loss": 1.1379, "step": 18385 }, { "epoch": 0.78, "grad_norm": 6.093677475359922, "learning_rate": 9.245494794457216e-06, "loss": 1.1096, "step": 18390 }, { "epoch": 0.78, "grad_norm": 11.956324046401775, "learning_rate": 9.244844342287647e-06, "loss": 1.1054, "step": 18395 }, { "epoch": 0.78, "grad_norm": 7.923510744953704, "learning_rate": 9.24419363276567e-06, "loss": 1.0951, "step": 18400 }, { "epoch": 0.78, "grad_norm": 14.04440030568993, "learning_rate": 9.243542665930734e-06, "loss": 1.1066, "step": 18405 }, { "epoch": 0.78, "grad_norm": 5.648409708461334, "learning_rate": 9.242891441822304e-06, "loss": 1.0764, "step": 18410 }, { "epoch": 0.78, "grad_norm": 6.341981280631778, "learning_rate": 9.242239960479866e-06, "loss": 1.0876, "step": 18415 }, { "epoch": 0.78, "grad_norm": 7.1259447557074305, "learning_rate": 9.241588221942914e-06, "loss": 1.069, "step": 18420 }, { "epoch": 0.78, "grad_norm": 5.085677030674877, "learning_rate": 9.240936226250962e-06, "loss": 1.0434, "step": 18425 }, { "epoch": 0.78, "grad_norm": 5.735618808712853, "learning_rate": 9.24028397344354e-06, "loss": 1.13, "step": 18430 }, { "epoch": 0.78, "grad_norm": 5.742183155115284, "learning_rate": 9.23963146356019e-06, "loss": 1.1425, "step": 18435 }, { "epoch": 0.78, "grad_norm": 6.034925119789757, "learning_rate": 9.238978696640473e-06, "loss": 1.1215, "step": 18440 }, { "epoch": 0.78, "grad_norm": 5.957544218782891, "learning_rate": 9.238325672723964e-06, "loss": 1.1179, "step": 18445 }, { "epoch": 0.78, "grad_norm": 7.0995740054848255, "learning_rate": 9.237672391850255e-06, "loss": 1.1248, "step": 18450 }, { "epoch": 0.78, "grad_norm": 9.46097926388208, "learning_rate": 9.237018854058949e-06, "loss": 1.0863, "step": 18455 }, { "epoch": 0.78, "grad_norm": 6.3683893659291035, "learning_rate": 9.236365059389672e-06, "loss": 1.1092, "step": 18460 }, { "epoch": 0.78, "grad_norm": 5.614740256304254, "learning_rate": 9.235711007882057e-06, "loss": 1.0744, "step": 18465 }, { "epoch": 0.78, "grad_norm": 18.380904701796076, "learning_rate": 9.235056699575763e-06, "loss": 1.1049, "step": 18470 }, { "epoch": 0.78, "grad_norm": 5.530310889229228, "learning_rate": 9.234402134510456e-06, "loss": 1.1031, "step": 18475 }, { "epoch": 0.78, "grad_norm": 11.240197606016899, "learning_rate": 9.23374731272582e-06, "loss": 1.1016, "step": 18480 }, { "epoch": 0.78, "grad_norm": 16.227232089645792, "learning_rate": 9.233092234261557e-06, "loss": 1.1076, "step": 18485 }, { "epoch": 0.78, "grad_norm": 4.960853602955756, "learning_rate": 9.23243689915738e-06, "loss": 1.1437, "step": 18490 }, { "epoch": 0.78, "grad_norm": 11.844782647036462, "learning_rate": 9.231781307453019e-06, "loss": 1.0723, "step": 18495 }, { "epoch": 0.78, "grad_norm": 22.168588671403622, "learning_rate": 9.231125459188223e-06, "loss": 1.104, "step": 18500 }, { "epoch": 0.78, "grad_norm": 12.297962124650041, "learning_rate": 9.230469354402755e-06, "loss": 1.1606, "step": 18505 }, { "epoch": 0.78, "grad_norm": 6.858954595797314, "learning_rate": 9.229812993136392e-06, "loss": 1.0898, "step": 18510 }, { "epoch": 0.78, "grad_norm": 7.039162007908019, "learning_rate": 9.229156375428925e-06, "loss": 1.0209, "step": 18515 }, { "epoch": 0.78, "grad_norm": 14.488953158080891, "learning_rate": 9.228499501320164e-06, "loss": 1.0841, "step": 18520 }, { "epoch": 0.78, "grad_norm": 6.901604927770394, "learning_rate": 9.227842370849933e-06, "loss": 1.0845, "step": 18525 }, { "epoch": 0.78, "grad_norm": 15.274488941514678, "learning_rate": 9.227184984058075e-06, "loss": 1.048, "step": 18530 }, { "epoch": 0.78, "grad_norm": 7.215552861968598, "learning_rate": 9.226527340984442e-06, "loss": 1.0646, "step": 18535 }, { "epoch": 0.78, "grad_norm": 5.669264228477738, "learning_rate": 9.225869441668906e-06, "loss": 1.1157, "step": 18540 }, { "epoch": 0.78, "grad_norm": 8.411649102171838, "learning_rate": 9.225211286151353e-06, "loss": 1.1102, "step": 18545 }, { "epoch": 0.79, "grad_norm": 8.654181227317375, "learning_rate": 9.224552874471685e-06, "loss": 1.098, "step": 18550 }, { "epoch": 0.79, "grad_norm": 10.102364558992827, "learning_rate": 9.22389420666982e-06, "loss": 1.0893, "step": 18555 }, { "epoch": 0.79, "grad_norm": 6.382254349643196, "learning_rate": 9.223235282785692e-06, "loss": 1.104, "step": 18560 }, { "epoch": 0.79, "grad_norm": 11.827504129215075, "learning_rate": 9.22257610285925e-06, "loss": 1.0865, "step": 18565 }, { "epoch": 0.79, "grad_norm": 11.825696764593685, "learning_rate": 9.221916666930455e-06, "loss": 1.1063, "step": 18570 }, { "epoch": 0.79, "grad_norm": 26.225003421671392, "learning_rate": 9.22125697503929e-06, "loss": 1.1692, "step": 18575 }, { "epoch": 0.79, "grad_norm": 23.087112157295447, "learning_rate": 9.220597027225746e-06, "loss": 1.1267, "step": 18580 }, { "epoch": 0.79, "grad_norm": 9.137806415286787, "learning_rate": 9.219936823529838e-06, "loss": 1.1274, "step": 18585 }, { "epoch": 0.79, "grad_norm": 30.88384659808171, "learning_rate": 9.219276363991592e-06, "loss": 1.1425, "step": 18590 }, { "epoch": 0.79, "grad_norm": 18.257608901237255, "learning_rate": 9.218615648651046e-06, "loss": 1.1239, "step": 18595 }, { "epoch": 0.79, "grad_norm": 14.841531358854471, "learning_rate": 9.217954677548261e-06, "loss": 1.1022, "step": 18600 }, { "epoch": 0.79, "grad_norm": 30.428804838114356, "learning_rate": 9.217293450723307e-06, "loss": 1.1175, "step": 18605 }, { "epoch": 0.79, "grad_norm": 22.431269080361552, "learning_rate": 9.216631968216275e-06, "loss": 1.0723, "step": 18610 }, { "epoch": 0.79, "grad_norm": 9.17281384190824, "learning_rate": 9.215970230067268e-06, "loss": 1.0997, "step": 18615 }, { "epoch": 0.79, "grad_norm": 5.9231234344069685, "learning_rate": 9.215308236316402e-06, "loss": 1.0959, "step": 18620 }, { "epoch": 0.79, "grad_norm": 8.33253390344492, "learning_rate": 9.214645987003817e-06, "loss": 1.1546, "step": 18625 }, { "epoch": 0.79, "grad_norm": 10.182887200506652, "learning_rate": 9.213983482169658e-06, "loss": 1.1178, "step": 18630 }, { "epoch": 0.79, "grad_norm": 18.184168576979193, "learning_rate": 9.213320721854095e-06, "loss": 1.1034, "step": 18635 }, { "epoch": 0.79, "grad_norm": 5.343857355602296, "learning_rate": 9.212657706097305e-06, "loss": 1.095, "step": 18640 }, { "epoch": 0.79, "grad_norm": 10.258431794972262, "learning_rate": 9.21199443493949e-06, "loss": 1.0829, "step": 18645 }, { "epoch": 0.79, "grad_norm": 14.923175420362524, "learning_rate": 9.211330908420857e-06, "loss": 1.0947, "step": 18650 }, { "epoch": 0.79, "grad_norm": 34.55885679530513, "learning_rate": 9.210667126581636e-06, "loss": 1.1119, "step": 18655 }, { "epoch": 0.79, "grad_norm": 26.29222103457184, "learning_rate": 9.210003089462071e-06, "loss": 1.1027, "step": 18660 }, { "epoch": 0.79, "grad_norm": 39.09497882226064, "learning_rate": 9.20933879710242e-06, "loss": 1.1229, "step": 18665 }, { "epoch": 0.79, "grad_norm": 22.170186193514663, "learning_rate": 9.208674249542956e-06, "loss": 1.0841, "step": 18670 }, { "epoch": 0.79, "grad_norm": 11.492904018642998, "learning_rate": 9.20800944682397e-06, "loss": 1.0961, "step": 18675 }, { "epoch": 0.79, "grad_norm": 8.959205187086468, "learning_rate": 9.207344388985766e-06, "loss": 1.096, "step": 18680 }, { "epoch": 0.79, "grad_norm": 6.470877767454973, "learning_rate": 9.206679076068665e-06, "loss": 1.1152, "step": 18685 }, { "epoch": 0.79, "grad_norm": 19.487070228600583, "learning_rate": 9.206013508113e-06, "loss": 1.1321, "step": 18690 }, { "epoch": 0.79, "grad_norm": 19.664031635949794, "learning_rate": 9.20534768515913e-06, "loss": 1.1228, "step": 18695 }, { "epoch": 0.79, "grad_norm": 16.076270648885565, "learning_rate": 9.204681607247415e-06, "loss": 1.0941, "step": 18700 }, { "epoch": 0.79, "grad_norm": 10.218854558319101, "learning_rate": 9.204015274418239e-06, "loss": 1.1111, "step": 18705 }, { "epoch": 0.79, "grad_norm": 13.939959121000188, "learning_rate": 9.203348686712e-06, "loss": 1.0835, "step": 18710 }, { "epoch": 0.79, "grad_norm": 28.39451363343461, "learning_rate": 9.202681844169112e-06, "loss": 1.0916, "step": 18715 }, { "epoch": 0.79, "grad_norm": 14.97537603596154, "learning_rate": 9.202014746830004e-06, "loss": 1.065, "step": 18720 }, { "epoch": 0.79, "grad_norm": 36.05035382625349, "learning_rate": 9.201347394735119e-06, "loss": 1.1081, "step": 18725 }, { "epoch": 0.79, "grad_norm": 37.27792781835876, "learning_rate": 9.200679787924915e-06, "loss": 1.0945, "step": 18730 }, { "epoch": 0.79, "grad_norm": 26.38255343026431, "learning_rate": 9.200011926439872e-06, "loss": 1.1013, "step": 18735 }, { "epoch": 0.79, "grad_norm": 31.212573738405045, "learning_rate": 9.199343810320474e-06, "loss": 1.0924, "step": 18740 }, { "epoch": 0.79, "grad_norm": 23.260054360062796, "learning_rate": 9.198675439607232e-06, "loss": 1.1118, "step": 18745 }, { "epoch": 0.79, "grad_norm": 16.96758238167588, "learning_rate": 9.198006814340666e-06, "loss": 1.0758, "step": 18750 }, { "epoch": 0.79, "grad_norm": 18.199432752815447, "learning_rate": 9.197337934561312e-06, "loss": 1.1021, "step": 18755 }, { "epoch": 0.79, "grad_norm": 8.77019913257233, "learning_rate": 9.196668800309724e-06, "loss": 1.1559, "step": 18760 }, { "epoch": 0.79, "grad_norm": 7.660052237914633, "learning_rate": 9.195999411626467e-06, "loss": 1.0785, "step": 18765 }, { "epoch": 0.79, "grad_norm": 7.406555244894811, "learning_rate": 9.195329768552125e-06, "loss": 1.0345, "step": 18770 }, { "epoch": 0.79, "grad_norm": 7.56550525952078, "learning_rate": 9.194659871127299e-06, "loss": 1.1117, "step": 18775 }, { "epoch": 0.79, "grad_norm": 6.169526120138727, "learning_rate": 9.193989719392599e-06, "loss": 1.0913, "step": 18780 }, { "epoch": 0.8, "grad_norm": 6.14875418458956, "learning_rate": 9.193319313388657e-06, "loss": 1.0948, "step": 18785 }, { "epoch": 0.8, "grad_norm": 5.59929653686058, "learning_rate": 9.192648653156115e-06, "loss": 1.0959, "step": 18790 }, { "epoch": 0.8, "grad_norm": 5.401496333026, "learning_rate": 9.191977738735638e-06, "loss": 1.1397, "step": 18795 }, { "epoch": 0.8, "grad_norm": 6.36854464421448, "learning_rate": 9.191306570167897e-06, "loss": 1.0903, "step": 18800 }, { "epoch": 0.8, "grad_norm": 9.888985182158882, "learning_rate": 9.190635147493583e-06, "loss": 1.1292, "step": 18805 }, { "epoch": 0.8, "grad_norm": 15.694279392670527, "learning_rate": 9.189963470753406e-06, "loss": 1.0974, "step": 18810 }, { "epoch": 0.8, "grad_norm": 12.549628981745615, "learning_rate": 9.189291539988085e-06, "loss": 1.0868, "step": 18815 }, { "epoch": 0.8, "grad_norm": 7.924012018978287, "learning_rate": 9.18861935523836e-06, "loss": 1.064, "step": 18820 }, { "epoch": 0.8, "grad_norm": 7.163714404107731, "learning_rate": 9.187946916544979e-06, "loss": 1.0679, "step": 18825 }, { "epoch": 0.8, "grad_norm": 11.562522128754315, "learning_rate": 9.187274223948713e-06, "loss": 1.1086, "step": 18830 }, { "epoch": 0.8, "grad_norm": 8.356125579253188, "learning_rate": 9.186601277490345e-06, "loss": 1.0623, "step": 18835 }, { "epoch": 0.8, "grad_norm": 6.4770963823764545, "learning_rate": 9.185928077210673e-06, "loss": 1.0885, "step": 18840 }, { "epoch": 0.8, "grad_norm": 7.764008627091845, "learning_rate": 9.185254623150512e-06, "loss": 1.0774, "step": 18845 }, { "epoch": 0.8, "grad_norm": 9.253763251913796, "learning_rate": 9.184580915350692e-06, "loss": 1.092, "step": 18850 }, { "epoch": 0.8, "grad_norm": 8.89590025114452, "learning_rate": 9.183906953852055e-06, "loss": 1.0722, "step": 18855 }, { "epoch": 0.8, "grad_norm": 5.4778403355398195, "learning_rate": 9.183232738695466e-06, "loss": 1.0935, "step": 18860 }, { "epoch": 0.8, "grad_norm": 9.940980094998093, "learning_rate": 9.182558269921798e-06, "loss": 1.0745, "step": 18865 }, { "epoch": 0.8, "grad_norm": 10.440141846582979, "learning_rate": 9.181883547571941e-06, "loss": 1.0837, "step": 18870 }, { "epoch": 0.8, "grad_norm": 6.5994434274352765, "learning_rate": 9.181208571686803e-06, "loss": 1.1118, "step": 18875 }, { "epoch": 0.8, "grad_norm": 6.011216745860771, "learning_rate": 9.180533342307305e-06, "loss": 1.1157, "step": 18880 }, { "epoch": 0.8, "grad_norm": 8.521502193027965, "learning_rate": 9.179857859474386e-06, "loss": 1.0823, "step": 18885 }, { "epoch": 0.8, "grad_norm": 9.346760599258086, "learning_rate": 9.179182123228997e-06, "loss": 1.063, "step": 18890 }, { "epoch": 0.8, "grad_norm": 7.878544302423429, "learning_rate": 9.178506133612105e-06, "loss": 1.1045, "step": 18895 }, { "epoch": 0.8, "grad_norm": 8.178471866740171, "learning_rate": 9.177829890664695e-06, "loss": 1.0584, "step": 18900 }, { "epoch": 0.8, "grad_norm": 14.241658516893521, "learning_rate": 9.177153394427765e-06, "loss": 1.0686, "step": 18905 }, { "epoch": 0.8, "grad_norm": 17.54081778180161, "learning_rate": 9.176476644942328e-06, "loss": 1.0617, "step": 18910 }, { "epoch": 0.8, "grad_norm": 6.572734515834037, "learning_rate": 9.175799642249415e-06, "loss": 1.0825, "step": 18915 }, { "epoch": 0.8, "grad_norm": 9.223257770848594, "learning_rate": 9.175122386390069e-06, "loss": 1.0758, "step": 18920 }, { "epoch": 0.8, "grad_norm": 6.588816346488197, "learning_rate": 9.174444877405353e-06, "loss": 1.0816, "step": 18925 }, { "epoch": 0.8, "grad_norm": 13.27215080545339, "learning_rate": 9.173767115336338e-06, "loss": 1.1355, "step": 18930 }, { "epoch": 0.8, "grad_norm": 6.464572280662654, "learning_rate": 9.173089100224118e-06, "loss": 1.1102, "step": 18935 }, { "epoch": 0.8, "grad_norm": 9.647966082272436, "learning_rate": 9.1724108321098e-06, "loss": 1.0575, "step": 18940 }, { "epoch": 0.8, "grad_norm": 7.249108019532874, "learning_rate": 9.1717323110345e-06, "loss": 1.1121, "step": 18945 }, { "epoch": 0.8, "grad_norm": 14.032627924416587, "learning_rate": 9.17105353703936e-06, "loss": 1.0519, "step": 18950 }, { "epoch": 0.8, "grad_norm": 14.13910179747161, "learning_rate": 9.17037451016553e-06, "loss": 1.0683, "step": 18955 }, { "epoch": 0.8, "grad_norm": 10.344457997778898, "learning_rate": 9.16969523045418e-06, "loss": 1.0494, "step": 18960 }, { "epoch": 0.8, "grad_norm": 5.207826443825296, "learning_rate": 9.169015697946486e-06, "loss": 1.0673, "step": 18965 }, { "epoch": 0.8, "grad_norm": 6.292452697401298, "learning_rate": 9.168335912683653e-06, "loss": 1.1212, "step": 18970 }, { "epoch": 0.8, "grad_norm": 7.76765548899204, "learning_rate": 9.167655874706891e-06, "loss": 1.0713, "step": 18975 }, { "epoch": 0.8, "grad_norm": 5.915497305254457, "learning_rate": 9.166975584057432e-06, "loss": 1.0762, "step": 18980 }, { "epoch": 0.8, "grad_norm": 15.030166307519531, "learning_rate": 9.166295040776514e-06, "loss": 1.1023, "step": 18985 }, { "epoch": 0.8, "grad_norm": 8.328793277685982, "learning_rate": 9.165614244905402e-06, "loss": 1.1102, "step": 18990 }, { "epoch": 0.8, "grad_norm": 5.164809125917204, "learning_rate": 9.164933196485368e-06, "loss": 1.0572, "step": 18995 }, { "epoch": 0.8, "grad_norm": 10.264076581011489, "learning_rate": 9.164251895557704e-06, "loss": 1.0856, "step": 19000 }, { "epoch": 0.8, "grad_norm": 19.88653791877927, "learning_rate": 9.163570342163712e-06, "loss": 1.0841, "step": 19005 }, { "epoch": 0.8, "grad_norm": 5.56257230491043, "learning_rate": 9.162888536344714e-06, "loss": 1.0661, "step": 19010 }, { "epoch": 0.8, "grad_norm": 6.829204000371574, "learning_rate": 9.162206478142047e-06, "loss": 1.1083, "step": 19015 }, { "epoch": 0.8, "grad_norm": 7.96648729986634, "learning_rate": 9.16152416759706e-06, "loss": 1.0787, "step": 19020 }, { "epoch": 0.81, "grad_norm": 8.823120734154049, "learning_rate": 9.160841604751124e-06, "loss": 1.0879, "step": 19025 }, { "epoch": 0.81, "grad_norm": 7.557988977111406, "learning_rate": 9.160158789645616e-06, "loss": 1.0861, "step": 19030 }, { "epoch": 0.81, "grad_norm": 6.323859105048502, "learning_rate": 9.159475722321934e-06, "loss": 1.0493, "step": 19035 }, { "epoch": 0.81, "grad_norm": 6.146001316945294, "learning_rate": 9.158792402821493e-06, "loss": 1.0749, "step": 19040 }, { "epoch": 0.81, "grad_norm": 10.444302499400449, "learning_rate": 9.158108831185716e-06, "loss": 1.1309, "step": 19045 }, { "epoch": 0.81, "grad_norm": 27.666702919519214, "learning_rate": 9.157425007456051e-06, "loss": 1.0981, "step": 19050 }, { "epoch": 0.81, "grad_norm": 12.295897843798757, "learning_rate": 9.156740931673954e-06, "loss": 1.0839, "step": 19055 }, { "epoch": 0.81, "grad_norm": 18.243577534648047, "learning_rate": 9.156056603880896e-06, "loss": 1.136, "step": 19060 }, { "epoch": 0.81, "grad_norm": 10.791910175421773, "learning_rate": 9.15537202411837e-06, "loss": 1.0691, "step": 19065 }, { "epoch": 0.81, "grad_norm": 8.836083884623443, "learning_rate": 9.154687192427878e-06, "loss": 1.1183, "step": 19070 }, { "epoch": 0.81, "grad_norm": 4.678385330915157, "learning_rate": 9.154002108850938e-06, "loss": 1.1134, "step": 19075 }, { "epoch": 0.81, "grad_norm": 6.305402511879571, "learning_rate": 9.153316773429089e-06, "loss": 1.1513, "step": 19080 }, { "epoch": 0.81, "grad_norm": 18.107870763211942, "learning_rate": 9.152631186203877e-06, "loss": 1.1157, "step": 19085 }, { "epoch": 0.81, "grad_norm": 33.44419033884193, "learning_rate": 9.151945347216867e-06, "loss": 1.0885, "step": 19090 }, { "epoch": 0.81, "grad_norm": 17.62082181138283, "learning_rate": 9.151259256509641e-06, "loss": 1.09, "step": 19095 }, { "epoch": 0.81, "grad_norm": 15.11849150910975, "learning_rate": 9.150572914123795e-06, "loss": 1.079, "step": 19100 }, { "epoch": 0.81, "grad_norm": 14.763376625110595, "learning_rate": 9.149886320100939e-06, "loss": 1.1357, "step": 19105 }, { "epoch": 0.81, "grad_norm": 6.495754179251353, "learning_rate": 9.1491994744827e-06, "loss": 1.0966, "step": 19110 }, { "epoch": 0.81, "grad_norm": 9.86885507172298, "learning_rate": 9.148512377310716e-06, "loss": 1.086, "step": 19115 }, { "epoch": 0.81, "grad_norm": 9.035546965614941, "learning_rate": 9.14782502862665e-06, "loss": 1.0783, "step": 19120 }, { "epoch": 0.81, "grad_norm": 7.639223711428465, "learning_rate": 9.147137428472167e-06, "loss": 1.0931, "step": 19125 }, { "epoch": 0.81, "grad_norm": 12.007655885003556, "learning_rate": 9.146449576888958e-06, "loss": 1.0942, "step": 19130 }, { "epoch": 0.81, "grad_norm": 16.38938102398451, "learning_rate": 9.145761473918727e-06, "loss": 1.0962, "step": 19135 }, { "epoch": 0.81, "grad_norm": 30.007942150758492, "learning_rate": 9.145073119603188e-06, "loss": 1.0505, "step": 19140 }, { "epoch": 0.81, "grad_norm": 12.754590507173786, "learning_rate": 9.144384513984076e-06, "loss": 1.099, "step": 19145 }, { "epoch": 0.81, "grad_norm": 5.650281206295421, "learning_rate": 9.14369565710314e-06, "loss": 1.1044, "step": 19150 }, { "epoch": 0.81, "grad_norm": 5.2447836248349216, "learning_rate": 9.14300654900214e-06, "loss": 1.1031, "step": 19155 }, { "epoch": 0.81, "grad_norm": 5.459504512729552, "learning_rate": 9.142317189722856e-06, "loss": 1.0634, "step": 19160 }, { "epoch": 0.81, "grad_norm": 11.093171162012137, "learning_rate": 9.141627579307084e-06, "loss": 1.1065, "step": 19165 }, { "epoch": 0.81, "grad_norm": 6.419564898508516, "learning_rate": 9.140937717796631e-06, "loss": 1.0867, "step": 19170 }, { "epoch": 0.81, "grad_norm": 11.544563631803051, "learning_rate": 9.140247605233321e-06, "loss": 1.1049, "step": 19175 }, { "epoch": 0.81, "grad_norm": 5.777214234831718, "learning_rate": 9.139557241658995e-06, "loss": 1.1437, "step": 19180 }, { "epoch": 0.81, "grad_norm": 17.913400095080576, "learning_rate": 9.138866627115507e-06, "loss": 1.0458, "step": 19185 }, { "epoch": 0.81, "grad_norm": 22.401171282603954, "learning_rate": 9.138175761644726e-06, "loss": 1.0968, "step": 19190 }, { "epoch": 0.81, "grad_norm": 11.235794915938502, "learning_rate": 9.137484645288539e-06, "loss": 1.0726, "step": 19195 }, { "epoch": 0.81, "grad_norm": 10.814588677429702, "learning_rate": 9.136793278088844e-06, "loss": 1.0953, "step": 19200 }, { "epoch": 0.81, "grad_norm": 12.83755866548187, "learning_rate": 9.13610166008756e-06, "loss": 1.0906, "step": 19205 }, { "epoch": 0.81, "grad_norm": 6.309167887529496, "learning_rate": 9.135409791326614e-06, "loss": 1.0794, "step": 19210 }, { "epoch": 0.81, "grad_norm": 7.009610075576476, "learning_rate": 9.134717671847954e-06, "loss": 1.0954, "step": 19215 }, { "epoch": 0.81, "grad_norm": 6.613001490424345, "learning_rate": 9.134025301693542e-06, "loss": 1.0821, "step": 19220 }, { "epoch": 0.81, "grad_norm": 14.780312253020812, "learning_rate": 9.13333268090535e-06, "loss": 1.078, "step": 19225 }, { "epoch": 0.81, "grad_norm": 5.60593018369186, "learning_rate": 9.132639809525374e-06, "loss": 1.0939, "step": 19230 }, { "epoch": 0.81, "grad_norm": 13.688194726528058, "learning_rate": 9.131946687595621e-06, "loss": 1.0813, "step": 19235 }, { "epoch": 0.81, "grad_norm": 5.7923322045800605, "learning_rate": 9.13125331515811e-06, "loss": 1.0552, "step": 19240 }, { "epoch": 0.81, "grad_norm": 8.905352240938075, "learning_rate": 9.13055969225488e-06, "loss": 1.0717, "step": 19245 }, { "epoch": 0.81, "grad_norm": 8.973108039942938, "learning_rate": 9.129865818927984e-06, "loss": 1.1372, "step": 19250 }, { "epoch": 0.81, "grad_norm": 6.565336859234096, "learning_rate": 9.129171695219486e-06, "loss": 1.1252, "step": 19255 }, { "epoch": 0.82, "grad_norm": 5.9793779482690805, "learning_rate": 9.128477321171471e-06, "loss": 1.0295, "step": 19260 }, { "epoch": 0.82, "grad_norm": 8.301272087600875, "learning_rate": 9.127782696826037e-06, "loss": 1.0889, "step": 19265 }, { "epoch": 0.82, "grad_norm": 6.770130018522191, "learning_rate": 9.127087822225298e-06, "loss": 1.0691, "step": 19270 }, { "epoch": 0.82, "grad_norm": 5.107034374003805, "learning_rate": 9.126392697411382e-06, "loss": 1.0682, "step": 19275 }, { "epoch": 0.82, "grad_norm": 6.8254758260181845, "learning_rate": 9.12569732242643e-06, "loss": 1.0907, "step": 19280 }, { "epoch": 0.82, "grad_norm": 7.877748248639448, "learning_rate": 9.1250016973126e-06, "loss": 1.0949, "step": 19285 }, { "epoch": 0.82, "grad_norm": 4.994028524515401, "learning_rate": 9.124305822112071e-06, "loss": 1.015, "step": 19290 }, { "epoch": 0.82, "grad_norm": 6.6559920477563645, "learning_rate": 9.123609696867028e-06, "loss": 1.0665, "step": 19295 }, { "epoch": 0.82, "grad_norm": 18.045241127899516, "learning_rate": 9.122913321619675e-06, "loss": 1.0739, "step": 19300 }, { "epoch": 0.82, "grad_norm": 28.321087239813032, "learning_rate": 9.122216696412232e-06, "loss": 1.1056, "step": 19305 }, { "epoch": 0.82, "grad_norm": 13.012864156176525, "learning_rate": 9.121519821286933e-06, "loss": 1.0769, "step": 19310 }, { "epoch": 0.82, "grad_norm": 5.527751634950696, "learning_rate": 9.120822696286027e-06, "loss": 1.0878, "step": 19315 }, { "epoch": 0.82, "grad_norm": 24.561124908224215, "learning_rate": 9.12012532145178e-06, "loss": 1.1165, "step": 19320 }, { "epoch": 0.82, "grad_norm": 9.484752725806603, "learning_rate": 9.11942769682647e-06, "loss": 1.118, "step": 19325 }, { "epoch": 0.82, "grad_norm": 15.799356698049024, "learning_rate": 9.118729822452394e-06, "loss": 1.0865, "step": 19330 }, { "epoch": 0.82, "grad_norm": 8.404032264513745, "learning_rate": 9.11803169837186e-06, "loss": 1.114, "step": 19335 }, { "epoch": 0.82, "grad_norm": 17.759231298478277, "learning_rate": 9.117333324627195e-06, "loss": 1.0864, "step": 19340 }, { "epoch": 0.82, "grad_norm": 5.872268322535767, "learning_rate": 9.116634701260738e-06, "loss": 1.0893, "step": 19345 }, { "epoch": 0.82, "grad_norm": 5.282267041584997, "learning_rate": 9.115935828314847e-06, "loss": 1.1351, "step": 19350 }, { "epoch": 0.82, "grad_norm": 5.09022097777567, "learning_rate": 9.115236705831886e-06, "loss": 1.1151, "step": 19355 }, { "epoch": 0.82, "grad_norm": 8.97505691874872, "learning_rate": 9.11453733385425e-06, "loss": 1.0476, "step": 19360 }, { "epoch": 0.82, "grad_norm": 6.117871109799094, "learning_rate": 9.113837712424332e-06, "loss": 1.1249, "step": 19365 }, { "epoch": 0.82, "grad_norm": 6.911394135113546, "learning_rate": 9.113137841584552e-06, "loss": 1.1155, "step": 19370 }, { "epoch": 0.82, "grad_norm": 7.6286089657656575, "learning_rate": 9.112437721377341e-06, "loss": 1.0551, "step": 19375 }, { "epoch": 0.82, "grad_norm": 10.309863351619459, "learning_rate": 9.111737351845144e-06, "loss": 1.0989, "step": 19380 }, { "epoch": 0.82, "grad_norm": 10.530328281457148, "learning_rate": 9.111036733030425e-06, "loss": 1.0951, "step": 19385 }, { "epoch": 0.82, "grad_norm": 6.869816279715394, "learning_rate": 9.110335864975657e-06, "loss": 1.0407, "step": 19390 }, { "epoch": 0.82, "grad_norm": 7.611064080496048, "learning_rate": 9.109634747723331e-06, "loss": 1.0611, "step": 19395 }, { "epoch": 0.82, "grad_norm": 12.201274680216573, "learning_rate": 9.108933381315958e-06, "loss": 1.1019, "step": 19400 }, { "epoch": 0.82, "grad_norm": 24.447852592558526, "learning_rate": 9.108231765796055e-06, "loss": 1.0933, "step": 19405 }, { "epoch": 0.82, "grad_norm": 5.72430836714062, "learning_rate": 9.107529901206164e-06, "loss": 1.0837, "step": 19410 }, { "epoch": 0.82, "grad_norm": 8.772729618562487, "learning_rate": 9.106827787588832e-06, "loss": 1.091, "step": 19415 }, { "epoch": 0.82, "grad_norm": 20.059071865937053, "learning_rate": 9.106125424986629e-06, "loss": 1.1038, "step": 19420 }, { "epoch": 0.82, "grad_norm": 11.652340814006722, "learning_rate": 9.105422813442137e-06, "loss": 1.0728, "step": 19425 }, { "epoch": 0.82, "grad_norm": 34.18818336171334, "learning_rate": 9.104719952997953e-06, "loss": 1.1001, "step": 19430 }, { "epoch": 0.82, "grad_norm": 10.4046297526499, "learning_rate": 9.104016843696688e-06, "loss": 1.1397, "step": 19435 }, { "epoch": 0.82, "grad_norm": 17.37654636027216, "learning_rate": 9.103313485580972e-06, "loss": 1.0904, "step": 19440 }, { "epoch": 0.82, "grad_norm": 33.344432938612684, "learning_rate": 9.102609878693444e-06, "loss": 1.0952, "step": 19445 }, { "epoch": 0.82, "grad_norm": 11.74082419750613, "learning_rate": 9.101906023076765e-06, "loss": 1.0873, "step": 19450 }, { "epoch": 0.82, "grad_norm": 14.362731668300325, "learning_rate": 9.101201918773607e-06, "loss": 1.116, "step": 19455 }, { "epoch": 0.82, "grad_norm": 9.310362478294449, "learning_rate": 9.100497565826656e-06, "loss": 1.1179, "step": 19460 }, { "epoch": 0.82, "grad_norm": 17.532089238639685, "learning_rate": 9.099792964278616e-06, "loss": 1.0442, "step": 19465 }, { "epoch": 0.82, "grad_norm": 6.546423523225817, "learning_rate": 9.099088114172206e-06, "loss": 1.0908, "step": 19470 }, { "epoch": 0.82, "grad_norm": 13.724064341744679, "learning_rate": 9.098383015550155e-06, "loss": 1.0426, "step": 19475 }, { "epoch": 0.82, "grad_norm": 23.444118248180988, "learning_rate": 9.097677668455218e-06, "loss": 1.088, "step": 19480 }, { "epoch": 0.82, "grad_norm": 39.30190879515092, "learning_rate": 9.096972072930153e-06, "loss": 1.1005, "step": 19485 }, { "epoch": 0.82, "grad_norm": 28.292805688068835, "learning_rate": 9.096266229017738e-06, "loss": 1.0969, "step": 19490 }, { "epoch": 0.83, "grad_norm": 5.74188068243222, "learning_rate": 9.095560136760768e-06, "loss": 1.0869, "step": 19495 }, { "epoch": 0.83, "grad_norm": 12.535740606916772, "learning_rate": 9.09485379620205e-06, "loss": 1.0821, "step": 19500 }, { "epoch": 0.83, "grad_norm": 23.276572860000982, "learning_rate": 9.09414720738441e-06, "loss": 1.1266, "step": 19505 }, { "epoch": 0.83, "grad_norm": 40.002289558763835, "learning_rate": 9.093440370350682e-06, "loss": 1.0953, "step": 19510 }, { "epoch": 0.83, "grad_norm": 28.71328740336301, "learning_rate": 9.092733285143723e-06, "loss": 1.1166, "step": 19515 }, { "epoch": 0.83, "grad_norm": 45.9264411176706, "learning_rate": 9.092025951806402e-06, "loss": 1.1115, "step": 19520 }, { "epoch": 0.83, "grad_norm": 21.601372558443252, "learning_rate": 9.091318370381599e-06, "loss": 1.0908, "step": 19525 }, { "epoch": 0.83, "grad_norm": 27.49187266331318, "learning_rate": 9.090610540912214e-06, "loss": 1.0823, "step": 19530 }, { "epoch": 0.83, "grad_norm": 35.236884920197994, "learning_rate": 9.089902463441163e-06, "loss": 1.1576, "step": 19535 }, { "epoch": 0.83, "grad_norm": 26.254648166086003, "learning_rate": 9.089194138011372e-06, "loss": 1.0969, "step": 19540 }, { "epoch": 0.83, "grad_norm": 12.892745445502996, "learning_rate": 9.088485564665788e-06, "loss": 1.0708, "step": 19545 }, { "epoch": 0.83, "grad_norm": 8.370896317391928, "learning_rate": 9.087776743447364e-06, "loss": 1.0686, "step": 19550 }, { "epoch": 0.83, "grad_norm": 21.883055982097662, "learning_rate": 9.087067674399079e-06, "loss": 1.09, "step": 19555 }, { "epoch": 0.83, "grad_norm": 8.08360488336419, "learning_rate": 9.086358357563919e-06, "loss": 1.0829, "step": 19560 }, { "epoch": 0.83, "grad_norm": 6.738685106889233, "learning_rate": 9.085648792984887e-06, "loss": 1.0873, "step": 19565 }, { "epoch": 0.83, "grad_norm": 10.61050377511876, "learning_rate": 9.084938980705005e-06, "loss": 1.1301, "step": 19570 }, { "epoch": 0.83, "grad_norm": 9.007318275262184, "learning_rate": 9.084228920767306e-06, "loss": 1.0537, "step": 19575 }, { "epoch": 0.83, "grad_norm": 15.244836727040267, "learning_rate": 9.083518613214836e-06, "loss": 1.0601, "step": 19580 }, { "epoch": 0.83, "grad_norm": 5.324853940949712, "learning_rate": 9.082808058090662e-06, "loss": 1.1292, "step": 19585 }, { "epoch": 0.83, "grad_norm": 6.4246808006599485, "learning_rate": 9.082097255437863e-06, "loss": 1.0518, "step": 19590 }, { "epoch": 0.83, "grad_norm": 6.2451595886324975, "learning_rate": 9.08138620529953e-06, "loss": 1.0926, "step": 19595 }, { "epoch": 0.83, "grad_norm": 9.879622339407153, "learning_rate": 9.080674907718773e-06, "loss": 1.1288, "step": 19600 }, { "epoch": 0.83, "grad_norm": 13.63277475712674, "learning_rate": 9.079963362738717e-06, "loss": 1.0906, "step": 19605 }, { "epoch": 0.83, "grad_norm": 4.9457377690941655, "learning_rate": 9.0792515704025e-06, "loss": 1.0607, "step": 19610 }, { "epoch": 0.83, "grad_norm": 12.61193743839909, "learning_rate": 9.078539530753277e-06, "loss": 1.0933, "step": 19615 }, { "epoch": 0.83, "grad_norm": 7.934530538656906, "learning_rate": 9.077827243834216e-06, "loss": 1.0515, "step": 19620 }, { "epoch": 0.83, "grad_norm": 7.350044952936317, "learning_rate": 9.0771147096885e-06, "loss": 1.1146, "step": 19625 }, { "epoch": 0.83, "grad_norm": 9.394082976889703, "learning_rate": 9.076401928359328e-06, "loss": 1.0514, "step": 19630 }, { "epoch": 0.83, "grad_norm": 16.23560948344088, "learning_rate": 9.075688899889916e-06, "loss": 1.1166, "step": 19635 }, { "epoch": 0.83, "grad_norm": 5.958315161605933, "learning_rate": 9.074975624323491e-06, "loss": 1.0914, "step": 19640 }, { "epoch": 0.83, "grad_norm": 10.566861986679763, "learning_rate": 9.074262101703297e-06, "loss": 1.0859, "step": 19645 }, { "epoch": 0.83, "grad_norm": 5.632693449285916, "learning_rate": 9.073548332072592e-06, "loss": 1.0791, "step": 19650 }, { "epoch": 0.83, "grad_norm": 20.65064323140121, "learning_rate": 9.072834315474652e-06, "loss": 1.1455, "step": 19655 }, { "epoch": 0.83, "grad_norm": 24.610704855283625, "learning_rate": 9.072120051952765e-06, "loss": 1.1107, "step": 19660 }, { "epoch": 0.83, "grad_norm": 26.838465839568748, "learning_rate": 9.071405541550232e-06, "loss": 1.0736, "step": 19665 }, { "epoch": 0.83, "grad_norm": 18.03428719682385, "learning_rate": 9.070690784310375e-06, "loss": 1.1318, "step": 19670 }, { "epoch": 0.83, "grad_norm": 7.369698976471521, "learning_rate": 9.069975780276525e-06, "loss": 1.1233, "step": 19675 }, { "epoch": 0.83, "grad_norm": 5.880803190191005, "learning_rate": 9.069260529492034e-06, "loss": 1.1, "step": 19680 }, { "epoch": 0.83, "grad_norm": 6.2379545085251, "learning_rate": 9.068545032000262e-06, "loss": 1.0436, "step": 19685 }, { "epoch": 0.83, "grad_norm": 5.4150443652031015, "learning_rate": 9.06782928784459e-06, "loss": 1.0594, "step": 19690 }, { "epoch": 0.83, "grad_norm": 12.920069532461167, "learning_rate": 9.06711329706841e-06, "loss": 1.0735, "step": 19695 }, { "epoch": 0.83, "grad_norm": 11.10497312710696, "learning_rate": 9.06639705971513e-06, "loss": 1.1091, "step": 19700 }, { "epoch": 0.83, "grad_norm": 8.655612335696453, "learning_rate": 9.065680575828175e-06, "loss": 1.068, "step": 19705 }, { "epoch": 0.83, "grad_norm": 8.769799576207244, "learning_rate": 9.064963845450982e-06, "loss": 1.0921, "step": 19710 }, { "epoch": 0.83, "grad_norm": 12.515708396778855, "learning_rate": 9.064246868627007e-06, "loss": 1.1151, "step": 19715 }, { "epoch": 0.83, "grad_norm": 11.04131509292738, "learning_rate": 9.063529645399714e-06, "loss": 1.0905, "step": 19720 }, { "epoch": 0.83, "grad_norm": 10.315296274757085, "learning_rate": 9.062812175812588e-06, "loss": 1.0856, "step": 19725 }, { "epoch": 0.84, "grad_norm": 6.128517727264331, "learning_rate": 9.062094459909128e-06, "loss": 1.0619, "step": 19730 }, { "epoch": 0.84, "grad_norm": 8.66805833480779, "learning_rate": 9.061376497732845e-06, "loss": 1.1245, "step": 19735 }, { "epoch": 0.84, "grad_norm": 6.825044422790672, "learning_rate": 9.060658289327269e-06, "loss": 1.0863, "step": 19740 }, { "epoch": 0.84, "grad_norm": 6.590008143801267, "learning_rate": 9.059939834735943e-06, "loss": 1.0848, "step": 19745 }, { "epoch": 0.84, "grad_norm": 5.891054470172313, "learning_rate": 9.059221134002424e-06, "loss": 1.0208, "step": 19750 }, { "epoch": 0.84, "grad_norm": 10.079366340057518, "learning_rate": 9.058502187170282e-06, "loss": 1.0624, "step": 19755 }, { "epoch": 0.84, "grad_norm": 5.696786757151849, "learning_rate": 9.057782994283109e-06, "loss": 1.109, "step": 19760 }, { "epoch": 0.84, "grad_norm": 6.791281576320251, "learning_rate": 9.057063555384505e-06, "loss": 1.091, "step": 19765 }, { "epoch": 0.84, "grad_norm": 9.343303814871241, "learning_rate": 9.056343870518087e-06, "loss": 1.0976, "step": 19770 }, { "epoch": 0.84, "grad_norm": 6.42922021394102, "learning_rate": 9.05562393972749e-06, "loss": 1.1127, "step": 19775 }, { "epoch": 0.84, "grad_norm": 20.434826362852455, "learning_rate": 9.05490376305636e-06, "loss": 1.086, "step": 19780 }, { "epoch": 0.84, "grad_norm": 15.140866189177508, "learning_rate": 9.054183340548357e-06, "loss": 1.0985, "step": 19785 }, { "epoch": 0.84, "grad_norm": 11.213400280238245, "learning_rate": 9.053462672247162e-06, "loss": 1.1016, "step": 19790 }, { "epoch": 0.84, "grad_norm": 6.6912766315098215, "learning_rate": 9.052741758196463e-06, "loss": 1.0957, "step": 19795 }, { "epoch": 0.84, "grad_norm": 11.88233843925602, "learning_rate": 9.052020598439972e-06, "loss": 1.0864, "step": 19800 }, { "epoch": 0.84, "grad_norm": 11.95720025396798, "learning_rate": 9.051299193021405e-06, "loss": 1.1033, "step": 19805 }, { "epoch": 0.84, "grad_norm": 17.654101836644557, "learning_rate": 9.050577541984504e-06, "loss": 1.0988, "step": 19810 }, { "epoch": 0.84, "grad_norm": 10.800794635451995, "learning_rate": 9.049855645373015e-06, "loss": 1.104, "step": 19815 }, { "epoch": 0.84, "grad_norm": 31.641052757993517, "learning_rate": 9.049133503230708e-06, "loss": 1.0669, "step": 19820 }, { "epoch": 0.84, "grad_norm": 20.30315004564152, "learning_rate": 9.048411115601367e-06, "loss": 1.0696, "step": 19825 }, { "epoch": 0.84, "grad_norm": 44.01827957171962, "learning_rate": 9.047688482528783e-06, "loss": 1.1108, "step": 19830 }, { "epoch": 0.84, "grad_norm": 6.505357928869747, "learning_rate": 9.046965604056768e-06, "loss": 1.0876, "step": 19835 }, { "epoch": 0.84, "grad_norm": 9.48392857117296, "learning_rate": 9.046242480229151e-06, "loss": 1.0674, "step": 19840 }, { "epoch": 0.84, "grad_norm": 10.178370075808223, "learning_rate": 9.045519111089769e-06, "loss": 1.0723, "step": 19845 }, { "epoch": 0.84, "grad_norm": 21.580231628102435, "learning_rate": 9.044795496682482e-06, "loss": 1.0773, "step": 19850 }, { "epoch": 0.84, "grad_norm": 6.992579931934888, "learning_rate": 9.044071637051156e-06, "loss": 1.0985, "step": 19855 }, { "epoch": 0.84, "grad_norm": 21.68329726592509, "learning_rate": 9.043347532239682e-06, "loss": 1.034, "step": 19860 }, { "epoch": 0.84, "grad_norm": 5.202868556941622, "learning_rate": 9.042623182291955e-06, "loss": 1.0997, "step": 19865 }, { "epoch": 0.84, "grad_norm": 6.869264887140003, "learning_rate": 9.041898587251891e-06, "loss": 1.1441, "step": 19870 }, { "epoch": 0.84, "grad_norm": 6.605558241622164, "learning_rate": 9.041173747163423e-06, "loss": 1.1082, "step": 19875 }, { "epoch": 0.84, "grad_norm": 15.410529170905154, "learning_rate": 9.040448662070496e-06, "loss": 1.0713, "step": 19880 }, { "epoch": 0.84, "grad_norm": 21.61015827706873, "learning_rate": 9.039723332017066e-06, "loss": 1.1334, "step": 19885 }, { "epoch": 0.84, "grad_norm": 10.725539701215709, "learning_rate": 9.03899775704711e-06, "loss": 1.1025, "step": 19890 }, { "epoch": 0.84, "grad_norm": 9.885791292808605, "learning_rate": 9.038271937204617e-06, "loss": 1.0914, "step": 19895 }, { "epoch": 0.84, "grad_norm": 5.510368052497252, "learning_rate": 9.037545872533593e-06, "loss": 1.0794, "step": 19900 }, { "epoch": 0.84, "grad_norm": 5.672695980168514, "learning_rate": 9.036819563078057e-06, "loss": 1.0642, "step": 19905 }, { "epoch": 0.84, "grad_norm": 5.063951377464615, "learning_rate": 9.036093008882039e-06, "loss": 1.0824, "step": 19910 }, { "epoch": 0.84, "grad_norm": 22.049718130399935, "learning_rate": 9.035366209989592e-06, "loss": 1.092, "step": 19915 }, { "epoch": 0.84, "grad_norm": 27.384805349827268, "learning_rate": 9.034639166444776e-06, "loss": 1.1315, "step": 19920 }, { "epoch": 0.84, "grad_norm": 13.00327097821516, "learning_rate": 9.033911878291673e-06, "loss": 1.0554, "step": 19925 }, { "epoch": 0.84, "grad_norm": 13.525656956369206, "learning_rate": 9.033184345574375e-06, "loss": 1.1081, "step": 19930 }, { "epoch": 0.84, "grad_norm": 7.343269884581191, "learning_rate": 9.032456568336992e-06, "loss": 1.0505, "step": 19935 }, { "epoch": 0.84, "grad_norm": 6.742466878310966, "learning_rate": 9.031728546623644e-06, "loss": 1.106, "step": 19940 }, { "epoch": 0.84, "grad_norm": 10.827341983796607, "learning_rate": 9.031000280478471e-06, "loss": 1.1043, "step": 19945 }, { "epoch": 0.84, "grad_norm": 9.8023909340638, "learning_rate": 9.030271769945622e-06, "loss": 1.0994, "step": 19950 }, { "epoch": 0.84, "grad_norm": 8.330851482313204, "learning_rate": 9.029543015069268e-06, "loss": 1.0535, "step": 19955 }, { "epoch": 0.84, "grad_norm": 8.247238471473315, "learning_rate": 9.028814015893591e-06, "loss": 1.0763, "step": 19960 }, { "epoch": 0.84, "grad_norm": 31.42258428677921, "learning_rate": 9.02808477246279e-06, "loss": 1.1185, "step": 19965 }, { "epoch": 0.85, "grad_norm": 15.59514788468728, "learning_rate": 9.02735528482107e-06, "loss": 1.0878, "step": 19970 }, { "epoch": 0.85, "grad_norm": 13.624107785535747, "learning_rate": 9.026625553012664e-06, "loss": 1.0662, "step": 19975 }, { "epoch": 0.85, "grad_norm": 6.468863942961594, "learning_rate": 9.025895577081813e-06, "loss": 1.0798, "step": 19980 }, { "epoch": 0.85, "grad_norm": 6.473507673820163, "learning_rate": 9.02516535707277e-06, "loss": 1.0545, "step": 19985 }, { "epoch": 0.85, "grad_norm": 4.967272762152329, "learning_rate": 9.024434893029809e-06, "loss": 1.1045, "step": 19990 }, { "epoch": 0.85, "grad_norm": 6.0224164179803745, "learning_rate": 9.023704184997216e-06, "loss": 1.082, "step": 19995 }, { "epoch": 0.85, "grad_norm": 6.552637286883164, "learning_rate": 9.022973233019288e-06, "loss": 1.082, "step": 20000 }, { "epoch": 0.85, "grad_norm": 19.333768100992216, "learning_rate": 9.022242037140343e-06, "loss": 1.1083, "step": 20005 }, { "epoch": 0.85, "grad_norm": 5.640705155336257, "learning_rate": 9.021510597404712e-06, "loss": 1.028, "step": 20010 }, { "epoch": 0.85, "grad_norm": 9.2091170735482, "learning_rate": 9.020778913856738e-06, "loss": 1.1127, "step": 20015 }, { "epoch": 0.85, "grad_norm": 4.803134728674725, "learning_rate": 9.020046986540784e-06, "loss": 1.0781, "step": 20020 }, { "epoch": 0.85, "grad_norm": 6.9612602105252686, "learning_rate": 9.019314815501221e-06, "loss": 1.1101, "step": 20025 }, { "epoch": 0.85, "grad_norm": 5.757119428537681, "learning_rate": 9.01858240078244e-06, "loss": 1.1053, "step": 20030 }, { "epoch": 0.85, "grad_norm": 9.221655078730315, "learning_rate": 9.017849742428847e-06, "loss": 1.1389, "step": 20035 }, { "epoch": 0.85, "grad_norm": 6.128617971391468, "learning_rate": 9.017116840484858e-06, "loss": 1.0491, "step": 20040 }, { "epoch": 0.85, "grad_norm": 12.523177373523286, "learning_rate": 9.016383694994908e-06, "loss": 1.0592, "step": 20045 }, { "epoch": 0.85, "grad_norm": 12.85040321334905, "learning_rate": 9.015650306003445e-06, "loss": 1.0505, "step": 20050 }, { "epoch": 0.85, "grad_norm": 15.760789253857968, "learning_rate": 9.014916673554932e-06, "loss": 1.0888, "step": 20055 }, { "epoch": 0.85, "grad_norm": 14.141240520302501, "learning_rate": 9.014182797693847e-06, "loss": 1.102, "step": 20060 }, { "epoch": 0.85, "grad_norm": 5.243446148153748, "learning_rate": 9.013448678464685e-06, "loss": 1.0378, "step": 20065 }, { "epoch": 0.85, "grad_norm": 6.655297097676386, "learning_rate": 9.01271431591195e-06, "loss": 1.0683, "step": 20070 }, { "epoch": 0.85, "grad_norm": 9.262279623707647, "learning_rate": 9.011979710080168e-06, "loss": 1.0767, "step": 20075 }, { "epoch": 0.85, "grad_norm": 7.256080630066792, "learning_rate": 9.01124486101387e-06, "loss": 1.0688, "step": 20080 }, { "epoch": 0.85, "grad_norm": 6.997072070051807, "learning_rate": 9.010509768757615e-06, "loss": 1.0731, "step": 20085 }, { "epoch": 0.85, "grad_norm": 34.617390684592856, "learning_rate": 9.009774433355964e-06, "loss": 1.153, "step": 20090 }, { "epoch": 0.85, "grad_norm": 7.104900195519913, "learning_rate": 9.009038854853503e-06, "loss": 1.1183, "step": 20095 }, { "epoch": 0.85, "grad_norm": 18.819050127253945, "learning_rate": 9.008303033294823e-06, "loss": 1.0961, "step": 20100 }, { "epoch": 0.85, "grad_norm": 8.942577049123328, "learning_rate": 9.007566968724536e-06, "loss": 1.0912, "step": 20105 }, { "epoch": 0.85, "grad_norm": 13.93438485176529, "learning_rate": 9.00683066118727e-06, "loss": 1.0767, "step": 20110 }, { "epoch": 0.85, "grad_norm": 6.54712902837516, "learning_rate": 9.006094110727664e-06, "loss": 1.0883, "step": 20115 }, { "epoch": 0.85, "grad_norm": 5.759340423207265, "learning_rate": 9.005357317390372e-06, "loss": 1.0946, "step": 20120 }, { "epoch": 0.85, "grad_norm": 17.119935450790077, "learning_rate": 9.004620281220063e-06, "loss": 1.0717, "step": 20125 }, { "epoch": 0.85, "grad_norm": 21.515732978868932, "learning_rate": 9.003883002261423e-06, "loss": 1.0819, "step": 20130 }, { "epoch": 0.85, "grad_norm": 12.557007819034817, "learning_rate": 9.00314548055915e-06, "loss": 1.1214, "step": 20135 }, { "epoch": 0.85, "grad_norm": 7.591066386613737, "learning_rate": 9.002407716157959e-06, "loss": 1.0672, "step": 20140 }, { "epoch": 0.85, "grad_norm": 10.8559643287969, "learning_rate": 9.001669709102577e-06, "loss": 1.0787, "step": 20145 }, { "epoch": 0.85, "grad_norm": 10.420559727599148, "learning_rate": 9.000931459437749e-06, "loss": 1.0677, "step": 20150 }, { "epoch": 0.85, "grad_norm": 9.7387590482845, "learning_rate": 9.00019296720823e-06, "loss": 1.1189, "step": 20155 }, { "epoch": 0.85, "grad_norm": 6.497387769725192, "learning_rate": 8.999454232458795e-06, "loss": 1.1007, "step": 20160 }, { "epoch": 0.85, "grad_norm": 5.911551653629037, "learning_rate": 8.998715255234232e-06, "loss": 1.079, "step": 20165 }, { "epoch": 0.85, "grad_norm": 6.944551973676329, "learning_rate": 8.99797603557934e-06, "loss": 1.0517, "step": 20170 }, { "epoch": 0.85, "grad_norm": 5.304741232955825, "learning_rate": 8.997236573538937e-06, "loss": 1.1277, "step": 20175 }, { "epoch": 0.85, "grad_norm": 13.778028106667294, "learning_rate": 8.996496869157856e-06, "loss": 1.0798, "step": 20180 }, { "epoch": 0.85, "grad_norm": 9.231798086007403, "learning_rate": 8.995756922480942e-06, "loss": 1.0894, "step": 20185 }, { "epoch": 0.85, "grad_norm": 5.043254808534116, "learning_rate": 8.995016733553056e-06, "loss": 1.0831, "step": 20190 }, { "epoch": 0.85, "grad_norm": 7.879243494463987, "learning_rate": 8.994276302419072e-06, "loss": 1.0269, "step": 20195 }, { "epoch": 0.85, "grad_norm": 5.297548316870222, "learning_rate": 8.993535629123881e-06, "loss": 1.0558, "step": 20200 }, { "epoch": 0.86, "grad_norm": 9.946190612093227, "learning_rate": 8.992794713712389e-06, "loss": 1.1101, "step": 20205 }, { "epoch": 0.86, "grad_norm": 9.822172476018839, "learning_rate": 8.992053556229516e-06, "loss": 1.0688, "step": 20210 }, { "epoch": 0.86, "grad_norm": 7.79893351264903, "learning_rate": 8.991312156720193e-06, "loss": 1.0624, "step": 20215 }, { "epoch": 0.86, "grad_norm": 6.109727100432633, "learning_rate": 8.99057051522937e-06, "loss": 1.1096, "step": 20220 }, { "epoch": 0.86, "grad_norm": 6.79388252749503, "learning_rate": 8.989828631802014e-06, "loss": 1.0705, "step": 20225 }, { "epoch": 0.86, "grad_norm": 6.354789730293937, "learning_rate": 8.989086506483098e-06, "loss": 1.0845, "step": 20230 }, { "epoch": 0.86, "grad_norm": 13.389109037603895, "learning_rate": 8.988344139317618e-06, "loss": 1.0408, "step": 20235 }, { "epoch": 0.86, "grad_norm": 5.848361395776826, "learning_rate": 8.987601530350582e-06, "loss": 1.0563, "step": 20240 }, { "epoch": 0.86, "grad_norm": 6.706417136899323, "learning_rate": 8.986858679627011e-06, "loss": 1.0645, "step": 20245 }, { "epoch": 0.86, "grad_norm": 5.886295755280162, "learning_rate": 8.98611558719194e-06, "loss": 1.092, "step": 20250 }, { "epoch": 0.86, "grad_norm": 14.00277406789332, "learning_rate": 8.985372253090423e-06, "loss": 1.0701, "step": 20255 }, { "epoch": 0.86, "grad_norm": 8.568020195797528, "learning_rate": 8.984628677367527e-06, "loss": 1.0825, "step": 20260 }, { "epoch": 0.86, "grad_norm": 12.433120214978809, "learning_rate": 8.98388486006833e-06, "loss": 1.0918, "step": 20265 }, { "epoch": 0.86, "grad_norm": 15.155095513664751, "learning_rate": 8.983140801237931e-06, "loss": 1.0906, "step": 20270 }, { "epoch": 0.86, "grad_norm": 15.708769156445875, "learning_rate": 8.982396500921436e-06, "loss": 1.0612, "step": 20275 }, { "epoch": 0.86, "grad_norm": 6.12344335717567, "learning_rate": 8.981651959163972e-06, "loss": 1.0705, "step": 20280 }, { "epoch": 0.86, "grad_norm": 16.48037028658968, "learning_rate": 8.980907176010678e-06, "loss": 1.0742, "step": 20285 }, { "epoch": 0.86, "grad_norm": 18.42077654837551, "learning_rate": 8.980162151506708e-06, "loss": 1.0267, "step": 20290 }, { "epoch": 0.86, "grad_norm": 7.632192408542217, "learning_rate": 8.979416885697231e-06, "loss": 1.0954, "step": 20295 }, { "epoch": 0.86, "grad_norm": 5.966806795829951, "learning_rate": 8.97867137862743e-06, "loss": 1.0995, "step": 20300 }, { "epoch": 0.86, "grad_norm": 49.69778209689961, "learning_rate": 8.977925630342503e-06, "loss": 1.0752, "step": 20305 }, { "epoch": 0.86, "grad_norm": 13.307026872944558, "learning_rate": 8.977179640887661e-06, "loss": 1.1463, "step": 20310 }, { "epoch": 0.86, "grad_norm": 18.21257520093946, "learning_rate": 8.976433410308134e-06, "loss": 1.0799, "step": 20315 }, { "epoch": 0.86, "grad_norm": 33.002611892588924, "learning_rate": 8.975686938649162e-06, "loss": 1.1548, "step": 20320 }, { "epoch": 0.86, "grad_norm": 20.35768011153258, "learning_rate": 8.974940225956002e-06, "loss": 1.1126, "step": 20325 }, { "epoch": 0.86, "grad_norm": 14.607463469824511, "learning_rate": 8.974193272273925e-06, "loss": 1.0883, "step": 20330 }, { "epoch": 0.86, "grad_norm": 6.865027666199963, "learning_rate": 8.973446077648215e-06, "loss": 1.039, "step": 20335 }, { "epoch": 0.86, "grad_norm": 16.69134700460327, "learning_rate": 8.972698642124174e-06, "loss": 1.0965, "step": 20340 }, { "epoch": 0.86, "grad_norm": 5.399351492280459, "learning_rate": 8.971950965747117e-06, "loss": 1.0769, "step": 20345 }, { "epoch": 0.86, "grad_norm": 11.902657752331681, "learning_rate": 8.971203048562372e-06, "loss": 1.0457, "step": 20350 }, { "epoch": 0.86, "grad_norm": 10.524370329936866, "learning_rate": 8.970454890615283e-06, "loss": 1.067, "step": 20355 }, { "epoch": 0.86, "grad_norm": 5.86432894769377, "learning_rate": 8.969706491951211e-06, "loss": 1.099, "step": 20360 }, { "epoch": 0.86, "grad_norm": 18.858133900544203, "learning_rate": 8.968957852615527e-06, "loss": 1.0831, "step": 20365 }, { "epoch": 0.86, "grad_norm": 14.070009841432515, "learning_rate": 8.968208972653619e-06, "loss": 1.0457, "step": 20370 }, { "epoch": 0.86, "grad_norm": 25.502989972825734, "learning_rate": 8.967459852110889e-06, "loss": 1.1033, "step": 20375 }, { "epoch": 0.86, "grad_norm": 17.446731739851604, "learning_rate": 8.966710491032756e-06, "loss": 1.0618, "step": 20380 }, { "epoch": 0.86, "grad_norm": 7.701372549347564, "learning_rate": 8.965960889464649e-06, "loss": 1.0618, "step": 20385 }, { "epoch": 0.86, "grad_norm": 8.508137373487315, "learning_rate": 8.965211047452016e-06, "loss": 1.054, "step": 20390 }, { "epoch": 0.86, "grad_norm": 8.562783184307362, "learning_rate": 8.964460965040318e-06, "loss": 1.0725, "step": 20395 }, { "epoch": 0.86, "grad_norm": 10.357401594470959, "learning_rate": 8.963710642275029e-06, "loss": 1.0351, "step": 20400 }, { "epoch": 0.86, "grad_norm": 10.856682878737185, "learning_rate": 8.962960079201639e-06, "loss": 1.1016, "step": 20405 }, { "epoch": 0.86, "grad_norm": 5.200709766732144, "learning_rate": 8.962209275865653e-06, "loss": 1.0679, "step": 20410 }, { "epoch": 0.86, "grad_norm": 12.876027765183197, "learning_rate": 8.961458232312588e-06, "loss": 1.0743, "step": 20415 }, { "epoch": 0.86, "grad_norm": 9.675125683171963, "learning_rate": 8.960706948587981e-06, "loss": 1.1276, "step": 20420 }, { "epoch": 0.86, "grad_norm": 16.350548429975284, "learning_rate": 8.959955424737378e-06, "loss": 1.0713, "step": 20425 }, { "epoch": 0.86, "grad_norm": 8.117774312324682, "learning_rate": 8.959203660806342e-06, "loss": 1.0598, "step": 20430 }, { "epoch": 0.86, "grad_norm": 12.957991636458253, "learning_rate": 8.95845165684045e-06, "loss": 1.0998, "step": 20435 }, { "epoch": 0.87, "grad_norm": 15.229193068692858, "learning_rate": 8.957699412885295e-06, "loss": 1.0535, "step": 20440 }, { "epoch": 0.87, "grad_norm": 12.668239749634456, "learning_rate": 8.956946928986481e-06, "loss": 1.0856, "step": 20445 }, { "epoch": 0.87, "grad_norm": 9.851890856640829, "learning_rate": 8.95619420518963e-06, "loss": 1.0966, "step": 20450 }, { "epoch": 0.87, "grad_norm": 7.811605278014109, "learning_rate": 8.955441241540379e-06, "loss": 1.1098, "step": 20455 }, { "epoch": 0.87, "grad_norm": 8.046129936556467, "learning_rate": 8.954688038084376e-06, "loss": 1.0611, "step": 20460 }, { "epoch": 0.87, "grad_norm": 8.500104935071587, "learning_rate": 8.953934594867287e-06, "loss": 1.0544, "step": 20465 }, { "epoch": 0.87, "grad_norm": 5.481932886297046, "learning_rate": 8.95318091193479e-06, "loss": 1.0846, "step": 20470 }, { "epoch": 0.87, "grad_norm": 7.080761304040258, "learning_rate": 8.952426989332577e-06, "loss": 1.0182, "step": 20475 }, { "epoch": 0.87, "grad_norm": 14.224281468221, "learning_rate": 8.95167282710636e-06, "loss": 1.0712, "step": 20480 }, { "epoch": 0.87, "grad_norm": 18.84219314552434, "learning_rate": 8.950918425301858e-06, "loss": 1.0775, "step": 20485 }, { "epoch": 0.87, "grad_norm": 31.204784337293685, "learning_rate": 8.950163783964812e-06, "loss": 1.0813, "step": 20490 }, { "epoch": 0.87, "grad_norm": 20.484143649162505, "learning_rate": 8.949408903140968e-06, "loss": 1.0541, "step": 20495 }, { "epoch": 0.87, "grad_norm": 6.115122733564604, "learning_rate": 8.948653782876098e-06, "loss": 1.0568, "step": 20500 }, { "epoch": 0.87, "grad_norm": 5.505647491943554, "learning_rate": 8.94789842321598e-06, "loss": 1.0812, "step": 20505 }, { "epoch": 0.87, "grad_norm": 10.40810364648984, "learning_rate": 8.94714282420641e-06, "loss": 1.085, "step": 20510 }, { "epoch": 0.87, "grad_norm": 15.230703497318224, "learning_rate": 8.946386985893196e-06, "loss": 1.0775, "step": 20515 }, { "epoch": 0.87, "grad_norm": 5.705929919561878, "learning_rate": 8.945630908322164e-06, "loss": 1.0563, "step": 20520 }, { "epoch": 0.87, "grad_norm": 8.141083517598648, "learning_rate": 8.944874591539152e-06, "loss": 1.0818, "step": 20525 }, { "epoch": 0.87, "grad_norm": 5.1663110330952415, "learning_rate": 8.944118035590014e-06, "loss": 1.0462, "step": 20530 }, { "epoch": 0.87, "grad_norm": 8.749706182628607, "learning_rate": 8.943361240520616e-06, "loss": 1.0998, "step": 20535 }, { "epoch": 0.87, "grad_norm": 6.910245931412528, "learning_rate": 8.942604206376844e-06, "loss": 1.0616, "step": 20540 }, { "epoch": 0.87, "grad_norm": 20.380305710353692, "learning_rate": 8.94184693320459e-06, "loss": 1.0853, "step": 20545 }, { "epoch": 0.87, "grad_norm": 6.088292089443374, "learning_rate": 8.94108942104977e-06, "loss": 1.0674, "step": 20550 }, { "epoch": 0.87, "grad_norm": 6.908595489520451, "learning_rate": 8.940331669958304e-06, "loss": 1.0982, "step": 20555 }, { "epoch": 0.87, "grad_norm": 9.231517270423463, "learning_rate": 8.939573679976137e-06, "loss": 1.0693, "step": 20560 }, { "epoch": 0.87, "grad_norm": 13.75705596330069, "learning_rate": 8.938815451149224e-06, "loss": 1.0941, "step": 20565 }, { "epoch": 0.87, "grad_norm": 11.741464463568612, "learning_rate": 8.93805698352353e-06, "loss": 1.0467, "step": 20570 }, { "epoch": 0.87, "grad_norm": 5.68267101065953, "learning_rate": 8.937298277145041e-06, "loss": 1.1012, "step": 20575 }, { "epoch": 0.87, "grad_norm": 6.17126534449252, "learning_rate": 8.936539332059755e-06, "loss": 1.1025, "step": 20580 }, { "epoch": 0.87, "grad_norm": 12.702948326580303, "learning_rate": 8.935780148313686e-06, "loss": 1.098, "step": 20585 }, { "epoch": 0.87, "grad_norm": 12.86970600561228, "learning_rate": 8.93502072595286e-06, "loss": 1.0963, "step": 20590 }, { "epoch": 0.87, "grad_norm": 6.156514746335783, "learning_rate": 8.934261065023318e-06, "loss": 1.0958, "step": 20595 }, { "epoch": 0.87, "grad_norm": 6.158055846196725, "learning_rate": 8.933501165571115e-06, "loss": 1.0649, "step": 20600 }, { "epoch": 0.87, "grad_norm": 12.187059209033082, "learning_rate": 8.932741027642324e-06, "loss": 1.0818, "step": 20605 }, { "epoch": 0.87, "grad_norm": 6.930266801849595, "learning_rate": 8.93198065128303e-06, "loss": 1.0357, "step": 20610 }, { "epoch": 0.87, "grad_norm": 5.394440513658248, "learning_rate": 8.931220036539329e-06, "loss": 1.0529, "step": 20615 }, { "epoch": 0.87, "grad_norm": 5.629478752452526, "learning_rate": 8.930459183457338e-06, "loss": 1.1006, "step": 20620 }, { "epoch": 0.87, "grad_norm": 14.066311021056976, "learning_rate": 8.929698092083184e-06, "loss": 1.0507, "step": 20625 }, { "epoch": 0.87, "grad_norm": 19.557018415209853, "learning_rate": 8.92893676246301e-06, "loss": 1.0749, "step": 20630 }, { "epoch": 0.87, "grad_norm": 15.864813280868297, "learning_rate": 8.928175194642974e-06, "loss": 1.06, "step": 20635 }, { "epoch": 0.87, "grad_norm": 34.294989888131774, "learning_rate": 8.927413388669246e-06, "loss": 1.0518, "step": 20640 }, { "epoch": 0.87, "grad_norm": 33.18504417784154, "learning_rate": 8.926651344588014e-06, "loss": 1.12, "step": 20645 }, { "epoch": 0.87, "grad_norm": 52.22895972154031, "learning_rate": 8.925889062445475e-06, "loss": 1.0405, "step": 20650 }, { "epoch": 0.87, "grad_norm": 27.638755830578024, "learning_rate": 8.925126542287848e-06, "loss": 1.0766, "step": 20655 }, { "epoch": 0.87, "grad_norm": 16.14746930318794, "learning_rate": 8.924363784161358e-06, "loss": 1.0963, "step": 20660 }, { "epoch": 0.87, "grad_norm": 23.38002364267718, "learning_rate": 8.923600788112255e-06, "loss": 1.0448, "step": 20665 }, { "epoch": 0.87, "grad_norm": 14.527568486030331, "learning_rate": 8.922837554186791e-06, "loss": 1.0695, "step": 20670 }, { "epoch": 0.88, "grad_norm": 23.44621025035625, "learning_rate": 8.922074082431242e-06, "loss": 1.0786, "step": 20675 }, { "epoch": 0.88, "grad_norm": 10.3400610424946, "learning_rate": 8.921310372891896e-06, "loss": 1.0836, "step": 20680 }, { "epoch": 0.88, "grad_norm": 16.164554952646025, "learning_rate": 8.92054642561505e-06, "loss": 1.1359, "step": 20685 }, { "epoch": 0.88, "grad_norm": 6.931753405279445, "learning_rate": 8.919782240647024e-06, "loss": 1.0583, "step": 20690 }, { "epoch": 0.88, "grad_norm": 17.801030716947963, "learning_rate": 8.919017818034146e-06, "loss": 1.0792, "step": 20695 }, { "epoch": 0.88, "grad_norm": 15.31334068495149, "learning_rate": 8.918253157822762e-06, "loss": 1.0739, "step": 20700 }, { "epoch": 0.88, "grad_norm": 6.938565593692502, "learning_rate": 8.917488260059231e-06, "loss": 1.0941, "step": 20705 }, { "epoch": 0.88, "grad_norm": 7.405551159369352, "learning_rate": 8.916723124789925e-06, "loss": 1.0762, "step": 20710 }, { "epoch": 0.88, "grad_norm": 19.204296672551017, "learning_rate": 8.915957752061233e-06, "loss": 1.1, "step": 20715 }, { "epoch": 0.88, "grad_norm": 14.21333323378113, "learning_rate": 8.91519214191956e-06, "loss": 1.0697, "step": 20720 }, { "epoch": 0.88, "grad_norm": 18.83871330536022, "learning_rate": 8.914426294411317e-06, "loss": 1.0631, "step": 20725 }, { "epoch": 0.88, "grad_norm": 25.934339745583106, "learning_rate": 8.913660209582939e-06, "loss": 1.0939, "step": 20730 }, { "epoch": 0.88, "grad_norm": 6.257537411450811, "learning_rate": 8.912893887480871e-06, "loss": 1.041, "step": 20735 }, { "epoch": 0.88, "grad_norm": 5.142897804106785, "learning_rate": 8.912127328151571e-06, "loss": 1.0361, "step": 20740 }, { "epoch": 0.88, "grad_norm": 6.132320895741621, "learning_rate": 8.911360531641518e-06, "loss": 1.062, "step": 20745 }, { "epoch": 0.88, "grad_norm": 5.179315535499878, "learning_rate": 8.910593497997194e-06, "loss": 1.0799, "step": 20750 }, { "epoch": 0.88, "grad_norm": 5.187907941619817, "learning_rate": 8.909826227265106e-06, "loss": 1.0989, "step": 20755 }, { "epoch": 0.88, "grad_norm": 5.177659043854928, "learning_rate": 8.90905871949177e-06, "loss": 1.06, "step": 20760 }, { "epoch": 0.88, "grad_norm": 7.5550911277253405, "learning_rate": 8.908290974723721e-06, "loss": 1.0565, "step": 20765 }, { "epoch": 0.88, "grad_norm": 4.946757083052128, "learning_rate": 8.907522993007501e-06, "loss": 1.0353, "step": 20770 }, { "epoch": 0.88, "grad_norm": 5.52549635103769, "learning_rate": 8.90675477438967e-06, "loss": 1.0856, "step": 20775 }, { "epoch": 0.88, "grad_norm": 5.454723011371246, "learning_rate": 8.905986318916806e-06, "loss": 1.0717, "step": 20780 }, { "epoch": 0.88, "grad_norm": 6.053309484669723, "learning_rate": 8.905217626635498e-06, "loss": 1.0865, "step": 20785 }, { "epoch": 0.88, "grad_norm": 10.761567338458246, "learning_rate": 8.904448697592348e-06, "loss": 1.0641, "step": 20790 }, { "epoch": 0.88, "grad_norm": 10.520055402460125, "learning_rate": 8.903679531833975e-06, "loss": 1.0307, "step": 20795 }, { "epoch": 0.88, "grad_norm": 12.033838696527171, "learning_rate": 8.90291012940701e-06, "loss": 1.0553, "step": 20800 }, { "epoch": 0.88, "grad_norm": 15.765954286507517, "learning_rate": 8.9021404903581e-06, "loss": 1.1015, "step": 20805 }, { "epoch": 0.88, "grad_norm": 19.209895388757598, "learning_rate": 8.901370614733906e-06, "loss": 1.0945, "step": 20810 }, { "epoch": 0.88, "grad_norm": 12.406341842835518, "learning_rate": 8.900600502581103e-06, "loss": 1.0553, "step": 20815 }, { "epoch": 0.88, "grad_norm": 16.402156409278557, "learning_rate": 8.899830153946383e-06, "loss": 1.0928, "step": 20820 }, { "epoch": 0.88, "grad_norm": 26.428071666534358, "learning_rate": 8.899059568876447e-06, "loss": 1.0769, "step": 20825 }, { "epoch": 0.88, "grad_norm": 15.63819326871164, "learning_rate": 8.898288747418013e-06, "loss": 1.0898, "step": 20830 }, { "epoch": 0.88, "grad_norm": 7.303785869266925, "learning_rate": 8.897517689617816e-06, "loss": 1.0838, "step": 20835 }, { "epoch": 0.88, "grad_norm": 5.821092322354956, "learning_rate": 8.896746395522604e-06, "loss": 1.0604, "step": 20840 }, { "epoch": 0.88, "grad_norm": 6.341660470921059, "learning_rate": 8.895974865179134e-06, "loss": 1.0286, "step": 20845 }, { "epoch": 0.88, "grad_norm": 10.596173663255243, "learning_rate": 8.895203098634185e-06, "loss": 1.0588, "step": 20850 }, { "epoch": 0.88, "grad_norm": 6.366538384175701, "learning_rate": 8.894431095934546e-06, "loss": 1.0295, "step": 20855 }, { "epoch": 0.88, "grad_norm": 6.623112085436764, "learning_rate": 8.89365885712702e-06, "loss": 1.0751, "step": 20860 }, { "epoch": 0.88, "grad_norm": 6.643617595513142, "learning_rate": 8.892886382258428e-06, "loss": 1.0702, "step": 20865 }, { "epoch": 0.88, "grad_norm": 5.66271425253117, "learning_rate": 8.8921136713756e-06, "loss": 1.0499, "step": 20870 }, { "epoch": 0.88, "grad_norm": 9.159326185861467, "learning_rate": 8.891340724525386e-06, "loss": 1.0308, "step": 20875 }, { "epoch": 0.88, "grad_norm": 6.833823525018116, "learning_rate": 8.890567541754644e-06, "loss": 1.025, "step": 20880 }, { "epoch": 0.88, "grad_norm": 12.745727709978524, "learning_rate": 8.889794123110255e-06, "loss": 1.0511, "step": 20885 }, { "epoch": 0.88, "grad_norm": 12.938843311877473, "learning_rate": 8.889020468639105e-06, "loss": 1.0931, "step": 20890 }, { "epoch": 0.88, "grad_norm": 6.913676987686792, "learning_rate": 8.888246578388098e-06, "loss": 1.0751, "step": 20895 }, { "epoch": 0.88, "grad_norm": 22.48758205215652, "learning_rate": 8.887472452404155e-06, "loss": 1.0831, "step": 20900 }, { "epoch": 0.88, "grad_norm": 9.087265746936609, "learning_rate": 8.886698090734209e-06, "loss": 1.0422, "step": 20905 }, { "epoch": 0.88, "grad_norm": 12.119597605665433, "learning_rate": 8.885923493425207e-06, "loss": 1.0428, "step": 20910 }, { "epoch": 0.89, "grad_norm": 13.421824227753902, "learning_rate": 8.885148660524107e-06, "loss": 1.0522, "step": 20915 }, { "epoch": 0.89, "grad_norm": 12.855683202356163, "learning_rate": 8.88437359207789e-06, "loss": 1.0706, "step": 20920 }, { "epoch": 0.89, "grad_norm": 5.3552971430271, "learning_rate": 8.883598288133543e-06, "loss": 1.023, "step": 20925 }, { "epoch": 0.89, "grad_norm": 7.112546080983584, "learning_rate": 8.882822748738073e-06, "loss": 1.0557, "step": 20930 }, { "epoch": 0.89, "grad_norm": 20.83501223995967, "learning_rate": 8.882046973938495e-06, "loss": 1.0992, "step": 20935 }, { "epoch": 0.89, "grad_norm": 15.898196183156685, "learning_rate": 8.881270963781845e-06, "loss": 1.067, "step": 20940 }, { "epoch": 0.89, "grad_norm": 20.368831475959936, "learning_rate": 8.880494718315169e-06, "loss": 1.0645, "step": 20945 }, { "epoch": 0.89, "grad_norm": 14.840503456194964, "learning_rate": 8.879718237585528e-06, "loss": 1.0425, "step": 20950 }, { "epoch": 0.89, "grad_norm": 11.94553073434144, "learning_rate": 8.878941521639999e-06, "loss": 1.0555, "step": 20955 }, { "epoch": 0.89, "grad_norm": 21.304225090622737, "learning_rate": 8.87816457052567e-06, "loss": 1.0813, "step": 20960 }, { "epoch": 0.89, "grad_norm": 17.063780005158844, "learning_rate": 8.877387384289648e-06, "loss": 1.0206, "step": 20965 }, { "epoch": 0.89, "grad_norm": 7.71493471733423, "learning_rate": 8.876609962979051e-06, "loss": 1.0705, "step": 20970 }, { "epoch": 0.89, "grad_norm": 5.457989573601514, "learning_rate": 8.875832306641007e-06, "loss": 1.0568, "step": 20975 }, { "epoch": 0.89, "grad_norm": 13.199547787306146, "learning_rate": 8.875054415322672e-06, "loss": 1.069, "step": 20980 }, { "epoch": 0.89, "grad_norm": 8.397217092721782, "learning_rate": 8.874276289071198e-06, "loss": 1.0304, "step": 20985 }, { "epoch": 0.89, "grad_norm": 5.190563130924475, "learning_rate": 8.873497927933768e-06, "loss": 1.1021, "step": 20990 }, { "epoch": 0.89, "grad_norm": 6.791445325662123, "learning_rate": 8.872719331957566e-06, "loss": 1.0497, "step": 20995 }, { "epoch": 0.89, "grad_norm": 8.97302100044505, "learning_rate": 8.871940501189798e-06, "loss": 1.0483, "step": 21000 }, { "epoch": 0.89, "grad_norm": 5.535502584319712, "learning_rate": 8.871161435677684e-06, "loss": 1.0322, "step": 21005 }, { "epoch": 0.89, "grad_norm": 35.578972990516014, "learning_rate": 8.870382135468456e-06, "loss": 1.1031, "step": 21010 }, { "epoch": 0.89, "grad_norm": 6.2327016161187885, "learning_rate": 8.869602600609358e-06, "loss": 1.0906, "step": 21015 }, { "epoch": 0.89, "grad_norm": 5.810473207120032, "learning_rate": 8.868822831147651e-06, "loss": 1.0523, "step": 21020 }, { "epoch": 0.89, "grad_norm": 8.844436916856838, "learning_rate": 8.868042827130616e-06, "loss": 1.0779, "step": 21025 }, { "epoch": 0.89, "grad_norm": 5.218708977326758, "learning_rate": 8.867262588605536e-06, "loss": 1.0639, "step": 21030 }, { "epoch": 0.89, "grad_norm": 5.533044933174893, "learning_rate": 8.866482115619717e-06, "loss": 1.0608, "step": 21035 }, { "epoch": 0.89, "grad_norm": 6.454596487583646, "learning_rate": 8.865701408220475e-06, "loss": 1.0782, "step": 21040 }, { "epoch": 0.89, "grad_norm": 9.99224609192941, "learning_rate": 8.864920466455145e-06, "loss": 1.0687, "step": 21045 }, { "epoch": 0.89, "grad_norm": 7.927393602973079, "learning_rate": 8.86413929037107e-06, "loss": 1.1232, "step": 21050 }, { "epoch": 0.89, "grad_norm": 12.648906703651733, "learning_rate": 8.863357880015614e-06, "loss": 1.0797, "step": 21055 }, { "epoch": 0.89, "grad_norm": 13.729621479341857, "learning_rate": 8.862576235436148e-06, "loss": 1.0756, "step": 21060 }, { "epoch": 0.89, "grad_norm": 9.686381069102199, "learning_rate": 8.861794356680062e-06, "loss": 1.0806, "step": 21065 }, { "epoch": 0.89, "grad_norm": 19.142315008884896, "learning_rate": 8.861012243794763e-06, "loss": 1.053, "step": 21070 }, { "epoch": 0.89, "grad_norm": 21.590932178107565, "learning_rate": 8.86022989682766e-06, "loss": 1.069, "step": 21075 }, { "epoch": 0.89, "grad_norm": 15.617893747057055, "learning_rate": 8.859447315826193e-06, "loss": 1.0569, "step": 21080 }, { "epoch": 0.89, "grad_norm": 5.787898382927495, "learning_rate": 8.858664500837802e-06, "loss": 1.0509, "step": 21085 }, { "epoch": 0.89, "grad_norm": 11.134315365923271, "learning_rate": 8.857881451909946e-06, "loss": 1.0495, "step": 21090 }, { "epoch": 0.89, "grad_norm": 7.63660454211466, "learning_rate": 8.857098169090106e-06, "loss": 1.0577, "step": 21095 }, { "epoch": 0.89, "grad_norm": 12.141387720627309, "learning_rate": 8.85631465242576e-06, "loss": 1.0862, "step": 21100 }, { "epoch": 0.89, "grad_norm": 6.027946337922725, "learning_rate": 8.85553090196442e-06, "loss": 0.993, "step": 21105 }, { "epoch": 0.89, "grad_norm": 7.698045784393106, "learning_rate": 8.854746917753598e-06, "loss": 1.0491, "step": 21110 }, { "epoch": 0.89, "grad_norm": 14.001669152120114, "learning_rate": 8.853962699840823e-06, "loss": 1.0904, "step": 21115 }, { "epoch": 0.89, "grad_norm": 6.743278108038464, "learning_rate": 8.853178248273643e-06, "loss": 1.033, "step": 21120 }, { "epoch": 0.89, "grad_norm": 4.834385892911956, "learning_rate": 8.852393563099615e-06, "loss": 1.0709, "step": 21125 }, { "epoch": 0.89, "grad_norm": 13.304727341112551, "learning_rate": 8.851608644366313e-06, "loss": 1.0335, "step": 21130 }, { "epoch": 0.89, "grad_norm": 55.55135213246576, "learning_rate": 8.850823492121326e-06, "loss": 1.0712, "step": 21135 }, { "epoch": 0.89, "grad_norm": 21.269407661946715, "learning_rate": 8.850038106412252e-06, "loss": 1.0979, "step": 21140 }, { "epoch": 0.89, "grad_norm": 12.780294573712636, "learning_rate": 8.849252487286707e-06, "loss": 1.0481, "step": 21145 }, { "epoch": 0.9, "grad_norm": 6.307682314681879, "learning_rate": 8.848466634792324e-06, "loss": 1.0805, "step": 21150 }, { "epoch": 0.9, "grad_norm": 6.452304683137468, "learning_rate": 8.847680548976744e-06, "loss": 1.0428, "step": 21155 }, { "epoch": 0.9, "grad_norm": 10.633935619732695, "learning_rate": 8.846894229887625e-06, "loss": 1.033, "step": 21160 }, { "epoch": 0.9, "grad_norm": 7.035233577059684, "learning_rate": 8.846107677572643e-06, "loss": 1.0812, "step": 21165 }, { "epoch": 0.9, "grad_norm": 6.810927330439723, "learning_rate": 8.84532089207948e-06, "loss": 1.1059, "step": 21170 }, { "epoch": 0.9, "grad_norm": 12.457373964066747, "learning_rate": 8.844533873455839e-06, "loss": 1.0532, "step": 21175 }, { "epoch": 0.9, "grad_norm": 11.29557635870949, "learning_rate": 8.843746621749431e-06, "loss": 1.0529, "step": 21180 }, { "epoch": 0.9, "grad_norm": 18.903483485020317, "learning_rate": 8.84295913700799e-06, "loss": 1.1045, "step": 21185 }, { "epoch": 0.9, "grad_norm": 9.697674756133697, "learning_rate": 8.842171419279254e-06, "loss": 1.0873, "step": 21190 }, { "epoch": 0.9, "grad_norm": 11.637893625317709, "learning_rate": 8.841383468610986e-06, "loss": 1.0582, "step": 21195 }, { "epoch": 0.9, "grad_norm": 9.852088128804523, "learning_rate": 8.840595285050952e-06, "loss": 1.0517, "step": 21200 }, { "epoch": 0.9, "grad_norm": 6.642060777790125, "learning_rate": 8.839806868646939e-06, "loss": 1.0442, "step": 21205 }, { "epoch": 0.9, "grad_norm": 4.771363577990281, "learning_rate": 8.839018219446745e-06, "loss": 1.0318, "step": 21210 }, { "epoch": 0.9, "grad_norm": 5.191485223243958, "learning_rate": 8.838229337498184e-06, "loss": 1.0469, "step": 21215 }, { "epoch": 0.9, "grad_norm": 7.381567428825308, "learning_rate": 8.837440222849085e-06, "loss": 1.1077, "step": 21220 }, { "epoch": 0.9, "grad_norm": 5.872507242014159, "learning_rate": 8.83665087554729e-06, "loss": 1.0596, "step": 21225 }, { "epoch": 0.9, "grad_norm": 6.729158805788173, "learning_rate": 8.835861295640651e-06, "loss": 1.0328, "step": 21230 }, { "epoch": 0.9, "grad_norm": 13.881584941590122, "learning_rate": 8.835071483177043e-06, "loss": 1.0925, "step": 21235 }, { "epoch": 0.9, "grad_norm": 20.221204981328434, "learning_rate": 8.834281438204346e-06, "loss": 1.0432, "step": 21240 }, { "epoch": 0.9, "grad_norm": 25.58926523591417, "learning_rate": 8.83349116077046e-06, "loss": 1.1169, "step": 21245 }, { "epoch": 0.9, "grad_norm": 4.861630965217138, "learning_rate": 8.832700650923298e-06, "loss": 1.022, "step": 21250 }, { "epoch": 0.9, "grad_norm": 5.387333370723109, "learning_rate": 8.831909908710783e-06, "loss": 1.0343, "step": 21255 }, { "epoch": 0.9, "grad_norm": 12.849297724015658, "learning_rate": 8.83111893418086e-06, "loss": 1.0543, "step": 21260 }, { "epoch": 0.9, "grad_norm": 10.427884669005525, "learning_rate": 8.83032772738148e-06, "loss": 1.0992, "step": 21265 }, { "epoch": 0.9, "grad_norm": 5.618057487027922, "learning_rate": 8.829536288360611e-06, "loss": 1.0839, "step": 21270 }, { "epoch": 0.9, "grad_norm": 7.942745729346651, "learning_rate": 8.828744617166238e-06, "loss": 1.0663, "step": 21275 }, { "epoch": 0.9, "grad_norm": 6.342060660292795, "learning_rate": 8.827952713846357e-06, "loss": 1.0415, "step": 21280 }, { "epoch": 0.9, "grad_norm": 6.8713240571949274, "learning_rate": 8.827160578448979e-06, "loss": 1.0678, "step": 21285 }, { "epoch": 0.9, "grad_norm": 8.684412898819534, "learning_rate": 8.826368211022128e-06, "loss": 1.0481, "step": 21290 }, { "epoch": 0.9, "grad_norm": 9.399008127084763, "learning_rate": 8.825575611613843e-06, "loss": 1.0225, "step": 21295 }, { "epoch": 0.9, "grad_norm": 7.377956228972538, "learning_rate": 8.824782780272178e-06, "loss": 1.1008, "step": 21300 }, { "epoch": 0.9, "grad_norm": 5.2756897674620165, "learning_rate": 8.823989717045198e-06, "loss": 1.0509, "step": 21305 }, { "epoch": 0.9, "grad_norm": 5.507591575698728, "learning_rate": 8.823196421980987e-06, "loss": 1.0821, "step": 21310 }, { "epoch": 0.9, "grad_norm": 4.85916782231805, "learning_rate": 8.822402895127638e-06, "loss": 1.0823, "step": 21315 }, { "epoch": 0.9, "grad_norm": 6.744323366967096, "learning_rate": 8.82160913653326e-06, "loss": 1.0612, "step": 21320 }, { "epoch": 0.9, "grad_norm": 16.969147456939872, "learning_rate": 8.820815146245978e-06, "loss": 1.0899, "step": 21325 }, { "epoch": 0.9, "grad_norm": 15.716666278893374, "learning_rate": 8.820020924313928e-06, "loss": 1.0519, "step": 21330 }, { "epoch": 0.9, "grad_norm": 6.511279862971171, "learning_rate": 8.819226470785262e-06, "loss": 1.0858, "step": 21335 }, { "epoch": 0.9, "grad_norm": 10.637362562293369, "learning_rate": 8.818431785708143e-06, "loss": 1.0587, "step": 21340 }, { "epoch": 0.9, "grad_norm": 4.951445253670879, "learning_rate": 8.817636869130755e-06, "loss": 1.0846, "step": 21345 }, { "epoch": 0.9, "grad_norm": 7.889855922987261, "learning_rate": 8.816841721101287e-06, "loss": 1.0456, "step": 21350 }, { "epoch": 0.9, "grad_norm": 8.067959870935121, "learning_rate": 8.816046341667948e-06, "loss": 1.0435, "step": 21355 }, { "epoch": 0.9, "grad_norm": 9.687627150407593, "learning_rate": 8.815250730878962e-06, "loss": 1.0366, "step": 21360 }, { "epoch": 0.9, "grad_norm": 18.289593121532008, "learning_rate": 8.81445488878256e-06, "loss": 1.0521, "step": 21365 }, { "epoch": 0.9, "grad_norm": 17.720960834379003, "learning_rate": 8.813658815426995e-06, "loss": 1.0795, "step": 21370 }, { "epoch": 0.9, "grad_norm": 13.44152781084223, "learning_rate": 8.81286251086053e-06, "loss": 1.0637, "step": 21375 }, { "epoch": 0.9, "grad_norm": 9.492694841534716, "learning_rate": 8.812065975131442e-06, "loss": 1.0366, "step": 21380 }, { "epoch": 0.91, "grad_norm": 23.36785605756628, "learning_rate": 8.811269208288022e-06, "loss": 1.0937, "step": 21385 }, { "epoch": 0.91, "grad_norm": 14.642373998336643, "learning_rate": 8.810472210378578e-06, "loss": 1.0141, "step": 21390 }, { "epoch": 0.91, "grad_norm": 6.094154983879217, "learning_rate": 8.809674981451427e-06, "loss": 1.0569, "step": 21395 }, { "epoch": 0.91, "grad_norm": 6.460687961498408, "learning_rate": 8.808877521554901e-06, "loss": 1.0272, "step": 21400 }, { "epoch": 0.91, "grad_norm": 7.8291843082098564, "learning_rate": 8.808079830737354e-06, "loss": 1.041, "step": 21405 }, { "epoch": 0.91, "grad_norm": 8.623937533126616, "learning_rate": 8.807281909047142e-06, "loss": 0.9993, "step": 21410 }, { "epoch": 0.91, "grad_norm": 15.794917263344798, "learning_rate": 8.806483756532645e-06, "loss": 1.0967, "step": 21415 }, { "epoch": 0.91, "grad_norm": 5.515565518273104, "learning_rate": 8.805685373242248e-06, "loss": 1.0765, "step": 21420 }, { "epoch": 0.91, "grad_norm": 8.285868561488702, "learning_rate": 8.804886759224356e-06, "loss": 1.0596, "step": 21425 }, { "epoch": 0.91, "grad_norm": 4.851472600076748, "learning_rate": 8.804087914527389e-06, "loss": 1.0179, "step": 21430 }, { "epoch": 0.91, "grad_norm": 8.729506306413892, "learning_rate": 8.803288839199778e-06, "loss": 1.0859, "step": 21435 }, { "epoch": 0.91, "grad_norm": 10.726209158536673, "learning_rate": 8.802489533289966e-06, "loss": 1.0451, "step": 21440 }, { "epoch": 0.91, "grad_norm": 29.801373754501615, "learning_rate": 8.801689996846413e-06, "loss": 1.0383, "step": 21445 }, { "epoch": 0.91, "grad_norm": 9.899255609396299, "learning_rate": 8.800890229917597e-06, "loss": 1.0664, "step": 21450 }, { "epoch": 0.91, "grad_norm": 9.096716507522391, "learning_rate": 8.800090232552e-06, "loss": 1.0756, "step": 21455 }, { "epoch": 0.91, "grad_norm": 5.469882665770605, "learning_rate": 8.799290004798126e-06, "loss": 1.101, "step": 21460 }, { "epoch": 0.91, "grad_norm": 5.940804734570921, "learning_rate": 8.79848954670449e-06, "loss": 1.0801, "step": 21465 }, { "epoch": 0.91, "grad_norm": 10.168900541435045, "learning_rate": 8.797688858319623e-06, "loss": 1.0362, "step": 21470 }, { "epoch": 0.91, "grad_norm": 12.83710785361824, "learning_rate": 8.796887939692067e-06, "loss": 1.0336, "step": 21475 }, { "epoch": 0.91, "grad_norm": 27.06704901373965, "learning_rate": 8.796086790870377e-06, "loss": 0.9858, "step": 21480 }, { "epoch": 0.91, "grad_norm": 28.989457394553632, "learning_rate": 8.795285411903128e-06, "loss": 1.032, "step": 21485 }, { "epoch": 0.91, "grad_norm": 11.901257782767786, "learning_rate": 8.794483802838905e-06, "loss": 1.076, "step": 21490 }, { "epoch": 0.91, "grad_norm": 8.684125418272892, "learning_rate": 8.793681963726305e-06, "loss": 1.067, "step": 21495 }, { "epoch": 0.91, "grad_norm": 6.352511766000096, "learning_rate": 8.792879894613942e-06, "loss": 1.1057, "step": 21500 }, { "epoch": 0.91, "grad_norm": 7.871832773556916, "learning_rate": 8.792077595550443e-06, "loss": 1.0517, "step": 21505 }, { "epoch": 0.91, "grad_norm": 7.278087552393924, "learning_rate": 8.791275066584448e-06, "loss": 1.0908, "step": 21510 }, { "epoch": 0.91, "grad_norm": 9.343478919696157, "learning_rate": 8.790472307764615e-06, "loss": 1.0214, "step": 21515 }, { "epoch": 0.91, "grad_norm": 6.111609858482743, "learning_rate": 8.789669319139612e-06, "loss": 1.0369, "step": 21520 }, { "epoch": 0.91, "grad_norm": 5.406706354094906, "learning_rate": 8.78886610075812e-06, "loss": 1.01, "step": 21525 }, { "epoch": 0.91, "grad_norm": 6.335915396451932, "learning_rate": 8.788062652668838e-06, "loss": 1.0848, "step": 21530 }, { "epoch": 0.91, "grad_norm": 5.420963162954823, "learning_rate": 8.787258974920473e-06, "loss": 1.1064, "step": 21535 }, { "epoch": 0.91, "grad_norm": 10.615029492454818, "learning_rate": 8.786455067561755e-06, "loss": 1.0473, "step": 21540 }, { "epoch": 0.91, "grad_norm": 11.365865015705364, "learning_rate": 8.78565093064142e-06, "loss": 1.0412, "step": 21545 }, { "epoch": 0.91, "grad_norm": 17.57346314875629, "learning_rate": 8.784846564208216e-06, "loss": 1.0716, "step": 21550 }, { "epoch": 0.91, "grad_norm": 7.0366668798692755, "learning_rate": 8.784041968310916e-06, "loss": 1.0769, "step": 21555 }, { "epoch": 0.91, "grad_norm": 6.139804577220695, "learning_rate": 8.783237142998298e-06, "loss": 1.0266, "step": 21560 }, { "epoch": 0.91, "grad_norm": 6.52747414563638, "learning_rate": 8.782432088319155e-06, "loss": 1.0808, "step": 21565 }, { "epoch": 0.91, "grad_norm": 4.587613788154729, "learning_rate": 8.781626804322297e-06, "loss": 1.0394, "step": 21570 }, { "epoch": 0.91, "grad_norm": 6.6715182651851785, "learning_rate": 8.780821291056543e-06, "loss": 1.041, "step": 21575 }, { "epoch": 0.91, "grad_norm": 4.674414577419238, "learning_rate": 8.780015548570732e-06, "loss": 1.0008, "step": 21580 }, { "epoch": 0.91, "grad_norm": 9.574997036691096, "learning_rate": 8.779209576913714e-06, "loss": 1.0319, "step": 21585 }, { "epoch": 0.91, "grad_norm": 16.10578121438595, "learning_rate": 8.77840337613435e-06, "loss": 1.0339, "step": 21590 }, { "epoch": 0.91, "grad_norm": 16.73778871115213, "learning_rate": 8.777596946281518e-06, "loss": 1.0627, "step": 21595 }, { "epoch": 0.91, "grad_norm": 14.389316835856235, "learning_rate": 8.776790287404111e-06, "loss": 1.0496, "step": 21600 }, { "epoch": 0.91, "grad_norm": 12.491064468821286, "learning_rate": 8.775983399551036e-06, "loss": 1.0559, "step": 21605 }, { "epoch": 0.91, "grad_norm": 15.80902430708104, "learning_rate": 8.775176282771209e-06, "loss": 1.0155, "step": 21610 }, { "epoch": 0.91, "grad_norm": 7.154023048005005, "learning_rate": 8.774368937113562e-06, "loss": 1.0482, "step": 21615 }, { "epoch": 0.91, "grad_norm": 7.3376485078668905, "learning_rate": 8.773561362627046e-06, "loss": 1.0403, "step": 21620 }, { "epoch": 0.92, "grad_norm": 12.685281741953801, "learning_rate": 8.77275355936062e-06, "loss": 1.0422, "step": 21625 }, { "epoch": 0.92, "grad_norm": 12.784705236886882, "learning_rate": 8.771945527363258e-06, "loss": 1.0651, "step": 21630 }, { "epoch": 0.92, "grad_norm": 10.468583235877373, "learning_rate": 8.77113726668395e-06, "loss": 1.0873, "step": 21635 }, { "epoch": 0.92, "grad_norm": 13.840071593082772, "learning_rate": 8.770328777371697e-06, "loss": 1.1091, "step": 21640 }, { "epoch": 0.92, "grad_norm": 23.321627193632175, "learning_rate": 8.769520059475517e-06, "loss": 1.0721, "step": 21645 }, { "epoch": 0.92, "grad_norm": 5.336669417801813, "learning_rate": 8.768711113044439e-06, "loss": 1.0281, "step": 21650 }, { "epoch": 0.92, "grad_norm": 7.04609472650862, "learning_rate": 8.767901938127508e-06, "loss": 1.0611, "step": 21655 }, { "epoch": 0.92, "grad_norm": 11.885480966822543, "learning_rate": 8.76709253477378e-06, "loss": 1.0695, "step": 21660 }, { "epoch": 0.92, "grad_norm": 18.06587988887282, "learning_rate": 8.766282903032328e-06, "loss": 1.0506, "step": 21665 }, { "epoch": 0.92, "grad_norm": 16.495498752347274, "learning_rate": 8.765473042952237e-06, "loss": 1.0431, "step": 21670 }, { "epoch": 0.92, "grad_norm": 9.47121866701029, "learning_rate": 8.764662954582608e-06, "loss": 1.0271, "step": 21675 }, { "epoch": 0.92, "grad_norm": 9.207834602336261, "learning_rate": 8.763852637972552e-06, "loss": 1.0483, "step": 21680 }, { "epoch": 0.92, "grad_norm": 6.929564142189032, "learning_rate": 8.763042093171199e-06, "loss": 1.0721, "step": 21685 }, { "epoch": 0.92, "grad_norm": 7.444481860338322, "learning_rate": 8.762231320227687e-06, "loss": 1.0309, "step": 21690 }, { "epoch": 0.92, "grad_norm": 5.929177762962361, "learning_rate": 8.761420319191175e-06, "loss": 1.0396, "step": 21695 }, { "epoch": 0.92, "grad_norm": 8.111111542085034, "learning_rate": 8.760609090110826e-06, "loss": 1.0546, "step": 21700 }, { "epoch": 0.92, "grad_norm": 5.70990168497997, "learning_rate": 8.759797633035825e-06, "loss": 1.0612, "step": 21705 }, { "epoch": 0.92, "grad_norm": 5.59013134631506, "learning_rate": 8.758985948015367e-06, "loss": 1.0218, "step": 21710 }, { "epoch": 0.92, "grad_norm": 8.552035113880665, "learning_rate": 8.758174035098668e-06, "loss": 1.0847, "step": 21715 }, { "epoch": 0.92, "grad_norm": 8.140368859277036, "learning_rate": 8.757361894334943e-06, "loss": 1.0572, "step": 21720 }, { "epoch": 0.92, "grad_norm": 57.63361491524216, "learning_rate": 8.756549525773438e-06, "loss": 1.0483, "step": 21725 }, { "epoch": 0.92, "grad_norm": 47.2841645970153, "learning_rate": 8.755736929463397e-06, "loss": 1.0694, "step": 21730 }, { "epoch": 0.92, "grad_norm": 15.940659935050457, "learning_rate": 8.75492410545409e-06, "loss": 1.0751, "step": 21735 }, { "epoch": 0.92, "grad_norm": 11.61599805270187, "learning_rate": 8.754111053794795e-06, "loss": 1.0377, "step": 21740 }, { "epoch": 0.92, "grad_norm": 22.077601283259092, "learning_rate": 8.753297774534804e-06, "loss": 1.1211, "step": 21745 }, { "epoch": 0.92, "grad_norm": 33.27846151912245, "learning_rate": 8.752484267723427e-06, "loss": 1.012, "step": 21750 }, { "epoch": 0.92, "grad_norm": 32.92893552543508, "learning_rate": 8.75167053340998e-06, "loss": 1.0694, "step": 21755 }, { "epoch": 0.92, "grad_norm": 12.979473432915224, "learning_rate": 8.7508565716438e-06, "loss": 1.0257, "step": 21760 }, { "epoch": 0.92, "grad_norm": 9.061015184293739, "learning_rate": 8.750042382474235e-06, "loss": 1.0365, "step": 21765 }, { "epoch": 0.92, "grad_norm": 19.22056404639587, "learning_rate": 8.749227965950647e-06, "loss": 1.0847, "step": 21770 }, { "epoch": 0.92, "grad_norm": 44.50404057302526, "learning_rate": 8.74841332212241e-06, "loss": 1.071, "step": 21775 }, { "epoch": 0.92, "grad_norm": 21.410855373385008, "learning_rate": 8.747598451038914e-06, "loss": 1.0629, "step": 21780 }, { "epoch": 0.92, "grad_norm": 9.127279653696853, "learning_rate": 8.746783352749564e-06, "loss": 1.0882, "step": 21785 }, { "epoch": 0.92, "grad_norm": 8.852493897897254, "learning_rate": 8.745968027303775e-06, "loss": 1.0656, "step": 21790 }, { "epoch": 0.92, "grad_norm": 8.363963186886291, "learning_rate": 8.74515247475098e-06, "loss": 1.0656, "step": 21795 }, { "epoch": 0.92, "grad_norm": 22.089935622392925, "learning_rate": 8.744336695140621e-06, "loss": 1.0812, "step": 21800 }, { "epoch": 0.92, "grad_norm": 19.19163037925424, "learning_rate": 8.743520688522158e-06, "loss": 1.0956, "step": 21805 }, { "epoch": 0.92, "grad_norm": 6.047441321112834, "learning_rate": 8.742704454945063e-06, "loss": 1.0352, "step": 21810 }, { "epoch": 0.92, "grad_norm": 5.062421142821726, "learning_rate": 8.74188799445882e-06, "loss": 1.0091, "step": 21815 }, { "epoch": 0.92, "grad_norm": 7.495731062251956, "learning_rate": 8.741071307112931e-06, "loss": 1.0263, "step": 21820 }, { "epoch": 0.92, "grad_norm": 5.042633889615112, "learning_rate": 8.740254392956908e-06, "loss": 1.0816, "step": 21825 }, { "epoch": 0.92, "grad_norm": 5.076738491993376, "learning_rate": 8.739437252040279e-06, "loss": 1.0315, "step": 21830 }, { "epoch": 0.92, "grad_norm": 10.015667998514347, "learning_rate": 8.738619884412584e-06, "loss": 1.0495, "step": 21835 }, { "epoch": 0.92, "grad_norm": 7.3949257086583255, "learning_rate": 8.737802290123377e-06, "loss": 1.042, "step": 21840 }, { "epoch": 0.92, "grad_norm": 7.764006044340583, "learning_rate": 8.736984469222228e-06, "loss": 1.0415, "step": 21845 }, { "epoch": 0.92, "grad_norm": 8.09804184512544, "learning_rate": 8.736166421758719e-06, "loss": 1.0495, "step": 21850 }, { "epoch": 0.92, "grad_norm": 5.974383652800656, "learning_rate": 8.735348147782444e-06, "loss": 1.053, "step": 21855 }, { "epoch": 0.93, "grad_norm": 15.009311942839261, "learning_rate": 8.734529647343014e-06, "loss": 1.0612, "step": 21860 }, { "epoch": 0.93, "grad_norm": 6.121323101727387, "learning_rate": 8.733710920490051e-06, "loss": 1.076, "step": 21865 }, { "epoch": 0.93, "grad_norm": 6.79092212251893, "learning_rate": 8.732891967273194e-06, "loss": 1.0497, "step": 21870 }, { "epoch": 0.93, "grad_norm": 9.935487296396415, "learning_rate": 8.732072787742092e-06, "loss": 1.0392, "step": 21875 }, { "epoch": 0.93, "grad_norm": 13.793977192089978, "learning_rate": 8.731253381946411e-06, "loss": 1.049, "step": 21880 }, { "epoch": 0.93, "grad_norm": 9.48089571565006, "learning_rate": 8.730433749935826e-06, "loss": 1.0616, "step": 21885 }, { "epoch": 0.93, "grad_norm": 5.745497063889569, "learning_rate": 8.729613891760033e-06, "loss": 1.0816, "step": 21890 }, { "epoch": 0.93, "grad_norm": 10.890019287663492, "learning_rate": 8.728793807468737e-06, "loss": 1.0768, "step": 21895 }, { "epoch": 0.93, "grad_norm": 5.265183064840851, "learning_rate": 8.727973497111654e-06, "loss": 1.0326, "step": 21900 }, { "epoch": 0.93, "grad_norm": 8.815145211579305, "learning_rate": 8.72715296073852e-06, "loss": 1.0755, "step": 21905 }, { "epoch": 0.93, "grad_norm": 5.82723645844355, "learning_rate": 8.72633219839908e-06, "loss": 1.0264, "step": 21910 }, { "epoch": 0.93, "grad_norm": 9.72244192439977, "learning_rate": 8.725511210143096e-06, "loss": 1.0188, "step": 21915 }, { "epoch": 0.93, "grad_norm": 9.078545180241331, "learning_rate": 8.724689996020338e-06, "loss": 1.0749, "step": 21920 }, { "epoch": 0.93, "grad_norm": 8.70268875774656, "learning_rate": 8.7238685560806e-06, "loss": 1.0633, "step": 21925 }, { "epoch": 0.93, "grad_norm": 6.593572157882693, "learning_rate": 8.72304689037368e-06, "loss": 1.0389, "step": 21930 }, { "epoch": 0.93, "grad_norm": 7.637762503754141, "learning_rate": 8.722224998949394e-06, "loss": 1.0724, "step": 21935 }, { "epoch": 0.93, "grad_norm": 17.670081107075355, "learning_rate": 8.72140288185757e-06, "loss": 1.0172, "step": 21940 }, { "epoch": 0.93, "grad_norm": 35.55222885560427, "learning_rate": 8.720580539148051e-06, "loss": 1.0569, "step": 21945 }, { "epoch": 0.93, "grad_norm": 19.40025731407459, "learning_rate": 8.719757970870692e-06, "loss": 1.0306, "step": 21950 }, { "epoch": 0.93, "grad_norm": 15.076429975283755, "learning_rate": 8.718935177075364e-06, "loss": 1.0261, "step": 21955 }, { "epoch": 0.93, "grad_norm": 10.542991044235364, "learning_rate": 8.718112157811953e-06, "loss": 1.069, "step": 21960 }, { "epoch": 0.93, "grad_norm": 7.838222235628757, "learning_rate": 8.717288913130353e-06, "loss": 1.0041, "step": 21965 }, { "epoch": 0.93, "grad_norm": 9.170399537129438, "learning_rate": 8.716465443080476e-06, "loss": 1.075, "step": 21970 }, { "epoch": 0.93, "grad_norm": 9.4674115154313, "learning_rate": 8.715641747712246e-06, "loss": 1.0142, "step": 21975 }, { "epoch": 0.93, "grad_norm": 5.669374867755228, "learning_rate": 8.714817827075602e-06, "loss": 1.0923, "step": 21980 }, { "epoch": 0.93, "grad_norm": 7.820976552613582, "learning_rate": 8.713993681220496e-06, "loss": 1.0408, "step": 21985 }, { "epoch": 0.93, "grad_norm": 8.140315928607075, "learning_rate": 8.713169310196893e-06, "loss": 1.0784, "step": 21990 }, { "epoch": 0.93, "grad_norm": 5.374115522248512, "learning_rate": 8.71234471405477e-06, "loss": 1.0612, "step": 21995 }, { "epoch": 0.93, "grad_norm": 5.971696584577922, "learning_rate": 8.711519892844124e-06, "loss": 1.0208, "step": 22000 }, { "epoch": 0.93, "grad_norm": 13.450381433773453, "learning_rate": 8.710694846614958e-06, "loss": 1.028, "step": 22005 }, { "epoch": 0.93, "grad_norm": 7.070905848438207, "learning_rate": 8.709869575417296e-06, "loss": 1.0564, "step": 22010 }, { "epoch": 0.93, "grad_norm": 7.282452083221917, "learning_rate": 8.709044079301167e-06, "loss": 1.0273, "step": 22015 }, { "epoch": 0.93, "grad_norm": 7.6394521627122, "learning_rate": 8.708218358316622e-06, "loss": 1.0433, "step": 22020 }, { "epoch": 0.93, "grad_norm": 9.823966610599468, "learning_rate": 8.707392412513723e-06, "loss": 1.1086, "step": 22025 }, { "epoch": 0.93, "grad_norm": 17.6737234213204, "learning_rate": 8.706566241942538e-06, "loss": 1.0758, "step": 22030 }, { "epoch": 0.93, "grad_norm": 4.96099275751703, "learning_rate": 8.705739846653163e-06, "loss": 1.0564, "step": 22035 }, { "epoch": 0.93, "grad_norm": 5.396464325059147, "learning_rate": 8.704913226695694e-06, "loss": 1.0872, "step": 22040 }, { "epoch": 0.93, "grad_norm": 6.155400502711098, "learning_rate": 8.704086382120253e-06, "loss": 1.0688, "step": 22045 }, { "epoch": 0.93, "grad_norm": 11.276094930929235, "learning_rate": 8.703259312976964e-06, "loss": 0.9951, "step": 22050 }, { "epoch": 0.93, "grad_norm": 5.959162644711834, "learning_rate": 8.702432019315969e-06, "loss": 1.0726, "step": 22055 }, { "epoch": 0.93, "grad_norm": 8.8817955997993, "learning_rate": 8.701604501187429e-06, "loss": 1.0269, "step": 22060 }, { "epoch": 0.93, "grad_norm": 9.704106881361415, "learning_rate": 8.700776758641511e-06, "loss": 1.0408, "step": 22065 }, { "epoch": 0.93, "grad_norm": 9.948920096532369, "learning_rate": 8.699948791728398e-06, "loss": 1.0354, "step": 22070 }, { "epoch": 0.93, "grad_norm": 20.575258315768465, "learning_rate": 8.69912060049829e-06, "loss": 1.083, "step": 22075 }, { "epoch": 0.93, "grad_norm": 10.109688586745168, "learning_rate": 8.698292185001395e-06, "loss": 1.0532, "step": 22080 }, { "epoch": 0.93, "grad_norm": 14.700664626707857, "learning_rate": 8.697463545287939e-06, "loss": 1.0509, "step": 22085 }, { "epoch": 0.93, "grad_norm": 31.00422435019389, "learning_rate": 8.696634681408159e-06, "loss": 1.0516, "step": 22090 }, { "epoch": 0.94, "grad_norm": 30.538540018804785, "learning_rate": 8.695805593412307e-06, "loss": 1.035, "step": 22095 }, { "epoch": 0.94, "grad_norm": 6.582995982042233, "learning_rate": 8.694976281350648e-06, "loss": 0.9977, "step": 22100 }, { "epoch": 0.94, "grad_norm": 8.216779362198505, "learning_rate": 8.694146745273461e-06, "loss": 1.0347, "step": 22105 }, { "epoch": 0.94, "grad_norm": 43.73094129805623, "learning_rate": 8.693316985231039e-06, "loss": 1.0219, "step": 22110 }, { "epoch": 0.94, "grad_norm": 11.16376083750654, "learning_rate": 8.692487001273688e-06, "loss": 1.0525, "step": 22115 }, { "epoch": 0.94, "grad_norm": 34.72791944055447, "learning_rate": 8.691656793451726e-06, "loss": 1.056, "step": 22120 }, { "epoch": 0.94, "grad_norm": 69.58577127840131, "learning_rate": 8.690826361815486e-06, "loss": 1.0973, "step": 22125 }, { "epoch": 0.94, "grad_norm": 65.60558608180274, "learning_rate": 8.689995706415316e-06, "loss": 1.084, "step": 22130 }, { "epoch": 0.94, "grad_norm": 31.721758687253875, "learning_rate": 8.689164827301575e-06, "loss": 1.037, "step": 22135 }, { "epoch": 0.94, "grad_norm": 5.7644748360270235, "learning_rate": 8.688333724524637e-06, "loss": 1.0589, "step": 22140 }, { "epoch": 0.94, "grad_norm": 5.808601941765356, "learning_rate": 8.687502398134892e-06, "loss": 1.0694, "step": 22145 }, { "epoch": 0.94, "grad_norm": 11.527289182132591, "learning_rate": 8.686670848182736e-06, "loss": 1.0818, "step": 22150 }, { "epoch": 0.94, "grad_norm": 5.663093006318125, "learning_rate": 8.685839074718588e-06, "loss": 1.0566, "step": 22155 }, { "epoch": 0.94, "grad_norm": 5.395977401398287, "learning_rate": 8.685007077792871e-06, "loss": 1.0494, "step": 22160 }, { "epoch": 0.94, "grad_norm": 9.56342243810745, "learning_rate": 8.684174857456031e-06, "loss": 1.0433, "step": 22165 }, { "epoch": 0.94, "grad_norm": 6.210971882092373, "learning_rate": 8.683342413758522e-06, "loss": 1.0578, "step": 22170 }, { "epoch": 0.94, "grad_norm": 9.57597147072617, "learning_rate": 8.68250974675081e-06, "loss": 1.0456, "step": 22175 }, { "epoch": 0.94, "grad_norm": 5.802007712555317, "learning_rate": 8.681676856483383e-06, "loss": 1.072, "step": 22180 }, { "epoch": 0.94, "grad_norm": 5.752925562526565, "learning_rate": 8.680843743006731e-06, "loss": 1.0251, "step": 22185 }, { "epoch": 0.94, "grad_norm": 6.8426808897513665, "learning_rate": 8.680010406371364e-06, "loss": 1.0249, "step": 22190 }, { "epoch": 0.94, "grad_norm": 14.329783195261392, "learning_rate": 8.679176846627807e-06, "loss": 1.0299, "step": 22195 }, { "epoch": 0.94, "grad_norm": 9.776929723652254, "learning_rate": 8.678343063826594e-06, "loss": 1.0303, "step": 22200 }, { "epoch": 0.94, "grad_norm": 5.368130237537494, "learning_rate": 8.677509058018278e-06, "loss": 1.0773, "step": 22205 }, { "epoch": 0.94, "grad_norm": 5.61928727770007, "learning_rate": 8.676674829253419e-06, "loss": 1.0222, "step": 22210 }, { "epoch": 0.94, "grad_norm": 6.233023249819664, "learning_rate": 8.675840377582595e-06, "loss": 1.0411, "step": 22215 }, { "epoch": 0.94, "grad_norm": 7.97172455881486, "learning_rate": 8.675005703056399e-06, "loss": 1.044, "step": 22220 }, { "epoch": 0.94, "grad_norm": 13.314626258237514, "learning_rate": 8.674170805725428e-06, "loss": 1.059, "step": 22225 }, { "epoch": 0.94, "grad_norm": 15.545766082051502, "learning_rate": 8.673335685640305e-06, "loss": 0.9978, "step": 22230 }, { "epoch": 0.94, "grad_norm": 9.86523492801638, "learning_rate": 8.672500342851661e-06, "loss": 1.0564, "step": 22235 }, { "epoch": 0.94, "grad_norm": 11.914569423381977, "learning_rate": 8.671664777410138e-06, "loss": 1.0691, "step": 22240 }, { "epoch": 0.94, "grad_norm": 5.773933891915756, "learning_rate": 8.670828989366395e-06, "loss": 1.1061, "step": 22245 }, { "epoch": 0.94, "grad_norm": 5.446393775155094, "learning_rate": 8.669992978771104e-06, "loss": 1.0216, "step": 22250 }, { "epoch": 0.94, "grad_norm": 7.594977449224079, "learning_rate": 8.669156745674946e-06, "loss": 1.0548, "step": 22255 }, { "epoch": 0.94, "grad_norm": 11.22305430863255, "learning_rate": 8.668320290128626e-06, "loss": 1.0312, "step": 22260 }, { "epoch": 0.94, "grad_norm": 10.739578516044126, "learning_rate": 8.66748361218285e-06, "loss": 1.0732, "step": 22265 }, { "epoch": 0.94, "grad_norm": 12.354408314238693, "learning_rate": 8.666646711888348e-06, "loss": 1.0553, "step": 22270 }, { "epoch": 0.94, "grad_norm": 7.903784659517549, "learning_rate": 8.665809589295855e-06, "loss": 1.1459, "step": 22275 }, { "epoch": 0.94, "grad_norm": 8.243230755908847, "learning_rate": 8.664972244456122e-06, "loss": 1.072, "step": 22280 }, { "epoch": 0.94, "grad_norm": 9.100263469256676, "learning_rate": 8.664134677419921e-06, "loss": 1.0762, "step": 22285 }, { "epoch": 0.94, "grad_norm": 7.910965601986049, "learning_rate": 8.663296888238027e-06, "loss": 1.0732, "step": 22290 }, { "epoch": 0.94, "grad_norm": 10.029501501277, "learning_rate": 8.66245887696123e-06, "loss": 1.0584, "step": 22295 }, { "epoch": 0.94, "grad_norm": 28.751099622813843, "learning_rate": 8.661620643640343e-06, "loss": 1.0465, "step": 22300 }, { "epoch": 0.94, "grad_norm": 17.95335348479722, "learning_rate": 8.660782188326181e-06, "loss": 1.0192, "step": 22305 }, { "epoch": 0.94, "grad_norm": 8.530815863475906, "learning_rate": 8.659943511069578e-06, "loss": 1.0489, "step": 22310 }, { "epoch": 0.94, "grad_norm": 5.403582955689854, "learning_rate": 8.659104611921381e-06, "loss": 1.0601, "step": 22315 }, { "epoch": 0.94, "grad_norm": 4.75370249460377, "learning_rate": 8.65826549093245e-06, "loss": 1.0346, "step": 22320 }, { "epoch": 0.94, "grad_norm": 7.142803175559164, "learning_rate": 8.657426148153656e-06, "loss": 1.0341, "step": 22325 }, { "epoch": 0.95, "grad_norm": 16.139827316661293, "learning_rate": 8.656586583635888e-06, "loss": 1.0633, "step": 22330 }, { "epoch": 0.95, "grad_norm": 15.591077376580577, "learning_rate": 8.655746797430047e-06, "loss": 1.033, "step": 22335 }, { "epoch": 0.95, "grad_norm": 10.795394436701578, "learning_rate": 8.654906789587048e-06, "loss": 1.0995, "step": 22340 }, { "epoch": 0.95, "grad_norm": 10.812144871268645, "learning_rate": 8.654066560157814e-06, "loss": 1.0595, "step": 22345 }, { "epoch": 0.95, "grad_norm": 7.381837875600517, "learning_rate": 8.653226109193289e-06, "loss": 1.0755, "step": 22350 }, { "epoch": 0.95, "grad_norm": 6.724953554206786, "learning_rate": 8.652385436744426e-06, "loss": 1.055, "step": 22355 }, { "epoch": 0.95, "grad_norm": 19.75986423849521, "learning_rate": 8.651544542862192e-06, "loss": 1.0178, "step": 22360 }, { "epoch": 0.95, "grad_norm": 14.122213502198173, "learning_rate": 8.650703427597568e-06, "loss": 1.0619, "step": 22365 }, { "epoch": 0.95, "grad_norm": 8.715685344258217, "learning_rate": 8.649862091001548e-06, "loss": 1.0256, "step": 22370 }, { "epoch": 0.95, "grad_norm": 25.253990303156407, "learning_rate": 8.649020533125142e-06, "loss": 1.0669, "step": 22375 }, { "epoch": 0.95, "grad_norm": 18.860335000019642, "learning_rate": 8.64817875401937e-06, "loss": 1.0242, "step": 22380 }, { "epoch": 0.95, "grad_norm": 6.2157275145466135, "learning_rate": 8.647336753735264e-06, "loss": 1.0696, "step": 22385 }, { "epoch": 0.95, "grad_norm": 16.67555358222662, "learning_rate": 8.646494532323876e-06, "loss": 1.0699, "step": 22390 }, { "epoch": 0.95, "grad_norm": 17.85617248554722, "learning_rate": 8.645652089836265e-06, "loss": 1.075, "step": 22395 }, { "epoch": 0.95, "grad_norm": 28.737897039503693, "learning_rate": 8.644809426323505e-06, "loss": 1.049, "step": 22400 }, { "epoch": 0.95, "grad_norm": 13.60442217055186, "learning_rate": 8.643966541836688e-06, "loss": 1.0492, "step": 22405 }, { "epoch": 0.95, "grad_norm": 12.916508236432474, "learning_rate": 8.643123436426912e-06, "loss": 1.0932, "step": 22410 }, { "epoch": 0.95, "grad_norm": 8.646594956507517, "learning_rate": 8.642280110145294e-06, "loss": 1.0767, "step": 22415 }, { "epoch": 0.95, "grad_norm": 4.448217132402646, "learning_rate": 8.641436563042961e-06, "loss": 1.0644, "step": 22420 }, { "epoch": 0.95, "grad_norm": 4.597621449451742, "learning_rate": 8.640592795171056e-06, "loss": 1.0572, "step": 22425 }, { "epoch": 0.95, "grad_norm": 5.789746424483436, "learning_rate": 8.639748806580732e-06, "loss": 1.0236, "step": 22430 }, { "epoch": 0.95, "grad_norm": 5.924856195661939, "learning_rate": 8.638904597323161e-06, "loss": 1.0459, "step": 22435 }, { "epoch": 0.95, "grad_norm": 10.299525784868806, "learning_rate": 8.638060167449522e-06, "loss": 1.0626, "step": 22440 }, { "epoch": 0.95, "grad_norm": 14.895224853085512, "learning_rate": 8.63721551701101e-06, "loss": 1.028, "step": 22445 }, { "epoch": 0.95, "grad_norm": 11.478161503300399, "learning_rate": 8.636370646058838e-06, "loss": 1.0458, "step": 22450 }, { "epoch": 0.95, "grad_norm": 17.274506746035797, "learning_rate": 8.635525554644224e-06, "loss": 1.0211, "step": 22455 }, { "epoch": 0.95, "grad_norm": 12.771203714954781, "learning_rate": 8.634680242818402e-06, "loss": 1.0848, "step": 22460 }, { "epoch": 0.95, "grad_norm": 6.557339811779501, "learning_rate": 8.633834710632626e-06, "loss": 1.0283, "step": 22465 }, { "epoch": 0.95, "grad_norm": 5.975748174040248, "learning_rate": 8.632988958138155e-06, "loss": 1.0806, "step": 22470 }, { "epoch": 0.95, "grad_norm": 12.3011142207786, "learning_rate": 8.632142985386263e-06, "loss": 1.0379, "step": 22475 }, { "epoch": 0.95, "grad_norm": 6.9229304586236555, "learning_rate": 8.631296792428242e-06, "loss": 1.0316, "step": 22480 }, { "epoch": 0.95, "grad_norm": 7.602299842531993, "learning_rate": 8.630450379315392e-06, "loss": 1.0479, "step": 22485 }, { "epoch": 0.95, "grad_norm": 6.920623212628911, "learning_rate": 8.629603746099032e-06, "loss": 1.0572, "step": 22490 }, { "epoch": 0.95, "grad_norm": 7.6966586530370416, "learning_rate": 8.628756892830485e-06, "loss": 1.0222, "step": 22495 }, { "epoch": 0.95, "grad_norm": 5.145376790688642, "learning_rate": 8.627909819561098e-06, "loss": 1.0162, "step": 22500 }, { "epoch": 0.95, "grad_norm": 5.611645023317141, "learning_rate": 8.627062526342225e-06, "loss": 1.0677, "step": 22505 }, { "epoch": 0.95, "grad_norm": 4.939565094224612, "learning_rate": 8.626215013225234e-06, "loss": 1.07, "step": 22510 }, { "epoch": 0.95, "grad_norm": 6.724080822028831, "learning_rate": 8.625367280261508e-06, "loss": 1.0568, "step": 22515 }, { "epoch": 0.95, "grad_norm": 4.8959575722004764, "learning_rate": 8.624519327502444e-06, "loss": 1.0604, "step": 22520 }, { "epoch": 0.95, "grad_norm": 6.802583091897265, "learning_rate": 8.623671154999448e-06, "loss": 1.0278, "step": 22525 }, { "epoch": 0.95, "grad_norm": 8.773076392246919, "learning_rate": 8.622822762803944e-06, "loss": 1.0675, "step": 22530 }, { "epoch": 0.95, "grad_norm": 5.8317559020018885, "learning_rate": 8.621974150967367e-06, "loss": 1.0364, "step": 22535 }, { "epoch": 0.95, "grad_norm": 8.339746924570607, "learning_rate": 8.621125319541168e-06, "loss": 1.023, "step": 22540 }, { "epoch": 0.95, "grad_norm": 16.008673226914222, "learning_rate": 8.620276268576806e-06, "loss": 1.0415, "step": 22545 }, { "epoch": 0.95, "grad_norm": 9.598222235446073, "learning_rate": 8.619426998125758e-06, "loss": 1.0118, "step": 22550 }, { "epoch": 0.95, "grad_norm": 4.626896345472039, "learning_rate": 8.618577508239512e-06, "loss": 1.0377, "step": 22555 }, { "epoch": 0.95, "grad_norm": 9.368129709555259, "learning_rate": 8.617727798969571e-06, "loss": 1.0723, "step": 22560 }, { "epoch": 0.95, "grad_norm": 13.792895549901294, "learning_rate": 8.61687787036745e-06, "loss": 1.051, "step": 22565 }, { "epoch": 0.96, "grad_norm": 4.861500876880135, "learning_rate": 8.616027722484678e-06, "loss": 1.0185, "step": 22570 }, { "epoch": 0.96, "grad_norm": 5.978222204899534, "learning_rate": 8.615177355372794e-06, "loss": 1.034, "step": 22575 }, { "epoch": 0.96, "grad_norm": 5.727826171694468, "learning_rate": 8.614326769083359e-06, "loss": 1.0456, "step": 22580 }, { "epoch": 0.96, "grad_norm": 6.943462237602688, "learning_rate": 8.613475963667937e-06, "loss": 1.096, "step": 22585 }, { "epoch": 0.96, "grad_norm": 9.078749701164291, "learning_rate": 8.612624939178111e-06, "loss": 1.0733, "step": 22590 }, { "epoch": 0.96, "grad_norm": 23.593619568594583, "learning_rate": 8.611773695665477e-06, "loss": 1.0045, "step": 22595 }, { "epoch": 0.96, "grad_norm": 25.877109034643134, "learning_rate": 8.610922233181642e-06, "loss": 1.0181, "step": 22600 }, { "epoch": 0.96, "grad_norm": 6.4353597828378994, "learning_rate": 8.61007055177823e-06, "loss": 1.0615, "step": 22605 }, { "epoch": 0.96, "grad_norm": 17.499574040833348, "learning_rate": 8.609218651506872e-06, "loss": 1.013, "step": 22610 }, { "epoch": 0.96, "grad_norm": 13.659242476773333, "learning_rate": 8.60836653241922e-06, "loss": 1.0199, "step": 22615 }, { "epoch": 0.96, "grad_norm": 5.608400698627674, "learning_rate": 8.60751419456693e-06, "loss": 1.0506, "step": 22620 }, { "epoch": 0.96, "grad_norm": 8.702057135255766, "learning_rate": 8.606661638001686e-06, "loss": 1.0716, "step": 22625 }, { "epoch": 0.96, "grad_norm": 18.393959498606716, "learning_rate": 8.605808862775168e-06, "loss": 1.0154, "step": 22630 }, { "epoch": 0.96, "grad_norm": 20.833851670917028, "learning_rate": 8.60495586893908e-06, "loss": 0.9968, "step": 22635 }, { "epoch": 0.96, "grad_norm": 16.3987131118621, "learning_rate": 8.604102656545138e-06, "loss": 1.0248, "step": 22640 }, { "epoch": 0.96, "grad_norm": 25.58544936694752, "learning_rate": 8.603249225645066e-06, "loss": 1.0616, "step": 22645 }, { "epoch": 0.96, "grad_norm": 8.823068895473675, "learning_rate": 8.602395576290608e-06, "loss": 1.1135, "step": 22650 }, { "epoch": 0.96, "grad_norm": 11.178873329500044, "learning_rate": 8.601541708533517e-06, "loss": 1.0362, "step": 22655 }, { "epoch": 0.96, "grad_norm": 11.781944605070839, "learning_rate": 8.600687622425563e-06, "loss": 1.0649, "step": 22660 }, { "epoch": 0.96, "grad_norm": 5.097052698035258, "learning_rate": 8.599833318018522e-06, "loss": 1.0343, "step": 22665 }, { "epoch": 0.96, "grad_norm": 9.273011000979423, "learning_rate": 8.598978795364192e-06, "loss": 1.021, "step": 22670 }, { "epoch": 0.96, "grad_norm": 14.48210656093624, "learning_rate": 8.598124054514379e-06, "loss": 1.0797, "step": 22675 }, { "epoch": 0.96, "grad_norm": 6.864921184004051, "learning_rate": 8.597269095520902e-06, "loss": 1.0049, "step": 22680 }, { "epoch": 0.96, "grad_norm": 10.527082078116695, "learning_rate": 8.596413918435595e-06, "loss": 1.0382, "step": 22685 }, { "epoch": 0.96, "grad_norm": 12.188234754386398, "learning_rate": 8.595558523310307e-06, "loss": 1.0364, "step": 22690 }, { "epoch": 0.96, "grad_norm": 18.7546662979122, "learning_rate": 8.594702910196899e-06, "loss": 1.0623, "step": 22695 }, { "epoch": 0.96, "grad_norm": 20.44543714615951, "learning_rate": 8.593847079147239e-06, "loss": 1.0609, "step": 22700 }, { "epoch": 0.96, "grad_norm": 18.597882662544343, "learning_rate": 8.592991030213217e-06, "loss": 1.0423, "step": 22705 }, { "epoch": 0.96, "grad_norm": 9.733908470621946, "learning_rate": 8.592134763446733e-06, "loss": 1.0656, "step": 22710 }, { "epoch": 0.96, "grad_norm": 11.51079960986976, "learning_rate": 8.5912782788997e-06, "loss": 1.1192, "step": 22715 }, { "epoch": 0.96, "grad_norm": 5.5463955593727405, "learning_rate": 8.590421576624042e-06, "loss": 1.0244, "step": 22720 }, { "epoch": 0.96, "grad_norm": 5.199460656469777, "learning_rate": 8.589564656671701e-06, "loss": 1.0145, "step": 22725 }, { "epoch": 0.96, "grad_norm": 4.914944971914244, "learning_rate": 8.588707519094628e-06, "loss": 1.0361, "step": 22730 }, { "epoch": 0.96, "grad_norm": 15.26421108734658, "learning_rate": 8.587850163944788e-06, "loss": 1.0387, "step": 22735 }, { "epoch": 0.96, "grad_norm": 7.116683932140762, "learning_rate": 8.586992591274162e-06, "loss": 1.0479, "step": 22740 }, { "epoch": 0.96, "grad_norm": 11.364069259977557, "learning_rate": 8.586134801134741e-06, "loss": 1.024, "step": 22745 }, { "epoch": 0.96, "grad_norm": 7.5665919065188785, "learning_rate": 8.58527679357853e-06, "loss": 1.0422, "step": 22750 }, { "epoch": 0.96, "grad_norm": 5.114018625468076, "learning_rate": 8.584418568657547e-06, "loss": 1.0539, "step": 22755 }, { "epoch": 0.96, "grad_norm": 14.03998921719011, "learning_rate": 8.583560126423827e-06, "loss": 1.0758, "step": 22760 }, { "epoch": 0.96, "grad_norm": 19.54082984279, "learning_rate": 8.58270146692941e-06, "loss": 1.057, "step": 22765 }, { "epoch": 0.96, "grad_norm": 6.759790183732632, "learning_rate": 8.581842590226355e-06, "loss": 0.986, "step": 22770 }, { "epoch": 0.96, "grad_norm": 10.08746887017593, "learning_rate": 8.580983496366736e-06, "loss": 1.0062, "step": 22775 }, { "epoch": 0.96, "grad_norm": 14.319336390442473, "learning_rate": 8.580124185402636e-06, "loss": 1.0391, "step": 22780 }, { "epoch": 0.96, "grad_norm": 11.715863968001175, "learning_rate": 8.579264657386152e-06, "loss": 1.0558, "step": 22785 }, { "epoch": 0.96, "grad_norm": 4.9620813893898434, "learning_rate": 8.578404912369396e-06, "loss": 1.0364, "step": 22790 }, { "epoch": 0.96, "grad_norm": 5.853247406950727, "learning_rate": 8.57754495040449e-06, "loss": 1.0449, "step": 22795 }, { "epoch": 0.96, "grad_norm": 4.9325868163133855, "learning_rate": 8.57668477154357e-06, "loss": 1.0235, "step": 22800 }, { "epoch": 0.97, "grad_norm": 14.217207569081221, "learning_rate": 8.57582437583879e-06, "loss": 1.0196, "step": 22805 }, { "epoch": 0.97, "grad_norm": 4.6218494476714085, "learning_rate": 8.57496376334231e-06, "loss": 1.0021, "step": 22810 }, { "epoch": 0.97, "grad_norm": 5.609844277642452, "learning_rate": 8.574102934106308e-06, "loss": 1.0401, "step": 22815 }, { "epoch": 0.97, "grad_norm": 10.298393187484265, "learning_rate": 8.573241888182973e-06, "loss": 1.0818, "step": 22820 }, { "epoch": 0.97, "grad_norm": 6.640461642732947, "learning_rate": 8.572380625624506e-06, "loss": 1.0781, "step": 22825 }, { "epoch": 0.97, "grad_norm": 5.461278443647827, "learning_rate": 8.571519146483128e-06, "loss": 1.0423, "step": 22830 }, { "epoch": 0.97, "grad_norm": 14.584826313438121, "learning_rate": 8.57065745081106e-06, "loss": 1.0371, "step": 22835 }, { "epoch": 0.97, "grad_norm": 5.432323586901132, "learning_rate": 8.56979553866055e-06, "loss": 1.0653, "step": 22840 }, { "epoch": 0.97, "grad_norm": 10.274285011381062, "learning_rate": 8.568933410083853e-06, "loss": 1.0197, "step": 22845 }, { "epoch": 0.97, "grad_norm": 6.258067392149234, "learning_rate": 8.568071065133236e-06, "loss": 1.0112, "step": 22850 }, { "epoch": 0.97, "grad_norm": 5.439167452221515, "learning_rate": 8.56720850386098e-06, "loss": 1.0398, "step": 22855 }, { "epoch": 0.97, "grad_norm": 8.939122885874223, "learning_rate": 8.566345726319378e-06, "loss": 1.0363, "step": 22860 }, { "epoch": 0.97, "grad_norm": 5.57590426612838, "learning_rate": 8.565482732560741e-06, "loss": 0.9979, "step": 22865 }, { "epoch": 0.97, "grad_norm": 5.291005880357732, "learning_rate": 8.56461952263739e-06, "loss": 1.0466, "step": 22870 }, { "epoch": 0.97, "grad_norm": 7.933556562433127, "learning_rate": 8.563756096601655e-06, "loss": 1.0146, "step": 22875 }, { "epoch": 0.97, "grad_norm": 4.7314152230084545, "learning_rate": 8.562892454505886e-06, "loss": 1.0047, "step": 22880 }, { "epoch": 0.97, "grad_norm": 5.589110578264134, "learning_rate": 8.562028596402443e-06, "loss": 0.9974, "step": 22885 }, { "epoch": 0.97, "grad_norm": 5.708398943177179, "learning_rate": 8.561164522343697e-06, "loss": 1.0338, "step": 22890 }, { "epoch": 0.97, "grad_norm": 6.799722849333198, "learning_rate": 8.560300232382037e-06, "loss": 1.0386, "step": 22895 }, { "epoch": 0.97, "grad_norm": 6.503503606800315, "learning_rate": 8.559435726569862e-06, "loss": 1.058, "step": 22900 }, { "epoch": 0.97, "grad_norm": 4.459599596724459, "learning_rate": 8.558571004959582e-06, "loss": 0.9967, "step": 22905 }, { "epoch": 0.97, "grad_norm": 13.447624775927896, "learning_rate": 8.557706067603623e-06, "loss": 1.0293, "step": 22910 }, { "epoch": 0.97, "grad_norm": 8.15624737203624, "learning_rate": 8.556840914554428e-06, "loss": 1.0353, "step": 22915 }, { "epoch": 0.97, "grad_norm": 16.9682109648794, "learning_rate": 8.555975545864444e-06, "loss": 0.9997, "step": 22920 }, { "epoch": 0.97, "grad_norm": 29.099404126642884, "learning_rate": 8.555109961586137e-06, "loss": 1.0416, "step": 22925 }, { "epoch": 0.97, "grad_norm": 5.2843312621134935, "learning_rate": 8.554244161771982e-06, "loss": 1.0187, "step": 22930 }, { "epoch": 0.97, "grad_norm": 5.78562255681709, "learning_rate": 8.553378146474477e-06, "loss": 1.0735, "step": 22935 }, { "epoch": 0.97, "grad_norm": 11.797934466693091, "learning_rate": 8.55251191574612e-06, "loss": 1.078, "step": 22940 }, { "epoch": 0.97, "grad_norm": 6.736168536629084, "learning_rate": 8.55164546963943e-06, "loss": 1.0345, "step": 22945 }, { "epoch": 0.97, "grad_norm": 7.937905640880309, "learning_rate": 8.550778808206937e-06, "loss": 1.0122, "step": 22950 }, { "epoch": 0.97, "grad_norm": 10.165484689090224, "learning_rate": 8.549911931501185e-06, "loss": 1.0478, "step": 22955 }, { "epoch": 0.97, "grad_norm": 4.9071265823273125, "learning_rate": 8.549044839574727e-06, "loss": 1.0159, "step": 22960 }, { "epoch": 0.97, "grad_norm": 6.3988932790998545, "learning_rate": 8.548177532480135e-06, "loss": 1.0035, "step": 22965 }, { "epoch": 0.97, "grad_norm": 14.548916511792239, "learning_rate": 8.547310010269993e-06, "loss": 1.0499, "step": 22970 }, { "epoch": 0.97, "grad_norm": 6.064724224720091, "learning_rate": 8.546442272996891e-06, "loss": 1.0223, "step": 22975 }, { "epoch": 0.97, "grad_norm": 6.0437475407919905, "learning_rate": 8.545574320713442e-06, "loss": 1.0337, "step": 22980 }, { "epoch": 0.97, "grad_norm": 6.2573158804506805, "learning_rate": 8.544706153472266e-06, "loss": 1.0309, "step": 22985 }, { "epoch": 0.97, "grad_norm": 16.754930512289445, "learning_rate": 8.543837771325997e-06, "loss": 1.0839, "step": 22990 }, { "epoch": 0.97, "grad_norm": 16.255288409879782, "learning_rate": 8.542969174327283e-06, "loss": 1.0398, "step": 22995 }, { "epoch": 0.97, "grad_norm": 15.428333095801877, "learning_rate": 8.542100362528783e-06, "loss": 1.0421, "step": 23000 }, { "epoch": 0.97, "grad_norm": 21.36290421824013, "learning_rate": 8.541231335983171e-06, "loss": 0.9987, "step": 23005 }, { "epoch": 0.97, "grad_norm": 11.899910202340994, "learning_rate": 8.540362094743135e-06, "loss": 1.0552, "step": 23010 }, { "epoch": 0.97, "grad_norm": 9.980222913180013, "learning_rate": 8.539492638861375e-06, "loss": 1.0167, "step": 23015 }, { "epoch": 0.97, "grad_norm": 18.49719666601898, "learning_rate": 8.5386229683906e-06, "loss": 1.0419, "step": 23020 }, { "epoch": 0.97, "grad_norm": 10.104077917230324, "learning_rate": 8.537753083383539e-06, "loss": 1.0512, "step": 23025 }, { "epoch": 0.97, "grad_norm": 4.7957468631237115, "learning_rate": 8.53688298389293e-06, "loss": 1.0305, "step": 23030 }, { "epoch": 0.97, "grad_norm": 16.937029322802367, "learning_rate": 8.536012669971523e-06, "loss": 1.0241, "step": 23035 }, { "epoch": 0.98, "grad_norm": 6.726734339725752, "learning_rate": 8.535142141672083e-06, "loss": 1.0711, "step": 23040 }, { "epoch": 0.98, "grad_norm": 9.590678251498707, "learning_rate": 8.534271399047389e-06, "loss": 1.0331, "step": 23045 }, { "epoch": 0.98, "grad_norm": 27.443177665631616, "learning_rate": 8.53340044215023e-06, "loss": 1.057, "step": 23050 }, { "epoch": 0.98, "grad_norm": 8.709461693344375, "learning_rate": 8.532529271033412e-06, "loss": 1.0476, "step": 23055 }, { "epoch": 0.98, "grad_norm": 10.52151672361176, "learning_rate": 8.531657885749747e-06, "loss": 1.05, "step": 23060 }, { "epoch": 0.98, "grad_norm": 28.16659603080454, "learning_rate": 8.530786286352068e-06, "loss": 1.0471, "step": 23065 }, { "epoch": 0.98, "grad_norm": 8.819881132559455, "learning_rate": 8.529914472893217e-06, "loss": 0.9943, "step": 23070 }, { "epoch": 0.98, "grad_norm": 15.970063304840865, "learning_rate": 8.52904244542605e-06, "loss": 1.0482, "step": 23075 }, { "epoch": 0.98, "grad_norm": 5.066594950420572, "learning_rate": 8.528170204003434e-06, "loss": 1.0257, "step": 23080 }, { "epoch": 0.98, "grad_norm": 12.304265522139255, "learning_rate": 8.527297748678252e-06, "loss": 1.0259, "step": 23085 }, { "epoch": 0.98, "grad_norm": 16.77287028967692, "learning_rate": 8.526425079503395e-06, "loss": 1.0389, "step": 23090 }, { "epoch": 0.98, "grad_norm": 9.90383393067753, "learning_rate": 8.525552196531775e-06, "loss": 1.0915, "step": 23095 }, { "epoch": 0.98, "grad_norm": 7.969770022144005, "learning_rate": 8.524679099816308e-06, "loss": 1.0145, "step": 23100 }, { "epoch": 0.98, "grad_norm": 14.615720422693036, "learning_rate": 8.523805789409933e-06, "loss": 1.0382, "step": 23105 }, { "epoch": 0.98, "grad_norm": 8.896574009718107, "learning_rate": 8.52293226536559e-06, "loss": 1.0231, "step": 23110 }, { "epoch": 0.98, "grad_norm": 5.03820018486783, "learning_rate": 8.522058527736242e-06, "loss": 1.0995, "step": 23115 }, { "epoch": 0.98, "grad_norm": 4.7266038124399055, "learning_rate": 8.52118457657486e-06, "loss": 1.0219, "step": 23120 }, { "epoch": 0.98, "grad_norm": 5.03469004883265, "learning_rate": 8.520310411934427e-06, "loss": 1.0416, "step": 23125 }, { "epoch": 0.98, "grad_norm": 6.051595793411523, "learning_rate": 8.519436033867947e-06, "loss": 0.9894, "step": 23130 }, { "epoch": 0.98, "grad_norm": 5.454713193809196, "learning_rate": 8.518561442428424e-06, "loss": 1.003, "step": 23135 }, { "epoch": 0.98, "grad_norm": 11.884689564512003, "learning_rate": 8.517686637668888e-06, "loss": 1.0639, "step": 23140 }, { "epoch": 0.98, "grad_norm": 11.012427419432159, "learning_rate": 8.51681161964237e-06, "loss": 1.0001, "step": 23145 }, { "epoch": 0.98, "grad_norm": 9.043031054799105, "learning_rate": 8.515936388401924e-06, "loss": 1.0307, "step": 23150 }, { "epoch": 0.98, "grad_norm": 5.265952046332991, "learning_rate": 8.51506094400061e-06, "loss": 1.0292, "step": 23155 }, { "epoch": 0.98, "grad_norm": 5.608910602343494, "learning_rate": 8.514185286491506e-06, "loss": 1.0632, "step": 23160 }, { "epoch": 0.98, "grad_norm": 5.752875066980779, "learning_rate": 8.5133094159277e-06, "loss": 1.0472, "step": 23165 }, { "epoch": 0.98, "grad_norm": 6.073088789417399, "learning_rate": 8.512433332362291e-06, "loss": 1.0378, "step": 23170 }, { "epoch": 0.98, "grad_norm": 7.039947304869808, "learning_rate": 8.511557035848397e-06, "loss": 1.0537, "step": 23175 }, { "epoch": 0.98, "grad_norm": 4.8651125297603715, "learning_rate": 8.510680526439142e-06, "loss": 1.0348, "step": 23180 }, { "epoch": 0.98, "grad_norm": 15.136058309539601, "learning_rate": 8.509803804187669e-06, "loss": 1.0417, "step": 23185 }, { "epoch": 0.98, "grad_norm": 9.431586827139782, "learning_rate": 8.50892686914713e-06, "loss": 1.0127, "step": 23190 }, { "epoch": 0.98, "grad_norm": 4.9659224370651085, "learning_rate": 8.50804972137069e-06, "loss": 1.0283, "step": 23195 }, { "epoch": 0.98, "grad_norm": 12.362531898597064, "learning_rate": 8.507172360911529e-06, "loss": 1.0001, "step": 23200 }, { "epoch": 0.98, "grad_norm": 20.03845547654598, "learning_rate": 8.506294787822837e-06, "loss": 1.0498, "step": 23205 }, { "epoch": 0.98, "grad_norm": 4.951606531113273, "learning_rate": 8.505417002157823e-06, "loss": 0.9917, "step": 23210 }, { "epoch": 0.98, "grad_norm": 5.601171206012808, "learning_rate": 8.504539003969701e-06, "loss": 1.0504, "step": 23215 }, { "epoch": 0.98, "grad_norm": 9.241876040040765, "learning_rate": 8.5036607933117e-06, "loss": 1.0188, "step": 23220 }, { "epoch": 0.98, "grad_norm": 17.39617153236078, "learning_rate": 8.502782370237068e-06, "loss": 1.0297, "step": 23225 }, { "epoch": 0.98, "grad_norm": 19.921580882180873, "learning_rate": 8.501903734799058e-06, "loss": 1.0572, "step": 23230 }, { "epoch": 0.98, "grad_norm": 6.593051124968328, "learning_rate": 8.501024887050939e-06, "loss": 1.0269, "step": 23235 }, { "epoch": 0.98, "grad_norm": 18.783322251033454, "learning_rate": 8.500145827045994e-06, "loss": 1.0638, "step": 23240 }, { "epoch": 0.98, "grad_norm": 5.308844649106488, "learning_rate": 8.499266554837518e-06, "loss": 0.9901, "step": 23245 }, { "epoch": 0.98, "grad_norm": 14.12850652294799, "learning_rate": 8.498387070478816e-06, "loss": 1.0395, "step": 23250 }, { "epoch": 0.98, "grad_norm": 17.50235764908691, "learning_rate": 8.497507374023212e-06, "loss": 1.0566, "step": 23255 }, { "epoch": 0.98, "grad_norm": 4.817943554467996, "learning_rate": 8.49662746552404e-06, "loss": 1.0042, "step": 23260 }, { "epoch": 0.98, "grad_norm": 10.51947684277591, "learning_rate": 8.495747345034644e-06, "loss": 1.0333, "step": 23265 }, { "epoch": 0.98, "grad_norm": 7.151864044990613, "learning_rate": 8.49486701260838e-06, "loss": 1.0171, "step": 23270 }, { "epoch": 0.99, "grad_norm": 8.793646968134784, "learning_rate": 8.493986468298626e-06, "loss": 0.9703, "step": 23275 }, { "epoch": 0.99, "grad_norm": 5.268255583096469, "learning_rate": 8.493105712158767e-06, "loss": 0.9962, "step": 23280 }, { "epoch": 0.99, "grad_norm": 5.290962586119373, "learning_rate": 8.492224744242194e-06, "loss": 1.0246, "step": 23285 }, { "epoch": 0.99, "grad_norm": 6.521967868598463, "learning_rate": 8.491343564602324e-06, "loss": 1.0415, "step": 23290 }, { "epoch": 0.99, "grad_norm": 5.948269786217876, "learning_rate": 8.490462173292576e-06, "loss": 1.0004, "step": 23295 }, { "epoch": 0.99, "grad_norm": 7.609054069180209, "learning_rate": 8.489580570366389e-06, "loss": 1.046, "step": 23300 }, { "epoch": 0.99, "grad_norm": 27.12831081982543, "learning_rate": 8.48869875587721e-06, "loss": 1.0055, "step": 23305 }, { "epoch": 0.99, "grad_norm": 7.078232057142618, "learning_rate": 8.487816729878501e-06, "loss": 1.043, "step": 23310 }, { "epoch": 0.99, "grad_norm": 9.08809490334739, "learning_rate": 8.48693449242374e-06, "loss": 1.0341, "step": 23315 }, { "epoch": 0.99, "grad_norm": 12.181579037380805, "learning_rate": 8.48605204356641e-06, "loss": 0.9927, "step": 23320 }, { "epoch": 0.99, "grad_norm": 19.513917064074878, "learning_rate": 8.485169383360014e-06, "loss": 0.9949, "step": 23325 }, { "epoch": 0.99, "grad_norm": 11.671188730020175, "learning_rate": 8.484286511858062e-06, "loss": 1.0067, "step": 23330 }, { "epoch": 0.99, "grad_norm": 15.877637792063496, "learning_rate": 8.483403429114085e-06, "loss": 1.0481, "step": 23335 }, { "epoch": 0.99, "grad_norm": 28.355402625944034, "learning_rate": 8.482520135181617e-06, "loss": 1.0114, "step": 23340 }, { "epoch": 0.99, "grad_norm": 32.307020953693794, "learning_rate": 8.481636630114212e-06, "loss": 1.0086, "step": 23345 }, { "epoch": 0.99, "grad_norm": 22.947329093412538, "learning_rate": 8.480752913965434e-06, "loss": 1.0109, "step": 23350 }, { "epoch": 0.99, "grad_norm": 6.741749530268085, "learning_rate": 8.479868986788858e-06, "loss": 1.0693, "step": 23355 }, { "epoch": 0.99, "grad_norm": 6.808812079773028, "learning_rate": 8.478984848638077e-06, "loss": 0.9898, "step": 23360 }, { "epoch": 0.99, "grad_norm": 8.817170577309794, "learning_rate": 8.478100499566693e-06, "loss": 1.0201, "step": 23365 }, { "epoch": 0.99, "grad_norm": 14.042753637957894, "learning_rate": 8.47721593962832e-06, "loss": 1.0125, "step": 23370 }, { "epoch": 0.99, "grad_norm": 14.082615930342339, "learning_rate": 8.476331168876588e-06, "loss": 1.0727, "step": 23375 }, { "epoch": 0.99, "grad_norm": 8.407795793199803, "learning_rate": 8.475446187365136e-06, "loss": 1.0423, "step": 23380 }, { "epoch": 0.99, "grad_norm": 6.912832440040569, "learning_rate": 8.474560995147618e-06, "loss": 1.0174, "step": 23385 }, { "epoch": 0.99, "grad_norm": 13.54450292386654, "learning_rate": 8.473675592277702e-06, "loss": 1.0222, "step": 23390 }, { "epoch": 0.99, "grad_norm": 7.27306608562583, "learning_rate": 8.47278997880907e-06, "loss": 1.0343, "step": 23395 }, { "epoch": 0.99, "grad_norm": 8.273221841701524, "learning_rate": 8.471904154795407e-06, "loss": 1.0499, "step": 23400 }, { "epoch": 0.99, "grad_norm": 9.892920524724065, "learning_rate": 8.471018120290422e-06, "loss": 1.0146, "step": 23405 }, { "epoch": 0.99, "grad_norm": 14.493540436041627, "learning_rate": 8.470131875347834e-06, "loss": 1.0415, "step": 23410 }, { "epoch": 0.99, "grad_norm": 17.336811305379513, "learning_rate": 8.469245420021373e-06, "loss": 1.0267, "step": 23415 }, { "epoch": 0.99, "grad_norm": 5.635234002713541, "learning_rate": 8.46835875436478e-06, "loss": 1.023, "step": 23420 }, { "epoch": 0.99, "grad_norm": 7.00175757248925, "learning_rate": 8.467471878431812e-06, "loss": 1.022, "step": 23425 }, { "epoch": 0.99, "grad_norm": 10.20532814275073, "learning_rate": 8.466584792276237e-06, "loss": 1.0721, "step": 23430 }, { "epoch": 0.99, "grad_norm": 11.8541351706445, "learning_rate": 8.465697495951839e-06, "loss": 0.9971, "step": 23435 }, { "epoch": 0.99, "grad_norm": 7.010403253158212, "learning_rate": 8.464809989512409e-06, "loss": 1.0142, "step": 23440 }, { "epoch": 0.99, "grad_norm": 5.404646170758126, "learning_rate": 8.463922273011755e-06, "loss": 1.0646, "step": 23445 }, { "epoch": 0.99, "grad_norm": 5.467273917314427, "learning_rate": 8.463034346503697e-06, "loss": 1.0091, "step": 23450 }, { "epoch": 0.99, "grad_norm": 5.875605929914122, "learning_rate": 8.46214621004207e-06, "loss": 1.0182, "step": 23455 }, { "epoch": 0.99, "grad_norm": 11.200797996398627, "learning_rate": 8.461257863680715e-06, "loss": 0.9846, "step": 23460 }, { "epoch": 0.99, "grad_norm": 9.054734007179876, "learning_rate": 8.46036930747349e-06, "loss": 1.0155, "step": 23465 }, { "epoch": 0.99, "grad_norm": 8.136718430267301, "learning_rate": 8.459480541474268e-06, "loss": 1.0221, "step": 23470 }, { "epoch": 0.99, "grad_norm": 13.302955018043857, "learning_rate": 8.45859156573693e-06, "loss": 1.0227, "step": 23475 }, { "epoch": 0.99, "grad_norm": 11.30483994874588, "learning_rate": 8.457702380315373e-06, "loss": 1.0444, "step": 23480 }, { "epoch": 0.99, "grad_norm": 6.413382685398307, "learning_rate": 8.456812985263506e-06, "loss": 1.0122, "step": 23485 }, { "epoch": 0.99, "grad_norm": 5.459691452536524, "learning_rate": 8.455923380635249e-06, "loss": 1.0255, "step": 23490 }, { "epoch": 0.99, "grad_norm": 19.29235383257498, "learning_rate": 8.455033566484539e-06, "loss": 1.0588, "step": 23495 }, { "epoch": 0.99, "grad_norm": 38.757148731564264, "learning_rate": 8.45414354286532e-06, "loss": 1.0682, "step": 23500 }, { "epoch": 0.99, "grad_norm": 16.13836724915053, "learning_rate": 8.453253309831553e-06, "loss": 1.0186, "step": 23505 }, { "epoch": 0.99, "grad_norm": 12.270015011940659, "learning_rate": 8.452362867437209e-06, "loss": 1.0316, "step": 23510 }, { "epoch": 1.0, "grad_norm": 6.937654674803527, "learning_rate": 8.451472215736273e-06, "loss": 1.0214, "step": 23515 }, { "epoch": 1.0, "grad_norm": 14.76738521883225, "learning_rate": 8.450581354782745e-06, "loss": 1.0293, "step": 23520 }, { "epoch": 1.0, "grad_norm": 13.023266108812958, "learning_rate": 8.449690284630633e-06, "loss": 1.036, "step": 23525 }, { "epoch": 1.0, "grad_norm": 8.959896773654664, "learning_rate": 8.448799005333961e-06, "loss": 1.0692, "step": 23530 }, { "epoch": 1.0, "grad_norm": 6.580881282273475, "learning_rate": 8.447907516946764e-06, "loss": 1.0389, "step": 23535 }, { "epoch": 1.0, "grad_norm": 9.030960711324296, "learning_rate": 8.44701581952309e-06, "loss": 0.9856, "step": 23540 }, { "epoch": 1.0, "grad_norm": 21.20139650692982, "learning_rate": 8.446123913116999e-06, "loss": 1.0042, "step": 23545 }, { "epoch": 1.0, "grad_norm": 7.68008802117942, "learning_rate": 8.445231797782567e-06, "loss": 1.0087, "step": 23550 }, { "epoch": 1.0, "grad_norm": 15.610982203593359, "learning_rate": 8.44433947357388e-06, "loss": 1.0116, "step": 23555 }, { "epoch": 1.0, "grad_norm": 9.94457946694081, "learning_rate": 8.443446940545036e-06, "loss": 1.0129, "step": 23560 }, { "epoch": 1.0, "grad_norm": 7.809800318645303, "learning_rate": 8.442554198750145e-06, "loss": 1.0405, "step": 23565 }, { "epoch": 1.0, "grad_norm": 8.147377555924988, "learning_rate": 8.441661248243337e-06, "loss": 1.0282, "step": 23570 }, { "epoch": 1.0, "grad_norm": 5.115743486277359, "learning_rate": 8.440768089078743e-06, "loss": 1.0095, "step": 23575 }, { "epoch": 1.0, "grad_norm": 5.05918066235286, "learning_rate": 8.439874721310514e-06, "loss": 1.0088, "step": 23580 }, { "epoch": 1.0, "grad_norm": 7.611997174602991, "learning_rate": 8.438981144992814e-06, "loss": 1.0275, "step": 23585 }, { "epoch": 1.0, "grad_norm": 10.143948913638901, "learning_rate": 8.438087360179818e-06, "loss": 1.0135, "step": 23590 }, { "epoch": 1.0, "grad_norm": 5.9681691944224085, "learning_rate": 8.437193366925713e-06, "loss": 1.0233, "step": 23595 }, { "epoch": 1.0, "grad_norm": 4.939220956826862, "learning_rate": 8.436299165284698e-06, "loss": 1.0449, "step": 23600 }, { "epoch": 1.0, "grad_norm": 8.410859961631028, "learning_rate": 8.435404755310985e-06, "loss": 1.0485, "step": 23605 }, { "epoch": 1.0, "grad_norm": 5.554943205074981, "learning_rate": 8.434510137058804e-06, "loss": 1.0411, "step": 23610 }, { "epoch": 1.0, "grad_norm": 7.287320141489989, "learning_rate": 8.433615310582388e-06, "loss": 1.0516, "step": 23615 }, { "epoch": 1.0, "grad_norm": 5.950023929476283, "learning_rate": 8.43272027593599e-06, "loss": 1.0374, "step": 23620 }, { "epoch": 1.0, "grad_norm": 8.932865995739235, "learning_rate": 8.431825033173874e-06, "loss": 1.0034, "step": 23625 }, { "epoch": 1.0, "eval_loss": 1.063048243522644, "eval_runtime": 37.3271, "eval_samples_per_second": 31.318, "eval_steps_per_second": 3.938, "step": 23628 }, { "epoch": 1.0, "grad_norm": 7.47134981684741, "learning_rate": 8.430929582350313e-06, "loss": 0.9972, "step": 23630 }, { "epoch": 1.0, "grad_norm": 11.484846539009633, "learning_rate": 8.4300339235196e-06, "loss": 0.8836, "step": 23635 }, { "epoch": 1.0, "grad_norm": 4.735414921089642, "learning_rate": 8.429138056736033e-06, "loss": 0.9112, "step": 23640 }, { "epoch": 1.0, "grad_norm": 5.9708377499865, "learning_rate": 8.428241982053925e-06, "loss": 0.8925, "step": 23645 }, { "epoch": 1.0, "grad_norm": 8.24996479615225, "learning_rate": 8.427345699527605e-06, "loss": 0.9281, "step": 23650 }, { "epoch": 1.0, "grad_norm": 4.352222500245511, "learning_rate": 8.42644920921141e-06, "loss": 0.7964, "step": 23655 }, { "epoch": 1.0, "grad_norm": 10.509460803468615, "learning_rate": 8.425552511159694e-06, "loss": 0.8545, "step": 23660 }, { "epoch": 1.0, "grad_norm": 4.492095378442041, "learning_rate": 8.424655605426819e-06, "loss": 0.8666, "step": 23665 }, { "epoch": 1.0, "grad_norm": 5.613763619418842, "learning_rate": 8.42375849206716e-06, "loss": 0.928, "step": 23670 }, { "epoch": 1.0, "grad_norm": 18.052390896206354, "learning_rate": 8.422861171135111e-06, "loss": 0.8648, "step": 23675 }, { "epoch": 1.0, "grad_norm": 8.554035526215605, "learning_rate": 8.421963642685072e-06, "loss": 0.9003, "step": 23680 }, { "epoch": 1.0, "grad_norm": 4.701076948439349, "learning_rate": 8.421065906771454e-06, "loss": 0.8336, "step": 23685 }, { "epoch": 1.0, "grad_norm": 10.894132029624963, "learning_rate": 8.42016796344869e-06, "loss": 0.8511, "step": 23690 }, { "epoch": 1.0, "grad_norm": 11.075267667723061, "learning_rate": 8.419269812771216e-06, "loss": 0.9159, "step": 23695 }, { "epoch": 1.0, "grad_norm": 19.865335345987205, "learning_rate": 8.418371454793487e-06, "loss": 0.8974, "step": 23700 }, { "epoch": 1.0, "grad_norm": 12.09374081122974, "learning_rate": 8.417472889569965e-06, "loss": 0.9152, "step": 23705 }, { "epoch": 1.0, "grad_norm": 8.109418472952596, "learning_rate": 8.416574117155125e-06, "loss": 0.8536, "step": 23710 }, { "epoch": 1.0, "grad_norm": 8.715071149704299, "learning_rate": 8.415675137603463e-06, "loss": 0.8788, "step": 23715 }, { "epoch": 1.0, "grad_norm": 15.849180203379067, "learning_rate": 8.414775950969478e-06, "loss": 0.8406, "step": 23720 }, { "epoch": 1.0, "grad_norm": 12.692252844840338, "learning_rate": 8.413876557307686e-06, "loss": 0.8736, "step": 23725 }, { "epoch": 1.0, "grad_norm": 18.299949279276852, "learning_rate": 8.412976956672614e-06, "loss": 0.885, "step": 23730 }, { "epoch": 1.0, "grad_norm": 8.010611493789524, "learning_rate": 8.412077149118801e-06, "loss": 0.8484, "step": 23735 }, { "epoch": 1.0, "grad_norm": 5.596850204273611, "learning_rate": 8.411177134700802e-06, "loss": 0.8757, "step": 23740 }, { "epoch": 1.0, "grad_norm": 4.284237471238228, "learning_rate": 8.41027691347318e-06, "loss": 0.8257, "step": 23745 }, { "epoch": 1.01, "grad_norm": 5.74659215368054, "learning_rate": 8.409376485490517e-06, "loss": 0.872, "step": 23750 }, { "epoch": 1.01, "grad_norm": 5.976388229287632, "learning_rate": 8.4084758508074e-06, "loss": 0.8681, "step": 23755 }, { "epoch": 1.01, "grad_norm": 10.09592169311273, "learning_rate": 8.40757500947843e-06, "loss": 0.8713, "step": 23760 }, { "epoch": 1.01, "grad_norm": 10.861659943765588, "learning_rate": 8.406673961558225e-06, "loss": 0.8717, "step": 23765 }, { "epoch": 1.01, "grad_norm": 5.276349622542162, "learning_rate": 8.405772707101414e-06, "loss": 0.853, "step": 23770 }, { "epoch": 1.01, "grad_norm": 7.178001037523651, "learning_rate": 8.404871246162635e-06, "loss": 0.9112, "step": 23775 }, { "epoch": 1.01, "grad_norm": 16.444284443936446, "learning_rate": 8.403969578796542e-06, "loss": 0.8815, "step": 23780 }, { "epoch": 1.01, "grad_norm": 6.544319636872853, "learning_rate": 8.4030677050578e-06, "loss": 0.9059, "step": 23785 }, { "epoch": 1.01, "grad_norm": 6.0101367787362845, "learning_rate": 8.402165625001087e-06, "loss": 0.8507, "step": 23790 }, { "epoch": 1.01, "grad_norm": 5.235502305671002, "learning_rate": 8.401263338681092e-06, "loss": 0.8854, "step": 23795 }, { "epoch": 1.01, "grad_norm": 4.992561316105224, "learning_rate": 8.40036084615252e-06, "loss": 0.8775, "step": 23800 }, { "epoch": 1.01, "grad_norm": 15.58149670533829, "learning_rate": 8.399458147470088e-06, "loss": 0.8842, "step": 23805 }, { "epoch": 1.01, "grad_norm": 10.087206777389202, "learning_rate": 8.39855524268852e-06, "loss": 0.8692, "step": 23810 }, { "epoch": 1.01, "grad_norm": 5.162604667308198, "learning_rate": 8.397652131862558e-06, "loss": 0.8706, "step": 23815 }, { "epoch": 1.01, "grad_norm": 6.219265727431301, "learning_rate": 8.396748815046957e-06, "loss": 0.8956, "step": 23820 }, { "epoch": 1.01, "grad_norm": 6.046611143807226, "learning_rate": 8.39584529229648e-06, "loss": 0.8421, "step": 23825 }, { "epoch": 1.01, "grad_norm": 5.220776564853208, "learning_rate": 8.394941563665907e-06, "loss": 0.8411, "step": 23830 }, { "epoch": 1.01, "grad_norm": 9.533136360348138, "learning_rate": 8.394037629210025e-06, "loss": 0.852, "step": 23835 }, { "epoch": 1.01, "grad_norm": 6.349075164678321, "learning_rate": 8.393133488983641e-06, "loss": 0.9081, "step": 23840 }, { "epoch": 1.01, "grad_norm": 5.6848376405254655, "learning_rate": 8.392229143041568e-06, "loss": 0.8643, "step": 23845 }, { "epoch": 1.01, "grad_norm": 7.2565669840665885, "learning_rate": 8.391324591438634e-06, "loss": 0.8966, "step": 23850 }, { "epoch": 1.01, "grad_norm": 15.204771299936896, "learning_rate": 8.390419834229679e-06, "loss": 0.8765, "step": 23855 }, { "epoch": 1.01, "grad_norm": 17.42822410463676, "learning_rate": 8.389514871469558e-06, "loss": 0.868, "step": 23860 }, { "epoch": 1.01, "grad_norm": 8.026132565859108, "learning_rate": 8.388609703213132e-06, "loss": 0.8364, "step": 23865 }, { "epoch": 1.01, "grad_norm": 4.946876204519523, "learning_rate": 8.387704329515283e-06, "loss": 0.8676, "step": 23870 }, { "epoch": 1.01, "grad_norm": 9.11263701971295, "learning_rate": 8.386798750430898e-06, "loss": 0.9468, "step": 23875 }, { "epoch": 1.01, "grad_norm": 8.571974886967865, "learning_rate": 8.385892966014881e-06, "loss": 0.8369, "step": 23880 }, { "epoch": 1.01, "grad_norm": 7.907853064488422, "learning_rate": 8.384986976322147e-06, "loss": 0.8241, "step": 23885 }, { "epoch": 1.01, "grad_norm": 5.1588994651635, "learning_rate": 8.384080781407625e-06, "loss": 0.8467, "step": 23890 }, { "epoch": 1.01, "grad_norm": 4.9195596915901065, "learning_rate": 8.38317438132625e-06, "loss": 0.865, "step": 23895 }, { "epoch": 1.01, "grad_norm": 4.6919349248440545, "learning_rate": 8.38226777613298e-06, "loss": 0.8336, "step": 23900 }, { "epoch": 1.01, "grad_norm": 10.480509141963909, "learning_rate": 8.381360965882777e-06, "loss": 0.8655, "step": 23905 }, { "epoch": 1.01, "grad_norm": 14.76175781260011, "learning_rate": 8.38045395063062e-06, "loss": 0.878, "step": 23910 }, { "epoch": 1.01, "grad_norm": 7.420032128307376, "learning_rate": 8.379546730431495e-06, "loss": 0.8942, "step": 23915 }, { "epoch": 1.01, "grad_norm": 7.726736496268883, "learning_rate": 8.37863930534041e-06, "loss": 0.8963, "step": 23920 }, { "epoch": 1.01, "grad_norm": 24.13648194752957, "learning_rate": 8.377731675412372e-06, "loss": 0.8677, "step": 23925 }, { "epoch": 1.01, "grad_norm": 11.382567562865813, "learning_rate": 8.376823840702415e-06, "loss": 0.8477, "step": 23930 }, { "epoch": 1.01, "grad_norm": 31.36303889985935, "learning_rate": 8.375915801265575e-06, "loss": 0.933, "step": 23935 }, { "epoch": 1.01, "grad_norm": 17.0140828667438, "learning_rate": 8.375007557156905e-06, "loss": 0.8857, "step": 23940 }, { "epoch": 1.01, "grad_norm": 27.187262423191953, "learning_rate": 8.374099108431466e-06, "loss": 0.9232, "step": 23945 }, { "epoch": 1.01, "grad_norm": 20.594163618995726, "learning_rate": 8.373190455144337e-06, "loss": 0.8905, "step": 23950 }, { "epoch": 1.01, "grad_norm": 13.488371883894047, "learning_rate": 8.372281597350607e-06, "loss": 0.8435, "step": 23955 }, { "epoch": 1.01, "grad_norm": 8.094525503585649, "learning_rate": 8.371372535105378e-06, "loss": 0.8701, "step": 23960 }, { "epoch": 1.01, "grad_norm": 8.30858375756817, "learning_rate": 8.370463268463761e-06, "loss": 0.89, "step": 23965 }, { "epoch": 1.01, "grad_norm": 8.673000248740133, "learning_rate": 8.369553797480886e-06, "loss": 0.8482, "step": 23970 }, { "epoch": 1.01, "grad_norm": 5.159525108034029, "learning_rate": 8.368644122211889e-06, "loss": 0.8542, "step": 23975 }, { "epoch": 1.01, "grad_norm": 11.864629066153018, "learning_rate": 8.36773424271192e-06, "loss": 0.8511, "step": 23980 }, { "epoch": 1.02, "grad_norm": 4.87637759382671, "learning_rate": 8.366824159036146e-06, "loss": 0.879, "step": 23985 }, { "epoch": 1.02, "grad_norm": 4.836157320522113, "learning_rate": 8.365913871239739e-06, "loss": 0.8592, "step": 23990 }, { "epoch": 1.02, "grad_norm": 6.937417032202544, "learning_rate": 8.36500337937789e-06, "loss": 0.8748, "step": 23995 }, { "epoch": 1.02, "grad_norm": 12.291599742739209, "learning_rate": 8.364092683505798e-06, "loss": 0.8749, "step": 24000 }, { "epoch": 1.02, "grad_norm": 5.168247710412413, "learning_rate": 8.363181783678675e-06, "loss": 0.8529, "step": 24005 }, { "epoch": 1.02, "grad_norm": 8.343512792677986, "learning_rate": 8.362270679951747e-06, "loss": 0.8965, "step": 24010 }, { "epoch": 1.02, "grad_norm": 7.684304435280374, "learning_rate": 8.361359372380251e-06, "loss": 0.8765, "step": 24015 }, { "epoch": 1.02, "grad_norm": 7.507017609525433, "learning_rate": 8.36044786101944e-06, "loss": 0.8369, "step": 24020 }, { "epoch": 1.02, "grad_norm": 4.8398627627755655, "learning_rate": 8.359536145924572e-06, "loss": 0.8664, "step": 24025 }, { "epoch": 1.02, "grad_norm": 14.605598797139457, "learning_rate": 8.358624227150925e-06, "loss": 0.8827, "step": 24030 }, { "epoch": 1.02, "grad_norm": 8.244928171356866, "learning_rate": 8.357712104753782e-06, "loss": 0.8636, "step": 24035 }, { "epoch": 1.02, "grad_norm": 11.498267151570188, "learning_rate": 8.356799778788445e-06, "loss": 0.8643, "step": 24040 }, { "epoch": 1.02, "grad_norm": 7.779164263837633, "learning_rate": 8.355887249310225e-06, "loss": 0.9066, "step": 24045 }, { "epoch": 1.02, "grad_norm": 11.43695863623216, "learning_rate": 8.354974516374445e-06, "loss": 0.9021, "step": 24050 }, { "epoch": 1.02, "grad_norm": 6.908179428442711, "learning_rate": 8.354061580036446e-06, "loss": 0.8977, "step": 24055 }, { "epoch": 1.02, "grad_norm": 6.990950689941788, "learning_rate": 8.353148440351571e-06, "loss": 0.9168, "step": 24060 }, { "epoch": 1.02, "grad_norm": 7.1383915589603655, "learning_rate": 8.352235097375184e-06, "loss": 0.8929, "step": 24065 }, { "epoch": 1.02, "grad_norm": 6.947358046783045, "learning_rate": 8.351321551162655e-06, "loss": 0.8628, "step": 24070 }, { "epoch": 1.02, "grad_norm": 7.968321599730195, "learning_rate": 8.350407801769373e-06, "loss": 0.9497, "step": 24075 }, { "epoch": 1.02, "grad_norm": 5.109981609425329, "learning_rate": 8.349493849250736e-06, "loss": 0.842, "step": 24080 }, { "epoch": 1.02, "grad_norm": 4.835631065221668, "learning_rate": 8.348579693662153e-06, "loss": 0.8613, "step": 24085 }, { "epoch": 1.02, "grad_norm": 9.931005737393345, "learning_rate": 8.347665335059047e-06, "loss": 0.9413, "step": 24090 }, { "epoch": 1.02, "grad_norm": 8.835393998349327, "learning_rate": 8.346750773496853e-06, "loss": 0.861, "step": 24095 }, { "epoch": 1.02, "grad_norm": 7.334493493491368, "learning_rate": 8.345836009031017e-06, "loss": 0.8629, "step": 24100 }, { "epoch": 1.02, "grad_norm": 7.7080029750118, "learning_rate": 8.344921041716997e-06, "loss": 0.8705, "step": 24105 }, { "epoch": 1.02, "grad_norm": 26.046007950988546, "learning_rate": 8.34400587161027e-06, "loss": 0.8762, "step": 24110 }, { "epoch": 1.02, "grad_norm": 48.86154741646651, "learning_rate": 8.343090498766316e-06, "loss": 0.8744, "step": 24115 }, { "epoch": 1.02, "grad_norm": 23.730019677936074, "learning_rate": 8.342174923240634e-06, "loss": 0.8784, "step": 24120 }, { "epoch": 1.02, "grad_norm": 27.889318036607673, "learning_rate": 8.34125914508873e-06, "loss": 0.9226, "step": 24125 }, { "epoch": 1.02, "grad_norm": 30.59932174572011, "learning_rate": 8.340343164366125e-06, "loss": 0.9453, "step": 24130 }, { "epoch": 1.02, "grad_norm": 24.94479690396093, "learning_rate": 8.339426981128354e-06, "loss": 0.8866, "step": 24135 }, { "epoch": 1.02, "grad_norm": 9.937662057467135, "learning_rate": 8.338510595430962e-06, "loss": 0.9114, "step": 24140 }, { "epoch": 1.02, "grad_norm": 13.338887814379154, "learning_rate": 8.337594007329506e-06, "loss": 0.8807, "step": 24145 }, { "epoch": 1.02, "grad_norm": 5.492255273441464, "learning_rate": 8.336677216879557e-06, "loss": 0.9021, "step": 24150 }, { "epoch": 1.02, "grad_norm": 7.94147429487286, "learning_rate": 8.335760224136696e-06, "loss": 0.8701, "step": 24155 }, { "epoch": 1.02, "grad_norm": 4.783563256906834, "learning_rate": 8.33484302915652e-06, "loss": 0.8889, "step": 24160 }, { "epoch": 1.02, "grad_norm": 7.469533691528355, "learning_rate": 8.333925631994633e-06, "loss": 0.8853, "step": 24165 }, { "epoch": 1.02, "grad_norm": 5.800084500142283, "learning_rate": 8.333008032706656e-06, "loss": 0.8391, "step": 24170 }, { "epoch": 1.02, "grad_norm": 10.945085626406911, "learning_rate": 8.332090231348219e-06, "loss": 0.913, "step": 24175 }, { "epoch": 1.02, "grad_norm": 6.300160162872269, "learning_rate": 8.331172227974966e-06, "loss": 0.8493, "step": 24180 }, { "epoch": 1.02, "grad_norm": 4.756158405453012, "learning_rate": 8.330254022642554e-06, "loss": 0.8468, "step": 24185 }, { "epoch": 1.02, "grad_norm": 4.748301353505016, "learning_rate": 8.32933561540665e-06, "loss": 0.8723, "step": 24190 }, { "epoch": 1.02, "grad_norm": 5.629260730013607, "learning_rate": 8.328417006322935e-06, "loss": 0.8901, "step": 24195 }, { "epoch": 1.02, "grad_norm": 7.4393011270578295, "learning_rate": 8.327498195447102e-06, "loss": 0.8814, "step": 24200 }, { "epoch": 1.02, "grad_norm": 4.196855903772969, "learning_rate": 8.326579182834852e-06, "loss": 0.8143, "step": 24205 }, { "epoch": 1.02, "grad_norm": 10.643205037603853, "learning_rate": 8.325659968541906e-06, "loss": 0.8483, "step": 24210 }, { "epoch": 1.02, "grad_norm": 6.312005926330905, "learning_rate": 8.324740552623993e-06, "loss": 0.9047, "step": 24215 }, { "epoch": 1.03, "grad_norm": 11.760047657221973, "learning_rate": 8.323820935136852e-06, "loss": 0.9146, "step": 24220 }, { "epoch": 1.03, "grad_norm": 11.162311325856344, "learning_rate": 8.322901116136239e-06, "loss": 0.8813, "step": 24225 }, { "epoch": 1.03, "grad_norm": 4.387668670128087, "learning_rate": 8.32198109567792e-06, "loss": 0.8187, "step": 24230 }, { "epoch": 1.03, "grad_norm": 5.998859866395861, "learning_rate": 8.321060873817672e-06, "loss": 0.8478, "step": 24235 }, { "epoch": 1.03, "grad_norm": 5.736757584286885, "learning_rate": 8.320140450611287e-06, "loss": 0.9175, "step": 24240 }, { "epoch": 1.03, "grad_norm": 6.359264818184678, "learning_rate": 8.319219826114566e-06, "loss": 0.8786, "step": 24245 }, { "epoch": 1.03, "grad_norm": 8.57252863320637, "learning_rate": 8.318299000383322e-06, "loss": 0.8691, "step": 24250 }, { "epoch": 1.03, "grad_norm": 4.69145506179168, "learning_rate": 8.317377973473385e-06, "loss": 0.8591, "step": 24255 }, { "epoch": 1.03, "grad_norm": 5.217263280146151, "learning_rate": 8.316456745440594e-06, "loss": 0.8945, "step": 24260 }, { "epoch": 1.03, "grad_norm": 8.473210643455328, "learning_rate": 8.315535316340798e-06, "loss": 0.8751, "step": 24265 }, { "epoch": 1.03, "grad_norm": 12.182122488829963, "learning_rate": 8.314613686229863e-06, "loss": 0.8868, "step": 24270 }, { "epoch": 1.03, "grad_norm": 6.966629692121335, "learning_rate": 8.313691855163664e-06, "loss": 0.8943, "step": 24275 }, { "epoch": 1.03, "grad_norm": 6.868992189440033, "learning_rate": 8.312769823198089e-06, "loss": 0.8496, "step": 24280 }, { "epoch": 1.03, "grad_norm": 11.166057713549508, "learning_rate": 8.311847590389034e-06, "loss": 0.8979, "step": 24285 }, { "epoch": 1.03, "grad_norm": 9.742813899773127, "learning_rate": 8.31092515679242e-06, "loss": 0.8984, "step": 24290 }, { "epoch": 1.03, "grad_norm": 5.312501326377522, "learning_rate": 8.310002522464163e-06, "loss": 0.8948, "step": 24295 }, { "epoch": 1.03, "grad_norm": 6.400950836308031, "learning_rate": 8.309079687460204e-06, "loss": 0.9205, "step": 24300 }, { "epoch": 1.03, "grad_norm": 10.180131985970997, "learning_rate": 8.30815665183649e-06, "loss": 0.903, "step": 24305 }, { "epoch": 1.03, "grad_norm": 4.731119138366711, "learning_rate": 8.307233415648984e-06, "loss": 0.864, "step": 24310 }, { "epoch": 1.03, "grad_norm": 4.865497128228217, "learning_rate": 8.306309978953657e-06, "loss": 0.855, "step": 24315 }, { "epoch": 1.03, "grad_norm": 8.02747360707304, "learning_rate": 8.305386341806497e-06, "loss": 0.8963, "step": 24320 }, { "epoch": 1.03, "grad_norm": 5.767420368749239, "learning_rate": 8.304462504263497e-06, "loss": 0.8671, "step": 24325 }, { "epoch": 1.03, "grad_norm": 4.9610402111887355, "learning_rate": 8.303538466380669e-06, "loss": 0.8605, "step": 24330 }, { "epoch": 1.03, "grad_norm": 6.627551178682409, "learning_rate": 8.302614228214036e-06, "loss": 0.9156, "step": 24335 }, { "epoch": 1.03, "grad_norm": 7.96799829639129, "learning_rate": 8.30168978981963e-06, "loss": 0.8803, "step": 24340 }, { "epoch": 1.03, "grad_norm": 5.73228482673005, "learning_rate": 8.300765151253499e-06, "loss": 0.843, "step": 24345 }, { "epoch": 1.03, "grad_norm": 7.923786369193929, "learning_rate": 8.299840312571696e-06, "loss": 0.8605, "step": 24350 }, { "epoch": 1.03, "grad_norm": 5.291284169978704, "learning_rate": 8.298915273830297e-06, "loss": 0.9109, "step": 24355 }, { "epoch": 1.03, "grad_norm": 5.5994469548330725, "learning_rate": 8.297990035085384e-06, "loss": 0.8545, "step": 24360 }, { "epoch": 1.03, "grad_norm": 4.832755989735144, "learning_rate": 8.297064596393047e-06, "loss": 0.8877, "step": 24365 }, { "epoch": 1.03, "grad_norm": 4.573937446762367, "learning_rate": 8.296138957809396e-06, "loss": 0.9379, "step": 24370 }, { "epoch": 1.03, "grad_norm": 7.121601295767272, "learning_rate": 8.295213119390548e-06, "loss": 0.9013, "step": 24375 }, { "epoch": 1.03, "grad_norm": 18.78337432516734, "learning_rate": 8.294287081192639e-06, "loss": 0.8541, "step": 24380 }, { "epoch": 1.03, "grad_norm": 15.399830764568895, "learning_rate": 8.293360843271802e-06, "loss": 0.8751, "step": 24385 }, { "epoch": 1.03, "grad_norm": 5.652853585889718, "learning_rate": 8.292434405684202e-06, "loss": 0.8888, "step": 24390 }, { "epoch": 1.03, "grad_norm": 7.943937360585071, "learning_rate": 8.291507768486002e-06, "loss": 0.8678, "step": 24395 }, { "epoch": 1.03, "grad_norm": 6.1991155539661325, "learning_rate": 8.290580931733381e-06, "loss": 0.9089, "step": 24400 }, { "epoch": 1.03, "grad_norm": 6.580522247341891, "learning_rate": 8.28965389548253e-06, "loss": 0.9016, "step": 24405 }, { "epoch": 1.03, "grad_norm": 9.447246254266961, "learning_rate": 8.288726659789655e-06, "loss": 0.9149, "step": 24410 }, { "epoch": 1.03, "grad_norm": 6.189605830176344, "learning_rate": 8.287799224710968e-06, "loss": 0.9519, "step": 24415 }, { "epoch": 1.03, "grad_norm": 4.573528428371724, "learning_rate": 8.286871590302698e-06, "loss": 0.8535, "step": 24420 }, { "epoch": 1.03, "grad_norm": 6.554531673121981, "learning_rate": 8.285943756621088e-06, "loss": 0.8538, "step": 24425 }, { "epoch": 1.03, "grad_norm": 5.21153748010033, "learning_rate": 8.285015723722384e-06, "loss": 0.8856, "step": 24430 }, { "epoch": 1.03, "grad_norm": 5.415904023147494, "learning_rate": 8.284087491662856e-06, "loss": 0.859, "step": 24435 }, { "epoch": 1.03, "grad_norm": 6.072848005480355, "learning_rate": 8.283159060498776e-06, "loss": 0.8839, "step": 24440 }, { "epoch": 1.03, "grad_norm": 12.743067454312094, "learning_rate": 8.282230430286432e-06, "loss": 0.8586, "step": 24445 }, { "epoch": 1.03, "grad_norm": 13.953526049570026, "learning_rate": 8.281301601082126e-06, "loss": 0.878, "step": 24450 }, { "epoch": 1.03, "grad_norm": 23.909154613931833, "learning_rate": 8.280372572942169e-06, "loss": 0.8804, "step": 24455 }, { "epoch": 1.04, "grad_norm": 10.077103246066287, "learning_rate": 8.279443345922885e-06, "loss": 0.8442, "step": 24460 }, { "epoch": 1.04, "grad_norm": 33.90951048034864, "learning_rate": 8.278513920080612e-06, "loss": 0.8686, "step": 24465 }, { "epoch": 1.04, "grad_norm": 7.879425851014187, "learning_rate": 8.277584295471697e-06, "loss": 0.9286, "step": 24470 }, { "epoch": 1.04, "grad_norm": 9.75295514247547, "learning_rate": 8.2766544721525e-06, "loss": 0.9079, "step": 24475 }, { "epoch": 1.04, "grad_norm": 24.358493897548712, "learning_rate": 8.275724450179394e-06, "loss": 0.9027, "step": 24480 }, { "epoch": 1.04, "grad_norm": 13.294865160711101, "learning_rate": 8.274794229608764e-06, "loss": 0.8849, "step": 24485 }, { "epoch": 1.04, "grad_norm": 13.786356372237712, "learning_rate": 8.273863810497007e-06, "loss": 0.8372, "step": 24490 }, { "epoch": 1.04, "grad_norm": 17.221597869640195, "learning_rate": 8.27293319290053e-06, "loss": 0.8598, "step": 24495 }, { "epoch": 1.04, "grad_norm": 6.197777378344038, "learning_rate": 8.272002376875753e-06, "loss": 0.8733, "step": 24500 }, { "epoch": 1.04, "grad_norm": 7.144404771237298, "learning_rate": 8.271071362479112e-06, "loss": 0.8741, "step": 24505 }, { "epoch": 1.04, "grad_norm": 4.387627758409195, "learning_rate": 8.270140149767048e-06, "loss": 0.8637, "step": 24510 }, { "epoch": 1.04, "grad_norm": 8.07399330336378, "learning_rate": 8.26920873879602e-06, "loss": 0.9039, "step": 24515 }, { "epoch": 1.04, "grad_norm": 17.435600119165812, "learning_rate": 8.268277129622494e-06, "loss": 0.8738, "step": 24520 }, { "epoch": 1.04, "grad_norm": 9.579910558029948, "learning_rate": 8.267345322302956e-06, "loss": 0.9057, "step": 24525 }, { "epoch": 1.04, "grad_norm": 15.406908301573534, "learning_rate": 8.266413316893894e-06, "loss": 0.8414, "step": 24530 }, { "epoch": 1.04, "grad_norm": 11.09430482992784, "learning_rate": 8.265481113451812e-06, "loss": 0.8645, "step": 24535 }, { "epoch": 1.04, "grad_norm": 5.578750850188795, "learning_rate": 8.26454871203323e-06, "loss": 0.839, "step": 24540 }, { "epoch": 1.04, "grad_norm": 18.169412030873623, "learning_rate": 8.263616112694676e-06, "loss": 0.8567, "step": 24545 }, { "epoch": 1.04, "grad_norm": 7.213727805025681, "learning_rate": 8.262683315492691e-06, "loss": 0.888, "step": 24550 }, { "epoch": 1.04, "grad_norm": 21.114962103237136, "learning_rate": 8.261750320483827e-06, "loss": 0.8644, "step": 24555 }, { "epoch": 1.04, "grad_norm": 15.248806360276067, "learning_rate": 8.260817127724649e-06, "loss": 0.8877, "step": 24560 }, { "epoch": 1.04, "grad_norm": 16.55383841480191, "learning_rate": 8.259883737271732e-06, "loss": 0.8702, "step": 24565 }, { "epoch": 1.04, "grad_norm": 9.683249351725927, "learning_rate": 8.258950149181669e-06, "loss": 0.8733, "step": 24570 }, { "epoch": 1.04, "grad_norm": 13.11788429551088, "learning_rate": 8.258016363511057e-06, "loss": 0.8659, "step": 24575 }, { "epoch": 1.04, "grad_norm": 14.36547218796155, "learning_rate": 8.257082380316508e-06, "loss": 0.8629, "step": 24580 }, { "epoch": 1.04, "grad_norm": 10.80398287640242, "learning_rate": 8.25614819965465e-06, "loss": 0.8544, "step": 24585 }, { "epoch": 1.04, "grad_norm": 22.711621755931816, "learning_rate": 8.255213821582117e-06, "loss": 0.8532, "step": 24590 }, { "epoch": 1.04, "grad_norm": 9.824880181766071, "learning_rate": 8.25427924615556e-06, "loss": 0.9031, "step": 24595 }, { "epoch": 1.04, "grad_norm": 6.654799644382605, "learning_rate": 8.253344473431637e-06, "loss": 0.867, "step": 24600 }, { "epoch": 1.04, "grad_norm": 5.611546900082446, "learning_rate": 8.252409503467023e-06, "loss": 0.8845, "step": 24605 }, { "epoch": 1.04, "grad_norm": 4.925623162805631, "learning_rate": 8.251474336318399e-06, "loss": 0.8977, "step": 24610 }, { "epoch": 1.04, "grad_norm": 9.173953985280534, "learning_rate": 8.250538972042463e-06, "loss": 0.9142, "step": 24615 }, { "epoch": 1.04, "grad_norm": 5.157813884794019, "learning_rate": 8.249603410695925e-06, "loss": 0.8477, "step": 24620 }, { "epoch": 1.04, "grad_norm": 6.965555155137428, "learning_rate": 8.248667652335507e-06, "loss": 0.8383, "step": 24625 }, { "epoch": 1.04, "grad_norm": 6.527702275654706, "learning_rate": 8.247731697017936e-06, "loss": 0.8629, "step": 24630 }, { "epoch": 1.04, "grad_norm": 7.443119807927224, "learning_rate": 8.246795544799957e-06, "loss": 0.8774, "step": 24635 }, { "epoch": 1.04, "grad_norm": 13.053775959688798, "learning_rate": 8.245859195738331e-06, "loss": 0.8763, "step": 24640 }, { "epoch": 1.04, "grad_norm": 10.423611695402935, "learning_rate": 8.24492264988982e-06, "loss": 0.8962, "step": 24645 }, { "epoch": 1.04, "grad_norm": 6.565582406802536, "learning_rate": 8.24398590731121e-06, "loss": 0.9159, "step": 24650 }, { "epoch": 1.04, "grad_norm": 4.873725323121731, "learning_rate": 8.243048968059287e-06, "loss": 0.9205, "step": 24655 }, { "epoch": 1.04, "grad_norm": 7.5362223458521145, "learning_rate": 8.24211183219086e-06, "loss": 0.8893, "step": 24660 }, { "epoch": 1.04, "grad_norm": 6.171531771944546, "learning_rate": 8.241174499762741e-06, "loss": 0.8547, "step": 24665 }, { "epoch": 1.04, "grad_norm": 7.601137342930457, "learning_rate": 8.24023697083176e-06, "loss": 0.8464, "step": 24670 }, { "epoch": 1.04, "grad_norm": 9.705145527319646, "learning_rate": 8.239299245454757e-06, "loss": 0.8584, "step": 24675 }, { "epoch": 1.04, "grad_norm": 6.238972983584677, "learning_rate": 8.238361323688581e-06, "loss": 0.8719, "step": 24680 }, { "epoch": 1.04, "grad_norm": 7.811439509500927, "learning_rate": 8.237423205590099e-06, "loss": 0.8768, "step": 24685 }, { "epoch": 1.04, "grad_norm": 9.287015520252488, "learning_rate": 8.23648489121618e-06, "loss": 0.8771, "step": 24690 }, { "epoch": 1.05, "grad_norm": 6.292480737217653, "learning_rate": 8.235546380623718e-06, "loss": 0.8561, "step": 24695 }, { "epoch": 1.05, "grad_norm": 4.994876796592222, "learning_rate": 8.23460767386961e-06, "loss": 0.8518, "step": 24700 }, { "epoch": 1.05, "grad_norm": 5.109196999249068, "learning_rate": 8.233668771010766e-06, "loss": 0.8619, "step": 24705 }, { "epoch": 1.05, "grad_norm": 7.142546213784474, "learning_rate": 8.232729672104109e-06, "loss": 0.9101, "step": 24710 }, { "epoch": 1.05, "grad_norm": 15.1050883400635, "learning_rate": 8.231790377206576e-06, "loss": 0.8911, "step": 24715 }, { "epoch": 1.05, "grad_norm": 7.217023777828318, "learning_rate": 8.23085088637511e-06, "loss": 0.885, "step": 24720 }, { "epoch": 1.05, "grad_norm": 5.437660790147719, "learning_rate": 8.229911199666671e-06, "loss": 0.8989, "step": 24725 }, { "epoch": 1.05, "grad_norm": 5.611352558849838, "learning_rate": 8.228971317138232e-06, "loss": 0.8816, "step": 24730 }, { "epoch": 1.05, "grad_norm": 8.536254236386037, "learning_rate": 8.228031238846774e-06, "loss": 0.8339, "step": 24735 }, { "epoch": 1.05, "grad_norm": 5.415014858555917, "learning_rate": 8.227090964849288e-06, "loss": 0.8912, "step": 24740 }, { "epoch": 1.05, "grad_norm": 5.810009369170103, "learning_rate": 8.226150495202782e-06, "loss": 0.8767, "step": 24745 }, { "epoch": 1.05, "grad_norm": 7.885314497446922, "learning_rate": 8.225209829964278e-06, "loss": 0.8767, "step": 24750 }, { "epoch": 1.05, "grad_norm": 6.8135293528946255, "learning_rate": 8.2242689691908e-06, "loss": 0.8604, "step": 24755 }, { "epoch": 1.05, "grad_norm": 4.933352369879482, "learning_rate": 8.223327912939392e-06, "loss": 0.9145, "step": 24760 }, { "epoch": 1.05, "grad_norm": 5.413063637484613, "learning_rate": 8.222386661267107e-06, "loss": 0.877, "step": 24765 }, { "epoch": 1.05, "grad_norm": 5.793822763722874, "learning_rate": 8.221445214231011e-06, "loss": 0.8263, "step": 24770 }, { "epoch": 1.05, "grad_norm": 5.125289140801641, "learning_rate": 8.22050357188818e-06, "loss": 0.8419, "step": 24775 }, { "epoch": 1.05, "grad_norm": 6.8686965745449315, "learning_rate": 8.219561734295705e-06, "loss": 0.8715, "step": 24780 }, { "epoch": 1.05, "grad_norm": 9.041420031010027, "learning_rate": 8.218619701510684e-06, "loss": 0.8789, "step": 24785 }, { "epoch": 1.05, "grad_norm": 4.712998761765499, "learning_rate": 8.217677473590234e-06, "loss": 0.8794, "step": 24790 }, { "epoch": 1.05, "grad_norm": 5.914252320453015, "learning_rate": 8.216735050591473e-06, "loss": 0.8808, "step": 24795 }, { "epoch": 1.05, "grad_norm": 4.09387911224469, "learning_rate": 8.215792432571544e-06, "loss": 0.8852, "step": 24800 }, { "epoch": 1.05, "grad_norm": 8.891490035289086, "learning_rate": 8.214849619587591e-06, "loss": 0.8498, "step": 24805 }, { "epoch": 1.05, "grad_norm": 12.913462709974251, "learning_rate": 8.213906611696773e-06, "loss": 0.9566, "step": 24810 }, { "epoch": 1.05, "grad_norm": 11.33936747214587, "learning_rate": 8.212963408956267e-06, "loss": 0.9205, "step": 24815 }, { "epoch": 1.05, "grad_norm": 9.92715170899013, "learning_rate": 8.212020011423254e-06, "loss": 0.8525, "step": 24820 }, { "epoch": 1.05, "grad_norm": 13.999563790375015, "learning_rate": 8.211076419154928e-06, "loss": 0.8383, "step": 24825 }, { "epoch": 1.05, "grad_norm": 9.517823968007406, "learning_rate": 8.210132632208499e-06, "loss": 0.8912, "step": 24830 }, { "epoch": 1.05, "grad_norm": 10.537348980898546, "learning_rate": 8.20918865064118e-06, "loss": 0.8533, "step": 24835 }, { "epoch": 1.05, "grad_norm": 5.28702612414382, "learning_rate": 8.20824447451021e-06, "loss": 0.8776, "step": 24840 }, { "epoch": 1.05, "grad_norm": 27.98022112669318, "learning_rate": 8.207300103872826e-06, "loss": 0.8754, "step": 24845 }, { "epoch": 1.05, "grad_norm": 10.189362496980022, "learning_rate": 8.206355538786284e-06, "loss": 0.9066, "step": 24850 }, { "epoch": 1.05, "grad_norm": 11.89154233633554, "learning_rate": 8.20541077930785e-06, "loss": 0.8611, "step": 24855 }, { "epoch": 1.05, "grad_norm": 18.76840398692148, "learning_rate": 8.204465825494804e-06, "loss": 0.8816, "step": 24860 }, { "epoch": 1.05, "grad_norm": 19.340724963940847, "learning_rate": 8.203520677404433e-06, "loss": 0.8457, "step": 24865 }, { "epoch": 1.05, "grad_norm": 13.177593523558741, "learning_rate": 8.20257533509404e-06, "loss": 0.8507, "step": 24870 }, { "epoch": 1.05, "grad_norm": 17.13913612038679, "learning_rate": 8.201629798620939e-06, "loss": 0.8685, "step": 24875 }, { "epoch": 1.05, "grad_norm": 7.837962530031618, "learning_rate": 8.20068406804245e-06, "loss": 0.8457, "step": 24880 }, { "epoch": 1.05, "grad_norm": 7.285817982113224, "learning_rate": 8.199738143415918e-06, "loss": 0.8517, "step": 24885 }, { "epoch": 1.05, "grad_norm": 6.5602116364480105, "learning_rate": 8.198792024798685e-06, "loss": 0.8723, "step": 24890 }, { "epoch": 1.05, "grad_norm": 10.394414026933362, "learning_rate": 8.197845712248116e-06, "loss": 0.8562, "step": 24895 }, { "epoch": 1.05, "grad_norm": 5.070725000625126, "learning_rate": 8.19689920582158e-06, "loss": 0.8566, "step": 24900 }, { "epoch": 1.05, "grad_norm": 7.396003301168955, "learning_rate": 8.195952505576461e-06, "loss": 0.8886, "step": 24905 }, { "epoch": 1.05, "grad_norm": 9.784735276861356, "learning_rate": 8.195005611570157e-06, "loss": 0.8725, "step": 24910 }, { "epoch": 1.05, "grad_norm": 10.17933778434733, "learning_rate": 8.194058523860073e-06, "loss": 0.8548, "step": 24915 }, { "epoch": 1.05, "grad_norm": 9.985055575213194, "learning_rate": 8.19311124250363e-06, "loss": 0.862, "step": 24920 }, { "epoch": 1.05, "grad_norm": 8.461189066968783, "learning_rate": 8.192163767558256e-06, "loss": 0.8753, "step": 24925 }, { "epoch": 1.06, "grad_norm": 5.047149930240928, "learning_rate": 8.191216099081399e-06, "loss": 0.8741, "step": 24930 }, { "epoch": 1.06, "grad_norm": 6.525203834218405, "learning_rate": 8.190268237130508e-06, "loss": 0.8811, "step": 24935 }, { "epoch": 1.06, "grad_norm": 6.377899310343711, "learning_rate": 8.189320181763051e-06, "loss": 0.8645, "step": 24940 }, { "epoch": 1.06, "grad_norm": 16.156141805125113, "learning_rate": 8.188371933036505e-06, "loss": 0.8466, "step": 24945 }, { "epoch": 1.06, "grad_norm": 5.755348336243058, "learning_rate": 8.187423491008362e-06, "loss": 0.8533, "step": 24950 }, { "epoch": 1.06, "grad_norm": 6.298152600986602, "learning_rate": 8.18647485573612e-06, "loss": 0.8541, "step": 24955 }, { "epoch": 1.06, "grad_norm": 9.952340785786104, "learning_rate": 8.185526027277294e-06, "loss": 0.9046, "step": 24960 }, { "epoch": 1.06, "grad_norm": 6.040647229546104, "learning_rate": 8.184577005689408e-06, "loss": 0.8904, "step": 24965 }, { "epoch": 1.06, "grad_norm": 7.248428851700628, "learning_rate": 8.183627791030001e-06, "loss": 0.854, "step": 24970 }, { "epoch": 1.06, "grad_norm": 6.566589666009911, "learning_rate": 8.182678383356617e-06, "loss": 0.8572, "step": 24975 }, { "epoch": 1.06, "grad_norm": 7.080922202072892, "learning_rate": 8.181728782726817e-06, "loss": 0.945, "step": 24980 }, { "epoch": 1.06, "grad_norm": 4.390552877339715, "learning_rate": 8.180778989198174e-06, "loss": 0.8783, "step": 24985 }, { "epoch": 1.06, "grad_norm": 5.028702812717927, "learning_rate": 8.17982900282827e-06, "loss": 0.893, "step": 24990 }, { "epoch": 1.06, "grad_norm": 7.214799107210826, "learning_rate": 8.178878823674701e-06, "loss": 0.8929, "step": 24995 }, { "epoch": 1.06, "grad_norm": 5.447727340755326, "learning_rate": 8.17792845179507e-06, "loss": 0.8788, "step": 25000 }, { "epoch": 1.06, "grad_norm": 13.083600188989156, "learning_rate": 8.176977887246999e-06, "loss": 0.8993, "step": 25005 }, { "epoch": 1.06, "grad_norm": 8.524152034080783, "learning_rate": 8.176027130088118e-06, "loss": 0.8704, "step": 25010 }, { "epoch": 1.06, "grad_norm": 14.462433914343498, "learning_rate": 8.175076180376066e-06, "loss": 0.9058, "step": 25015 }, { "epoch": 1.06, "grad_norm": 9.796374398794727, "learning_rate": 8.174125038168498e-06, "loss": 0.9308, "step": 25020 }, { "epoch": 1.06, "grad_norm": 8.760889357393078, "learning_rate": 8.173173703523078e-06, "loss": 0.8737, "step": 25025 }, { "epoch": 1.06, "grad_norm": 5.660280960448217, "learning_rate": 8.172222176497485e-06, "loss": 0.852, "step": 25030 }, { "epoch": 1.06, "grad_norm": 8.746842373928137, "learning_rate": 8.171270457149405e-06, "loss": 0.8727, "step": 25035 }, { "epoch": 1.06, "grad_norm": 5.147661343372264, "learning_rate": 8.170318545536538e-06, "loss": 0.8661, "step": 25040 }, { "epoch": 1.06, "grad_norm": 6.330545017607854, "learning_rate": 8.169366441716595e-06, "loss": 0.8776, "step": 25045 }, { "epoch": 1.06, "grad_norm": 12.567453466442913, "learning_rate": 8.1684141457473e-06, "loss": 0.8896, "step": 25050 }, { "epoch": 1.06, "grad_norm": 4.853275954291904, "learning_rate": 8.16746165768639e-06, "loss": 0.8797, "step": 25055 }, { "epoch": 1.06, "grad_norm": 6.67841009398702, "learning_rate": 8.16650897759161e-06, "loss": 0.8529, "step": 25060 }, { "epoch": 1.06, "grad_norm": 9.68369100127473, "learning_rate": 8.165556105520718e-06, "loss": 0.8797, "step": 25065 }, { "epoch": 1.06, "grad_norm": 7.435460296155595, "learning_rate": 8.164603041531482e-06, "loss": 0.9023, "step": 25070 }, { "epoch": 1.06, "grad_norm": 5.210080494681193, "learning_rate": 8.163649785681687e-06, "loss": 0.9002, "step": 25075 }, { "epoch": 1.06, "grad_norm": 8.884237745329049, "learning_rate": 8.162696338029124e-06, "loss": 0.8902, "step": 25080 }, { "epoch": 1.06, "grad_norm": 6.0294033902849815, "learning_rate": 8.161742698631598e-06, "loss": 0.9034, "step": 25085 }, { "epoch": 1.06, "grad_norm": 7.97050328242212, "learning_rate": 8.160788867546923e-06, "loss": 0.8719, "step": 25090 }, { "epoch": 1.06, "grad_norm": 5.110338959489951, "learning_rate": 8.159834844832932e-06, "loss": 0.8898, "step": 25095 }, { "epoch": 1.06, "grad_norm": 6.663584537769211, "learning_rate": 8.158880630547463e-06, "loss": 0.8426, "step": 25100 }, { "epoch": 1.06, "grad_norm": 18.77526748751355, "learning_rate": 8.157926224748363e-06, "loss": 0.9022, "step": 25105 }, { "epoch": 1.06, "grad_norm": 12.125939002791226, "learning_rate": 8.156971627493499e-06, "loss": 0.8652, "step": 25110 }, { "epoch": 1.06, "grad_norm": 16.92518764101726, "learning_rate": 8.156016838840746e-06, "loss": 0.885, "step": 25115 }, { "epoch": 1.06, "grad_norm": 7.161537676888788, "learning_rate": 8.155061858847987e-06, "loss": 0.8601, "step": 25120 }, { "epoch": 1.06, "grad_norm": 5.151492349625277, "learning_rate": 8.154106687573122e-06, "loss": 0.8006, "step": 25125 }, { "epoch": 1.06, "grad_norm": 5.459551875399119, "learning_rate": 8.153151325074058e-06, "loss": 0.8585, "step": 25130 }, { "epoch": 1.06, "grad_norm": 5.223074625967917, "learning_rate": 8.152195771408717e-06, "loss": 0.877, "step": 25135 }, { "epoch": 1.06, "grad_norm": 8.409045252801864, "learning_rate": 8.151240026635033e-06, "loss": 0.8777, "step": 25140 }, { "epoch": 1.06, "grad_norm": 17.709059014264987, "learning_rate": 8.150284090810948e-06, "loss": 0.8717, "step": 25145 }, { "epoch": 1.06, "grad_norm": 15.421726616259106, "learning_rate": 8.149327963994417e-06, "loss": 0.8602, "step": 25150 }, { "epoch": 1.06, "grad_norm": 5.232292331829905, "learning_rate": 8.14837164624341e-06, "loss": 0.8907, "step": 25155 }, { "epoch": 1.06, "grad_norm": 6.69605382126191, "learning_rate": 8.147415137615902e-06, "loss": 0.933, "step": 25160 }, { "epoch": 1.07, "grad_norm": 15.386834417103419, "learning_rate": 8.146458438169887e-06, "loss": 0.8893, "step": 25165 }, { "epoch": 1.07, "grad_norm": 13.622953122638997, "learning_rate": 8.145501547963364e-06, "loss": 0.8827, "step": 25170 }, { "epoch": 1.07, "grad_norm": 4.482481357356965, "learning_rate": 8.144544467054347e-06, "loss": 0.8498, "step": 25175 }, { "epoch": 1.07, "grad_norm": 18.59822555373446, "learning_rate": 8.143587195500861e-06, "loss": 0.9383, "step": 25180 }, { "epoch": 1.07, "grad_norm": 10.04318198282557, "learning_rate": 8.142629733360944e-06, "loss": 0.9337, "step": 25185 }, { "epoch": 1.07, "grad_norm": 19.16056344299214, "learning_rate": 8.141672080692645e-06, "loss": 0.8785, "step": 25190 }, { "epoch": 1.07, "grad_norm": 13.642227117383678, "learning_rate": 8.140714237554019e-06, "loss": 0.9065, "step": 25195 }, { "epoch": 1.07, "grad_norm": 9.207305919383431, "learning_rate": 8.139756204003141e-06, "loss": 0.8765, "step": 25200 }, { "epoch": 1.07, "grad_norm": 8.879306934148238, "learning_rate": 8.138797980098094e-06, "loss": 0.8544, "step": 25205 }, { "epoch": 1.07, "grad_norm": 7.176265237688608, "learning_rate": 8.13783956589697e-06, "loss": 0.8793, "step": 25210 }, { "epoch": 1.07, "grad_norm": 7.371276628003012, "learning_rate": 8.136880961457876e-06, "loss": 0.9012, "step": 25215 }, { "epoch": 1.07, "grad_norm": 10.845515552368298, "learning_rate": 8.13592216683893e-06, "loss": 0.9124, "step": 25220 }, { "epoch": 1.07, "grad_norm": 42.02464244322, "learning_rate": 8.134963182098258e-06, "loss": 0.8717, "step": 25225 }, { "epoch": 1.07, "grad_norm": 27.624042002708634, "learning_rate": 8.134004007294005e-06, "loss": 0.8573, "step": 25230 }, { "epoch": 1.07, "grad_norm": 14.055011799851112, "learning_rate": 8.13304464248432e-06, "loss": 0.8961, "step": 25235 }, { "epoch": 1.07, "grad_norm": 27.219485466255527, "learning_rate": 8.132085087727369e-06, "loss": 0.8261, "step": 25240 }, { "epoch": 1.07, "grad_norm": 14.717331427640557, "learning_rate": 8.131125343081321e-06, "loss": 0.8214, "step": 25245 }, { "epoch": 1.07, "grad_norm": 16.060883445361377, "learning_rate": 8.130165408604369e-06, "loss": 0.8898, "step": 25250 }, { "epoch": 1.07, "grad_norm": 9.371913244279149, "learning_rate": 8.129205284354707e-06, "loss": 0.8827, "step": 25255 }, { "epoch": 1.07, "grad_norm": 8.520258042709106, "learning_rate": 8.128244970390546e-06, "loss": 0.9218, "step": 25260 }, { "epoch": 1.07, "grad_norm": 17.140680393577295, "learning_rate": 8.127284466770108e-06, "loss": 0.8814, "step": 25265 }, { "epoch": 1.07, "grad_norm": 12.450424514421915, "learning_rate": 8.126323773551624e-06, "loss": 0.8496, "step": 25270 }, { "epoch": 1.07, "grad_norm": 14.037960018758197, "learning_rate": 8.125362890793339e-06, "loss": 0.9003, "step": 25275 }, { "epoch": 1.07, "grad_norm": 10.500772545688967, "learning_rate": 8.124401818553508e-06, "loss": 0.8611, "step": 25280 }, { "epoch": 1.07, "grad_norm": 10.951178068148634, "learning_rate": 8.123440556890399e-06, "loss": 0.8481, "step": 25285 }, { "epoch": 1.07, "grad_norm": 5.482921304706396, "learning_rate": 8.122479105862286e-06, "loss": 0.9034, "step": 25290 }, { "epoch": 1.07, "grad_norm": 4.879728783353924, "learning_rate": 8.121517465527465e-06, "loss": 0.8653, "step": 25295 }, { "epoch": 1.07, "grad_norm": 6.46199076765795, "learning_rate": 8.120555635944234e-06, "loss": 0.8787, "step": 25300 }, { "epoch": 1.07, "grad_norm": 14.245695291533346, "learning_rate": 8.119593617170906e-06, "loss": 0.872, "step": 25305 }, { "epoch": 1.07, "grad_norm": 4.47106259526486, "learning_rate": 8.118631409265809e-06, "loss": 0.8575, "step": 25310 }, { "epoch": 1.07, "grad_norm": 8.393189694720299, "learning_rate": 8.117669012287274e-06, "loss": 0.8337, "step": 25315 }, { "epoch": 1.07, "grad_norm": 11.706976207005242, "learning_rate": 8.11670642629365e-06, "loss": 0.8796, "step": 25320 }, { "epoch": 1.07, "grad_norm": 9.298548483288503, "learning_rate": 8.115743651343295e-06, "loss": 0.8362, "step": 25325 }, { "epoch": 1.07, "grad_norm": 7.62089790959027, "learning_rate": 8.11478068749458e-06, "loss": 0.8517, "step": 25330 }, { "epoch": 1.07, "grad_norm": 13.217788235010783, "learning_rate": 8.11381753480589e-06, "loss": 0.8825, "step": 25335 }, { "epoch": 1.07, "grad_norm": 9.368228159237754, "learning_rate": 8.112854193335613e-06, "loss": 0.8542, "step": 25340 }, { "epoch": 1.07, "grad_norm": 11.153957631591267, "learning_rate": 8.111890663142154e-06, "loss": 0.8592, "step": 25345 }, { "epoch": 1.07, "grad_norm": 16.906510396923906, "learning_rate": 8.110926944283932e-06, "loss": 0.8709, "step": 25350 }, { "epoch": 1.07, "grad_norm": 21.694519542909006, "learning_rate": 8.109963036819373e-06, "loss": 0.8463, "step": 25355 }, { "epoch": 1.07, "grad_norm": 16.810842855949446, "learning_rate": 8.108998940806916e-06, "loss": 0.8817, "step": 25360 }, { "epoch": 1.07, "grad_norm": 10.398551787132956, "learning_rate": 8.108034656305011e-06, "loss": 0.8719, "step": 25365 }, { "epoch": 1.07, "grad_norm": 5.639916160900846, "learning_rate": 8.107070183372118e-06, "loss": 0.8351, "step": 25370 }, { "epoch": 1.07, "grad_norm": 9.263861379214783, "learning_rate": 8.106105522066715e-06, "loss": 0.8545, "step": 25375 }, { "epoch": 1.07, "grad_norm": 9.449413336189044, "learning_rate": 8.10514067244728e-06, "loss": 0.8616, "step": 25380 }, { "epoch": 1.07, "grad_norm": 5.941075232733145, "learning_rate": 8.104175634572316e-06, "loss": 0.8422, "step": 25385 }, { "epoch": 1.07, "grad_norm": 8.043681790486543, "learning_rate": 8.103210408500324e-06, "loss": 0.8653, "step": 25390 }, { "epoch": 1.07, "grad_norm": 6.449934044436362, "learning_rate": 8.102244994289827e-06, "loss": 0.8426, "step": 25395 }, { "epoch": 1.07, "grad_norm": 7.788802553175485, "learning_rate": 8.101279391999354e-06, "loss": 0.8309, "step": 25400 }, { "epoch": 1.08, "grad_norm": 10.688512780888843, "learning_rate": 8.100313601687448e-06, "loss": 0.8788, "step": 25405 }, { "epoch": 1.08, "grad_norm": 6.769083051412717, "learning_rate": 8.099347623412659e-06, "loss": 0.8753, "step": 25410 }, { "epoch": 1.08, "grad_norm": 7.876451117223648, "learning_rate": 8.098381457233553e-06, "loss": 0.8761, "step": 25415 }, { "epoch": 1.08, "grad_norm": 7.8238583766418985, "learning_rate": 8.097415103208706e-06, "loss": 0.8621, "step": 25420 }, { "epoch": 1.08, "grad_norm": 10.976909261461111, "learning_rate": 8.096448561396706e-06, "loss": 0.8841, "step": 25425 }, { "epoch": 1.08, "grad_norm": 8.512689370962415, "learning_rate": 8.095481831856148e-06, "loss": 0.8832, "step": 25430 }, { "epoch": 1.08, "grad_norm": 8.39359475834383, "learning_rate": 8.094514914645646e-06, "loss": 0.8979, "step": 25435 }, { "epoch": 1.08, "grad_norm": 17.400975150587065, "learning_rate": 8.093547809823821e-06, "loss": 0.8933, "step": 25440 }, { "epoch": 1.08, "grad_norm": 17.616837411935776, "learning_rate": 8.092580517449305e-06, "loss": 0.7896, "step": 25445 }, { "epoch": 1.08, "grad_norm": 7.758981529465788, "learning_rate": 8.091613037580739e-06, "loss": 0.887, "step": 25450 }, { "epoch": 1.08, "grad_norm": 7.40101586473215, "learning_rate": 8.090645370276783e-06, "loss": 0.9036, "step": 25455 }, { "epoch": 1.08, "grad_norm": 7.714757552984231, "learning_rate": 8.0896775155961e-06, "loss": 0.917, "step": 25460 }, { "epoch": 1.08, "grad_norm": 6.728322393244592, "learning_rate": 8.088709473597373e-06, "loss": 0.8589, "step": 25465 }, { "epoch": 1.08, "grad_norm": 6.721356900003896, "learning_rate": 8.087741244339287e-06, "loss": 0.8685, "step": 25470 }, { "epoch": 1.08, "grad_norm": 5.501083457602186, "learning_rate": 8.086772827880547e-06, "loss": 0.8691, "step": 25475 }, { "epoch": 1.08, "grad_norm": 6.5141982684354796, "learning_rate": 8.085804224279861e-06, "loss": 0.822, "step": 25480 }, { "epoch": 1.08, "grad_norm": 5.28554874554032, "learning_rate": 8.084835433595954e-06, "loss": 0.8734, "step": 25485 }, { "epoch": 1.08, "grad_norm": 4.309531759924036, "learning_rate": 8.083866455887562e-06, "loss": 0.8477, "step": 25490 }, { "epoch": 1.08, "grad_norm": 4.522826605714632, "learning_rate": 8.08289729121343e-06, "loss": 0.8694, "step": 25495 }, { "epoch": 1.08, "grad_norm": 4.906033594042015, "learning_rate": 8.081927939632317e-06, "loss": 0.8566, "step": 25500 }, { "epoch": 1.08, "grad_norm": 5.350908645257273, "learning_rate": 8.080958401202991e-06, "loss": 0.8299, "step": 25505 }, { "epoch": 1.08, "grad_norm": 7.3371257898694155, "learning_rate": 8.079988675984231e-06, "loss": 0.874, "step": 25510 }, { "epoch": 1.08, "grad_norm": 8.735118510974356, "learning_rate": 8.079018764034832e-06, "loss": 0.8469, "step": 25515 }, { "epoch": 1.08, "grad_norm": 7.1169073382059205, "learning_rate": 8.078048665413594e-06, "loss": 0.8729, "step": 25520 }, { "epoch": 1.08, "grad_norm": 6.048799588499916, "learning_rate": 8.077078380179331e-06, "loss": 0.8951, "step": 25525 }, { "epoch": 1.08, "grad_norm": 5.800440727369588, "learning_rate": 8.076107908390872e-06, "loss": 0.8499, "step": 25530 }, { "epoch": 1.08, "grad_norm": 8.799500015142693, "learning_rate": 8.07513725010705e-06, "loss": 0.8327, "step": 25535 }, { "epoch": 1.08, "grad_norm": 7.476954778671344, "learning_rate": 8.074166405386714e-06, "loss": 0.9115, "step": 25540 }, { "epoch": 1.08, "grad_norm": 6.886379614249767, "learning_rate": 8.073195374288725e-06, "loss": 0.8922, "step": 25545 }, { "epoch": 1.08, "grad_norm": 8.47774889886093, "learning_rate": 8.072224156871953e-06, "loss": 0.8281, "step": 25550 }, { "epoch": 1.08, "grad_norm": 5.910630046193432, "learning_rate": 8.07125275319528e-06, "loss": 0.866, "step": 25555 }, { "epoch": 1.08, "grad_norm": 8.843111271943483, "learning_rate": 8.0702811633176e-06, "loss": 0.8739, "step": 25560 }, { "epoch": 1.08, "grad_norm": 10.183151655698447, "learning_rate": 8.069309387297814e-06, "loss": 0.8692, "step": 25565 }, { "epoch": 1.08, "grad_norm": 17.049354155448636, "learning_rate": 8.068337425194844e-06, "loss": 0.8894, "step": 25570 }, { "epoch": 1.08, "grad_norm": 13.408895707763493, "learning_rate": 8.067365277067612e-06, "loss": 0.8692, "step": 25575 }, { "epoch": 1.08, "grad_norm": 20.460245148921782, "learning_rate": 8.066392942975059e-06, "loss": 0.878, "step": 25580 }, { "epoch": 1.08, "grad_norm": 11.358839716949952, "learning_rate": 8.065420422976135e-06, "loss": 0.8634, "step": 25585 }, { "epoch": 1.08, "grad_norm": 4.455306316482305, "learning_rate": 8.0644477171298e-06, "loss": 0.8695, "step": 25590 }, { "epoch": 1.08, "grad_norm": 6.568262201494377, "learning_rate": 8.063474825495026e-06, "loss": 0.8589, "step": 25595 }, { "epoch": 1.08, "grad_norm": 9.823863298858734, "learning_rate": 8.062501748130796e-06, "loss": 0.8362, "step": 25600 }, { "epoch": 1.08, "grad_norm": 6.945576410242681, "learning_rate": 8.061528485096107e-06, "loss": 0.8614, "step": 25605 }, { "epoch": 1.08, "grad_norm": 6.855890023407875, "learning_rate": 8.060555036449965e-06, "loss": 0.8372, "step": 25610 }, { "epoch": 1.08, "grad_norm": 5.994632923702126, "learning_rate": 8.059581402251384e-06, "loss": 0.8975, "step": 25615 }, { "epoch": 1.08, "grad_norm": 9.732606836065855, "learning_rate": 8.058607582559397e-06, "loss": 0.8667, "step": 25620 }, { "epoch": 1.08, "grad_norm": 5.514484607296094, "learning_rate": 8.05763357743304e-06, "loss": 0.8502, "step": 25625 }, { "epoch": 1.08, "grad_norm": 8.33383914685765, "learning_rate": 8.056659386931367e-06, "loss": 0.8803, "step": 25630 }, { "epoch": 1.08, "grad_norm": 8.781920285047889, "learning_rate": 8.055685011113439e-06, "loss": 0.872, "step": 25635 }, { "epoch": 1.09, "grad_norm": 20.052349810923946, "learning_rate": 8.05471045003833e-06, "loss": 0.8793, "step": 25640 }, { "epoch": 1.09, "grad_norm": 4.865399164145096, "learning_rate": 8.053735703765125e-06, "loss": 0.9175, "step": 25645 }, { "epoch": 1.09, "grad_norm": 5.176753269916266, "learning_rate": 8.052760772352916e-06, "loss": 0.8524, "step": 25650 }, { "epoch": 1.09, "grad_norm": 5.148275771150095, "learning_rate": 8.051785655860817e-06, "loss": 0.8461, "step": 25655 }, { "epoch": 1.09, "grad_norm": 5.877476467046608, "learning_rate": 8.050810354347941e-06, "loss": 0.8777, "step": 25660 }, { "epoch": 1.09, "grad_norm": 5.24096921947043, "learning_rate": 8.04983486787342e-06, "loss": 0.9171, "step": 25665 }, { "epoch": 1.09, "grad_norm": 13.776186080489287, "learning_rate": 8.048859196496395e-06, "loss": 0.8736, "step": 25670 }, { "epoch": 1.09, "grad_norm": 11.168948874751703, "learning_rate": 8.047883340276016e-06, "loss": 0.8428, "step": 25675 }, { "epoch": 1.09, "grad_norm": 16.58707484326654, "learning_rate": 8.046907299271451e-06, "loss": 0.8719, "step": 25680 }, { "epoch": 1.09, "grad_norm": 13.407123580900468, "learning_rate": 8.045931073541869e-06, "loss": 0.8842, "step": 25685 }, { "epoch": 1.09, "grad_norm": 11.688239916780878, "learning_rate": 8.04495466314646e-06, "loss": 0.8484, "step": 25690 }, { "epoch": 1.09, "grad_norm": 7.0145413475936875, "learning_rate": 8.043978068144418e-06, "loss": 0.9074, "step": 25695 }, { "epoch": 1.09, "grad_norm": 8.655220322296769, "learning_rate": 8.043001288594952e-06, "loss": 0.832, "step": 25700 }, { "epoch": 1.09, "grad_norm": 5.655896965371052, "learning_rate": 8.04202432455728e-06, "loss": 0.8705, "step": 25705 }, { "epoch": 1.09, "grad_norm": 7.646697002295684, "learning_rate": 8.041047176090635e-06, "loss": 0.8692, "step": 25710 }, { "epoch": 1.09, "grad_norm": 4.943349605285469, "learning_rate": 8.040069843254258e-06, "loss": 0.8486, "step": 25715 }, { "epoch": 1.09, "grad_norm": 5.3644949390245165, "learning_rate": 8.0390923261074e-06, "loss": 0.8522, "step": 25720 }, { "epoch": 1.09, "grad_norm": 6.3719193998857095, "learning_rate": 8.038114624709328e-06, "loss": 0.8685, "step": 25725 }, { "epoch": 1.09, "grad_norm": 4.86060402463312, "learning_rate": 8.037136739119315e-06, "loss": 0.8505, "step": 25730 }, { "epoch": 1.09, "grad_norm": 8.0805585058419, "learning_rate": 8.036158669396646e-06, "loss": 0.9115, "step": 25735 }, { "epoch": 1.09, "grad_norm": 18.465607613503146, "learning_rate": 8.035180415600621e-06, "loss": 0.8402, "step": 25740 }, { "epoch": 1.09, "grad_norm": 16.352602004998136, "learning_rate": 8.034201977790548e-06, "loss": 0.8695, "step": 25745 }, { "epoch": 1.09, "grad_norm": 6.370508561390915, "learning_rate": 8.033223356025746e-06, "loss": 0.8559, "step": 25750 }, { "epoch": 1.09, "grad_norm": 5.251686379445301, "learning_rate": 8.032244550365549e-06, "loss": 0.8805, "step": 25755 }, { "epoch": 1.09, "grad_norm": 5.399152481844639, "learning_rate": 8.031265560869293e-06, "loss": 0.8776, "step": 25760 }, { "epoch": 1.09, "grad_norm": 19.369761465997048, "learning_rate": 8.030286387596337e-06, "loss": 0.8584, "step": 25765 }, { "epoch": 1.09, "grad_norm": 23.655973894547344, "learning_rate": 8.029307030606043e-06, "loss": 0.8815, "step": 25770 }, { "epoch": 1.09, "grad_norm": 10.528715431816428, "learning_rate": 8.028327489957787e-06, "loss": 0.9026, "step": 25775 }, { "epoch": 1.09, "grad_norm": 13.824315673789933, "learning_rate": 8.027347765710956e-06, "loss": 0.8678, "step": 25780 }, { "epoch": 1.09, "grad_norm": 6.271860445422015, "learning_rate": 8.026367857924945e-06, "loss": 0.8436, "step": 25785 }, { "epoch": 1.09, "grad_norm": 8.070055992732557, "learning_rate": 8.025387766659169e-06, "loss": 0.8853, "step": 25790 }, { "epoch": 1.09, "grad_norm": 6.787664628073545, "learning_rate": 8.024407491973041e-06, "loss": 0.8603, "step": 25795 }, { "epoch": 1.09, "grad_norm": 8.48574233385568, "learning_rate": 8.023427033925998e-06, "loss": 0.8843, "step": 25800 }, { "epoch": 1.09, "grad_norm": 17.14355182985522, "learning_rate": 8.02244639257748e-06, "loss": 0.8622, "step": 25805 }, { "epoch": 1.09, "grad_norm": 22.698802917790847, "learning_rate": 8.021465567986937e-06, "loss": 0.8623, "step": 25810 }, { "epoch": 1.09, "grad_norm": 24.978878559406873, "learning_rate": 8.020484560213839e-06, "loss": 0.8768, "step": 25815 }, { "epoch": 1.09, "grad_norm": 7.859632622654838, "learning_rate": 8.019503369317658e-06, "loss": 0.8328, "step": 25820 }, { "epoch": 1.09, "grad_norm": 8.156135491009852, "learning_rate": 8.018521995357881e-06, "loss": 0.8322, "step": 25825 }, { "epoch": 1.09, "grad_norm": 6.1718075339290275, "learning_rate": 8.01754043839401e-06, "loss": 0.8852, "step": 25830 }, { "epoch": 1.09, "grad_norm": 14.868105382476271, "learning_rate": 8.016558698485548e-06, "loss": 0.906, "step": 25835 }, { "epoch": 1.09, "grad_norm": 10.586508122436015, "learning_rate": 8.015576775692018e-06, "loss": 0.8814, "step": 25840 }, { "epoch": 1.09, "grad_norm": 5.153466709562309, "learning_rate": 8.01459467007295e-06, "loss": 0.8317, "step": 25845 }, { "epoch": 1.09, "grad_norm": 4.773015256721841, "learning_rate": 8.01361238168789e-06, "loss": 0.9061, "step": 25850 }, { "epoch": 1.09, "grad_norm": 8.363952338448904, "learning_rate": 8.012629910596386e-06, "loss": 0.8457, "step": 25855 }, { "epoch": 1.09, "grad_norm": 5.3403625219143525, "learning_rate": 8.011647256858005e-06, "loss": 0.8697, "step": 25860 }, { "epoch": 1.09, "grad_norm": 11.559433307459656, "learning_rate": 8.010664420532322e-06, "loss": 0.8599, "step": 25865 }, { "epoch": 1.09, "grad_norm": 13.611868273622242, "learning_rate": 8.009681401678923e-06, "loss": 0.8354, "step": 25870 }, { "epoch": 1.1, "grad_norm": 13.184817812150095, "learning_rate": 8.008698200357405e-06, "loss": 0.888, "step": 25875 }, { "epoch": 1.1, "grad_norm": 13.778169734887975, "learning_rate": 8.00771481662738e-06, "loss": 0.8199, "step": 25880 }, { "epoch": 1.1, "grad_norm": 13.961395734334369, "learning_rate": 8.006731250548464e-06, "loss": 0.9039, "step": 25885 }, { "epoch": 1.1, "grad_norm": 20.475687786303837, "learning_rate": 8.005747502180288e-06, "loss": 0.8698, "step": 25890 }, { "epoch": 1.1, "grad_norm": 4.470655738657399, "learning_rate": 8.004763571582497e-06, "loss": 0.842, "step": 25895 }, { "epoch": 1.1, "grad_norm": 9.670784407083827, "learning_rate": 8.003779458814739e-06, "loss": 0.8929, "step": 25900 }, { "epoch": 1.1, "grad_norm": 13.447836649571068, "learning_rate": 8.002795163936681e-06, "loss": 0.9021, "step": 25905 }, { "epoch": 1.1, "grad_norm": 4.895677954365911, "learning_rate": 8.001810687007997e-06, "loss": 0.8484, "step": 25910 }, { "epoch": 1.1, "grad_norm": 6.348058610796533, "learning_rate": 8.000826028088374e-06, "loss": 0.8678, "step": 25915 }, { "epoch": 1.1, "grad_norm": 7.442044068857169, "learning_rate": 7.999841187237507e-06, "loss": 0.8438, "step": 25920 }, { "epoch": 1.1, "grad_norm": 5.697303256082096, "learning_rate": 7.998856164515107e-06, "loss": 0.8572, "step": 25925 }, { "epoch": 1.1, "grad_norm": 6.417868423191606, "learning_rate": 7.997870959980889e-06, "loss": 0.9233, "step": 25930 }, { "epoch": 1.1, "grad_norm": 9.660283134574927, "learning_rate": 7.996885573694585e-06, "loss": 0.8578, "step": 25935 }, { "epoch": 1.1, "grad_norm": 4.413815718340507, "learning_rate": 7.99590000571594e-06, "loss": 0.8521, "step": 25940 }, { "epoch": 1.1, "grad_norm": 6.66924631816616, "learning_rate": 7.9949142561047e-06, "loss": 0.8689, "step": 25945 }, { "epoch": 1.1, "grad_norm": 9.7151451929432, "learning_rate": 7.99392832492063e-06, "loss": 0.8809, "step": 25950 }, { "epoch": 1.1, "grad_norm": 7.061240815707852, "learning_rate": 7.992942212223504e-06, "loss": 0.8851, "step": 25955 }, { "epoch": 1.1, "grad_norm": 15.479900462634221, "learning_rate": 7.991955918073109e-06, "loss": 0.8816, "step": 25960 }, { "epoch": 1.1, "grad_norm": 12.548875450606191, "learning_rate": 7.99096944252924e-06, "loss": 0.8834, "step": 25965 }, { "epoch": 1.1, "grad_norm": 22.6096576128708, "learning_rate": 7.989982785651702e-06, "loss": 0.8738, "step": 25970 }, { "epoch": 1.1, "grad_norm": 22.369854671946587, "learning_rate": 7.988995947500317e-06, "loss": 0.8493, "step": 25975 }, { "epoch": 1.1, "grad_norm": 27.00822721519099, "learning_rate": 7.988008928134911e-06, "loss": 0.8446, "step": 25980 }, { "epoch": 1.1, "grad_norm": 42.04359968538319, "learning_rate": 7.987021727615326e-06, "loss": 0.8649, "step": 25985 }, { "epoch": 1.1, "grad_norm": 39.66896717495057, "learning_rate": 7.986034346001413e-06, "loss": 0.8756, "step": 25990 }, { "epoch": 1.1, "grad_norm": 12.514514387072678, "learning_rate": 7.98504678335303e-06, "loss": 0.8687, "step": 25995 }, { "epoch": 1.1, "grad_norm": 21.961964168573633, "learning_rate": 7.984059039730057e-06, "loss": 0.9258, "step": 26000 }, { "epoch": 1.1, "grad_norm": 10.587983893889762, "learning_rate": 7.983071115192372e-06, "loss": 0.8884, "step": 26005 }, { "epoch": 1.1, "grad_norm": 10.909622389012378, "learning_rate": 7.982083009799873e-06, "loss": 0.862, "step": 26010 }, { "epoch": 1.1, "grad_norm": 7.483467070218326, "learning_rate": 7.981094723612463e-06, "loss": 0.8729, "step": 26015 }, { "epoch": 1.1, "grad_norm": 10.129897051040773, "learning_rate": 7.980106256690063e-06, "loss": 0.8269, "step": 26020 }, { "epoch": 1.1, "grad_norm": 6.308340333664583, "learning_rate": 7.9791176090926e-06, "loss": 0.8409, "step": 26025 }, { "epoch": 1.1, "grad_norm": 7.864691547736599, "learning_rate": 7.97812878088001e-06, "loss": 0.8548, "step": 26030 }, { "epoch": 1.1, "grad_norm": 11.76402197889257, "learning_rate": 7.977139772112245e-06, "loss": 0.8513, "step": 26035 }, { "epoch": 1.1, "grad_norm": 5.422863501669311, "learning_rate": 7.976150582849265e-06, "loss": 0.8474, "step": 26040 }, { "epoch": 1.1, "grad_norm": 7.193621857563075, "learning_rate": 7.975161213151041e-06, "loss": 0.8691, "step": 26045 }, { "epoch": 1.1, "grad_norm": 10.366649439526833, "learning_rate": 7.974171663077557e-06, "loss": 0.8842, "step": 26050 }, { "epoch": 1.1, "grad_norm": 6.381058963025625, "learning_rate": 7.973181932688805e-06, "loss": 0.8432, "step": 26055 }, { "epoch": 1.1, "grad_norm": 12.788441498061113, "learning_rate": 7.97219202204479e-06, "loss": 0.888, "step": 26060 }, { "epoch": 1.1, "grad_norm": 7.723974431855365, "learning_rate": 7.97120193120553e-06, "loss": 0.8761, "step": 26065 }, { "epoch": 1.1, "grad_norm": 5.515420774520167, "learning_rate": 7.970211660231046e-06, "loss": 0.8214, "step": 26070 }, { "epoch": 1.1, "grad_norm": 6.359362255999275, "learning_rate": 7.969221209181379e-06, "loss": 0.8413, "step": 26075 }, { "epoch": 1.1, "grad_norm": 7.7258402674579205, "learning_rate": 7.968230578116578e-06, "loss": 0.936, "step": 26080 }, { "epoch": 1.1, "grad_norm": 6.88223033280284, "learning_rate": 7.967239767096699e-06, "loss": 0.8554, "step": 26085 }, { "epoch": 1.1, "grad_norm": 8.223486309806825, "learning_rate": 7.966248776181812e-06, "loss": 0.8501, "step": 26090 }, { "epoch": 1.1, "grad_norm": 9.35221063475463, "learning_rate": 7.965257605432e-06, "loss": 0.8774, "step": 26095 }, { "epoch": 1.1, "grad_norm": 5.706253774820753, "learning_rate": 7.964266254907353e-06, "loss": 0.8865, "step": 26100 }, { "epoch": 1.1, "grad_norm": 4.810241986121962, "learning_rate": 7.963274724667977e-06, "loss": 0.8506, "step": 26105 }, { "epoch": 1.11, "grad_norm": 5.524693264060233, "learning_rate": 7.962283014773981e-06, "loss": 0.8677, "step": 26110 }, { "epoch": 1.11, "grad_norm": 5.647485460122754, "learning_rate": 7.961291125285492e-06, "loss": 0.8864, "step": 26115 }, { "epoch": 1.11, "grad_norm": 4.342578105314222, "learning_rate": 7.960299056262645e-06, "loss": 0.8742, "step": 26120 }, { "epoch": 1.11, "grad_norm": 5.883492960986341, "learning_rate": 7.959306807765586e-06, "loss": 0.8624, "step": 26125 }, { "epoch": 1.11, "grad_norm": 5.2127920605974545, "learning_rate": 7.958314379854473e-06, "loss": 0.8546, "step": 26130 }, { "epoch": 1.11, "grad_norm": 5.358431148667071, "learning_rate": 7.957321772589471e-06, "loss": 0.8813, "step": 26135 }, { "epoch": 1.11, "grad_norm": 5.083872585523129, "learning_rate": 7.956328986030763e-06, "loss": 0.849, "step": 26140 }, { "epoch": 1.11, "grad_norm": 6.352373832275167, "learning_rate": 7.955336020238536e-06, "loss": 0.8705, "step": 26145 }, { "epoch": 1.11, "grad_norm": 4.708133325457954, "learning_rate": 7.954342875272991e-06, "loss": 0.8724, "step": 26150 }, { "epoch": 1.11, "grad_norm": 8.127430736276292, "learning_rate": 7.953349551194339e-06, "loss": 0.8599, "step": 26155 }, { "epoch": 1.11, "grad_norm": 5.438810760041203, "learning_rate": 7.952356048062804e-06, "loss": 0.8983, "step": 26160 }, { "epoch": 1.11, "grad_norm": 5.715100680774751, "learning_rate": 7.95136236593862e-06, "loss": 0.8521, "step": 26165 }, { "epoch": 1.11, "grad_norm": 5.458606231230873, "learning_rate": 7.950368504882027e-06, "loss": 0.871, "step": 26170 }, { "epoch": 1.11, "grad_norm": 11.749413807861309, "learning_rate": 7.949374464953282e-06, "loss": 0.8538, "step": 26175 }, { "epoch": 1.11, "grad_norm": 13.335430301614918, "learning_rate": 7.948380246212652e-06, "loss": 0.8431, "step": 26180 }, { "epoch": 1.11, "grad_norm": 21.461542093516865, "learning_rate": 7.947385848720412e-06, "loss": 0.828, "step": 26185 }, { "epoch": 1.11, "grad_norm": 5.388743793404191, "learning_rate": 7.94639127253685e-06, "loss": 0.8589, "step": 26190 }, { "epoch": 1.11, "grad_norm": 5.602818741191548, "learning_rate": 7.945396517722264e-06, "loss": 0.8574, "step": 26195 }, { "epoch": 1.11, "grad_norm": 14.37538850341335, "learning_rate": 7.944401584336962e-06, "loss": 0.8204, "step": 26200 }, { "epoch": 1.11, "grad_norm": 12.14909866165158, "learning_rate": 7.943406472441267e-06, "loss": 0.8342, "step": 26205 }, { "epoch": 1.11, "grad_norm": 17.678975504214247, "learning_rate": 7.942411182095506e-06, "loss": 0.8153, "step": 26210 }, { "epoch": 1.11, "grad_norm": 14.938447870937582, "learning_rate": 7.941415713360024e-06, "loss": 0.8592, "step": 26215 }, { "epoch": 1.11, "grad_norm": 16.556492004981912, "learning_rate": 7.940420066295169e-06, "loss": 0.8618, "step": 26220 }, { "epoch": 1.11, "grad_norm": 6.675382693372199, "learning_rate": 7.939424240961308e-06, "loss": 0.8701, "step": 26225 }, { "epoch": 1.11, "grad_norm": 13.944574998291337, "learning_rate": 7.938428237418814e-06, "loss": 0.8584, "step": 26230 }, { "epoch": 1.11, "grad_norm": 12.331353040685906, "learning_rate": 7.937432055728072e-06, "loss": 0.8625, "step": 26235 }, { "epoch": 1.11, "grad_norm": 5.699687380772396, "learning_rate": 7.936435695949474e-06, "loss": 0.8612, "step": 26240 }, { "epoch": 1.11, "grad_norm": 5.0692182989667876, "learning_rate": 7.935439158143432e-06, "loss": 0.8632, "step": 26245 }, { "epoch": 1.11, "grad_norm": 4.4494704988481075, "learning_rate": 7.934442442370361e-06, "loss": 0.8861, "step": 26250 }, { "epoch": 1.11, "grad_norm": 6.7805451667261645, "learning_rate": 7.933445548690687e-06, "loss": 0.897, "step": 26255 }, { "epoch": 1.11, "grad_norm": 5.79196521993131, "learning_rate": 7.932448477164849e-06, "loss": 0.8042, "step": 26260 }, { "epoch": 1.11, "grad_norm": 7.499576239836002, "learning_rate": 7.931451227853299e-06, "loss": 0.856, "step": 26265 }, { "epoch": 1.11, "grad_norm": 8.230448573932353, "learning_rate": 7.930453800816495e-06, "loss": 0.8556, "step": 26270 }, { "epoch": 1.11, "grad_norm": 5.982385423422259, "learning_rate": 7.929456196114911e-06, "loss": 0.8132, "step": 26275 }, { "epoch": 1.11, "grad_norm": 5.12457874234298, "learning_rate": 7.928458413809024e-06, "loss": 0.8544, "step": 26280 }, { "epoch": 1.11, "grad_norm": 4.930002930423708, "learning_rate": 7.927460453959333e-06, "loss": 0.8689, "step": 26285 }, { "epoch": 1.11, "grad_norm": 7.788295212520432, "learning_rate": 7.926462316626335e-06, "loss": 0.8523, "step": 26290 }, { "epoch": 1.11, "grad_norm": 8.37551487558383, "learning_rate": 7.925464001870549e-06, "loss": 0.8706, "step": 26295 }, { "epoch": 1.11, "grad_norm": 5.554335521737757, "learning_rate": 7.924465509752494e-06, "loss": 0.8771, "step": 26300 }, { "epoch": 1.11, "grad_norm": 8.320329767579542, "learning_rate": 7.923466840332715e-06, "loss": 0.848, "step": 26305 }, { "epoch": 1.11, "grad_norm": 6.353197471583026, "learning_rate": 7.922467993671749e-06, "loss": 0.8652, "step": 26310 }, { "epoch": 1.11, "grad_norm": 12.707890319678297, "learning_rate": 7.921468969830157e-06, "loss": 0.832, "step": 26315 }, { "epoch": 1.11, "grad_norm": 16.082846990192856, "learning_rate": 7.920469768868508e-06, "loss": 0.8951, "step": 26320 }, { "epoch": 1.11, "grad_norm": 12.155509125144548, "learning_rate": 7.919470390847379e-06, "loss": 0.8326, "step": 26325 }, { "epoch": 1.11, "grad_norm": 8.4691323602816, "learning_rate": 7.91847083582736e-06, "loss": 0.846, "step": 26330 }, { "epoch": 1.11, "grad_norm": 11.498080104062787, "learning_rate": 7.91747110386905e-06, "loss": 0.8174, "step": 26335 }, { "epoch": 1.11, "grad_norm": 16.724565374138287, "learning_rate": 7.916471195033062e-06, "loss": 0.8793, "step": 26340 }, { "epoch": 1.11, "grad_norm": 5.084562589331612, "learning_rate": 7.915471109380015e-06, "loss": 0.9458, "step": 26345 }, { "epoch": 1.12, "grad_norm": 5.153207951653013, "learning_rate": 7.914470846970543e-06, "loss": 0.8639, "step": 26350 }, { "epoch": 1.12, "grad_norm": 5.044821088268971, "learning_rate": 7.91347040786529e-06, "loss": 0.8634, "step": 26355 }, { "epoch": 1.12, "grad_norm": 4.313021099573543, "learning_rate": 7.912469792124905e-06, "loss": 0.8509, "step": 26360 }, { "epoch": 1.12, "grad_norm": 5.112470298646595, "learning_rate": 7.911468999810057e-06, "loss": 0.8506, "step": 26365 }, { "epoch": 1.12, "grad_norm": 8.219758190596584, "learning_rate": 7.91046803098142e-06, "loss": 0.8513, "step": 26370 }, { "epoch": 1.12, "grad_norm": 6.242959216042871, "learning_rate": 7.90946688569968e-06, "loss": 0.8752, "step": 26375 }, { "epoch": 1.12, "grad_norm": 5.937742429038265, "learning_rate": 7.908465564025531e-06, "loss": 0.8602, "step": 26380 }, { "epoch": 1.12, "grad_norm": 4.861255864363088, "learning_rate": 7.907464066019682e-06, "loss": 0.878, "step": 26385 }, { "epoch": 1.12, "grad_norm": 6.479599626662508, "learning_rate": 7.906462391742852e-06, "loss": 0.868, "step": 26390 }, { "epoch": 1.12, "grad_norm": 10.55122443045248, "learning_rate": 7.905460541255767e-06, "loss": 0.8502, "step": 26395 }, { "epoch": 1.12, "grad_norm": 10.507384465887881, "learning_rate": 7.904458514619169e-06, "loss": 0.821, "step": 26400 }, { "epoch": 1.12, "grad_norm": 7.619197406412334, "learning_rate": 7.903456311893807e-06, "loss": 0.8539, "step": 26405 }, { "epoch": 1.12, "grad_norm": 9.14679174209778, "learning_rate": 7.90245393314044e-06, "loss": 0.9354, "step": 26410 }, { "epoch": 1.12, "grad_norm": 5.852325797491746, "learning_rate": 7.901451378419841e-06, "loss": 0.8481, "step": 26415 }, { "epoch": 1.12, "grad_norm": 5.236739403660115, "learning_rate": 7.900448647792791e-06, "loss": 0.8399, "step": 26420 }, { "epoch": 1.12, "grad_norm": 6.985129513759088, "learning_rate": 7.899445741320084e-06, "loss": 0.8593, "step": 26425 }, { "epoch": 1.12, "grad_norm": 9.215414650311526, "learning_rate": 7.898442659062522e-06, "loss": 0.8446, "step": 26430 }, { "epoch": 1.12, "grad_norm": 10.799833885818252, "learning_rate": 7.897439401080919e-06, "loss": 0.8644, "step": 26435 }, { "epoch": 1.12, "grad_norm": 6.404066915088412, "learning_rate": 7.896435967436101e-06, "loss": 0.8331, "step": 26440 }, { "epoch": 1.12, "grad_norm": 6.662322846064061, "learning_rate": 7.8954323581889e-06, "loss": 0.8761, "step": 26445 }, { "epoch": 1.12, "grad_norm": 7.529573161338292, "learning_rate": 7.894428573400165e-06, "loss": 0.8992, "step": 26450 }, { "epoch": 1.12, "grad_norm": 6.923357730500779, "learning_rate": 7.89342461313075e-06, "loss": 0.8588, "step": 26455 }, { "epoch": 1.12, "grad_norm": 9.421254811733112, "learning_rate": 7.892420477441527e-06, "loss": 0.9093, "step": 26460 }, { "epoch": 1.12, "grad_norm": 9.978084914221466, "learning_rate": 7.891416166393367e-06, "loss": 0.8671, "step": 26465 }, { "epoch": 1.12, "grad_norm": 10.665992272479397, "learning_rate": 7.890411680047163e-06, "loss": 0.8236, "step": 26470 }, { "epoch": 1.12, "grad_norm": 7.566813411659888, "learning_rate": 7.889407018463812e-06, "loss": 0.837, "step": 26475 }, { "epoch": 1.12, "grad_norm": 4.553667692697166, "learning_rate": 7.888402181704226e-06, "loss": 0.8303, "step": 26480 }, { "epoch": 1.12, "grad_norm": 11.124962721895892, "learning_rate": 7.887397169829323e-06, "loss": 0.8545, "step": 26485 }, { "epoch": 1.12, "grad_norm": 8.453632999725867, "learning_rate": 7.886391982900034e-06, "loss": 0.878, "step": 26490 }, { "epoch": 1.12, "grad_norm": 13.215457009175124, "learning_rate": 7.885386620977302e-06, "loss": 0.8492, "step": 26495 }, { "epoch": 1.12, "grad_norm": 4.655740398770599, "learning_rate": 7.88438108412208e-06, "loss": 0.8311, "step": 26500 }, { "epoch": 1.12, "grad_norm": 4.960290676498367, "learning_rate": 7.883375372395326e-06, "loss": 0.9035, "step": 26505 }, { "epoch": 1.12, "grad_norm": 4.793186051565295, "learning_rate": 7.882369485858018e-06, "loss": 0.8517, "step": 26510 }, { "epoch": 1.12, "grad_norm": 9.792223978566913, "learning_rate": 7.881363424571137e-06, "loss": 0.8673, "step": 26515 }, { "epoch": 1.12, "grad_norm": 6.799364079032052, "learning_rate": 7.880357188595683e-06, "loss": 0.8701, "step": 26520 }, { "epoch": 1.12, "grad_norm": 6.329275825766881, "learning_rate": 7.879350777992653e-06, "loss": 0.8771, "step": 26525 }, { "epoch": 1.12, "grad_norm": 6.4701577246268664, "learning_rate": 7.878344192823069e-06, "loss": 0.8886, "step": 26530 }, { "epoch": 1.12, "grad_norm": 6.890569453564953, "learning_rate": 7.877337433147954e-06, "loss": 0.9337, "step": 26535 }, { "epoch": 1.12, "grad_norm": 4.897192483503493, "learning_rate": 7.876330499028344e-06, "loss": 0.8643, "step": 26540 }, { "epoch": 1.12, "grad_norm": 6.1961950066339355, "learning_rate": 7.875323390525291e-06, "loss": 0.8619, "step": 26545 }, { "epoch": 1.12, "grad_norm": 6.560107205409909, "learning_rate": 7.87431610769985e-06, "loss": 0.8342, "step": 26550 }, { "epoch": 1.12, "grad_norm": 10.971556707739259, "learning_rate": 7.87330865061309e-06, "loss": 0.8848, "step": 26555 }, { "epoch": 1.12, "grad_norm": 7.719900411231033, "learning_rate": 7.872301019326089e-06, "loss": 0.8724, "step": 26560 }, { "epoch": 1.12, "grad_norm": 5.089131466554082, "learning_rate": 7.871293213899937e-06, "loss": 0.8987, "step": 26565 }, { "epoch": 1.12, "grad_norm": 6.990412022404094, "learning_rate": 7.870285234395737e-06, "loss": 0.862, "step": 26570 }, { "epoch": 1.12, "grad_norm": 5.748927013077684, "learning_rate": 7.869277080874597e-06, "loss": 0.8281, "step": 26575 }, { "epoch": 1.12, "grad_norm": 5.5222190643836315, "learning_rate": 7.86826875339764e-06, "loss": 0.8592, "step": 26580 }, { "epoch": 1.13, "grad_norm": 6.554976429327962, "learning_rate": 7.867260252025997e-06, "loss": 0.9364, "step": 26585 }, { "epoch": 1.13, "grad_norm": 10.755445220111715, "learning_rate": 7.866251576820811e-06, "loss": 0.8656, "step": 26590 }, { "epoch": 1.13, "grad_norm": 9.20211654830716, "learning_rate": 7.865242727843233e-06, "loss": 0.8728, "step": 26595 }, { "epoch": 1.13, "grad_norm": 21.90600740447074, "learning_rate": 7.86423370515443e-06, "loss": 0.8825, "step": 26600 }, { "epoch": 1.13, "grad_norm": 24.250115247433797, "learning_rate": 7.863224508815575e-06, "loss": 0.8953, "step": 26605 }, { "epoch": 1.13, "grad_norm": 5.776777047109916, "learning_rate": 7.862215138887852e-06, "loss": 0.8734, "step": 26610 }, { "epoch": 1.13, "grad_norm": 11.222521053195054, "learning_rate": 7.861205595432455e-06, "loss": 0.8521, "step": 26615 }, { "epoch": 1.13, "grad_norm": 15.4757726145903, "learning_rate": 7.860195878510592e-06, "loss": 0.8691, "step": 26620 }, { "epoch": 1.13, "grad_norm": 5.753732597558064, "learning_rate": 7.859185988183475e-06, "loss": 0.8422, "step": 26625 }, { "epoch": 1.13, "grad_norm": 10.745038201599531, "learning_rate": 7.858175924512335e-06, "loss": 0.8609, "step": 26630 }, { "epoch": 1.13, "grad_norm": 12.237458005685918, "learning_rate": 7.85716568755841e-06, "loss": 0.9102, "step": 26635 }, { "epoch": 1.13, "grad_norm": 19.52675624499569, "learning_rate": 7.856155277382942e-06, "loss": 0.827, "step": 26640 }, { "epoch": 1.13, "grad_norm": 11.243646000641661, "learning_rate": 7.855144694047195e-06, "loss": 0.8395, "step": 26645 }, { "epoch": 1.13, "grad_norm": 13.421330366379776, "learning_rate": 7.854133937612433e-06, "loss": 0.857, "step": 26650 }, { "epoch": 1.13, "grad_norm": 5.013234633397708, "learning_rate": 7.85312300813994e-06, "loss": 0.8337, "step": 26655 }, { "epoch": 1.13, "grad_norm": 6.72309012691772, "learning_rate": 7.852111905691001e-06, "loss": 0.8483, "step": 26660 }, { "epoch": 1.13, "grad_norm": 11.988992360580719, "learning_rate": 7.85110063032692e-06, "loss": 0.8367, "step": 26665 }, { "epoch": 1.13, "grad_norm": 15.060130387338289, "learning_rate": 7.850089182109003e-06, "loss": 0.832, "step": 26670 }, { "epoch": 1.13, "grad_norm": 4.612872499527278, "learning_rate": 7.849077561098576e-06, "loss": 0.8793, "step": 26675 }, { "epoch": 1.13, "grad_norm": 6.667532171093065, "learning_rate": 7.848065767356972e-06, "loss": 0.8645, "step": 26680 }, { "epoch": 1.13, "grad_norm": 7.807332830117759, "learning_rate": 7.847053800945525e-06, "loss": 0.8319, "step": 26685 }, { "epoch": 1.13, "grad_norm": 5.385690809313313, "learning_rate": 7.846041661925597e-06, "loss": 0.8669, "step": 26690 }, { "epoch": 1.13, "grad_norm": 9.233918128482422, "learning_rate": 7.845029350358542e-06, "loss": 0.8701, "step": 26695 }, { "epoch": 1.13, "grad_norm": 4.5241799427556675, "learning_rate": 7.844016866305739e-06, "loss": 0.8717, "step": 26700 }, { "epoch": 1.13, "grad_norm": 4.654524793418459, "learning_rate": 7.84300420982857e-06, "loss": 0.8381, "step": 26705 }, { "epoch": 1.13, "grad_norm": 4.317974951972202, "learning_rate": 7.841991380988433e-06, "loss": 0.7977, "step": 26710 }, { "epoch": 1.13, "grad_norm": 4.922015417926553, "learning_rate": 7.840978379846728e-06, "loss": 0.9152, "step": 26715 }, { "epoch": 1.13, "grad_norm": 8.420550301219485, "learning_rate": 7.83996520646487e-06, "loss": 0.839, "step": 26720 }, { "epoch": 1.13, "grad_norm": 11.441949788537928, "learning_rate": 7.83895186090429e-06, "loss": 0.8481, "step": 26725 }, { "epoch": 1.13, "grad_norm": 13.784853697703845, "learning_rate": 7.837938343226421e-06, "loss": 0.8501, "step": 26730 }, { "epoch": 1.13, "grad_norm": 6.99350935362089, "learning_rate": 7.836924653492709e-06, "loss": 0.9007, "step": 26735 }, { "epoch": 1.13, "grad_norm": 6.391258245343709, "learning_rate": 7.835910791764611e-06, "loss": 0.8617, "step": 26740 }, { "epoch": 1.13, "grad_norm": 9.713336044989521, "learning_rate": 7.834896758103596e-06, "loss": 0.8839, "step": 26745 }, { "epoch": 1.13, "grad_norm": 13.715219668104211, "learning_rate": 7.83388255257114e-06, "loss": 0.8872, "step": 26750 }, { "epoch": 1.13, "grad_norm": 7.033699236550981, "learning_rate": 7.832868175228733e-06, "loss": 0.8557, "step": 26755 }, { "epoch": 1.13, "grad_norm": 5.8878581659656675, "learning_rate": 7.831853626137873e-06, "loss": 0.8921, "step": 26760 }, { "epoch": 1.13, "grad_norm": 5.794052430097345, "learning_rate": 7.83083890536007e-06, "loss": 0.8657, "step": 26765 }, { "epoch": 1.13, "grad_norm": 5.161985488106933, "learning_rate": 7.829824012956843e-06, "loss": 0.8564, "step": 26770 }, { "epoch": 1.13, "grad_norm": 6.517373623150537, "learning_rate": 7.828808948989721e-06, "loss": 0.846, "step": 26775 }, { "epoch": 1.13, "grad_norm": 4.965864831707195, "learning_rate": 7.827793713520244e-06, "loss": 0.8263, "step": 26780 }, { "epoch": 1.13, "grad_norm": 7.572072833578794, "learning_rate": 7.826778306609963e-06, "loss": 0.8784, "step": 26785 }, { "epoch": 1.13, "grad_norm": 19.58769577147176, "learning_rate": 7.825762728320441e-06, "loss": 0.8592, "step": 26790 }, { "epoch": 1.13, "grad_norm": 6.856431530750798, "learning_rate": 7.824746978713249e-06, "loss": 0.8463, "step": 26795 }, { "epoch": 1.13, "grad_norm": 13.415610109036363, "learning_rate": 7.823731057849968e-06, "loss": 0.8759, "step": 26800 }, { "epoch": 1.13, "grad_norm": 5.355237783571644, "learning_rate": 7.82271496579219e-06, "loss": 0.9095, "step": 26805 }, { "epoch": 1.13, "grad_norm": 5.5291020808105475, "learning_rate": 7.821698702601518e-06, "loss": 0.8485, "step": 26810 }, { "epoch": 1.13, "grad_norm": 6.432170156810988, "learning_rate": 7.820682268339565e-06, "loss": 0.8143, "step": 26815 }, { "epoch": 1.14, "grad_norm": 12.885790040533996, "learning_rate": 7.819665663067955e-06, "loss": 0.9016, "step": 26820 }, { "epoch": 1.14, "grad_norm": 7.308668580777147, "learning_rate": 7.818648886848322e-06, "loss": 0.8184, "step": 26825 }, { "epoch": 1.14, "grad_norm": 17.97692222708809, "learning_rate": 7.81763193974231e-06, "loss": 0.8614, "step": 26830 }, { "epoch": 1.14, "grad_norm": 19.73903313184167, "learning_rate": 7.816614821811573e-06, "loss": 0.9027, "step": 26835 }, { "epoch": 1.14, "grad_norm": 7.2638924445763635, "learning_rate": 7.815597533117774e-06, "loss": 0.8326, "step": 26840 }, { "epoch": 1.14, "grad_norm": 14.284736657157557, "learning_rate": 7.814580073722592e-06, "loss": 0.8259, "step": 26845 }, { "epoch": 1.14, "grad_norm": 14.520932253709928, "learning_rate": 7.81356244368771e-06, "loss": 0.839, "step": 26850 }, { "epoch": 1.14, "grad_norm": 15.195141194142588, "learning_rate": 7.812544643074826e-06, "loss": 0.8753, "step": 26855 }, { "epoch": 1.14, "grad_norm": 15.99062710551827, "learning_rate": 7.811526671945644e-06, "loss": 0.8192, "step": 26860 }, { "epoch": 1.14, "grad_norm": 5.50763504403253, "learning_rate": 7.81050853036188e-06, "loss": 0.814, "step": 26865 }, { "epoch": 1.14, "grad_norm": 13.57963237693707, "learning_rate": 7.809490218385265e-06, "loss": 0.8535, "step": 26870 }, { "epoch": 1.14, "grad_norm": 7.699340883645577, "learning_rate": 7.808471736077532e-06, "loss": 0.8563, "step": 26875 }, { "epoch": 1.14, "grad_norm": 6.223232849495145, "learning_rate": 7.80745308350043e-06, "loss": 0.8689, "step": 26880 }, { "epoch": 1.14, "grad_norm": 6.2102132997441775, "learning_rate": 7.806434260715718e-06, "loss": 0.8486, "step": 26885 }, { "epoch": 1.14, "grad_norm": 10.111656855257813, "learning_rate": 7.805415267785164e-06, "loss": 0.8409, "step": 26890 }, { "epoch": 1.14, "grad_norm": 4.789176996540388, "learning_rate": 7.804396104770544e-06, "loss": 0.8714, "step": 26895 }, { "epoch": 1.14, "grad_norm": 6.654885199060267, "learning_rate": 7.803376771733651e-06, "loss": 0.8712, "step": 26900 }, { "epoch": 1.14, "grad_norm": 15.298537759433874, "learning_rate": 7.80235726873628e-06, "loss": 0.8876, "step": 26905 }, { "epoch": 1.14, "grad_norm": 7.197520155448941, "learning_rate": 7.801337595840243e-06, "loss": 0.8779, "step": 26910 }, { "epoch": 1.14, "grad_norm": 9.409360441838256, "learning_rate": 7.80031775310736e-06, "loss": 0.8654, "step": 26915 }, { "epoch": 1.14, "grad_norm": 13.718151268956833, "learning_rate": 7.799297740599461e-06, "loss": 0.899, "step": 26920 }, { "epoch": 1.14, "grad_norm": 6.515417767567687, "learning_rate": 7.798277558378383e-06, "loss": 0.8193, "step": 26925 }, { "epoch": 1.14, "grad_norm": 6.335845840609774, "learning_rate": 7.797257206505982e-06, "loss": 0.8364, "step": 26930 }, { "epoch": 1.14, "grad_norm": 5.953376764899409, "learning_rate": 7.796236685044113e-06, "loss": 0.859, "step": 26935 }, { "epoch": 1.14, "grad_norm": 11.149289751225545, "learning_rate": 7.795215994054655e-06, "loss": 0.8733, "step": 26940 }, { "epoch": 1.14, "grad_norm": 5.44217338293341, "learning_rate": 7.79419513359948e-06, "loss": 0.9356, "step": 26945 }, { "epoch": 1.14, "grad_norm": 9.944065439393688, "learning_rate": 7.793174103740488e-06, "loss": 0.8772, "step": 26950 }, { "epoch": 1.14, "grad_norm": 15.660101175955392, "learning_rate": 7.792152904539576e-06, "loss": 0.8493, "step": 26955 }, { "epoch": 1.14, "grad_norm": 18.14015428218497, "learning_rate": 7.791131536058659e-06, "loss": 0.8227, "step": 26960 }, { "epoch": 1.14, "grad_norm": 14.479782528678234, "learning_rate": 7.790109998359656e-06, "loss": 0.863, "step": 26965 }, { "epoch": 1.14, "grad_norm": 8.04743827991232, "learning_rate": 7.789088291504505e-06, "loss": 0.8603, "step": 26970 }, { "epoch": 1.14, "grad_norm": 10.527038238649569, "learning_rate": 7.788066415555144e-06, "loss": 0.8582, "step": 26975 }, { "epoch": 1.14, "grad_norm": 11.425065904239588, "learning_rate": 7.78704437057353e-06, "loss": 0.8496, "step": 26980 }, { "epoch": 1.14, "grad_norm": 13.217916132253075, "learning_rate": 7.786022156621626e-06, "loss": 0.834, "step": 26985 }, { "epoch": 1.14, "grad_norm": 12.816355601444743, "learning_rate": 7.784999773761405e-06, "loss": 0.911, "step": 26990 }, { "epoch": 1.14, "grad_norm": 4.8496703016782545, "learning_rate": 7.78397722205485e-06, "loss": 0.8728, "step": 26995 }, { "epoch": 1.14, "grad_norm": 9.911400235634874, "learning_rate": 7.782954501563959e-06, "loss": 0.8628, "step": 27000 }, { "epoch": 1.14, "grad_norm": 22.1676903894186, "learning_rate": 7.781931612350732e-06, "loss": 0.8378, "step": 27005 }, { "epoch": 1.14, "grad_norm": 19.198931511428537, "learning_rate": 7.780908554477187e-06, "loss": 0.8387, "step": 27010 }, { "epoch": 1.14, "grad_norm": 16.119500222539244, "learning_rate": 7.779885328005347e-06, "loss": 0.8696, "step": 27015 }, { "epoch": 1.14, "grad_norm": 20.206130804311602, "learning_rate": 7.778861932997248e-06, "loss": 0.8735, "step": 27020 }, { "epoch": 1.14, "grad_norm": 14.646468899449912, "learning_rate": 7.777838369514934e-06, "loss": 0.8737, "step": 27025 }, { "epoch": 1.14, "grad_norm": 15.936270658117936, "learning_rate": 7.776814637620465e-06, "loss": 0.8123, "step": 27030 }, { "epoch": 1.14, "grad_norm": 5.600928576146234, "learning_rate": 7.775790737375901e-06, "loss": 0.808, "step": 27035 }, { "epoch": 1.14, "grad_norm": 6.7706822854091, "learning_rate": 7.774766668843323e-06, "loss": 0.8824, "step": 27040 }, { "epoch": 1.14, "grad_norm": 5.15918773156841, "learning_rate": 7.773742432084815e-06, "loss": 0.8399, "step": 27045 }, { "epoch": 1.14, "grad_norm": 7.21056195321401, "learning_rate": 7.772718027162473e-06, "loss": 0.8816, "step": 27050 }, { "epoch": 1.15, "grad_norm": 4.77728011241169, "learning_rate": 7.771693454138405e-06, "loss": 0.8817, "step": 27055 }, { "epoch": 1.15, "grad_norm": 5.484314690605723, "learning_rate": 7.770668713074726e-06, "loss": 0.8462, "step": 27060 }, { "epoch": 1.15, "grad_norm": 12.142716095242328, "learning_rate": 7.769643804033564e-06, "loss": 0.8604, "step": 27065 }, { "epoch": 1.15, "grad_norm": 6.475203472027149, "learning_rate": 7.768618727077057e-06, "loss": 0.8166, "step": 27070 }, { "epoch": 1.15, "grad_norm": 14.945967883466762, "learning_rate": 7.767593482267352e-06, "loss": 0.8973, "step": 27075 }, { "epoch": 1.15, "grad_norm": 5.1150238727608865, "learning_rate": 7.766568069666607e-06, "loss": 0.8458, "step": 27080 }, { "epoch": 1.15, "grad_norm": 7.0843257541669145, "learning_rate": 7.765542489336989e-06, "loss": 0.8492, "step": 27085 }, { "epoch": 1.15, "grad_norm": 7.154301201245478, "learning_rate": 7.764516741340675e-06, "loss": 0.8529, "step": 27090 }, { "epoch": 1.15, "grad_norm": 7.359557595392436, "learning_rate": 7.763490825739857e-06, "loss": 0.8921, "step": 27095 }, { "epoch": 1.15, "grad_norm": 10.648640141356333, "learning_rate": 7.762464742596726e-06, "loss": 0.8638, "step": 27100 }, { "epoch": 1.15, "grad_norm": 17.84050128196062, "learning_rate": 7.761438491973499e-06, "loss": 0.8589, "step": 27105 }, { "epoch": 1.15, "grad_norm": 7.360991389523278, "learning_rate": 7.760412073932387e-06, "loss": 0.814, "step": 27110 }, { "epoch": 1.15, "grad_norm": 14.856043847272236, "learning_rate": 7.759385488535623e-06, "loss": 0.8704, "step": 27115 }, { "epoch": 1.15, "grad_norm": 13.981582047051566, "learning_rate": 7.758358735845446e-06, "loss": 0.8762, "step": 27120 }, { "epoch": 1.15, "grad_norm": 15.52080847728403, "learning_rate": 7.757331815924103e-06, "loss": 0.875, "step": 27125 }, { "epoch": 1.15, "grad_norm": 8.520287958726929, "learning_rate": 7.756304728833853e-06, "loss": 0.8626, "step": 27130 }, { "epoch": 1.15, "grad_norm": 13.67124852286572, "learning_rate": 7.755277474636967e-06, "loss": 0.8896, "step": 27135 }, { "epoch": 1.15, "grad_norm": 15.133427616989637, "learning_rate": 7.754250053395725e-06, "loss": 0.8867, "step": 27140 }, { "epoch": 1.15, "grad_norm": 12.203771082525684, "learning_rate": 7.753222465172413e-06, "loss": 0.8522, "step": 27145 }, { "epoch": 1.15, "grad_norm": 19.52042664030729, "learning_rate": 7.752194710029334e-06, "loss": 0.8567, "step": 27150 }, { "epoch": 1.15, "grad_norm": 8.556782586130002, "learning_rate": 7.751166788028796e-06, "loss": 0.8211, "step": 27155 }, { "epoch": 1.15, "grad_norm": 4.438704807861239, "learning_rate": 7.75013869923312e-06, "loss": 0.8498, "step": 27160 }, { "epoch": 1.15, "grad_norm": 5.342816116882408, "learning_rate": 7.749110443704636e-06, "loss": 0.8522, "step": 27165 }, { "epoch": 1.15, "grad_norm": 10.063566831098308, "learning_rate": 7.748082021505682e-06, "loss": 0.9002, "step": 27170 }, { "epoch": 1.15, "grad_norm": 7.079935585329831, "learning_rate": 7.747053432698613e-06, "loss": 0.8668, "step": 27175 }, { "epoch": 1.15, "grad_norm": 11.911554457975734, "learning_rate": 7.746024677345782e-06, "loss": 0.882, "step": 27180 }, { "epoch": 1.15, "grad_norm": 5.282740796062666, "learning_rate": 7.744995755509566e-06, "loss": 0.8524, "step": 27185 }, { "epoch": 1.15, "grad_norm": 9.311826855993818, "learning_rate": 7.743966667252343e-06, "loss": 0.8402, "step": 27190 }, { "epoch": 1.15, "grad_norm": 15.175072831580708, "learning_rate": 7.742937412636503e-06, "loss": 0.8775, "step": 27195 }, { "epoch": 1.15, "grad_norm": 26.816963976243624, "learning_rate": 7.741907991724447e-06, "loss": 0.9046, "step": 27200 }, { "epoch": 1.15, "grad_norm": 8.966737467527985, "learning_rate": 7.740878404578584e-06, "loss": 0.8657, "step": 27205 }, { "epoch": 1.15, "grad_norm": 11.449970539582631, "learning_rate": 7.73984865126134e-06, "loss": 0.9281, "step": 27210 }, { "epoch": 1.15, "grad_norm": 16.050798275405686, "learning_rate": 7.738818731835142e-06, "loss": 0.8304, "step": 27215 }, { "epoch": 1.15, "grad_norm": 24.048969562311843, "learning_rate": 7.737788646362432e-06, "loss": 0.8703, "step": 27220 }, { "epoch": 1.15, "grad_norm": 23.897415197635976, "learning_rate": 7.73675839490566e-06, "loss": 0.8251, "step": 27225 }, { "epoch": 1.15, "grad_norm": 6.999074851963846, "learning_rate": 7.735727977527287e-06, "loss": 0.881, "step": 27230 }, { "epoch": 1.15, "grad_norm": 5.68607491237158, "learning_rate": 7.734697394289787e-06, "loss": 0.8468, "step": 27235 }, { "epoch": 1.15, "grad_norm": 18.016385641529947, "learning_rate": 7.733666645255639e-06, "loss": 0.8517, "step": 27240 }, { "epoch": 1.15, "grad_norm": 7.6008166815681415, "learning_rate": 7.732635730487333e-06, "loss": 0.8528, "step": 27245 }, { "epoch": 1.15, "grad_norm": 15.798518565649362, "learning_rate": 7.731604650047373e-06, "loss": 0.868, "step": 27250 }, { "epoch": 1.15, "grad_norm": 21.522841309858762, "learning_rate": 7.73057340399827e-06, "loss": 0.9072, "step": 27255 }, { "epoch": 1.15, "grad_norm": 26.643299711535217, "learning_rate": 7.729541992402546e-06, "loss": 0.855, "step": 27260 }, { "epoch": 1.15, "grad_norm": 13.253308131733148, "learning_rate": 7.728510415322727e-06, "loss": 0.8417, "step": 27265 }, { "epoch": 1.15, "grad_norm": 7.263470141494027, "learning_rate": 7.727478672821362e-06, "loss": 0.8868, "step": 27270 }, { "epoch": 1.15, "grad_norm": 7.5953735070437265, "learning_rate": 7.726446764960998e-06, "loss": 0.9227, "step": 27275 }, { "epoch": 1.15, "grad_norm": 14.24061089109959, "learning_rate": 7.725414691804196e-06, "loss": 0.8622, "step": 27280 }, { "epoch": 1.15, "grad_norm": 9.712110940298755, "learning_rate": 7.72438245341353e-06, "loss": 0.8283, "step": 27285 }, { "epoch": 1.15, "grad_norm": 17.708881360823202, "learning_rate": 7.723350049851583e-06, "loss": 0.8316, "step": 27290 }, { "epoch": 1.16, "grad_norm": 5.997175146017793, "learning_rate": 7.722317481180944e-06, "loss": 0.8628, "step": 27295 }, { "epoch": 1.16, "grad_norm": 5.348698573207775, "learning_rate": 7.721284747464212e-06, "loss": 0.8732, "step": 27300 }, { "epoch": 1.16, "grad_norm": 10.541107517511868, "learning_rate": 7.720251848764004e-06, "loss": 0.8598, "step": 27305 }, { "epoch": 1.16, "grad_norm": 10.860351573617423, "learning_rate": 7.71921878514294e-06, "loss": 0.865, "step": 27310 }, { "epoch": 1.16, "grad_norm": 7.330076277294611, "learning_rate": 7.718185556663648e-06, "loss": 0.847, "step": 27315 }, { "epoch": 1.16, "grad_norm": 10.703301277916182, "learning_rate": 7.717152163388774e-06, "loss": 0.8661, "step": 27320 }, { "epoch": 1.16, "grad_norm": 6.629398535030925, "learning_rate": 7.716118605380966e-06, "loss": 0.8911, "step": 27325 }, { "epoch": 1.16, "grad_norm": 9.98793569161913, "learning_rate": 7.71508488270289e-06, "loss": 0.8734, "step": 27330 }, { "epoch": 1.16, "grad_norm": 23.638872432904368, "learning_rate": 7.714050995417215e-06, "loss": 0.8318, "step": 27335 }, { "epoch": 1.16, "grad_norm": 11.340252996496963, "learning_rate": 7.713016943586622e-06, "loss": 0.8486, "step": 27340 }, { "epoch": 1.16, "grad_norm": 17.09403077891058, "learning_rate": 7.711982727273804e-06, "loss": 0.888, "step": 27345 }, { "epoch": 1.16, "grad_norm": 5.7832109546675, "learning_rate": 7.71094834654146e-06, "loss": 0.8309, "step": 27350 }, { "epoch": 1.16, "grad_norm": 6.883792935301827, "learning_rate": 7.709913801452305e-06, "loss": 0.8601, "step": 27355 }, { "epoch": 1.16, "grad_norm": 8.286690897081227, "learning_rate": 7.708879092069057e-06, "loss": 0.863, "step": 27360 }, { "epoch": 1.16, "grad_norm": 5.36152425118263, "learning_rate": 7.707844218454449e-06, "loss": 0.823, "step": 27365 }, { "epoch": 1.16, "grad_norm": 4.890568786615989, "learning_rate": 7.706809180671221e-06, "loss": 0.8539, "step": 27370 }, { "epoch": 1.16, "grad_norm": 6.262723437729285, "learning_rate": 7.70577397878213e-06, "loss": 0.8614, "step": 27375 }, { "epoch": 1.16, "grad_norm": 5.86794575809123, "learning_rate": 7.70473861284993e-06, "loss": 0.8262, "step": 27380 }, { "epoch": 1.16, "grad_norm": 5.403378586839154, "learning_rate": 7.703703082937392e-06, "loss": 0.864, "step": 27385 }, { "epoch": 1.16, "grad_norm": 11.868381746572243, "learning_rate": 7.702667389107305e-06, "loss": 0.8406, "step": 27390 }, { "epoch": 1.16, "grad_norm": 4.952566892750742, "learning_rate": 7.701631531422453e-06, "loss": 0.7994, "step": 27395 }, { "epoch": 1.16, "grad_norm": 8.885562734938025, "learning_rate": 7.700595509945639e-06, "loss": 0.8447, "step": 27400 }, { "epoch": 1.16, "grad_norm": 7.025146300008063, "learning_rate": 7.699559324739673e-06, "loss": 0.9225, "step": 27405 }, { "epoch": 1.16, "grad_norm": 5.423858067637031, "learning_rate": 7.69852297586738e-06, "loss": 0.8856, "step": 27410 }, { "epoch": 1.16, "grad_norm": 5.020228151338761, "learning_rate": 7.697486463391585e-06, "loss": 0.8315, "step": 27415 }, { "epoch": 1.16, "grad_norm": 6.793320892112957, "learning_rate": 7.696449787375135e-06, "loss": 0.8654, "step": 27420 }, { "epoch": 1.16, "grad_norm": 4.8249425336504075, "learning_rate": 7.695412947880873e-06, "loss": 0.841, "step": 27425 }, { "epoch": 1.16, "grad_norm": 5.28639020911608, "learning_rate": 7.694375944971666e-06, "loss": 0.8291, "step": 27430 }, { "epoch": 1.16, "grad_norm": 10.81409659379978, "learning_rate": 7.693338778710382e-06, "loss": 0.8128, "step": 27435 }, { "epoch": 1.16, "grad_norm": 11.739210748964835, "learning_rate": 7.692301449159898e-06, "loss": 0.8782, "step": 27440 }, { "epoch": 1.16, "grad_norm": 9.854060773466067, "learning_rate": 7.691263956383111e-06, "loss": 0.8451, "step": 27445 }, { "epoch": 1.16, "grad_norm": 8.489044808294599, "learning_rate": 7.690226300442918e-06, "loss": 0.8194, "step": 27450 }, { "epoch": 1.16, "grad_norm": 9.09447171241856, "learning_rate": 7.689188481402227e-06, "loss": 0.8351, "step": 27455 }, { "epoch": 1.16, "grad_norm": 7.745927544295226, "learning_rate": 7.688150499323961e-06, "loss": 0.8382, "step": 27460 }, { "epoch": 1.16, "grad_norm": 22.595565634608043, "learning_rate": 7.687112354271046e-06, "loss": 0.8696, "step": 27465 }, { "epoch": 1.16, "grad_norm": 10.978275778853865, "learning_rate": 7.686074046306427e-06, "loss": 0.8688, "step": 27470 }, { "epoch": 1.16, "grad_norm": 19.67266723351257, "learning_rate": 7.685035575493049e-06, "loss": 0.8501, "step": 27475 }, { "epoch": 1.16, "grad_norm": 24.370771439683413, "learning_rate": 7.683996941893872e-06, "loss": 0.8403, "step": 27480 }, { "epoch": 1.16, "grad_norm": 10.409431963560548, "learning_rate": 7.682958145571868e-06, "loss": 0.879, "step": 27485 }, { "epoch": 1.16, "grad_norm": 4.888276552284849, "learning_rate": 7.681919186590015e-06, "loss": 0.8346, "step": 27490 }, { "epoch": 1.16, "grad_norm": 7.086681885252242, "learning_rate": 7.6808800650113e-06, "loss": 0.8495, "step": 27495 }, { "epoch": 1.16, "grad_norm": 6.168482707177028, "learning_rate": 7.679840780898726e-06, "loss": 0.8301, "step": 27500 }, { "epoch": 1.16, "grad_norm": 5.741519529628608, "learning_rate": 7.678801334315298e-06, "loss": 0.8459, "step": 27505 }, { "epoch": 1.16, "grad_norm": 5.733702314062286, "learning_rate": 7.677761725324036e-06, "loss": 0.8592, "step": 27510 }, { "epoch": 1.16, "grad_norm": 7.683418825144909, "learning_rate": 7.676721953987967e-06, "loss": 0.8546, "step": 27515 }, { "epoch": 1.16, "grad_norm": 5.751840703909939, "learning_rate": 7.67568202037013e-06, "loss": 0.8836, "step": 27520 }, { "epoch": 1.16, "grad_norm": 10.254284232162648, "learning_rate": 7.674641924533575e-06, "loss": 0.8579, "step": 27525 }, { "epoch": 1.17, "grad_norm": 11.646552164090183, "learning_rate": 7.673601666541358e-06, "loss": 0.8455, "step": 27530 }, { "epoch": 1.17, "grad_norm": 8.966369117631704, "learning_rate": 7.672561246456548e-06, "loss": 0.8395, "step": 27535 }, { "epoch": 1.17, "grad_norm": 6.238456420578824, "learning_rate": 7.67152066434222e-06, "loss": 0.8474, "step": 27540 }, { "epoch": 1.17, "grad_norm": 4.71478357741574, "learning_rate": 7.670479920261466e-06, "loss": 0.8308, "step": 27545 }, { "epoch": 1.17, "grad_norm": 13.103983033831756, "learning_rate": 7.66943901427738e-06, "loss": 0.8912, "step": 27550 }, { "epoch": 1.17, "grad_norm": 8.275955979338747, "learning_rate": 7.668397946453068e-06, "loss": 0.895, "step": 27555 }, { "epoch": 1.17, "grad_norm": 6.210058126881039, "learning_rate": 7.667356716851649e-06, "loss": 0.8681, "step": 27560 }, { "epoch": 1.17, "grad_norm": 12.554655831956167, "learning_rate": 7.66631532553625e-06, "loss": 0.8254, "step": 27565 }, { "epoch": 1.17, "grad_norm": 19.210384089349912, "learning_rate": 7.665273772570007e-06, "loss": 0.8186, "step": 27570 }, { "epoch": 1.17, "grad_norm": 7.436548726078364, "learning_rate": 7.664232058016066e-06, "loss": 0.8753, "step": 27575 }, { "epoch": 1.17, "grad_norm": 7.9379106745617385, "learning_rate": 7.663190181937582e-06, "loss": 0.8053, "step": 27580 }, { "epoch": 1.17, "grad_norm": 5.686629557898449, "learning_rate": 7.662148144397724e-06, "loss": 0.8161, "step": 27585 }, { "epoch": 1.17, "grad_norm": 7.991520918826527, "learning_rate": 7.661105945459662e-06, "loss": 0.8321, "step": 27590 }, { "epoch": 1.17, "grad_norm": 5.32080752634358, "learning_rate": 7.660063585186588e-06, "loss": 0.8518, "step": 27595 }, { "epoch": 1.17, "grad_norm": 5.671226647791415, "learning_rate": 7.659021063641692e-06, "loss": 0.8171, "step": 27600 }, { "epoch": 1.17, "grad_norm": 5.871194076066952, "learning_rate": 7.657978380888184e-06, "loss": 0.8302, "step": 27605 }, { "epoch": 1.17, "grad_norm": 6.228548617239947, "learning_rate": 7.656935536989273e-06, "loss": 0.839, "step": 27610 }, { "epoch": 1.17, "grad_norm": 13.934839079395935, "learning_rate": 7.65589253200819e-06, "loss": 0.8628, "step": 27615 }, { "epoch": 1.17, "grad_norm": 13.5939206736836, "learning_rate": 7.654849366008165e-06, "loss": 0.8727, "step": 27620 }, { "epoch": 1.17, "grad_norm": 5.127171905459924, "learning_rate": 7.653806039052441e-06, "loss": 0.8345, "step": 27625 }, { "epoch": 1.17, "grad_norm": 6.304199276197281, "learning_rate": 7.652762551204276e-06, "loss": 0.8103, "step": 27630 }, { "epoch": 1.17, "grad_norm": 15.504266219417596, "learning_rate": 7.651718902526932e-06, "loss": 0.851, "step": 27635 }, { "epoch": 1.17, "grad_norm": 7.676149477882025, "learning_rate": 7.650675093083679e-06, "loss": 0.8925, "step": 27640 }, { "epoch": 1.17, "grad_norm": 10.977951798524913, "learning_rate": 7.649631122937807e-06, "loss": 0.8828, "step": 27645 }, { "epoch": 1.17, "grad_norm": 15.22570829620542, "learning_rate": 7.6485869921526e-06, "loss": 0.8213, "step": 27650 }, { "epoch": 1.17, "grad_norm": 25.062734538707396, "learning_rate": 7.647542700791368e-06, "loss": 0.8285, "step": 27655 }, { "epoch": 1.17, "grad_norm": 10.055435828160332, "learning_rate": 7.646498248917421e-06, "loss": 0.8802, "step": 27660 }, { "epoch": 1.17, "grad_norm": 16.89700212996837, "learning_rate": 7.64545363659408e-06, "loss": 0.8672, "step": 27665 }, { "epoch": 1.17, "grad_norm": 10.833302054850313, "learning_rate": 7.644408863884677e-06, "loss": 0.8414, "step": 27670 }, { "epoch": 1.17, "grad_norm": 15.597371231663407, "learning_rate": 7.643363930852555e-06, "loss": 0.8796, "step": 27675 }, { "epoch": 1.17, "grad_norm": 21.20763458877111, "learning_rate": 7.642318837561065e-06, "loss": 0.8104, "step": 27680 }, { "epoch": 1.17, "grad_norm": 5.648144568217421, "learning_rate": 7.641273584073565e-06, "loss": 0.836, "step": 27685 }, { "epoch": 1.17, "grad_norm": 7.369535311799559, "learning_rate": 7.640228170453427e-06, "loss": 0.887, "step": 27690 }, { "epoch": 1.17, "grad_norm": 7.482130076599322, "learning_rate": 7.639182596764034e-06, "loss": 0.8058, "step": 27695 }, { "epoch": 1.17, "grad_norm": 15.201762077919843, "learning_rate": 7.638136863068774e-06, "loss": 0.8682, "step": 27700 }, { "epoch": 1.17, "grad_norm": 10.707946209431555, "learning_rate": 7.637090969431047e-06, "loss": 0.8088, "step": 27705 }, { "epoch": 1.17, "grad_norm": 19.13798040483209, "learning_rate": 7.636044915914261e-06, "loss": 0.8363, "step": 27710 }, { "epoch": 1.17, "grad_norm": 7.965068418045872, "learning_rate": 7.634998702581837e-06, "loss": 0.8334, "step": 27715 }, { "epoch": 1.17, "grad_norm": 5.580006337187495, "learning_rate": 7.633952329497204e-06, "loss": 0.8491, "step": 27720 }, { "epoch": 1.17, "grad_norm": 6.764165416025875, "learning_rate": 7.6329057967238e-06, "loss": 0.8473, "step": 27725 }, { "epoch": 1.17, "grad_norm": 5.324253795361866, "learning_rate": 7.631859104325072e-06, "loss": 0.8369, "step": 27730 }, { "epoch": 1.17, "grad_norm": 4.962681037737329, "learning_rate": 7.63081225236448e-06, "loss": 0.8451, "step": 27735 }, { "epoch": 1.17, "grad_norm": 9.590880885944284, "learning_rate": 7.62976524090549e-06, "loss": 0.8431, "step": 27740 }, { "epoch": 1.17, "grad_norm": 18.235409726344564, "learning_rate": 7.628718070011581e-06, "loss": 0.8286, "step": 27745 }, { "epoch": 1.17, "grad_norm": 8.764192789691922, "learning_rate": 7.627670739746236e-06, "loss": 0.8494, "step": 27750 }, { "epoch": 1.17, "grad_norm": 4.783324195088866, "learning_rate": 7.626623250172955e-06, "loss": 0.8124, "step": 27755 }, { "epoch": 1.17, "grad_norm": 4.90817889085928, "learning_rate": 7.625575601355243e-06, "loss": 0.8559, "step": 27760 }, { "epoch": 1.18, "grad_norm": 4.713028266312869, "learning_rate": 7.624527793356618e-06, "loss": 0.8384, "step": 27765 }, { "epoch": 1.18, "grad_norm": 4.724507524271952, "learning_rate": 7.623479826240602e-06, "loss": 0.8265, "step": 27770 }, { "epoch": 1.18, "grad_norm": 6.008352369710291, "learning_rate": 7.622431700070732e-06, "loss": 0.8551, "step": 27775 }, { "epoch": 1.18, "grad_norm": 11.534868258920003, "learning_rate": 7.6213834149105526e-06, "loss": 0.8483, "step": 27780 }, { "epoch": 1.18, "grad_norm": 9.829646302102221, "learning_rate": 7.620334970823618e-06, "loss": 0.8429, "step": 27785 }, { "epoch": 1.18, "grad_norm": 7.588730456509883, "learning_rate": 7.619286367873493e-06, "loss": 0.8264, "step": 27790 }, { "epoch": 1.18, "grad_norm": 6.1152142599429276, "learning_rate": 7.618237606123752e-06, "loss": 0.864, "step": 27795 }, { "epoch": 1.18, "grad_norm": 4.686253510485167, "learning_rate": 7.617188685637975e-06, "loss": 0.841, "step": 27800 }, { "epoch": 1.18, "grad_norm": 5.73788659006235, "learning_rate": 7.616139606479759e-06, "loss": 0.8917, "step": 27805 }, { "epoch": 1.18, "grad_norm": 5.348402895636764, "learning_rate": 7.615090368712703e-06, "loss": 0.8361, "step": 27810 }, { "epoch": 1.18, "grad_norm": 4.812146510386785, "learning_rate": 7.614040972400422e-06, "loss": 0.851, "step": 27815 }, { "epoch": 1.18, "grad_norm": 5.157319708126221, "learning_rate": 7.612991417606538e-06, "loss": 0.8291, "step": 27820 }, { "epoch": 1.18, "grad_norm": 5.418173409894394, "learning_rate": 7.6119417043946805e-06, "loss": 0.8654, "step": 27825 }, { "epoch": 1.18, "grad_norm": 5.446844649442984, "learning_rate": 7.610891832828491e-06, "loss": 0.8539, "step": 27830 }, { "epoch": 1.18, "grad_norm": 6.893240079957829, "learning_rate": 7.609841802971619e-06, "loss": 0.8362, "step": 27835 }, { "epoch": 1.18, "grad_norm": 10.81288220380818, "learning_rate": 7.608791614887727e-06, "loss": 0.8395, "step": 27840 }, { "epoch": 1.18, "grad_norm": 27.360290542192228, "learning_rate": 7.607741268640484e-06, "loss": 0.8763, "step": 27845 }, { "epoch": 1.18, "grad_norm": 10.969049667578217, "learning_rate": 7.606690764293569e-06, "loss": 0.819, "step": 27850 }, { "epoch": 1.18, "grad_norm": 10.307470076649825, "learning_rate": 7.6056401019106716e-06, "loss": 0.8368, "step": 27855 }, { "epoch": 1.18, "grad_norm": 10.839924621751747, "learning_rate": 7.60458928155549e-06, "loss": 0.8401, "step": 27860 }, { "epoch": 1.18, "grad_norm": 13.908765461839273, "learning_rate": 7.603538303291732e-06, "loss": 0.906, "step": 27865 }, { "epoch": 1.18, "grad_norm": 21.778484662244413, "learning_rate": 7.602487167183114e-06, "loss": 0.8406, "step": 27870 }, { "epoch": 1.18, "grad_norm": 10.548158152746032, "learning_rate": 7.601435873293368e-06, "loss": 0.8429, "step": 27875 }, { "epoch": 1.18, "grad_norm": 9.501855722902018, "learning_rate": 7.6003844216862265e-06, "loss": 0.8602, "step": 27880 }, { "epoch": 1.18, "grad_norm": 5.879120679560759, "learning_rate": 7.599332812425436e-06, "loss": 0.8546, "step": 27885 }, { "epoch": 1.18, "grad_norm": 17.821519682218852, "learning_rate": 7.598281045574755e-06, "loss": 0.835, "step": 27890 }, { "epoch": 1.18, "grad_norm": 5.105085015962766, "learning_rate": 7.597229121197947e-06, "loss": 0.8288, "step": 27895 }, { "epoch": 1.18, "grad_norm": 6.434859715371597, "learning_rate": 7.59617703935879e-06, "loss": 0.8117, "step": 27900 }, { "epoch": 1.18, "grad_norm": 10.32888579585104, "learning_rate": 7.595124800121063e-06, "loss": 0.8642, "step": 27905 }, { "epoch": 1.18, "grad_norm": 13.581333073163272, "learning_rate": 7.594072403548567e-06, "loss": 0.8552, "step": 27910 }, { "epoch": 1.18, "grad_norm": 7.36309030212805, "learning_rate": 7.5930198497051e-06, "loss": 0.8287, "step": 27915 }, { "epoch": 1.18, "grad_norm": 12.660316002134852, "learning_rate": 7.591967138654477e-06, "loss": 0.8964, "step": 27920 }, { "epoch": 1.18, "grad_norm": 4.582443166828062, "learning_rate": 7.590914270460523e-06, "loss": 0.8729, "step": 27925 }, { "epoch": 1.18, "grad_norm": 5.762290179426884, "learning_rate": 7.589861245187067e-06, "loss": 0.8597, "step": 27930 }, { "epoch": 1.18, "grad_norm": 4.380482049638994, "learning_rate": 7.588808062897955e-06, "loss": 0.7956, "step": 27935 }, { "epoch": 1.18, "grad_norm": 13.000222406937715, "learning_rate": 7.587754723657032e-06, "loss": 0.8011, "step": 27940 }, { "epoch": 1.18, "grad_norm": 11.397991076702855, "learning_rate": 7.586701227528165e-06, "loss": 0.8411, "step": 27945 }, { "epoch": 1.18, "grad_norm": 8.004559924893229, "learning_rate": 7.585647574575221e-06, "loss": 0.8328, "step": 27950 }, { "epoch": 1.18, "grad_norm": 12.925492474488394, "learning_rate": 7.584593764862081e-06, "loss": 0.8311, "step": 27955 }, { "epoch": 1.18, "grad_norm": 6.690818949852794, "learning_rate": 7.583539798452635e-06, "loss": 0.8353, "step": 27960 }, { "epoch": 1.18, "grad_norm": 7.513292805208883, "learning_rate": 7.582485675410781e-06, "loss": 0.826, "step": 27965 }, { "epoch": 1.18, "grad_norm": 6.929384630067674, "learning_rate": 7.581431395800427e-06, "loss": 0.8414, "step": 27970 }, { "epoch": 1.18, "grad_norm": 7.098686476165517, "learning_rate": 7.580376959685491e-06, "loss": 0.8656, "step": 27975 }, { "epoch": 1.18, "grad_norm": 10.870683407795962, "learning_rate": 7.5793223671299e-06, "loss": 0.8121, "step": 27980 }, { "epoch": 1.18, "grad_norm": 4.745546473969652, "learning_rate": 7.578267618197594e-06, "loss": 0.8368, "step": 27985 }, { "epoch": 1.18, "grad_norm": 6.95174176893512, "learning_rate": 7.577212712952515e-06, "loss": 0.8737, "step": 27990 }, { "epoch": 1.18, "grad_norm": 12.8345052279372, "learning_rate": 7.576157651458622e-06, "loss": 0.8408, "step": 27995 }, { "epoch": 1.19, "grad_norm": 5.005977161319825, "learning_rate": 7.575102433779877e-06, "loss": 0.8544, "step": 28000 }, { "epoch": 1.19, "grad_norm": 7.986276334704683, "learning_rate": 7.574047059980258e-06, "loss": 0.8041, "step": 28005 }, { "epoch": 1.19, "grad_norm": 5.412370995795932, "learning_rate": 7.572991530123748e-06, "loss": 0.8415, "step": 28010 }, { "epoch": 1.19, "grad_norm": 7.374804435038488, "learning_rate": 7.571935844274338e-06, "loss": 0.8653, "step": 28015 }, { "epoch": 1.19, "grad_norm": 7.6048587232680145, "learning_rate": 7.570880002496037e-06, "loss": 0.8162, "step": 28020 }, { "epoch": 1.19, "grad_norm": 12.194847011302116, "learning_rate": 7.569824004852852e-06, "loss": 0.805, "step": 28025 }, { "epoch": 1.19, "grad_norm": 7.717125467874262, "learning_rate": 7.568767851408809e-06, "loss": 0.8487, "step": 28030 }, { "epoch": 1.19, "grad_norm": 4.627311285886776, "learning_rate": 7.567711542227937e-06, "loss": 0.8773, "step": 28035 }, { "epoch": 1.19, "grad_norm": 13.873388470512738, "learning_rate": 7.566655077374278e-06, "loss": 0.8703, "step": 28040 }, { "epoch": 1.19, "grad_norm": 4.4912792796707475, "learning_rate": 7.5655984569118805e-06, "loss": 0.8721, "step": 28045 }, { "epoch": 1.19, "grad_norm": 7.932026194886099, "learning_rate": 7.564541680904806e-06, "loss": 0.8558, "step": 28050 }, { "epoch": 1.19, "grad_norm": 14.238880536736147, "learning_rate": 7.563484749417125e-06, "loss": 0.8727, "step": 28055 }, { "epoch": 1.19, "grad_norm": 14.376906356562099, "learning_rate": 7.5624276625129144e-06, "loss": 0.9071, "step": 28060 }, { "epoch": 1.19, "grad_norm": 17.00879540931177, "learning_rate": 7.561370420256262e-06, "loss": 0.875, "step": 28065 }, { "epoch": 1.19, "grad_norm": 5.822858557009525, "learning_rate": 7.560313022711266e-06, "loss": 0.8789, "step": 28070 }, { "epoch": 1.19, "grad_norm": 6.181121490015597, "learning_rate": 7.559255469942033e-06, "loss": 0.8142, "step": 28075 }, { "epoch": 1.19, "grad_norm": 7.9937130752354575, "learning_rate": 7.558197762012679e-06, "loss": 0.8431, "step": 28080 }, { "epoch": 1.19, "grad_norm": 7.780546674453072, "learning_rate": 7.557139898987331e-06, "loss": 0.8091, "step": 28085 }, { "epoch": 1.19, "grad_norm": 6.779829060036907, "learning_rate": 7.556081880930122e-06, "loss": 0.9289, "step": 28090 }, { "epoch": 1.19, "grad_norm": 5.509477277862589, "learning_rate": 7.555023707905198e-06, "loss": 0.9028, "step": 28095 }, { "epoch": 1.19, "grad_norm": 6.440547603784951, "learning_rate": 7.553965379976714e-06, "loss": 0.8512, "step": 28100 }, { "epoch": 1.19, "grad_norm": 8.764955861768891, "learning_rate": 7.552906897208829e-06, "loss": 0.87, "step": 28105 }, { "epoch": 1.19, "grad_norm": 15.635585903830606, "learning_rate": 7.551848259665721e-06, "loss": 0.8769, "step": 28110 }, { "epoch": 1.19, "grad_norm": 5.8037009192162525, "learning_rate": 7.55078946741157e-06, "loss": 0.8418, "step": 28115 }, { "epoch": 1.19, "grad_norm": 5.21938486468252, "learning_rate": 7.5497305205105665e-06, "loss": 0.851, "step": 28120 }, { "epoch": 1.19, "grad_norm": 5.209014816444418, "learning_rate": 7.548671419026912e-06, "loss": 0.8407, "step": 28125 }, { "epoch": 1.19, "grad_norm": 4.658079300211783, "learning_rate": 7.547612163024815e-06, "loss": 0.823, "step": 28130 }, { "epoch": 1.19, "grad_norm": 4.426687639723021, "learning_rate": 7.5465527525685e-06, "loss": 0.849, "step": 28135 }, { "epoch": 1.19, "grad_norm": 5.708656188891774, "learning_rate": 7.54549318772219e-06, "loss": 0.8513, "step": 28140 }, { "epoch": 1.19, "grad_norm": 5.678332807196017, "learning_rate": 7.544433468550128e-06, "loss": 0.8162, "step": 28145 }, { "epoch": 1.19, "grad_norm": 7.877331279988588, "learning_rate": 7.54337359511656e-06, "loss": 0.854, "step": 28150 }, { "epoch": 1.19, "grad_norm": 8.860323470715516, "learning_rate": 7.542313567485742e-06, "loss": 0.8003, "step": 28155 }, { "epoch": 1.19, "grad_norm": 7.62422576715441, "learning_rate": 7.5412533857219405e-06, "loss": 0.8222, "step": 28160 }, { "epoch": 1.19, "grad_norm": 7.795437231638475, "learning_rate": 7.540193049889432e-06, "loss": 0.8784, "step": 28165 }, { "epoch": 1.19, "grad_norm": 12.866829067887812, "learning_rate": 7.5391325600525015e-06, "loss": 0.8931, "step": 28170 }, { "epoch": 1.19, "grad_norm": 4.778881804171628, "learning_rate": 7.5380719162754425e-06, "loss": 0.8579, "step": 28175 }, { "epoch": 1.19, "grad_norm": 4.744665997778414, "learning_rate": 7.537011118622559e-06, "loss": 0.8019, "step": 28180 }, { "epoch": 1.19, "grad_norm": 12.678200182813343, "learning_rate": 7.535950167158166e-06, "loss": 0.7903, "step": 28185 }, { "epoch": 1.19, "grad_norm": 4.696953752553162, "learning_rate": 7.534889061946582e-06, "loss": 0.8018, "step": 28190 }, { "epoch": 1.19, "grad_norm": 14.251070906183966, "learning_rate": 7.5338278030521424e-06, "loss": 0.8586, "step": 28195 }, { "epoch": 1.19, "grad_norm": 11.622139844463858, "learning_rate": 7.532766390539187e-06, "loss": 0.8603, "step": 28200 }, { "epoch": 1.19, "grad_norm": 4.615156466994754, "learning_rate": 7.531704824472066e-06, "loss": 0.8286, "step": 28205 }, { "epoch": 1.19, "grad_norm": 4.816880063748879, "learning_rate": 7.530643104915139e-06, "loss": 0.8171, "step": 28210 }, { "epoch": 1.19, "grad_norm": 4.461003187741392, "learning_rate": 7.529581231932772e-06, "loss": 0.8013, "step": 28215 }, { "epoch": 1.19, "grad_norm": 5.178885886254754, "learning_rate": 7.528519205589349e-06, "loss": 0.8242, "step": 28220 }, { "epoch": 1.19, "grad_norm": 9.23610397110475, "learning_rate": 7.527457025949253e-06, "loss": 0.8435, "step": 28225 }, { "epoch": 1.19, "grad_norm": 4.469312700507896, "learning_rate": 7.526394693076884e-06, "loss": 0.8174, "step": 28230 }, { "epoch": 1.19, "grad_norm": 6.563343252192742, "learning_rate": 7.525332207036645e-06, "loss": 0.7947, "step": 28235 }, { "epoch": 1.2, "grad_norm": 10.18442601981919, "learning_rate": 7.524269567892954e-06, "loss": 0.8229, "step": 28240 }, { "epoch": 1.2, "grad_norm": 5.452600003756412, "learning_rate": 7.5232067757102345e-06, "loss": 0.8295, "step": 28245 }, { "epoch": 1.2, "grad_norm": 5.785289802378125, "learning_rate": 7.52214383055292e-06, "loss": 0.8537, "step": 28250 }, { "epoch": 1.2, "grad_norm": 4.922889225903498, "learning_rate": 7.521080732485455e-06, "loss": 0.8666, "step": 28255 }, { "epoch": 1.2, "grad_norm": 4.814412743718632, "learning_rate": 7.5200174815722895e-06, "loss": 0.8583, "step": 28260 }, { "epoch": 1.2, "grad_norm": 19.656432152275066, "learning_rate": 7.518954077877889e-06, "loss": 0.8562, "step": 28265 }, { "epoch": 1.2, "grad_norm": 25.79774643494239, "learning_rate": 7.517890521466722e-06, "loss": 0.8531, "step": 28270 }, { "epoch": 1.2, "grad_norm": 27.494003605576125, "learning_rate": 7.516826812403268e-06, "loss": 0.8245, "step": 28275 }, { "epoch": 1.2, "grad_norm": 16.629294015985117, "learning_rate": 7.515762950752019e-06, "loss": 0.8235, "step": 28280 }, { "epoch": 1.2, "grad_norm": 9.772158858080155, "learning_rate": 7.514698936577474e-06, "loss": 0.8455, "step": 28285 }, { "epoch": 1.2, "grad_norm": 13.422855221250417, "learning_rate": 7.513634769944138e-06, "loss": 0.8694, "step": 28290 }, { "epoch": 1.2, "grad_norm": 34.3007385896735, "learning_rate": 7.512570450916531e-06, "loss": 0.8451, "step": 28295 }, { "epoch": 1.2, "grad_norm": 33.74170991704953, "learning_rate": 7.511505979559177e-06, "loss": 0.8868, "step": 28300 }, { "epoch": 1.2, "grad_norm": 26.5443760946743, "learning_rate": 7.5104413559366155e-06, "loss": 0.8272, "step": 28305 }, { "epoch": 1.2, "grad_norm": 6.403280458128843, "learning_rate": 7.509376580113388e-06, "loss": 0.8915, "step": 28310 }, { "epoch": 1.2, "grad_norm": 13.799545885245172, "learning_rate": 7.508311652154051e-06, "loss": 0.8551, "step": 28315 }, { "epoch": 1.2, "grad_norm": 7.428312638585163, "learning_rate": 7.507246572123167e-06, "loss": 0.8214, "step": 28320 }, { "epoch": 1.2, "grad_norm": 6.798636870842741, "learning_rate": 7.506181340085308e-06, "loss": 0.8292, "step": 28325 }, { "epoch": 1.2, "grad_norm": 6.950266598737123, "learning_rate": 7.505115956105056e-06, "loss": 0.8732, "step": 28330 }, { "epoch": 1.2, "grad_norm": 5.303589137257754, "learning_rate": 7.504050420247003e-06, "loss": 0.8236, "step": 28335 }, { "epoch": 1.2, "grad_norm": 6.490367762856796, "learning_rate": 7.5029847325757496e-06, "loss": 0.9007, "step": 28340 }, { "epoch": 1.2, "grad_norm": 9.21824151629349, "learning_rate": 7.501918893155904e-06, "loss": 0.889, "step": 28345 }, { "epoch": 1.2, "grad_norm": 5.194442878935659, "learning_rate": 7.500852902052086e-06, "loss": 0.827, "step": 28350 }, { "epoch": 1.2, "grad_norm": 4.948833689413944, "learning_rate": 7.499786759328923e-06, "loss": 0.8176, "step": 28355 }, { "epoch": 1.2, "grad_norm": 6.9000578222542215, "learning_rate": 7.498720465051051e-06, "loss": 0.8505, "step": 28360 }, { "epoch": 1.2, "grad_norm": 6.87231332950506, "learning_rate": 7.4976540192831195e-06, "loss": 0.837, "step": 28365 }, { "epoch": 1.2, "grad_norm": 5.398897433212114, "learning_rate": 7.49658742208978e-06, "loss": 0.832, "step": 28370 }, { "epoch": 1.2, "grad_norm": 4.365084479002949, "learning_rate": 7.495520673535701e-06, "loss": 0.8384, "step": 28375 }, { "epoch": 1.2, "grad_norm": 7.220681949658558, "learning_rate": 7.494453773685553e-06, "loss": 0.8448, "step": 28380 }, { "epoch": 1.2, "grad_norm": 20.32257642371168, "learning_rate": 7.49338672260402e-06, "loss": 0.8133, "step": 28385 }, { "epoch": 1.2, "grad_norm": 12.690042424466201, "learning_rate": 7.492319520355796e-06, "loss": 0.933, "step": 28390 }, { "epoch": 1.2, "grad_norm": 24.03521144511985, "learning_rate": 7.491252167005581e-06, "loss": 0.7949, "step": 28395 }, { "epoch": 1.2, "grad_norm": 4.7537894778255545, "learning_rate": 7.490184662618083e-06, "loss": 0.8727, "step": 28400 }, { "epoch": 1.2, "grad_norm": 6.92990339266652, "learning_rate": 7.489117007258027e-06, "loss": 0.8297, "step": 28405 }, { "epoch": 1.2, "grad_norm": 6.485700305949638, "learning_rate": 7.488049200990135e-06, "loss": 0.8353, "step": 28410 }, { "epoch": 1.2, "grad_norm": 9.359578029863616, "learning_rate": 7.486981243879152e-06, "loss": 0.881, "step": 28415 }, { "epoch": 1.2, "grad_norm": 6.314239787702651, "learning_rate": 7.4859131359898195e-06, "loss": 0.8805, "step": 28420 }, { "epoch": 1.2, "grad_norm": 6.5035130003843244, "learning_rate": 7.4848448773868975e-06, "loss": 0.8678, "step": 28425 }, { "epoch": 1.2, "grad_norm": 4.368264559870695, "learning_rate": 7.483776468135151e-06, "loss": 0.8477, "step": 28430 }, { "epoch": 1.2, "grad_norm": 4.624408879333209, "learning_rate": 7.4827079082993514e-06, "loss": 0.8439, "step": 28435 }, { "epoch": 1.2, "grad_norm": 5.858398529538015, "learning_rate": 7.4816391979442845e-06, "loss": 0.8804, "step": 28440 }, { "epoch": 1.2, "grad_norm": 6.2035357681283445, "learning_rate": 7.480570337134743e-06, "loss": 0.8525, "step": 28445 }, { "epoch": 1.2, "grad_norm": 15.272458704317378, "learning_rate": 7.47950132593553e-06, "loss": 0.8753, "step": 28450 }, { "epoch": 1.2, "grad_norm": 8.887833676265465, "learning_rate": 7.478432164411454e-06, "loss": 0.8793, "step": 28455 }, { "epoch": 1.2, "grad_norm": 6.419626101916099, "learning_rate": 7.4773628526273364e-06, "loss": 0.8525, "step": 28460 }, { "epoch": 1.2, "grad_norm": 12.867686269089686, "learning_rate": 7.476293390648007e-06, "loss": 0.8639, "step": 28465 }, { "epoch": 1.2, "grad_norm": 8.022212328657547, "learning_rate": 7.475223778538303e-06, "loss": 0.8157, "step": 28470 }, { "epoch": 1.21, "grad_norm": 8.386911682796224, "learning_rate": 7.4741540163630735e-06, "loss": 0.8525, "step": 28475 }, { "epoch": 1.21, "grad_norm": 8.593286990162671, "learning_rate": 7.473084104187173e-06, "loss": 0.8669, "step": 28480 }, { "epoch": 1.21, "grad_norm": 7.373393707788437, "learning_rate": 7.472014042075469e-06, "loss": 0.8452, "step": 28485 }, { "epoch": 1.21, "grad_norm": 7.581158035130508, "learning_rate": 7.470943830092834e-06, "loss": 0.8699, "step": 28490 }, { "epoch": 1.21, "grad_norm": 5.790392270440567, "learning_rate": 7.469873468304152e-06, "loss": 0.8333, "step": 28495 }, { "epoch": 1.21, "grad_norm": 11.77505867533784, "learning_rate": 7.468802956774318e-06, "loss": 0.8345, "step": 28500 }, { "epoch": 1.21, "grad_norm": 8.070534224632215, "learning_rate": 7.467732295568232e-06, "loss": 0.8408, "step": 28505 }, { "epoch": 1.21, "grad_norm": 6.8857497670450325, "learning_rate": 7.466661484750807e-06, "loss": 0.8339, "step": 28510 }, { "epoch": 1.21, "grad_norm": 7.434247891587606, "learning_rate": 7.465590524386962e-06, "loss": 0.8501, "step": 28515 }, { "epoch": 1.21, "grad_norm": 14.102312206253615, "learning_rate": 7.464519414541625e-06, "loss": 0.8666, "step": 28520 }, { "epoch": 1.21, "grad_norm": 11.500519354926956, "learning_rate": 7.463448155279737e-06, "loss": 0.8265, "step": 28525 }, { "epoch": 1.21, "grad_norm": 7.548423021792267, "learning_rate": 7.4623767466662414e-06, "loss": 0.8194, "step": 28530 }, { "epoch": 1.21, "grad_norm": 5.908599948378591, "learning_rate": 7.4613051887661e-06, "loss": 0.8574, "step": 28535 }, { "epoch": 1.21, "grad_norm": 4.916027115139375, "learning_rate": 7.460233481644273e-06, "loss": 0.9028, "step": 28540 }, { "epoch": 1.21, "grad_norm": 8.48309857593801, "learning_rate": 7.459161625365736e-06, "loss": 0.8178, "step": 28545 }, { "epoch": 1.21, "grad_norm": 6.107350983721261, "learning_rate": 7.458089619995474e-06, "loss": 0.8632, "step": 28550 }, { "epoch": 1.21, "grad_norm": 6.246321740012907, "learning_rate": 7.45701746559848e-06, "loss": 0.8429, "step": 28555 }, { "epoch": 1.21, "grad_norm": 10.082515216465119, "learning_rate": 7.455945162239754e-06, "loss": 0.8402, "step": 28560 }, { "epoch": 1.21, "grad_norm": 9.098963077268278, "learning_rate": 7.454872709984307e-06, "loss": 0.8279, "step": 28565 }, { "epoch": 1.21, "grad_norm": 15.86357991671177, "learning_rate": 7.4538001088971575e-06, "loss": 0.8225, "step": 28570 }, { "epoch": 1.21, "grad_norm": 9.59044500600705, "learning_rate": 7.452727359043337e-06, "loss": 0.8236, "step": 28575 }, { "epoch": 1.21, "grad_norm": 5.317914184332231, "learning_rate": 7.4516544604878805e-06, "loss": 0.8112, "step": 28580 }, { "epoch": 1.21, "grad_norm": 11.64596292735616, "learning_rate": 7.450581413295835e-06, "loss": 0.8334, "step": 28585 }, { "epoch": 1.21, "grad_norm": 6.6326296240670075, "learning_rate": 7.449508217532258e-06, "loss": 0.8371, "step": 28590 }, { "epoch": 1.21, "grad_norm": 8.219978055554021, "learning_rate": 7.448434873262212e-06, "loss": 0.8624, "step": 28595 }, { "epoch": 1.21, "grad_norm": 12.57640479439711, "learning_rate": 7.447361380550774e-06, "loss": 0.8369, "step": 28600 }, { "epoch": 1.21, "grad_norm": 5.213650905184059, "learning_rate": 7.446287739463022e-06, "loss": 0.835, "step": 28605 }, { "epoch": 1.21, "grad_norm": 12.94241083232243, "learning_rate": 7.44521395006405e-06, "loss": 0.8947, "step": 28610 }, { "epoch": 1.21, "grad_norm": 29.09712069062493, "learning_rate": 7.4441400124189614e-06, "loss": 0.8364, "step": 28615 }, { "epoch": 1.21, "grad_norm": 15.546233264346412, "learning_rate": 7.443065926592861e-06, "loss": 0.8553, "step": 28620 }, { "epoch": 1.21, "grad_norm": 8.170031708672434, "learning_rate": 7.44199169265087e-06, "loss": 0.8634, "step": 28625 }, { "epoch": 1.21, "grad_norm": 15.110375012190065, "learning_rate": 7.440917310658116e-06, "loss": 0.8617, "step": 28630 }, { "epoch": 1.21, "grad_norm": 11.951723142896075, "learning_rate": 7.439842780679738e-06, "loss": 0.8464, "step": 28635 }, { "epoch": 1.21, "grad_norm": 5.8626077438645785, "learning_rate": 7.438768102780876e-06, "loss": 0.8519, "step": 28640 }, { "epoch": 1.21, "grad_norm": 5.357008880848115, "learning_rate": 7.437693277026688e-06, "loss": 0.8293, "step": 28645 }, { "epoch": 1.21, "grad_norm": 7.413801563252123, "learning_rate": 7.436618303482338e-06, "loss": 0.8374, "step": 28650 }, { "epoch": 1.21, "grad_norm": 9.057557074653534, "learning_rate": 7.435543182212996e-06, "loss": 0.8692, "step": 28655 }, { "epoch": 1.21, "grad_norm": 5.495667929921721, "learning_rate": 7.434467913283846e-06, "loss": 0.8047, "step": 28660 }, { "epoch": 1.21, "grad_norm": 9.84849054490829, "learning_rate": 7.433392496760077e-06, "loss": 0.8497, "step": 28665 }, { "epoch": 1.21, "grad_norm": 5.190450300692767, "learning_rate": 7.432316932706889e-06, "loss": 0.8823, "step": 28670 }, { "epoch": 1.21, "grad_norm": 4.876990637220433, "learning_rate": 7.43124122118949e-06, "loss": 0.8129, "step": 28675 }, { "epoch": 1.21, "grad_norm": 4.473585781561264, "learning_rate": 7.430165362273098e-06, "loss": 0.8109, "step": 28680 }, { "epoch": 1.21, "grad_norm": 5.461410768968516, "learning_rate": 7.429089356022937e-06, "loss": 0.8424, "step": 28685 }, { "epoch": 1.21, "grad_norm": 5.26289224485984, "learning_rate": 7.428013202504243e-06, "loss": 0.84, "step": 28690 }, { "epoch": 1.21, "grad_norm": 8.42143812914792, "learning_rate": 7.426936901782262e-06, "loss": 0.8242, "step": 28695 }, { "epoch": 1.21, "grad_norm": 4.815883699026703, "learning_rate": 7.425860453922246e-06, "loss": 0.8552, "step": 28700 }, { "epoch": 1.21, "grad_norm": 6.121663687823316, "learning_rate": 7.424783858989454e-06, "loss": 0.8945, "step": 28705 }, { "epoch": 1.22, "grad_norm": 12.180276211451368, "learning_rate": 7.42370711704916e-06, "loss": 0.8593, "step": 28710 }, { "epoch": 1.22, "grad_norm": 40.08373653153939, "learning_rate": 7.422630228166644e-06, "loss": 0.8565, "step": 28715 }, { "epoch": 1.22, "grad_norm": 31.2602347547327, "learning_rate": 7.421553192407192e-06, "loss": 0.8305, "step": 28720 }, { "epoch": 1.22, "grad_norm": 25.98790113741822, "learning_rate": 7.420476009836103e-06, "loss": 0.8505, "step": 28725 }, { "epoch": 1.22, "grad_norm": 13.550034102350098, "learning_rate": 7.419398680518683e-06, "loss": 0.8677, "step": 28730 }, { "epoch": 1.22, "grad_norm": 8.176228148569209, "learning_rate": 7.418321204520249e-06, "loss": 0.8445, "step": 28735 }, { "epoch": 1.22, "grad_norm": 5.58581753196945, "learning_rate": 7.417243581906123e-06, "loss": 0.8359, "step": 28740 }, { "epoch": 1.22, "grad_norm": 8.281340271445188, "learning_rate": 7.4161658127416395e-06, "loss": 0.8229, "step": 28745 }, { "epoch": 1.22, "grad_norm": 9.837573024405199, "learning_rate": 7.415087897092138e-06, "loss": 0.8523, "step": 28750 }, { "epoch": 1.22, "grad_norm": 5.172319214021831, "learning_rate": 7.414009835022973e-06, "loss": 0.8501, "step": 28755 }, { "epoch": 1.22, "grad_norm": 5.252227960383379, "learning_rate": 7.412931626599501e-06, "loss": 0.8412, "step": 28760 }, { "epoch": 1.22, "grad_norm": 8.176933885634275, "learning_rate": 7.411853271887093e-06, "loss": 0.8259, "step": 28765 }, { "epoch": 1.22, "grad_norm": 4.312258475329861, "learning_rate": 7.410774770951125e-06, "loss": 0.7868, "step": 28770 }, { "epoch": 1.22, "grad_norm": 5.067049698051338, "learning_rate": 7.409696123856983e-06, "loss": 0.831, "step": 28775 }, { "epoch": 1.22, "grad_norm": 4.746892122131091, "learning_rate": 7.408617330670064e-06, "loss": 0.8188, "step": 28780 }, { "epoch": 1.22, "grad_norm": 7.265860410818273, "learning_rate": 7.4075383914557705e-06, "loss": 0.8627, "step": 28785 }, { "epoch": 1.22, "grad_norm": 15.880108598758094, "learning_rate": 7.406459306279516e-06, "loss": 0.8239, "step": 28790 }, { "epoch": 1.22, "grad_norm": 4.760247481397366, "learning_rate": 7.405380075206725e-06, "loss": 0.8611, "step": 28795 }, { "epoch": 1.22, "grad_norm": 7.381576655412649, "learning_rate": 7.404300698302822e-06, "loss": 0.8453, "step": 28800 }, { "epoch": 1.22, "grad_norm": 13.684408087651821, "learning_rate": 7.40322117563325e-06, "loss": 0.7993, "step": 28805 }, { "epoch": 1.22, "grad_norm": 6.921208671215888, "learning_rate": 7.402141507263458e-06, "loss": 0.7988, "step": 28810 }, { "epoch": 1.22, "grad_norm": 6.275857173628684, "learning_rate": 7.4010616932589016e-06, "loss": 0.8506, "step": 28815 }, { "epoch": 1.22, "grad_norm": 4.823043376800442, "learning_rate": 7.399981733685049e-06, "loss": 0.8692, "step": 28820 }, { "epoch": 1.22, "grad_norm": 10.193322236689127, "learning_rate": 7.398901628607371e-06, "loss": 0.8079, "step": 28825 }, { "epoch": 1.22, "grad_norm": 9.55495463812664, "learning_rate": 7.397821378091354e-06, "loss": 0.8619, "step": 28830 }, { "epoch": 1.22, "grad_norm": 5.588245702214667, "learning_rate": 7.3967409822024905e-06, "loss": 0.8163, "step": 28835 }, { "epoch": 1.22, "grad_norm": 5.691221586612508, "learning_rate": 7.39566044100628e-06, "loss": 0.8735, "step": 28840 }, { "epoch": 1.22, "grad_norm": 4.551002387084731, "learning_rate": 7.394579754568235e-06, "loss": 0.8342, "step": 28845 }, { "epoch": 1.22, "grad_norm": 12.572532785115394, "learning_rate": 7.393498922953872e-06, "loss": 0.8181, "step": 28850 }, { "epoch": 1.22, "grad_norm": 8.284503205119472, "learning_rate": 7.39241794622872e-06, "loss": 0.8516, "step": 28855 }, { "epoch": 1.22, "grad_norm": 13.032932299880988, "learning_rate": 7.391336824458315e-06, "loss": 0.8543, "step": 28860 }, { "epoch": 1.22, "grad_norm": 11.10857730651208, "learning_rate": 7.390255557708202e-06, "loss": 0.8265, "step": 28865 }, { "epoch": 1.22, "grad_norm": 9.231292686944624, "learning_rate": 7.389174146043936e-06, "loss": 0.8984, "step": 28870 }, { "epoch": 1.22, "grad_norm": 16.419166169581416, "learning_rate": 7.388092589531078e-06, "loss": 0.8584, "step": 28875 }, { "epoch": 1.22, "grad_norm": 31.25228712239634, "learning_rate": 7.387010888235202e-06, "loss": 0.8506, "step": 28880 }, { "epoch": 1.22, "grad_norm": 8.685911314917098, "learning_rate": 7.385929042221885e-06, "loss": 0.8793, "step": 28885 }, { "epoch": 1.22, "grad_norm": 7.473763885931934, "learning_rate": 7.3848470515567206e-06, "loss": 0.8349, "step": 28890 }, { "epoch": 1.22, "grad_norm": 7.14487750495818, "learning_rate": 7.383764916305303e-06, "loss": 0.8298, "step": 28895 }, { "epoch": 1.22, "grad_norm": 6.4558479577407635, "learning_rate": 7.382682636533239e-06, "loss": 0.8345, "step": 28900 }, { "epoch": 1.22, "grad_norm": 12.822365263007674, "learning_rate": 7.381600212306147e-06, "loss": 0.8189, "step": 28905 }, { "epoch": 1.22, "grad_norm": 7.805840131674199, "learning_rate": 7.380517643689649e-06, "loss": 0.8419, "step": 28910 }, { "epoch": 1.22, "grad_norm": 10.689359727818442, "learning_rate": 7.379434930749377e-06, "loss": 0.8597, "step": 28915 }, { "epoch": 1.22, "grad_norm": 6.82115528686245, "learning_rate": 7.378352073550976e-06, "loss": 0.8676, "step": 28920 }, { "epoch": 1.22, "grad_norm": 4.896500908341524, "learning_rate": 7.377269072160093e-06, "loss": 0.8595, "step": 28925 }, { "epoch": 1.22, "grad_norm": 4.379893600618618, "learning_rate": 7.3761859266423896e-06, "loss": 0.8571, "step": 28930 }, { "epoch": 1.22, "grad_norm": 4.491961691159467, "learning_rate": 7.375102637063532e-06, "loss": 0.772, "step": 28935 }, { "epoch": 1.22, "grad_norm": 5.980315049821421, "learning_rate": 7.3740192034891975e-06, "loss": 0.8055, "step": 28940 }, { "epoch": 1.23, "grad_norm": 4.955471521419596, "learning_rate": 7.3729356259850715e-06, "loss": 0.8667, "step": 28945 }, { "epoch": 1.23, "grad_norm": 16.38808048316717, "learning_rate": 7.371851904616848e-06, "loss": 0.8374, "step": 28950 }, { "epoch": 1.23, "grad_norm": 4.522585680230388, "learning_rate": 7.3707680394502315e-06, "loss": 0.8402, "step": 28955 }, { "epoch": 1.23, "grad_norm": 4.478789699465472, "learning_rate": 7.369684030550932e-06, "loss": 0.841, "step": 28960 }, { "epoch": 1.23, "grad_norm": 8.115442217696387, "learning_rate": 7.368599877984669e-06, "loss": 0.8592, "step": 28965 }, { "epoch": 1.23, "grad_norm": 10.456542204353687, "learning_rate": 7.3675155818171715e-06, "loss": 0.8576, "step": 28970 }, { "epoch": 1.23, "grad_norm": 15.112071167837685, "learning_rate": 7.36643114211418e-06, "loss": 0.8768, "step": 28975 }, { "epoch": 1.23, "grad_norm": 5.593518248100902, "learning_rate": 7.3653465589414375e-06, "loss": 0.8486, "step": 28980 }, { "epoch": 1.23, "grad_norm": 6.888671014717431, "learning_rate": 7.364261832364701e-06, "loss": 0.7903, "step": 28985 }, { "epoch": 1.23, "grad_norm": 7.9975094943152, "learning_rate": 7.363176962449734e-06, "loss": 0.8474, "step": 28990 }, { "epoch": 1.23, "grad_norm": 21.043961229223278, "learning_rate": 7.362091949262306e-06, "loss": 0.8114, "step": 28995 }, { "epoch": 1.23, "grad_norm": 27.24738938766072, "learning_rate": 7.361006792868205e-06, "loss": 0.8018, "step": 29000 }, { "epoch": 1.23, "grad_norm": 13.92275874520696, "learning_rate": 7.3599214933332155e-06, "loss": 0.8362, "step": 29005 }, { "epoch": 1.23, "grad_norm": 7.243077377047206, "learning_rate": 7.358836050723135e-06, "loss": 0.8351, "step": 29010 }, { "epoch": 1.23, "grad_norm": 4.20317227239975, "learning_rate": 7.357750465103775e-06, "loss": 0.8294, "step": 29015 }, { "epoch": 1.23, "grad_norm": 6.333451603714722, "learning_rate": 7.3566647365409485e-06, "loss": 0.8288, "step": 29020 }, { "epoch": 1.23, "grad_norm": 12.045718477436173, "learning_rate": 7.355578865100482e-06, "loss": 0.8099, "step": 29025 }, { "epoch": 1.23, "grad_norm": 16.050989470551833, "learning_rate": 7.3544928508482075e-06, "loss": 0.8772, "step": 29030 }, { "epoch": 1.23, "grad_norm": 5.577805652411866, "learning_rate": 7.353406693849965e-06, "loss": 0.8318, "step": 29035 }, { "epoch": 1.23, "grad_norm": 11.071099280151154, "learning_rate": 7.352320394171609e-06, "loss": 0.8489, "step": 29040 }, { "epoch": 1.23, "grad_norm": 8.536610185534432, "learning_rate": 7.351233951878996e-06, "loss": 0.8477, "step": 29045 }, { "epoch": 1.23, "grad_norm": 13.565799746508434, "learning_rate": 7.350147367037993e-06, "loss": 0.8337, "step": 29050 }, { "epoch": 1.23, "grad_norm": 5.637309960920233, "learning_rate": 7.349060639714478e-06, "loss": 0.8779, "step": 29055 }, { "epoch": 1.23, "grad_norm": 6.14280788308352, "learning_rate": 7.347973769974336e-06, "loss": 0.8341, "step": 29060 }, { "epoch": 1.23, "grad_norm": 7.4523962552507115, "learning_rate": 7.346886757883461e-06, "loss": 0.8216, "step": 29065 }, { "epoch": 1.23, "grad_norm": 5.225313854637131, "learning_rate": 7.345799603507754e-06, "loss": 0.8413, "step": 29070 }, { "epoch": 1.23, "grad_norm": 16.336602963552096, "learning_rate": 7.3447123069131275e-06, "loss": 0.8908, "step": 29075 }, { "epoch": 1.23, "grad_norm": 16.202059921097902, "learning_rate": 7.3436248681654995e-06, "loss": 0.8454, "step": 29080 }, { "epoch": 1.23, "grad_norm": 9.217272290208314, "learning_rate": 7.342537287330801e-06, "loss": 0.8698, "step": 29085 }, { "epoch": 1.23, "grad_norm": 6.3760256901915024, "learning_rate": 7.341449564474965e-06, "loss": 0.8415, "step": 29090 }, { "epoch": 1.23, "grad_norm": 5.126219696437688, "learning_rate": 7.34036169966394e-06, "loss": 0.8746, "step": 29095 }, { "epoch": 1.23, "grad_norm": 9.569498847629877, "learning_rate": 7.339273692963679e-06, "loss": 0.8014, "step": 29100 }, { "epoch": 1.23, "grad_norm": 8.820757424697069, "learning_rate": 7.338185544440143e-06, "loss": 0.8414, "step": 29105 }, { "epoch": 1.23, "grad_norm": 22.178867219973096, "learning_rate": 7.337097254159307e-06, "loss": 0.8348, "step": 29110 }, { "epoch": 1.23, "grad_norm": 25.718057039665243, "learning_rate": 7.3360088221871474e-06, "loss": 0.8279, "step": 29115 }, { "epoch": 1.23, "grad_norm": 19.84135891604384, "learning_rate": 7.334920248589654e-06, "loss": 0.8229, "step": 29120 }, { "epoch": 1.23, "grad_norm": 5.5264950980969125, "learning_rate": 7.3338315334328244e-06, "loss": 0.8556, "step": 29125 }, { "epoch": 1.23, "grad_norm": 4.69602826829856, "learning_rate": 7.332742676782663e-06, "loss": 0.8663, "step": 29130 }, { "epoch": 1.23, "grad_norm": 10.38347927925216, "learning_rate": 7.331653678705184e-06, "loss": 0.8954, "step": 29135 }, { "epoch": 1.23, "grad_norm": 11.686709273365198, "learning_rate": 7.33056453926641e-06, "loss": 0.8736, "step": 29140 }, { "epoch": 1.23, "grad_norm": 8.637430529097813, "learning_rate": 7.3294752585323735e-06, "loss": 0.8411, "step": 29145 }, { "epoch": 1.23, "grad_norm": 5.370082315510036, "learning_rate": 7.328385836569114e-06, "loss": 0.8307, "step": 29150 }, { "epoch": 1.23, "grad_norm": 5.046395513314983, "learning_rate": 7.3272962734426785e-06, "loss": 0.8119, "step": 29155 }, { "epoch": 1.23, "grad_norm": 5.266396992448873, "learning_rate": 7.326206569219127e-06, "loss": 0.8174, "step": 29160 }, { "epoch": 1.23, "grad_norm": 14.051216831171775, "learning_rate": 7.3251167239645225e-06, "loss": 0.8065, "step": 29165 }, { "epoch": 1.23, "grad_norm": 10.62295564404839, "learning_rate": 7.3240267377449405e-06, "loss": 0.809, "step": 29170 }, { "epoch": 1.23, "grad_norm": 9.845446142338483, "learning_rate": 7.322936610626463e-06, "loss": 0.8275, "step": 29175 }, { "epoch": 1.23, "grad_norm": 9.239379955376876, "learning_rate": 7.32184634267518e-06, "loss": 0.8658, "step": 29180 }, { "epoch": 1.24, "grad_norm": 4.858690595472981, "learning_rate": 7.320755933957193e-06, "loss": 0.8575, "step": 29185 }, { "epoch": 1.24, "grad_norm": 6.105704610039998, "learning_rate": 7.3196653845386115e-06, "loss": 0.8372, "step": 29190 }, { "epoch": 1.24, "grad_norm": 13.853643856358639, "learning_rate": 7.318574694485548e-06, "loss": 0.8411, "step": 29195 }, { "epoch": 1.24, "grad_norm": 16.33363374073994, "learning_rate": 7.317483863864132e-06, "loss": 0.8372, "step": 29200 }, { "epoch": 1.24, "grad_norm": 11.276578318383763, "learning_rate": 7.316392892740495e-06, "loss": 0.8044, "step": 29205 }, { "epoch": 1.24, "grad_norm": 4.407926808464365, "learning_rate": 7.31530178118078e-06, "loss": 0.8196, "step": 29210 }, { "epoch": 1.24, "grad_norm": 5.106938516624817, "learning_rate": 7.314210529251138e-06, "loss": 0.8281, "step": 29215 }, { "epoch": 1.24, "grad_norm": 5.936377290854027, "learning_rate": 7.313119137017728e-06, "loss": 0.8558, "step": 29220 }, { "epoch": 1.24, "grad_norm": 4.726034332992367, "learning_rate": 7.31202760454672e-06, "loss": 0.8203, "step": 29225 }, { "epoch": 1.24, "grad_norm": 11.866780966485535, "learning_rate": 7.310935931904286e-06, "loss": 0.8235, "step": 29230 }, { "epoch": 1.24, "grad_norm": 8.152282577698498, "learning_rate": 7.309844119156615e-06, "loss": 0.8163, "step": 29235 }, { "epoch": 1.24, "grad_norm": 6.4549454312139485, "learning_rate": 7.308752166369899e-06, "loss": 0.8583, "step": 29240 }, { "epoch": 1.24, "grad_norm": 9.723888347405351, "learning_rate": 7.307660073610339e-06, "loss": 0.8456, "step": 29245 }, { "epoch": 1.24, "grad_norm": 4.48381014277602, "learning_rate": 7.306567840944148e-06, "loss": 0.8528, "step": 29250 }, { "epoch": 1.24, "grad_norm": 4.821784134757839, "learning_rate": 7.305475468437541e-06, "loss": 0.8326, "step": 29255 }, { "epoch": 1.24, "grad_norm": 10.177411450799287, "learning_rate": 7.304382956156748e-06, "loss": 0.9001, "step": 29260 }, { "epoch": 1.24, "grad_norm": 12.197740145653627, "learning_rate": 7.3032903041680035e-06, "loss": 0.8796, "step": 29265 }, { "epoch": 1.24, "grad_norm": 15.146197711207414, "learning_rate": 7.302197512537553e-06, "loss": 0.8141, "step": 29270 }, { "epoch": 1.24, "grad_norm": 14.588899352718979, "learning_rate": 7.30110458133165e-06, "loss": 0.7912, "step": 29275 }, { "epoch": 1.24, "grad_norm": 18.317943918552075, "learning_rate": 7.300011510616554e-06, "loss": 0.8218, "step": 29280 }, { "epoch": 1.24, "grad_norm": 12.767231643223573, "learning_rate": 7.298918300458534e-06, "loss": 0.8558, "step": 29285 }, { "epoch": 1.24, "grad_norm": 13.071614531742537, "learning_rate": 7.297824950923869e-06, "loss": 0.8416, "step": 29290 }, { "epoch": 1.24, "grad_norm": 9.742379957112458, "learning_rate": 7.296731462078845e-06, "loss": 0.8609, "step": 29295 }, { "epoch": 1.24, "grad_norm": 12.509863489478066, "learning_rate": 7.295637833989759e-06, "loss": 0.878, "step": 29300 }, { "epoch": 1.24, "grad_norm": 12.372861781184831, "learning_rate": 7.294544066722913e-06, "loss": 0.8243, "step": 29305 }, { "epoch": 1.24, "grad_norm": 11.801422988956855, "learning_rate": 7.29345016034462e-06, "loss": 0.7955, "step": 29310 }, { "epoch": 1.24, "grad_norm": 16.522064923505, "learning_rate": 7.292356114921197e-06, "loss": 0.8169, "step": 29315 }, { "epoch": 1.24, "grad_norm": 16.30935027650137, "learning_rate": 7.291261930518977e-06, "loss": 0.8094, "step": 29320 }, { "epoch": 1.24, "grad_norm": 16.834573616715605, "learning_rate": 7.290167607204293e-06, "loss": 0.8556, "step": 29325 }, { "epoch": 1.24, "grad_norm": 7.393644376665236, "learning_rate": 7.289073145043496e-06, "loss": 0.8021, "step": 29330 }, { "epoch": 1.24, "grad_norm": 4.807999546801356, "learning_rate": 7.287978544102936e-06, "loss": 0.8571, "step": 29335 }, { "epoch": 1.24, "grad_norm": 5.9648350108960955, "learning_rate": 7.286883804448975e-06, "loss": 0.8259, "step": 29340 }, { "epoch": 1.24, "grad_norm": 12.53822869513032, "learning_rate": 7.2857889261479864e-06, "loss": 0.8266, "step": 29345 }, { "epoch": 1.24, "grad_norm": 6.9651634565440315, "learning_rate": 7.2846939092663475e-06, "loss": 0.8503, "step": 29350 }, { "epoch": 1.24, "grad_norm": 7.512055928241283, "learning_rate": 7.283598753870447e-06, "loss": 0.805, "step": 29355 }, { "epoch": 1.24, "grad_norm": 10.656744584243025, "learning_rate": 7.282503460026681e-06, "loss": 0.8722, "step": 29360 }, { "epoch": 1.24, "grad_norm": 12.702422182665197, "learning_rate": 7.281408027801453e-06, "loss": 0.8299, "step": 29365 }, { "epoch": 1.24, "grad_norm": 9.88179543244977, "learning_rate": 7.280312457261177e-06, "loss": 0.8746, "step": 29370 }, { "epoch": 1.24, "grad_norm": 6.625958265756664, "learning_rate": 7.2792167484722716e-06, "loss": 0.8691, "step": 29375 }, { "epoch": 1.24, "grad_norm": 5.175460045103117, "learning_rate": 7.278120901501171e-06, "loss": 0.8734, "step": 29380 }, { "epoch": 1.24, "grad_norm": 6.0374724798087245, "learning_rate": 7.2770249164143085e-06, "loss": 0.8467, "step": 29385 }, { "epoch": 1.24, "grad_norm": 4.9447351737332585, "learning_rate": 7.275928793278134e-06, "loss": 0.7984, "step": 29390 }, { "epoch": 1.24, "grad_norm": 5.765239433909384, "learning_rate": 7.274832532159101e-06, "loss": 0.84, "step": 29395 }, { "epoch": 1.24, "grad_norm": 18.969070211009956, "learning_rate": 7.27373613312367e-06, "loss": 0.8546, "step": 29400 }, { "epoch": 1.24, "grad_norm": 6.55334589889723, "learning_rate": 7.272639596238317e-06, "loss": 0.8623, "step": 29405 }, { "epoch": 1.24, "grad_norm": 10.854588120999194, "learning_rate": 7.271542921569519e-06, "loss": 0.8339, "step": 29410 }, { "epoch": 1.24, "grad_norm": 4.628159325090979, "learning_rate": 7.270446109183764e-06, "loss": 0.8348, "step": 29415 }, { "epoch": 1.25, "grad_norm": 4.527769835290532, "learning_rate": 7.269349159147551e-06, "loss": 0.8596, "step": 29420 }, { "epoch": 1.25, "grad_norm": 12.660792412007858, "learning_rate": 7.268252071527381e-06, "loss": 0.8488, "step": 29425 }, { "epoch": 1.25, "grad_norm": 4.912923489216101, "learning_rate": 7.26715484638977e-06, "loss": 0.7763, "step": 29430 }, { "epoch": 1.25, "grad_norm": 4.99618155816127, "learning_rate": 7.266057483801239e-06, "loss": 0.8575, "step": 29435 }, { "epoch": 1.25, "grad_norm": 4.256170042305894, "learning_rate": 7.264959983828317e-06, "loss": 0.8493, "step": 29440 }, { "epoch": 1.25, "grad_norm": 4.858757837784894, "learning_rate": 7.263862346537544e-06, "loss": 0.8471, "step": 29445 }, { "epoch": 1.25, "grad_norm": 11.152206596373935, "learning_rate": 7.262764571995464e-06, "loss": 0.8254, "step": 29450 }, { "epoch": 1.25, "grad_norm": 9.01167043438302, "learning_rate": 7.261666660268635e-06, "loss": 0.8428, "step": 29455 }, { "epoch": 1.25, "grad_norm": 14.02501198411762, "learning_rate": 7.2605686114236174e-06, "loss": 0.8207, "step": 29460 }, { "epoch": 1.25, "grad_norm": 6.148897798329034, "learning_rate": 7.2594704255269835e-06, "loss": 0.8053, "step": 29465 }, { "epoch": 1.25, "grad_norm": 7.856003729115084, "learning_rate": 7.258372102645313e-06, "loss": 0.8252, "step": 29470 }, { "epoch": 1.25, "grad_norm": 5.605999149460576, "learning_rate": 7.257273642845194e-06, "loss": 0.8476, "step": 29475 }, { "epoch": 1.25, "grad_norm": 6.936180719158974, "learning_rate": 7.256175046193223e-06, "loss": 0.8538, "step": 29480 }, { "epoch": 1.25, "grad_norm": 8.694873933728037, "learning_rate": 7.2550763127560055e-06, "loss": 0.7998, "step": 29485 }, { "epoch": 1.25, "grad_norm": 9.005500778403437, "learning_rate": 7.253977442600153e-06, "loss": 0.8358, "step": 29490 }, { "epoch": 1.25, "grad_norm": 9.8578578427582, "learning_rate": 7.252878435792288e-06, "loss": 0.863, "step": 29495 }, { "epoch": 1.25, "grad_norm": 6.807718655827233, "learning_rate": 7.251779292399039e-06, "loss": 0.8712, "step": 29500 }, { "epoch": 1.25, "grad_norm": 8.528911006867819, "learning_rate": 7.250680012487045e-06, "loss": 0.8376, "step": 29505 }, { "epoch": 1.25, "grad_norm": 5.188934788366203, "learning_rate": 7.2495805961229506e-06, "loss": 0.8171, "step": 29510 }, { "epoch": 1.25, "grad_norm": 6.703753503479719, "learning_rate": 7.248481043373412e-06, "loss": 0.8633, "step": 29515 }, { "epoch": 1.25, "grad_norm": 20.157590759494898, "learning_rate": 7.24738135430509e-06, "loss": 0.8348, "step": 29520 }, { "epoch": 1.25, "grad_norm": 23.32965649754932, "learning_rate": 7.2462815289846575e-06, "loss": 0.8434, "step": 29525 }, { "epoch": 1.25, "grad_norm": 9.713036855559205, "learning_rate": 7.2451815674787916e-06, "loss": 0.838, "step": 29530 }, { "epoch": 1.25, "grad_norm": 9.484524017693479, "learning_rate": 7.2440814698541805e-06, "loss": 0.8264, "step": 29535 }, { "epoch": 1.25, "grad_norm": 6.676486139520441, "learning_rate": 7.242981236177521e-06, "loss": 0.7958, "step": 29540 }, { "epoch": 1.25, "grad_norm": 4.701090946462555, "learning_rate": 7.2418808665155135e-06, "loss": 0.8532, "step": 29545 }, { "epoch": 1.25, "grad_norm": 6.740492645623074, "learning_rate": 7.240780360934876e-06, "loss": 0.8624, "step": 29550 }, { "epoch": 1.25, "grad_norm": 11.348530755680857, "learning_rate": 7.2396797195023246e-06, "loss": 0.7957, "step": 29555 }, { "epoch": 1.25, "grad_norm": 4.451479577235648, "learning_rate": 7.238578942284588e-06, "loss": 0.8064, "step": 29560 }, { "epoch": 1.25, "grad_norm": 6.10087789848718, "learning_rate": 7.2374780293484045e-06, "loss": 0.8371, "step": 29565 }, { "epoch": 1.25, "grad_norm": 8.446994852041026, "learning_rate": 7.2363769807605185e-06, "loss": 0.8292, "step": 29570 }, { "epoch": 1.25, "grad_norm": 5.818436699407271, "learning_rate": 7.235275796587685e-06, "loss": 0.8299, "step": 29575 }, { "epoch": 1.25, "grad_norm": 15.23901620938986, "learning_rate": 7.2341744768966634e-06, "loss": 0.8569, "step": 29580 }, { "epoch": 1.25, "grad_norm": 30.841845289449214, "learning_rate": 7.233073021754224e-06, "loss": 0.8234, "step": 29585 }, { "epoch": 1.25, "grad_norm": 30.438206820369167, "learning_rate": 7.231971431227146e-06, "loss": 0.818, "step": 29590 }, { "epoch": 1.25, "grad_norm": 14.502394987712721, "learning_rate": 7.230869705382213e-06, "loss": 0.779, "step": 29595 }, { "epoch": 1.25, "grad_norm": 4.794983742770824, "learning_rate": 7.229767844286222e-06, "loss": 0.8196, "step": 29600 }, { "epoch": 1.25, "grad_norm": 6.516537531055367, "learning_rate": 7.228665848005975e-06, "loss": 0.8043, "step": 29605 }, { "epoch": 1.25, "grad_norm": 7.593921595982875, "learning_rate": 7.2275637166082825e-06, "loss": 0.8608, "step": 29610 }, { "epoch": 1.25, "grad_norm": 8.829725951709456, "learning_rate": 7.226461450159963e-06, "loss": 0.804, "step": 29615 }, { "epoch": 1.25, "grad_norm": 12.601119702098064, "learning_rate": 7.225359048727843e-06, "loss": 0.8669, "step": 29620 }, { "epoch": 1.25, "grad_norm": 6.731621951276361, "learning_rate": 7.22425651237876e-06, "loss": 0.8656, "step": 29625 }, { "epoch": 1.25, "grad_norm": 5.035318345850336, "learning_rate": 7.223153841179555e-06, "loss": 0.8691, "step": 29630 }, { "epoch": 1.25, "grad_norm": 4.7706115583553625, "learning_rate": 7.222051035197083e-06, "loss": 0.8032, "step": 29635 }, { "epoch": 1.25, "grad_norm": 4.871245622649054, "learning_rate": 7.220948094498201e-06, "loss": 0.8351, "step": 29640 }, { "epoch": 1.25, "grad_norm": 6.408729100339189, "learning_rate": 7.219845019149778e-06, "loss": 0.8328, "step": 29645 }, { "epoch": 1.25, "grad_norm": 10.043280881192205, "learning_rate": 7.21874180921869e-06, "loss": 0.8324, "step": 29650 }, { "epoch": 1.26, "grad_norm": 5.569204109694414, "learning_rate": 7.217638464771822e-06, "loss": 0.834, "step": 29655 }, { "epoch": 1.26, "grad_norm": 7.013292002671288, "learning_rate": 7.216534985876067e-06, "loss": 0.8142, "step": 29660 }, { "epoch": 1.26, "grad_norm": 5.191731935573789, "learning_rate": 7.215431372598324e-06, "loss": 0.8634, "step": 29665 }, { "epoch": 1.26, "grad_norm": 7.476164786543851, "learning_rate": 7.214327625005502e-06, "loss": 0.8276, "step": 29670 }, { "epoch": 1.26, "grad_norm": 4.7686015597918985, "learning_rate": 7.213223743164519e-06, "loss": 0.8088, "step": 29675 }, { "epoch": 1.26, "grad_norm": 5.524827207714984, "learning_rate": 7.212119727142301e-06, "loss": 0.7945, "step": 29680 }, { "epoch": 1.26, "grad_norm": 4.400357228242433, "learning_rate": 7.21101557700578e-06, "loss": 0.822, "step": 29685 }, { "epoch": 1.26, "grad_norm": 5.287508002072973, "learning_rate": 7.209911292821895e-06, "loss": 0.899, "step": 29690 }, { "epoch": 1.26, "grad_norm": 4.858578429962108, "learning_rate": 7.208806874657599e-06, "loss": 0.8326, "step": 29695 }, { "epoch": 1.26, "grad_norm": 6.9568991801922495, "learning_rate": 7.207702322579849e-06, "loss": 0.7937, "step": 29700 }, { "epoch": 1.26, "grad_norm": 6.060902116530706, "learning_rate": 7.20659763665561e-06, "loss": 0.8261, "step": 29705 }, { "epoch": 1.26, "grad_norm": 4.320079869779115, "learning_rate": 7.205492816951856e-06, "loss": 0.8031, "step": 29710 }, { "epoch": 1.26, "grad_norm": 5.490553942136585, "learning_rate": 7.204387863535568e-06, "loss": 0.8394, "step": 29715 }, { "epoch": 1.26, "grad_norm": 22.94529087735934, "learning_rate": 7.203282776473738e-06, "loss": 0.8256, "step": 29720 }, { "epoch": 1.26, "grad_norm": 18.620025872079975, "learning_rate": 7.202177555833364e-06, "loss": 0.8276, "step": 29725 }, { "epoch": 1.26, "grad_norm": 12.20123193795549, "learning_rate": 7.20107220168145e-06, "loss": 0.8131, "step": 29730 }, { "epoch": 1.26, "grad_norm": 5.500358002285791, "learning_rate": 7.199966714085012e-06, "loss": 0.8314, "step": 29735 }, { "epoch": 1.26, "grad_norm": 5.023400003934056, "learning_rate": 7.198861093111072e-06, "loss": 0.8283, "step": 29740 }, { "epoch": 1.26, "grad_norm": 11.659605185452397, "learning_rate": 7.197755338826663e-06, "loss": 0.892, "step": 29745 }, { "epoch": 1.26, "grad_norm": 17.435221718273503, "learning_rate": 7.19664945129882e-06, "loss": 0.8528, "step": 29750 }, { "epoch": 1.26, "grad_norm": 12.609138868985822, "learning_rate": 7.195543430594592e-06, "loss": 0.8005, "step": 29755 }, { "epoch": 1.26, "grad_norm": 4.566826661565516, "learning_rate": 7.194437276781032e-06, "loss": 0.8386, "step": 29760 }, { "epoch": 1.26, "grad_norm": 9.40258414128485, "learning_rate": 7.193330989925205e-06, "loss": 0.8362, "step": 29765 }, { "epoch": 1.26, "grad_norm": 17.94998461763534, "learning_rate": 7.192224570094179e-06, "loss": 0.8383, "step": 29770 }, { "epoch": 1.26, "grad_norm": 17.446892458292783, "learning_rate": 7.1911180173550365e-06, "loss": 0.8736, "step": 29775 }, { "epoch": 1.26, "grad_norm": 17.194438449584965, "learning_rate": 7.190011331774862e-06, "loss": 0.8522, "step": 29780 }, { "epoch": 1.26, "grad_norm": 9.15836325094222, "learning_rate": 7.188904513420751e-06, "loss": 0.8158, "step": 29785 }, { "epoch": 1.26, "grad_norm": 12.246171630612176, "learning_rate": 7.187797562359807e-06, "loss": 0.8552, "step": 29790 }, { "epoch": 1.26, "grad_norm": 11.480493201526757, "learning_rate": 7.186690478659142e-06, "loss": 0.8919, "step": 29795 }, { "epoch": 1.26, "grad_norm": 15.29436640897875, "learning_rate": 7.185583262385874e-06, "loss": 0.8495, "step": 29800 }, { "epoch": 1.26, "grad_norm": 16.75781068810704, "learning_rate": 7.184475913607131e-06, "loss": 0.7748, "step": 29805 }, { "epoch": 1.26, "grad_norm": 13.243322280388988, "learning_rate": 7.183368432390048e-06, "loss": 0.8363, "step": 29810 }, { "epoch": 1.26, "grad_norm": 12.32277189580591, "learning_rate": 7.1822608188017664e-06, "loss": 0.8474, "step": 29815 }, { "epoch": 1.26, "grad_norm": 5.933760192679248, "learning_rate": 7.1811530729094415e-06, "loss": 0.8241, "step": 29820 }, { "epoch": 1.26, "grad_norm": 9.748001295573413, "learning_rate": 7.180045194780229e-06, "loss": 0.7955, "step": 29825 }, { "epoch": 1.26, "grad_norm": 7.705233959104965, "learning_rate": 7.1789371844812996e-06, "loss": 0.8613, "step": 29830 }, { "epoch": 1.26, "grad_norm": 7.360026036500931, "learning_rate": 7.1778290420798255e-06, "loss": 0.8901, "step": 29835 }, { "epoch": 1.26, "grad_norm": 8.24755712957183, "learning_rate": 7.176720767642992e-06, "loss": 0.8383, "step": 29840 }, { "epoch": 1.26, "grad_norm": 9.233027563859196, "learning_rate": 7.17561236123799e-06, "loss": 0.8075, "step": 29845 }, { "epoch": 1.26, "grad_norm": 5.41474004468858, "learning_rate": 7.174503822932019e-06, "loss": 0.8191, "step": 29850 }, { "epoch": 1.26, "grad_norm": 4.554102288736465, "learning_rate": 7.173395152792288e-06, "loss": 0.8487, "step": 29855 }, { "epoch": 1.26, "grad_norm": 4.718494092386051, "learning_rate": 7.172286350886011e-06, "loss": 0.7789, "step": 29860 }, { "epoch": 1.26, "grad_norm": 4.840213149751106, "learning_rate": 7.171177417280408e-06, "loss": 0.7893, "step": 29865 }, { "epoch": 1.26, "grad_norm": 6.0942096767544784, "learning_rate": 7.1700683520427164e-06, "loss": 0.8417, "step": 29870 }, { "epoch": 1.26, "grad_norm": 5.823508151141095, "learning_rate": 7.168959155240171e-06, "loss": 0.8457, "step": 29875 }, { "epoch": 1.26, "grad_norm": 5.889126439719206, "learning_rate": 7.167849826940022e-06, "loss": 0.8591, "step": 29880 }, { "epoch": 1.26, "grad_norm": 5.363740375639456, "learning_rate": 7.166740367209524e-06, "loss": 0.8409, "step": 29885 }, { "epoch": 1.26, "grad_norm": 4.949181665082757, "learning_rate": 7.165630776115938e-06, "loss": 0.7992, "step": 29890 }, { "epoch": 1.27, "grad_norm": 6.274609147746191, "learning_rate": 7.164521053726539e-06, "loss": 0.8235, "step": 29895 }, { "epoch": 1.27, "grad_norm": 8.2457391828557, "learning_rate": 7.163411200108602e-06, "loss": 0.8344, "step": 29900 }, { "epoch": 1.27, "grad_norm": 5.03284665877464, "learning_rate": 7.1623012153294195e-06, "loss": 0.9036, "step": 29905 }, { "epoch": 1.27, "grad_norm": 5.15690940550589, "learning_rate": 7.1611910994562816e-06, "loss": 0.8094, "step": 29910 }, { "epoch": 1.27, "grad_norm": 5.7023573036996815, "learning_rate": 7.160080852556494e-06, "loss": 0.8336, "step": 29915 }, { "epoch": 1.27, "grad_norm": 8.735035717766237, "learning_rate": 7.158970474697366e-06, "loss": 0.8675, "step": 29920 }, { "epoch": 1.27, "grad_norm": 4.687302877373789, "learning_rate": 7.157859965946219e-06, "loss": 0.8061, "step": 29925 }, { "epoch": 1.27, "grad_norm": 10.597599726749966, "learning_rate": 7.156749326370379e-06, "loss": 0.8478, "step": 29930 }, { "epoch": 1.27, "grad_norm": 7.678172636868652, "learning_rate": 7.155638556037179e-06, "loss": 0.8029, "step": 29935 }, { "epoch": 1.27, "grad_norm": 9.042772133185384, "learning_rate": 7.154527655013963e-06, "loss": 0.8299, "step": 29940 }, { "epoch": 1.27, "grad_norm": 5.4137584243863035, "learning_rate": 7.153416623368082e-06, "loss": 0.8047, "step": 29945 }, { "epoch": 1.27, "grad_norm": 6.779318593410042, "learning_rate": 7.152305461166893e-06, "loss": 0.8468, "step": 29950 }, { "epoch": 1.27, "grad_norm": 7.683268593499577, "learning_rate": 7.151194168477765e-06, "loss": 0.8366, "step": 29955 }, { "epoch": 1.27, "grad_norm": 9.202841736955973, "learning_rate": 7.150082745368071e-06, "loss": 0.856, "step": 29960 }, { "epoch": 1.27, "grad_norm": 5.74698312608711, "learning_rate": 7.148971191905193e-06, "loss": 0.8268, "step": 29965 }, { "epoch": 1.27, "grad_norm": 13.620053454352865, "learning_rate": 7.147859508156523e-06, "loss": 0.8727, "step": 29970 }, { "epoch": 1.27, "grad_norm": 10.30299209246428, "learning_rate": 7.146747694189455e-06, "loss": 0.8286, "step": 29975 }, { "epoch": 1.27, "grad_norm": 17.84742173687575, "learning_rate": 7.1456357500714e-06, "loss": 0.845, "step": 29980 }, { "epoch": 1.27, "grad_norm": 9.462707383785776, "learning_rate": 7.1445236758697685e-06, "loss": 0.831, "step": 29985 }, { "epoch": 1.27, "grad_norm": 11.923176002101156, "learning_rate": 7.143411471651984e-06, "loss": 0.8552, "step": 29990 }, { "epoch": 1.27, "grad_norm": 4.617685315647928, "learning_rate": 7.142299137485477e-06, "loss": 0.8683, "step": 29995 }, { "epoch": 1.27, "grad_norm": 13.48666076840623, "learning_rate": 7.141186673437681e-06, "loss": 0.8094, "step": 30000 }, { "epoch": 1.27, "grad_norm": 7.790314995126945, "learning_rate": 7.140074079576046e-06, "loss": 0.8444, "step": 30005 }, { "epoch": 1.27, "grad_norm": 5.3584278837068755, "learning_rate": 7.1389613559680226e-06, "loss": 0.8139, "step": 30010 }, { "epoch": 1.27, "grad_norm": 7.579526282844967, "learning_rate": 7.1378485026810726e-06, "loss": 0.8037, "step": 30015 }, { "epoch": 1.27, "grad_norm": 5.497370313315846, "learning_rate": 7.136735519782666e-06, "loss": 0.8511, "step": 30020 }, { "epoch": 1.27, "grad_norm": 8.868112884843894, "learning_rate": 7.135622407340278e-06, "loss": 0.8709, "step": 30025 }, { "epoch": 1.27, "grad_norm": 16.47655938002021, "learning_rate": 7.134509165421395e-06, "loss": 0.8252, "step": 30030 }, { "epoch": 1.27, "grad_norm": 12.863726584047223, "learning_rate": 7.133395794093507e-06, "loss": 0.7943, "step": 30035 }, { "epoch": 1.27, "grad_norm": 10.63944516989122, "learning_rate": 7.132282293424117e-06, "loss": 0.8772, "step": 30040 }, { "epoch": 1.27, "grad_norm": 7.043440768260508, "learning_rate": 7.1311686634807335e-06, "loss": 0.8003, "step": 30045 }, { "epoch": 1.27, "grad_norm": 6.016223481684054, "learning_rate": 7.13005490433087e-06, "loss": 0.8138, "step": 30050 }, { "epoch": 1.27, "grad_norm": 5.533192022029013, "learning_rate": 7.128941016042053e-06, "loss": 0.8365, "step": 30055 }, { "epoch": 1.27, "grad_norm": 6.085027975222191, "learning_rate": 7.12782699868181e-06, "loss": 0.8327, "step": 30060 }, { "epoch": 1.27, "grad_norm": 7.976621528814561, "learning_rate": 7.126712852317688e-06, "loss": 0.7661, "step": 30065 }, { "epoch": 1.27, "grad_norm": 8.752234974111703, "learning_rate": 7.125598577017229e-06, "loss": 0.8189, "step": 30070 }, { "epoch": 1.27, "grad_norm": 4.963205185987787, "learning_rate": 7.1244841728479875e-06, "loss": 0.8638, "step": 30075 }, { "epoch": 1.27, "grad_norm": 10.507446730850619, "learning_rate": 7.123369639877531e-06, "loss": 0.8139, "step": 30080 }, { "epoch": 1.27, "grad_norm": 15.032151920413579, "learning_rate": 7.122254978173426e-06, "loss": 0.8474, "step": 30085 }, { "epoch": 1.27, "grad_norm": 17.498418053837263, "learning_rate": 7.1211401878032546e-06, "loss": 0.8349, "step": 30090 }, { "epoch": 1.27, "grad_norm": 9.064477368120707, "learning_rate": 7.1200252688346e-06, "loss": 0.8027, "step": 30095 }, { "epoch": 1.27, "grad_norm": 8.782146255556922, "learning_rate": 7.11891022133506e-06, "loss": 0.8418, "step": 30100 }, { "epoch": 1.27, "grad_norm": 12.877113042272308, "learning_rate": 7.117795045372233e-06, "loss": 0.7941, "step": 30105 }, { "epoch": 1.27, "grad_norm": 7.611728568047225, "learning_rate": 7.116679741013731e-06, "loss": 0.8681, "step": 30110 }, { "epoch": 1.27, "grad_norm": 11.47638333755626, "learning_rate": 7.115564308327172e-06, "loss": 0.8081, "step": 30115 }, { "epoch": 1.27, "grad_norm": 9.840736983749753, "learning_rate": 7.11444874738018e-06, "loss": 0.7786, "step": 30120 }, { "epoch": 1.27, "grad_norm": 12.796416217118242, "learning_rate": 7.113333058240388e-06, "loss": 0.8456, "step": 30125 }, { "epoch": 1.28, "grad_norm": 4.588679646362951, "learning_rate": 7.11221724097544e-06, "loss": 0.8299, "step": 30130 }, { "epoch": 1.28, "grad_norm": 5.919185949709744, "learning_rate": 7.11110129565298e-06, "loss": 0.8144, "step": 30135 }, { "epoch": 1.28, "grad_norm": 5.290741294604033, "learning_rate": 7.109985222340667e-06, "loss": 0.8372, "step": 30140 }, { "epoch": 1.28, "grad_norm": 7.43352744007455, "learning_rate": 7.108869021106166e-06, "loss": 0.8083, "step": 30145 }, { "epoch": 1.28, "grad_norm": 9.037044408829985, "learning_rate": 7.107752692017148e-06, "loss": 0.8528, "step": 30150 }, { "epoch": 1.28, "grad_norm": 6.502362942761619, "learning_rate": 7.106636235141293e-06, "loss": 0.8467, "step": 30155 }, { "epoch": 1.28, "grad_norm": 8.623877190625405, "learning_rate": 7.105519650546289e-06, "loss": 0.8181, "step": 30160 }, { "epoch": 1.28, "grad_norm": 14.75090657932128, "learning_rate": 7.104402938299829e-06, "loss": 0.827, "step": 30165 }, { "epoch": 1.28, "grad_norm": 16.44633905001911, "learning_rate": 7.103286098469618e-06, "loss": 0.8152, "step": 30170 }, { "epoch": 1.28, "grad_norm": 5.399227906527695, "learning_rate": 7.102169131123369e-06, "loss": 0.7824, "step": 30175 }, { "epoch": 1.28, "grad_norm": 5.7203933405543825, "learning_rate": 7.101052036328794e-06, "loss": 0.8211, "step": 30180 }, { "epoch": 1.28, "grad_norm": 4.906991550785131, "learning_rate": 7.099934814153625e-06, "loss": 0.7989, "step": 30185 }, { "epoch": 1.28, "grad_norm": 8.15562582352719, "learning_rate": 7.098817464665593e-06, "loss": 0.831, "step": 30190 }, { "epoch": 1.28, "grad_norm": 9.22342610960438, "learning_rate": 7.09769998793244e-06, "loss": 0.8611, "step": 30195 }, { "epoch": 1.28, "grad_norm": 11.977201015932861, "learning_rate": 7.096582384021916e-06, "loss": 0.8598, "step": 30200 }, { "epoch": 1.28, "grad_norm": 6.802049910883823, "learning_rate": 7.095464653001778e-06, "loss": 0.8494, "step": 30205 }, { "epoch": 1.28, "grad_norm": 4.779810188745332, "learning_rate": 7.094346794939791e-06, "loss": 0.8334, "step": 30210 }, { "epoch": 1.28, "grad_norm": 6.81770732352753, "learning_rate": 7.0932288099037265e-06, "loss": 0.816, "step": 30215 }, { "epoch": 1.28, "grad_norm": 5.353533725321616, "learning_rate": 7.092110697961364e-06, "loss": 0.8193, "step": 30220 }, { "epoch": 1.28, "grad_norm": 4.981228234484451, "learning_rate": 7.090992459180494e-06, "loss": 0.7971, "step": 30225 }, { "epoch": 1.28, "grad_norm": 7.2959011153503495, "learning_rate": 7.089874093628909e-06, "loss": 0.7858, "step": 30230 }, { "epoch": 1.28, "grad_norm": 9.176177150324127, "learning_rate": 7.088755601374414e-06, "loss": 0.8076, "step": 30235 }, { "epoch": 1.28, "grad_norm": 6.945037934640414, "learning_rate": 7.08763698248482e-06, "loss": 0.859, "step": 30240 }, { "epoch": 1.28, "grad_norm": 4.7905779675592735, "learning_rate": 7.086518237027945e-06, "loss": 0.8367, "step": 30245 }, { "epoch": 1.28, "grad_norm": 4.280954925373214, "learning_rate": 7.085399365071616e-06, "loss": 0.8146, "step": 30250 }, { "epoch": 1.28, "grad_norm": 7.115545630346348, "learning_rate": 7.084280366683666e-06, "loss": 0.8332, "step": 30255 }, { "epoch": 1.28, "grad_norm": 5.4397933176183795, "learning_rate": 7.083161241931936e-06, "loss": 0.8288, "step": 30260 }, { "epoch": 1.28, "grad_norm": 7.447509744142868, "learning_rate": 7.082041990884276e-06, "loss": 0.8501, "step": 30265 }, { "epoch": 1.28, "grad_norm": 8.012698567214477, "learning_rate": 7.080922613608544e-06, "loss": 0.8345, "step": 30270 }, { "epoch": 1.28, "grad_norm": 15.480053757174062, "learning_rate": 7.0798031101726025e-06, "loss": 0.8226, "step": 30275 }, { "epoch": 1.28, "grad_norm": 5.077484796241971, "learning_rate": 7.078683480644325e-06, "loss": 0.8806, "step": 30280 }, { "epoch": 1.28, "grad_norm": 4.430139291054002, "learning_rate": 7.077563725091592e-06, "loss": 0.8396, "step": 30285 }, { "epoch": 1.28, "grad_norm": 5.585228979579798, "learning_rate": 7.076443843582288e-06, "loss": 0.8394, "step": 30290 }, { "epoch": 1.28, "grad_norm": 9.457439343561308, "learning_rate": 7.075323836184311e-06, "loss": 0.8422, "step": 30295 }, { "epoch": 1.28, "grad_norm": 12.64569517508503, "learning_rate": 7.074203702965563e-06, "loss": 0.815, "step": 30300 }, { "epoch": 1.28, "grad_norm": 12.97579791063769, "learning_rate": 7.0730834439939545e-06, "loss": 0.8217, "step": 30305 }, { "epoch": 1.28, "grad_norm": 5.42422861188891, "learning_rate": 7.071963059337403e-06, "loss": 0.7951, "step": 30310 }, { "epoch": 1.28, "grad_norm": 5.41837395054203, "learning_rate": 7.070842549063834e-06, "loss": 0.826, "step": 30315 }, { "epoch": 1.28, "grad_norm": 16.685378384653003, "learning_rate": 7.069721913241182e-06, "loss": 0.8537, "step": 30320 }, { "epoch": 1.28, "grad_norm": 32.95023878959854, "learning_rate": 7.068601151937387e-06, "loss": 0.8177, "step": 30325 }, { "epoch": 1.28, "grad_norm": 17.483250125378625, "learning_rate": 7.067480265220397e-06, "loss": 0.8383, "step": 30330 }, { "epoch": 1.28, "grad_norm": 14.165176240829023, "learning_rate": 7.066359253158169e-06, "loss": 0.8406, "step": 30335 }, { "epoch": 1.28, "grad_norm": 13.065664946436552, "learning_rate": 7.065238115818666e-06, "loss": 0.8286, "step": 30340 }, { "epoch": 1.28, "grad_norm": 4.915332267403049, "learning_rate": 7.064116853269859e-06, "loss": 0.8271, "step": 30345 }, { "epoch": 1.28, "grad_norm": 4.84254043980535, "learning_rate": 7.062995465579727e-06, "loss": 0.8456, "step": 30350 }, { "epoch": 1.28, "grad_norm": 19.634857644328868, "learning_rate": 7.0618739528162565e-06, "loss": 0.8412, "step": 30355 }, { "epoch": 1.28, "grad_norm": 10.70987988091038, "learning_rate": 7.0607523150474425e-06, "loss": 0.8314, "step": 30360 }, { "epoch": 1.29, "grad_norm": 5.718628550490718, "learning_rate": 7.059630552341285e-06, "loss": 0.8076, "step": 30365 }, { "epoch": 1.29, "grad_norm": 11.204307126376989, "learning_rate": 7.058508664765794e-06, "loss": 0.8174, "step": 30370 }, { "epoch": 1.29, "grad_norm": 6.41185352794296, "learning_rate": 7.057386652388987e-06, "loss": 0.8262, "step": 30375 }, { "epoch": 1.29, "grad_norm": 7.2596983684043295, "learning_rate": 7.056264515278886e-06, "loss": 0.8243, "step": 30380 }, { "epoch": 1.29, "grad_norm": 22.85214537775442, "learning_rate": 7.055142253503527e-06, "loss": 0.8543, "step": 30385 }, { "epoch": 1.29, "grad_norm": 24.729839385395405, "learning_rate": 7.0540198671309436e-06, "loss": 0.8428, "step": 30390 }, { "epoch": 1.29, "grad_norm": 24.50980318297952, "learning_rate": 7.052897356229188e-06, "loss": 0.8386, "step": 30395 }, { "epoch": 1.29, "grad_norm": 14.45767582806743, "learning_rate": 7.051774720866314e-06, "loss": 0.8429, "step": 30400 }, { "epoch": 1.29, "grad_norm": 5.898463530518916, "learning_rate": 7.05065196111038e-06, "loss": 0.8154, "step": 30405 }, { "epoch": 1.29, "grad_norm": 8.331203312707453, "learning_rate": 7.049529077029458e-06, "loss": 0.8403, "step": 30410 }, { "epoch": 1.29, "grad_norm": 4.660713071629328, "learning_rate": 7.048406068691627e-06, "loss": 0.8236, "step": 30415 }, { "epoch": 1.29, "grad_norm": 11.429330143684927, "learning_rate": 7.0472829361649675e-06, "loss": 0.7983, "step": 30420 }, { "epoch": 1.29, "grad_norm": 25.04186786502489, "learning_rate": 7.046159679517575e-06, "loss": 0.7984, "step": 30425 }, { "epoch": 1.29, "grad_norm": 12.136675449494412, "learning_rate": 7.045036298817549e-06, "loss": 0.8029, "step": 30430 }, { "epoch": 1.29, "grad_norm": 4.714908734615776, "learning_rate": 7.043912794132995e-06, "loss": 0.8133, "step": 30435 }, { "epoch": 1.29, "grad_norm": 5.108933449934547, "learning_rate": 7.042789165532029e-06, "loss": 0.7943, "step": 30440 }, { "epoch": 1.29, "grad_norm": 4.69024776413526, "learning_rate": 7.041665413082774e-06, "loss": 0.796, "step": 30445 }, { "epoch": 1.29, "grad_norm": 6.036148133406255, "learning_rate": 7.040541536853358e-06, "loss": 0.8585, "step": 30450 }, { "epoch": 1.29, "grad_norm": 7.148474456546869, "learning_rate": 7.03941753691192e-06, "loss": 0.838, "step": 30455 }, { "epoch": 1.29, "grad_norm": 5.940179552476616, "learning_rate": 7.038293413326605e-06, "loss": 0.8395, "step": 30460 }, { "epoch": 1.29, "grad_norm": 5.512693903460496, "learning_rate": 7.037169166165561e-06, "loss": 0.8223, "step": 30465 }, { "epoch": 1.29, "grad_norm": 6.41055574080407, "learning_rate": 7.036044795496953e-06, "loss": 0.8046, "step": 30470 }, { "epoch": 1.29, "grad_norm": 7.567798351990528, "learning_rate": 7.0349203013889465e-06, "loss": 0.8236, "step": 30475 }, { "epoch": 1.29, "grad_norm": 6.53895454166985, "learning_rate": 7.033795683909717e-06, "loss": 0.8042, "step": 30480 }, { "epoch": 1.29, "grad_norm": 11.051791032307255, "learning_rate": 7.032670943127445e-06, "loss": 0.8443, "step": 30485 }, { "epoch": 1.29, "grad_norm": 6.9302784118252045, "learning_rate": 7.0315460791103204e-06, "loss": 0.8411, "step": 30490 }, { "epoch": 1.29, "grad_norm": 4.642594464526272, "learning_rate": 7.030421091926545e-06, "loss": 0.8634, "step": 30495 }, { "epoch": 1.29, "grad_norm": 5.024134146354589, "learning_rate": 7.029295981644315e-06, "loss": 0.851, "step": 30500 }, { "epoch": 1.29, "grad_norm": 4.4433286682859245, "learning_rate": 7.02817074833185e-06, "loss": 0.8113, "step": 30505 }, { "epoch": 1.29, "grad_norm": 4.610507527751198, "learning_rate": 7.027045392057365e-06, "loss": 0.8569, "step": 30510 }, { "epoch": 1.29, "grad_norm": 5.9668189217640215, "learning_rate": 7.025919912889088e-06, "loss": 0.8427, "step": 30515 }, { "epoch": 1.29, "grad_norm": 5.236809348129294, "learning_rate": 7.024794310895256e-06, "loss": 0.8128, "step": 30520 }, { "epoch": 1.29, "grad_norm": 7.030674911060837, "learning_rate": 7.023668586144108e-06, "loss": 0.819, "step": 30525 }, { "epoch": 1.29, "grad_norm": 5.317666513898798, "learning_rate": 7.022542738703896e-06, "loss": 0.8243, "step": 30530 }, { "epoch": 1.29, "grad_norm": 24.203473246081643, "learning_rate": 7.0214167686428735e-06, "loss": 0.8614, "step": 30535 }, { "epoch": 1.29, "grad_norm": 24.28796578743221, "learning_rate": 7.020290676029309e-06, "loss": 0.8386, "step": 30540 }, { "epoch": 1.29, "grad_norm": 27.337473993683528, "learning_rate": 7.01916446093147e-06, "loss": 0.8346, "step": 30545 }, { "epoch": 1.29, "grad_norm": 12.401974137982343, "learning_rate": 7.018038123417636e-06, "loss": 0.8349, "step": 30550 }, { "epoch": 1.29, "grad_norm": 13.33358126598494, "learning_rate": 7.0169116635560985e-06, "loss": 0.868, "step": 30555 }, { "epoch": 1.29, "grad_norm": 12.388096772122134, "learning_rate": 7.0157850814151455e-06, "loss": 0.7887, "step": 30560 }, { "epoch": 1.29, "grad_norm": 14.810305112478773, "learning_rate": 7.01465837706308e-06, "loss": 0.787, "step": 30565 }, { "epoch": 1.29, "grad_norm": 24.886070183737026, "learning_rate": 7.013531550568214e-06, "loss": 0.809, "step": 30570 }, { "epoch": 1.29, "grad_norm": 16.114102943682518, "learning_rate": 7.01240460199886e-06, "loss": 0.8226, "step": 30575 }, { "epoch": 1.29, "grad_norm": 8.980425962840513, "learning_rate": 7.011277531423343e-06, "loss": 0.8512, "step": 30580 }, { "epoch": 1.29, "grad_norm": 11.86037313496318, "learning_rate": 7.010150338909992e-06, "loss": 0.7819, "step": 30585 }, { "epoch": 1.29, "grad_norm": 19.42306686722444, "learning_rate": 7.009023024527148e-06, "loss": 0.8218, "step": 30590 }, { "epoch": 1.29, "grad_norm": 6.555130073169696, "learning_rate": 7.007895588343157e-06, "loss": 0.8374, "step": 30595 }, { "epoch": 1.3, "grad_norm": 7.336590353901856, "learning_rate": 7.00676803042637e-06, "loss": 0.8041, "step": 30600 }, { "epoch": 1.3, "grad_norm": 9.825797956082603, "learning_rate": 7.005640350845149e-06, "loss": 0.8263, "step": 30605 }, { "epoch": 1.3, "grad_norm": 4.837724925885693, "learning_rate": 7.004512549667859e-06, "loss": 0.8452, "step": 30610 }, { "epoch": 1.3, "grad_norm": 9.104028869198386, "learning_rate": 7.0033846269628804e-06, "loss": 0.7596, "step": 30615 }, { "epoch": 1.3, "grad_norm": 10.70957553902516, "learning_rate": 7.002256582798592e-06, "loss": 0.8344, "step": 30620 }, { "epoch": 1.3, "grad_norm": 4.675970869828424, "learning_rate": 7.001128417243384e-06, "loss": 0.8259, "step": 30625 }, { "epoch": 1.3, "grad_norm": 8.142709495931403, "learning_rate": 7.000000130365656e-06, "loss": 0.8531, "step": 30630 }, { "epoch": 1.3, "grad_norm": 8.312334922014445, "learning_rate": 6.99887172223381e-06, "loss": 0.8093, "step": 30635 }, { "epoch": 1.3, "grad_norm": 12.282191181222677, "learning_rate": 6.99774319291626e-06, "loss": 0.8398, "step": 30640 }, { "epoch": 1.3, "grad_norm": 16.754288082981116, "learning_rate": 6.996614542481426e-06, "loss": 0.8118, "step": 30645 }, { "epoch": 1.3, "grad_norm": 14.447771665892928, "learning_rate": 6.995485770997731e-06, "loss": 0.762, "step": 30650 }, { "epoch": 1.3, "grad_norm": 5.355712766535142, "learning_rate": 6.994356878533612e-06, "loss": 0.8203, "step": 30655 }, { "epoch": 1.3, "grad_norm": 8.837225438225289, "learning_rate": 6.9932278651575125e-06, "loss": 0.8345, "step": 30660 }, { "epoch": 1.3, "grad_norm": 7.001692547152853, "learning_rate": 6.992098730937877e-06, "loss": 0.8173, "step": 30665 }, { "epoch": 1.3, "grad_norm": 6.487802695080293, "learning_rate": 6.9909694759431635e-06, "loss": 0.8039, "step": 30670 }, { "epoch": 1.3, "grad_norm": 5.525908000183807, "learning_rate": 6.989840100241836e-06, "loss": 0.8165, "step": 30675 }, { "epoch": 1.3, "grad_norm": 6.635586668198554, "learning_rate": 6.988710603902363e-06, "loss": 0.8122, "step": 30680 }, { "epoch": 1.3, "grad_norm": 4.8631152551421435, "learning_rate": 6.987580986993225e-06, "loss": 0.7868, "step": 30685 }, { "epoch": 1.3, "grad_norm": 16.55916805902345, "learning_rate": 6.9864512495829065e-06, "loss": 0.8027, "step": 30690 }, { "epoch": 1.3, "grad_norm": 10.958229928836165, "learning_rate": 6.9853213917398986e-06, "loss": 0.8197, "step": 30695 }, { "epoch": 1.3, "grad_norm": 13.783998417869906, "learning_rate": 6.984191413532704e-06, "loss": 0.8273, "step": 30700 }, { "epoch": 1.3, "grad_norm": 12.669461103046313, "learning_rate": 6.983061315029829e-06, "loss": 0.8137, "step": 30705 }, { "epoch": 1.3, "grad_norm": 6.760957022273189, "learning_rate": 6.981931096299786e-06, "loss": 0.8425, "step": 30710 }, { "epoch": 1.3, "grad_norm": 5.963954319178731, "learning_rate": 6.980800757411102e-06, "loss": 0.7891, "step": 30715 }, { "epoch": 1.3, "grad_norm": 9.355479991868174, "learning_rate": 6.979670298432298e-06, "loss": 0.7645, "step": 30720 }, { "epoch": 1.3, "grad_norm": 7.362935104138875, "learning_rate": 6.978539719431918e-06, "loss": 0.8449, "step": 30725 }, { "epoch": 1.3, "grad_norm": 4.522756340793397, "learning_rate": 6.977409020478503e-06, "loss": 0.7937, "step": 30730 }, { "epoch": 1.3, "grad_norm": 5.816311483771239, "learning_rate": 6.976278201640603e-06, "loss": 0.7903, "step": 30735 }, { "epoch": 1.3, "grad_norm": 8.903836933197944, "learning_rate": 6.975147262986779e-06, "loss": 0.8355, "step": 30740 }, { "epoch": 1.3, "grad_norm": 5.539756801411209, "learning_rate": 6.9740162045855915e-06, "loss": 0.8207, "step": 30745 }, { "epoch": 1.3, "grad_norm": 5.026375055223772, "learning_rate": 6.972885026505617e-06, "loss": 0.7699, "step": 30750 }, { "epoch": 1.3, "grad_norm": 4.996235742086593, "learning_rate": 6.971753728815435e-06, "loss": 0.8156, "step": 30755 }, { "epoch": 1.3, "grad_norm": 5.754319185533612, "learning_rate": 6.970622311583633e-06, "loss": 0.8144, "step": 30760 }, { "epoch": 1.3, "grad_norm": 4.998246538356853, "learning_rate": 6.969490774878804e-06, "loss": 0.7746, "step": 30765 }, { "epoch": 1.3, "grad_norm": 4.404548546581281, "learning_rate": 6.96835911876955e-06, "loss": 0.8437, "step": 30770 }, { "epoch": 1.3, "grad_norm": 8.122872351642947, "learning_rate": 6.967227343324482e-06, "loss": 0.8031, "step": 30775 }, { "epoch": 1.3, "grad_norm": 9.33870983485933, "learning_rate": 6.966095448612213e-06, "loss": 0.8507, "step": 30780 }, { "epoch": 1.3, "grad_norm": 5.361076621921054, "learning_rate": 6.964963434701371e-06, "loss": 0.8085, "step": 30785 }, { "epoch": 1.3, "grad_norm": 5.70317649130165, "learning_rate": 6.963831301660582e-06, "loss": 0.8134, "step": 30790 }, { "epoch": 1.3, "grad_norm": 7.211607917963832, "learning_rate": 6.962699049558486e-06, "loss": 0.8037, "step": 30795 }, { "epoch": 1.3, "grad_norm": 7.104388599288947, "learning_rate": 6.9615666784637285e-06, "loss": 0.8672, "step": 30800 }, { "epoch": 1.3, "grad_norm": 10.18235691078436, "learning_rate": 6.96043418844496e-06, "loss": 0.789, "step": 30805 }, { "epoch": 1.3, "grad_norm": 7.123682910712552, "learning_rate": 6.959301579570843e-06, "loss": 0.7993, "step": 30810 }, { "epoch": 1.3, "grad_norm": 5.14090401788208, "learning_rate": 6.958168851910042e-06, "loss": 0.8333, "step": 30815 }, { "epoch": 1.3, "grad_norm": 5.895621629562826, "learning_rate": 6.9570360055312325e-06, "loss": 0.8915, "step": 30820 }, { "epoch": 1.3, "grad_norm": 8.662599902475666, "learning_rate": 6.955903040503093e-06, "loss": 0.8059, "step": 30825 }, { "epoch": 1.3, "grad_norm": 4.45118165268154, "learning_rate": 6.954769956894314e-06, "loss": 0.8395, "step": 30830 }, { "epoch": 1.3, "grad_norm": 10.223819807189278, "learning_rate": 6.953636754773591e-06, "loss": 0.8309, "step": 30835 }, { "epoch": 1.31, "grad_norm": 4.2764424779869055, "learning_rate": 6.952503434209624e-06, "loss": 0.8022, "step": 30840 }, { "epoch": 1.31, "grad_norm": 4.830396664182078, "learning_rate": 6.951369995271127e-06, "loss": 0.8246, "step": 30845 }, { "epoch": 1.31, "grad_norm": 7.56976739805219, "learning_rate": 6.9502364380268164e-06, "loss": 0.8331, "step": 30850 }, { "epoch": 1.31, "grad_norm": 5.742287881718884, "learning_rate": 6.949102762545412e-06, "loss": 0.8292, "step": 30855 }, { "epoch": 1.31, "grad_norm": 6.679770503302572, "learning_rate": 6.947968968895651e-06, "loss": 0.793, "step": 30860 }, { "epoch": 1.31, "grad_norm": 16.797062556412232, "learning_rate": 6.946835057146267e-06, "loss": 0.7925, "step": 30865 }, { "epoch": 1.31, "grad_norm": 12.40722891215225, "learning_rate": 6.9457010273660096e-06, "loss": 0.8365, "step": 30870 }, { "epoch": 1.31, "grad_norm": 4.979602383123886, "learning_rate": 6.944566879623631e-06, "loss": 0.8228, "step": 30875 }, { "epoch": 1.31, "grad_norm": 5.58948389218297, "learning_rate": 6.9434326139878885e-06, "loss": 0.8421, "step": 30880 }, { "epoch": 1.31, "grad_norm": 4.845698518834456, "learning_rate": 6.942298230527552e-06, "loss": 0.8289, "step": 30885 }, { "epoch": 1.31, "grad_norm": 5.303449986674132, "learning_rate": 6.941163729311395e-06, "loss": 0.8862, "step": 30890 }, { "epoch": 1.31, "grad_norm": 6.038966819927321, "learning_rate": 6.9400291104082e-06, "loss": 0.8405, "step": 30895 }, { "epoch": 1.31, "grad_norm": 4.761079627248277, "learning_rate": 6.938894373886753e-06, "loss": 0.8239, "step": 30900 }, { "epoch": 1.31, "grad_norm": 13.479778902798289, "learning_rate": 6.937759519815852e-06, "loss": 0.8446, "step": 30905 }, { "epoch": 1.31, "grad_norm": 7.708365921329065, "learning_rate": 6.9366245482643e-06, "loss": 0.7883, "step": 30910 }, { "epoch": 1.31, "grad_norm": 8.102379765514295, "learning_rate": 6.935489459300905e-06, "loss": 0.851, "step": 30915 }, { "epoch": 1.31, "grad_norm": 4.862474080922375, "learning_rate": 6.934354252994485e-06, "loss": 0.8156, "step": 30920 }, { "epoch": 1.31, "grad_norm": 7.808549772730339, "learning_rate": 6.933218929413865e-06, "loss": 0.8472, "step": 30925 }, { "epoch": 1.31, "grad_norm": 4.891913876879858, "learning_rate": 6.932083488627875e-06, "loss": 0.792, "step": 30930 }, { "epoch": 1.31, "grad_norm": 5.916705479239607, "learning_rate": 6.9309479307053565e-06, "loss": 0.7972, "step": 30935 }, { "epoch": 1.31, "grad_norm": 4.463334166346825, "learning_rate": 6.92981225571515e-06, "loss": 0.8528, "step": 30940 }, { "epoch": 1.31, "grad_norm": 5.324382622070353, "learning_rate": 6.928676463726113e-06, "loss": 0.7971, "step": 30945 }, { "epoch": 1.31, "grad_norm": 4.315107254965493, "learning_rate": 6.927540554807102e-06, "loss": 0.8278, "step": 30950 }, { "epoch": 1.31, "grad_norm": 16.965968164940747, "learning_rate": 6.926404529026984e-06, "loss": 0.8127, "step": 30955 }, { "epoch": 1.31, "grad_norm": 21.841737829591953, "learning_rate": 6.9252683864546355e-06, "loss": 0.8154, "step": 30960 }, { "epoch": 1.31, "grad_norm": 33.836462859227524, "learning_rate": 6.924132127158936e-06, "loss": 0.8485, "step": 30965 }, { "epoch": 1.31, "grad_norm": 5.765209986787647, "learning_rate": 6.922995751208772e-06, "loss": 0.7866, "step": 30970 }, { "epoch": 1.31, "grad_norm": 16.884119925220077, "learning_rate": 6.9218592586730405e-06, "loss": 0.8084, "step": 30975 }, { "epoch": 1.31, "grad_norm": 5.365945345825806, "learning_rate": 6.920722649620644e-06, "loss": 0.816, "step": 30980 }, { "epoch": 1.31, "grad_norm": 7.566912526098902, "learning_rate": 6.919585924120491e-06, "loss": 0.8345, "step": 30985 }, { "epoch": 1.31, "grad_norm": 4.659700538285211, "learning_rate": 6.918449082241496e-06, "loss": 0.8196, "step": 30990 }, { "epoch": 1.31, "grad_norm": 15.584478451457812, "learning_rate": 6.917312124052586e-06, "loss": 0.8369, "step": 30995 }, { "epoch": 1.31, "grad_norm": 4.950440858112083, "learning_rate": 6.916175049622687e-06, "loss": 0.7735, "step": 31000 }, { "epoch": 1.31, "grad_norm": 8.609722652457403, "learning_rate": 6.915037859020741e-06, "loss": 0.8072, "step": 31005 }, { "epoch": 1.31, "grad_norm": 19.020926752614063, "learning_rate": 6.913900552315691e-06, "loss": 0.8706, "step": 31010 }, { "epoch": 1.31, "grad_norm": 11.174738472136832, "learning_rate": 6.912763129576486e-06, "loss": 0.8151, "step": 31015 }, { "epoch": 1.31, "grad_norm": 8.180748671412141, "learning_rate": 6.911625590872089e-06, "loss": 0.8115, "step": 31020 }, { "epoch": 1.31, "grad_norm": 8.211292639061794, "learning_rate": 6.910487936271462e-06, "loss": 0.794, "step": 31025 }, { "epoch": 1.31, "grad_norm": 7.424041501824461, "learning_rate": 6.909350165843579e-06, "loss": 0.8302, "step": 31030 }, { "epoch": 1.31, "grad_norm": 5.7380674197718315, "learning_rate": 6.90821227965742e-06, "loss": 0.8158, "step": 31035 }, { "epoch": 1.31, "grad_norm": 6.434439417845401, "learning_rate": 6.907074277781971e-06, "loss": 0.8438, "step": 31040 }, { "epoch": 1.31, "grad_norm": 6.873490769424722, "learning_rate": 6.905936160286226e-06, "loss": 0.8051, "step": 31045 }, { "epoch": 1.31, "grad_norm": 19.66564185370898, "learning_rate": 6.904797927239185e-06, "loss": 0.8186, "step": 31050 }, { "epoch": 1.31, "grad_norm": 22.23823661026982, "learning_rate": 6.903659578709858e-06, "loss": 0.8318, "step": 31055 }, { "epoch": 1.31, "grad_norm": 19.702697828348985, "learning_rate": 6.902521114767257e-06, "loss": 0.834, "step": 31060 }, { "epoch": 1.31, "grad_norm": 21.443861857640417, "learning_rate": 6.901382535480406e-06, "loss": 0.858, "step": 31065 }, { "epoch": 1.31, "grad_norm": 16.64254718485684, "learning_rate": 6.900243840918332e-06, "loss": 0.7975, "step": 31070 }, { "epoch": 1.32, "grad_norm": 6.359165475770751, "learning_rate": 6.89910503115007e-06, "loss": 0.8341, "step": 31075 }, { "epoch": 1.32, "grad_norm": 17.11748782735752, "learning_rate": 6.897966106244667e-06, "loss": 0.8206, "step": 31080 }, { "epoch": 1.32, "grad_norm": 24.011234453425086, "learning_rate": 6.896827066271167e-06, "loss": 0.8003, "step": 31085 }, { "epoch": 1.32, "grad_norm": 5.080668993815845, "learning_rate": 6.89568791129863e-06, "loss": 0.8101, "step": 31090 }, { "epoch": 1.32, "grad_norm": 5.34183228047882, "learning_rate": 6.89454864139612e-06, "loss": 0.7856, "step": 31095 }, { "epoch": 1.32, "grad_norm": 5.579679750279931, "learning_rate": 6.893409256632704e-06, "loss": 0.8492, "step": 31100 }, { "epoch": 1.32, "grad_norm": 12.514889603919148, "learning_rate": 6.892269757077463e-06, "loss": 0.8367, "step": 31105 }, { "epoch": 1.32, "grad_norm": 13.40912054443203, "learning_rate": 6.89113014279948e-06, "loss": 0.8517, "step": 31110 }, { "epoch": 1.32, "grad_norm": 13.700540264779526, "learning_rate": 6.889990413867849e-06, "loss": 0.8318, "step": 31115 }, { "epoch": 1.32, "grad_norm": 5.46024652359266, "learning_rate": 6.888850570351665e-06, "loss": 0.833, "step": 31120 }, { "epoch": 1.32, "grad_norm": 5.324634788545719, "learning_rate": 6.8877106123200346e-06, "loss": 0.8094, "step": 31125 }, { "epoch": 1.32, "grad_norm": 6.465528378515589, "learning_rate": 6.88657053984207e-06, "loss": 0.8523, "step": 31130 }, { "epoch": 1.32, "grad_norm": 4.445692398449989, "learning_rate": 6.88543035298689e-06, "loss": 0.7675, "step": 31135 }, { "epoch": 1.32, "grad_norm": 7.3969669811682675, "learning_rate": 6.884290051823623e-06, "loss": 0.7991, "step": 31140 }, { "epoch": 1.32, "grad_norm": 6.45256981627089, "learning_rate": 6.883149636421401e-06, "loss": 0.8223, "step": 31145 }, { "epoch": 1.32, "grad_norm": 10.9331332734134, "learning_rate": 6.882009106849363e-06, "loss": 0.8136, "step": 31150 }, { "epoch": 1.32, "grad_norm": 6.228833380550154, "learning_rate": 6.8808684631766565e-06, "loss": 0.824, "step": 31155 }, { "epoch": 1.32, "grad_norm": 4.413588158623257, "learning_rate": 6.879727705472434e-06, "loss": 0.8265, "step": 31160 }, { "epoch": 1.32, "grad_norm": 4.7725263977321015, "learning_rate": 6.878586833805859e-06, "loss": 0.8347, "step": 31165 }, { "epoch": 1.32, "grad_norm": 7.932397816123524, "learning_rate": 6.877445848246097e-06, "loss": 0.8069, "step": 31170 }, { "epoch": 1.32, "grad_norm": 5.010484090671348, "learning_rate": 6.876304748862323e-06, "loss": 0.8056, "step": 31175 }, { "epoch": 1.32, "grad_norm": 6.490760312674396, "learning_rate": 6.875163535723721e-06, "loss": 0.8202, "step": 31180 }, { "epoch": 1.32, "grad_norm": 9.268119507485787, "learning_rate": 6.874022208899476e-06, "loss": 0.8172, "step": 31185 }, { "epoch": 1.32, "grad_norm": 5.638722675772335, "learning_rate": 6.872880768458784e-06, "loss": 0.8377, "step": 31190 }, { "epoch": 1.32, "grad_norm": 7.493596328948122, "learning_rate": 6.8717392144708495e-06, "loss": 0.8301, "step": 31195 }, { "epoch": 1.32, "grad_norm": 4.465295497228103, "learning_rate": 6.870597547004877e-06, "loss": 0.8664, "step": 31200 }, { "epoch": 1.32, "grad_norm": 5.191942900066486, "learning_rate": 6.869455766130087e-06, "loss": 0.7713, "step": 31205 }, { "epoch": 1.32, "grad_norm": 4.217687216698285, "learning_rate": 6.8683138719157e-06, "loss": 0.7921, "step": 31210 }, { "epoch": 1.32, "grad_norm": 4.683055698999031, "learning_rate": 6.867171864430947e-06, "loss": 0.8269, "step": 31215 }, { "epoch": 1.32, "grad_norm": 4.951119368831036, "learning_rate": 6.866029743745064e-06, "loss": 0.8515, "step": 31220 }, { "epoch": 1.32, "grad_norm": 6.333029550032008, "learning_rate": 6.864887509927294e-06, "loss": 0.7966, "step": 31225 }, { "epoch": 1.32, "grad_norm": 9.17231477234489, "learning_rate": 6.863745163046888e-06, "loss": 0.8462, "step": 31230 }, { "epoch": 1.32, "grad_norm": 7.422157637656466, "learning_rate": 6.862602703173102e-06, "loss": 0.8648, "step": 31235 }, { "epoch": 1.32, "grad_norm": 10.05975947037493, "learning_rate": 6.8614601303751995e-06, "loss": 0.8076, "step": 31240 }, { "epoch": 1.32, "grad_norm": 13.68493938951246, "learning_rate": 6.8603174447224526e-06, "loss": 0.8154, "step": 31245 }, { "epoch": 1.32, "grad_norm": 5.209584375849382, "learning_rate": 6.85917464628414e-06, "loss": 0.8378, "step": 31250 }, { "epoch": 1.32, "grad_norm": 10.00331145625349, "learning_rate": 6.858031735129544e-06, "loss": 0.807, "step": 31255 }, { "epoch": 1.32, "grad_norm": 7.899734727129706, "learning_rate": 6.856888711327958e-06, "loss": 0.7955, "step": 31260 }, { "epoch": 1.32, "grad_norm": 5.229021843999381, "learning_rate": 6.8557455749486775e-06, "loss": 0.811, "step": 31265 }, { "epoch": 1.32, "grad_norm": 4.365187739738876, "learning_rate": 6.854602326061011e-06, "loss": 0.7873, "step": 31270 }, { "epoch": 1.32, "grad_norm": 4.667410717831659, "learning_rate": 6.853458964734268e-06, "loss": 0.7877, "step": 31275 }, { "epoch": 1.32, "grad_norm": 7.03533562104333, "learning_rate": 6.852315491037767e-06, "loss": 0.7636, "step": 31280 }, { "epoch": 1.32, "grad_norm": 4.863416256128626, "learning_rate": 6.851171905040832e-06, "loss": 0.8136, "step": 31285 }, { "epoch": 1.32, "grad_norm": 6.154900215702729, "learning_rate": 6.8500282068128e-06, "loss": 0.8148, "step": 31290 }, { "epoch": 1.32, "grad_norm": 8.602310918845237, "learning_rate": 6.848884396423005e-06, "loss": 0.8085, "step": 31295 }, { "epoch": 1.32, "grad_norm": 14.449414666645763, "learning_rate": 6.847740473940797e-06, "loss": 0.8129, "step": 31300 }, { "epoch": 1.32, "grad_norm": 8.251807570340368, "learning_rate": 6.846596439435527e-06, "loss": 0.8183, "step": 31305 }, { "epoch": 1.33, "grad_norm": 6.378056369690087, "learning_rate": 6.845452292976553e-06, "loss": 0.8724, "step": 31310 }, { "epoch": 1.33, "grad_norm": 17.095156356830913, "learning_rate": 6.8443080346332435e-06, "loss": 0.8339, "step": 31315 }, { "epoch": 1.33, "grad_norm": 12.430751516694825, "learning_rate": 6.84316366447497e-06, "loss": 0.776, "step": 31320 }, { "epoch": 1.33, "grad_norm": 6.031436140997562, "learning_rate": 6.842019182571112e-06, "loss": 0.8029, "step": 31325 }, { "epoch": 1.33, "grad_norm": 19.97993466899796, "learning_rate": 6.840874588991056e-06, "loss": 0.8162, "step": 31330 }, { "epoch": 1.33, "grad_norm": 23.295443252191475, "learning_rate": 6.8397298838041985e-06, "loss": 0.8238, "step": 31335 }, { "epoch": 1.33, "grad_norm": 13.639961102082475, "learning_rate": 6.838585067079937e-06, "loss": 0.8396, "step": 31340 }, { "epoch": 1.33, "grad_norm": 5.90193327258198, "learning_rate": 6.837440138887677e-06, "loss": 0.7631, "step": 31345 }, { "epoch": 1.33, "grad_norm": 6.927493434624856, "learning_rate": 6.8362950992968344e-06, "loss": 0.8159, "step": 31350 }, { "epoch": 1.33, "grad_norm": 6.506265271732505, "learning_rate": 6.835149948376829e-06, "loss": 0.7872, "step": 31355 }, { "epoch": 1.33, "grad_norm": 10.966219732554292, "learning_rate": 6.834004686197088e-06, "loss": 0.8225, "step": 31360 }, { "epoch": 1.33, "grad_norm": 6.003876153384063, "learning_rate": 6.832859312827045e-06, "loss": 0.8427, "step": 31365 }, { "epoch": 1.33, "grad_norm": 15.756294919745246, "learning_rate": 6.831713828336141e-06, "loss": 0.8567, "step": 31370 }, { "epoch": 1.33, "grad_norm": 12.702167109653397, "learning_rate": 6.830568232793823e-06, "loss": 0.832, "step": 31375 }, { "epoch": 1.33, "grad_norm": 25.31887942303852, "learning_rate": 6.8294225262695445e-06, "loss": 0.8627, "step": 31380 }, { "epoch": 1.33, "grad_norm": 17.394052905117952, "learning_rate": 6.828276708832768e-06, "loss": 0.8083, "step": 31385 }, { "epoch": 1.33, "grad_norm": 6.172379441602869, "learning_rate": 6.82713078055296e-06, "loss": 0.7886, "step": 31390 }, { "epoch": 1.33, "grad_norm": 14.484732433309771, "learning_rate": 6.8259847414995936e-06, "loss": 0.8096, "step": 31395 }, { "epoch": 1.33, "grad_norm": 18.410166146811918, "learning_rate": 6.824838591742152e-06, "loss": 0.8486, "step": 31400 }, { "epoch": 1.33, "grad_norm": 16.721298881434233, "learning_rate": 6.82369233135012e-06, "loss": 0.8369, "step": 31405 }, { "epoch": 1.33, "grad_norm": 18.900204059770356, "learning_rate": 6.822545960392996e-06, "loss": 0.827, "step": 31410 }, { "epoch": 1.33, "grad_norm": 25.611122503884104, "learning_rate": 6.821399478940278e-06, "loss": 0.8098, "step": 31415 }, { "epoch": 1.33, "grad_norm": 9.506566126164305, "learning_rate": 6.820252887061475e-06, "loss": 0.8295, "step": 31420 }, { "epoch": 1.33, "grad_norm": 6.010432172607462, "learning_rate": 6.819106184826101e-06, "loss": 0.8348, "step": 31425 }, { "epoch": 1.33, "grad_norm": 4.793095339700078, "learning_rate": 6.817959372303678e-06, "loss": 0.7756, "step": 31430 }, { "epoch": 1.33, "grad_norm": 13.78281975395877, "learning_rate": 6.816812449563732e-06, "loss": 0.797, "step": 31435 }, { "epoch": 1.33, "grad_norm": 11.689210457184577, "learning_rate": 6.815665416675798e-06, "loss": 0.8204, "step": 31440 }, { "epoch": 1.33, "grad_norm": 8.664428585515743, "learning_rate": 6.814518273709421e-06, "loss": 0.788, "step": 31445 }, { "epoch": 1.33, "grad_norm": 19.312871792742744, "learning_rate": 6.813371020734144e-06, "loss": 0.8318, "step": 31450 }, { "epoch": 1.33, "grad_norm": 19.445655064882253, "learning_rate": 6.812223657819523e-06, "loss": 0.7813, "step": 31455 }, { "epoch": 1.33, "grad_norm": 10.373334805431014, "learning_rate": 6.811076185035122e-06, "loss": 0.8369, "step": 31460 }, { "epoch": 1.33, "grad_norm": 5.879837187896734, "learning_rate": 6.809928602450504e-06, "loss": 0.8434, "step": 31465 }, { "epoch": 1.33, "grad_norm": 5.196774391129288, "learning_rate": 6.808780910135249e-06, "loss": 0.8484, "step": 31470 }, { "epoch": 1.33, "grad_norm": 7.397063609569595, "learning_rate": 6.807633108158932e-06, "loss": 0.7571, "step": 31475 }, { "epoch": 1.33, "grad_norm": 5.959546346616855, "learning_rate": 6.806485196591146e-06, "loss": 0.8147, "step": 31480 }, { "epoch": 1.33, "grad_norm": 12.641369300705877, "learning_rate": 6.805337175501484e-06, "loss": 0.8616, "step": 31485 }, { "epoch": 1.33, "grad_norm": 4.557826201345145, "learning_rate": 6.804189044959546e-06, "loss": 0.764, "step": 31490 }, { "epoch": 1.33, "grad_norm": 14.186386222750375, "learning_rate": 6.8030408050349406e-06, "loss": 0.8312, "step": 31495 }, { "epoch": 1.33, "grad_norm": 15.337535082676023, "learning_rate": 6.801892455797283e-06, "loss": 0.7825, "step": 31500 }, { "epoch": 1.33, "grad_norm": 8.296646091300543, "learning_rate": 6.8007439973161925e-06, "loss": 0.774, "step": 31505 }, { "epoch": 1.33, "grad_norm": 9.166268796621337, "learning_rate": 6.7995954296612984e-06, "loss": 0.8601, "step": 31510 }, { "epoch": 1.33, "grad_norm": 16.27819911188128, "learning_rate": 6.798446752902233e-06, "loss": 0.8805, "step": 31515 }, { "epoch": 1.33, "grad_norm": 47.625725196408226, "learning_rate": 6.7972979671086405e-06, "loss": 0.8416, "step": 31520 }, { "epoch": 1.33, "grad_norm": 19.767712052270472, "learning_rate": 6.796149072350167e-06, "loss": 0.8429, "step": 31525 }, { "epoch": 1.33, "grad_norm": 20.79116171190734, "learning_rate": 6.795000068696463e-06, "loss": 0.8206, "step": 31530 }, { "epoch": 1.33, "grad_norm": 10.819522049356816, "learning_rate": 6.793850956217194e-06, "loss": 0.7943, "step": 31535 }, { "epoch": 1.33, "grad_norm": 5.518776462759827, "learning_rate": 6.7927017349820245e-06, "loss": 0.8211, "step": 31540 }, { "epoch": 1.34, "grad_norm": 16.422082918751077, "learning_rate": 6.791552405060631e-06, "loss": 0.7964, "step": 31545 }, { "epoch": 1.34, "grad_norm": 11.036342679420907, "learning_rate": 6.7904029665226915e-06, "loss": 0.8343, "step": 31550 }, { "epoch": 1.34, "grad_norm": 9.569793418008702, "learning_rate": 6.789253419437893e-06, "loss": 0.784, "step": 31555 }, { "epoch": 1.34, "grad_norm": 11.588332425899969, "learning_rate": 6.788103763875932e-06, "loss": 0.778, "step": 31560 }, { "epoch": 1.34, "grad_norm": 19.174368261859875, "learning_rate": 6.786953999906505e-06, "loss": 0.8076, "step": 31565 }, { "epoch": 1.34, "grad_norm": 6.4990375598683245, "learning_rate": 6.785804127599321e-06, "loss": 0.83, "step": 31570 }, { "epoch": 1.34, "grad_norm": 24.200845150802245, "learning_rate": 6.784654147024093e-06, "loss": 0.8255, "step": 31575 }, { "epoch": 1.34, "grad_norm": 9.367909310517959, "learning_rate": 6.783504058250542e-06, "loss": 0.8669, "step": 31580 }, { "epoch": 1.34, "grad_norm": 15.12209636471602, "learning_rate": 6.782353861348393e-06, "loss": 0.7961, "step": 31585 }, { "epoch": 1.34, "grad_norm": 10.973117701752242, "learning_rate": 6.781203556387379e-06, "loss": 0.8136, "step": 31590 }, { "epoch": 1.34, "grad_norm": 20.1898445358115, "learning_rate": 6.78005314343724e-06, "loss": 0.8092, "step": 31595 }, { "epoch": 1.34, "grad_norm": 16.864538798387898, "learning_rate": 6.778902622567722e-06, "loss": 0.8317, "step": 31600 }, { "epoch": 1.34, "grad_norm": 6.244042217321878, "learning_rate": 6.777751993848579e-06, "loss": 0.7993, "step": 31605 }, { "epoch": 1.34, "grad_norm": 12.433485844518673, "learning_rate": 6.776601257349569e-06, "loss": 0.819, "step": 31610 }, { "epoch": 1.34, "grad_norm": 6.31247172500217, "learning_rate": 6.775450413140456e-06, "loss": 0.777, "step": 31615 }, { "epoch": 1.34, "grad_norm": 9.339174850572915, "learning_rate": 6.774299461291016e-06, "loss": 0.8185, "step": 31620 }, { "epoch": 1.34, "grad_norm": 5.299699819827479, "learning_rate": 6.773148401871026e-06, "loss": 0.8345, "step": 31625 }, { "epoch": 1.34, "grad_norm": 9.900612523785766, "learning_rate": 6.771997234950272e-06, "loss": 0.8437, "step": 31630 }, { "epoch": 1.34, "grad_norm": 7.039582811154645, "learning_rate": 6.770845960598544e-06, "loss": 0.8118, "step": 31635 }, { "epoch": 1.34, "grad_norm": 4.560754764098867, "learning_rate": 6.769694578885643e-06, "loss": 0.7962, "step": 31640 }, { "epoch": 1.34, "grad_norm": 4.896549377616818, "learning_rate": 6.768543089881371e-06, "loss": 0.8106, "step": 31645 }, { "epoch": 1.34, "grad_norm": 6.601025156551856, "learning_rate": 6.767391493655541e-06, "loss": 0.8035, "step": 31650 }, { "epoch": 1.34, "grad_norm": 5.892486634227984, "learning_rate": 6.766239790277971e-06, "loss": 0.8494, "step": 31655 }, { "epoch": 1.34, "grad_norm": 7.433502059918096, "learning_rate": 6.765087979818486e-06, "loss": 0.797, "step": 31660 }, { "epoch": 1.34, "grad_norm": 11.0254929184585, "learning_rate": 6.7639360623469145e-06, "loss": 0.8413, "step": 31665 }, { "epoch": 1.34, "grad_norm": 11.887678097193918, "learning_rate": 6.762784037933097e-06, "loss": 0.8211, "step": 31670 }, { "epoch": 1.34, "grad_norm": 5.780897689494343, "learning_rate": 6.761631906646874e-06, "loss": 0.7977, "step": 31675 }, { "epoch": 1.34, "grad_norm": 10.65914544739127, "learning_rate": 6.760479668558098e-06, "loss": 0.8169, "step": 31680 }, { "epoch": 1.34, "grad_norm": 18.286237221174733, "learning_rate": 6.759327323736626e-06, "loss": 0.7648, "step": 31685 }, { "epoch": 1.34, "grad_norm": 6.581387026705098, "learning_rate": 6.758174872252321e-06, "loss": 0.7528, "step": 31690 }, { "epoch": 1.34, "grad_norm": 7.822560259599014, "learning_rate": 6.757022314175052e-06, "loss": 0.777, "step": 31695 }, { "epoch": 1.34, "grad_norm": 9.502515827187835, "learning_rate": 6.755869649574694e-06, "loss": 0.8465, "step": 31700 }, { "epoch": 1.34, "grad_norm": 6.654670820696322, "learning_rate": 6.754716878521134e-06, "loss": 0.7904, "step": 31705 }, { "epoch": 1.34, "grad_norm": 5.100792696165554, "learning_rate": 6.753564001084256e-06, "loss": 0.8189, "step": 31710 }, { "epoch": 1.34, "grad_norm": 6.804483887970345, "learning_rate": 6.752411017333959e-06, "loss": 0.8051, "step": 31715 }, { "epoch": 1.34, "grad_norm": 5.485755129905997, "learning_rate": 6.751257927340144e-06, "loss": 0.801, "step": 31720 }, { "epoch": 1.34, "grad_norm": 4.089077139718888, "learning_rate": 6.75010473117272e-06, "loss": 0.791, "step": 31725 }, { "epoch": 1.34, "grad_norm": 6.114034195886247, "learning_rate": 6.7489514289016e-06, "loss": 0.8125, "step": 31730 }, { "epoch": 1.34, "grad_norm": 4.276872543649539, "learning_rate": 6.747798020596707e-06, "loss": 0.823, "step": 31735 }, { "epoch": 1.34, "grad_norm": 5.574592824064354, "learning_rate": 6.746644506327969e-06, "loss": 0.7833, "step": 31740 }, { "epoch": 1.34, "grad_norm": 9.10504667784637, "learning_rate": 6.745490886165318e-06, "loss": 0.8155, "step": 31745 }, { "epoch": 1.34, "grad_norm": 7.894681598238104, "learning_rate": 6.744337160178697e-06, "loss": 0.8316, "step": 31750 }, { "epoch": 1.34, "grad_norm": 4.020143291407074, "learning_rate": 6.743183328438052e-06, "loss": 0.8154, "step": 31755 }, { "epoch": 1.34, "grad_norm": 14.171505264071087, "learning_rate": 6.742029391013336e-06, "loss": 0.8082, "step": 31760 }, { "epoch": 1.34, "grad_norm": 9.594653998683382, "learning_rate": 6.7408753479745095e-06, "loss": 0.8284, "step": 31765 }, { "epoch": 1.34, "grad_norm": 5.287271512358595, "learning_rate": 6.7397211993915405e-06, "loss": 0.8329, "step": 31770 }, { "epoch": 1.34, "grad_norm": 7.8004329655422655, "learning_rate": 6.738566945334396e-06, "loss": 0.8088, "step": 31775 }, { "epoch": 1.34, "grad_norm": 10.190836102541125, "learning_rate": 6.737412585873062e-06, "loss": 0.8302, "step": 31780 }, { "epoch": 1.35, "grad_norm": 5.129920044401575, "learning_rate": 6.736258121077518e-06, "loss": 0.8238, "step": 31785 }, { "epoch": 1.35, "grad_norm": 8.708113228581283, "learning_rate": 6.7351035510177606e-06, "loss": 0.8417, "step": 31790 }, { "epoch": 1.35, "grad_norm": 14.25551287517732, "learning_rate": 6.733948875763785e-06, "loss": 0.8109, "step": 31795 }, { "epoch": 1.35, "grad_norm": 13.275068895355647, "learning_rate": 6.732794095385598e-06, "loss": 0.7882, "step": 31800 }, { "epoch": 1.35, "grad_norm": 7.35647081621935, "learning_rate": 6.731639209953207e-06, "loss": 0.814, "step": 31805 }, { "epoch": 1.35, "grad_norm": 13.709381174935903, "learning_rate": 6.7304842195366324e-06, "loss": 0.8244, "step": 31810 }, { "epoch": 1.35, "grad_norm": 7.114871848618827, "learning_rate": 6.729329124205897e-06, "loss": 0.8389, "step": 31815 }, { "epoch": 1.35, "grad_norm": 5.939740472556877, "learning_rate": 6.7281739240310294e-06, "loss": 0.8285, "step": 31820 }, { "epoch": 1.35, "grad_norm": 5.033439218103713, "learning_rate": 6.727018619082068e-06, "loss": 0.8475, "step": 31825 }, { "epoch": 1.35, "grad_norm": 5.808520457385149, "learning_rate": 6.725863209429055e-06, "loss": 0.8773, "step": 31830 }, { "epoch": 1.35, "grad_norm": 4.3878602119783565, "learning_rate": 6.72470769514204e-06, "loss": 0.7937, "step": 31835 }, { "epoch": 1.35, "grad_norm": 11.274461854370793, "learning_rate": 6.7235520762910775e-06, "loss": 0.8429, "step": 31840 }, { "epoch": 1.35, "grad_norm": 4.755765416077909, "learning_rate": 6.722396352946228e-06, "loss": 0.8248, "step": 31845 }, { "epoch": 1.35, "grad_norm": 9.860363697027857, "learning_rate": 6.721240525177562e-06, "loss": 0.8068, "step": 31850 }, { "epoch": 1.35, "grad_norm": 18.114145051573207, "learning_rate": 6.720084593055154e-06, "loss": 0.8431, "step": 31855 }, { "epoch": 1.35, "grad_norm": 17.16302310924887, "learning_rate": 6.718928556649082e-06, "loss": 0.8022, "step": 31860 }, { "epoch": 1.35, "grad_norm": 4.767305281369644, "learning_rate": 6.717772416029436e-06, "loss": 0.7999, "step": 31865 }, { "epoch": 1.35, "grad_norm": 5.741653233161158, "learning_rate": 6.716616171266308e-06, "loss": 0.8258, "step": 31870 }, { "epoch": 1.35, "grad_norm": 10.462555329349241, "learning_rate": 6.715459822429799e-06, "loss": 0.8056, "step": 31875 }, { "epoch": 1.35, "grad_norm": 9.701989509259906, "learning_rate": 6.714303369590014e-06, "loss": 0.8043, "step": 31880 }, { "epoch": 1.35, "grad_norm": 8.567365458568052, "learning_rate": 6.713146812817064e-06, "loss": 0.7893, "step": 31885 }, { "epoch": 1.35, "grad_norm": 19.794658080432235, "learning_rate": 6.71199015218107e-06, "loss": 0.8277, "step": 31890 }, { "epoch": 1.35, "grad_norm": 9.685438673188287, "learning_rate": 6.710833387752156e-06, "loss": 0.779, "step": 31895 }, { "epoch": 1.35, "grad_norm": 5.298095262023212, "learning_rate": 6.709676519600452e-06, "loss": 0.8256, "step": 31900 }, { "epoch": 1.35, "grad_norm": 12.48602344525622, "learning_rate": 6.7085195477960976e-06, "loss": 0.8383, "step": 31905 }, { "epoch": 1.35, "grad_norm": 4.767572478989148, "learning_rate": 6.707362472409234e-06, "loss": 0.7929, "step": 31910 }, { "epoch": 1.35, "grad_norm": 7.309513982109161, "learning_rate": 6.706205293510015e-06, "loss": 0.8418, "step": 31915 }, { "epoch": 1.35, "grad_norm": 5.306730080758769, "learning_rate": 6.705048011168593e-06, "loss": 0.8337, "step": 31920 }, { "epoch": 1.35, "grad_norm": 6.682741573736404, "learning_rate": 6.703890625455134e-06, "loss": 0.8474, "step": 31925 }, { "epoch": 1.35, "grad_norm": 5.602086176561714, "learning_rate": 6.702733136439805e-06, "loss": 0.818, "step": 31930 }, { "epoch": 1.35, "grad_norm": 9.483415185946203, "learning_rate": 6.701575544192781e-06, "loss": 0.8338, "step": 31935 }, { "epoch": 1.35, "grad_norm": 5.170168058882046, "learning_rate": 6.7004178487842456e-06, "loss": 0.7898, "step": 31940 }, { "epoch": 1.35, "grad_norm": 12.91649822469014, "learning_rate": 6.699260050284382e-06, "loss": 0.8455, "step": 31945 }, { "epoch": 1.35, "grad_norm": 7.6715116268511405, "learning_rate": 6.698102148763389e-06, "loss": 0.8018, "step": 31950 }, { "epoch": 1.35, "grad_norm": 6.2767965603169396, "learning_rate": 6.696944144291464e-06, "loss": 0.8078, "step": 31955 }, { "epoch": 1.35, "grad_norm": 4.6326577606380654, "learning_rate": 6.695786036938814e-06, "loss": 0.7884, "step": 31960 }, { "epoch": 1.35, "grad_norm": 5.289676632047995, "learning_rate": 6.694627826775651e-06, "loss": 0.7961, "step": 31965 }, { "epoch": 1.35, "grad_norm": 5.1555033170089555, "learning_rate": 6.693469513872196e-06, "loss": 0.8206, "step": 31970 }, { "epoch": 1.35, "grad_norm": 7.183753112534834, "learning_rate": 6.692311098298672e-06, "loss": 0.7579, "step": 31975 }, { "epoch": 1.35, "grad_norm": 5.018166063272219, "learning_rate": 6.691152580125311e-06, "loss": 0.8111, "step": 31980 }, { "epoch": 1.35, "grad_norm": 5.173746213642771, "learning_rate": 6.689993959422349e-06, "loss": 0.7949, "step": 31985 }, { "epoch": 1.35, "grad_norm": 8.853147095611986, "learning_rate": 6.688835236260033e-06, "loss": 0.7895, "step": 31990 }, { "epoch": 1.35, "grad_norm": 6.250482274041793, "learning_rate": 6.687676410708611e-06, "loss": 0.7942, "step": 31995 }, { "epoch": 1.35, "grad_norm": 13.971344879746761, "learning_rate": 6.68651748283834e-06, "loss": 0.8175, "step": 32000 }, { "epoch": 1.35, "grad_norm": 6.977647652881739, "learning_rate": 6.68535845271948e-06, "loss": 0.8118, "step": 32005 }, { "epoch": 1.35, "grad_norm": 4.235039549092665, "learning_rate": 6.684199320422303e-06, "loss": 0.8022, "step": 32010 }, { "epoch": 1.35, "grad_norm": 4.290900853102789, "learning_rate": 6.6830400860170806e-06, "loss": 0.7892, "step": 32015 }, { "epoch": 1.36, "grad_norm": 4.680708863022627, "learning_rate": 6.681880749574098e-06, "loss": 0.795, "step": 32020 }, { "epoch": 1.36, "grad_norm": 6.290484996625954, "learning_rate": 6.6807213111636385e-06, "loss": 0.7846, "step": 32025 }, { "epoch": 1.36, "grad_norm": 5.453024347844048, "learning_rate": 6.679561770855996e-06, "loss": 0.7906, "step": 32030 }, { "epoch": 1.36, "grad_norm": 16.617071394365883, "learning_rate": 6.678402128721472e-06, "loss": 0.8228, "step": 32035 }, { "epoch": 1.36, "grad_norm": 7.750826967654463, "learning_rate": 6.677242384830369e-06, "loss": 0.83, "step": 32040 }, { "epoch": 1.36, "grad_norm": 7.627319244546042, "learning_rate": 6.676082539253002e-06, "loss": 0.7833, "step": 32045 }, { "epoch": 1.36, "grad_norm": 9.238031146101363, "learning_rate": 6.674922592059686e-06, "loss": 0.8148, "step": 32050 }, { "epoch": 1.36, "grad_norm": 9.823354439990842, "learning_rate": 6.673762543320749e-06, "loss": 0.8188, "step": 32055 }, { "epoch": 1.36, "grad_norm": 5.879124291301238, "learning_rate": 6.672602393106518e-06, "loss": 0.7962, "step": 32060 }, { "epoch": 1.36, "grad_norm": 10.836026514565734, "learning_rate": 6.67144214148733e-06, "loss": 0.8163, "step": 32065 }, { "epoch": 1.36, "grad_norm": 5.396220305766949, "learning_rate": 6.67028178853353e-06, "loss": 0.8057, "step": 32070 }, { "epoch": 1.36, "grad_norm": 4.4991739663200665, "learning_rate": 6.669121334315465e-06, "loss": 0.7804, "step": 32075 }, { "epoch": 1.36, "grad_norm": 10.449682550297233, "learning_rate": 6.667960778903489e-06, "loss": 0.7993, "step": 32080 }, { "epoch": 1.36, "grad_norm": 12.056767609423206, "learning_rate": 6.666800122367964e-06, "loss": 0.7833, "step": 32085 }, { "epoch": 1.36, "grad_norm": 11.56840000003019, "learning_rate": 6.665639364779259e-06, "loss": 0.8524, "step": 32090 }, { "epoch": 1.36, "grad_norm": 8.607816670368353, "learning_rate": 6.664478506207743e-06, "loss": 0.7912, "step": 32095 }, { "epoch": 1.36, "grad_norm": 17.68101572851812, "learning_rate": 6.663317546723801e-06, "loss": 0.8102, "step": 32100 }, { "epoch": 1.36, "grad_norm": 11.92862765264717, "learning_rate": 6.6621564863978135e-06, "loss": 0.7908, "step": 32105 }, { "epoch": 1.36, "grad_norm": 21.238242795003643, "learning_rate": 6.660995325300175e-06, "loss": 0.7893, "step": 32110 }, { "epoch": 1.36, "grad_norm": 10.322622829820148, "learning_rate": 6.659834063501282e-06, "loss": 0.8, "step": 32115 }, { "epoch": 1.36, "grad_norm": 6.277311317931259, "learning_rate": 6.6586727010715415e-06, "loss": 0.8447, "step": 32120 }, { "epoch": 1.36, "grad_norm": 4.693527108013454, "learning_rate": 6.657511238081358e-06, "loss": 0.793, "step": 32125 }, { "epoch": 1.36, "grad_norm": 4.5672305348038345, "learning_rate": 6.656349674601152e-06, "loss": 0.7956, "step": 32130 }, { "epoch": 1.36, "grad_norm": 5.402174255570077, "learning_rate": 6.655188010701343e-06, "loss": 0.8127, "step": 32135 }, { "epoch": 1.36, "grad_norm": 12.081318049278226, "learning_rate": 6.654026246452361e-06, "loss": 0.7536, "step": 32140 }, { "epoch": 1.36, "grad_norm": 8.81285903572821, "learning_rate": 6.6528643819246395e-06, "loss": 0.7832, "step": 32145 }, { "epoch": 1.36, "grad_norm": 5.08482005069644, "learning_rate": 6.651702417188619e-06, "loss": 0.8138, "step": 32150 }, { "epoch": 1.36, "grad_norm": 9.846238044570578, "learning_rate": 6.650540352314747e-06, "loss": 0.7794, "step": 32155 }, { "epoch": 1.36, "grad_norm": 8.144626082558325, "learning_rate": 6.6493781873734755e-06, "loss": 0.8291, "step": 32160 }, { "epoch": 1.36, "grad_norm": 7.047689191069621, "learning_rate": 6.6482159224352615e-06, "loss": 0.7648, "step": 32165 }, { "epoch": 1.36, "grad_norm": 8.111607108198218, "learning_rate": 6.647053557570573e-06, "loss": 0.8132, "step": 32170 }, { "epoch": 1.36, "grad_norm": 5.472533579474425, "learning_rate": 6.645891092849877e-06, "loss": 0.7966, "step": 32175 }, { "epoch": 1.36, "grad_norm": 6.631407652063096, "learning_rate": 6.644728528343653e-06, "loss": 0.7741, "step": 32180 }, { "epoch": 1.36, "grad_norm": 4.765140595486738, "learning_rate": 6.643565864122384e-06, "loss": 0.7683, "step": 32185 }, { "epoch": 1.36, "grad_norm": 6.887997941692297, "learning_rate": 6.642403100256556e-06, "loss": 0.7975, "step": 32190 }, { "epoch": 1.36, "grad_norm": 5.2645799983892445, "learning_rate": 6.6412402368166675e-06, "loss": 0.8247, "step": 32195 }, { "epoch": 1.36, "grad_norm": 9.461615499203763, "learning_rate": 6.640077273873217e-06, "loss": 0.8026, "step": 32200 }, { "epoch": 1.36, "grad_norm": 7.594891055826621, "learning_rate": 6.638914211496712e-06, "loss": 0.8077, "step": 32205 }, { "epoch": 1.36, "grad_norm": 4.787958265610801, "learning_rate": 6.637751049757666e-06, "loss": 0.8466, "step": 32210 }, { "epoch": 1.36, "grad_norm": 12.930173160126325, "learning_rate": 6.6365877887265996e-06, "loss": 0.8249, "step": 32215 }, { "epoch": 1.36, "grad_norm": 4.560529549866392, "learning_rate": 6.635424428474035e-06, "loss": 0.8025, "step": 32220 }, { "epoch": 1.36, "grad_norm": 9.00697433640159, "learning_rate": 6.634260969070505e-06, "loss": 0.7966, "step": 32225 }, { "epoch": 1.36, "grad_norm": 4.437825235323011, "learning_rate": 6.633097410586546e-06, "loss": 0.7504, "step": 32230 }, { "epoch": 1.36, "grad_norm": 9.830814334802387, "learning_rate": 6.631933753092701e-06, "loss": 0.7846, "step": 32235 }, { "epoch": 1.36, "grad_norm": 4.353841662231677, "learning_rate": 6.63076999665952e-06, "loss": 0.7476, "step": 32240 }, { "epoch": 1.36, "grad_norm": 4.300028442633021, "learning_rate": 6.629606141357558e-06, "loss": 0.8064, "step": 32245 }, { "epoch": 1.36, "grad_norm": 4.673621634256113, "learning_rate": 6.628442187257376e-06, "loss": 0.8188, "step": 32250 }, { "epoch": 1.37, "grad_norm": 6.183244474109752, "learning_rate": 6.62727813442954e-06, "loss": 0.7883, "step": 32255 }, { "epoch": 1.37, "grad_norm": 4.792167352539829, "learning_rate": 6.626113982944624e-06, "loss": 0.8107, "step": 32260 }, { "epoch": 1.37, "grad_norm": 4.570759933467777, "learning_rate": 6.624949732873208e-06, "loss": 0.7867, "step": 32265 }, { "epoch": 1.37, "grad_norm": 4.631323941883451, "learning_rate": 6.6237853842858765e-06, "loss": 0.8215, "step": 32270 }, { "epoch": 1.37, "grad_norm": 9.801800503868858, "learning_rate": 6.622620937253219e-06, "loss": 0.8718, "step": 32275 }, { "epoch": 1.37, "grad_norm": 5.473835377380429, "learning_rate": 6.621456391845835e-06, "loss": 0.8327, "step": 32280 }, { "epoch": 1.37, "grad_norm": 4.480106037183473, "learning_rate": 6.620291748134325e-06, "loss": 0.7819, "step": 32285 }, { "epoch": 1.37, "grad_norm": 5.17451115173463, "learning_rate": 6.6191270061893e-06, "loss": 0.7802, "step": 32290 }, { "epoch": 1.37, "grad_norm": 5.728453902835553, "learning_rate": 6.617962166081372e-06, "loss": 0.8007, "step": 32295 }, { "epoch": 1.37, "grad_norm": 7.4990375782794505, "learning_rate": 6.616797227881165e-06, "loss": 0.8069, "step": 32300 }, { "epoch": 1.37, "grad_norm": 4.714997479769019, "learning_rate": 6.615632191659306e-06, "loss": 0.8232, "step": 32305 }, { "epoch": 1.37, "grad_norm": 18.637504225427705, "learning_rate": 6.614467057486423e-06, "loss": 0.7999, "step": 32310 }, { "epoch": 1.37, "grad_norm": 18.40429884734917, "learning_rate": 6.613301825433161e-06, "loss": 0.8194, "step": 32315 }, { "epoch": 1.37, "grad_norm": 5.099649593287371, "learning_rate": 6.612136495570159e-06, "loss": 0.7648, "step": 32320 }, { "epoch": 1.37, "grad_norm": 5.2940724434682975, "learning_rate": 6.610971067968071e-06, "loss": 0.7721, "step": 32325 }, { "epoch": 1.37, "grad_norm": 5.1509816617884905, "learning_rate": 6.609805542697553e-06, "loss": 0.755, "step": 32330 }, { "epoch": 1.37, "grad_norm": 5.161805000881345, "learning_rate": 6.608639919829265e-06, "loss": 0.8172, "step": 32335 }, { "epoch": 1.37, "grad_norm": 5.367467846373024, "learning_rate": 6.607474199433879e-06, "loss": 0.7779, "step": 32340 }, { "epoch": 1.37, "grad_norm": 12.563513436686147, "learning_rate": 6.606308381582067e-06, "loss": 0.8556, "step": 32345 }, { "epoch": 1.37, "grad_norm": 7.2996953397330815, "learning_rate": 6.605142466344509e-06, "loss": 0.8194, "step": 32350 }, { "epoch": 1.37, "grad_norm": 4.9488242763629735, "learning_rate": 6.60397645379189e-06, "loss": 0.8183, "step": 32355 }, { "epoch": 1.37, "grad_norm": 7.073979705043436, "learning_rate": 6.602810343994905e-06, "loss": 0.808, "step": 32360 }, { "epoch": 1.37, "grad_norm": 5.573140832652359, "learning_rate": 6.6016441370242505e-06, "loss": 0.7939, "step": 32365 }, { "epoch": 1.37, "grad_norm": 13.969276038631799, "learning_rate": 6.600477832950628e-06, "loss": 0.8084, "step": 32370 }, { "epoch": 1.37, "grad_norm": 14.963157480677271, "learning_rate": 6.59931143184475e-06, "loss": 0.7922, "step": 32375 }, { "epoch": 1.37, "grad_norm": 5.198505535026127, "learning_rate": 6.59814493377733e-06, "loss": 0.8133, "step": 32380 }, { "epoch": 1.37, "grad_norm": 4.643582439644814, "learning_rate": 6.596978338819091e-06, "loss": 0.7805, "step": 32385 }, { "epoch": 1.37, "grad_norm": 5.385407765723516, "learning_rate": 6.595811647040759e-06, "loss": 0.7762, "step": 32390 }, { "epoch": 1.37, "grad_norm": 4.5015323830659995, "learning_rate": 6.5946448585130676e-06, "loss": 0.7523, "step": 32395 }, { "epoch": 1.37, "grad_norm": 5.159939330906181, "learning_rate": 6.593477973306756e-06, "loss": 0.8023, "step": 32400 }, { "epoch": 1.37, "grad_norm": 6.11369429154442, "learning_rate": 6.5923109914925684e-06, "loss": 0.804, "step": 32405 }, { "epoch": 1.37, "grad_norm": 6.344438228089828, "learning_rate": 6.5911439131412544e-06, "loss": 0.8225, "step": 32410 }, { "epoch": 1.37, "grad_norm": 7.6978849016135245, "learning_rate": 6.589976738323574e-06, "loss": 0.7852, "step": 32415 }, { "epoch": 1.37, "grad_norm": 17.514977884739444, "learning_rate": 6.5888094671102854e-06, "loss": 0.8106, "step": 32420 }, { "epoch": 1.37, "grad_norm": 4.908595176775958, "learning_rate": 6.58764209957216e-06, "loss": 0.7702, "step": 32425 }, { "epoch": 1.37, "grad_norm": 6.83791187035084, "learning_rate": 6.58647463577997e-06, "loss": 0.8238, "step": 32430 }, { "epoch": 1.37, "grad_norm": 5.778864084982646, "learning_rate": 6.585307075804496e-06, "loss": 0.786, "step": 32435 }, { "epoch": 1.37, "grad_norm": 4.907398784986031, "learning_rate": 6.584139419716526e-06, "loss": 0.7727, "step": 32440 }, { "epoch": 1.37, "grad_norm": 5.66029977546783, "learning_rate": 6.582971667586846e-06, "loss": 0.8112, "step": 32445 }, { "epoch": 1.37, "grad_norm": 9.886202823778657, "learning_rate": 6.581803819486258e-06, "loss": 0.7989, "step": 32450 }, { "epoch": 1.37, "grad_norm": 6.3331709971619885, "learning_rate": 6.5806358754855624e-06, "loss": 0.8022, "step": 32455 }, { "epoch": 1.37, "grad_norm": 9.500697958400549, "learning_rate": 6.579467835655571e-06, "loss": 0.7605, "step": 32460 }, { "epoch": 1.37, "grad_norm": 4.54198043563717, "learning_rate": 6.578299700067098e-06, "loss": 0.8157, "step": 32465 }, { "epoch": 1.37, "grad_norm": 8.31600163649908, "learning_rate": 6.5771314687909605e-06, "loss": 0.828, "step": 32470 }, { "epoch": 1.37, "grad_norm": 10.572914577542127, "learning_rate": 6.57596314189799e-06, "loss": 0.8027, "step": 32475 }, { "epoch": 1.37, "grad_norm": 15.188386766580155, "learning_rate": 6.574794719459014e-06, "loss": 0.7974, "step": 32480 }, { "epoch": 1.37, "grad_norm": 5.662823127094822, "learning_rate": 6.573626201544876e-06, "loss": 0.8467, "step": 32485 }, { "epoch": 1.38, "grad_norm": 14.081916121729945, "learning_rate": 6.572457588226415e-06, "loss": 0.7898, "step": 32490 }, { "epoch": 1.38, "grad_norm": 18.785464812145943, "learning_rate": 6.571288879574482e-06, "loss": 0.8673, "step": 32495 }, { "epoch": 1.38, "grad_norm": 5.755488643781528, "learning_rate": 6.570120075659934e-06, "loss": 0.7964, "step": 32500 }, { "epoch": 1.38, "grad_norm": 7.166809908757483, "learning_rate": 6.568951176553628e-06, "loss": 0.824, "step": 32505 }, { "epoch": 1.38, "grad_norm": 5.307587071819875, "learning_rate": 6.567782182326436e-06, "loss": 0.7766, "step": 32510 }, { "epoch": 1.38, "grad_norm": 5.83883578174952, "learning_rate": 6.566613093049228e-06, "loss": 0.7994, "step": 32515 }, { "epoch": 1.38, "grad_norm": 8.146078237845996, "learning_rate": 6.565443908792883e-06, "loss": 0.7952, "step": 32520 }, { "epoch": 1.38, "grad_norm": 11.816944415709806, "learning_rate": 6.564274629628284e-06, "loss": 0.8041, "step": 32525 }, { "epoch": 1.38, "grad_norm": 15.282933831006238, "learning_rate": 6.563105255626323e-06, "loss": 0.7669, "step": 32530 }, { "epoch": 1.38, "grad_norm": 5.666109573171221, "learning_rate": 6.561935786857893e-06, "loss": 0.8105, "step": 32535 }, { "epoch": 1.38, "grad_norm": 5.6800349433169925, "learning_rate": 6.560766223393898e-06, "loss": 0.7784, "step": 32540 }, { "epoch": 1.38, "grad_norm": 4.496286880645884, "learning_rate": 6.559596565305245e-06, "loss": 0.8262, "step": 32545 }, { "epoch": 1.38, "grad_norm": 9.879429061928658, "learning_rate": 6.558426812662846e-06, "loss": 0.7936, "step": 32550 }, { "epoch": 1.38, "grad_norm": 11.119821893339374, "learning_rate": 6.55725696553762e-06, "loss": 0.8015, "step": 32555 }, { "epoch": 1.38, "grad_norm": 9.9960900871121, "learning_rate": 6.556087024000491e-06, "loss": 0.7697, "step": 32560 }, { "epoch": 1.38, "grad_norm": 4.361444628954888, "learning_rate": 6.554916988122388e-06, "loss": 0.7796, "step": 32565 }, { "epoch": 1.38, "grad_norm": 14.211284584648379, "learning_rate": 6.553746857974249e-06, "loss": 0.8043, "step": 32570 }, { "epoch": 1.38, "grad_norm": 8.360755360936613, "learning_rate": 6.552576633627016e-06, "loss": 0.825, "step": 32575 }, { "epoch": 1.38, "grad_norm": 8.894446346638047, "learning_rate": 6.551406315151633e-06, "loss": 0.8297, "step": 32580 }, { "epoch": 1.38, "grad_norm": 10.420766007658528, "learning_rate": 6.550235902619055e-06, "loss": 0.7941, "step": 32585 }, { "epoch": 1.38, "grad_norm": 16.414845560769777, "learning_rate": 6.549065396100241e-06, "loss": 0.8052, "step": 32590 }, { "epoch": 1.38, "grad_norm": 5.392968661482823, "learning_rate": 6.547894795666156e-06, "loss": 0.7983, "step": 32595 }, { "epoch": 1.38, "grad_norm": 11.40753938346193, "learning_rate": 6.546724101387769e-06, "loss": 0.8255, "step": 32600 }, { "epoch": 1.38, "grad_norm": 9.237436926703271, "learning_rate": 6.545553313336055e-06, "loss": 0.7818, "step": 32605 }, { "epoch": 1.38, "grad_norm": 8.162929333120063, "learning_rate": 6.544382431581997e-06, "loss": 0.8344, "step": 32610 }, { "epoch": 1.38, "grad_norm": 6.813629755858621, "learning_rate": 6.54321145619658e-06, "loss": 0.826, "step": 32615 }, { "epoch": 1.38, "grad_norm": 15.184581253096587, "learning_rate": 6.542040387250799e-06, "loss": 0.8154, "step": 32620 }, { "epoch": 1.38, "grad_norm": 9.568646136670125, "learning_rate": 6.54086922481565e-06, "loss": 0.8408, "step": 32625 }, { "epoch": 1.38, "grad_norm": 8.703307532363448, "learning_rate": 6.539697968962141e-06, "loss": 0.7795, "step": 32630 }, { "epoch": 1.38, "grad_norm": 10.042921483392426, "learning_rate": 6.538526619761279e-06, "loss": 0.7783, "step": 32635 }, { "epoch": 1.38, "grad_norm": 8.396146155657716, "learning_rate": 6.5373551772840786e-06, "loss": 0.7911, "step": 32640 }, { "epoch": 1.38, "grad_norm": 8.770082468006368, "learning_rate": 6.536183641601563e-06, "loss": 0.7921, "step": 32645 }, { "epoch": 1.38, "grad_norm": 10.10295234548853, "learning_rate": 6.535012012784758e-06, "loss": 0.8245, "step": 32650 }, { "epoch": 1.38, "grad_norm": 20.26148071786032, "learning_rate": 6.533840290904696e-06, "loss": 0.8252, "step": 32655 }, { "epoch": 1.38, "grad_norm": 22.434907191319688, "learning_rate": 6.532668476032416e-06, "loss": 0.8516, "step": 32660 }, { "epoch": 1.38, "grad_norm": 25.311980031398974, "learning_rate": 6.531496568238959e-06, "loss": 0.8098, "step": 32665 }, { "epoch": 1.38, "grad_norm": 9.580428320557882, "learning_rate": 6.5303245675953784e-06, "loss": 0.8003, "step": 32670 }, { "epoch": 1.38, "grad_norm": 5.371229164114469, "learning_rate": 6.529152474172727e-06, "loss": 0.8219, "step": 32675 }, { "epoch": 1.38, "grad_norm": 7.770562744930191, "learning_rate": 6.527980288042065e-06, "loss": 0.8192, "step": 32680 }, { "epoch": 1.38, "grad_norm": 7.029725948565768, "learning_rate": 6.526808009274459e-06, "loss": 0.8362, "step": 32685 }, { "epoch": 1.38, "grad_norm": 7.7190400686814185, "learning_rate": 6.52563563794098e-06, "loss": 0.7996, "step": 32690 }, { "epoch": 1.38, "grad_norm": 8.036272784128906, "learning_rate": 6.5244631741127075e-06, "loss": 0.797, "step": 32695 }, { "epoch": 1.38, "grad_norm": 7.8832423450680675, "learning_rate": 6.52329061786072e-06, "loss": 0.8033, "step": 32700 }, { "epoch": 1.38, "grad_norm": 6.469805645486168, "learning_rate": 6.522117969256113e-06, "loss": 0.7837, "step": 32705 }, { "epoch": 1.38, "grad_norm": 5.756403888206813, "learning_rate": 6.520945228369976e-06, "loss": 0.8233, "step": 32710 }, { "epoch": 1.38, "grad_norm": 5.406053134866068, "learning_rate": 6.519772395273409e-06, "loss": 0.7705, "step": 32715 }, { "epoch": 1.38, "grad_norm": 6.10376853462327, "learning_rate": 6.518599470037519e-06, "loss": 0.7986, "step": 32720 }, { "epoch": 1.38, "grad_norm": 4.431267666400869, "learning_rate": 6.517426452733416e-06, "loss": 0.7989, "step": 32725 }, { "epoch": 1.39, "grad_norm": 8.482098569642227, "learning_rate": 6.516253343432218e-06, "loss": 0.8006, "step": 32730 }, { "epoch": 1.39, "grad_norm": 4.557287201702734, "learning_rate": 6.5150801422050445e-06, "loss": 0.7984, "step": 32735 }, { "epoch": 1.39, "grad_norm": 14.528786229154706, "learning_rate": 6.513906849123025e-06, "loss": 0.8252, "step": 32740 }, { "epoch": 1.39, "grad_norm": 18.49454056620756, "learning_rate": 6.512733464257293e-06, "loss": 0.7925, "step": 32745 }, { "epoch": 1.39, "grad_norm": 5.602707025626699, "learning_rate": 6.511559987678986e-06, "loss": 0.8401, "step": 32750 }, { "epoch": 1.39, "grad_norm": 6.19021634647184, "learning_rate": 6.5103864194592514e-06, "loss": 0.7782, "step": 32755 }, { "epoch": 1.39, "grad_norm": 4.666238686300844, "learning_rate": 6.509212759669237e-06, "loss": 0.8023, "step": 32760 }, { "epoch": 1.39, "grad_norm": 4.3126717746543575, "learning_rate": 6.508039008380097e-06, "loss": 0.8175, "step": 32765 }, { "epoch": 1.39, "grad_norm": 7.775910729010544, "learning_rate": 6.506865165662996e-06, "loss": 0.8035, "step": 32770 }, { "epoch": 1.39, "grad_norm": 6.38812005156396, "learning_rate": 6.5056912315890965e-06, "loss": 0.7921, "step": 32775 }, { "epoch": 1.39, "grad_norm": 4.748458645809002, "learning_rate": 6.504517206229574e-06, "loss": 0.787, "step": 32780 }, { "epoch": 1.39, "grad_norm": 22.702776966715145, "learning_rate": 6.503343089655603e-06, "loss": 0.7975, "step": 32785 }, { "epoch": 1.39, "grad_norm": 15.95715238560528, "learning_rate": 6.502168881938371e-06, "loss": 0.7758, "step": 32790 }, { "epoch": 1.39, "grad_norm": 16.985775748563615, "learning_rate": 6.500994583149064e-06, "loss": 0.8131, "step": 32795 }, { "epoch": 1.39, "grad_norm": 9.03461569930784, "learning_rate": 6.499820193358876e-06, "loss": 0.8242, "step": 32800 }, { "epoch": 1.39, "grad_norm": 17.662827897229604, "learning_rate": 6.498645712639009e-06, "loss": 0.8076, "step": 32805 }, { "epoch": 1.39, "grad_norm": 9.560416102776916, "learning_rate": 6.497471141060665e-06, "loss": 0.7785, "step": 32810 }, { "epoch": 1.39, "grad_norm": 8.372175709025138, "learning_rate": 6.4962964786950575e-06, "loss": 0.7569, "step": 32815 }, { "epoch": 1.39, "grad_norm": 4.437434248339504, "learning_rate": 6.495121725613402e-06, "loss": 0.8259, "step": 32820 }, { "epoch": 1.39, "grad_norm": 4.539643541515003, "learning_rate": 6.49394688188692e-06, "loss": 0.8098, "step": 32825 }, { "epoch": 1.39, "grad_norm": 7.9717784472017845, "learning_rate": 6.49277194758684e-06, "loss": 0.7639, "step": 32830 }, { "epoch": 1.39, "grad_norm": 6.383438675293918, "learning_rate": 6.491596922784393e-06, "loss": 0.8322, "step": 32835 }, { "epoch": 1.39, "grad_norm": 10.059129819988367, "learning_rate": 6.4904218075508185e-06, "loss": 0.8008, "step": 32840 }, { "epoch": 1.39, "grad_norm": 4.218741220432124, "learning_rate": 6.489246601957361e-06, "loss": 0.7665, "step": 32845 }, { "epoch": 1.39, "grad_norm": 7.475843003876764, "learning_rate": 6.488071306075268e-06, "loss": 0.8084, "step": 32850 }, { "epoch": 1.39, "grad_norm": 8.571811936135655, "learning_rate": 6.486895919975795e-06, "loss": 0.8084, "step": 32855 }, { "epoch": 1.39, "grad_norm": 4.851511390449732, "learning_rate": 6.485720443730201e-06, "loss": 0.7944, "step": 32860 }, { "epoch": 1.39, "grad_norm": 4.9321290529365855, "learning_rate": 6.484544877409754e-06, "loss": 0.7924, "step": 32865 }, { "epoch": 1.39, "grad_norm": 4.884257405141088, "learning_rate": 6.483369221085724e-06, "loss": 0.7733, "step": 32870 }, { "epoch": 1.39, "grad_norm": 4.797512815579661, "learning_rate": 6.482193474829387e-06, "loss": 0.7908, "step": 32875 }, { "epoch": 1.39, "grad_norm": 4.438727093393671, "learning_rate": 6.481017638712026e-06, "loss": 0.7749, "step": 32880 }, { "epoch": 1.39, "grad_norm": 5.067616559359568, "learning_rate": 6.479841712804928e-06, "loss": 0.7907, "step": 32885 }, { "epoch": 1.39, "grad_norm": 5.043006135494658, "learning_rate": 6.478665697179386e-06, "loss": 0.8117, "step": 32890 }, { "epoch": 1.39, "grad_norm": 7.1475784821434365, "learning_rate": 6.477489591906697e-06, "loss": 0.8447, "step": 32895 }, { "epoch": 1.39, "grad_norm": 14.548836230878772, "learning_rate": 6.476313397058168e-06, "loss": 0.7743, "step": 32900 }, { "epoch": 1.39, "grad_norm": 7.937513110656233, "learning_rate": 6.475137112705106e-06, "loss": 0.7877, "step": 32905 }, { "epoch": 1.39, "grad_norm": 6.10053323463538, "learning_rate": 6.473960738918826e-06, "loss": 0.7963, "step": 32910 }, { "epoch": 1.39, "grad_norm": 7.692046715571448, "learning_rate": 6.4727842757706474e-06, "loss": 0.8175, "step": 32915 }, { "epoch": 1.39, "grad_norm": 12.912291714255062, "learning_rate": 6.4716077233318965e-06, "loss": 0.8111, "step": 32920 }, { "epoch": 1.39, "grad_norm": 4.605816354637375, "learning_rate": 6.470431081673905e-06, "loss": 0.7596, "step": 32925 }, { "epoch": 1.39, "grad_norm": 4.432619590299122, "learning_rate": 6.469254350868007e-06, "loss": 0.8114, "step": 32930 }, { "epoch": 1.39, "grad_norm": 4.793072470239717, "learning_rate": 6.468077530985547e-06, "loss": 0.838, "step": 32935 }, { "epoch": 1.39, "grad_norm": 4.819586946104696, "learning_rate": 6.46690062209787e-06, "loss": 0.8069, "step": 32940 }, { "epoch": 1.39, "grad_norm": 4.539770561655789, "learning_rate": 6.465723624276329e-06, "loss": 0.7688, "step": 32945 }, { "epoch": 1.39, "grad_norm": 6.46751269024532, "learning_rate": 6.464546537592282e-06, "loss": 0.7979, "step": 32950 }, { "epoch": 1.39, "grad_norm": 6.475284238357774, "learning_rate": 6.463369362117093e-06, "loss": 0.8248, "step": 32955 }, { "epoch": 1.39, "grad_norm": 6.422390208930225, "learning_rate": 6.4621920979221285e-06, "loss": 0.7806, "step": 32960 }, { "epoch": 1.4, "grad_norm": 6.490123617153963, "learning_rate": 6.4610147450787665e-06, "loss": 0.8019, "step": 32965 }, { "epoch": 1.4, "grad_norm": 4.867896980274606, "learning_rate": 6.459837303658382e-06, "loss": 0.807, "step": 32970 }, { "epoch": 1.4, "grad_norm": 9.283771355576128, "learning_rate": 6.4586597737323635e-06, "loss": 0.7448, "step": 32975 }, { "epoch": 1.4, "grad_norm": 6.306157580264152, "learning_rate": 6.4574821553720975e-06, "loss": 0.7908, "step": 32980 }, { "epoch": 1.4, "grad_norm": 6.530920095575955, "learning_rate": 6.456304448648983e-06, "loss": 0.8084, "step": 32985 }, { "epoch": 1.4, "grad_norm": 5.0158768451884805, "learning_rate": 6.4551266536344195e-06, "loss": 0.8082, "step": 32990 }, { "epoch": 1.4, "grad_norm": 4.724381609113554, "learning_rate": 6.453948770399813e-06, "loss": 0.7738, "step": 32995 }, { "epoch": 1.4, "grad_norm": 4.41866028276665, "learning_rate": 6.452770799016575e-06, "loss": 0.8257, "step": 33000 }, { "epoch": 1.4, "grad_norm": 4.9074283272337205, "learning_rate": 6.451592739556123e-06, "loss": 0.7976, "step": 33005 }, { "epoch": 1.4, "grad_norm": 5.9899901727741485, "learning_rate": 6.45041459208988e-06, "loss": 0.7905, "step": 33010 }, { "epoch": 1.4, "grad_norm": 4.99282904133313, "learning_rate": 6.449236356689272e-06, "loss": 0.8027, "step": 33015 }, { "epoch": 1.4, "grad_norm": 6.262841221526618, "learning_rate": 6.448058033425731e-06, "loss": 0.8057, "step": 33020 }, { "epoch": 1.4, "grad_norm": 5.737270791244499, "learning_rate": 6.446879622370699e-06, "loss": 0.7611, "step": 33025 }, { "epoch": 1.4, "grad_norm": 4.991359646274038, "learning_rate": 6.445701123595616e-06, "loss": 0.8194, "step": 33030 }, { "epoch": 1.4, "grad_norm": 5.732435506791664, "learning_rate": 6.444522537171932e-06, "loss": 0.8227, "step": 33035 }, { "epoch": 1.4, "grad_norm": 6.376097302555189, "learning_rate": 6.443343863171103e-06, "loss": 0.8081, "step": 33040 }, { "epoch": 1.4, "grad_norm": 6.271384082786036, "learning_rate": 6.442165101664586e-06, "loss": 0.8102, "step": 33045 }, { "epoch": 1.4, "grad_norm": 5.456062673521002, "learning_rate": 6.440986252723848e-06, "loss": 0.8188, "step": 33050 }, { "epoch": 1.4, "grad_norm": 4.099204094506896, "learning_rate": 6.439807316420357e-06, "loss": 0.7717, "step": 33055 }, { "epoch": 1.4, "grad_norm": 7.318946498438357, "learning_rate": 6.43862829282559e-06, "loss": 0.8177, "step": 33060 }, { "epoch": 1.4, "grad_norm": 7.05900790819711, "learning_rate": 6.437449182011028e-06, "loss": 0.8013, "step": 33065 }, { "epoch": 1.4, "grad_norm": 6.837506999870931, "learning_rate": 6.436269984048154e-06, "loss": 0.7632, "step": 33070 }, { "epoch": 1.4, "grad_norm": 5.76772902700047, "learning_rate": 6.4350906990084625e-06, "loss": 0.8133, "step": 33075 }, { "epoch": 1.4, "grad_norm": 18.121361402924915, "learning_rate": 6.433911326963449e-06, "loss": 0.8207, "step": 33080 }, { "epoch": 1.4, "grad_norm": 12.163078268785592, "learning_rate": 6.432731867984618e-06, "loss": 0.7886, "step": 33085 }, { "epoch": 1.4, "grad_norm": 28.314550616970372, "learning_rate": 6.4315523221434705e-06, "loss": 0.821, "step": 33090 }, { "epoch": 1.4, "grad_norm": 10.949218419785698, "learning_rate": 6.430372689511525e-06, "loss": 0.7796, "step": 33095 }, { "epoch": 1.4, "grad_norm": 6.330186748409118, "learning_rate": 6.429192970160296e-06, "loss": 0.7771, "step": 33100 }, { "epoch": 1.4, "grad_norm": 5.443302786957219, "learning_rate": 6.4280131641613064e-06, "loss": 0.8082, "step": 33105 }, { "epoch": 1.4, "grad_norm": 5.890992968591108, "learning_rate": 6.426833271586086e-06, "loss": 0.8284, "step": 33110 }, { "epoch": 1.4, "grad_norm": 7.816618518452142, "learning_rate": 6.425653292506165e-06, "loss": 0.7816, "step": 33115 }, { "epoch": 1.4, "grad_norm": 5.630217699055115, "learning_rate": 6.424473226993086e-06, "loss": 0.8186, "step": 33120 }, { "epoch": 1.4, "grad_norm": 4.589063235720089, "learning_rate": 6.423293075118392e-06, "loss": 0.7795, "step": 33125 }, { "epoch": 1.4, "grad_norm": 5.61802093160942, "learning_rate": 6.422112836953629e-06, "loss": 0.7753, "step": 33130 }, { "epoch": 1.4, "grad_norm": 14.050159465578894, "learning_rate": 6.4209325125703556e-06, "loss": 0.7584, "step": 33135 }, { "epoch": 1.4, "grad_norm": 4.707300672888438, "learning_rate": 6.419752102040129e-06, "loss": 0.8287, "step": 33140 }, { "epoch": 1.4, "grad_norm": 5.147396651089398, "learning_rate": 6.4185716054345136e-06, "loss": 0.8272, "step": 33145 }, { "epoch": 1.4, "grad_norm": 5.016212261495118, "learning_rate": 6.417391022825082e-06, "loss": 0.7761, "step": 33150 }, { "epoch": 1.4, "grad_norm": 5.292363560199518, "learning_rate": 6.416210354283407e-06, "loss": 0.7786, "step": 33155 }, { "epoch": 1.4, "grad_norm": 4.878868743910045, "learning_rate": 6.415029599881068e-06, "loss": 0.7825, "step": 33160 }, { "epoch": 1.4, "grad_norm": 5.149938082552027, "learning_rate": 6.413848759689654e-06, "loss": 0.7942, "step": 33165 }, { "epoch": 1.4, "grad_norm": 8.207944339852757, "learning_rate": 6.412667833780756e-06, "loss": 0.8172, "step": 33170 }, { "epoch": 1.4, "grad_norm": 10.37069909339702, "learning_rate": 6.4114868222259666e-06, "loss": 0.785, "step": 33175 }, { "epoch": 1.4, "grad_norm": 4.2154684632893105, "learning_rate": 6.410305725096889e-06, "loss": 0.8063, "step": 33180 }, { "epoch": 1.4, "grad_norm": 11.45192542070303, "learning_rate": 6.409124542465129e-06, "loss": 0.7802, "step": 33185 }, { "epoch": 1.4, "grad_norm": 11.921757922717042, "learning_rate": 6.407943274402299e-06, "loss": 0.8465, "step": 33190 }, { "epoch": 1.4, "grad_norm": 5.37072460277755, "learning_rate": 6.406761920980016e-06, "loss": 0.8254, "step": 33195 }, { "epoch": 1.41, "grad_norm": 4.617662375873842, "learning_rate": 6.4055804822699005e-06, "loss": 0.8255, "step": 33200 }, { "epoch": 1.41, "grad_norm": 7.2688111173906105, "learning_rate": 6.404398958343582e-06, "loss": 0.7359, "step": 33205 }, { "epoch": 1.41, "grad_norm": 8.436218622614176, "learning_rate": 6.403217349272691e-06, "loss": 0.7612, "step": 33210 }, { "epoch": 1.41, "grad_norm": 4.851132645174651, "learning_rate": 6.402035655128864e-06, "loss": 0.803, "step": 33215 }, { "epoch": 1.41, "grad_norm": 10.082411408152227, "learning_rate": 6.400853875983747e-06, "loss": 0.8313, "step": 33220 }, { "epoch": 1.41, "grad_norm": 12.6743400529898, "learning_rate": 6.399672011908986e-06, "loss": 0.7687, "step": 33225 }, { "epoch": 1.41, "grad_norm": 4.719349151513585, "learning_rate": 6.398490062976231e-06, "loss": 0.7814, "step": 33230 }, { "epoch": 1.41, "grad_norm": 4.076231193956435, "learning_rate": 6.397308029257146e-06, "loss": 0.8014, "step": 33235 }, { "epoch": 1.41, "grad_norm": 5.434478584553953, "learning_rate": 6.396125910823389e-06, "loss": 0.7796, "step": 33240 }, { "epoch": 1.41, "grad_norm": 8.483799636494808, "learning_rate": 6.394943707746631e-06, "loss": 0.7927, "step": 33245 }, { "epoch": 1.41, "grad_norm": 15.5409722751984, "learning_rate": 6.393761420098546e-06, "loss": 0.7982, "step": 33250 }, { "epoch": 1.41, "grad_norm": 5.8007651998966, "learning_rate": 6.392579047950811e-06, "loss": 0.7798, "step": 33255 }, { "epoch": 1.41, "grad_norm": 13.248744314510748, "learning_rate": 6.391396591375111e-06, "loss": 0.7997, "step": 33260 }, { "epoch": 1.41, "grad_norm": 5.230725644655068, "learning_rate": 6.390214050443132e-06, "loss": 0.8061, "step": 33265 }, { "epoch": 1.41, "grad_norm": 19.291779957251343, "learning_rate": 6.389031425226572e-06, "loss": 0.8385, "step": 33270 }, { "epoch": 1.41, "grad_norm": 15.954965177255726, "learning_rate": 6.387848715797127e-06, "loss": 0.7882, "step": 33275 }, { "epoch": 1.41, "grad_norm": 14.123110945750108, "learning_rate": 6.3866659222265026e-06, "loss": 0.8327, "step": 33280 }, { "epoch": 1.41, "grad_norm": 17.895379547355656, "learning_rate": 6.385483044586409e-06, "loss": 0.7791, "step": 33285 }, { "epoch": 1.41, "grad_norm": 11.318118901413069, "learning_rate": 6.3843000829485576e-06, "loss": 0.7541, "step": 33290 }, { "epoch": 1.41, "grad_norm": 12.013842823949265, "learning_rate": 6.383117037384672e-06, "loss": 0.8191, "step": 33295 }, { "epoch": 1.41, "grad_norm": 15.153129798921043, "learning_rate": 6.381933907966473e-06, "loss": 0.8208, "step": 33300 }, { "epoch": 1.41, "grad_norm": 19.808212049826725, "learning_rate": 6.380750694765692e-06, "loss": 0.7517, "step": 33305 }, { "epoch": 1.41, "grad_norm": 5.728000212955964, "learning_rate": 6.379567397854065e-06, "loss": 0.7933, "step": 33310 }, { "epoch": 1.41, "grad_norm": 9.240451849535159, "learning_rate": 6.378384017303327e-06, "loss": 0.81, "step": 33315 }, { "epoch": 1.41, "grad_norm": 7.939889136556447, "learning_rate": 6.3772005531852295e-06, "loss": 0.8212, "step": 33320 }, { "epoch": 1.41, "grad_norm": 4.358252774736982, "learning_rate": 6.376017005571518e-06, "loss": 0.7655, "step": 33325 }, { "epoch": 1.41, "grad_norm": 5.220183882948222, "learning_rate": 6.374833374533949e-06, "loss": 0.7903, "step": 33330 }, { "epoch": 1.41, "grad_norm": 4.928255244762764, "learning_rate": 6.373649660144281e-06, "loss": 0.7766, "step": 33335 }, { "epoch": 1.41, "grad_norm": 8.0141816038703, "learning_rate": 6.372465862474282e-06, "loss": 0.8497, "step": 33340 }, { "epoch": 1.41, "grad_norm": 4.483851405393595, "learning_rate": 6.371281981595719e-06, "loss": 0.7836, "step": 33345 }, { "epoch": 1.41, "grad_norm": 6.344566871669768, "learning_rate": 6.370098017580369e-06, "loss": 0.7768, "step": 33350 }, { "epoch": 1.41, "grad_norm": 4.714596951241924, "learning_rate": 6.368913970500013e-06, "loss": 0.8153, "step": 33355 }, { "epoch": 1.41, "grad_norm": 9.943582547085338, "learning_rate": 6.3677298404264335e-06, "loss": 0.806, "step": 33360 }, { "epoch": 1.41, "grad_norm": 6.7785065165581875, "learning_rate": 6.366545627431424e-06, "loss": 0.7838, "step": 33365 }, { "epoch": 1.41, "grad_norm": 5.010861643261537, "learning_rate": 6.365361331586778e-06, "loss": 0.7871, "step": 33370 }, { "epoch": 1.41, "grad_norm": 11.887829757289023, "learning_rate": 6.3641769529642964e-06, "loss": 0.8005, "step": 33375 }, { "epoch": 1.41, "grad_norm": 4.764175795180371, "learning_rate": 6.3629924916357845e-06, "loss": 0.8547, "step": 33380 }, { "epoch": 1.41, "grad_norm": 7.758475788918587, "learning_rate": 6.361807947673051e-06, "loss": 0.7809, "step": 33385 }, { "epoch": 1.41, "grad_norm": 8.382764633663472, "learning_rate": 6.360623321147914e-06, "loss": 0.7832, "step": 33390 }, { "epoch": 1.41, "grad_norm": 6.191875748999542, "learning_rate": 6.359438612132194e-06, "loss": 0.8141, "step": 33395 }, { "epoch": 1.41, "grad_norm": 5.410148922650489, "learning_rate": 6.358253820697714e-06, "loss": 0.7731, "step": 33400 }, { "epoch": 1.41, "grad_norm": 4.330549440811483, "learning_rate": 6.357068946916307e-06, "loss": 0.8004, "step": 33405 }, { "epoch": 1.41, "grad_norm": 7.97921071145875, "learning_rate": 6.3558839908598055e-06, "loss": 0.8369, "step": 33410 }, { "epoch": 1.41, "grad_norm": 4.6257965337114575, "learning_rate": 6.354698952600053e-06, "loss": 0.7489, "step": 33415 }, { "epoch": 1.41, "grad_norm": 8.38487796644744, "learning_rate": 6.353513832208893e-06, "loss": 0.8001, "step": 33420 }, { "epoch": 1.41, "grad_norm": 6.684241706869719, "learning_rate": 6.352328629758176e-06, "loss": 0.7593, "step": 33425 }, { "epoch": 1.41, "grad_norm": 4.1881531758514905, "learning_rate": 6.3511433453197575e-06, "loss": 0.7582, "step": 33430 }, { "epoch": 1.42, "grad_norm": 4.04387711529924, "learning_rate": 6.349957978965497e-06, "loss": 0.8078, "step": 33435 }, { "epoch": 1.42, "grad_norm": 15.097050926566288, "learning_rate": 6.348772530767263e-06, "loss": 0.8075, "step": 33440 }, { "epoch": 1.42, "grad_norm": 5.17846601168293, "learning_rate": 6.347587000796922e-06, "loss": 0.7599, "step": 33445 }, { "epoch": 1.42, "grad_norm": 12.224113310524332, "learning_rate": 6.346401389126351e-06, "loss": 0.7499, "step": 33450 }, { "epoch": 1.42, "grad_norm": 5.185016709481542, "learning_rate": 6.345215695827429e-06, "loss": 0.8238, "step": 33455 }, { "epoch": 1.42, "grad_norm": 5.8090163457243165, "learning_rate": 6.344029920972042e-06, "loss": 0.7777, "step": 33460 }, { "epoch": 1.42, "grad_norm": 12.755642457408706, "learning_rate": 6.342844064632081e-06, "loss": 0.8175, "step": 33465 }, { "epoch": 1.42, "grad_norm": 18.142957836402566, "learning_rate": 6.341658126879438e-06, "loss": 0.8104, "step": 33470 }, { "epoch": 1.42, "grad_norm": 6.376409497988296, "learning_rate": 6.3404721077860155e-06, "loss": 0.7643, "step": 33475 }, { "epoch": 1.42, "grad_norm": 9.703728206972405, "learning_rate": 6.339286007423718e-06, "loss": 0.7743, "step": 33480 }, { "epoch": 1.42, "grad_norm": 9.314511191441458, "learning_rate": 6.338099825864455e-06, "loss": 0.7653, "step": 33485 }, { "epoch": 1.42, "grad_norm": 13.324673040779766, "learning_rate": 6.3369135631801415e-06, "loss": 0.8298, "step": 33490 }, { "epoch": 1.42, "grad_norm": 5.39915612087775, "learning_rate": 6.335727219442696e-06, "loss": 0.772, "step": 33495 }, { "epoch": 1.42, "grad_norm": 7.679108621575984, "learning_rate": 6.334540794724043e-06, "loss": 0.7848, "step": 33500 }, { "epoch": 1.42, "grad_norm": 4.323469051210022, "learning_rate": 6.333354289096113e-06, "loss": 0.8046, "step": 33505 }, { "epoch": 1.42, "grad_norm": 10.796342203657705, "learning_rate": 6.3321677026308406e-06, "loss": 0.7659, "step": 33510 }, { "epoch": 1.42, "grad_norm": 7.9143980655472905, "learning_rate": 6.3309810354001646e-06, "loss": 0.7437, "step": 33515 }, { "epoch": 1.42, "grad_norm": 5.096871757892698, "learning_rate": 6.3297942874760275e-06, "loss": 0.7913, "step": 33520 }, { "epoch": 1.42, "grad_norm": 4.783201381586201, "learning_rate": 6.328607458930381e-06, "loss": 0.762, "step": 33525 }, { "epoch": 1.42, "grad_norm": 7.304642127169017, "learning_rate": 6.327420549835178e-06, "loss": 0.8302, "step": 33530 }, { "epoch": 1.42, "grad_norm": 5.123473305508212, "learning_rate": 6.3262335602623755e-06, "loss": 0.8062, "step": 33535 }, { "epoch": 1.42, "grad_norm": 5.328024947408747, "learning_rate": 6.32504649028394e-06, "loss": 0.7547, "step": 33540 }, { "epoch": 1.42, "grad_norm": 5.497910507077813, "learning_rate": 6.323859339971839e-06, "loss": 0.7605, "step": 33545 }, { "epoch": 1.42, "grad_norm": 4.957325703067418, "learning_rate": 6.3226721093980445e-06, "loss": 0.7616, "step": 33550 }, { "epoch": 1.42, "grad_norm": 13.985064366163243, "learning_rate": 6.321484798634538e-06, "loss": 0.8261, "step": 33555 }, { "epoch": 1.42, "grad_norm": 29.002979507589, "learning_rate": 6.320297407753299e-06, "loss": 0.8156, "step": 33560 }, { "epoch": 1.42, "grad_norm": 11.703443151285368, "learning_rate": 6.3191099368263186e-06, "loss": 0.7547, "step": 33565 }, { "epoch": 1.42, "grad_norm": 5.021728980228457, "learning_rate": 6.317922385925588e-06, "loss": 0.7965, "step": 33570 }, { "epoch": 1.42, "grad_norm": 4.664802376961735, "learning_rate": 6.316734755123106e-06, "loss": 0.7797, "step": 33575 }, { "epoch": 1.42, "grad_norm": 4.963776082460479, "learning_rate": 6.3155470444908724e-06, "loss": 0.8411, "step": 33580 }, { "epoch": 1.42, "grad_norm": 5.558474013703338, "learning_rate": 6.314359254100898e-06, "loss": 0.8267, "step": 33585 }, { "epoch": 1.42, "grad_norm": 9.263537466413258, "learning_rate": 6.313171384025193e-06, "loss": 0.8876, "step": 33590 }, { "epoch": 1.42, "grad_norm": 10.799881519017585, "learning_rate": 6.311983434335775e-06, "loss": 0.8009, "step": 33595 }, { "epoch": 1.42, "grad_norm": 4.724402504621985, "learning_rate": 6.310795405104666e-06, "loss": 0.7801, "step": 33600 }, { "epoch": 1.42, "grad_norm": 5.567847531157446, "learning_rate": 6.309607296403892e-06, "loss": 0.7782, "step": 33605 }, { "epoch": 1.42, "grad_norm": 9.222203584142193, "learning_rate": 6.308419108305486e-06, "loss": 0.7709, "step": 33610 }, { "epoch": 1.42, "grad_norm": 6.08037064499177, "learning_rate": 6.307230840881484e-06, "loss": 0.7934, "step": 33615 }, { "epoch": 1.42, "grad_norm": 5.185159184413078, "learning_rate": 6.306042494203925e-06, "loss": 0.7643, "step": 33620 }, { "epoch": 1.42, "grad_norm": 5.7554089682114205, "learning_rate": 6.304854068344859e-06, "loss": 0.7933, "step": 33625 }, { "epoch": 1.42, "grad_norm": 6.676777303939975, "learning_rate": 6.3036655633763325e-06, "loss": 0.8071, "step": 33630 }, { "epoch": 1.42, "grad_norm": 8.938662027458149, "learning_rate": 6.302476979370404e-06, "loss": 0.7602, "step": 33635 }, { "epoch": 1.42, "grad_norm": 8.891418483254615, "learning_rate": 6.301288316399133e-06, "loss": 0.8057, "step": 33640 }, { "epoch": 1.42, "grad_norm": 11.726390402404421, "learning_rate": 6.300099574534582e-06, "loss": 0.7785, "step": 33645 }, { "epoch": 1.42, "grad_norm": 15.967132115859908, "learning_rate": 6.298910753848826e-06, "loss": 0.7672, "step": 33650 }, { "epoch": 1.42, "grad_norm": 4.646060518597896, "learning_rate": 6.297721854413937e-06, "loss": 0.8515, "step": 33655 }, { "epoch": 1.42, "grad_norm": 7.883630140493178, "learning_rate": 6.296532876301995e-06, "loss": 0.7694, "step": 33660 }, { "epoch": 1.42, "grad_norm": 6.781839380970123, "learning_rate": 6.2953438195850816e-06, "loss": 0.8043, "step": 33665 }, { "epoch": 1.42, "grad_norm": 4.690140167382787, "learning_rate": 6.29415468433529e-06, "loss": 0.7567, "step": 33670 }, { "epoch": 1.43, "grad_norm": 4.975010848973904, "learning_rate": 6.29296547062471e-06, "loss": 0.8031, "step": 33675 }, { "epoch": 1.43, "grad_norm": 6.248268129993717, "learning_rate": 6.291776178525443e-06, "loss": 0.7715, "step": 33680 }, { "epoch": 1.43, "grad_norm": 13.015578254711917, "learning_rate": 6.2905868081095925e-06, "loss": 0.7465, "step": 33685 }, { "epoch": 1.43, "grad_norm": 9.446898035359444, "learning_rate": 6.289397359449263e-06, "loss": 0.8119, "step": 33690 }, { "epoch": 1.43, "grad_norm": 6.267318710835655, "learning_rate": 6.28820783261657e-06, "loss": 0.819, "step": 33695 }, { "epoch": 1.43, "grad_norm": 5.5200918795535205, "learning_rate": 6.287018227683633e-06, "loss": 0.8024, "step": 33700 }, { "epoch": 1.43, "grad_norm": 4.646725312417338, "learning_rate": 6.28582854472257e-06, "loss": 0.7962, "step": 33705 }, { "epoch": 1.43, "grad_norm": 14.331725110947344, "learning_rate": 6.28463878380551e-06, "loss": 0.7607, "step": 33710 }, { "epoch": 1.43, "grad_norm": 19.970792824740546, "learning_rate": 6.283448945004584e-06, "loss": 0.8046, "step": 33715 }, { "epoch": 1.43, "grad_norm": 5.083635721352184, "learning_rate": 6.282259028391931e-06, "loss": 0.7686, "step": 33720 }, { "epoch": 1.43, "grad_norm": 7.515417092340349, "learning_rate": 6.281069034039689e-06, "loss": 0.7954, "step": 33725 }, { "epoch": 1.43, "grad_norm": 22.857086869745768, "learning_rate": 6.279878962020005e-06, "loss": 0.7398, "step": 33730 }, { "epoch": 1.43, "grad_norm": 4.969475322008545, "learning_rate": 6.27868881240503e-06, "loss": 0.7709, "step": 33735 }, { "epoch": 1.43, "grad_norm": 9.196962414187352, "learning_rate": 6.2774985852669194e-06, "loss": 0.813, "step": 33740 }, { "epoch": 1.43, "grad_norm": 6.95479997659839, "learning_rate": 6.27630828067783e-06, "loss": 0.7687, "step": 33745 }, { "epoch": 1.43, "grad_norm": 5.166100612565272, "learning_rate": 6.27511789870993e-06, "loss": 0.8269, "step": 33750 }, { "epoch": 1.43, "grad_norm": 7.991588145496196, "learning_rate": 6.27392743943539e-06, "loss": 0.796, "step": 33755 }, { "epoch": 1.43, "grad_norm": 17.550808065110896, "learning_rate": 6.27273690292638e-06, "loss": 0.7678, "step": 33760 }, { "epoch": 1.43, "grad_norm": 16.91998056655981, "learning_rate": 6.271546289255079e-06, "loss": 0.7924, "step": 33765 }, { "epoch": 1.43, "grad_norm": 23.228724521364768, "learning_rate": 6.270355598493675e-06, "loss": 0.8144, "step": 33770 }, { "epoch": 1.43, "grad_norm": 6.352412549267018, "learning_rate": 6.269164830714352e-06, "loss": 0.7723, "step": 33775 }, { "epoch": 1.43, "grad_norm": 5.100729005443817, "learning_rate": 6.267973985989303e-06, "loss": 0.7942, "step": 33780 }, { "epoch": 1.43, "grad_norm": 5.343829188990839, "learning_rate": 6.266783064390726e-06, "loss": 0.794, "step": 33785 }, { "epoch": 1.43, "grad_norm": 19.850079813418155, "learning_rate": 6.2655920659908245e-06, "loss": 0.7717, "step": 33790 }, { "epoch": 1.43, "grad_norm": 6.114012409801975, "learning_rate": 6.264400990861803e-06, "loss": 0.7782, "step": 33795 }, { "epoch": 1.43, "grad_norm": 4.62194088276353, "learning_rate": 6.263209839075874e-06, "loss": 0.7711, "step": 33800 }, { "epoch": 1.43, "grad_norm": 16.459426670595082, "learning_rate": 6.262018610705254e-06, "loss": 0.758, "step": 33805 }, { "epoch": 1.43, "grad_norm": 7.9227259749298025, "learning_rate": 6.260827305822163e-06, "loss": 0.7933, "step": 33810 }, { "epoch": 1.43, "grad_norm": 12.60348967551295, "learning_rate": 6.259635924498826e-06, "loss": 0.7749, "step": 33815 }, { "epoch": 1.43, "grad_norm": 6.571657138223135, "learning_rate": 6.2584444668074725e-06, "loss": 0.7442, "step": 33820 }, { "epoch": 1.43, "grad_norm": 8.474382021308996, "learning_rate": 6.257252932820338e-06, "loss": 0.8083, "step": 33825 }, { "epoch": 1.43, "grad_norm": 4.279164341693866, "learning_rate": 6.256061322609662e-06, "loss": 0.7926, "step": 33830 }, { "epoch": 1.43, "grad_norm": 11.978414178981435, "learning_rate": 6.254869636247689e-06, "loss": 0.7767, "step": 33835 }, { "epoch": 1.43, "grad_norm": 9.87756131730474, "learning_rate": 6.253677873806664e-06, "loss": 0.7771, "step": 33840 }, { "epoch": 1.43, "grad_norm": 5.837892847089357, "learning_rate": 6.252486035358845e-06, "loss": 0.7666, "step": 33845 }, { "epoch": 1.43, "grad_norm": 7.0688123275456425, "learning_rate": 6.251294120976485e-06, "loss": 0.8196, "step": 33850 }, { "epoch": 1.43, "grad_norm": 5.980715273564473, "learning_rate": 6.250102130731848e-06, "loss": 0.8142, "step": 33855 }, { "epoch": 1.43, "grad_norm": 7.53340927478789, "learning_rate": 6.248910064697203e-06, "loss": 0.7988, "step": 33860 }, { "epoch": 1.43, "grad_norm": 4.519232237945045, "learning_rate": 6.247717922944817e-06, "loss": 0.8561, "step": 33865 }, { "epoch": 1.43, "grad_norm": 5.077288648296106, "learning_rate": 6.2465257055469705e-06, "loss": 0.7627, "step": 33870 }, { "epoch": 1.43, "grad_norm": 11.071584510719482, "learning_rate": 6.2453334125759416e-06, "loss": 0.8222, "step": 33875 }, { "epoch": 1.43, "grad_norm": 4.3692577351745765, "learning_rate": 6.244141044104016e-06, "loss": 0.7532, "step": 33880 }, { "epoch": 1.43, "grad_norm": 8.042192469217852, "learning_rate": 6.2429486002034845e-06, "loss": 0.7934, "step": 33885 }, { "epoch": 1.43, "grad_norm": 5.109208215289776, "learning_rate": 6.241756080946639e-06, "loss": 0.7858, "step": 33890 }, { "epoch": 1.43, "grad_norm": 5.403447878678749, "learning_rate": 6.240563486405783e-06, "loss": 0.7837, "step": 33895 }, { "epoch": 1.43, "grad_norm": 4.588711799024247, "learning_rate": 6.239370816653214e-06, "loss": 0.8065, "step": 33900 }, { "epoch": 1.43, "grad_norm": 4.656594978787163, "learning_rate": 6.238178071761245e-06, "loss": 0.8167, "step": 33905 }, { "epoch": 1.44, "grad_norm": 9.073669763022908, "learning_rate": 6.236985251802185e-06, "loss": 0.7854, "step": 33910 }, { "epoch": 1.44, "grad_norm": 9.06583975803752, "learning_rate": 6.235792356848354e-06, "loss": 0.8506, "step": 33915 }, { "epoch": 1.44, "grad_norm": 10.439466141932854, "learning_rate": 6.2345993869720725e-06, "loss": 0.8071, "step": 33920 }, { "epoch": 1.44, "grad_norm": 9.110829511322205, "learning_rate": 6.233406342245666e-06, "loss": 0.7767, "step": 33925 }, { "epoch": 1.44, "grad_norm": 7.670148774262995, "learning_rate": 6.2322132227414665e-06, "loss": 0.7891, "step": 33930 }, { "epoch": 1.44, "grad_norm": 4.960863602233144, "learning_rate": 6.231020028531809e-06, "loss": 0.7794, "step": 33935 }, { "epoch": 1.44, "grad_norm": 5.557495383739372, "learning_rate": 6.229826759689034e-06, "loss": 0.7617, "step": 33940 }, { "epoch": 1.44, "grad_norm": 4.688243051270011, "learning_rate": 6.228633416285484e-06, "loss": 0.7753, "step": 33945 }, { "epoch": 1.44, "grad_norm": 4.958934412467835, "learning_rate": 6.22743999839351e-06, "loss": 0.7936, "step": 33950 }, { "epoch": 1.44, "grad_norm": 6.0968665325614175, "learning_rate": 6.226246506085464e-06, "loss": 0.8259, "step": 33955 }, { "epoch": 1.44, "grad_norm": 6.001261741953069, "learning_rate": 6.225052939433703e-06, "loss": 0.7559, "step": 33960 }, { "epoch": 1.44, "grad_norm": 5.27468128935905, "learning_rate": 6.2238592985105925e-06, "loss": 0.7481, "step": 33965 }, { "epoch": 1.44, "grad_norm": 10.000343085440802, "learning_rate": 6.2226655833884975e-06, "loss": 0.8172, "step": 33970 }, { "epoch": 1.44, "grad_norm": 5.937616089023601, "learning_rate": 6.221471794139788e-06, "loss": 0.7364, "step": 33975 }, { "epoch": 1.44, "grad_norm": 4.700235336896541, "learning_rate": 6.2202779308368454e-06, "loss": 0.7968, "step": 33980 }, { "epoch": 1.44, "grad_norm": 4.88406638499069, "learning_rate": 6.219083993552043e-06, "loss": 0.8028, "step": 33985 }, { "epoch": 1.44, "grad_norm": 4.659719834745839, "learning_rate": 6.2178899823577694e-06, "loss": 0.7831, "step": 33990 }, { "epoch": 1.44, "grad_norm": 5.62628577536684, "learning_rate": 6.216695897326414e-06, "loss": 0.7824, "step": 33995 }, { "epoch": 1.44, "grad_norm": 4.4910818638030925, "learning_rate": 6.215501738530371e-06, "loss": 0.8096, "step": 34000 }, { "epoch": 1.44, "grad_norm": 4.167064205932446, "learning_rate": 6.214307506042038e-06, "loss": 0.7939, "step": 34005 }, { "epoch": 1.44, "grad_norm": 5.487534658645625, "learning_rate": 6.213113199933816e-06, "loss": 0.7648, "step": 34010 }, { "epoch": 1.44, "grad_norm": 5.433151402327117, "learning_rate": 6.2119188202781155e-06, "loss": 0.7668, "step": 34015 }, { "epoch": 1.44, "grad_norm": 7.65246327594702, "learning_rate": 6.210724367147346e-06, "loss": 0.7781, "step": 34020 }, { "epoch": 1.44, "grad_norm": 4.667865949709403, "learning_rate": 6.209529840613925e-06, "loss": 0.7894, "step": 34025 }, { "epoch": 1.44, "grad_norm": 5.114879666857776, "learning_rate": 6.208335240750272e-06, "loss": 0.7945, "step": 34030 }, { "epoch": 1.44, "grad_norm": 6.546163771367936, "learning_rate": 6.207140567628813e-06, "loss": 0.8149, "step": 34035 }, { "epoch": 1.44, "grad_norm": 6.099522284386283, "learning_rate": 6.205945821321977e-06, "loss": 0.7798, "step": 34040 }, { "epoch": 1.44, "grad_norm": 5.572394397884539, "learning_rate": 6.204751001902198e-06, "loss": 0.7947, "step": 34045 }, { "epoch": 1.44, "grad_norm": 4.667708867504958, "learning_rate": 6.203556109441915e-06, "loss": 0.7913, "step": 34050 }, { "epoch": 1.44, "grad_norm": 4.816099801353484, "learning_rate": 6.20236114401357e-06, "loss": 0.7463, "step": 34055 }, { "epoch": 1.44, "grad_norm": 5.53170190313959, "learning_rate": 6.201166105689611e-06, "loss": 0.7813, "step": 34060 }, { "epoch": 1.44, "grad_norm": 4.3808703746127735, "learning_rate": 6.199970994542489e-06, "loss": 0.7974, "step": 34065 }, { "epoch": 1.44, "grad_norm": 4.787181093943561, "learning_rate": 6.19877581064466e-06, "loss": 0.7887, "step": 34070 }, { "epoch": 1.44, "grad_norm": 4.3078981966530225, "learning_rate": 6.197580554068584e-06, "loss": 0.7488, "step": 34075 }, { "epoch": 1.44, "grad_norm": 12.39698128917764, "learning_rate": 6.196385224886726e-06, "loss": 0.7854, "step": 34080 }, { "epoch": 1.44, "grad_norm": 4.721623734254808, "learning_rate": 6.195189823171558e-06, "loss": 0.7943, "step": 34085 }, { "epoch": 1.44, "grad_norm": 5.604864372023594, "learning_rate": 6.1939943489955514e-06, "loss": 0.797, "step": 34090 }, { "epoch": 1.44, "grad_norm": 10.437604271693152, "learning_rate": 6.192798802431183e-06, "loss": 0.749, "step": 34095 }, { "epoch": 1.44, "grad_norm": 6.005849860727069, "learning_rate": 6.191603183550937e-06, "loss": 0.811, "step": 34100 }, { "epoch": 1.44, "grad_norm": 5.826263499121277, "learning_rate": 6.190407492427301e-06, "loss": 0.7877, "step": 34105 }, { "epoch": 1.44, "grad_norm": 11.518243211036426, "learning_rate": 6.189211729132764e-06, "loss": 0.7353, "step": 34110 }, { "epoch": 1.44, "grad_norm": 6.081839292047142, "learning_rate": 6.188015893739824e-06, "loss": 0.7289, "step": 34115 }, { "epoch": 1.44, "grad_norm": 8.62162030836602, "learning_rate": 6.186819986320977e-06, "loss": 0.7679, "step": 34120 }, { "epoch": 1.44, "grad_norm": 5.49739037988598, "learning_rate": 6.185624006948734e-06, "loss": 0.8126, "step": 34125 }, { "epoch": 1.44, "grad_norm": 7.365828539025667, "learning_rate": 6.184427955695597e-06, "loss": 0.7926, "step": 34130 }, { "epoch": 1.44, "grad_norm": 20.122613763498403, "learning_rate": 6.1832318326340836e-06, "loss": 0.7522, "step": 34135 }, { "epoch": 1.44, "grad_norm": 25.722705199570957, "learning_rate": 6.18203563783671e-06, "loss": 0.8103, "step": 34140 }, { "epoch": 1.45, "grad_norm": 14.894930124946361, "learning_rate": 6.180839371375995e-06, "loss": 0.8014, "step": 34145 }, { "epoch": 1.45, "grad_norm": 4.68502490070025, "learning_rate": 6.179643033324469e-06, "loss": 0.7883, "step": 34150 }, { "epoch": 1.45, "grad_norm": 7.730206205907829, "learning_rate": 6.178446623754659e-06, "loss": 0.77, "step": 34155 }, { "epoch": 1.45, "grad_norm": 6.860438992600811, "learning_rate": 6.177250142739104e-06, "loss": 0.7538, "step": 34160 }, { "epoch": 1.45, "grad_norm": 12.896813817108388, "learning_rate": 6.176053590350339e-06, "loss": 0.7473, "step": 34165 }, { "epoch": 1.45, "grad_norm": 5.828523534967848, "learning_rate": 6.174856966660908e-06, "loss": 0.7885, "step": 34170 }, { "epoch": 1.45, "grad_norm": 8.649846692872117, "learning_rate": 6.1736602717433615e-06, "loss": 0.7943, "step": 34175 }, { "epoch": 1.45, "grad_norm": 9.597196832230006, "learning_rate": 6.1724635056702485e-06, "loss": 0.7731, "step": 34180 }, { "epoch": 1.45, "grad_norm": 4.882036265353549, "learning_rate": 6.171266668514129e-06, "loss": 0.803, "step": 34185 }, { "epoch": 1.45, "grad_norm": 4.740605598760074, "learning_rate": 6.170069760347559e-06, "loss": 0.7841, "step": 34190 }, { "epoch": 1.45, "grad_norm": 4.567177106672672, "learning_rate": 6.168872781243107e-06, "loss": 0.7899, "step": 34195 }, { "epoch": 1.45, "grad_norm": 9.353479658910347, "learning_rate": 6.167675731273341e-06, "loss": 0.8117, "step": 34200 }, { "epoch": 1.45, "grad_norm": 5.853490805474365, "learning_rate": 6.166478610510836e-06, "loss": 0.8074, "step": 34205 }, { "epoch": 1.45, "grad_norm": 6.255780180375012, "learning_rate": 6.165281419028167e-06, "loss": 0.7763, "step": 34210 }, { "epoch": 1.45, "grad_norm": 4.3549192222816, "learning_rate": 6.164084156897919e-06, "loss": 0.7495, "step": 34215 }, { "epoch": 1.45, "grad_norm": 6.2778366458818855, "learning_rate": 6.162886824192678e-06, "loss": 0.7804, "step": 34220 }, { "epoch": 1.45, "grad_norm": 4.294693646326014, "learning_rate": 6.161689420985034e-06, "loss": 0.7912, "step": 34225 }, { "epoch": 1.45, "grad_norm": 4.602763280942825, "learning_rate": 6.160491947347582e-06, "loss": 0.772, "step": 34230 }, { "epoch": 1.45, "grad_norm": 4.748799228167618, "learning_rate": 6.159294403352922e-06, "loss": 0.7842, "step": 34235 }, { "epoch": 1.45, "grad_norm": 5.620880059970412, "learning_rate": 6.158096789073655e-06, "loss": 0.7685, "step": 34240 }, { "epoch": 1.45, "grad_norm": 6.277112971517686, "learning_rate": 6.156899104582395e-06, "loss": 0.7652, "step": 34245 }, { "epoch": 1.45, "grad_norm": 10.026545398758971, "learning_rate": 6.155701349951747e-06, "loss": 0.7775, "step": 34250 }, { "epoch": 1.45, "grad_norm": 4.7567221270075555, "learning_rate": 6.15450352525433e-06, "loss": 0.802, "step": 34255 }, { "epoch": 1.45, "grad_norm": 10.374829777110586, "learning_rate": 6.153305630562767e-06, "loss": 0.7814, "step": 34260 }, { "epoch": 1.45, "grad_norm": 6.443677687440992, "learning_rate": 6.152107665949679e-06, "loss": 0.8103, "step": 34265 }, { "epoch": 1.45, "grad_norm": 11.498029050040106, "learning_rate": 6.1509096314876984e-06, "loss": 0.7773, "step": 34270 }, { "epoch": 1.45, "grad_norm": 24.0894683841244, "learning_rate": 6.149711527249457e-06, "loss": 0.7564, "step": 34275 }, { "epoch": 1.45, "grad_norm": 19.423177352698108, "learning_rate": 6.14851335330759e-06, "loss": 0.8138, "step": 34280 }, { "epoch": 1.45, "grad_norm": 5.303172273041681, "learning_rate": 6.1473151097347435e-06, "loss": 0.8146, "step": 34285 }, { "epoch": 1.45, "grad_norm": 4.256656282540311, "learning_rate": 6.146116796603561e-06, "loss": 0.8087, "step": 34290 }, { "epoch": 1.45, "grad_norm": 8.758563319486756, "learning_rate": 6.1449184139866934e-06, "loss": 0.8088, "step": 34295 }, { "epoch": 1.45, "grad_norm": 4.69419904807984, "learning_rate": 6.143719961956794e-06, "loss": 0.8175, "step": 34300 }, { "epoch": 1.45, "grad_norm": 5.421471046486495, "learning_rate": 6.1425214405865245e-06, "loss": 0.7725, "step": 34305 }, { "epoch": 1.45, "grad_norm": 10.216531894528826, "learning_rate": 6.141322849948543e-06, "loss": 0.8278, "step": 34310 }, { "epoch": 1.45, "grad_norm": 5.687174838359153, "learning_rate": 6.14012419011552e-06, "loss": 0.8016, "step": 34315 }, { "epoch": 1.45, "grad_norm": 5.1806717544155845, "learning_rate": 6.1389254611601245e-06, "loss": 0.7771, "step": 34320 }, { "epoch": 1.45, "grad_norm": 6.41011278936483, "learning_rate": 6.137726663155034e-06, "loss": 0.7781, "step": 34325 }, { "epoch": 1.45, "grad_norm": 12.714702846575664, "learning_rate": 6.136527796172928e-06, "loss": 0.7715, "step": 34330 }, { "epoch": 1.45, "grad_norm": 8.205782145134632, "learning_rate": 6.135328860286488e-06, "loss": 0.7896, "step": 34335 }, { "epoch": 1.45, "grad_norm": 5.128060304488903, "learning_rate": 6.134129855568404e-06, "loss": 0.7967, "step": 34340 }, { "epoch": 1.45, "grad_norm": 4.341593979154896, "learning_rate": 6.132930782091367e-06, "loss": 0.8202, "step": 34345 }, { "epoch": 1.45, "grad_norm": 6.28490483128136, "learning_rate": 6.131731639928075e-06, "loss": 0.7953, "step": 34350 }, { "epoch": 1.45, "grad_norm": 6.789051641452311, "learning_rate": 6.1305324291512265e-06, "loss": 0.7899, "step": 34355 }, { "epoch": 1.45, "grad_norm": 5.966130253862901, "learning_rate": 6.129333149833528e-06, "loss": 0.8268, "step": 34360 }, { "epoch": 1.45, "grad_norm": 6.465795886937925, "learning_rate": 6.128133802047686e-06, "loss": 0.7709, "step": 34365 }, { "epoch": 1.45, "grad_norm": 4.5942301506624545, "learning_rate": 6.126934385866414e-06, "loss": 0.7303, "step": 34370 }, { "epoch": 1.45, "grad_norm": 4.3871066357512785, "learning_rate": 6.125734901362431e-06, "loss": 0.7267, "step": 34375 }, { "epoch": 1.46, "grad_norm": 6.178169114929558, "learning_rate": 6.124535348608458e-06, "loss": 0.7922, "step": 34380 }, { "epoch": 1.46, "grad_norm": 12.911457921035186, "learning_rate": 6.123335727677217e-06, "loss": 0.794, "step": 34385 }, { "epoch": 1.46, "grad_norm": 8.252914434087787, "learning_rate": 6.122136038641441e-06, "loss": 0.7794, "step": 34390 }, { "epoch": 1.46, "grad_norm": 4.479340026538647, "learning_rate": 6.120936281573861e-06, "loss": 0.778, "step": 34395 }, { "epoch": 1.46, "grad_norm": 5.626486971773706, "learning_rate": 6.119736456547217e-06, "loss": 0.7493, "step": 34400 }, { "epoch": 1.46, "grad_norm": 4.593541292224104, "learning_rate": 6.1185365636342495e-06, "loss": 0.7461, "step": 34405 }, { "epoch": 1.46, "grad_norm": 4.406773413162359, "learning_rate": 6.117336602907706e-06, "loss": 0.8254, "step": 34410 }, { "epoch": 1.46, "grad_norm": 5.286827112078306, "learning_rate": 6.116136574440335e-06, "loss": 0.7656, "step": 34415 }, { "epoch": 1.46, "grad_norm": 12.977814096397365, "learning_rate": 6.1149364783048904e-06, "loss": 0.7774, "step": 34420 }, { "epoch": 1.46, "grad_norm": 4.959256348545565, "learning_rate": 6.1137363145741315e-06, "loss": 0.7844, "step": 34425 }, { "epoch": 1.46, "grad_norm": 10.737034691521146, "learning_rate": 6.112536083320821e-06, "loss": 0.8205, "step": 34430 }, { "epoch": 1.46, "grad_norm": 6.1674633446038065, "learning_rate": 6.111335784617725e-06, "loss": 0.7892, "step": 34435 }, { "epoch": 1.46, "grad_norm": 6.030513230670901, "learning_rate": 6.110135418537612e-06, "loss": 0.8109, "step": 34440 }, { "epoch": 1.46, "grad_norm": 5.567682056714353, "learning_rate": 6.108934985153261e-06, "loss": 0.7966, "step": 34445 }, { "epoch": 1.46, "grad_norm": 4.307998245626137, "learning_rate": 6.107734484537448e-06, "loss": 0.7596, "step": 34450 }, { "epoch": 1.46, "grad_norm": 8.76410913127957, "learning_rate": 6.106533916762955e-06, "loss": 0.7688, "step": 34455 }, { "epoch": 1.46, "grad_norm": 9.128187306238752, "learning_rate": 6.105333281902571e-06, "loss": 0.8032, "step": 34460 }, { "epoch": 1.46, "grad_norm": 11.524959566805476, "learning_rate": 6.104132580029086e-06, "loss": 0.7664, "step": 34465 }, { "epoch": 1.46, "grad_norm": 6.718037816576605, "learning_rate": 6.1029318112152945e-06, "loss": 0.7488, "step": 34470 }, { "epoch": 1.46, "grad_norm": 5.500707717709202, "learning_rate": 6.101730975533996e-06, "loss": 0.7445, "step": 34475 }, { "epoch": 1.46, "grad_norm": 8.588835994024326, "learning_rate": 6.100530073057994e-06, "loss": 0.7783, "step": 34480 }, { "epoch": 1.46, "grad_norm": 5.805799803838212, "learning_rate": 6.099329103860095e-06, "loss": 0.7925, "step": 34485 }, { "epoch": 1.46, "grad_norm": 6.7701995916443085, "learning_rate": 6.098128068013112e-06, "loss": 0.8119, "step": 34490 }, { "epoch": 1.46, "grad_norm": 6.0087936675207745, "learning_rate": 6.096926965589858e-06, "loss": 0.7929, "step": 34495 }, { "epoch": 1.46, "grad_norm": 6.292341715882617, "learning_rate": 6.095725796663151e-06, "loss": 0.7722, "step": 34500 }, { "epoch": 1.46, "grad_norm": 7.227944077813412, "learning_rate": 6.094524561305819e-06, "loss": 0.7675, "step": 34505 }, { "epoch": 1.46, "grad_norm": 7.234618327444643, "learning_rate": 6.093323259590686e-06, "loss": 0.7882, "step": 34510 }, { "epoch": 1.46, "grad_norm": 10.553847309789164, "learning_rate": 6.092121891590586e-06, "loss": 0.7601, "step": 34515 }, { "epoch": 1.46, "grad_norm": 6.055198094877963, "learning_rate": 6.0909204573783505e-06, "loss": 0.7575, "step": 34520 }, { "epoch": 1.46, "grad_norm": 5.124209601561907, "learning_rate": 6.089718957026821e-06, "loss": 0.8143, "step": 34525 }, { "epoch": 1.46, "grad_norm": 4.504296651792603, "learning_rate": 6.088517390608842e-06, "loss": 0.7681, "step": 34530 }, { "epoch": 1.46, "grad_norm": 9.588529299483298, "learning_rate": 6.0873157581972595e-06, "loss": 0.7848, "step": 34535 }, { "epoch": 1.46, "grad_norm": 7.346537961021802, "learning_rate": 6.086114059864926e-06, "loss": 0.7879, "step": 34540 }, { "epoch": 1.46, "grad_norm": 5.823574832875575, "learning_rate": 6.084912295684696e-06, "loss": 0.8131, "step": 34545 }, { "epoch": 1.46, "grad_norm": 8.620419284662399, "learning_rate": 6.08371046572943e-06, "loss": 0.7867, "step": 34550 }, { "epoch": 1.46, "grad_norm": 12.222807749306725, "learning_rate": 6.0825085700719895e-06, "loss": 0.7994, "step": 34555 }, { "epoch": 1.46, "grad_norm": 5.639912162591293, "learning_rate": 6.0813066087852435e-06, "loss": 0.7547, "step": 34560 }, { "epoch": 1.46, "grad_norm": 8.382533502959495, "learning_rate": 6.080104581942063e-06, "loss": 0.7785, "step": 34565 }, { "epoch": 1.46, "grad_norm": 4.375276319096297, "learning_rate": 6.078902489615322e-06, "loss": 0.8186, "step": 34570 }, { "epoch": 1.46, "grad_norm": 5.0707377002053535, "learning_rate": 6.077700331877903e-06, "loss": 0.8248, "step": 34575 }, { "epoch": 1.46, "grad_norm": 6.103196402738586, "learning_rate": 6.076498108802686e-06, "loss": 0.7597, "step": 34580 }, { "epoch": 1.46, "grad_norm": 4.996083284476239, "learning_rate": 6.07529582046256e-06, "loss": 0.8063, "step": 34585 }, { "epoch": 1.46, "grad_norm": 10.66828348415592, "learning_rate": 6.074093466930416e-06, "loss": 0.7621, "step": 34590 }, { "epoch": 1.46, "grad_norm": 8.404291384536124, "learning_rate": 6.072891048279148e-06, "loss": 0.8191, "step": 34595 }, { "epoch": 1.46, "grad_norm": 24.259725752370393, "learning_rate": 6.071688564581657e-06, "loss": 0.7726, "step": 34600 }, { "epoch": 1.46, "grad_norm": 11.219175306689486, "learning_rate": 6.070486015910846e-06, "loss": 0.7823, "step": 34605 }, { "epoch": 1.46, "grad_norm": 10.525062508578785, "learning_rate": 6.069283402339619e-06, "loss": 0.7665, "step": 34610 }, { "epoch": 1.46, "grad_norm": 5.459301537740056, "learning_rate": 6.06808072394089e-06, "loss": 0.7652, "step": 34615 }, { "epoch": 1.47, "grad_norm": 4.58255467813776, "learning_rate": 6.0668779807875726e-06, "loss": 0.7787, "step": 34620 }, { "epoch": 1.47, "grad_norm": 7.191498946876796, "learning_rate": 6.065675172952587e-06, "loss": 0.7447, "step": 34625 }, { "epoch": 1.47, "grad_norm": 5.617402950500728, "learning_rate": 6.064472300508853e-06, "loss": 0.7729, "step": 34630 }, { "epoch": 1.47, "grad_norm": 6.084350507353659, "learning_rate": 6.063269363529301e-06, "loss": 0.7681, "step": 34635 }, { "epoch": 1.47, "grad_norm": 5.866721723374138, "learning_rate": 6.062066362086858e-06, "loss": 0.7451, "step": 34640 }, { "epoch": 1.47, "grad_norm": 5.512566141460544, "learning_rate": 6.0608632962544586e-06, "loss": 0.7813, "step": 34645 }, { "epoch": 1.47, "grad_norm": 5.4016321978687305, "learning_rate": 6.059660166105045e-06, "loss": 0.7689, "step": 34650 }, { "epoch": 1.47, "grad_norm": 4.282949318772552, "learning_rate": 6.058456971711555e-06, "loss": 0.7594, "step": 34655 }, { "epoch": 1.47, "grad_norm": 4.358737300231551, "learning_rate": 6.057253713146938e-06, "loss": 0.759, "step": 34660 }, { "epoch": 1.47, "grad_norm": 5.115906535287775, "learning_rate": 6.056050390484141e-06, "loss": 0.7735, "step": 34665 }, { "epoch": 1.47, "grad_norm": 9.665351400546173, "learning_rate": 6.054847003796121e-06, "loss": 0.8246, "step": 34670 }, { "epoch": 1.47, "grad_norm": 5.024610394489488, "learning_rate": 6.053643553155834e-06, "loss": 0.8077, "step": 34675 }, { "epoch": 1.47, "grad_norm": 4.884158504475991, "learning_rate": 6.052440038636242e-06, "loss": 0.8103, "step": 34680 }, { "epoch": 1.47, "grad_norm": 4.782583386386305, "learning_rate": 6.051236460310309e-06, "loss": 0.8074, "step": 34685 }, { "epoch": 1.47, "grad_norm": 5.209505905604866, "learning_rate": 6.050032818251008e-06, "loss": 0.7417, "step": 34690 }, { "epoch": 1.47, "grad_norm": 5.357238451931976, "learning_rate": 6.048829112531308e-06, "loss": 0.7502, "step": 34695 }, { "epoch": 1.47, "grad_norm": 7.644898695136965, "learning_rate": 6.047625343224191e-06, "loss": 0.8242, "step": 34700 }, { "epoch": 1.47, "grad_norm": 4.857206336949919, "learning_rate": 6.0464215104026334e-06, "loss": 0.7721, "step": 34705 }, { "epoch": 1.47, "grad_norm": 4.519163476186186, "learning_rate": 6.045217614139623e-06, "loss": 0.8065, "step": 34710 }, { "epoch": 1.47, "grad_norm": 4.549848579505487, "learning_rate": 6.0440136545081485e-06, "loss": 0.793, "step": 34715 }, { "epoch": 1.47, "grad_norm": 5.497533940545804, "learning_rate": 6.042809631581199e-06, "loss": 0.729, "step": 34720 }, { "epoch": 1.47, "grad_norm": 4.700488283101436, "learning_rate": 6.041605545431774e-06, "loss": 0.761, "step": 34725 }, { "epoch": 1.47, "grad_norm": 8.86190420513941, "learning_rate": 6.0404013961328726e-06, "loss": 0.7528, "step": 34730 }, { "epoch": 1.47, "grad_norm": 7.055714871569262, "learning_rate": 6.0391971837575e-06, "loss": 0.7376, "step": 34735 }, { "epoch": 1.47, "grad_norm": 9.41196321307138, "learning_rate": 6.037992908378664e-06, "loss": 0.7494, "step": 34740 }, { "epoch": 1.47, "grad_norm": 7.0385730436908345, "learning_rate": 6.036788570069373e-06, "loss": 0.7191, "step": 34745 }, { "epoch": 1.47, "grad_norm": 5.003093368622524, "learning_rate": 6.035584168902646e-06, "loss": 0.7571, "step": 34750 }, { "epoch": 1.47, "grad_norm": 4.873117387778862, "learning_rate": 6.0343797049515e-06, "loss": 0.7493, "step": 34755 }, { "epoch": 1.47, "grad_norm": 4.810647592350096, "learning_rate": 6.033175178288962e-06, "loss": 0.7659, "step": 34760 }, { "epoch": 1.47, "grad_norm": 10.284068096030106, "learning_rate": 6.031970588988057e-06, "loss": 0.7491, "step": 34765 }, { "epoch": 1.47, "grad_norm": 10.175686183762476, "learning_rate": 6.030765937121812e-06, "loss": 0.7783, "step": 34770 }, { "epoch": 1.47, "grad_norm": 9.596337317241252, "learning_rate": 6.029561222763265e-06, "loss": 0.7874, "step": 34775 }, { "epoch": 1.47, "grad_norm": 13.7759997480062, "learning_rate": 6.028356445985453e-06, "loss": 0.777, "step": 34780 }, { "epoch": 1.47, "grad_norm": 8.139453311981725, "learning_rate": 6.027151606861422e-06, "loss": 0.7567, "step": 34785 }, { "epoch": 1.47, "grad_norm": 13.773812043866542, "learning_rate": 6.025946705464213e-06, "loss": 0.7813, "step": 34790 }, { "epoch": 1.47, "grad_norm": 9.867209758105302, "learning_rate": 6.024741741866877e-06, "loss": 0.8086, "step": 34795 }, { "epoch": 1.47, "grad_norm": 5.239636871008306, "learning_rate": 6.023536716142468e-06, "loss": 0.7967, "step": 34800 }, { "epoch": 1.47, "grad_norm": 4.534730403953438, "learning_rate": 6.022331628364042e-06, "loss": 0.7739, "step": 34805 }, { "epoch": 1.47, "grad_norm": 5.464949817251281, "learning_rate": 6.021126478604661e-06, "loss": 0.7884, "step": 34810 }, { "epoch": 1.47, "grad_norm": 5.426086292689652, "learning_rate": 6.01992126693739e-06, "loss": 0.7801, "step": 34815 }, { "epoch": 1.47, "grad_norm": 16.32247221820926, "learning_rate": 6.018715993435296e-06, "loss": 0.7359, "step": 34820 }, { "epoch": 1.47, "grad_norm": 13.79328730450873, "learning_rate": 6.017510658171453e-06, "loss": 0.7521, "step": 34825 }, { "epoch": 1.47, "grad_norm": 7.808002527600671, "learning_rate": 6.016305261218936e-06, "loss": 0.7575, "step": 34830 }, { "epoch": 1.47, "grad_norm": 9.487535670880174, "learning_rate": 6.015099802650823e-06, "loss": 0.803, "step": 34835 }, { "epoch": 1.47, "grad_norm": 4.777230569040924, "learning_rate": 6.0138942825402e-06, "loss": 0.7478, "step": 34840 }, { "epoch": 1.47, "grad_norm": 7.67012680625771, "learning_rate": 6.012688700960154e-06, "loss": 0.7774, "step": 34845 }, { "epoch": 1.47, "grad_norm": 19.342245885094446, "learning_rate": 6.011483057983774e-06, "loss": 0.7549, "step": 34850 }, { "epoch": 1.48, "grad_norm": 26.812684259109933, "learning_rate": 6.010277353684156e-06, "loss": 0.7555, "step": 34855 }, { "epoch": 1.48, "grad_norm": 18.024609643387702, "learning_rate": 6.009071588134398e-06, "loss": 0.7879, "step": 34860 }, { "epoch": 1.48, "grad_norm": 6.560421071687058, "learning_rate": 6.007865761407602e-06, "loss": 0.7976, "step": 34865 }, { "epoch": 1.48, "grad_norm": 8.450221479476452, "learning_rate": 6.006659873576874e-06, "loss": 0.754, "step": 34870 }, { "epoch": 1.48, "grad_norm": 14.359205042335839, "learning_rate": 6.005453924715322e-06, "loss": 0.7661, "step": 34875 }, { "epoch": 1.48, "grad_norm": 10.690273356588177, "learning_rate": 6.004247914896062e-06, "loss": 0.7897, "step": 34880 }, { "epoch": 1.48, "grad_norm": 6.1483865199357055, "learning_rate": 6.0030418441922076e-06, "loss": 0.7807, "step": 34885 }, { "epoch": 1.48, "grad_norm": 12.946443086295215, "learning_rate": 6.00183571267688e-06, "loss": 0.8033, "step": 34890 }, { "epoch": 1.48, "grad_norm": 18.642388402880393, "learning_rate": 6.000629520423206e-06, "loss": 0.7579, "step": 34895 }, { "epoch": 1.48, "grad_norm": 8.727261823869453, "learning_rate": 5.999423267504309e-06, "loss": 0.7718, "step": 34900 }, { "epoch": 1.48, "grad_norm": 6.157817298581853, "learning_rate": 5.998216953993327e-06, "loss": 0.7833, "step": 34905 }, { "epoch": 1.48, "grad_norm": 14.140947733949853, "learning_rate": 5.99701057996339e-06, "loss": 0.7482, "step": 34910 }, { "epoch": 1.48, "grad_norm": 6.381456934094643, "learning_rate": 5.9958041454876355e-06, "loss": 0.8263, "step": 34915 }, { "epoch": 1.48, "grad_norm": 4.905388882591197, "learning_rate": 5.9945976506392125e-06, "loss": 0.7652, "step": 34920 }, { "epoch": 1.48, "grad_norm": 5.568367978560908, "learning_rate": 5.993391095491261e-06, "loss": 0.7709, "step": 34925 }, { "epoch": 1.48, "grad_norm": 4.683460944199713, "learning_rate": 5.992184480116936e-06, "loss": 0.7457, "step": 34930 }, { "epoch": 1.48, "grad_norm": 4.471657182779599, "learning_rate": 5.990977804589387e-06, "loss": 0.7851, "step": 34935 }, { "epoch": 1.48, "grad_norm": 8.38249526274511, "learning_rate": 5.9897710689817725e-06, "loss": 0.8057, "step": 34940 }, { "epoch": 1.48, "grad_norm": 8.210248195827969, "learning_rate": 5.988564273367254e-06, "loss": 0.7694, "step": 34945 }, { "epoch": 1.48, "grad_norm": 5.663181419039169, "learning_rate": 5.987357417818997e-06, "loss": 0.7782, "step": 34950 }, { "epoch": 1.48, "grad_norm": 9.47329177949332, "learning_rate": 5.986150502410166e-06, "loss": 0.7803, "step": 34955 }, { "epoch": 1.48, "grad_norm": 7.061709888828411, "learning_rate": 5.984943527213937e-06, "loss": 0.8249, "step": 34960 }, { "epoch": 1.48, "grad_norm": 5.429913146980737, "learning_rate": 5.98373649230348e-06, "loss": 0.7828, "step": 34965 }, { "epoch": 1.48, "grad_norm": 6.04745980119588, "learning_rate": 5.9825293977519795e-06, "loss": 0.778, "step": 34970 }, { "epoch": 1.48, "grad_norm": 7.11873690850023, "learning_rate": 5.981322243632614e-06, "loss": 0.7668, "step": 34975 }, { "epoch": 1.48, "grad_norm": 6.6130197712584105, "learning_rate": 5.980115030018574e-06, "loss": 0.8051, "step": 34980 }, { "epoch": 1.48, "grad_norm": 6.259833092708131, "learning_rate": 5.9789077569830455e-06, "loss": 0.7928, "step": 34985 }, { "epoch": 1.48, "grad_norm": 4.543789940994861, "learning_rate": 5.977700424599222e-06, "loss": 0.7593, "step": 34990 }, { "epoch": 1.48, "grad_norm": 7.8286899833381955, "learning_rate": 5.976493032940303e-06, "loss": 0.7652, "step": 34995 }, { "epoch": 1.48, "grad_norm": 6.972048611444596, "learning_rate": 5.975285582079486e-06, "loss": 0.7681, "step": 35000 }, { "epoch": 1.48, "grad_norm": 8.319066407229352, "learning_rate": 5.97407807208998e-06, "loss": 0.7794, "step": 35005 }, { "epoch": 1.48, "grad_norm": 5.7751126999389655, "learning_rate": 5.972870503044989e-06, "loss": 0.7534, "step": 35010 }, { "epoch": 1.48, "grad_norm": 10.447201816780197, "learning_rate": 5.971662875017723e-06, "loss": 0.766, "step": 35015 }, { "epoch": 1.48, "grad_norm": 6.4453953433613655, "learning_rate": 5.9704551880814e-06, "loss": 0.851, "step": 35020 }, { "epoch": 1.48, "grad_norm": 4.963713827637875, "learning_rate": 5.969247442309238e-06, "loss": 0.7208, "step": 35025 }, { "epoch": 1.48, "grad_norm": 4.378033749537752, "learning_rate": 5.968039637774459e-06, "loss": 0.7576, "step": 35030 }, { "epoch": 1.48, "grad_norm": 4.343690921483835, "learning_rate": 5.9668317745502864e-06, "loss": 0.7738, "step": 35035 }, { "epoch": 1.48, "grad_norm": 5.8343421442963574, "learning_rate": 5.965623852709953e-06, "loss": 0.8001, "step": 35040 }, { "epoch": 1.48, "grad_norm": 5.718532206921225, "learning_rate": 5.96441587232669e-06, "loss": 0.7726, "step": 35045 }, { "epoch": 1.48, "grad_norm": 4.1624921362561516, "learning_rate": 5.963207833473732e-06, "loss": 0.7792, "step": 35050 }, { "epoch": 1.48, "grad_norm": 11.661130437160107, "learning_rate": 5.961999736224321e-06, "loss": 0.7392, "step": 35055 }, { "epoch": 1.48, "grad_norm": 5.733973181887682, "learning_rate": 5.960791580651699e-06, "loss": 0.793, "step": 35060 }, { "epoch": 1.48, "grad_norm": 5.627034840101632, "learning_rate": 5.959583366829114e-06, "loss": 0.7908, "step": 35065 }, { "epoch": 1.48, "grad_norm": 4.341425503063352, "learning_rate": 5.958375094829817e-06, "loss": 0.7531, "step": 35070 }, { "epoch": 1.48, "grad_norm": 4.120420558187537, "learning_rate": 5.957166764727059e-06, "loss": 0.7586, "step": 35075 }, { "epoch": 1.48, "grad_norm": 9.331085668020656, "learning_rate": 5.955958376594101e-06, "loss": 0.7536, "step": 35080 }, { "epoch": 1.48, "grad_norm": 8.018280502380685, "learning_rate": 5.954749930504201e-06, "loss": 0.7955, "step": 35085 }, { "epoch": 1.49, "grad_norm": 6.438022521781885, "learning_rate": 5.953541426530627e-06, "loss": 0.7894, "step": 35090 }, { "epoch": 1.49, "grad_norm": 5.0111579975110265, "learning_rate": 5.952332864746644e-06, "loss": 0.7839, "step": 35095 }, { "epoch": 1.49, "grad_norm": 5.118333088525564, "learning_rate": 5.951124245225524e-06, "loss": 0.8123, "step": 35100 }, { "epoch": 1.49, "grad_norm": 4.117332967827528, "learning_rate": 5.949915568040544e-06, "loss": 0.7557, "step": 35105 }, { "epoch": 1.49, "grad_norm": 13.600719817831894, "learning_rate": 5.948706833264981e-06, "loss": 0.7899, "step": 35110 }, { "epoch": 1.49, "grad_norm": 29.470359768645483, "learning_rate": 5.947498040972118e-06, "loss": 0.7932, "step": 35115 }, { "epoch": 1.49, "grad_norm": 31.794206009136563, "learning_rate": 5.946289191235236e-06, "loss": 0.7913, "step": 35120 }, { "epoch": 1.49, "grad_norm": 13.288278017831857, "learning_rate": 5.945080284127631e-06, "loss": 0.7705, "step": 35125 }, { "epoch": 1.49, "grad_norm": 19.42499878419053, "learning_rate": 5.943871319722592e-06, "loss": 0.8036, "step": 35130 }, { "epoch": 1.49, "grad_norm": 4.994820360128343, "learning_rate": 5.9426622980934134e-06, "loss": 0.8007, "step": 35135 }, { "epoch": 1.49, "grad_norm": 10.017635858779661, "learning_rate": 5.9414532193133976e-06, "loss": 0.7784, "step": 35140 }, { "epoch": 1.49, "grad_norm": 13.203849381434013, "learning_rate": 5.940244083455846e-06, "loss": 0.7797, "step": 35145 }, { "epoch": 1.49, "grad_norm": 6.848857003423155, "learning_rate": 5.939034890594065e-06, "loss": 0.7996, "step": 35150 }, { "epoch": 1.49, "grad_norm": 12.85879472578569, "learning_rate": 5.937825640801366e-06, "loss": 0.7635, "step": 35155 }, { "epoch": 1.49, "grad_norm": 20.381594119497006, "learning_rate": 5.936616334151059e-06, "loss": 0.7964, "step": 35160 }, { "epoch": 1.49, "grad_norm": 6.358394741703964, "learning_rate": 5.935406970716464e-06, "loss": 0.7383, "step": 35165 }, { "epoch": 1.49, "grad_norm": 5.735599362541418, "learning_rate": 5.934197550570898e-06, "loss": 0.7872, "step": 35170 }, { "epoch": 1.49, "grad_norm": 5.846131333348263, "learning_rate": 5.932988073787687e-06, "loss": 0.745, "step": 35175 }, { "epoch": 1.49, "grad_norm": 5.924324501919118, "learning_rate": 5.931778540440158e-06, "loss": 0.7518, "step": 35180 }, { "epoch": 1.49, "grad_norm": 5.845810999800216, "learning_rate": 5.9305689506016406e-06, "loss": 0.7678, "step": 35185 }, { "epoch": 1.49, "grad_norm": 4.601173023700071, "learning_rate": 5.92935930434547e-06, "loss": 0.7635, "step": 35190 }, { "epoch": 1.49, "grad_norm": 4.8765654223981505, "learning_rate": 5.928149601744981e-06, "loss": 0.7924, "step": 35195 }, { "epoch": 1.49, "grad_norm": 5.78094392533559, "learning_rate": 5.926939842873515e-06, "loss": 0.7401, "step": 35200 }, { "epoch": 1.49, "grad_norm": 7.062577935214561, "learning_rate": 5.925730027804416e-06, "loss": 0.791, "step": 35205 }, { "epoch": 1.49, "grad_norm": 5.121683566628302, "learning_rate": 5.924520156611035e-06, "loss": 0.7438, "step": 35210 }, { "epoch": 1.49, "grad_norm": 7.52259579818607, "learning_rate": 5.923310229366718e-06, "loss": 0.7954, "step": 35215 }, { "epoch": 1.49, "grad_norm": 10.471378872273766, "learning_rate": 5.922100246144822e-06, "loss": 0.7706, "step": 35220 }, { "epoch": 1.49, "grad_norm": 8.22188342090364, "learning_rate": 5.920890207018704e-06, "loss": 0.8041, "step": 35225 }, { "epoch": 1.49, "grad_norm": 4.522946450339393, "learning_rate": 5.919680112061725e-06, "loss": 0.7637, "step": 35230 }, { "epoch": 1.49, "grad_norm": 14.550441160637263, "learning_rate": 5.91846996134725e-06, "loss": 0.8375, "step": 35235 }, { "epoch": 1.49, "grad_norm": 6.663077709231732, "learning_rate": 5.9172597549486485e-06, "loss": 0.7743, "step": 35240 }, { "epoch": 1.49, "grad_norm": 7.256855744868679, "learning_rate": 5.916049492939288e-06, "loss": 0.7798, "step": 35245 }, { "epoch": 1.49, "grad_norm": 22.98557804025475, "learning_rate": 5.914839175392546e-06, "loss": 0.7192, "step": 35250 }, { "epoch": 1.49, "grad_norm": 12.068742861569723, "learning_rate": 5.913628802381799e-06, "loss": 0.8207, "step": 35255 }, { "epoch": 1.49, "grad_norm": 5.117032859502086, "learning_rate": 5.912418373980429e-06, "loss": 0.7635, "step": 35260 }, { "epoch": 1.49, "grad_norm": 12.794649520892104, "learning_rate": 5.911207890261821e-06, "loss": 0.7625, "step": 35265 }, { "epoch": 1.49, "grad_norm": 7.070369730109931, "learning_rate": 5.909997351299361e-06, "loss": 0.7956, "step": 35270 }, { "epoch": 1.49, "grad_norm": 9.570821767512408, "learning_rate": 5.908786757166445e-06, "loss": 0.8233, "step": 35275 }, { "epoch": 1.49, "grad_norm": 14.982739472421656, "learning_rate": 5.9075761079364625e-06, "loss": 0.7684, "step": 35280 }, { "epoch": 1.49, "grad_norm": 8.881102930158718, "learning_rate": 5.906365403682815e-06, "loss": 0.7595, "step": 35285 }, { "epoch": 1.49, "grad_norm": 5.056373961808256, "learning_rate": 5.905154644478903e-06, "loss": 0.7726, "step": 35290 }, { "epoch": 1.49, "grad_norm": 7.423977334871507, "learning_rate": 5.90394383039813e-06, "loss": 0.8075, "step": 35295 }, { "epoch": 1.49, "grad_norm": 5.092197116195112, "learning_rate": 5.902732961513906e-06, "loss": 0.7682, "step": 35300 }, { "epoch": 1.49, "grad_norm": 10.553450175873829, "learning_rate": 5.901522037899641e-06, "loss": 0.75, "step": 35305 }, { "epoch": 1.49, "grad_norm": 12.527406798476562, "learning_rate": 5.900311059628751e-06, "loss": 0.7762, "step": 35310 }, { "epoch": 1.49, "grad_norm": 7.113472707145592, "learning_rate": 5.899100026774654e-06, "loss": 0.7522, "step": 35315 }, { "epoch": 1.49, "grad_norm": 6.619710956856185, "learning_rate": 5.897888939410768e-06, "loss": 0.7874, "step": 35320 }, { "epoch": 1.5, "grad_norm": 5.0232321403466615, "learning_rate": 5.896677797610524e-06, "loss": 0.791, "step": 35325 }, { "epoch": 1.5, "grad_norm": 6.310499922417376, "learning_rate": 5.8954666014473435e-06, "loss": 0.7619, "step": 35330 }, { "epoch": 1.5, "grad_norm": 7.281310666375591, "learning_rate": 5.894255350994663e-06, "loss": 0.8334, "step": 35335 }, { "epoch": 1.5, "grad_norm": 8.293957115605188, "learning_rate": 5.893044046325915e-06, "loss": 0.805, "step": 35340 }, { "epoch": 1.5, "grad_norm": 4.335017674432076, "learning_rate": 5.891832687514534e-06, "loss": 0.7716, "step": 35345 }, { "epoch": 1.5, "grad_norm": 7.061030441380728, "learning_rate": 5.890621274633967e-06, "loss": 0.7489, "step": 35350 }, { "epoch": 1.5, "grad_norm": 4.607128326135727, "learning_rate": 5.889409807757654e-06, "loss": 0.8118, "step": 35355 }, { "epoch": 1.5, "grad_norm": 6.122108985717882, "learning_rate": 5.888198286959045e-06, "loss": 0.7844, "step": 35360 }, { "epoch": 1.5, "grad_norm": 4.0880148218002486, "learning_rate": 5.8869867123115885e-06, "loss": 0.7751, "step": 35365 }, { "epoch": 1.5, "grad_norm": 8.644669976822856, "learning_rate": 5.885775083888742e-06, "loss": 0.7638, "step": 35370 }, { "epoch": 1.5, "grad_norm": 12.381372015891664, "learning_rate": 5.884563401763961e-06, "loss": 0.7805, "step": 35375 }, { "epoch": 1.5, "grad_norm": 4.768665337631596, "learning_rate": 5.883351666010706e-06, "loss": 0.7989, "step": 35380 }, { "epoch": 1.5, "grad_norm": 6.557053215105897, "learning_rate": 5.882139876702442e-06, "loss": 0.7579, "step": 35385 }, { "epoch": 1.5, "grad_norm": 5.783536033212442, "learning_rate": 5.880928033912634e-06, "loss": 0.8017, "step": 35390 }, { "epoch": 1.5, "grad_norm": 5.6395420550262765, "learning_rate": 5.879716137714757e-06, "loss": 0.7291, "step": 35395 }, { "epoch": 1.5, "grad_norm": 4.080613746063198, "learning_rate": 5.87850418818228e-06, "loss": 0.7793, "step": 35400 }, { "epoch": 1.5, "grad_norm": 5.146536356446158, "learning_rate": 5.877292185388682e-06, "loss": 0.7679, "step": 35405 }, { "epoch": 1.5, "grad_norm": 5.345736457139933, "learning_rate": 5.876080129407445e-06, "loss": 0.7139, "step": 35410 }, { "epoch": 1.5, "grad_norm": 3.9984387349237687, "learning_rate": 5.874868020312047e-06, "loss": 0.7595, "step": 35415 }, { "epoch": 1.5, "grad_norm": 4.89020555453679, "learning_rate": 5.8736558581759795e-06, "loss": 0.7791, "step": 35420 }, { "epoch": 1.5, "grad_norm": 6.153144115118243, "learning_rate": 5.872443643072731e-06, "loss": 0.7639, "step": 35425 }, { "epoch": 1.5, "grad_norm": 10.81834548691463, "learning_rate": 5.871231375075793e-06, "loss": 0.8271, "step": 35430 }, { "epoch": 1.5, "grad_norm": 7.401384550286797, "learning_rate": 5.8700190542586665e-06, "loss": 0.7826, "step": 35435 }, { "epoch": 1.5, "grad_norm": 6.30012910191852, "learning_rate": 5.868806680694844e-06, "loss": 0.7556, "step": 35440 }, { "epoch": 1.5, "grad_norm": 6.303222478019857, "learning_rate": 5.867594254457833e-06, "loss": 0.8094, "step": 35445 }, { "epoch": 1.5, "grad_norm": 6.143857861405899, "learning_rate": 5.866381775621137e-06, "loss": 0.7679, "step": 35450 }, { "epoch": 1.5, "grad_norm": 4.995035463516286, "learning_rate": 5.865169244258266e-06, "loss": 0.7654, "step": 35455 }, { "epoch": 1.5, "grad_norm": 7.585811920156551, "learning_rate": 5.863956660442733e-06, "loss": 0.7812, "step": 35460 }, { "epoch": 1.5, "grad_norm": 7.083215085815355, "learning_rate": 5.862744024248053e-06, "loss": 0.7431, "step": 35465 }, { "epoch": 1.5, "grad_norm": 4.60419105595136, "learning_rate": 5.861531335747743e-06, "loss": 0.766, "step": 35470 }, { "epoch": 1.5, "grad_norm": 9.882406985262726, "learning_rate": 5.8603185950153265e-06, "loss": 0.7674, "step": 35475 }, { "epoch": 1.5, "grad_norm": 8.657436157098296, "learning_rate": 5.8591058021243275e-06, "loss": 0.8046, "step": 35480 }, { "epoch": 1.5, "grad_norm": 6.455621908503985, "learning_rate": 5.857892957148275e-06, "loss": 0.736, "step": 35485 }, { "epoch": 1.5, "grad_norm": 12.332551964529308, "learning_rate": 5.856680060160698e-06, "loss": 0.7621, "step": 35490 }, { "epoch": 1.5, "grad_norm": 19.025869716264832, "learning_rate": 5.855467111235133e-06, "loss": 0.7743, "step": 35495 }, { "epoch": 1.5, "grad_norm": 16.007599014024574, "learning_rate": 5.8542541104451164e-06, "loss": 0.7752, "step": 35500 }, { "epoch": 1.5, "grad_norm": 14.225709600333296, "learning_rate": 5.85304105786419e-06, "loss": 0.7487, "step": 35505 }, { "epoch": 1.5, "grad_norm": 6.745927281763014, "learning_rate": 5.851827953565897e-06, "loss": 0.7976, "step": 35510 }, { "epoch": 1.5, "grad_norm": 5.112620909188447, "learning_rate": 5.8506147976237844e-06, "loss": 0.8209, "step": 35515 }, { "epoch": 1.5, "grad_norm": 6.796835108917794, "learning_rate": 5.849401590111403e-06, "loss": 0.7245, "step": 35520 }, { "epoch": 1.5, "grad_norm": 4.365738500065053, "learning_rate": 5.848188331102303e-06, "loss": 0.7414, "step": 35525 }, { "epoch": 1.5, "grad_norm": 5.127402861085467, "learning_rate": 5.846975020670045e-06, "loss": 0.7891, "step": 35530 }, { "epoch": 1.5, "grad_norm": 9.235317738855576, "learning_rate": 5.845761658888186e-06, "loss": 0.7956, "step": 35535 }, { "epoch": 1.5, "grad_norm": 8.917507279621178, "learning_rate": 5.844548245830288e-06, "loss": 0.7963, "step": 35540 }, { "epoch": 1.5, "grad_norm": 13.690374060051681, "learning_rate": 5.843334781569918e-06, "loss": 0.7975, "step": 35545 }, { "epoch": 1.5, "grad_norm": 4.480398716800464, "learning_rate": 5.8421212661806435e-06, "loss": 0.7493, "step": 35550 }, { "epoch": 1.5, "grad_norm": 8.442052432991687, "learning_rate": 5.840907699736039e-06, "loss": 0.7961, "step": 35555 }, { "epoch": 1.5, "grad_norm": 5.520735819421966, "learning_rate": 5.8396940823096775e-06, "loss": 0.7847, "step": 35560 }, { "epoch": 1.51, "grad_norm": 27.124322271049827, "learning_rate": 5.838480413975135e-06, "loss": 0.7678, "step": 35565 }, { "epoch": 1.51, "grad_norm": 5.384877079445805, "learning_rate": 5.837266694805996e-06, "loss": 0.762, "step": 35570 }, { "epoch": 1.51, "grad_norm": 6.25118319333567, "learning_rate": 5.836052924875844e-06, "loss": 0.8021, "step": 35575 }, { "epoch": 1.51, "grad_norm": 4.48196828136772, "learning_rate": 5.834839104258265e-06, "loss": 0.7533, "step": 35580 }, { "epoch": 1.51, "grad_norm": 4.08614152008201, "learning_rate": 5.833625233026852e-06, "loss": 0.7676, "step": 35585 }, { "epoch": 1.51, "grad_norm": 6.2639469462546495, "learning_rate": 5.8324113112551936e-06, "loss": 0.7847, "step": 35590 }, { "epoch": 1.51, "grad_norm": 6.0812249878379125, "learning_rate": 5.831197339016893e-06, "loss": 0.7479, "step": 35595 }, { "epoch": 1.51, "grad_norm": 10.816922415291735, "learning_rate": 5.829983316385543e-06, "loss": 0.7472, "step": 35600 }, { "epoch": 1.51, "grad_norm": 6.21452289376387, "learning_rate": 5.828769243434752e-06, "loss": 0.7534, "step": 35605 }, { "epoch": 1.51, "grad_norm": 5.2607972272546375, "learning_rate": 5.827555120238121e-06, "loss": 0.7879, "step": 35610 }, { "epoch": 1.51, "grad_norm": 9.931020446241725, "learning_rate": 5.826340946869261e-06, "loss": 0.7352, "step": 35615 }, { "epoch": 1.51, "grad_norm": 10.8032613008011, "learning_rate": 5.825126723401784e-06, "loss": 0.7646, "step": 35620 }, { "epoch": 1.51, "grad_norm": 5.782401302081939, "learning_rate": 5.823912449909303e-06, "loss": 0.7733, "step": 35625 }, { "epoch": 1.51, "grad_norm": 11.593323104936328, "learning_rate": 5.822698126465439e-06, "loss": 0.7898, "step": 35630 }, { "epoch": 1.51, "grad_norm": 14.355576140796922, "learning_rate": 5.821483753143809e-06, "loss": 0.8175, "step": 35635 }, { "epoch": 1.51, "grad_norm": 5.388751730702089, "learning_rate": 5.8202693300180404e-06, "loss": 0.7494, "step": 35640 }, { "epoch": 1.51, "grad_norm": 5.34556255886083, "learning_rate": 5.819054857161758e-06, "loss": 0.8051, "step": 35645 }, { "epoch": 1.51, "grad_norm": 15.569846832882002, "learning_rate": 5.817840334648591e-06, "loss": 0.7739, "step": 35650 }, { "epoch": 1.51, "grad_norm": 12.183574664236257, "learning_rate": 5.816625762552175e-06, "loss": 0.7563, "step": 35655 }, { "epoch": 1.51, "grad_norm": 8.893357106700686, "learning_rate": 5.815411140946143e-06, "loss": 0.7582, "step": 35660 }, { "epoch": 1.51, "grad_norm": 5.104639409493043, "learning_rate": 5.814196469904137e-06, "loss": 0.8255, "step": 35665 }, { "epoch": 1.51, "grad_norm": 6.433111627799949, "learning_rate": 5.8129817494997976e-06, "loss": 0.8104, "step": 35670 }, { "epoch": 1.51, "grad_norm": 4.42666332375167, "learning_rate": 5.811766979806768e-06, "loss": 0.7436, "step": 35675 }, { "epoch": 1.51, "grad_norm": 4.9008503954022595, "learning_rate": 5.8105521608987e-06, "loss": 0.7785, "step": 35680 }, { "epoch": 1.51, "grad_norm": 7.641816750575053, "learning_rate": 5.80933729284924e-06, "loss": 0.752, "step": 35685 }, { "epoch": 1.51, "grad_norm": 4.415346017358438, "learning_rate": 5.808122375732044e-06, "loss": 0.7697, "step": 35690 }, { "epoch": 1.51, "grad_norm": 6.149767847667662, "learning_rate": 5.806907409620768e-06, "loss": 0.7944, "step": 35695 }, { "epoch": 1.51, "grad_norm": 10.34104419587703, "learning_rate": 5.8056923945890745e-06, "loss": 0.7603, "step": 35700 }, { "epoch": 1.51, "grad_norm": 4.582477482951482, "learning_rate": 5.804477330710623e-06, "loss": 0.7386, "step": 35705 }, { "epoch": 1.51, "grad_norm": 15.042520545906932, "learning_rate": 5.8032622180590805e-06, "loss": 0.8398, "step": 35710 }, { "epoch": 1.51, "grad_norm": 22.80175122182459, "learning_rate": 5.802047056708116e-06, "loss": 0.7194, "step": 35715 }, { "epoch": 1.51, "grad_norm": 6.541306011039479, "learning_rate": 5.800831846731402e-06, "loss": 0.8017, "step": 35720 }, { "epoch": 1.51, "grad_norm": 7.8057041667914, "learning_rate": 5.799616588202611e-06, "loss": 0.7824, "step": 35725 }, { "epoch": 1.51, "grad_norm": 6.887167836404786, "learning_rate": 5.7984012811954225e-06, "loss": 0.7633, "step": 35730 }, { "epoch": 1.51, "grad_norm": 8.210730717948925, "learning_rate": 5.797185925783515e-06, "loss": 0.8119, "step": 35735 }, { "epoch": 1.51, "grad_norm": 17.731726587136, "learning_rate": 5.795970522040573e-06, "loss": 0.7673, "step": 35740 }, { "epoch": 1.51, "grad_norm": 16.73682256121197, "learning_rate": 5.794755070040283e-06, "loss": 0.7242, "step": 35745 }, { "epoch": 1.51, "grad_norm": 4.269560649304046, "learning_rate": 5.793539569856335e-06, "loss": 0.7915, "step": 35750 }, { "epoch": 1.51, "grad_norm": 7.779977761564655, "learning_rate": 5.79232402156242e-06, "loss": 0.7668, "step": 35755 }, { "epoch": 1.51, "grad_norm": 6.117695598947614, "learning_rate": 5.7911084252322334e-06, "loss": 0.784, "step": 35760 }, { "epoch": 1.51, "grad_norm": 4.387822661169172, "learning_rate": 5.789892780939475e-06, "loss": 0.784, "step": 35765 }, { "epoch": 1.51, "grad_norm": 4.720664706631668, "learning_rate": 5.7886770887578404e-06, "loss": 0.7507, "step": 35770 }, { "epoch": 1.51, "grad_norm": 5.073725072186133, "learning_rate": 5.787461348761039e-06, "loss": 0.7379, "step": 35775 }, { "epoch": 1.51, "grad_norm": 4.456495687757091, "learning_rate": 5.786245561022774e-06, "loss": 0.7745, "step": 35780 }, { "epoch": 1.51, "grad_norm": 7.007717042346071, "learning_rate": 5.785029725616758e-06, "loss": 0.7611, "step": 35785 }, { "epoch": 1.51, "grad_norm": 6.078397353446753, "learning_rate": 5.783813842616703e-06, "loss": 0.7829, "step": 35790 }, { "epoch": 1.51, "grad_norm": 4.601490639695328, "learning_rate": 5.782597912096322e-06, "loss": 0.7485, "step": 35795 }, { "epoch": 1.52, "grad_norm": 7.6023290843292015, "learning_rate": 5.781381934129335e-06, "loss": 0.769, "step": 35800 }, { "epoch": 1.52, "grad_norm": 14.11802200120585, "learning_rate": 5.780165908789463e-06, "loss": 0.7777, "step": 35805 }, { "epoch": 1.52, "grad_norm": 6.593952283561465, "learning_rate": 5.778949836150431e-06, "loss": 0.7263, "step": 35810 }, { "epoch": 1.52, "grad_norm": 4.867453009854076, "learning_rate": 5.777733716285965e-06, "loss": 0.7693, "step": 35815 }, { "epoch": 1.52, "grad_norm": 16.354286632613253, "learning_rate": 5.776517549269793e-06, "loss": 0.7407, "step": 35820 }, { "epoch": 1.52, "grad_norm": 52.873242025649986, "learning_rate": 5.775301335175651e-06, "loss": 0.7625, "step": 35825 }, { "epoch": 1.52, "grad_norm": 13.794491435493061, "learning_rate": 5.7740850740772725e-06, "loss": 0.7685, "step": 35830 }, { "epoch": 1.52, "grad_norm": 5.796282872941366, "learning_rate": 5.772868766048394e-06, "loss": 0.7556, "step": 35835 }, { "epoch": 1.52, "grad_norm": 14.50886051152214, "learning_rate": 5.771652411162763e-06, "loss": 0.7381, "step": 35840 }, { "epoch": 1.52, "grad_norm": 13.5102747545092, "learning_rate": 5.7704360094941145e-06, "loss": 0.7293, "step": 35845 }, { "epoch": 1.52, "grad_norm": 6.125003338167975, "learning_rate": 5.7692195611162035e-06, "loss": 0.7566, "step": 35850 }, { "epoch": 1.52, "grad_norm": 26.973032001743942, "learning_rate": 5.768003066102774e-06, "loss": 0.7832, "step": 35855 }, { "epoch": 1.52, "grad_norm": 9.180360226734198, "learning_rate": 5.766786524527582e-06, "loss": 0.793, "step": 35860 }, { "epoch": 1.52, "grad_norm": 10.830977219195862, "learning_rate": 5.765569936464382e-06, "loss": 0.7358, "step": 35865 }, { "epoch": 1.52, "grad_norm": 12.52365519570048, "learning_rate": 5.764353301986932e-06, "loss": 0.7772, "step": 35870 }, { "epoch": 1.52, "grad_norm": 5.343703416531583, "learning_rate": 5.763136621168992e-06, "loss": 0.7344, "step": 35875 }, { "epoch": 1.52, "grad_norm": 19.158814711821826, "learning_rate": 5.761919894084326e-06, "loss": 0.729, "step": 35880 }, { "epoch": 1.52, "grad_norm": 13.10645471797121, "learning_rate": 5.760703120806702e-06, "loss": 0.7296, "step": 35885 }, { "epoch": 1.52, "grad_norm": 7.48759836142084, "learning_rate": 5.759486301409889e-06, "loss": 0.769, "step": 35890 }, { "epoch": 1.52, "grad_norm": 4.086018467180757, "learning_rate": 5.758269435967657e-06, "loss": 0.7409, "step": 35895 }, { "epoch": 1.52, "grad_norm": 8.176159238151056, "learning_rate": 5.757052524553785e-06, "loss": 0.7767, "step": 35900 }, { "epoch": 1.52, "grad_norm": 4.629734405044549, "learning_rate": 5.7558355672420456e-06, "loss": 0.738, "step": 35905 }, { "epoch": 1.52, "grad_norm": 4.754163772768, "learning_rate": 5.754618564106225e-06, "loss": 0.7705, "step": 35910 }, { "epoch": 1.52, "grad_norm": 5.227100604205821, "learning_rate": 5.753401515220102e-06, "loss": 0.725, "step": 35915 }, { "epoch": 1.52, "grad_norm": 6.697862327769576, "learning_rate": 5.752184420657465e-06, "loss": 0.7598, "step": 35920 }, { "epoch": 1.52, "grad_norm": 6.942626067055615, "learning_rate": 5.750967280492103e-06, "loss": 0.7377, "step": 35925 }, { "epoch": 1.52, "grad_norm": 4.47493741978674, "learning_rate": 5.749750094797804e-06, "loss": 0.7357, "step": 35930 }, { "epoch": 1.52, "grad_norm": 5.619366415417316, "learning_rate": 5.748532863648367e-06, "loss": 0.7916, "step": 35935 }, { "epoch": 1.52, "grad_norm": 6.49675906704056, "learning_rate": 5.747315587117587e-06, "loss": 0.7139, "step": 35940 }, { "epoch": 1.52, "grad_norm": 12.666261080625173, "learning_rate": 5.746098265279265e-06, "loss": 0.7243, "step": 35945 }, { "epoch": 1.52, "grad_norm": 7.536966332918606, "learning_rate": 5.744880898207203e-06, "loss": 0.7425, "step": 35950 }, { "epoch": 1.52, "grad_norm": 4.9274467738400585, "learning_rate": 5.743663485975205e-06, "loss": 0.7964, "step": 35955 }, { "epoch": 1.52, "grad_norm": 5.153674004720579, "learning_rate": 5.7424460286570815e-06, "loss": 0.7697, "step": 35960 }, { "epoch": 1.52, "grad_norm": 5.330447631159754, "learning_rate": 5.741228526326641e-06, "loss": 0.767, "step": 35965 }, { "epoch": 1.52, "grad_norm": 6.588202054051127, "learning_rate": 5.740010979057699e-06, "loss": 0.7844, "step": 35970 }, { "epoch": 1.52, "grad_norm": 5.534273914711299, "learning_rate": 5.738793386924072e-06, "loss": 0.781, "step": 35975 }, { "epoch": 1.52, "grad_norm": 6.95904690094936, "learning_rate": 5.737575749999577e-06, "loss": 0.7591, "step": 35980 }, { "epoch": 1.52, "grad_norm": 4.895528455288323, "learning_rate": 5.736358068358036e-06, "loss": 0.7099, "step": 35985 }, { "epoch": 1.52, "grad_norm": 5.243089113684096, "learning_rate": 5.7351403420732755e-06, "loss": 0.7826, "step": 35990 }, { "epoch": 1.52, "grad_norm": 5.617223246735131, "learning_rate": 5.7339225712191224e-06, "loss": 0.7343, "step": 35995 }, { "epoch": 1.52, "grad_norm": 7.8746207313840575, "learning_rate": 5.7327047558694045e-06, "loss": 0.7528, "step": 36000 }, { "epoch": 1.52, "grad_norm": 5.937826515880196, "learning_rate": 5.731486896097956e-06, "loss": 0.7682, "step": 36005 }, { "epoch": 1.52, "grad_norm": 5.821692722242406, "learning_rate": 5.730268991978613e-06, "loss": 0.7982, "step": 36010 }, { "epoch": 1.52, "grad_norm": 4.497330473162316, "learning_rate": 5.7290510435852094e-06, "loss": 0.7536, "step": 36015 }, { "epoch": 1.52, "grad_norm": 7.593124973338194, "learning_rate": 5.72783305099159e-06, "loss": 0.716, "step": 36020 }, { "epoch": 1.52, "grad_norm": 5.921043953278889, "learning_rate": 5.726615014271595e-06, "loss": 0.7794, "step": 36025 }, { "epoch": 1.52, "grad_norm": 4.162342563214488, "learning_rate": 5.725396933499074e-06, "loss": 0.6628, "step": 36030 }, { "epoch": 1.53, "grad_norm": 4.547397242211524, "learning_rate": 5.724178808747874e-06, "loss": 0.7536, "step": 36035 }, { "epoch": 1.53, "grad_norm": 8.16295011354217, "learning_rate": 5.722960640091843e-06, "loss": 0.7627, "step": 36040 }, { "epoch": 1.53, "grad_norm": 6.8016290002261535, "learning_rate": 5.7217424276048405e-06, "loss": 0.7826, "step": 36045 }, { "epoch": 1.53, "grad_norm": 4.762170826235397, "learning_rate": 5.720524171360719e-06, "loss": 0.7415, "step": 36050 }, { "epoch": 1.53, "grad_norm": 4.531002839286745, "learning_rate": 5.719305871433341e-06, "loss": 0.723, "step": 36055 }, { "epoch": 1.53, "grad_norm": 6.353437384591896, "learning_rate": 5.718087527896566e-06, "loss": 0.7358, "step": 36060 }, { "epoch": 1.53, "grad_norm": 6.219340645320895, "learning_rate": 5.716869140824259e-06, "loss": 0.7416, "step": 36065 }, { "epoch": 1.53, "grad_norm": 4.904330943635854, "learning_rate": 5.715650710290288e-06, "loss": 0.7453, "step": 36070 }, { "epoch": 1.53, "grad_norm": 4.680002927586169, "learning_rate": 5.714432236368522e-06, "loss": 0.7335, "step": 36075 }, { "epoch": 1.53, "grad_norm": 4.221813272055969, "learning_rate": 5.713213719132835e-06, "loss": 0.7351, "step": 36080 }, { "epoch": 1.53, "grad_norm": 4.582071266389329, "learning_rate": 5.711995158657101e-06, "loss": 0.7845, "step": 36085 }, { "epoch": 1.53, "grad_norm": 5.50926529727831, "learning_rate": 5.7107765550151985e-06, "loss": 0.7748, "step": 36090 }, { "epoch": 1.53, "grad_norm": 5.414969406458174, "learning_rate": 5.709557908281006e-06, "loss": 0.7819, "step": 36095 }, { "epoch": 1.53, "grad_norm": 6.354600689733497, "learning_rate": 5.708339218528406e-06, "loss": 0.7845, "step": 36100 }, { "epoch": 1.53, "grad_norm": 4.025808887933558, "learning_rate": 5.707120485831288e-06, "loss": 0.7012, "step": 36105 }, { "epoch": 1.53, "grad_norm": 5.756287376701307, "learning_rate": 5.705901710263537e-06, "loss": 0.7851, "step": 36110 }, { "epoch": 1.53, "grad_norm": 5.827886235046456, "learning_rate": 5.704682891899044e-06, "loss": 0.7235, "step": 36115 }, { "epoch": 1.53, "grad_norm": 6.668113011226178, "learning_rate": 5.703464030811704e-06, "loss": 0.7527, "step": 36120 }, { "epoch": 1.53, "grad_norm": 10.59265568040324, "learning_rate": 5.702245127075411e-06, "loss": 0.7311, "step": 36125 }, { "epoch": 1.53, "grad_norm": 5.146470669806949, "learning_rate": 5.701026180764065e-06, "loss": 0.7851, "step": 36130 }, { "epoch": 1.53, "grad_norm": 4.832302421405683, "learning_rate": 5.699807191951567e-06, "loss": 0.7738, "step": 36135 }, { "epoch": 1.53, "grad_norm": 5.115130581117767, "learning_rate": 5.69858816071182e-06, "loss": 0.7432, "step": 36140 }, { "epoch": 1.53, "grad_norm": 10.929869080720659, "learning_rate": 5.69736908711873e-06, "loss": 0.7788, "step": 36145 }, { "epoch": 1.53, "grad_norm": 7.4237374532795695, "learning_rate": 5.696149971246207e-06, "loss": 0.7369, "step": 36150 }, { "epoch": 1.53, "grad_norm": 4.821355642582877, "learning_rate": 5.694930813168162e-06, "loss": 0.7537, "step": 36155 }, { "epoch": 1.53, "grad_norm": 11.189306895025428, "learning_rate": 5.693711612958508e-06, "loss": 0.767, "step": 36160 }, { "epoch": 1.53, "grad_norm": 4.78248144737292, "learning_rate": 5.692492370691163e-06, "loss": 0.7862, "step": 36165 }, { "epoch": 1.53, "grad_norm": 5.426321763742699, "learning_rate": 5.6912730864400455e-06, "loss": 0.7457, "step": 36170 }, { "epoch": 1.53, "grad_norm": 4.681999734820031, "learning_rate": 5.690053760279076e-06, "loss": 0.7556, "step": 36175 }, { "epoch": 1.53, "grad_norm": 6.498484256595738, "learning_rate": 5.688834392282181e-06, "loss": 0.7346, "step": 36180 }, { "epoch": 1.53, "grad_norm": 4.013656962446652, "learning_rate": 5.687614982523284e-06, "loss": 0.742, "step": 36185 }, { "epoch": 1.53, "grad_norm": 5.054359516641117, "learning_rate": 5.686395531076317e-06, "loss": 0.7819, "step": 36190 }, { "epoch": 1.53, "grad_norm": 5.035879832579504, "learning_rate": 5.685176038015212e-06, "loss": 0.7413, "step": 36195 }, { "epoch": 1.53, "grad_norm": 5.363547220597939, "learning_rate": 5.6839565034139e-06, "loss": 0.7503, "step": 36200 }, { "epoch": 1.53, "grad_norm": 6.8779848928246485, "learning_rate": 5.682736927346322e-06, "loss": 0.7763, "step": 36205 }, { "epoch": 1.53, "grad_norm": 10.641753574126716, "learning_rate": 5.681517309886413e-06, "loss": 0.7721, "step": 36210 }, { "epoch": 1.53, "grad_norm": 8.569354750994023, "learning_rate": 5.680297651108118e-06, "loss": 0.7398, "step": 36215 }, { "epoch": 1.53, "grad_norm": 4.704886319648279, "learning_rate": 5.6790779510853815e-06, "loss": 0.7071, "step": 36220 }, { "epoch": 1.53, "grad_norm": 4.642769271909682, "learning_rate": 5.6778582098921466e-06, "loss": 0.7392, "step": 36225 }, { "epoch": 1.53, "grad_norm": 10.188283644795725, "learning_rate": 5.676638427602366e-06, "loss": 0.7401, "step": 36230 }, { "epoch": 1.53, "grad_norm": 8.356340993264226, "learning_rate": 5.67541860428999e-06, "loss": 0.7584, "step": 36235 }, { "epoch": 1.53, "grad_norm": 12.863059147962543, "learning_rate": 5.674198740028977e-06, "loss": 0.782, "step": 36240 }, { "epoch": 1.53, "grad_norm": 9.285215615121325, "learning_rate": 5.672978834893278e-06, "loss": 0.8201, "step": 36245 }, { "epoch": 1.53, "grad_norm": 8.05121558073283, "learning_rate": 5.671758888956855e-06, "loss": 0.7479, "step": 36250 }, { "epoch": 1.53, "grad_norm": 7.038312854881935, "learning_rate": 5.670538902293669e-06, "loss": 0.7801, "step": 36255 }, { "epoch": 1.53, "grad_norm": 4.859644511209882, "learning_rate": 5.669318874977683e-06, "loss": 0.7252, "step": 36260 }, { "epoch": 1.53, "grad_norm": 5.309271617820732, "learning_rate": 5.6680988070828665e-06, "loss": 0.7442, "step": 36265 }, { "epoch": 1.54, "grad_norm": 6.99130699160241, "learning_rate": 5.666878698683187e-06, "loss": 0.8292, "step": 36270 }, { "epoch": 1.54, "grad_norm": 7.081780629673708, "learning_rate": 5.6656585498526176e-06, "loss": 0.7489, "step": 36275 }, { "epoch": 1.54, "grad_norm": 4.794065489779067, "learning_rate": 5.66443836066513e-06, "loss": 0.7764, "step": 36280 }, { "epoch": 1.54, "grad_norm": 16.977509881643627, "learning_rate": 5.663218131194701e-06, "loss": 0.7875, "step": 36285 }, { "epoch": 1.54, "grad_norm": 7.144576665501161, "learning_rate": 5.661997861515312e-06, "loss": 0.7724, "step": 36290 }, { "epoch": 1.54, "grad_norm": 5.670751785886699, "learning_rate": 5.66077755170094e-06, "loss": 0.7555, "step": 36295 }, { "epoch": 1.54, "grad_norm": 5.247671311920664, "learning_rate": 5.659557201825574e-06, "loss": 0.7446, "step": 36300 }, { "epoch": 1.54, "grad_norm": 4.192342486706729, "learning_rate": 5.658336811963196e-06, "loss": 0.7134, "step": 36305 }, { "epoch": 1.54, "grad_norm": 7.815642519282774, "learning_rate": 5.657116382187797e-06, "loss": 0.7561, "step": 36310 }, { "epoch": 1.54, "grad_norm": 10.224115556171064, "learning_rate": 5.655895912573368e-06, "loss": 0.7362, "step": 36315 }, { "epoch": 1.54, "grad_norm": 5.264816330795972, "learning_rate": 5.6546754031939e-06, "loss": 0.7703, "step": 36320 }, { "epoch": 1.54, "grad_norm": 10.4430298561932, "learning_rate": 5.653454854123393e-06, "loss": 0.7336, "step": 36325 }, { "epoch": 1.54, "grad_norm": 14.418816036880918, "learning_rate": 5.652234265435842e-06, "loss": 0.7778, "step": 36330 }, { "epoch": 1.54, "grad_norm": 6.244555476837727, "learning_rate": 5.65101363720525e-06, "loss": 0.7436, "step": 36335 }, { "epoch": 1.54, "grad_norm": 8.042267677053026, "learning_rate": 5.649792969505617e-06, "loss": 0.7758, "step": 36340 }, { "epoch": 1.54, "grad_norm": 4.6970479191015375, "learning_rate": 5.64857226241095e-06, "loss": 0.7654, "step": 36345 }, { "epoch": 1.54, "grad_norm": 5.744264625884218, "learning_rate": 5.6473515159952585e-06, "loss": 0.7713, "step": 36350 }, { "epoch": 1.54, "grad_norm": 7.432690146891411, "learning_rate": 5.64613073033255e-06, "loss": 0.7379, "step": 36355 }, { "epoch": 1.54, "grad_norm": 8.916425821797883, "learning_rate": 5.64490990549684e-06, "loss": 0.7628, "step": 36360 }, { "epoch": 1.54, "grad_norm": 9.901548770114443, "learning_rate": 5.643689041562141e-06, "loss": 0.7758, "step": 36365 }, { "epoch": 1.54, "grad_norm": 11.545978616397411, "learning_rate": 5.642468138602472e-06, "loss": 0.7543, "step": 36370 }, { "epoch": 1.54, "grad_norm": 9.766205097303478, "learning_rate": 5.641247196691852e-06, "loss": 0.7655, "step": 36375 }, { "epoch": 1.54, "grad_norm": 4.7761349349455555, "learning_rate": 5.640026215904303e-06, "loss": 0.7471, "step": 36380 }, { "epoch": 1.54, "grad_norm": 9.039754176101685, "learning_rate": 5.63880519631385e-06, "loss": 0.7601, "step": 36385 }, { "epoch": 1.54, "grad_norm": 7.167325564652919, "learning_rate": 5.63758413799452e-06, "loss": 0.7086, "step": 36390 }, { "epoch": 1.54, "grad_norm": 7.641494168042841, "learning_rate": 5.6363630410203415e-06, "loss": 0.7199, "step": 36395 }, { "epoch": 1.54, "grad_norm": 5.796779792245474, "learning_rate": 5.635141905465347e-06, "loss": 0.7812, "step": 36400 }, { "epoch": 1.54, "grad_norm": 6.980531616127717, "learning_rate": 5.633920731403569e-06, "loss": 0.7511, "step": 36405 }, { "epoch": 1.54, "grad_norm": 5.173917776880962, "learning_rate": 5.632699518909045e-06, "loss": 0.7786, "step": 36410 }, { "epoch": 1.54, "grad_norm": 5.0748340996694505, "learning_rate": 5.631478268055812e-06, "loss": 0.7422, "step": 36415 }, { "epoch": 1.54, "grad_norm": 4.531527906834546, "learning_rate": 5.6302569789179115e-06, "loss": 0.7267, "step": 36420 }, { "epoch": 1.54, "grad_norm": 7.421548362470831, "learning_rate": 5.629035651569388e-06, "loss": 0.7809, "step": 36425 }, { "epoch": 1.54, "grad_norm": 7.371329999365018, "learning_rate": 5.627814286084284e-06, "loss": 0.7634, "step": 36430 }, { "epoch": 1.54, "grad_norm": 5.591503494981009, "learning_rate": 5.626592882536651e-06, "loss": 0.7816, "step": 36435 }, { "epoch": 1.54, "grad_norm": 18.435414824542768, "learning_rate": 5.625371441000537e-06, "loss": 0.7811, "step": 36440 }, { "epoch": 1.54, "grad_norm": 10.143455007023272, "learning_rate": 5.624149961549992e-06, "loss": 0.7538, "step": 36445 }, { "epoch": 1.54, "grad_norm": 14.285501295528324, "learning_rate": 5.622928444259077e-06, "loss": 0.7921, "step": 36450 }, { "epoch": 1.54, "grad_norm": 6.233101724093227, "learning_rate": 5.6217068892018435e-06, "loss": 0.7432, "step": 36455 }, { "epoch": 1.54, "grad_norm": 5.852665172259824, "learning_rate": 5.620485296452353e-06, "loss": 0.8275, "step": 36460 }, { "epoch": 1.54, "grad_norm": 4.80568451354192, "learning_rate": 5.619263666084667e-06, "loss": 0.766, "step": 36465 }, { "epoch": 1.54, "grad_norm": 7.240333718505863, "learning_rate": 5.618041998172848e-06, "loss": 0.778, "step": 36470 }, { "epoch": 1.54, "grad_norm": 4.8567276212325945, "learning_rate": 5.616820292790963e-06, "loss": 0.7552, "step": 36475 }, { "epoch": 1.54, "grad_norm": 6.710169902749028, "learning_rate": 5.615598550013082e-06, "loss": 0.7824, "step": 36480 }, { "epoch": 1.54, "grad_norm": 11.247173380305359, "learning_rate": 5.614376769913274e-06, "loss": 0.7802, "step": 36485 }, { "epoch": 1.54, "grad_norm": 8.039763231784095, "learning_rate": 5.6131549525656104e-06, "loss": 0.7764, "step": 36490 }, { "epoch": 1.54, "grad_norm": 4.564663446305456, "learning_rate": 5.61193309804417e-06, "loss": 0.8137, "step": 36495 }, { "epoch": 1.54, "grad_norm": 4.336584212915461, "learning_rate": 5.610711206423028e-06, "loss": 0.7739, "step": 36500 }, { "epoch": 1.54, "grad_norm": 6.234405442418801, "learning_rate": 5.6094892777762636e-06, "loss": 0.768, "step": 36505 }, { "epoch": 1.55, "grad_norm": 5.969296988930027, "learning_rate": 5.608267312177961e-06, "loss": 0.7673, "step": 36510 }, { "epoch": 1.55, "grad_norm": 5.214312746362722, "learning_rate": 5.607045309702201e-06, "loss": 0.7446, "step": 36515 }, { "epoch": 1.55, "grad_norm": 6.482227268583426, "learning_rate": 5.605823270423074e-06, "loss": 0.7361, "step": 36520 }, { "epoch": 1.55, "grad_norm": 4.960761888692762, "learning_rate": 5.604601194414666e-06, "loss": 0.7529, "step": 36525 }, { "epoch": 1.55, "grad_norm": 5.510615187036223, "learning_rate": 5.603379081751067e-06, "loss": 0.7641, "step": 36530 }, { "epoch": 1.55, "grad_norm": 4.309386769968196, "learning_rate": 5.602156932506373e-06, "loss": 0.7817, "step": 36535 }, { "epoch": 1.55, "grad_norm": 11.82336960305453, "learning_rate": 5.600934746754677e-06, "loss": 0.7117, "step": 36540 }, { "epoch": 1.55, "grad_norm": 14.332505091778648, "learning_rate": 5.599712524570079e-06, "loss": 0.7448, "step": 36545 }, { "epoch": 1.55, "grad_norm": 5.589062503407123, "learning_rate": 5.598490266026677e-06, "loss": 0.772, "step": 36550 }, { "epoch": 1.55, "grad_norm": 13.912471931875253, "learning_rate": 5.597267971198571e-06, "loss": 0.7377, "step": 36555 }, { "epoch": 1.55, "grad_norm": 14.817308098016637, "learning_rate": 5.59604564015987e-06, "loss": 0.7899, "step": 36560 }, { "epoch": 1.55, "grad_norm": 4.177076279811699, "learning_rate": 5.594823272984675e-06, "loss": 0.718, "step": 36565 }, { "epoch": 1.55, "grad_norm": 11.832901420938317, "learning_rate": 5.593600869747099e-06, "loss": 0.7437, "step": 36570 }, { "epoch": 1.55, "grad_norm": 19.19514347981489, "learning_rate": 5.59237843052125e-06, "loss": 0.7732, "step": 36575 }, { "epoch": 1.55, "grad_norm": 4.912274216084402, "learning_rate": 5.591155955381241e-06, "loss": 0.7584, "step": 36580 }, { "epoch": 1.55, "grad_norm": 12.474081367444263, "learning_rate": 5.58993344440119e-06, "loss": 0.764, "step": 36585 }, { "epoch": 1.55, "grad_norm": 16.75594749942165, "learning_rate": 5.588710897655209e-06, "loss": 0.7556, "step": 36590 }, { "epoch": 1.55, "grad_norm": 5.164864774329933, "learning_rate": 5.587488315217422e-06, "loss": 0.7189, "step": 36595 }, { "epoch": 1.55, "grad_norm": 4.266079169083353, "learning_rate": 5.586265697161947e-06, "loss": 0.7449, "step": 36600 }, { "epoch": 1.55, "grad_norm": 5.495028947158499, "learning_rate": 5.585043043562913e-06, "loss": 0.7374, "step": 36605 }, { "epoch": 1.55, "grad_norm": 4.467384448965979, "learning_rate": 5.583820354494442e-06, "loss": 0.7627, "step": 36610 }, { "epoch": 1.55, "grad_norm": 7.9600212350243575, "learning_rate": 5.58259763003066e-06, "loss": 0.7467, "step": 36615 }, { "epoch": 1.55, "grad_norm": 5.85516884370735, "learning_rate": 5.5813748702457015e-06, "loss": 0.7847, "step": 36620 }, { "epoch": 1.55, "grad_norm": 7.186771345561652, "learning_rate": 5.5801520752136965e-06, "loss": 0.768, "step": 36625 }, { "epoch": 1.55, "grad_norm": 6.264683175783478, "learning_rate": 5.5789292450087805e-06, "loss": 0.7426, "step": 36630 }, { "epoch": 1.55, "grad_norm": 6.09136167291706, "learning_rate": 5.57770637970509e-06, "loss": 0.7527, "step": 36635 }, { "epoch": 1.55, "grad_norm": 4.480207582642863, "learning_rate": 5.576483479376763e-06, "loss": 0.7782, "step": 36640 }, { "epoch": 1.55, "grad_norm": 9.232047433146006, "learning_rate": 5.575260544097941e-06, "loss": 0.7613, "step": 36645 }, { "epoch": 1.55, "grad_norm": 4.783226481209886, "learning_rate": 5.5740375739427665e-06, "loss": 0.7327, "step": 36650 }, { "epoch": 1.55, "grad_norm": 6.440824695582905, "learning_rate": 5.572814568985385e-06, "loss": 0.751, "step": 36655 }, { "epoch": 1.55, "grad_norm": 4.587148485966971, "learning_rate": 5.571591529299942e-06, "loss": 0.7349, "step": 36660 }, { "epoch": 1.55, "grad_norm": 11.529727946081056, "learning_rate": 5.570368454960591e-06, "loss": 0.7332, "step": 36665 }, { "epoch": 1.55, "grad_norm": 12.257924553311978, "learning_rate": 5.569145346041479e-06, "loss": 0.7478, "step": 36670 }, { "epoch": 1.55, "grad_norm": 5.019248582223086, "learning_rate": 5.5679222026167615e-06, "loss": 0.7672, "step": 36675 }, { "epoch": 1.55, "grad_norm": 4.803416178103751, "learning_rate": 5.5666990247605935e-06, "loss": 0.7472, "step": 36680 }, { "epoch": 1.55, "grad_norm": 4.144868083154078, "learning_rate": 5.565475812547133e-06, "loss": 0.7656, "step": 36685 }, { "epoch": 1.55, "grad_norm": 5.718405158032616, "learning_rate": 5.5642525660505386e-06, "loss": 0.7811, "step": 36690 }, { "epoch": 1.55, "grad_norm": 7.249719790407091, "learning_rate": 5.5630292853449755e-06, "loss": 0.7625, "step": 36695 }, { "epoch": 1.55, "grad_norm": 5.875057832942347, "learning_rate": 5.561805970504602e-06, "loss": 0.7669, "step": 36700 }, { "epoch": 1.55, "grad_norm": 9.412357271744503, "learning_rate": 5.56058262160359e-06, "loss": 0.7546, "step": 36705 }, { "epoch": 1.55, "grad_norm": 8.040148220149561, "learning_rate": 5.559359238716105e-06, "loss": 0.7552, "step": 36710 }, { "epoch": 1.55, "grad_norm": 4.620364459755838, "learning_rate": 5.558135821916315e-06, "loss": 0.7821, "step": 36715 }, { "epoch": 1.55, "grad_norm": 4.3094381143222495, "learning_rate": 5.556912371278395e-06, "loss": 0.7616, "step": 36720 }, { "epoch": 1.55, "grad_norm": 6.2167938283736275, "learning_rate": 5.555688886876519e-06, "loss": 0.7925, "step": 36725 }, { "epoch": 1.55, "grad_norm": 5.520388479915492, "learning_rate": 5.554465368784864e-06, "loss": 0.7506, "step": 36730 }, { "epoch": 1.55, "grad_norm": 8.494528871130239, "learning_rate": 5.5532418170776035e-06, "loss": 0.7851, "step": 36735 }, { "epoch": 1.55, "grad_norm": 5.572486231956009, "learning_rate": 5.552018231828924e-06, "loss": 0.7661, "step": 36740 }, { "epoch": 1.56, "grad_norm": 4.780519966654631, "learning_rate": 5.5507946131130045e-06, "loss": 0.726, "step": 36745 }, { "epoch": 1.56, "grad_norm": 13.976860510585473, "learning_rate": 5.5495709610040295e-06, "loss": 0.7215, "step": 36750 }, { "epoch": 1.56, "grad_norm": 6.19871886315069, "learning_rate": 5.548347275576186e-06, "loss": 0.776, "step": 36755 }, { "epoch": 1.56, "grad_norm": 5.004589988484238, "learning_rate": 5.547123556903662e-06, "loss": 0.779, "step": 36760 }, { "epoch": 1.56, "grad_norm": 7.277347717713167, "learning_rate": 5.545899805060649e-06, "loss": 0.781, "step": 36765 }, { "epoch": 1.56, "grad_norm": 5.56578136290133, "learning_rate": 5.544676020121339e-06, "loss": 0.7548, "step": 36770 }, { "epoch": 1.56, "grad_norm": 6.48614619135727, "learning_rate": 5.543452202159926e-06, "loss": 0.7486, "step": 36775 }, { "epoch": 1.56, "grad_norm": 8.830718026235273, "learning_rate": 5.542228351250607e-06, "loss": 0.7879, "step": 36780 }, { "epoch": 1.56, "grad_norm": 6.312951263409624, "learning_rate": 5.541004467467579e-06, "loss": 0.7459, "step": 36785 }, { "epoch": 1.56, "grad_norm": 22.77551179356512, "learning_rate": 5.539780550885046e-06, "loss": 0.7134, "step": 36790 }, { "epoch": 1.56, "grad_norm": 14.304148161836249, "learning_rate": 5.538556601577207e-06, "loss": 0.7481, "step": 36795 }, { "epoch": 1.56, "grad_norm": 13.651924047565467, "learning_rate": 5.5373326196182665e-06, "loss": 0.7607, "step": 36800 }, { "epoch": 1.56, "grad_norm": 15.777224903436965, "learning_rate": 5.536108605082433e-06, "loss": 0.7454, "step": 36805 }, { "epoch": 1.56, "grad_norm": 7.568584327788012, "learning_rate": 5.534884558043915e-06, "loss": 0.7162, "step": 36810 }, { "epoch": 1.56, "grad_norm": 8.328506333587683, "learning_rate": 5.533660478576921e-06, "loss": 0.7083, "step": 36815 }, { "epoch": 1.56, "grad_norm": 7.341493420058843, "learning_rate": 5.532436366755662e-06, "loss": 0.7264, "step": 36820 }, { "epoch": 1.56, "grad_norm": 9.104313499610381, "learning_rate": 5.531212222654358e-06, "loss": 0.7818, "step": 36825 }, { "epoch": 1.56, "grad_norm": 27.67038211777149, "learning_rate": 5.52998804634722e-06, "loss": 0.7663, "step": 36830 }, { "epoch": 1.56, "grad_norm": 8.19357400529888, "learning_rate": 5.528763837908468e-06, "loss": 0.7296, "step": 36835 }, { "epoch": 1.56, "grad_norm": 5.478316491759568, "learning_rate": 5.527539597412322e-06, "loss": 0.7805, "step": 36840 }, { "epoch": 1.56, "grad_norm": 24.447764618212663, "learning_rate": 5.526315324933003e-06, "loss": 0.7881, "step": 36845 }, { "epoch": 1.56, "grad_norm": 6.4875036111420465, "learning_rate": 5.525091020544737e-06, "loss": 0.7885, "step": 36850 }, { "epoch": 1.56, "grad_norm": 12.387838188757721, "learning_rate": 5.52386668432175e-06, "loss": 0.7267, "step": 36855 }, { "epoch": 1.56, "grad_norm": 6.339878239996746, "learning_rate": 5.522642316338268e-06, "loss": 0.7527, "step": 36860 }, { "epoch": 1.56, "grad_norm": 5.707450650792179, "learning_rate": 5.521417916668522e-06, "loss": 0.7607, "step": 36865 }, { "epoch": 1.56, "grad_norm": 11.741998808300487, "learning_rate": 5.5201934853867435e-06, "loss": 0.7716, "step": 36870 }, { "epoch": 1.56, "grad_norm": 9.698326350076073, "learning_rate": 5.518969022567167e-06, "loss": 0.7573, "step": 36875 }, { "epoch": 1.56, "grad_norm": 6.803692543465708, "learning_rate": 5.5177445282840265e-06, "loss": 0.7371, "step": 36880 }, { "epoch": 1.56, "grad_norm": 4.3719268098684, "learning_rate": 5.51652000261156e-06, "loss": 0.7564, "step": 36885 }, { "epoch": 1.56, "grad_norm": 12.196675314629372, "learning_rate": 5.515295445624011e-06, "loss": 0.7474, "step": 36890 }, { "epoch": 1.56, "grad_norm": 5.713126578903906, "learning_rate": 5.514070857395613e-06, "loss": 0.7905, "step": 36895 }, { "epoch": 1.56, "grad_norm": 6.740431084470808, "learning_rate": 5.512846238000614e-06, "loss": 0.7316, "step": 36900 }, { "epoch": 1.56, "grad_norm": 7.853639860388595, "learning_rate": 5.5116215875132574e-06, "loss": 0.7161, "step": 36905 }, { "epoch": 1.56, "grad_norm": 4.530670146470907, "learning_rate": 5.510396906007792e-06, "loss": 0.7529, "step": 36910 }, { "epoch": 1.56, "grad_norm": 6.035015143263146, "learning_rate": 5.5091721935584666e-06, "loss": 0.7692, "step": 36915 }, { "epoch": 1.56, "grad_norm": 4.147999387556059, "learning_rate": 5.507947450239529e-06, "loss": 0.7392, "step": 36920 }, { "epoch": 1.56, "grad_norm": 5.784650884013642, "learning_rate": 5.5067226761252345e-06, "loss": 0.8287, "step": 36925 }, { "epoch": 1.56, "grad_norm": 4.587609517583636, "learning_rate": 5.5054978712898365e-06, "loss": 0.7384, "step": 36930 }, { "epoch": 1.56, "grad_norm": 4.126016071152624, "learning_rate": 5.504273035807592e-06, "loss": 0.7464, "step": 36935 }, { "epoch": 1.56, "grad_norm": 4.173315600431369, "learning_rate": 5.503048169752759e-06, "loss": 0.7107, "step": 36940 }, { "epoch": 1.56, "grad_norm": 7.086160944977605, "learning_rate": 5.501823273199596e-06, "loss": 0.7543, "step": 36945 }, { "epoch": 1.56, "grad_norm": 7.736721827131469, "learning_rate": 5.500598346222367e-06, "loss": 0.7332, "step": 36950 }, { "epoch": 1.56, "grad_norm": 8.901911838464171, "learning_rate": 5.499373388895335e-06, "loss": 0.7431, "step": 36955 }, { "epoch": 1.56, "grad_norm": 4.381959441030425, "learning_rate": 5.498148401292765e-06, "loss": 0.7375, "step": 36960 }, { "epoch": 1.56, "grad_norm": 4.671123928888369, "learning_rate": 5.496923383488926e-06, "loss": 0.726, "step": 36965 }, { "epoch": 1.56, "grad_norm": 11.333592541522494, "learning_rate": 5.495698335558084e-06, "loss": 0.7822, "step": 36970 }, { "epoch": 1.56, "grad_norm": 7.22204678869541, "learning_rate": 5.494473257574514e-06, "loss": 0.742, "step": 36975 }, { "epoch": 1.57, "grad_norm": 5.527447785608108, "learning_rate": 5.493248149612485e-06, "loss": 0.7398, "step": 36980 }, { "epoch": 1.57, "grad_norm": 10.893179711971822, "learning_rate": 5.492023011746275e-06, "loss": 0.7549, "step": 36985 }, { "epoch": 1.57, "grad_norm": 5.715380391043272, "learning_rate": 5.490797844050159e-06, "loss": 0.7281, "step": 36990 }, { "epoch": 1.57, "grad_norm": 4.946908492612203, "learning_rate": 5.489572646598414e-06, "loss": 0.7548, "step": 36995 }, { "epoch": 1.57, "grad_norm": 5.647822114470655, "learning_rate": 5.488347419465323e-06, "loss": 0.7374, "step": 37000 }, { "epoch": 1.57, "grad_norm": 6.5237322997598035, "learning_rate": 5.4871221627251645e-06, "loss": 0.7341, "step": 37005 }, { "epoch": 1.57, "grad_norm": 5.367895384519147, "learning_rate": 5.485896876452227e-06, "loss": 0.7747, "step": 37010 }, { "epoch": 1.57, "grad_norm": 6.469921205848223, "learning_rate": 5.4846715607207915e-06, "loss": 0.7705, "step": 37015 }, { "epoch": 1.57, "grad_norm": 7.9152224973251855, "learning_rate": 5.483446215605146e-06, "loss": 0.7125, "step": 37020 }, { "epoch": 1.57, "grad_norm": 5.771546057557418, "learning_rate": 5.482220841179583e-06, "loss": 0.746, "step": 37025 }, { "epoch": 1.57, "grad_norm": 5.0988749714523465, "learning_rate": 5.480995437518388e-06, "loss": 0.7634, "step": 37030 }, { "epoch": 1.57, "grad_norm": 5.39238024456054, "learning_rate": 5.479770004695859e-06, "loss": 0.7688, "step": 37035 }, { "epoch": 1.57, "grad_norm": 4.7098113880967265, "learning_rate": 5.478544542786286e-06, "loss": 0.7485, "step": 37040 }, { "epoch": 1.57, "grad_norm": 8.175536373528734, "learning_rate": 5.4773190518639675e-06, "loss": 0.7435, "step": 37045 }, { "epoch": 1.57, "grad_norm": 5.443792539342895, "learning_rate": 5.476093532003202e-06, "loss": 0.7484, "step": 37050 }, { "epoch": 1.57, "grad_norm": 5.75881595350611, "learning_rate": 5.474867983278287e-06, "loss": 0.7591, "step": 37055 }, { "epoch": 1.57, "grad_norm": 9.582404820949536, "learning_rate": 5.473642405763525e-06, "loss": 0.7538, "step": 37060 }, { "epoch": 1.57, "grad_norm": 4.325441087532176, "learning_rate": 5.472416799533219e-06, "loss": 0.7678, "step": 37065 }, { "epoch": 1.57, "grad_norm": 6.721838023340378, "learning_rate": 5.471191164661674e-06, "loss": 0.7453, "step": 37070 }, { "epoch": 1.57, "grad_norm": 5.817735874791188, "learning_rate": 5.469965501223198e-06, "loss": 0.7452, "step": 37075 }, { "epoch": 1.57, "grad_norm": 5.6256209188353195, "learning_rate": 5.468739809292095e-06, "loss": 0.7714, "step": 37080 }, { "epoch": 1.57, "grad_norm": 5.894218377929878, "learning_rate": 5.46751408894268e-06, "loss": 0.7607, "step": 37085 }, { "epoch": 1.57, "grad_norm": 6.8640520559668845, "learning_rate": 5.4662883402492624e-06, "loss": 0.7284, "step": 37090 }, { "epoch": 1.57, "grad_norm": 9.93288853207604, "learning_rate": 5.465062563286157e-06, "loss": 0.7406, "step": 37095 }, { "epoch": 1.57, "grad_norm": 6.523399390325565, "learning_rate": 5.463836758127678e-06, "loss": 0.7243, "step": 37100 }, { "epoch": 1.57, "grad_norm": 4.58465293303299, "learning_rate": 5.462610924848142e-06, "loss": 0.7515, "step": 37105 }, { "epoch": 1.57, "grad_norm": 4.775018568474361, "learning_rate": 5.461385063521869e-06, "loss": 0.7374, "step": 37110 }, { "epoch": 1.57, "grad_norm": 4.105984588538315, "learning_rate": 5.460159174223177e-06, "loss": 0.8132, "step": 37115 }, { "epoch": 1.57, "grad_norm": 4.422627585951194, "learning_rate": 5.458933257026392e-06, "loss": 0.7376, "step": 37120 }, { "epoch": 1.57, "grad_norm": 10.439643363015225, "learning_rate": 5.457707312005836e-06, "loss": 0.7472, "step": 37125 }, { "epoch": 1.57, "grad_norm": 5.649858787735931, "learning_rate": 5.4564813392358304e-06, "loss": 0.7478, "step": 37130 }, { "epoch": 1.57, "grad_norm": 14.425469717219329, "learning_rate": 5.455255338790709e-06, "loss": 0.7333, "step": 37135 }, { "epoch": 1.57, "grad_norm": 5.942382375528479, "learning_rate": 5.4540293107447965e-06, "loss": 0.7238, "step": 37140 }, { "epoch": 1.57, "grad_norm": 7.59034038781036, "learning_rate": 5.452803255172424e-06, "loss": 0.775, "step": 37145 }, { "epoch": 1.57, "grad_norm": 7.586906620111466, "learning_rate": 5.4515771721479235e-06, "loss": 0.7326, "step": 37150 }, { "epoch": 1.57, "grad_norm": 8.000738088083427, "learning_rate": 5.45035106174563e-06, "loss": 0.7545, "step": 37155 }, { "epoch": 1.57, "grad_norm": 31.09949310373615, "learning_rate": 5.449124924039878e-06, "loss": 0.7566, "step": 37160 }, { "epoch": 1.57, "grad_norm": 20.697724272530564, "learning_rate": 5.4478987591050035e-06, "loss": 0.7572, "step": 37165 }, { "epoch": 1.57, "grad_norm": 4.9269519440933, "learning_rate": 5.446672567015348e-06, "loss": 0.7745, "step": 37170 }, { "epoch": 1.57, "grad_norm": 16.864229821019595, "learning_rate": 5.44544634784525e-06, "loss": 0.7429, "step": 37175 }, { "epoch": 1.57, "grad_norm": 5.417397221759656, "learning_rate": 5.444220101669053e-06, "loss": 0.744, "step": 37180 }, { "epoch": 1.57, "grad_norm": 10.43728825453933, "learning_rate": 5.4429938285610995e-06, "loss": 0.7111, "step": 37185 }, { "epoch": 1.57, "grad_norm": 14.265135331748061, "learning_rate": 5.441767528595734e-06, "loss": 0.7582, "step": 37190 }, { "epoch": 1.57, "grad_norm": 5.087609702952805, "learning_rate": 5.4405412018473045e-06, "loss": 0.7063, "step": 37195 }, { "epoch": 1.57, "grad_norm": 8.394425093210955, "learning_rate": 5.439314848390159e-06, "loss": 0.7444, "step": 37200 }, { "epoch": 1.57, "grad_norm": 4.776987664159522, "learning_rate": 5.43808846829865e-06, "loss": 0.7445, "step": 37205 }, { "epoch": 1.57, "grad_norm": 7.030360076516311, "learning_rate": 5.436862061647128e-06, "loss": 0.7669, "step": 37210 }, { "epoch": 1.58, "grad_norm": 5.602799442243262, "learning_rate": 5.435635628509945e-06, "loss": 0.7506, "step": 37215 }, { "epoch": 1.58, "grad_norm": 18.320943870537874, "learning_rate": 5.434409168961457e-06, "loss": 0.7607, "step": 37220 }, { "epoch": 1.58, "grad_norm": 9.269175158454024, "learning_rate": 5.43318268307602e-06, "loss": 0.7488, "step": 37225 }, { "epoch": 1.58, "grad_norm": 5.751540197825144, "learning_rate": 5.431956170927995e-06, "loss": 0.7432, "step": 37230 }, { "epoch": 1.58, "grad_norm": 21.256596588834853, "learning_rate": 5.430729632591738e-06, "loss": 0.7414, "step": 37235 }, { "epoch": 1.58, "grad_norm": 14.155499118333113, "learning_rate": 5.429503068141613e-06, "loss": 0.717, "step": 37240 }, { "epoch": 1.58, "grad_norm": 8.89397437103705, "learning_rate": 5.428276477651981e-06, "loss": 0.759, "step": 37245 }, { "epoch": 1.58, "grad_norm": 8.34298624571846, "learning_rate": 5.427049861197208e-06, "loss": 0.7348, "step": 37250 }, { "epoch": 1.58, "grad_norm": 5.327354589612307, "learning_rate": 5.42582321885166e-06, "loss": 0.7524, "step": 37255 }, { "epoch": 1.58, "grad_norm": 14.62395183877402, "learning_rate": 5.424596550689706e-06, "loss": 0.7811, "step": 37260 }, { "epoch": 1.58, "grad_norm": 4.100585618363656, "learning_rate": 5.4233698567857115e-06, "loss": 0.7523, "step": 37265 }, { "epoch": 1.58, "grad_norm": 14.928983039776778, "learning_rate": 5.4221431372140505e-06, "loss": 0.7466, "step": 37270 }, { "epoch": 1.58, "grad_norm": 4.214963999958787, "learning_rate": 5.420916392049095e-06, "loss": 0.7161, "step": 37275 }, { "epoch": 1.58, "grad_norm": 6.411810849723901, "learning_rate": 5.419689621365218e-06, "loss": 0.7612, "step": 37280 }, { "epoch": 1.58, "grad_norm": 4.983503185711556, "learning_rate": 5.4184628252367965e-06, "loss": 0.7681, "step": 37285 }, { "epoch": 1.58, "grad_norm": 5.21019927678597, "learning_rate": 5.417236003738204e-06, "loss": 0.7619, "step": 37290 }, { "epoch": 1.58, "grad_norm": 4.9375492392208455, "learning_rate": 5.416009156943825e-06, "loss": 0.7547, "step": 37295 }, { "epoch": 1.58, "grad_norm": 5.728254528343914, "learning_rate": 5.414782284928034e-06, "loss": 0.7768, "step": 37300 }, { "epoch": 1.58, "grad_norm": 4.356442019958166, "learning_rate": 5.413555387765216e-06, "loss": 0.7699, "step": 37305 }, { "epoch": 1.58, "grad_norm": 4.294594513123982, "learning_rate": 5.412328465529751e-06, "loss": 0.7526, "step": 37310 }, { "epoch": 1.58, "grad_norm": 4.501263970491084, "learning_rate": 5.411101518296028e-06, "loss": 0.7642, "step": 37315 }, { "epoch": 1.58, "grad_norm": 7.829606427474315, "learning_rate": 5.409874546138431e-06, "loss": 0.7622, "step": 37320 }, { "epoch": 1.58, "grad_norm": 6.19396541593531, "learning_rate": 5.408647549131345e-06, "loss": 0.741, "step": 37325 }, { "epoch": 1.58, "grad_norm": 6.632286932890838, "learning_rate": 5.4074205273491645e-06, "loss": 0.7923, "step": 37330 }, { "epoch": 1.58, "grad_norm": 5.121621608875013, "learning_rate": 5.406193480866275e-06, "loss": 0.698, "step": 37335 }, { "epoch": 1.58, "grad_norm": 4.427187293589908, "learning_rate": 5.404966409757073e-06, "loss": 0.7292, "step": 37340 }, { "epoch": 1.58, "grad_norm": 4.401604642840885, "learning_rate": 5.403739314095952e-06, "loss": 0.7845, "step": 37345 }, { "epoch": 1.58, "grad_norm": 4.640809259029113, "learning_rate": 5.402512193957302e-06, "loss": 0.7243, "step": 37350 }, { "epoch": 1.58, "grad_norm": 5.2337488170726845, "learning_rate": 5.401285049415525e-06, "loss": 0.7296, "step": 37355 }, { "epoch": 1.58, "grad_norm": 6.803299859630813, "learning_rate": 5.400057880545017e-06, "loss": 0.7251, "step": 37360 }, { "epoch": 1.58, "grad_norm": 4.263768078793284, "learning_rate": 5.398830687420179e-06, "loss": 0.7176, "step": 37365 }, { "epoch": 1.58, "grad_norm": 5.146789803540217, "learning_rate": 5.3976034701154115e-06, "loss": 0.7389, "step": 37370 }, { "epoch": 1.58, "grad_norm": 4.798472102905768, "learning_rate": 5.396376228705115e-06, "loss": 0.7902, "step": 37375 }, { "epoch": 1.58, "grad_norm": 5.965530444155731, "learning_rate": 5.395148963263699e-06, "loss": 0.7246, "step": 37380 }, { "epoch": 1.58, "grad_norm": 5.839877668421685, "learning_rate": 5.393921673865562e-06, "loss": 0.7497, "step": 37385 }, { "epoch": 1.58, "grad_norm": 5.227939821119902, "learning_rate": 5.392694360585114e-06, "loss": 0.7651, "step": 37390 }, { "epoch": 1.58, "grad_norm": 5.859012737287445, "learning_rate": 5.391467023496765e-06, "loss": 0.7445, "step": 37395 }, { "epoch": 1.58, "grad_norm": 5.856082471208197, "learning_rate": 5.390239662674923e-06, "loss": 0.7155, "step": 37400 }, { "epoch": 1.58, "grad_norm": 4.315321003108104, "learning_rate": 5.3890122781940005e-06, "loss": 0.7747, "step": 37405 }, { "epoch": 1.58, "grad_norm": 5.083693089221626, "learning_rate": 5.387784870128408e-06, "loss": 0.7484, "step": 37410 }, { "epoch": 1.58, "grad_norm": 14.834778880823702, "learning_rate": 5.386557438552562e-06, "loss": 0.7513, "step": 37415 }, { "epoch": 1.58, "grad_norm": 4.547063559609413, "learning_rate": 5.385329983540877e-06, "loss": 0.7601, "step": 37420 }, { "epoch": 1.58, "grad_norm": 7.828351467269713, "learning_rate": 5.384102505167771e-06, "loss": 0.7441, "step": 37425 }, { "epoch": 1.58, "grad_norm": 4.991494391598989, "learning_rate": 5.3828750035076605e-06, "loss": 0.7515, "step": 37430 }, { "epoch": 1.58, "grad_norm": 10.756184923926918, "learning_rate": 5.381647478634966e-06, "loss": 0.755, "step": 37435 }, { "epoch": 1.58, "grad_norm": 9.01039073193324, "learning_rate": 5.38041993062411e-06, "loss": 0.7342, "step": 37440 }, { "epoch": 1.58, "grad_norm": 6.005893322049624, "learning_rate": 5.3791923595495135e-06, "loss": 0.7385, "step": 37445 }, { "epoch": 1.58, "grad_norm": 9.237466588598005, "learning_rate": 5.3779647654856014e-06, "loss": 0.7664, "step": 37450 }, { "epoch": 1.59, "grad_norm": 5.375668314384845, "learning_rate": 5.376737148506799e-06, "loss": 0.7058, "step": 37455 }, { "epoch": 1.59, "grad_norm": 10.558184279955777, "learning_rate": 5.375509508687533e-06, "loss": 0.7285, "step": 37460 }, { "epoch": 1.59, "grad_norm": 4.542658679631809, "learning_rate": 5.374281846102232e-06, "loss": 0.7671, "step": 37465 }, { "epoch": 1.59, "grad_norm": 7.824039967567152, "learning_rate": 5.373054160825324e-06, "loss": 0.7366, "step": 37470 }, { "epoch": 1.59, "grad_norm": 7.806334703708295, "learning_rate": 5.371826452931242e-06, "loss": 0.7619, "step": 37475 }, { "epoch": 1.59, "grad_norm": 6.135966422980176, "learning_rate": 5.370598722494417e-06, "loss": 0.7057, "step": 37480 }, { "epoch": 1.59, "grad_norm": 13.427610602060648, "learning_rate": 5.369370969589284e-06, "loss": 0.7456, "step": 37485 }, { "epoch": 1.59, "grad_norm": 7.066176785301034, "learning_rate": 5.368143194290277e-06, "loss": 0.736, "step": 37490 }, { "epoch": 1.59, "grad_norm": 7.639154191180423, "learning_rate": 5.36691539667183e-06, "loss": 0.7467, "step": 37495 }, { "epoch": 1.59, "grad_norm": 7.649433836434678, "learning_rate": 5.365687576808386e-06, "loss": 0.7781, "step": 37500 }, { "epoch": 1.59, "grad_norm": 5.631794120537659, "learning_rate": 5.36445973477438e-06, "loss": 0.7618, "step": 37505 }, { "epoch": 1.59, "grad_norm": 4.518038653978764, "learning_rate": 5.3632318706442545e-06, "loss": 0.7772, "step": 37510 }, { "epoch": 1.59, "grad_norm": 5.702757138903261, "learning_rate": 5.362003984492451e-06, "loss": 0.7681, "step": 37515 }, { "epoch": 1.59, "grad_norm": 6.053473663573236, "learning_rate": 5.360776076393412e-06, "loss": 0.752, "step": 37520 }, { "epoch": 1.59, "grad_norm": 4.276327836729093, "learning_rate": 5.359548146421582e-06, "loss": 0.7582, "step": 37525 }, { "epoch": 1.59, "grad_norm": 4.118716133271822, "learning_rate": 5.358320194651405e-06, "loss": 0.7592, "step": 37530 }, { "epoch": 1.59, "grad_norm": 5.528297926027103, "learning_rate": 5.3570922211573325e-06, "loss": 0.6974, "step": 37535 }, { "epoch": 1.59, "grad_norm": 7.593812938615475, "learning_rate": 5.35586422601381e-06, "loss": 0.8095, "step": 37540 }, { "epoch": 1.59, "grad_norm": 6.8775369696974495, "learning_rate": 5.354636209295288e-06, "loss": 0.758, "step": 37545 }, { "epoch": 1.59, "grad_norm": 7.233224925054401, "learning_rate": 5.353408171076219e-06, "loss": 0.7124, "step": 37550 }, { "epoch": 1.59, "grad_norm": 7.077809648511176, "learning_rate": 5.35218011143105e-06, "loss": 0.7564, "step": 37555 }, { "epoch": 1.59, "grad_norm": 4.656496044516427, "learning_rate": 5.35095203043424e-06, "loss": 0.7514, "step": 37560 }, { "epoch": 1.59, "grad_norm": 4.964532200088741, "learning_rate": 5.349723928160243e-06, "loss": 0.7686, "step": 37565 }, { "epoch": 1.59, "grad_norm": 4.785002680947919, "learning_rate": 5.3484958046835125e-06, "loss": 0.7352, "step": 37570 }, { "epoch": 1.59, "grad_norm": 4.886805250032847, "learning_rate": 5.3472676600785086e-06, "loss": 0.7455, "step": 37575 }, { "epoch": 1.59, "grad_norm": 5.064987162246553, "learning_rate": 5.346039494419689e-06, "loss": 0.7331, "step": 37580 }, { "epoch": 1.59, "grad_norm": 4.424624370194576, "learning_rate": 5.344811307781515e-06, "loss": 0.7529, "step": 37585 }, { "epoch": 1.59, "grad_norm": 9.374170531821699, "learning_rate": 5.3435831002384475e-06, "loss": 0.7376, "step": 37590 }, { "epoch": 1.59, "grad_norm": 3.9041495239232895, "learning_rate": 5.342354871864946e-06, "loss": 0.7465, "step": 37595 }, { "epoch": 1.59, "grad_norm": 4.803163403239657, "learning_rate": 5.341126622735479e-06, "loss": 0.7339, "step": 37600 }, { "epoch": 1.59, "grad_norm": 4.480521533068638, "learning_rate": 5.33989835292451e-06, "loss": 0.7652, "step": 37605 }, { "epoch": 1.59, "grad_norm": 5.4214863303846785, "learning_rate": 5.338670062506505e-06, "loss": 0.7484, "step": 37610 }, { "epoch": 1.59, "grad_norm": 6.586590087923795, "learning_rate": 5.337441751555932e-06, "loss": 0.7517, "step": 37615 }, { "epoch": 1.59, "grad_norm": 7.463360755259694, "learning_rate": 5.336213420147259e-06, "loss": 0.7199, "step": 37620 }, { "epoch": 1.59, "grad_norm": 8.475288399969703, "learning_rate": 5.334985068354958e-06, "loss": 0.7463, "step": 37625 }, { "epoch": 1.59, "grad_norm": 5.9987082821957, "learning_rate": 5.333756696253496e-06, "loss": 0.762, "step": 37630 }, { "epoch": 1.59, "grad_norm": 6.162163834297545, "learning_rate": 5.33252830391735e-06, "loss": 0.7473, "step": 37635 }, { "epoch": 1.59, "grad_norm": 4.872435093013567, "learning_rate": 5.331299891420992e-06, "loss": 0.7027, "step": 37640 }, { "epoch": 1.59, "grad_norm": 6.542212780383935, "learning_rate": 5.330071458838899e-06, "loss": 0.7287, "step": 37645 }, { "epoch": 1.59, "grad_norm": 6.562253703132565, "learning_rate": 5.328843006245546e-06, "loss": 0.7151, "step": 37650 }, { "epoch": 1.59, "grad_norm": 4.895724675905981, "learning_rate": 5.327614533715408e-06, "loss": 0.7159, "step": 37655 }, { "epoch": 1.59, "grad_norm": 14.201465476356836, "learning_rate": 5.326386041322967e-06, "loss": 0.7442, "step": 37660 }, { "epoch": 1.59, "grad_norm": 9.161162559764266, "learning_rate": 5.325157529142701e-06, "loss": 0.7712, "step": 37665 }, { "epoch": 1.59, "grad_norm": 8.527358948463428, "learning_rate": 5.323928997249093e-06, "loss": 0.7303, "step": 37670 }, { "epoch": 1.59, "grad_norm": 6.689005078888132, "learning_rate": 5.322700445716624e-06, "loss": 0.7431, "step": 37675 }, { "epoch": 1.59, "grad_norm": 5.699739772856369, "learning_rate": 5.321471874619776e-06, "loss": 0.7229, "step": 37680 }, { "epoch": 1.59, "grad_norm": 6.214568447706823, "learning_rate": 5.320243284033036e-06, "loss": 0.7247, "step": 37685 }, { "epoch": 1.6, "grad_norm": 9.025205590394776, "learning_rate": 5.319014674030889e-06, "loss": 0.7509, "step": 37690 }, { "epoch": 1.6, "grad_norm": 5.561513320039655, "learning_rate": 5.317786044687823e-06, "loss": 0.7597, "step": 37695 }, { "epoch": 1.6, "grad_norm": 8.157538752685827, "learning_rate": 5.316557396078324e-06, "loss": 0.7117, "step": 37700 }, { "epoch": 1.6, "grad_norm": 4.23669726869206, "learning_rate": 5.315328728276884e-06, "loss": 0.7442, "step": 37705 }, { "epoch": 1.6, "grad_norm": 8.548821424513452, "learning_rate": 5.314100041357991e-06, "loss": 0.7472, "step": 37710 }, { "epoch": 1.6, "grad_norm": 5.436883503772997, "learning_rate": 5.312871335396137e-06, "loss": 0.7304, "step": 37715 }, { "epoch": 1.6, "grad_norm": 7.174153402921207, "learning_rate": 5.311642610465816e-06, "loss": 0.7974, "step": 37720 }, { "epoch": 1.6, "grad_norm": 7.523995783694644, "learning_rate": 5.310413866641522e-06, "loss": 0.7303, "step": 37725 }, { "epoch": 1.6, "grad_norm": 4.36651402925244, "learning_rate": 5.3091851039977495e-06, "loss": 0.7384, "step": 37730 }, { "epoch": 1.6, "grad_norm": 4.813853312993525, "learning_rate": 5.307956322608995e-06, "loss": 0.7347, "step": 37735 }, { "epoch": 1.6, "grad_norm": 5.816087668737082, "learning_rate": 5.306727522549754e-06, "loss": 0.7304, "step": 37740 }, { "epoch": 1.6, "grad_norm": 4.965818504706991, "learning_rate": 5.305498703894528e-06, "loss": 0.7455, "step": 37745 }, { "epoch": 1.6, "grad_norm": 10.184874659636407, "learning_rate": 5.304269866717816e-06, "loss": 0.746, "step": 37750 }, { "epoch": 1.6, "grad_norm": 4.236274334477789, "learning_rate": 5.303041011094118e-06, "loss": 0.6714, "step": 37755 }, { "epoch": 1.6, "grad_norm": 6.5494943414679945, "learning_rate": 5.301812137097936e-06, "loss": 0.7418, "step": 37760 }, { "epoch": 1.6, "grad_norm": 5.042245990822691, "learning_rate": 5.300583244803773e-06, "loss": 0.7423, "step": 37765 }, { "epoch": 1.6, "grad_norm": 7.585001465218098, "learning_rate": 5.299354334286133e-06, "loss": 0.7042, "step": 37770 }, { "epoch": 1.6, "grad_norm": 7.2537654455744125, "learning_rate": 5.298125405619522e-06, "loss": 0.7723, "step": 37775 }, { "epoch": 1.6, "grad_norm": 8.937966823473857, "learning_rate": 5.296896458878446e-06, "loss": 0.7572, "step": 37780 }, { "epoch": 1.6, "grad_norm": 7.4112664654020355, "learning_rate": 5.295667494137412e-06, "loss": 0.7479, "step": 37785 }, { "epoch": 1.6, "grad_norm": 10.217407720490653, "learning_rate": 5.294438511470929e-06, "loss": 0.71, "step": 37790 }, { "epoch": 1.6, "grad_norm": 4.196651159654953, "learning_rate": 5.293209510953506e-06, "loss": 0.7353, "step": 37795 }, { "epoch": 1.6, "grad_norm": 4.876056922498158, "learning_rate": 5.291980492659654e-06, "loss": 0.7797, "step": 37800 }, { "epoch": 1.6, "grad_norm": 5.355027706172651, "learning_rate": 5.290751456663885e-06, "loss": 0.7383, "step": 37805 }, { "epoch": 1.6, "grad_norm": 4.444181007814591, "learning_rate": 5.289522403040711e-06, "loss": 0.744, "step": 37810 }, { "epoch": 1.6, "grad_norm": 8.862747358515827, "learning_rate": 5.288293331864648e-06, "loss": 0.7491, "step": 37815 }, { "epoch": 1.6, "grad_norm": 5.436880312505482, "learning_rate": 5.28706424321021e-06, "loss": 0.7601, "step": 37820 }, { "epoch": 1.6, "grad_norm": 5.418785045296565, "learning_rate": 5.285835137151909e-06, "loss": 0.7405, "step": 37825 }, { "epoch": 1.6, "grad_norm": 4.466243589612315, "learning_rate": 5.284606013764268e-06, "loss": 0.7705, "step": 37830 }, { "epoch": 1.6, "grad_norm": 13.006736163739006, "learning_rate": 5.283376873121803e-06, "loss": 0.7416, "step": 37835 }, { "epoch": 1.6, "grad_norm": 10.5033396248192, "learning_rate": 5.282147715299031e-06, "loss": 0.7239, "step": 37840 }, { "epoch": 1.6, "grad_norm": 7.224224050082206, "learning_rate": 5.280918540370477e-06, "loss": 0.726, "step": 37845 }, { "epoch": 1.6, "grad_norm": 6.916785371914052, "learning_rate": 5.279689348410656e-06, "loss": 0.791, "step": 37850 }, { "epoch": 1.6, "grad_norm": 4.9549484259295795, "learning_rate": 5.278460139494096e-06, "loss": 0.7199, "step": 37855 }, { "epoch": 1.6, "grad_norm": 5.5292756530216876, "learning_rate": 5.277230913695317e-06, "loss": 0.7538, "step": 37860 }, { "epoch": 1.6, "grad_norm": 11.714822515629121, "learning_rate": 5.276001671088845e-06, "loss": 0.7411, "step": 37865 }, { "epoch": 1.6, "grad_norm": 11.6102897602213, "learning_rate": 5.274772411749205e-06, "loss": 0.7295, "step": 37870 }, { "epoch": 1.6, "grad_norm": 6.244398201443841, "learning_rate": 5.273543135750922e-06, "loss": 0.74, "step": 37875 }, { "epoch": 1.6, "grad_norm": 5.096831706126591, "learning_rate": 5.272313843168525e-06, "loss": 0.734, "step": 37880 }, { "epoch": 1.6, "grad_norm": 7.926655074334417, "learning_rate": 5.2710845340765415e-06, "loss": 0.7346, "step": 37885 }, { "epoch": 1.6, "grad_norm": 7.488196705088085, "learning_rate": 5.269855208549501e-06, "loss": 0.7391, "step": 37890 }, { "epoch": 1.6, "grad_norm": 4.736495404687314, "learning_rate": 5.268625866661935e-06, "loss": 0.7561, "step": 37895 }, { "epoch": 1.6, "grad_norm": 7.497778266473555, "learning_rate": 5.267396508488372e-06, "loss": 0.7553, "step": 37900 }, { "epoch": 1.6, "grad_norm": 8.688799150542911, "learning_rate": 5.266167134103349e-06, "loss": 0.7475, "step": 37905 }, { "epoch": 1.6, "grad_norm": 12.46596615943758, "learning_rate": 5.2649377435813945e-06, "loss": 0.7182, "step": 37910 }, { "epoch": 1.6, "grad_norm": 6.875630781382229, "learning_rate": 5.2637083369970466e-06, "loss": 0.7361, "step": 37915 }, { "epoch": 1.6, "grad_norm": 4.314581377171161, "learning_rate": 5.262478914424839e-06, "loss": 0.7261, "step": 37920 }, { "epoch": 1.61, "grad_norm": 8.905769432746213, "learning_rate": 5.261249475939306e-06, "loss": 0.7296, "step": 37925 }, { "epoch": 1.61, "grad_norm": 14.423302720315911, "learning_rate": 5.260020021614988e-06, "loss": 0.7278, "step": 37930 }, { "epoch": 1.61, "grad_norm": 7.698815658091996, "learning_rate": 5.258790551526421e-06, "loss": 0.7097, "step": 37935 }, { "epoch": 1.61, "grad_norm": 6.871909615106895, "learning_rate": 5.257561065748148e-06, "loss": 0.7324, "step": 37940 }, { "epoch": 1.61, "grad_norm": 5.425972072814886, "learning_rate": 5.256331564354704e-06, "loss": 0.74, "step": 37945 }, { "epoch": 1.61, "grad_norm": 7.945815598118916, "learning_rate": 5.255102047420633e-06, "loss": 0.7702, "step": 37950 }, { "epoch": 1.61, "grad_norm": 4.806838144095054, "learning_rate": 5.253872515020477e-06, "loss": 0.7387, "step": 37955 }, { "epoch": 1.61, "grad_norm": 17.825496731017804, "learning_rate": 5.252642967228777e-06, "loss": 0.7324, "step": 37960 }, { "epoch": 1.61, "grad_norm": 6.515405220851127, "learning_rate": 5.251413404120078e-06, "loss": 0.7181, "step": 37965 }, { "epoch": 1.61, "grad_norm": 5.861932318933985, "learning_rate": 5.250183825768925e-06, "loss": 0.7565, "step": 37970 }, { "epoch": 1.61, "grad_norm": 9.292486917293608, "learning_rate": 5.248954232249865e-06, "loss": 0.7045, "step": 37975 }, { "epoch": 1.61, "grad_norm": 5.0995285907541925, "learning_rate": 5.247724623637441e-06, "loss": 0.7475, "step": 37980 }, { "epoch": 1.61, "grad_norm": 15.261729559612967, "learning_rate": 5.246495000006205e-06, "loss": 0.725, "step": 37985 }, { "epoch": 1.61, "grad_norm": 6.23125595983438, "learning_rate": 5.2452653614307014e-06, "loss": 0.7076, "step": 37990 }, { "epoch": 1.61, "grad_norm": 5.050972318389208, "learning_rate": 5.244035707985481e-06, "loss": 0.7425, "step": 37995 }, { "epoch": 1.61, "grad_norm": 4.688104495775632, "learning_rate": 5.242806039745096e-06, "loss": 0.7494, "step": 38000 }, { "epoch": 1.61, "grad_norm": 8.208776026998654, "learning_rate": 5.241576356784096e-06, "loss": 0.7249, "step": 38005 }, { "epoch": 1.61, "grad_norm": 10.536042298761876, "learning_rate": 5.240346659177033e-06, "loss": 0.7233, "step": 38010 }, { "epoch": 1.61, "grad_norm": 4.1538416592362335, "learning_rate": 5.239116946998459e-06, "loss": 0.7711, "step": 38015 }, { "epoch": 1.61, "grad_norm": 5.96920591635588, "learning_rate": 5.237887220322929e-06, "loss": 0.7099, "step": 38020 }, { "epoch": 1.61, "grad_norm": 4.265050538591462, "learning_rate": 5.236657479224999e-06, "loss": 0.7468, "step": 38025 }, { "epoch": 1.61, "grad_norm": 9.388958314077751, "learning_rate": 5.235427723779222e-06, "loss": 0.7112, "step": 38030 }, { "epoch": 1.61, "grad_norm": 9.517372700063806, "learning_rate": 5.234197954060156e-06, "loss": 0.7333, "step": 38035 }, { "epoch": 1.61, "grad_norm": 8.324392253768671, "learning_rate": 5.2329681701423575e-06, "loss": 0.7537, "step": 38040 }, { "epoch": 1.61, "grad_norm": 10.09455151632928, "learning_rate": 5.2317383721003845e-06, "loss": 0.7797, "step": 38045 }, { "epoch": 1.61, "grad_norm": 9.428254444130227, "learning_rate": 5.230508560008798e-06, "loss": 0.7672, "step": 38050 }, { "epoch": 1.61, "grad_norm": 5.996490313842462, "learning_rate": 5.229278733942155e-06, "loss": 0.7222, "step": 38055 }, { "epoch": 1.61, "grad_norm": 7.948580857582867, "learning_rate": 5.22804889397502e-06, "loss": 0.7251, "step": 38060 }, { "epoch": 1.61, "grad_norm": 5.494819388927259, "learning_rate": 5.22681904018195e-06, "loss": 0.734, "step": 38065 }, { "epoch": 1.61, "grad_norm": 4.188351150721062, "learning_rate": 5.22558917263751e-06, "loss": 0.7164, "step": 38070 }, { "epoch": 1.61, "grad_norm": 4.775442334496566, "learning_rate": 5.224359291416264e-06, "loss": 0.7328, "step": 38075 }, { "epoch": 1.61, "grad_norm": 6.951136218643271, "learning_rate": 5.223129396592774e-06, "loss": 0.7173, "step": 38080 }, { "epoch": 1.61, "grad_norm": 5.233586879665359, "learning_rate": 5.221899488241607e-06, "loss": 0.7087, "step": 38085 }, { "epoch": 1.61, "grad_norm": 5.050689931576329, "learning_rate": 5.220669566437327e-06, "loss": 0.7023, "step": 38090 }, { "epoch": 1.61, "grad_norm": 11.441282611896956, "learning_rate": 5.2194396312545e-06, "loss": 0.7673, "step": 38095 }, { "epoch": 1.61, "grad_norm": 4.890061726238572, "learning_rate": 5.218209682767698e-06, "loss": 0.75, "step": 38100 }, { "epoch": 1.61, "grad_norm": 5.671385988457021, "learning_rate": 5.2169797210514825e-06, "loss": 0.7202, "step": 38105 }, { "epoch": 1.61, "grad_norm": 7.690109653089311, "learning_rate": 5.215749746180427e-06, "loss": 0.7376, "step": 38110 }, { "epoch": 1.61, "grad_norm": 5.2641787584295825, "learning_rate": 5.214519758229099e-06, "loss": 0.724, "step": 38115 }, { "epoch": 1.61, "grad_norm": 14.614666364112239, "learning_rate": 5.213289757272071e-06, "loss": 0.7263, "step": 38120 }, { "epoch": 1.61, "grad_norm": 4.242702602137728, "learning_rate": 5.212059743383914e-06, "loss": 0.7284, "step": 38125 }, { "epoch": 1.61, "grad_norm": 4.148072719344014, "learning_rate": 5.210829716639197e-06, "loss": 0.8041, "step": 38130 }, { "epoch": 1.61, "grad_norm": 7.247284594665144, "learning_rate": 5.209599677112497e-06, "loss": 0.7507, "step": 38135 }, { "epoch": 1.61, "grad_norm": 7.098537713939876, "learning_rate": 5.208369624878386e-06, "loss": 0.7774, "step": 38140 }, { "epoch": 1.61, "grad_norm": 6.438648231081955, "learning_rate": 5.2071395600114375e-06, "loss": 0.7511, "step": 38145 }, { "epoch": 1.61, "grad_norm": 4.268866208328094, "learning_rate": 5.205909482586228e-06, "loss": 0.6985, "step": 38150 }, { "epoch": 1.61, "grad_norm": 4.902995003229946, "learning_rate": 5.204679392677332e-06, "loss": 0.7597, "step": 38155 }, { "epoch": 1.61, "grad_norm": 5.357762100039163, "learning_rate": 5.203449290359329e-06, "loss": 0.7099, "step": 38160 }, { "epoch": 1.62, "grad_norm": 5.918202985188686, "learning_rate": 5.202219175706795e-06, "loss": 0.7446, "step": 38165 }, { "epoch": 1.62, "grad_norm": 4.192453276337107, "learning_rate": 5.200989048794306e-06, "loss": 0.7047, "step": 38170 }, { "epoch": 1.62, "grad_norm": 5.529710037890714, "learning_rate": 5.199758909696445e-06, "loss": 0.7169, "step": 38175 }, { "epoch": 1.62, "grad_norm": 4.869262658258585, "learning_rate": 5.198528758487788e-06, "loss": 0.7362, "step": 38180 }, { "epoch": 1.62, "grad_norm": 5.4660360564636505, "learning_rate": 5.197298595242919e-06, "loss": 0.7234, "step": 38185 }, { "epoch": 1.62, "grad_norm": 10.356827353203668, "learning_rate": 5.196068420036417e-06, "loss": 0.683, "step": 38190 }, { "epoch": 1.62, "grad_norm": 5.590534639319392, "learning_rate": 5.194838232942864e-06, "loss": 0.75, "step": 38195 }, { "epoch": 1.62, "grad_norm": 8.363151028413926, "learning_rate": 5.193608034036843e-06, "loss": 0.8102, "step": 38200 }, { "epoch": 1.62, "grad_norm": 12.957285686732694, "learning_rate": 5.192377823392935e-06, "loss": 0.6969, "step": 38205 }, { "epoch": 1.62, "grad_norm": 4.606741208167377, "learning_rate": 5.191147601085729e-06, "loss": 0.7694, "step": 38210 }, { "epoch": 1.62, "grad_norm": 5.2494295300462115, "learning_rate": 5.189917367189807e-06, "loss": 0.7297, "step": 38215 }, { "epoch": 1.62, "grad_norm": 9.22458194321671, "learning_rate": 5.188687121779753e-06, "loss": 0.7275, "step": 38220 }, { "epoch": 1.62, "grad_norm": 4.453282643846458, "learning_rate": 5.187456864930155e-06, "loss": 0.6971, "step": 38225 }, { "epoch": 1.62, "grad_norm": 4.700818045679973, "learning_rate": 5.186226596715599e-06, "loss": 0.7493, "step": 38230 }, { "epoch": 1.62, "grad_norm": 5.32433622098943, "learning_rate": 5.184996317210674e-06, "loss": 0.7384, "step": 38235 }, { "epoch": 1.62, "grad_norm": 5.047200545118724, "learning_rate": 5.183766026489966e-06, "loss": 0.7026, "step": 38240 }, { "epoch": 1.62, "grad_norm": 5.207624156426969, "learning_rate": 5.182535724628066e-06, "loss": 0.7458, "step": 38245 }, { "epoch": 1.62, "grad_norm": 6.43006641856987, "learning_rate": 5.181305411699563e-06, "loss": 0.7303, "step": 38250 }, { "epoch": 1.62, "grad_norm": 4.83888487269112, "learning_rate": 5.180075087779045e-06, "loss": 0.7353, "step": 38255 }, { "epoch": 1.62, "grad_norm": 4.824421628005328, "learning_rate": 5.178844752941107e-06, "loss": 0.7359, "step": 38260 }, { "epoch": 1.62, "grad_norm": 5.849450878000392, "learning_rate": 5.177614407260338e-06, "loss": 0.705, "step": 38265 }, { "epoch": 1.62, "grad_norm": 6.772899111878667, "learning_rate": 5.17638405081133e-06, "loss": 0.7187, "step": 38270 }, { "epoch": 1.62, "grad_norm": 4.723866777822633, "learning_rate": 5.175153683668678e-06, "loss": 0.7254, "step": 38275 }, { "epoch": 1.62, "grad_norm": 5.329843141040258, "learning_rate": 5.173923305906972e-06, "loss": 0.7504, "step": 38280 }, { "epoch": 1.62, "grad_norm": 7.059680921165908, "learning_rate": 5.17269291760081e-06, "loss": 0.7123, "step": 38285 }, { "epoch": 1.62, "grad_norm": 13.139054123490984, "learning_rate": 5.171462518824785e-06, "loss": 0.7514, "step": 38290 }, { "epoch": 1.62, "grad_norm": 24.460906690680105, "learning_rate": 5.170232109653491e-06, "loss": 0.7755, "step": 38295 }, { "epoch": 1.62, "grad_norm": 13.225668004013762, "learning_rate": 5.169001690161526e-06, "loss": 0.7057, "step": 38300 }, { "epoch": 1.62, "grad_norm": 4.8211498410596025, "learning_rate": 5.167771260423488e-06, "loss": 0.709, "step": 38305 }, { "epoch": 1.62, "grad_norm": 6.777076331759229, "learning_rate": 5.166540820513972e-06, "loss": 0.7334, "step": 38310 }, { "epoch": 1.62, "grad_norm": 5.895710982857276, "learning_rate": 5.165310370507575e-06, "loss": 0.7422, "step": 38315 }, { "epoch": 1.62, "grad_norm": 5.26165973165786, "learning_rate": 5.1640799104788985e-06, "loss": 0.7589, "step": 38320 }, { "epoch": 1.62, "grad_norm": 4.516287064019683, "learning_rate": 5.16284944050254e-06, "loss": 0.7663, "step": 38325 }, { "epoch": 1.62, "grad_norm": 4.94519302399792, "learning_rate": 5.1616189606531e-06, "loss": 0.7291, "step": 38330 }, { "epoch": 1.62, "grad_norm": 4.942469564670391, "learning_rate": 5.160388471005179e-06, "loss": 0.753, "step": 38335 }, { "epoch": 1.62, "grad_norm": 6.146351340809893, "learning_rate": 5.159157971633376e-06, "loss": 0.7499, "step": 38340 }, { "epoch": 1.62, "grad_norm": 5.896636475563514, "learning_rate": 5.157927462612296e-06, "loss": 0.704, "step": 38345 }, { "epoch": 1.62, "grad_norm": 5.503977712383521, "learning_rate": 5.1566969440165375e-06, "loss": 0.7228, "step": 38350 }, { "epoch": 1.62, "grad_norm": 4.125528059373691, "learning_rate": 5.155466415920706e-06, "loss": 0.7609, "step": 38355 }, { "epoch": 1.62, "grad_norm": 9.645500596115685, "learning_rate": 5.154235878399402e-06, "loss": 0.7687, "step": 38360 }, { "epoch": 1.62, "grad_norm": 8.68835216946066, "learning_rate": 5.153005331527232e-06, "loss": 0.7288, "step": 38365 }, { "epoch": 1.62, "grad_norm": 6.938718902797209, "learning_rate": 5.151774775378799e-06, "loss": 0.7843, "step": 38370 }, { "epoch": 1.62, "grad_norm": 5.063239477426115, "learning_rate": 5.150544210028709e-06, "loss": 0.7198, "step": 38375 }, { "epoch": 1.62, "grad_norm": 4.560870171179861, "learning_rate": 5.149313635551567e-06, "loss": 0.7269, "step": 38380 }, { "epoch": 1.62, "grad_norm": 7.387114697707688, "learning_rate": 5.148083052021979e-06, "loss": 0.7667, "step": 38385 }, { "epoch": 1.62, "grad_norm": 4.571397400733684, "learning_rate": 5.146852459514551e-06, "loss": 0.7458, "step": 38390 }, { "epoch": 1.62, "grad_norm": 5.013995175017584, "learning_rate": 5.145621858103892e-06, "loss": 0.7828, "step": 38395 }, { "epoch": 1.63, "grad_norm": 4.553235113742426, "learning_rate": 5.1443912478646076e-06, "loss": 0.7424, "step": 38400 }, { "epoch": 1.63, "grad_norm": 4.7876537739008365, "learning_rate": 5.143160628871307e-06, "loss": 0.7537, "step": 38405 }, { "epoch": 1.63, "grad_norm": 4.42354296202893, "learning_rate": 5.141930001198601e-06, "loss": 0.7257, "step": 38410 }, { "epoch": 1.63, "grad_norm": 7.07735561077932, "learning_rate": 5.140699364921095e-06, "loss": 0.7103, "step": 38415 }, { "epoch": 1.63, "grad_norm": 10.001674031349458, "learning_rate": 5.139468720113403e-06, "loss": 0.7512, "step": 38420 }, { "epoch": 1.63, "grad_norm": 6.729352659252857, "learning_rate": 5.138238066850132e-06, "loss": 0.7365, "step": 38425 }, { "epoch": 1.63, "grad_norm": 18.94733196076367, "learning_rate": 5.137007405205896e-06, "loss": 0.7132, "step": 38430 }, { "epoch": 1.63, "grad_norm": 5.464003027164705, "learning_rate": 5.1357767352553025e-06, "loss": 0.7165, "step": 38435 }, { "epoch": 1.63, "grad_norm": 10.10485007191571, "learning_rate": 5.134546057072965e-06, "loss": 0.7438, "step": 38440 }, { "epoch": 1.63, "grad_norm": 4.207127812957007, "learning_rate": 5.1333153707334985e-06, "loss": 0.7342, "step": 38445 }, { "epoch": 1.63, "grad_norm": 6.7435453679954955, "learning_rate": 5.132084676311512e-06, "loss": 0.7257, "step": 38450 }, { "epoch": 1.63, "grad_norm": 18.56717101228031, "learning_rate": 5.1308539738816225e-06, "loss": 0.7253, "step": 38455 }, { "epoch": 1.63, "grad_norm": 12.89417137245258, "learning_rate": 5.129623263518439e-06, "loss": 0.699, "step": 38460 }, { "epoch": 1.63, "grad_norm": 4.267103458787332, "learning_rate": 5.128392545296582e-06, "loss": 0.7318, "step": 38465 }, { "epoch": 1.63, "grad_norm": 7.148704075609645, "learning_rate": 5.127161819290662e-06, "loss": 0.7459, "step": 38470 }, { "epoch": 1.63, "grad_norm": 4.699177600899752, "learning_rate": 5.125931085575294e-06, "loss": 0.7291, "step": 38475 }, { "epoch": 1.63, "grad_norm": 4.308206033644535, "learning_rate": 5.124700344225098e-06, "loss": 0.7228, "step": 38480 }, { "epoch": 1.63, "grad_norm": 6.351906069194901, "learning_rate": 5.1234695953146855e-06, "loss": 0.7604, "step": 38485 }, { "epoch": 1.63, "grad_norm": 7.778468303711194, "learning_rate": 5.122238838918676e-06, "loss": 0.7339, "step": 38490 }, { "epoch": 1.63, "grad_norm": 4.6248196285404175, "learning_rate": 5.121008075111685e-06, "loss": 0.7569, "step": 38495 }, { "epoch": 1.63, "grad_norm": 11.706908765142028, "learning_rate": 5.119777303968331e-06, "loss": 0.7595, "step": 38500 }, { "epoch": 1.63, "grad_norm": 5.5344754047486, "learning_rate": 5.118546525563232e-06, "loss": 0.7209, "step": 38505 }, { "epoch": 1.63, "grad_norm": 7.93336636885656, "learning_rate": 5.117315739971007e-06, "loss": 0.781, "step": 38510 }, { "epoch": 1.63, "grad_norm": 5.7111109967346305, "learning_rate": 5.116084947266275e-06, "loss": 0.7385, "step": 38515 }, { "epoch": 1.63, "grad_norm": 10.7206835688398, "learning_rate": 5.1148541475236524e-06, "loss": 0.7267, "step": 38520 }, { "epoch": 1.63, "grad_norm": 8.979254290216709, "learning_rate": 5.113623340817763e-06, "loss": 0.7077, "step": 38525 }, { "epoch": 1.63, "grad_norm": 4.3100144186804705, "learning_rate": 5.112392527223224e-06, "loss": 0.7404, "step": 38530 }, { "epoch": 1.63, "grad_norm": 5.694691552937787, "learning_rate": 5.111161706814657e-06, "loss": 0.7404, "step": 38535 }, { "epoch": 1.63, "grad_norm": 5.417069111162206, "learning_rate": 5.109930879666683e-06, "loss": 0.7569, "step": 38540 }, { "epoch": 1.63, "grad_norm": 8.143259674468673, "learning_rate": 5.1087000458539235e-06, "loss": 0.7547, "step": 38545 }, { "epoch": 1.63, "grad_norm": 12.147930581744653, "learning_rate": 5.107469205451001e-06, "loss": 0.7441, "step": 38550 }, { "epoch": 1.63, "grad_norm": 24.780915801452174, "learning_rate": 5.106238358532538e-06, "loss": 0.7271, "step": 38555 }, { "epoch": 1.63, "grad_norm": 13.954128178077408, "learning_rate": 5.105007505173155e-06, "loss": 0.7158, "step": 38560 }, { "epoch": 1.63, "grad_norm": 7.604538578681041, "learning_rate": 5.1037766454474765e-06, "loss": 0.7011, "step": 38565 }, { "epoch": 1.63, "grad_norm": 4.722472459779395, "learning_rate": 5.102545779430126e-06, "loss": 0.7613, "step": 38570 }, { "epoch": 1.63, "grad_norm": 4.817638966081922, "learning_rate": 5.101314907195727e-06, "loss": 0.7076, "step": 38575 }, { "epoch": 1.63, "grad_norm": 4.462938828105389, "learning_rate": 5.100084028818904e-06, "loss": 0.748, "step": 38580 }, { "epoch": 1.63, "grad_norm": 9.868155733504105, "learning_rate": 5.098853144374279e-06, "loss": 0.7665, "step": 38585 }, { "epoch": 1.63, "grad_norm": 7.851905751315776, "learning_rate": 5.0976222539364824e-06, "loss": 0.7321, "step": 38590 }, { "epoch": 1.63, "grad_norm": 5.22019204904882, "learning_rate": 5.096391357580134e-06, "loss": 0.7523, "step": 38595 }, { "epoch": 1.63, "grad_norm": 5.708833159397612, "learning_rate": 5.095160455379861e-06, "loss": 0.7406, "step": 38600 }, { "epoch": 1.63, "grad_norm": 5.843196684451233, "learning_rate": 5.09392954741029e-06, "loss": 0.6938, "step": 38605 }, { "epoch": 1.63, "grad_norm": 10.061809364327924, "learning_rate": 5.09269863374605e-06, "loss": 0.7154, "step": 38610 }, { "epoch": 1.63, "grad_norm": 7.729138490638303, "learning_rate": 5.0914677144617616e-06, "loss": 0.7248, "step": 38615 }, { "epoch": 1.63, "grad_norm": 7.544875272525832, "learning_rate": 5.090236789632056e-06, "loss": 0.7064, "step": 38620 }, { "epoch": 1.63, "grad_norm": 10.947231427653804, "learning_rate": 5.089005859331561e-06, "loss": 0.738, "step": 38625 }, { "epoch": 1.63, "grad_norm": 7.169062010462444, "learning_rate": 5.087774923634901e-06, "loss": 0.7374, "step": 38630 }, { "epoch": 1.64, "grad_norm": 4.6152549406176275, "learning_rate": 5.086543982616706e-06, "loss": 0.7261, "step": 38635 }, { "epoch": 1.64, "grad_norm": 10.91028501616054, "learning_rate": 5.085313036351606e-06, "loss": 0.723, "step": 38640 }, { "epoch": 1.64, "grad_norm": 6.67682459044136, "learning_rate": 5.0840820849142255e-06, "loss": 0.7307, "step": 38645 }, { "epoch": 1.64, "grad_norm": 4.821486551977015, "learning_rate": 5.082851128379198e-06, "loss": 0.7239, "step": 38650 }, { "epoch": 1.64, "grad_norm": 7.5710057581430075, "learning_rate": 5.08162016682115e-06, "loss": 0.7385, "step": 38655 }, { "epoch": 1.64, "grad_norm": 10.005494273260792, "learning_rate": 5.080389200314711e-06, "loss": 0.7606, "step": 38660 }, { "epoch": 1.64, "grad_norm": 6.410629991613898, "learning_rate": 5.079158228934513e-06, "loss": 0.7249, "step": 38665 }, { "epoch": 1.64, "grad_norm": 5.413364675518424, "learning_rate": 5.0779272527551835e-06, "loss": 0.7097, "step": 38670 }, { "epoch": 1.64, "grad_norm": 10.187205683441405, "learning_rate": 5.076696271851357e-06, "loss": 0.7458, "step": 38675 }, { "epoch": 1.64, "grad_norm": 5.392312320910289, "learning_rate": 5.075465286297658e-06, "loss": 0.7296, "step": 38680 }, { "epoch": 1.64, "grad_norm": 4.512338703056201, "learning_rate": 5.074234296168722e-06, "loss": 0.7638, "step": 38685 }, { "epoch": 1.64, "grad_norm": 5.024975186374453, "learning_rate": 5.073003301539179e-06, "loss": 0.6939, "step": 38690 }, { "epoch": 1.64, "grad_norm": 6.707456538155756, "learning_rate": 5.071772302483662e-06, "loss": 0.6945, "step": 38695 }, { "epoch": 1.64, "grad_norm": 9.027207596013906, "learning_rate": 5.070541299076801e-06, "loss": 0.7412, "step": 38700 }, { "epoch": 1.64, "grad_norm": 5.648928578291132, "learning_rate": 5.069310291393228e-06, "loss": 0.727, "step": 38705 }, { "epoch": 1.64, "grad_norm": 5.7591575323394615, "learning_rate": 5.0680792795075774e-06, "loss": 0.6787, "step": 38710 }, { "epoch": 1.64, "grad_norm": 6.060886596846138, "learning_rate": 5.066848263494481e-06, "loss": 0.7224, "step": 38715 }, { "epoch": 1.64, "grad_norm": 4.155569464062171, "learning_rate": 5.06561724342857e-06, "loss": 0.7086, "step": 38720 }, { "epoch": 1.64, "grad_norm": 7.472648710194574, "learning_rate": 5.0643862193844795e-06, "loss": 0.7181, "step": 38725 }, { "epoch": 1.64, "grad_norm": 19.17727010110648, "learning_rate": 5.063155191436842e-06, "loss": 0.7455, "step": 38730 }, { "epoch": 1.64, "grad_norm": 6.7514713197903005, "learning_rate": 5.061924159660291e-06, "loss": 0.742, "step": 38735 }, { "epoch": 1.64, "grad_norm": 5.786379675127142, "learning_rate": 5.060693124129461e-06, "loss": 0.7212, "step": 38740 }, { "epoch": 1.64, "grad_norm": 8.225353448777373, "learning_rate": 5.059462084918985e-06, "loss": 0.6941, "step": 38745 }, { "epoch": 1.64, "grad_norm": 8.92921086634663, "learning_rate": 5.058231042103499e-06, "loss": 0.7104, "step": 38750 }, { "epoch": 1.64, "grad_norm": 15.895572467852839, "learning_rate": 5.0569999957576355e-06, "loss": 0.7477, "step": 38755 }, { "epoch": 1.64, "grad_norm": 8.54227981184666, "learning_rate": 5.055768945956031e-06, "loss": 0.736, "step": 38760 }, { "epoch": 1.64, "grad_norm": 17.389323403061443, "learning_rate": 5.054537892773319e-06, "loss": 0.7363, "step": 38765 }, { "epoch": 1.64, "grad_norm": 4.1832360109612345, "learning_rate": 5.053306836284136e-06, "loss": 0.7095, "step": 38770 }, { "epoch": 1.64, "grad_norm": 5.507847685104999, "learning_rate": 5.052075776563116e-06, "loss": 0.7349, "step": 38775 }, { "epoch": 1.64, "grad_norm": 5.574810514027077, "learning_rate": 5.050844713684894e-06, "loss": 0.7639, "step": 38780 }, { "epoch": 1.64, "grad_norm": 4.442106607597708, "learning_rate": 5.049613647724109e-06, "loss": 0.6987, "step": 38785 }, { "epoch": 1.64, "grad_norm": 6.0355019388825975, "learning_rate": 5.0483825787553915e-06, "loss": 0.737, "step": 38790 }, { "epoch": 1.64, "grad_norm": 7.681679663465263, "learning_rate": 5.047151506853384e-06, "loss": 0.7525, "step": 38795 }, { "epoch": 1.64, "grad_norm": 4.41609564935384, "learning_rate": 5.045920432092718e-06, "loss": 0.7196, "step": 38800 }, { "epoch": 1.64, "grad_norm": 6.096175048628023, "learning_rate": 5.0446893545480315e-06, "loss": 0.7239, "step": 38805 }, { "epoch": 1.64, "grad_norm": 4.458210694000894, "learning_rate": 5.043458274293961e-06, "loss": 0.7155, "step": 38810 }, { "epoch": 1.64, "grad_norm": 7.151649143661842, "learning_rate": 5.042227191405143e-06, "loss": 0.6856, "step": 38815 }, { "epoch": 1.64, "grad_norm": 4.819867556109795, "learning_rate": 5.040996105956217e-06, "loss": 0.7484, "step": 38820 }, { "epoch": 1.64, "grad_norm": 13.803193459452087, "learning_rate": 5.039765018021816e-06, "loss": 0.7409, "step": 38825 }, { "epoch": 1.64, "grad_norm": 5.57802568665564, "learning_rate": 5.038533927676578e-06, "loss": 0.7428, "step": 38830 }, { "epoch": 1.64, "grad_norm": 10.979246592257468, "learning_rate": 5.037302834995145e-06, "loss": 0.7076, "step": 38835 }, { "epoch": 1.64, "grad_norm": 15.00731497398954, "learning_rate": 5.036071740052147e-06, "loss": 0.7278, "step": 38840 }, { "epoch": 1.64, "grad_norm": 4.659352886272187, "learning_rate": 5.034840642922229e-06, "loss": 0.7542, "step": 38845 }, { "epoch": 1.64, "grad_norm": 9.62919838009354, "learning_rate": 5.033609543680023e-06, "loss": 0.7113, "step": 38850 }, { "epoch": 1.64, "grad_norm": 29.68659959981478, "learning_rate": 5.0323784424001695e-06, "loss": 0.7319, "step": 38855 }, { "epoch": 1.64, "grad_norm": 22.718921663937778, "learning_rate": 5.031147339157308e-06, "loss": 0.6668, "step": 38860 }, { "epoch": 1.64, "grad_norm": 6.583284880052477, "learning_rate": 5.0299162340260735e-06, "loss": 0.7271, "step": 38865 }, { "epoch": 1.65, "grad_norm": 34.708676010055925, "learning_rate": 5.0286851270811065e-06, "loss": 0.7208, "step": 38870 }, { "epoch": 1.65, "grad_norm": 15.969158357696985, "learning_rate": 5.0274540183970446e-06, "loss": 0.7454, "step": 38875 }, { "epoch": 1.65, "grad_norm": 27.920971463055974, "learning_rate": 5.026222908048527e-06, "loss": 0.7622, "step": 38880 }, { "epoch": 1.65, "grad_norm": 7.810871778982594, "learning_rate": 5.024991796110193e-06, "loss": 0.7124, "step": 38885 }, { "epoch": 1.65, "grad_norm": 12.50487040816802, "learning_rate": 5.023760682656676e-06, "loss": 0.7434, "step": 38890 }, { "epoch": 1.65, "grad_norm": 5.911366316988982, "learning_rate": 5.022529567762622e-06, "loss": 0.7158, "step": 38895 }, { "epoch": 1.65, "grad_norm": 12.35380177421443, "learning_rate": 5.021298451502665e-06, "loss": 0.7578, "step": 38900 }, { "epoch": 1.65, "grad_norm": 7.045920455908149, "learning_rate": 5.020067333951447e-06, "loss": 0.7923, "step": 38905 }, { "epoch": 1.65, "grad_norm": 6.046330821111917, "learning_rate": 5.018836215183605e-06, "loss": 0.7464, "step": 38910 }, { "epoch": 1.65, "grad_norm": 8.733348969364037, "learning_rate": 5.01760509527378e-06, "loss": 0.6798, "step": 38915 }, { "epoch": 1.65, "grad_norm": 4.029126130778101, "learning_rate": 5.016373974296609e-06, "loss": 0.7184, "step": 38920 }, { "epoch": 1.65, "grad_norm": 6.708077686296743, "learning_rate": 5.015142852326731e-06, "loss": 0.7517, "step": 38925 }, { "epoch": 1.65, "grad_norm": 4.867181842421245, "learning_rate": 5.0139117294387885e-06, "loss": 0.7234, "step": 38930 }, { "epoch": 1.65, "grad_norm": 15.224036817365553, "learning_rate": 5.0126806057074175e-06, "loss": 0.7098, "step": 38935 }, { "epoch": 1.65, "grad_norm": 5.936656548111875, "learning_rate": 5.011449481207259e-06, "loss": 0.781, "step": 38940 }, { "epoch": 1.65, "grad_norm": 13.211384679672173, "learning_rate": 5.010218356012953e-06, "loss": 0.7278, "step": 38945 }, { "epoch": 1.65, "grad_norm": 4.4169150718912675, "learning_rate": 5.008987230199136e-06, "loss": 0.7556, "step": 38950 }, { "epoch": 1.65, "grad_norm": 19.025581752349648, "learning_rate": 5.007756103840451e-06, "loss": 0.7185, "step": 38955 }, { "epoch": 1.65, "grad_norm": 12.317981852328783, "learning_rate": 5.006524977011536e-06, "loss": 0.7301, "step": 38960 }, { "epoch": 1.65, "grad_norm": 8.732610992722464, "learning_rate": 5.005293849787032e-06, "loss": 0.7152, "step": 38965 }, { "epoch": 1.65, "grad_norm": 4.744911670654091, "learning_rate": 5.004062722241577e-06, "loss": 0.7225, "step": 38970 }, { "epoch": 1.65, "grad_norm": 10.214394631683735, "learning_rate": 5.00283159444981e-06, "loss": 0.7108, "step": 38975 }, { "epoch": 1.65, "grad_norm": 4.411985973658635, "learning_rate": 5.0016004664863726e-06, "loss": 0.7054, "step": 38980 }, { "epoch": 1.65, "grad_norm": 5.516479047345034, "learning_rate": 5.000369338425904e-06, "loss": 0.7911, "step": 38985 }, { "epoch": 1.65, "grad_norm": 6.259185252382097, "learning_rate": 4.999138210343043e-06, "loss": 0.7097, "step": 38990 }, { "epoch": 1.65, "grad_norm": 4.7664153334286175, "learning_rate": 4.99790708231243e-06, "loss": 0.6867, "step": 38995 }, { "epoch": 1.65, "grad_norm": 5.418664024187641, "learning_rate": 4.996675954408703e-06, "loss": 0.7566, "step": 39000 }, { "epoch": 1.65, "grad_norm": 5.684163844704137, "learning_rate": 4.995444826706505e-06, "loss": 0.7507, "step": 39005 }, { "epoch": 1.65, "grad_norm": 5.367469543189487, "learning_rate": 4.994213699280473e-06, "loss": 0.7564, "step": 39010 }, { "epoch": 1.65, "grad_norm": 4.894946726685241, "learning_rate": 4.992982572205248e-06, "loss": 0.7026, "step": 39015 }, { "epoch": 1.65, "grad_norm": 5.322554527229888, "learning_rate": 4.991751445555468e-06, "loss": 0.7298, "step": 39020 }, { "epoch": 1.65, "grad_norm": 10.237237565124225, "learning_rate": 4.990520319405774e-06, "loss": 0.7274, "step": 39025 }, { "epoch": 1.65, "grad_norm": 6.291115550015801, "learning_rate": 4.989289193830805e-06, "loss": 0.7115, "step": 39030 }, { "epoch": 1.65, "grad_norm": 8.642118633755736, "learning_rate": 4.988058068905202e-06, "loss": 0.7116, "step": 39035 }, { "epoch": 1.65, "grad_norm": 12.411211068878652, "learning_rate": 4.986826944703603e-06, "loss": 0.6878, "step": 39040 }, { "epoch": 1.65, "grad_norm": 4.40767518200429, "learning_rate": 4.985595821300645e-06, "loss": 0.7228, "step": 39045 }, { "epoch": 1.65, "grad_norm": 6.229935722078179, "learning_rate": 4.984364698770972e-06, "loss": 0.7251, "step": 39050 }, { "epoch": 1.65, "grad_norm": 4.307865294263673, "learning_rate": 4.983133577189221e-06, "loss": 0.7157, "step": 39055 }, { "epoch": 1.65, "grad_norm": 9.522544810074576, "learning_rate": 4.981902456630031e-06, "loss": 0.7121, "step": 39060 }, { "epoch": 1.65, "grad_norm": 4.414956432548025, "learning_rate": 4.980671337168042e-06, "loss": 0.7081, "step": 39065 }, { "epoch": 1.65, "grad_norm": 6.829418573053323, "learning_rate": 4.979440218877892e-06, "loss": 0.7757, "step": 39070 }, { "epoch": 1.65, "grad_norm": 17.100654891590057, "learning_rate": 4.978209101834222e-06, "loss": 0.7311, "step": 39075 }, { "epoch": 1.65, "grad_norm": 7.431005134202259, "learning_rate": 4.97697798611167e-06, "loss": 0.7256, "step": 39080 }, { "epoch": 1.65, "grad_norm": 13.696994487838916, "learning_rate": 4.975746871784874e-06, "loss": 0.7508, "step": 39085 }, { "epoch": 1.65, "grad_norm": 14.161268596714205, "learning_rate": 4.9745157589284745e-06, "loss": 0.7231, "step": 39090 }, { "epoch": 1.65, "grad_norm": 5.352870700421167, "learning_rate": 4.9732846476171094e-06, "loss": 0.735, "step": 39095 }, { "epoch": 1.65, "grad_norm": 9.547002155121941, "learning_rate": 4.972053537925418e-06, "loss": 0.7196, "step": 39100 }, { "epoch": 1.65, "grad_norm": 7.9030813411944125, "learning_rate": 4.970822429928037e-06, "loss": 0.7626, "step": 39105 }, { "epoch": 1.66, "grad_norm": 11.560078119663775, "learning_rate": 4.969591323699607e-06, "loss": 0.7205, "step": 39110 }, { "epoch": 1.66, "grad_norm": 3.9546946218618078, "learning_rate": 4.9683602193147654e-06, "loss": 0.7055, "step": 39115 }, { "epoch": 1.66, "grad_norm": 6.182221932789773, "learning_rate": 4.967129116848149e-06, "loss": 0.7228, "step": 39120 }, { "epoch": 1.66, "grad_norm": 5.431510659738178, "learning_rate": 4.965898016374399e-06, "loss": 0.7037, "step": 39125 }, { "epoch": 1.66, "grad_norm": 5.751674381278151, "learning_rate": 4.964666917968152e-06, "loss": 0.7312, "step": 39130 }, { "epoch": 1.66, "grad_norm": 6.723498859834188, "learning_rate": 4.9634358217040445e-06, "loss": 0.757, "step": 39135 }, { "epoch": 1.66, "grad_norm": 7.49013897309288, "learning_rate": 4.962204727656717e-06, "loss": 0.7199, "step": 39140 }, { "epoch": 1.66, "grad_norm": 4.59015418272288, "learning_rate": 4.960973635900804e-06, "loss": 0.7022, "step": 39145 }, { "epoch": 1.66, "grad_norm": 4.595045530551842, "learning_rate": 4.959742546510946e-06, "loss": 0.7503, "step": 39150 }, { "epoch": 1.66, "grad_norm": 4.309591454294607, "learning_rate": 4.958511459561778e-06, "loss": 0.6915, "step": 39155 }, { "epoch": 1.66, "grad_norm": 4.699324864320143, "learning_rate": 4.957280375127937e-06, "loss": 0.733, "step": 39160 }, { "epoch": 1.66, "grad_norm": 4.296896202533893, "learning_rate": 4.956049293284062e-06, "loss": 0.7018, "step": 39165 }, { "epoch": 1.66, "grad_norm": 5.282659105772536, "learning_rate": 4.954818214104789e-06, "loss": 0.6878, "step": 39170 }, { "epoch": 1.66, "grad_norm": 4.660140119309361, "learning_rate": 4.9535871376647546e-06, "loss": 0.731, "step": 39175 }, { "epoch": 1.66, "grad_norm": 4.693880405776472, "learning_rate": 4.952356064038595e-06, "loss": 0.732, "step": 39180 }, { "epoch": 1.66, "grad_norm": 6.141226340867173, "learning_rate": 4.951124993300947e-06, "loss": 0.7091, "step": 39185 }, { "epoch": 1.66, "grad_norm": 6.238699535472709, "learning_rate": 4.949893925526448e-06, "loss": 0.72, "step": 39190 }, { "epoch": 1.66, "grad_norm": 4.409368649569806, "learning_rate": 4.9486628607897304e-06, "loss": 0.7547, "step": 39195 }, { "epoch": 1.66, "grad_norm": 6.090632741482874, "learning_rate": 4.947431799165436e-06, "loss": 0.7536, "step": 39200 }, { "epoch": 1.66, "grad_norm": 4.960384708621088, "learning_rate": 4.946200740728193e-06, "loss": 0.7092, "step": 39205 }, { "epoch": 1.66, "grad_norm": 13.668174317046393, "learning_rate": 4.9449696855526425e-06, "loss": 0.7431, "step": 39210 }, { "epoch": 1.66, "grad_norm": 7.209868264852339, "learning_rate": 4.9437386337134165e-06, "loss": 0.6946, "step": 39215 }, { "epoch": 1.66, "grad_norm": 5.768132793817733, "learning_rate": 4.942507585285152e-06, "loss": 0.7295, "step": 39220 }, { "epoch": 1.66, "grad_norm": 4.589142410257591, "learning_rate": 4.941276540342485e-06, "loss": 0.7153, "step": 39225 }, { "epoch": 1.66, "grad_norm": 4.93283486789867, "learning_rate": 4.9400454989600456e-06, "loss": 0.7498, "step": 39230 }, { "epoch": 1.66, "grad_norm": 5.458084878919453, "learning_rate": 4.938814461212472e-06, "loss": 0.7639, "step": 39235 }, { "epoch": 1.66, "grad_norm": 6.275800369154017, "learning_rate": 4.937583427174397e-06, "loss": 0.7189, "step": 39240 }, { "epoch": 1.66, "grad_norm": 6.333813191696346, "learning_rate": 4.936352396920457e-06, "loss": 0.7347, "step": 39245 }, { "epoch": 1.66, "grad_norm": 6.784280836713963, "learning_rate": 4.935121370525281e-06, "loss": 0.7298, "step": 39250 }, { "epoch": 1.66, "grad_norm": 6.472661722034233, "learning_rate": 4.933890348063507e-06, "loss": 0.7376, "step": 39255 }, { "epoch": 1.66, "grad_norm": 4.959245294242832, "learning_rate": 4.932659329609766e-06, "loss": 0.7467, "step": 39260 }, { "epoch": 1.66, "grad_norm": 4.853706487299998, "learning_rate": 4.931428315238691e-06, "loss": 0.7201, "step": 39265 }, { "epoch": 1.66, "grad_norm": 8.81919366211316, "learning_rate": 4.930197305024915e-06, "loss": 0.7198, "step": 39270 }, { "epoch": 1.66, "grad_norm": 5.204611284738818, "learning_rate": 4.928966299043072e-06, "loss": 0.7467, "step": 39275 }, { "epoch": 1.66, "grad_norm": 7.056566342560484, "learning_rate": 4.927735297367792e-06, "loss": 0.7232, "step": 39280 }, { "epoch": 1.66, "grad_norm": 12.032987357069556, "learning_rate": 4.926504300073709e-06, "loss": 0.7312, "step": 39285 }, { "epoch": 1.66, "grad_norm": 4.480974416592697, "learning_rate": 4.925273307235454e-06, "loss": 0.69, "step": 39290 }, { "epoch": 1.66, "grad_norm": 13.862688240696299, "learning_rate": 4.924042318927656e-06, "loss": 0.711, "step": 39295 }, { "epoch": 1.66, "grad_norm": 7.048897852494733, "learning_rate": 4.922811335224951e-06, "loss": 0.7429, "step": 39300 }, { "epoch": 1.66, "grad_norm": 6.908037277333349, "learning_rate": 4.921580356201966e-06, "loss": 0.7406, "step": 39305 }, { "epoch": 1.66, "grad_norm": 4.9227218040384795, "learning_rate": 4.920349381933333e-06, "loss": 0.7098, "step": 39310 }, { "epoch": 1.66, "grad_norm": 5.58803533390825, "learning_rate": 4.919118412493683e-06, "loss": 0.7395, "step": 39315 }, { "epoch": 1.66, "grad_norm": 14.716148994417422, "learning_rate": 4.917887447957645e-06, "loss": 0.698, "step": 39320 }, { "epoch": 1.66, "grad_norm": 10.682936779010463, "learning_rate": 4.91665648839985e-06, "loss": 0.7289, "step": 39325 }, { "epoch": 1.66, "grad_norm": 5.268157920334467, "learning_rate": 4.915425533894925e-06, "loss": 0.6791, "step": 39330 }, { "epoch": 1.66, "grad_norm": 10.329798296391582, "learning_rate": 4.914194584517502e-06, "loss": 0.7317, "step": 39335 }, { "epoch": 1.66, "grad_norm": 6.5981460657342845, "learning_rate": 4.9129636403422086e-06, "loss": 0.7316, "step": 39340 }, { "epoch": 1.67, "grad_norm": 5.090351394412334, "learning_rate": 4.911732701443673e-06, "loss": 0.715, "step": 39345 }, { "epoch": 1.67, "grad_norm": 21.277789314270045, "learning_rate": 4.910501767896524e-06, "loss": 0.771, "step": 39350 }, { "epoch": 1.67, "grad_norm": 10.221220727549627, "learning_rate": 4.909270839775388e-06, "loss": 0.7229, "step": 39355 }, { "epoch": 1.67, "grad_norm": 10.009485056935914, "learning_rate": 4.908039917154897e-06, "loss": 0.7358, "step": 39360 }, { "epoch": 1.67, "grad_norm": 5.703508019823032, "learning_rate": 4.906809000109672e-06, "loss": 0.7203, "step": 39365 }, { "epoch": 1.67, "grad_norm": 11.042882271869123, "learning_rate": 4.905578088714344e-06, "loss": 0.7013, "step": 39370 }, { "epoch": 1.67, "grad_norm": 6.131777378595308, "learning_rate": 4.904347183043537e-06, "loss": 0.7293, "step": 39375 }, { "epoch": 1.67, "grad_norm": 11.986949587345036, "learning_rate": 4.9031162831718795e-06, "loss": 0.7124, "step": 39380 }, { "epoch": 1.67, "grad_norm": 6.590058305911143, "learning_rate": 4.9018853891739965e-06, "loss": 0.6811, "step": 39385 }, { "epoch": 1.67, "grad_norm": 9.316860790830962, "learning_rate": 4.9006545011245125e-06, "loss": 0.7493, "step": 39390 }, { "epoch": 1.67, "grad_norm": 6.72287251393382, "learning_rate": 4.8994236190980545e-06, "loss": 0.7174, "step": 39395 }, { "epoch": 1.67, "grad_norm": 7.854920710539936, "learning_rate": 4.898192743169245e-06, "loss": 0.7014, "step": 39400 }, { "epoch": 1.67, "grad_norm": 4.728179292396962, "learning_rate": 4.896961873412711e-06, "loss": 0.7076, "step": 39405 }, { "epoch": 1.67, "grad_norm": 6.259499526374727, "learning_rate": 4.895731009903075e-06, "loss": 0.7227, "step": 39410 }, { "epoch": 1.67, "grad_norm": 6.491012071798648, "learning_rate": 4.8945001527149585e-06, "loss": 0.7395, "step": 39415 }, { "epoch": 1.67, "grad_norm": 4.581669912223495, "learning_rate": 4.8932693019229895e-06, "loss": 0.7019, "step": 39420 }, { "epoch": 1.67, "grad_norm": 6.3564090777258695, "learning_rate": 4.892038457601786e-06, "loss": 0.7311, "step": 39425 }, { "epoch": 1.67, "grad_norm": 12.427237124117909, "learning_rate": 4.890807619825975e-06, "loss": 0.7517, "step": 39430 }, { "epoch": 1.67, "grad_norm": 7.00105854768971, "learning_rate": 4.8895767886701765e-06, "loss": 0.7061, "step": 39435 }, { "epoch": 1.67, "grad_norm": 7.239250098341716, "learning_rate": 4.88834596420901e-06, "loss": 0.7166, "step": 39440 }, { "epoch": 1.67, "grad_norm": 5.070223604284234, "learning_rate": 4.887115146517101e-06, "loss": 0.699, "step": 39445 }, { "epoch": 1.67, "grad_norm": 4.760926303968526, "learning_rate": 4.885884335669066e-06, "loss": 0.7095, "step": 39450 }, { "epoch": 1.67, "grad_norm": 5.894969297362118, "learning_rate": 4.884653531739529e-06, "loss": 0.7275, "step": 39455 }, { "epoch": 1.67, "grad_norm": 4.423920306033041, "learning_rate": 4.883422734803109e-06, "loss": 0.7167, "step": 39460 }, { "epoch": 1.67, "grad_norm": 5.30283957107836, "learning_rate": 4.882191944934424e-06, "loss": 0.7234, "step": 39465 }, { "epoch": 1.67, "grad_norm": 7.114078170419382, "learning_rate": 4.8809611622080955e-06, "loss": 0.7554, "step": 39470 }, { "epoch": 1.67, "grad_norm": 4.570009689911861, "learning_rate": 4.8797303866987395e-06, "loss": 0.7097, "step": 39475 }, { "epoch": 1.67, "grad_norm": 6.13467054813339, "learning_rate": 4.8784996184809776e-06, "loss": 0.728, "step": 39480 }, { "epoch": 1.67, "grad_norm": 5.694407376195911, "learning_rate": 4.877268857629425e-06, "loss": 0.762, "step": 39485 }, { "epoch": 1.67, "grad_norm": 4.658332704738483, "learning_rate": 4.8760381042187e-06, "loss": 0.7053, "step": 39490 }, { "epoch": 1.67, "grad_norm": 5.97803009345767, "learning_rate": 4.87480735832342e-06, "loss": 0.7013, "step": 39495 }, { "epoch": 1.67, "grad_norm": 10.12072431355536, "learning_rate": 4.873576620018201e-06, "loss": 0.712, "step": 39500 }, { "epoch": 1.67, "grad_norm": 5.419425943375269, "learning_rate": 4.872345889377659e-06, "loss": 0.6834, "step": 39505 }, { "epoch": 1.67, "grad_norm": 7.293104090786496, "learning_rate": 4.87111516647641e-06, "loss": 0.6874, "step": 39510 }, { "epoch": 1.67, "grad_norm": 5.941988238994929, "learning_rate": 4.869884451389068e-06, "loss": 0.7419, "step": 39515 }, { "epoch": 1.67, "grad_norm": 6.253452100644815, "learning_rate": 4.86865374419025e-06, "loss": 0.6969, "step": 39520 }, { "epoch": 1.67, "grad_norm": 4.262419430038404, "learning_rate": 4.867423044954569e-06, "loss": 0.7412, "step": 39525 }, { "epoch": 1.67, "grad_norm": 4.460377314094931, "learning_rate": 4.8661923537566365e-06, "loss": 0.7011, "step": 39530 }, { "epoch": 1.67, "grad_norm": 4.712015111269397, "learning_rate": 4.864961670671068e-06, "loss": 0.6956, "step": 39535 }, { "epoch": 1.67, "grad_norm": 4.966923424402484, "learning_rate": 4.863730995772476e-06, "loss": 0.7222, "step": 39540 }, { "epoch": 1.67, "grad_norm": 4.59073988007475, "learning_rate": 4.862500329135471e-06, "loss": 0.6867, "step": 39545 }, { "epoch": 1.67, "grad_norm": 5.46026171760473, "learning_rate": 4.861269670834668e-06, "loss": 0.7002, "step": 39550 }, { "epoch": 1.67, "grad_norm": 4.2631493064104635, "learning_rate": 4.860039020944674e-06, "loss": 0.7512, "step": 39555 }, { "epoch": 1.67, "grad_norm": 9.416840917886876, "learning_rate": 4.858808379540103e-06, "loss": 0.7044, "step": 39560 }, { "epoch": 1.67, "grad_norm": 6.351489257132697, "learning_rate": 4.857577746695564e-06, "loss": 0.7312, "step": 39565 }, { "epoch": 1.67, "grad_norm": 8.825593039475388, "learning_rate": 4.856347122485666e-06, "loss": 0.6902, "step": 39570 }, { "epoch": 1.67, "grad_norm": 5.794633943493452, "learning_rate": 4.85511650698502e-06, "loss": 0.7127, "step": 39575 }, { "epoch": 1.68, "grad_norm": 9.177521844398603, "learning_rate": 4.853885900268234e-06, "loss": 0.7398, "step": 39580 }, { "epoch": 1.68, "grad_norm": 9.493641002621619, "learning_rate": 4.852655302409913e-06, "loss": 0.724, "step": 39585 }, { "epoch": 1.68, "grad_norm": 4.3382402083022065, "learning_rate": 4.851424713484669e-06, "loss": 0.7383, "step": 39590 }, { "epoch": 1.68, "grad_norm": 19.71317207737549, "learning_rate": 4.850194133567107e-06, "loss": 0.7353, "step": 39595 }, { "epoch": 1.68, "grad_norm": 17.52905487377739, "learning_rate": 4.8489635627318325e-06, "loss": 0.7245, "step": 39600 }, { "epoch": 1.68, "grad_norm": 8.14286226907859, "learning_rate": 4.847733001053453e-06, "loss": 0.6979, "step": 39605 }, { "epoch": 1.68, "grad_norm": 11.522905329586116, "learning_rate": 4.846502448606572e-06, "loss": 0.7393, "step": 39610 }, { "epoch": 1.68, "grad_norm": 4.40470777967269, "learning_rate": 4.845271905465796e-06, "loss": 0.6901, "step": 39615 }, { "epoch": 1.68, "grad_norm": 4.47893831147272, "learning_rate": 4.844041371705729e-06, "loss": 0.7544, "step": 39620 }, { "epoch": 1.68, "grad_norm": 4.943106389762857, "learning_rate": 4.842810847400974e-06, "loss": 0.7191, "step": 39625 }, { "epoch": 1.68, "grad_norm": 6.756408825568372, "learning_rate": 4.841580332626135e-06, "loss": 0.7082, "step": 39630 }, { "epoch": 1.68, "grad_norm": 6.315338751477616, "learning_rate": 4.840349827455812e-06, "loss": 0.7228, "step": 39635 }, { "epoch": 1.68, "grad_norm": 5.401464589143955, "learning_rate": 4.83911933196461e-06, "loss": 0.7146, "step": 39640 }, { "epoch": 1.68, "grad_norm": 6.485151629827089, "learning_rate": 4.83788884622713e-06, "loss": 0.7651, "step": 39645 }, { "epoch": 1.68, "grad_norm": 5.114077904628738, "learning_rate": 4.8366583703179696e-06, "loss": 0.7168, "step": 39650 }, { "epoch": 1.68, "grad_norm": 5.145564990854587, "learning_rate": 4.835427904311733e-06, "loss": 0.7105, "step": 39655 }, { "epoch": 1.68, "grad_norm": 4.561224035126282, "learning_rate": 4.834197448283016e-06, "loss": 0.7441, "step": 39660 }, { "epoch": 1.68, "grad_norm": 5.0698409652583845, "learning_rate": 4.83296700230642e-06, "loss": 0.69, "step": 39665 }, { "epoch": 1.68, "grad_norm": 6.601807455960646, "learning_rate": 4.831736566456543e-06, "loss": 0.7276, "step": 39670 }, { "epoch": 1.68, "grad_norm": 7.624826856869231, "learning_rate": 4.830506140807982e-06, "loss": 0.7135, "step": 39675 }, { "epoch": 1.68, "grad_norm": 4.848990327930977, "learning_rate": 4.829275725435335e-06, "loss": 0.7073, "step": 39680 }, { "epoch": 1.68, "grad_norm": 4.437641584231019, "learning_rate": 4.828045320413198e-06, "loss": 0.7009, "step": 39685 }, { "epoch": 1.68, "grad_norm": 4.379580425533575, "learning_rate": 4.826814925816167e-06, "loss": 0.6975, "step": 39690 }, { "epoch": 1.68, "grad_norm": 5.831021137783145, "learning_rate": 4.825584541718835e-06, "loss": 0.7528, "step": 39695 }, { "epoch": 1.68, "grad_norm": 9.578861034865445, "learning_rate": 4.8243541681958e-06, "loss": 0.6722, "step": 39700 }, { "epoch": 1.68, "grad_norm": 4.291883998533101, "learning_rate": 4.8231238053216534e-06, "loss": 0.6925, "step": 39705 }, { "epoch": 1.68, "grad_norm": 7.360668821145277, "learning_rate": 4.821893453170991e-06, "loss": 0.688, "step": 39710 }, { "epoch": 1.68, "grad_norm": 6.406708146098044, "learning_rate": 4.820663111818402e-06, "loss": 0.7119, "step": 39715 }, { "epoch": 1.68, "grad_norm": 4.521141169289952, "learning_rate": 4.819432781338481e-06, "loss": 0.7396, "step": 39720 }, { "epoch": 1.68, "grad_norm": 6.283070472792619, "learning_rate": 4.818202461805819e-06, "loss": 0.7734, "step": 39725 }, { "epoch": 1.68, "grad_norm": 11.634371063328796, "learning_rate": 4.816972153295006e-06, "loss": 0.6897, "step": 39730 }, { "epoch": 1.68, "grad_norm": 5.129365848161817, "learning_rate": 4.815741855880632e-06, "loss": 0.732, "step": 39735 }, { "epoch": 1.68, "grad_norm": 5.686810705572237, "learning_rate": 4.814511569637288e-06, "loss": 0.7025, "step": 39740 }, { "epoch": 1.68, "grad_norm": 5.356507928196501, "learning_rate": 4.813281294639559e-06, "loss": 0.6915, "step": 39745 }, { "epoch": 1.68, "grad_norm": 9.483271930102498, "learning_rate": 4.812051030962036e-06, "loss": 0.7392, "step": 39750 }, { "epoch": 1.68, "grad_norm": 6.1640249320992, "learning_rate": 4.810820778679305e-06, "loss": 0.7636, "step": 39755 }, { "epoch": 1.68, "grad_norm": 4.4631727948873365, "learning_rate": 4.8095905378659535e-06, "loss": 0.7272, "step": 39760 }, { "epoch": 1.68, "grad_norm": 5.129178105104451, "learning_rate": 4.808360308596568e-06, "loss": 0.6786, "step": 39765 }, { "epoch": 1.68, "grad_norm": 5.24952624696542, "learning_rate": 4.80713009094573e-06, "loss": 0.7135, "step": 39770 }, { "epoch": 1.68, "grad_norm": 4.461017439161896, "learning_rate": 4.8058998849880275e-06, "loss": 0.7123, "step": 39775 }, { "epoch": 1.68, "grad_norm": 4.854926805542565, "learning_rate": 4.804669690798043e-06, "loss": 0.717, "step": 39780 }, { "epoch": 1.68, "grad_norm": 4.179146733034479, "learning_rate": 4.803439508450361e-06, "loss": 0.6931, "step": 39785 }, { "epoch": 1.68, "grad_norm": 5.040728834183274, "learning_rate": 4.8022093380195615e-06, "loss": 0.6892, "step": 39790 }, { "epoch": 1.68, "grad_norm": 6.425779939277026, "learning_rate": 4.800979179580227e-06, "loss": 0.716, "step": 39795 }, { "epoch": 1.68, "grad_norm": 4.249803249537637, "learning_rate": 4.79974903320694e-06, "loss": 0.688, "step": 39800 }, { "epoch": 1.68, "grad_norm": 9.369177953038742, "learning_rate": 4.798518898974277e-06, "loss": 0.701, "step": 39805 }, { "epoch": 1.68, "grad_norm": 8.63859999371445, "learning_rate": 4.797288776956822e-06, "loss": 0.7096, "step": 39810 }, { "epoch": 1.69, "grad_norm": 10.550037524415822, "learning_rate": 4.79605866722915e-06, "loss": 0.7181, "step": 39815 }, { "epoch": 1.69, "grad_norm": 6.951470479398299, "learning_rate": 4.7948285698658405e-06, "loss": 0.7087, "step": 39820 }, { "epoch": 1.69, "grad_norm": 11.595479918201391, "learning_rate": 4.7935984849414705e-06, "loss": 0.7452, "step": 39825 }, { "epoch": 1.69, "grad_norm": 4.6329078374578705, "learning_rate": 4.792368412530616e-06, "loss": 0.7389, "step": 39830 }, { "epoch": 1.69, "grad_norm": 6.095987535558149, "learning_rate": 4.791138352707854e-06, "loss": 0.7478, "step": 39835 }, { "epoch": 1.69, "grad_norm": 4.274888306313262, "learning_rate": 4.789908305547759e-06, "loss": 0.6992, "step": 39840 }, { "epoch": 1.69, "grad_norm": 6.35511128294863, "learning_rate": 4.788678271124904e-06, "loss": 0.7184, "step": 39845 }, { "epoch": 1.69, "grad_norm": 4.431734428530755, "learning_rate": 4.787448249513863e-06, "loss": 0.744, "step": 39850 }, { "epoch": 1.69, "grad_norm": 4.7091540250472255, "learning_rate": 4.786218240789208e-06, "loss": 0.7274, "step": 39855 }, { "epoch": 1.69, "grad_norm": 5.867142880139954, "learning_rate": 4.784988245025512e-06, "loss": 0.7613, "step": 39860 }, { "epoch": 1.69, "grad_norm": 6.280060261176426, "learning_rate": 4.7837582622973436e-06, "loss": 0.7267, "step": 39865 }, { "epoch": 1.69, "grad_norm": 4.942731454481499, "learning_rate": 4.7825282926792765e-06, "loss": 0.708, "step": 39870 }, { "epoch": 1.69, "grad_norm": 6.0298091550005815, "learning_rate": 4.781298336245879e-06, "loss": 0.7299, "step": 39875 }, { "epoch": 1.69, "grad_norm": 4.899510424477942, "learning_rate": 4.780068393071717e-06, "loss": 0.6671, "step": 39880 }, { "epoch": 1.69, "grad_norm": 5.5852446082870175, "learning_rate": 4.778838463231361e-06, "loss": 0.7021, "step": 39885 }, { "epoch": 1.69, "grad_norm": 5.093748580642361, "learning_rate": 4.777608546799378e-06, "loss": 0.7335, "step": 39890 }, { "epoch": 1.69, "grad_norm": 7.192362579982167, "learning_rate": 4.776378643850335e-06, "loss": 0.6636, "step": 39895 }, { "epoch": 1.69, "grad_norm": 5.033108171641508, "learning_rate": 4.775148754458795e-06, "loss": 0.7086, "step": 39900 }, { "epoch": 1.69, "grad_norm": 4.275264768661221, "learning_rate": 4.7739188786993225e-06, "loss": 0.701, "step": 39905 }, { "epoch": 1.69, "grad_norm": 6.885510907876548, "learning_rate": 4.772689016646484e-06, "loss": 0.6726, "step": 39910 }, { "epoch": 1.69, "grad_norm": 4.558695783358456, "learning_rate": 4.77145916837484e-06, "loss": 0.7225, "step": 39915 }, { "epoch": 1.69, "grad_norm": 4.217895765645389, "learning_rate": 4.770229333958955e-06, "loss": 0.7182, "step": 39920 }, { "epoch": 1.69, "grad_norm": 4.759919808358852, "learning_rate": 4.768999513473387e-06, "loss": 0.7038, "step": 39925 }, { "epoch": 1.69, "grad_norm": 4.404276679649746, "learning_rate": 4.767769706992698e-06, "loss": 0.7196, "step": 39930 }, { "epoch": 1.69, "grad_norm": 4.974461983498978, "learning_rate": 4.766539914591449e-06, "loss": 0.7524, "step": 39935 }, { "epoch": 1.69, "grad_norm": 3.9847870538195918, "learning_rate": 4.7653101363441955e-06, "loss": 0.7003, "step": 39940 }, { "epoch": 1.69, "grad_norm": 5.05523154703765, "learning_rate": 4.764080372325499e-06, "loss": 0.754, "step": 39945 }, { "epoch": 1.69, "grad_norm": 5.210498620113864, "learning_rate": 4.762850622609914e-06, "loss": 0.7019, "step": 39950 }, { "epoch": 1.69, "grad_norm": 9.861466082402762, "learning_rate": 4.761620887271996e-06, "loss": 0.6973, "step": 39955 }, { "epoch": 1.69, "grad_norm": 7.101914122790328, "learning_rate": 4.760391166386303e-06, "loss": 0.7179, "step": 39960 }, { "epoch": 1.69, "grad_norm": 8.648802924640338, "learning_rate": 4.759161460027387e-06, "loss": 0.7252, "step": 39965 }, { "epoch": 1.69, "grad_norm": 4.123606777815109, "learning_rate": 4.757931768269803e-06, "loss": 0.6936, "step": 39970 }, { "epoch": 1.69, "grad_norm": 9.756488595971078, "learning_rate": 4.756702091188102e-06, "loss": 0.721, "step": 39975 }, { "epoch": 1.69, "grad_norm": 4.332662992986555, "learning_rate": 4.755472428856836e-06, "loss": 0.7271, "step": 39980 }, { "epoch": 1.69, "grad_norm": 8.559651767187804, "learning_rate": 4.7542427813505575e-06, "loss": 0.7427, "step": 39985 }, { "epoch": 1.69, "grad_norm": 9.180718016379323, "learning_rate": 4.753013148743815e-06, "loss": 0.7406, "step": 39990 }, { "epoch": 1.69, "grad_norm": 4.217809733792976, "learning_rate": 4.7517835311111575e-06, "loss": 0.7086, "step": 39995 }, { "epoch": 1.69, "grad_norm": 4.745238403338596, "learning_rate": 4.750553928527134e-06, "loss": 0.6998, "step": 40000 }, { "epoch": 1.69, "grad_norm": 4.324525218520384, "learning_rate": 4.74932434106629e-06, "loss": 0.6864, "step": 40005 }, { "epoch": 1.69, "grad_norm": 4.891762423544337, "learning_rate": 4.748094768803175e-06, "loss": 0.7095, "step": 40010 }, { "epoch": 1.69, "grad_norm": 5.027405626283015, "learning_rate": 4.746865211812331e-06, "loss": 0.7473, "step": 40015 }, { "epoch": 1.69, "grad_norm": 7.488878478823422, "learning_rate": 4.745635670168304e-06, "loss": 0.7583, "step": 40020 }, { "epoch": 1.69, "grad_norm": 9.8943238792251, "learning_rate": 4.744406143945636e-06, "loss": 0.6804, "step": 40025 }, { "epoch": 1.69, "grad_norm": 9.976427902553, "learning_rate": 4.743176633218871e-06, "loss": 0.7185, "step": 40030 }, { "epoch": 1.69, "grad_norm": 14.360049235549553, "learning_rate": 4.741947138062549e-06, "loss": 0.6896, "step": 40035 }, { "epoch": 1.69, "grad_norm": 5.210505362779091, "learning_rate": 4.740717658551214e-06, "loss": 0.6848, "step": 40040 }, { "epoch": 1.69, "grad_norm": 10.888739064856459, "learning_rate": 4.7394881947594026e-06, "loss": 0.6733, "step": 40045 }, { "epoch": 1.69, "grad_norm": 9.480799646652288, "learning_rate": 4.738258746761654e-06, "loss": 0.7187, "step": 40050 }, { "epoch": 1.7, "grad_norm": 6.244256540168746, "learning_rate": 4.737029314632507e-06, "loss": 0.7168, "step": 40055 }, { "epoch": 1.7, "grad_norm": 8.070106624832603, "learning_rate": 4.735799898446498e-06, "loss": 0.7056, "step": 40060 }, { "epoch": 1.7, "grad_norm": 4.54451239076145, "learning_rate": 4.734570498278164e-06, "loss": 0.6979, "step": 40065 }, { "epoch": 1.7, "grad_norm": 6.4103768862418455, "learning_rate": 4.733341114202038e-06, "loss": 0.7191, "step": 40070 }, { "epoch": 1.7, "grad_norm": 5.37334694015689, "learning_rate": 4.732111746292654e-06, "loss": 0.7318, "step": 40075 }, { "epoch": 1.7, "grad_norm": 9.284819691901522, "learning_rate": 4.730882394624547e-06, "loss": 0.6985, "step": 40080 }, { "epoch": 1.7, "grad_norm": 4.879646331275877, "learning_rate": 4.7296530592722464e-06, "loss": 0.7274, "step": 40085 }, { "epoch": 1.7, "grad_norm": 4.413134281956985, "learning_rate": 4.7284237403102864e-06, "loss": 0.7219, "step": 40090 }, { "epoch": 1.7, "grad_norm": 7.627777403998653, "learning_rate": 4.727194437813194e-06, "loss": 0.6732, "step": 40095 }, { "epoch": 1.7, "grad_norm": 7.022429373693186, "learning_rate": 4.725965151855499e-06, "loss": 0.717, "step": 40100 }, { "epoch": 1.7, "grad_norm": 5.161534552545788, "learning_rate": 4.72473588251173e-06, "loss": 0.718, "step": 40105 }, { "epoch": 1.7, "grad_norm": 5.222594908998444, "learning_rate": 4.723506629856413e-06, "loss": 0.7452, "step": 40110 }, { "epoch": 1.7, "grad_norm": 5.164243188413465, "learning_rate": 4.722277393964075e-06, "loss": 0.6857, "step": 40115 }, { "epoch": 1.7, "grad_norm": 4.911601423939285, "learning_rate": 4.721048174909241e-06, "loss": 0.7031, "step": 40120 }, { "epoch": 1.7, "grad_norm": 4.303066566934856, "learning_rate": 4.719818972766434e-06, "loss": 0.6681, "step": 40125 }, { "epoch": 1.7, "grad_norm": 6.73612148922154, "learning_rate": 4.718589787610178e-06, "loss": 0.7469, "step": 40130 }, { "epoch": 1.7, "grad_norm": 5.392500709570763, "learning_rate": 4.717360619514994e-06, "loss": 0.709, "step": 40135 }, { "epoch": 1.7, "grad_norm": 6.327038120983462, "learning_rate": 4.716131468555402e-06, "loss": 0.7211, "step": 40140 }, { "epoch": 1.7, "grad_norm": 6.608148913910321, "learning_rate": 4.7149023348059245e-06, "loss": 0.7105, "step": 40145 }, { "epoch": 1.7, "grad_norm": 6.5791389418503465, "learning_rate": 4.713673218341077e-06, "loss": 0.7067, "step": 40150 }, { "epoch": 1.7, "grad_norm": 4.9580663846247655, "learning_rate": 4.712444119235381e-06, "loss": 0.7043, "step": 40155 }, { "epoch": 1.7, "grad_norm": 4.5917155931653015, "learning_rate": 4.71121503756335e-06, "loss": 0.6919, "step": 40160 }, { "epoch": 1.7, "grad_norm": 8.16942288658068, "learning_rate": 4.709985973399499e-06, "loss": 0.7134, "step": 40165 }, { "epoch": 1.7, "grad_norm": 8.608652546049841, "learning_rate": 4.708756926818348e-06, "loss": 0.698, "step": 40170 }, { "epoch": 1.7, "grad_norm": 4.745420255080977, "learning_rate": 4.707527897894404e-06, "loss": 0.7095, "step": 40175 }, { "epoch": 1.7, "grad_norm": 4.887335736110537, "learning_rate": 4.706298886702183e-06, "loss": 0.6958, "step": 40180 }, { "epoch": 1.7, "grad_norm": 4.485722528390209, "learning_rate": 4.705069893316194e-06, "loss": 0.7338, "step": 40185 }, { "epoch": 1.7, "grad_norm": 4.552133281193531, "learning_rate": 4.70384091781095e-06, "loss": 0.7142, "step": 40190 }, { "epoch": 1.7, "grad_norm": 4.302154707740554, "learning_rate": 4.702611960260957e-06, "loss": 0.6743, "step": 40195 }, { "epoch": 1.7, "grad_norm": 6.522868850517307, "learning_rate": 4.701383020740727e-06, "loss": 0.7358, "step": 40200 }, { "epoch": 1.7, "grad_norm": 12.653221367737457, "learning_rate": 4.700154099324764e-06, "loss": 0.696, "step": 40205 }, { "epoch": 1.7, "grad_norm": 10.998142257772557, "learning_rate": 4.698925196087573e-06, "loss": 0.7075, "step": 40210 }, { "epoch": 1.7, "grad_norm": 6.408545515555583, "learning_rate": 4.697696311103663e-06, "loss": 0.7205, "step": 40215 }, { "epoch": 1.7, "grad_norm": 4.825649986232741, "learning_rate": 4.696467444447534e-06, "loss": 0.683, "step": 40220 }, { "epoch": 1.7, "grad_norm": 11.771799509366787, "learning_rate": 4.69523859619369e-06, "loss": 0.7399, "step": 40225 }, { "epoch": 1.7, "grad_norm": 8.796182479290588, "learning_rate": 4.694009766416633e-06, "loss": 0.7237, "step": 40230 }, { "epoch": 1.7, "grad_norm": 5.663438462396627, "learning_rate": 4.692780955190861e-06, "loss": 0.7054, "step": 40235 }, { "epoch": 1.7, "grad_norm": 4.348336445676226, "learning_rate": 4.691552162590876e-06, "loss": 0.7062, "step": 40240 }, { "epoch": 1.7, "grad_norm": 5.866163268084782, "learning_rate": 4.690323388691173e-06, "loss": 0.7178, "step": 40245 }, { "epoch": 1.7, "grad_norm": 15.66581819928323, "learning_rate": 4.689094633566253e-06, "loss": 0.722, "step": 40250 }, { "epoch": 1.7, "grad_norm": 4.9008825748689535, "learning_rate": 4.687865897290609e-06, "loss": 0.7434, "step": 40255 }, { "epoch": 1.7, "grad_norm": 12.788330379681447, "learning_rate": 4.686637179938736e-06, "loss": 0.6936, "step": 40260 }, { "epoch": 1.7, "grad_norm": 4.528206323309659, "learning_rate": 4.685408481585128e-06, "loss": 0.7162, "step": 40265 }, { "epoch": 1.7, "grad_norm": 9.395030726229628, "learning_rate": 4.684179802304276e-06, "loss": 0.7015, "step": 40270 }, { "epoch": 1.7, "grad_norm": 4.228246568916481, "learning_rate": 4.682951142170673e-06, "loss": 0.6738, "step": 40275 }, { "epoch": 1.7, "grad_norm": 13.89603387132787, "learning_rate": 4.681722501258808e-06, "loss": 0.7307, "step": 40280 }, { "epoch": 1.7, "grad_norm": 8.846280609819637, "learning_rate": 4.680493879643171e-06, "loss": 0.6938, "step": 40285 }, { "epoch": 1.71, "grad_norm": 9.229496517600412, "learning_rate": 4.679265277398248e-06, "loss": 0.7186, "step": 40290 }, { "epoch": 1.71, "grad_norm": 8.264446525603521, "learning_rate": 4.6780366945985265e-06, "loss": 0.6943, "step": 40295 }, { "epoch": 1.71, "grad_norm": 7.812768525411609, "learning_rate": 4.6768081313184935e-06, "loss": 0.7278, "step": 40300 }, { "epoch": 1.71, "grad_norm": 8.67039790149491, "learning_rate": 4.675579587632631e-06, "loss": 0.7026, "step": 40305 }, { "epoch": 1.71, "grad_norm": 5.098840076953424, "learning_rate": 4.674351063615421e-06, "loss": 0.7088, "step": 40310 }, { "epoch": 1.71, "grad_norm": 7.757394437001381, "learning_rate": 4.673122559341348e-06, "loss": 0.7291, "step": 40315 }, { "epoch": 1.71, "grad_norm": 4.767413468165906, "learning_rate": 4.67189407488489e-06, "loss": 0.6992, "step": 40320 }, { "epoch": 1.71, "grad_norm": 8.593469047111174, "learning_rate": 4.6706656103205295e-06, "loss": 0.6824, "step": 40325 }, { "epoch": 1.71, "grad_norm": 10.180915464428734, "learning_rate": 4.6694371657227425e-06, "loss": 0.7234, "step": 40330 }, { "epoch": 1.71, "grad_norm": 5.082112532173597, "learning_rate": 4.668208741166007e-06, "loss": 0.7265, "step": 40335 }, { "epoch": 1.71, "grad_norm": 18.246601260167385, "learning_rate": 4.666980336724797e-06, "loss": 0.6961, "step": 40340 }, { "epoch": 1.71, "grad_norm": 11.592899768537889, "learning_rate": 4.665751952473589e-06, "loss": 0.6947, "step": 40345 }, { "epoch": 1.71, "grad_norm": 11.207575120032372, "learning_rate": 4.6645235884868555e-06, "loss": 0.7228, "step": 40350 }, { "epoch": 1.71, "grad_norm": 31.312642096407398, "learning_rate": 4.6632952448390675e-06, "loss": 0.6797, "step": 40355 }, { "epoch": 1.71, "grad_norm": 14.802131875737823, "learning_rate": 4.662066921604699e-06, "loss": 0.7206, "step": 40360 }, { "epoch": 1.71, "grad_norm": 5.0488991483100625, "learning_rate": 4.660838618858216e-06, "loss": 0.7286, "step": 40365 }, { "epoch": 1.71, "grad_norm": 5.203618222717946, "learning_rate": 4.659610336674089e-06, "loss": 0.6963, "step": 40370 }, { "epoch": 1.71, "grad_norm": 10.787936082574443, "learning_rate": 4.658382075126785e-06, "loss": 0.6964, "step": 40375 }, { "epoch": 1.71, "grad_norm": 6.421266854415682, "learning_rate": 4.657153834290769e-06, "loss": 0.7678, "step": 40380 }, { "epoch": 1.71, "grad_norm": 7.409093234752213, "learning_rate": 4.655925614240506e-06, "loss": 0.711, "step": 40385 }, { "epoch": 1.71, "grad_norm": 5.371735944911331, "learning_rate": 4.65469741505046e-06, "loss": 0.7147, "step": 40390 }, { "epoch": 1.71, "grad_norm": 4.316387419806424, "learning_rate": 4.653469236795092e-06, "loss": 0.7094, "step": 40395 }, { "epoch": 1.71, "grad_norm": 10.017469850902847, "learning_rate": 4.652241079548864e-06, "loss": 0.6888, "step": 40400 }, { "epoch": 1.71, "grad_norm": 7.062566063409066, "learning_rate": 4.651012943386234e-06, "loss": 0.6867, "step": 40405 }, { "epoch": 1.71, "grad_norm": 6.3979896850548235, "learning_rate": 4.649784828381662e-06, "loss": 0.7057, "step": 40410 }, { "epoch": 1.71, "grad_norm": 5.162163234031683, "learning_rate": 4.648556734609604e-06, "loss": 0.6761, "step": 40415 }, { "epoch": 1.71, "grad_norm": 5.90467689424634, "learning_rate": 4.647328662144518e-06, "loss": 0.7552, "step": 40420 }, { "epoch": 1.71, "grad_norm": 5.252126655323955, "learning_rate": 4.646100611060854e-06, "loss": 0.7123, "step": 40425 }, { "epoch": 1.71, "grad_norm": 4.300645138228368, "learning_rate": 4.644872581433068e-06, "loss": 0.7237, "step": 40430 }, { "epoch": 1.71, "grad_norm": 7.832616414623262, "learning_rate": 4.643644573335612e-06, "loss": 0.6949, "step": 40435 }, { "epoch": 1.71, "grad_norm": 5.20222271768449, "learning_rate": 4.6424165868429355e-06, "loss": 0.7091, "step": 40440 }, { "epoch": 1.71, "grad_norm": 8.994200385786906, "learning_rate": 4.641188622029488e-06, "loss": 0.7043, "step": 40445 }, { "epoch": 1.71, "grad_norm": 5.221544947824488, "learning_rate": 4.6399606789697175e-06, "loss": 0.7461, "step": 40450 }, { "epoch": 1.71, "grad_norm": 5.015290124780768, "learning_rate": 4.638732757738069e-06, "loss": 0.687, "step": 40455 }, { "epoch": 1.71, "grad_norm": 4.130280514478898, "learning_rate": 4.637504858408991e-06, "loss": 0.7403, "step": 40460 }, { "epoch": 1.71, "grad_norm": 8.290461657371866, "learning_rate": 4.636276981056927e-06, "loss": 0.6981, "step": 40465 }, { "epoch": 1.71, "grad_norm": 4.754473736695614, "learning_rate": 4.635049125756315e-06, "loss": 0.6769, "step": 40470 }, { "epoch": 1.71, "grad_norm": 11.71146981448402, "learning_rate": 4.633821292581601e-06, "loss": 0.6909, "step": 40475 }, { "epoch": 1.71, "grad_norm": 6.972559461512804, "learning_rate": 4.632593481607223e-06, "loss": 0.7271, "step": 40480 }, { "epoch": 1.71, "grad_norm": 8.25667688724223, "learning_rate": 4.631365692907621e-06, "loss": 0.7151, "step": 40485 }, { "epoch": 1.71, "grad_norm": 4.356874540646292, "learning_rate": 4.630137926557231e-06, "loss": 0.7142, "step": 40490 }, { "epoch": 1.71, "grad_norm": 6.293168262553135, "learning_rate": 4.62891018263049e-06, "loss": 0.7384, "step": 40495 }, { "epoch": 1.71, "grad_norm": 4.483999868269908, "learning_rate": 4.62768246120183e-06, "loss": 0.708, "step": 40500 }, { "epoch": 1.71, "grad_norm": 4.194548122553793, "learning_rate": 4.6264547623456865e-06, "loss": 0.6888, "step": 40505 }, { "epoch": 1.71, "grad_norm": 9.42388482192117, "learning_rate": 4.6252270861364905e-06, "loss": 0.7061, "step": 40510 }, { "epoch": 1.71, "grad_norm": 10.140510610854582, "learning_rate": 4.62399943264867e-06, "loss": 0.6982, "step": 40515 }, { "epoch": 1.71, "grad_norm": 9.740542082420419, "learning_rate": 4.622771801956659e-06, "loss": 0.6721, "step": 40520 }, { "epoch": 1.72, "grad_norm": 8.337390238200994, "learning_rate": 4.62154419413488e-06, "loss": 0.7091, "step": 40525 }, { "epoch": 1.72, "grad_norm": 5.050203097910411, "learning_rate": 4.620316609257765e-06, "loss": 0.7023, "step": 40530 }, { "epoch": 1.72, "grad_norm": 7.238168789705431, "learning_rate": 4.619089047399734e-06, "loss": 0.7101, "step": 40535 }, { "epoch": 1.72, "grad_norm": 6.165346306193902, "learning_rate": 4.617861508635211e-06, "loss": 0.6726, "step": 40540 }, { "epoch": 1.72, "grad_norm": 8.278345247822616, "learning_rate": 4.61663399303862e-06, "loss": 0.736, "step": 40545 }, { "epoch": 1.72, "grad_norm": 6.220449471327732, "learning_rate": 4.61540650068438e-06, "loss": 0.7167, "step": 40550 }, { "epoch": 1.72, "grad_norm": 4.811219220398779, "learning_rate": 4.614179031646912e-06, "loss": 0.6907, "step": 40555 }, { "epoch": 1.72, "grad_norm": 5.675247525986704, "learning_rate": 4.612951586000634e-06, "loss": 0.7089, "step": 40560 }, { "epoch": 1.72, "grad_norm": 9.225890302560444, "learning_rate": 4.611724163819958e-06, "loss": 0.7077, "step": 40565 }, { "epoch": 1.72, "grad_norm": 17.51088320925971, "learning_rate": 4.610496765179305e-06, "loss": 0.7444, "step": 40570 }, { "epoch": 1.72, "grad_norm": 5.031438583548248, "learning_rate": 4.609269390153084e-06, "loss": 0.6655, "step": 40575 }, { "epoch": 1.72, "grad_norm": 12.677225472810388, "learning_rate": 4.6080420388157105e-06, "loss": 0.6794, "step": 40580 }, { "epoch": 1.72, "grad_norm": 7.988997089062789, "learning_rate": 4.6068147112415935e-06, "loss": 0.6919, "step": 40585 }, { "epoch": 1.72, "grad_norm": 15.439196307514528, "learning_rate": 4.605587407505141e-06, "loss": 0.6795, "step": 40590 }, { "epoch": 1.72, "grad_norm": 4.5806037455168385, "learning_rate": 4.604360127680764e-06, "loss": 0.6972, "step": 40595 }, { "epoch": 1.72, "grad_norm": 6.17218698260249, "learning_rate": 4.603132871842865e-06, "loss": 0.6798, "step": 40600 }, { "epoch": 1.72, "grad_norm": 4.241287560036608, "learning_rate": 4.601905640065853e-06, "loss": 0.6988, "step": 40605 }, { "epoch": 1.72, "grad_norm": 4.336714082651554, "learning_rate": 4.600678432424128e-06, "loss": 0.6959, "step": 40610 }, { "epoch": 1.72, "grad_norm": 7.325714038513943, "learning_rate": 4.599451248992093e-06, "loss": 0.7076, "step": 40615 }, { "epoch": 1.72, "grad_norm": 5.309510793451503, "learning_rate": 4.598224089844151e-06, "loss": 0.7309, "step": 40620 }, { "epoch": 1.72, "grad_norm": 14.216833003213026, "learning_rate": 4.596996955054697e-06, "loss": 0.6979, "step": 40625 }, { "epoch": 1.72, "grad_norm": 18.367344923241486, "learning_rate": 4.595769844698132e-06, "loss": 0.7259, "step": 40630 }, { "epoch": 1.72, "grad_norm": 4.838433991858137, "learning_rate": 4.59454275884885e-06, "loss": 0.7206, "step": 40635 }, { "epoch": 1.72, "grad_norm": 6.293420884429469, "learning_rate": 4.593315697581245e-06, "loss": 0.7331, "step": 40640 }, { "epoch": 1.72, "grad_norm": 4.906156899267601, "learning_rate": 4.592088660969713e-06, "loss": 0.6803, "step": 40645 }, { "epoch": 1.72, "grad_norm": 4.595315555443005, "learning_rate": 4.590861649088643e-06, "loss": 0.6757, "step": 40650 }, { "epoch": 1.72, "grad_norm": 7.153649506587127, "learning_rate": 4.589634662012429e-06, "loss": 0.6789, "step": 40655 }, { "epoch": 1.72, "grad_norm": 5.707493079234177, "learning_rate": 4.588407699815454e-06, "loss": 0.7225, "step": 40660 }, { "epoch": 1.72, "grad_norm": 4.903997074231319, "learning_rate": 4.587180762572108e-06, "loss": 0.711, "step": 40665 }, { "epoch": 1.72, "grad_norm": 5.3011501092994, "learning_rate": 4.585953850356776e-06, "loss": 0.707, "step": 40670 }, { "epoch": 1.72, "grad_norm": 18.0811366856455, "learning_rate": 4.584726963243843e-06, "loss": 0.7669, "step": 40675 }, { "epoch": 1.72, "grad_norm": 5.696937386490977, "learning_rate": 4.583500101307691e-06, "loss": 0.6961, "step": 40680 }, { "epoch": 1.72, "grad_norm": 4.635201377707462, "learning_rate": 4.5822732646227e-06, "loss": 0.7318, "step": 40685 }, { "epoch": 1.72, "grad_norm": 6.313409920222185, "learning_rate": 4.5810464532632524e-06, "loss": 0.7465, "step": 40690 }, { "epoch": 1.72, "grad_norm": 6.359442707543011, "learning_rate": 4.579819667303722e-06, "loss": 0.6898, "step": 40695 }, { "epoch": 1.72, "grad_norm": 4.368049123471514, "learning_rate": 4.578592906818489e-06, "loss": 0.6818, "step": 40700 }, { "epoch": 1.72, "grad_norm": 5.161906066427198, "learning_rate": 4.577366171881927e-06, "loss": 0.7386, "step": 40705 }, { "epoch": 1.72, "grad_norm": 4.925707971255135, "learning_rate": 4.576139462568408e-06, "loss": 0.7412, "step": 40710 }, { "epoch": 1.72, "grad_norm": 7.682431489538206, "learning_rate": 4.574912778952305e-06, "loss": 0.6973, "step": 40715 }, { "epoch": 1.72, "grad_norm": 6.112265387489211, "learning_rate": 4.573686121107988e-06, "loss": 0.6969, "step": 40720 }, { "epoch": 1.72, "grad_norm": 7.5451346519498435, "learning_rate": 4.572459489109827e-06, "loss": 0.7124, "step": 40725 }, { "epoch": 1.72, "grad_norm": 4.696779053294159, "learning_rate": 4.571232883032187e-06, "loss": 0.7632, "step": 40730 }, { "epoch": 1.72, "grad_norm": 5.185652694528235, "learning_rate": 4.570006302949434e-06, "loss": 0.6959, "step": 40735 }, { "epoch": 1.72, "grad_norm": 4.653334686800743, "learning_rate": 4.5687797489359324e-06, "loss": 0.728, "step": 40740 }, { "epoch": 1.72, "grad_norm": 5.683994158080438, "learning_rate": 4.567553221066045e-06, "loss": 0.689, "step": 40745 }, { "epoch": 1.72, "grad_norm": 6.515685119949525, "learning_rate": 4.566326719414131e-06, "loss": 0.7024, "step": 40750 }, { "epoch": 1.72, "grad_norm": 8.282299888356041, "learning_rate": 4.565100244054552e-06, "loss": 0.7331, "step": 40755 }, { "epoch": 1.73, "grad_norm": 4.868601929400915, "learning_rate": 4.563873795061662e-06, "loss": 0.7003, "step": 40760 }, { "epoch": 1.73, "grad_norm": 4.257700709836006, "learning_rate": 4.562647372509821e-06, "loss": 0.7154, "step": 40765 }, { "epoch": 1.73, "grad_norm": 4.5806258142587195, "learning_rate": 4.561420976473381e-06, "loss": 0.7035, "step": 40770 }, { "epoch": 1.73, "grad_norm": 11.255740564145732, "learning_rate": 4.560194607026695e-06, "loss": 0.6957, "step": 40775 }, { "epoch": 1.73, "grad_norm": 4.214913681768481, "learning_rate": 4.558968264244115e-06, "loss": 0.7077, "step": 40780 }, { "epoch": 1.73, "grad_norm": 6.912957486619837, "learning_rate": 4.557741948199989e-06, "loss": 0.6667, "step": 40785 }, { "epoch": 1.73, "grad_norm": 6.984624281071443, "learning_rate": 4.556515658968668e-06, "loss": 0.7094, "step": 40790 }, { "epoch": 1.73, "grad_norm": 4.335371484562447, "learning_rate": 4.555289396624495e-06, "loss": 0.6877, "step": 40795 }, { "epoch": 1.73, "grad_norm": 4.739566591570247, "learning_rate": 4.554063161241816e-06, "loss": 0.7047, "step": 40800 }, { "epoch": 1.73, "grad_norm": 5.777488957227169, "learning_rate": 4.552836952894974e-06, "loss": 0.7277, "step": 40805 }, { "epoch": 1.73, "grad_norm": 4.593148297552575, "learning_rate": 4.55161077165831e-06, "loss": 0.7152, "step": 40810 }, { "epoch": 1.73, "grad_norm": 8.539526122500462, "learning_rate": 4.550384617606165e-06, "loss": 0.745, "step": 40815 }, { "epoch": 1.73, "grad_norm": 5.859223225750056, "learning_rate": 4.549158490812878e-06, "loss": 0.6885, "step": 40820 }, { "epoch": 1.73, "grad_norm": 8.701733611189258, "learning_rate": 4.54793239135278e-06, "loss": 0.7104, "step": 40825 }, { "epoch": 1.73, "grad_norm": 5.935317457453757, "learning_rate": 4.546706319300211e-06, "loss": 0.7211, "step": 40830 }, { "epoch": 1.73, "grad_norm": 4.271784197889793, "learning_rate": 4.545480274729503e-06, "loss": 0.7043, "step": 40835 }, { "epoch": 1.73, "grad_norm": 4.008213123870857, "learning_rate": 4.5442542577149875e-06, "loss": 0.6835, "step": 40840 }, { "epoch": 1.73, "grad_norm": 5.213446684695098, "learning_rate": 4.5430282683309925e-06, "loss": 0.6895, "step": 40845 }, { "epoch": 1.73, "grad_norm": 5.115450079178496, "learning_rate": 4.541802306651849e-06, "loss": 0.6631, "step": 40850 }, { "epoch": 1.73, "grad_norm": 4.5697187845498615, "learning_rate": 4.540576372751879e-06, "loss": 0.6993, "step": 40855 }, { "epoch": 1.73, "grad_norm": 6.168700241834609, "learning_rate": 4.539350466705414e-06, "loss": 0.6785, "step": 40860 }, { "epoch": 1.73, "grad_norm": 8.43332745961926, "learning_rate": 4.5381245885867715e-06, "loss": 0.6951, "step": 40865 }, { "epoch": 1.73, "grad_norm": 5.383333325342679, "learning_rate": 4.536898738470274e-06, "loss": 0.712, "step": 40870 }, { "epoch": 1.73, "grad_norm": 4.74270911697505, "learning_rate": 4.535672916430244e-06, "loss": 0.7038, "step": 40875 }, { "epoch": 1.73, "grad_norm": 4.510567056136337, "learning_rate": 4.5344471225409955e-06, "loss": 0.6934, "step": 40880 }, { "epoch": 1.73, "grad_norm": 7.071575026211772, "learning_rate": 4.533221356876848e-06, "loss": 0.6985, "step": 40885 }, { "epoch": 1.73, "grad_norm": 5.848061227840281, "learning_rate": 4.531995619512114e-06, "loss": 0.6734, "step": 40890 }, { "epoch": 1.73, "grad_norm": 7.549032250938532, "learning_rate": 4.530769910521106e-06, "loss": 0.6804, "step": 40895 }, { "epoch": 1.73, "grad_norm": 9.576719701028109, "learning_rate": 4.529544229978138e-06, "loss": 0.6786, "step": 40900 }, { "epoch": 1.73, "grad_norm": 9.510570651790196, "learning_rate": 4.528318577957516e-06, "loss": 0.6736, "step": 40905 }, { "epoch": 1.73, "grad_norm": 4.807543214672011, "learning_rate": 4.52709295453355e-06, "loss": 0.7002, "step": 40910 }, { "epoch": 1.73, "grad_norm": 8.291479651011716, "learning_rate": 4.525867359780546e-06, "loss": 0.7004, "step": 40915 }, { "epoch": 1.73, "grad_norm": 4.479014636639145, "learning_rate": 4.524641793772806e-06, "loss": 0.7219, "step": 40920 }, { "epoch": 1.73, "grad_norm": 7.299863400762476, "learning_rate": 4.523416256584634e-06, "loss": 0.6748, "step": 40925 }, { "epoch": 1.73, "grad_norm": 8.78407426375857, "learning_rate": 4.52219074829033e-06, "loss": 0.7149, "step": 40930 }, { "epoch": 1.73, "grad_norm": 10.122704198058736, "learning_rate": 4.520965268964196e-06, "loss": 0.6858, "step": 40935 }, { "epoch": 1.73, "grad_norm": 11.098143264324804, "learning_rate": 4.519739818680525e-06, "loss": 0.6954, "step": 40940 }, { "epoch": 1.73, "grad_norm": 9.367903757270598, "learning_rate": 4.518514397513613e-06, "loss": 0.7089, "step": 40945 }, { "epoch": 1.73, "grad_norm": 22.725613467330067, "learning_rate": 4.517289005537757e-06, "loss": 0.6728, "step": 40950 }, { "epoch": 1.73, "grad_norm": 7.816836361598131, "learning_rate": 4.516063642827245e-06, "loss": 0.6951, "step": 40955 }, { "epoch": 1.73, "grad_norm": 8.66108065631532, "learning_rate": 4.514838309456371e-06, "loss": 0.7259, "step": 40960 }, { "epoch": 1.73, "grad_norm": 4.330762698203375, "learning_rate": 4.513613005499419e-06, "loss": 0.7139, "step": 40965 }, { "epoch": 1.73, "grad_norm": 6.725894270411569, "learning_rate": 4.5123877310306785e-06, "loss": 0.6975, "step": 40970 }, { "epoch": 1.73, "grad_norm": 9.425054397753808, "learning_rate": 4.511162486124434e-06, "loss": 0.6841, "step": 40975 }, { "epoch": 1.73, "grad_norm": 4.025001568247798, "learning_rate": 4.509937270854969e-06, "loss": 0.6711, "step": 40980 }, { "epoch": 1.73, "grad_norm": 12.54357927818446, "learning_rate": 4.508712085296563e-06, "loss": 0.7224, "step": 40985 }, { "epoch": 1.73, "grad_norm": 6.4010894105050244, "learning_rate": 4.507486929523494e-06, "loss": 0.7212, "step": 40990 }, { "epoch": 1.73, "grad_norm": 5.862641347189652, "learning_rate": 4.506261803610045e-06, "loss": 0.721, "step": 40995 }, { "epoch": 1.74, "grad_norm": 5.399701280584715, "learning_rate": 4.505036707630486e-06, "loss": 0.7054, "step": 41000 }, { "epoch": 1.74, "grad_norm": 6.162720720048479, "learning_rate": 4.503811641659094e-06, "loss": 0.711, "step": 41005 }, { "epoch": 1.74, "grad_norm": 4.418193931178021, "learning_rate": 4.502586605770141e-06, "loss": 0.7297, "step": 41010 }, { "epoch": 1.74, "grad_norm": 4.90839659657626, "learning_rate": 4.5013616000378965e-06, "loss": 0.7299, "step": 41015 }, { "epoch": 1.74, "grad_norm": 5.0669579051267934, "learning_rate": 4.50013662453663e-06, "loss": 0.7261, "step": 41020 }, { "epoch": 1.74, "grad_norm": 4.169493863682579, "learning_rate": 4.498911679340608e-06, "loss": 0.6801, "step": 41025 }, { "epoch": 1.74, "grad_norm": 6.307078054410746, "learning_rate": 4.497686764524093e-06, "loss": 0.6934, "step": 41030 }, { "epoch": 1.74, "grad_norm": 7.255976975057522, "learning_rate": 4.496461880161352e-06, "loss": 0.6885, "step": 41035 }, { "epoch": 1.74, "grad_norm": 7.953837583434858, "learning_rate": 4.495237026326642e-06, "loss": 0.6641, "step": 41040 }, { "epoch": 1.74, "grad_norm": 9.832547129702194, "learning_rate": 4.494012203094226e-06, "loss": 0.6928, "step": 41045 }, { "epoch": 1.74, "grad_norm": 5.075160721166977, "learning_rate": 4.492787410538359e-06, "loss": 0.7237, "step": 41050 }, { "epoch": 1.74, "grad_norm": 7.4175850901384255, "learning_rate": 4.491562648733296e-06, "loss": 0.7334, "step": 41055 }, { "epoch": 1.74, "grad_norm": 4.616093129241495, "learning_rate": 4.490337917753294e-06, "loss": 0.7259, "step": 41060 }, { "epoch": 1.74, "grad_norm": 4.749246833602826, "learning_rate": 4.489113217672601e-06, "loss": 0.7311, "step": 41065 }, { "epoch": 1.74, "grad_norm": 4.955244291903903, "learning_rate": 4.48788854856547e-06, "loss": 0.715, "step": 41070 }, { "epoch": 1.74, "grad_norm": 6.239642515167693, "learning_rate": 4.486663910506148e-06, "loss": 0.7216, "step": 41075 }, { "epoch": 1.74, "grad_norm": 5.038770190952199, "learning_rate": 4.4854393035688794e-06, "loss": 0.7101, "step": 41080 }, { "epoch": 1.74, "grad_norm": 4.609423069896483, "learning_rate": 4.484214727827913e-06, "loss": 0.6984, "step": 41085 }, { "epoch": 1.74, "grad_norm": 4.611287757428572, "learning_rate": 4.482990183357485e-06, "loss": 0.6969, "step": 41090 }, { "epoch": 1.74, "grad_norm": 4.242686371183405, "learning_rate": 4.481765670231842e-06, "loss": 0.6848, "step": 41095 }, { "epoch": 1.74, "grad_norm": 11.98686357214418, "learning_rate": 4.4805411885252185e-06, "loss": 0.6808, "step": 41100 }, { "epoch": 1.74, "grad_norm": 8.57281245912513, "learning_rate": 4.479316738311852e-06, "loss": 0.7113, "step": 41105 }, { "epoch": 1.74, "grad_norm": 5.530923429144713, "learning_rate": 4.47809231966598e-06, "loss": 0.72, "step": 41110 }, { "epoch": 1.74, "grad_norm": 4.811481825265242, "learning_rate": 4.4768679326618305e-06, "loss": 0.6888, "step": 41115 }, { "epoch": 1.74, "grad_norm": 4.272928658675119, "learning_rate": 4.47564357737364e-06, "loss": 0.7013, "step": 41120 }, { "epoch": 1.74, "grad_norm": 4.850833580960397, "learning_rate": 4.474419253875634e-06, "loss": 0.6752, "step": 41125 }, { "epoch": 1.74, "grad_norm": 4.4410982327331645, "learning_rate": 4.4731949622420405e-06, "loss": 0.7232, "step": 41130 }, { "epoch": 1.74, "grad_norm": 4.273107944058135, "learning_rate": 4.4719707025470845e-06, "loss": 0.6574, "step": 41135 }, { "epoch": 1.74, "grad_norm": 13.901110685840688, "learning_rate": 4.470746474864991e-06, "loss": 0.701, "step": 41140 }, { "epoch": 1.74, "grad_norm": 6.598665791509871, "learning_rate": 4.469522279269978e-06, "loss": 0.6955, "step": 41145 }, { "epoch": 1.74, "grad_norm": 4.67130034064445, "learning_rate": 4.468298115836266e-06, "loss": 0.7239, "step": 41150 }, { "epoch": 1.74, "grad_norm": 6.023251418608654, "learning_rate": 4.4670739846380755e-06, "loss": 0.6773, "step": 41155 }, { "epoch": 1.74, "grad_norm": 17.671309476789585, "learning_rate": 4.465849885749617e-06, "loss": 0.7023, "step": 41160 }, { "epoch": 1.74, "grad_norm": 19.21573675240208, "learning_rate": 4.464625819245108e-06, "loss": 0.7143, "step": 41165 }, { "epoch": 1.74, "grad_norm": 6.667298144271295, "learning_rate": 4.4634017851987575e-06, "loss": 0.6741, "step": 41170 }, { "epoch": 1.74, "grad_norm": 14.460748625139859, "learning_rate": 4.462177783684776e-06, "loss": 0.6641, "step": 41175 }, { "epoch": 1.74, "grad_norm": 15.645927793533927, "learning_rate": 4.460953814777373e-06, "loss": 0.6891, "step": 41180 }, { "epoch": 1.74, "grad_norm": 11.057490905392672, "learning_rate": 4.45972987855075e-06, "loss": 0.7125, "step": 41185 }, { "epoch": 1.74, "grad_norm": 10.789285281800444, "learning_rate": 4.458505975079115e-06, "loss": 0.7229, "step": 41190 }, { "epoch": 1.74, "grad_norm": 7.853955356774067, "learning_rate": 4.4572821044366675e-06, "loss": 0.7051, "step": 41195 }, { "epoch": 1.74, "grad_norm": 6.254016092310586, "learning_rate": 4.456058266697607e-06, "loss": 0.6897, "step": 41200 }, { "epoch": 1.74, "grad_norm": 12.657675526416494, "learning_rate": 4.454834461936132e-06, "loss": 0.721, "step": 41205 }, { "epoch": 1.74, "grad_norm": 4.324148505672216, "learning_rate": 4.453610690226436e-06, "loss": 0.7158, "step": 41210 }, { "epoch": 1.74, "grad_norm": 18.312927544035162, "learning_rate": 4.4523869516427164e-06, "loss": 0.6942, "step": 41215 }, { "epoch": 1.74, "grad_norm": 5.699636477499295, "learning_rate": 4.451163246259162e-06, "loss": 0.6575, "step": 41220 }, { "epoch": 1.74, "grad_norm": 4.401382496816984, "learning_rate": 4.449939574149963e-06, "loss": 0.7004, "step": 41225 }, { "epoch": 1.74, "grad_norm": 4.776999971028778, "learning_rate": 4.448715935389309e-06, "loss": 0.7341, "step": 41230 }, { "epoch": 1.75, "grad_norm": 10.184943873675213, "learning_rate": 4.447492330051382e-06, "loss": 0.713, "step": 41235 }, { "epoch": 1.75, "grad_norm": 4.407191375103588, "learning_rate": 4.446268758210369e-06, "loss": 0.7119, "step": 41240 }, { "epoch": 1.75, "grad_norm": 6.523253447246215, "learning_rate": 4.44504521994045e-06, "loss": 0.722, "step": 41245 }, { "epoch": 1.75, "grad_norm": 5.1688406822208135, "learning_rate": 4.443821715315803e-06, "loss": 0.6904, "step": 41250 }, { "epoch": 1.75, "grad_norm": 4.715719931227288, "learning_rate": 4.442598244410609e-06, "loss": 0.7005, "step": 41255 }, { "epoch": 1.75, "grad_norm": 9.353349145935558, "learning_rate": 4.44137480729904e-06, "loss": 0.6897, "step": 41260 }, { "epoch": 1.75, "grad_norm": 6.030706416853716, "learning_rate": 4.440151404055272e-06, "loss": 0.7239, "step": 41265 }, { "epoch": 1.75, "grad_norm": 7.321695796013148, "learning_rate": 4.4389280347534754e-06, "loss": 0.6988, "step": 41270 }, { "epoch": 1.75, "grad_norm": 4.3983322149477475, "learning_rate": 4.437704699467819e-06, "loss": 0.6902, "step": 41275 }, { "epoch": 1.75, "grad_norm": 5.225537836446942, "learning_rate": 4.4364813982724705e-06, "loss": 0.6905, "step": 41280 }, { "epoch": 1.75, "grad_norm": 5.188808089580969, "learning_rate": 4.435258131241595e-06, "loss": 0.6843, "step": 41285 }, { "epoch": 1.75, "grad_norm": 5.1482019008955815, "learning_rate": 4.434034898449355e-06, "loss": 0.6927, "step": 41290 }, { "epoch": 1.75, "grad_norm": 4.973390228071283, "learning_rate": 4.432811699969913e-06, "loss": 0.6978, "step": 41295 }, { "epoch": 1.75, "grad_norm": 4.4529929114909805, "learning_rate": 4.431588535877425e-06, "loss": 0.6496, "step": 41300 }, { "epoch": 1.75, "grad_norm": 4.6656455899527085, "learning_rate": 4.430365406246053e-06, "loss": 0.7164, "step": 41305 }, { "epoch": 1.75, "grad_norm": 7.971301505837978, "learning_rate": 4.429142311149947e-06, "loss": 0.708, "step": 41310 }, { "epoch": 1.75, "grad_norm": 5.06910380127235, "learning_rate": 4.42791925066326e-06, "loss": 0.7298, "step": 41315 }, { "epoch": 1.75, "grad_norm": 4.531469998738722, "learning_rate": 4.426696224860144e-06, "loss": 0.6812, "step": 41320 }, { "epoch": 1.75, "grad_norm": 5.459097723260169, "learning_rate": 4.425473233814749e-06, "loss": 0.7048, "step": 41325 }, { "epoch": 1.75, "grad_norm": 7.13636657777727, "learning_rate": 4.424250277601218e-06, "loss": 0.7104, "step": 41330 }, { "epoch": 1.75, "grad_norm": 4.640087489898789, "learning_rate": 4.423027356293696e-06, "loss": 0.7005, "step": 41335 }, { "epoch": 1.75, "grad_norm": 4.317660024328741, "learning_rate": 4.421804469966328e-06, "loss": 0.7149, "step": 41340 }, { "epoch": 1.75, "grad_norm": 7.333737575520007, "learning_rate": 4.4205816186932496e-06, "loss": 0.6837, "step": 41345 }, { "epoch": 1.75, "grad_norm": 6.437089497856815, "learning_rate": 4.419358802548602e-06, "loss": 0.6659, "step": 41350 }, { "epoch": 1.75, "grad_norm": 5.116117130564452, "learning_rate": 4.41813602160652e-06, "loss": 0.6979, "step": 41355 }, { "epoch": 1.75, "grad_norm": 5.101355851296357, "learning_rate": 4.416913275941137e-06, "loss": 0.6496, "step": 41360 }, { "epoch": 1.75, "grad_norm": 5.240924472928226, "learning_rate": 4.4156905656265845e-06, "loss": 0.6745, "step": 41365 }, { "epoch": 1.75, "grad_norm": 5.639549609725373, "learning_rate": 4.414467890736991e-06, "loss": 0.6949, "step": 41370 }, { "epoch": 1.75, "grad_norm": 9.183726622677437, "learning_rate": 4.413245251346485e-06, "loss": 0.7528, "step": 41375 }, { "epoch": 1.75, "grad_norm": 5.183016943714907, "learning_rate": 4.412022647529192e-06, "loss": 0.6911, "step": 41380 }, { "epoch": 1.75, "grad_norm": 5.696687503694703, "learning_rate": 4.410800079359233e-06, "loss": 0.6985, "step": 41385 }, { "epoch": 1.75, "grad_norm": 7.514664715052782, "learning_rate": 4.4095775469107296e-06, "loss": 0.6822, "step": 41390 }, { "epoch": 1.75, "grad_norm": 9.576376008610477, "learning_rate": 4.408355050257801e-06, "loss": 0.7156, "step": 41395 }, { "epoch": 1.75, "grad_norm": 7.615669366833428, "learning_rate": 4.407132589474563e-06, "loss": 0.701, "step": 41400 }, { "epoch": 1.75, "grad_norm": 5.803455207254274, "learning_rate": 4.4059101646351295e-06, "loss": 0.6718, "step": 41405 }, { "epoch": 1.75, "grad_norm": 8.78459997879738, "learning_rate": 4.404687775813612e-06, "loss": 0.662, "step": 41410 }, { "epoch": 1.75, "grad_norm": 8.926111065450465, "learning_rate": 4.403465423084122e-06, "loss": 0.6695, "step": 41415 }, { "epoch": 1.75, "grad_norm": 8.054707762876705, "learning_rate": 4.4022431065207654e-06, "loss": 0.6881, "step": 41420 }, { "epoch": 1.75, "grad_norm": 4.9626230718184905, "learning_rate": 4.40102082619765e-06, "loss": 0.7088, "step": 41425 }, { "epoch": 1.75, "grad_norm": 12.7569359141551, "learning_rate": 4.3997985821888764e-06, "loss": 0.6712, "step": 41430 }, { "epoch": 1.75, "grad_norm": 5.608489758689082, "learning_rate": 4.398576374568547e-06, "loss": 0.7206, "step": 41435 }, { "epoch": 1.75, "grad_norm": 10.74472222385305, "learning_rate": 4.397354203410761e-06, "loss": 0.7072, "step": 41440 }, { "epoch": 1.75, "grad_norm": 6.217847917707725, "learning_rate": 4.396132068789613e-06, "loss": 0.6887, "step": 41445 }, { "epoch": 1.75, "grad_norm": 4.615819081892022, "learning_rate": 4.3949099707792004e-06, "loss": 0.6812, "step": 41450 }, { "epoch": 1.75, "grad_norm": 6.3576411326308016, "learning_rate": 4.393687909453614e-06, "loss": 0.6962, "step": 41455 }, { "epoch": 1.75, "grad_norm": 4.381837709432909, "learning_rate": 4.392465884886942e-06, "loss": 0.6629, "step": 41460 }, { "epoch": 1.75, "grad_norm": 10.752500399141914, "learning_rate": 4.391243897153276e-06, "loss": 0.6688, "step": 41465 }, { "epoch": 1.76, "grad_norm": 6.111570643144335, "learning_rate": 4.3900219463267e-06, "loss": 0.706, "step": 41470 }, { "epoch": 1.76, "grad_norm": 5.146414717517184, "learning_rate": 4.3888000324812955e-06, "loss": 0.6998, "step": 41475 }, { "epoch": 1.76, "grad_norm": 4.181769277666174, "learning_rate": 4.387578155691143e-06, "loss": 0.6951, "step": 41480 }, { "epoch": 1.76, "grad_norm": 4.457317139763903, "learning_rate": 4.386356316030325e-06, "loss": 0.7038, "step": 41485 }, { "epoch": 1.76, "grad_norm": 7.3381424911736355, "learning_rate": 4.385134513572914e-06, "loss": 0.7007, "step": 41490 }, { "epoch": 1.76, "grad_norm": 6.578728868410613, "learning_rate": 4.383912748392988e-06, "loss": 0.6626, "step": 41495 }, { "epoch": 1.76, "grad_norm": 20.444096498890847, "learning_rate": 4.382691020564617e-06, "loss": 0.7567, "step": 41500 }, { "epoch": 1.76, "grad_norm": 10.47502861388027, "learning_rate": 4.38146933016187e-06, "loss": 0.698, "step": 41505 }, { "epoch": 1.76, "grad_norm": 7.741848196621235, "learning_rate": 4.380247677258818e-06, "loss": 0.7276, "step": 41510 }, { "epoch": 1.76, "grad_norm": 5.119542702248495, "learning_rate": 4.379026061929521e-06, "loss": 0.6925, "step": 41515 }, { "epoch": 1.76, "grad_norm": 4.503191406435533, "learning_rate": 4.377804484248046e-06, "loss": 0.7031, "step": 41520 }, { "epoch": 1.76, "grad_norm": 5.46876410824929, "learning_rate": 4.376582944288453e-06, "loss": 0.6366, "step": 41525 }, { "epoch": 1.76, "grad_norm": 5.383836475755269, "learning_rate": 4.375361442124799e-06, "loss": 0.6506, "step": 41530 }, { "epoch": 1.76, "grad_norm": 9.274656290398722, "learning_rate": 4.374139977831141e-06, "loss": 0.7038, "step": 41535 }, { "epoch": 1.76, "grad_norm": 4.134286668807976, "learning_rate": 4.372918551481532e-06, "loss": 0.6657, "step": 41540 }, { "epoch": 1.76, "grad_norm": 4.397411484274887, "learning_rate": 4.371697163150025e-06, "loss": 0.6471, "step": 41545 }, { "epoch": 1.76, "grad_norm": 4.245770500846576, "learning_rate": 4.370475812910669e-06, "loss": 0.6983, "step": 41550 }, { "epoch": 1.76, "grad_norm": 4.304664905376411, "learning_rate": 4.369254500837508e-06, "loss": 0.6831, "step": 41555 }, { "epoch": 1.76, "grad_norm": 5.994402427718099, "learning_rate": 4.36803322700459e-06, "loss": 0.6873, "step": 41560 }, { "epoch": 1.76, "grad_norm": 5.39437785142945, "learning_rate": 4.366811991485955e-06, "loss": 0.7108, "step": 41565 }, { "epoch": 1.76, "grad_norm": 6.329835864751196, "learning_rate": 4.365590794355645e-06, "loss": 0.6978, "step": 41570 }, { "epoch": 1.76, "grad_norm": 5.993750631379819, "learning_rate": 4.364369635687696e-06, "loss": 0.6681, "step": 41575 }, { "epoch": 1.76, "grad_norm": 6.911751070974839, "learning_rate": 4.363148515556143e-06, "loss": 0.6779, "step": 41580 }, { "epoch": 1.76, "grad_norm": 5.444657954570195, "learning_rate": 4.361927434035021e-06, "loss": 0.7388, "step": 41585 }, { "epoch": 1.76, "grad_norm": 10.413420115750064, "learning_rate": 4.360706391198359e-06, "loss": 0.7059, "step": 41590 }, { "epoch": 1.76, "grad_norm": 6.7506309876311, "learning_rate": 4.359485387120184e-06, "loss": 0.7236, "step": 41595 }, { "epoch": 1.76, "grad_norm": 4.33538576420163, "learning_rate": 4.358264421874524e-06, "loss": 0.7279, "step": 41600 }, { "epoch": 1.76, "grad_norm": 4.303221688509321, "learning_rate": 4.357043495535401e-06, "loss": 0.6847, "step": 41605 }, { "epoch": 1.76, "grad_norm": 5.756077045874945, "learning_rate": 4.355822608176839e-06, "loss": 0.6866, "step": 41610 }, { "epoch": 1.76, "grad_norm": 6.42686479470545, "learning_rate": 4.354601759872854e-06, "loss": 0.6803, "step": 41615 }, { "epoch": 1.76, "grad_norm": 5.124553141007188, "learning_rate": 4.3533809506974624e-06, "loss": 0.7105, "step": 41620 }, { "epoch": 1.76, "grad_norm": 5.933124820547007, "learning_rate": 4.3521601807246795e-06, "loss": 0.6977, "step": 41625 }, { "epoch": 1.76, "grad_norm": 5.112338062158038, "learning_rate": 4.350939450028519e-06, "loss": 0.6691, "step": 41630 }, { "epoch": 1.76, "grad_norm": 10.169957823657374, "learning_rate": 4.349718758682984e-06, "loss": 0.7036, "step": 41635 }, { "epoch": 1.76, "grad_norm": 4.339562749315671, "learning_rate": 4.348498106762087e-06, "loss": 0.6688, "step": 41640 }, { "epoch": 1.76, "grad_norm": 10.337215799821285, "learning_rate": 4.34727749433983e-06, "loss": 0.7371, "step": 41645 }, { "epoch": 1.76, "grad_norm": 12.984369647833498, "learning_rate": 4.346056921490216e-06, "loss": 0.693, "step": 41650 }, { "epoch": 1.76, "grad_norm": 6.683125618128475, "learning_rate": 4.3448363882872456e-06, "loss": 0.6708, "step": 41655 }, { "epoch": 1.76, "grad_norm": 11.01406555218198, "learning_rate": 4.343615894804915e-06, "loss": 0.7226, "step": 41660 }, { "epoch": 1.76, "grad_norm": 6.651445346898479, "learning_rate": 4.3423954411172175e-06, "loss": 0.7204, "step": 41665 }, { "epoch": 1.76, "grad_norm": 5.845400441709653, "learning_rate": 4.341175027298148e-06, "loss": 0.6808, "step": 41670 }, { "epoch": 1.76, "grad_norm": 5.576923633832933, "learning_rate": 4.339954653421697e-06, "loss": 0.6844, "step": 41675 }, { "epoch": 1.76, "grad_norm": 5.525879128160491, "learning_rate": 4.338734319561851e-06, "loss": 0.6981, "step": 41680 }, { "epoch": 1.76, "grad_norm": 4.511082053619075, "learning_rate": 4.337514025792595e-06, "loss": 0.7227, "step": 41685 }, { "epoch": 1.76, "grad_norm": 4.235310244332541, "learning_rate": 4.336293772187911e-06, "loss": 0.7018, "step": 41690 }, { "epoch": 1.76, "grad_norm": 6.77417592961108, "learning_rate": 4.3350735588217814e-06, "loss": 0.6807, "step": 41695 }, { "epoch": 1.76, "grad_norm": 9.124046805727305, "learning_rate": 4.333853385768183e-06, "loss": 0.6757, "step": 41700 }, { "epoch": 1.77, "grad_norm": 9.19145591510403, "learning_rate": 4.3326332531010934e-06, "loss": 0.7487, "step": 41705 }, { "epoch": 1.77, "grad_norm": 4.134822808298634, "learning_rate": 4.331413160894483e-06, "loss": 0.6891, "step": 41710 }, { "epoch": 1.77, "grad_norm": 6.000803596675463, "learning_rate": 4.330193109222322e-06, "loss": 0.6827, "step": 41715 }, { "epoch": 1.77, "grad_norm": 26.28811974164926, "learning_rate": 4.3289730981585815e-06, "loss": 0.6878, "step": 41720 }, { "epoch": 1.77, "grad_norm": 13.687295803189956, "learning_rate": 4.327753127777224e-06, "loss": 0.6814, "step": 41725 }, { "epoch": 1.77, "grad_norm": 8.649028554761147, "learning_rate": 4.3265331981522156e-06, "loss": 0.6652, "step": 41730 }, { "epoch": 1.77, "grad_norm": 4.856391345297885, "learning_rate": 4.325313309357515e-06, "loss": 0.677, "step": 41735 }, { "epoch": 1.77, "grad_norm": 5.6117888242585074, "learning_rate": 4.324093461467081e-06, "loss": 0.6944, "step": 41740 }, { "epoch": 1.77, "grad_norm": 4.693178156978733, "learning_rate": 4.322873654554869e-06, "loss": 0.6818, "step": 41745 }, { "epoch": 1.77, "grad_norm": 5.5912449123003976, "learning_rate": 4.321653888694833e-06, "loss": 0.686, "step": 41750 }, { "epoch": 1.77, "grad_norm": 5.522191686228286, "learning_rate": 4.320434163960925e-06, "loss": 0.6938, "step": 41755 }, { "epoch": 1.77, "grad_norm": 5.6925472814253055, "learning_rate": 4.319214480427091e-06, "loss": 0.6799, "step": 41760 }, { "epoch": 1.77, "grad_norm": 4.789851216435146, "learning_rate": 4.317994838167276e-06, "loss": 0.6858, "step": 41765 }, { "epoch": 1.77, "grad_norm": 5.586793691870011, "learning_rate": 4.316775237255428e-06, "loss": 0.6823, "step": 41770 }, { "epoch": 1.77, "grad_norm": 4.723823950520834, "learning_rate": 4.315555677765483e-06, "loss": 0.6999, "step": 41775 }, { "epoch": 1.77, "grad_norm": 6.445372470781785, "learning_rate": 4.314336159771382e-06, "loss": 0.7092, "step": 41780 }, { "epoch": 1.77, "grad_norm": 4.394353726974539, "learning_rate": 4.31311668334706e-06, "loss": 0.6831, "step": 41785 }, { "epoch": 1.77, "grad_norm": 7.018983060982324, "learning_rate": 4.311897248566451e-06, "loss": 0.7202, "step": 41790 }, { "epoch": 1.77, "grad_norm": 5.263328794580536, "learning_rate": 4.310677855503483e-06, "loss": 0.727, "step": 41795 }, { "epoch": 1.77, "grad_norm": 14.392740548779859, "learning_rate": 4.309458504232087e-06, "loss": 0.7236, "step": 41800 }, { "epoch": 1.77, "grad_norm": 5.288968009324223, "learning_rate": 4.3082391948261885e-06, "loss": 0.7049, "step": 41805 }, { "epoch": 1.77, "grad_norm": 7.3329306659887665, "learning_rate": 4.307019927359708e-06, "loss": 0.661, "step": 41810 }, { "epoch": 1.77, "grad_norm": 15.928413824814557, "learning_rate": 4.30580070190657e-06, "loss": 0.7232, "step": 41815 }, { "epoch": 1.77, "grad_norm": 7.83135742146612, "learning_rate": 4.304581518540689e-06, "loss": 0.6546, "step": 41820 }, { "epoch": 1.77, "grad_norm": 5.009168171582179, "learning_rate": 4.303362377335984e-06, "loss": 0.693, "step": 41825 }, { "epoch": 1.77, "grad_norm": 5.073120865542738, "learning_rate": 4.302143278366365e-06, "loss": 0.672, "step": 41830 }, { "epoch": 1.77, "grad_norm": 4.5140358420284485, "learning_rate": 4.300924221705743e-06, "loss": 0.7033, "step": 41835 }, { "epoch": 1.77, "grad_norm": 5.676453752497495, "learning_rate": 4.299705207428027e-06, "loss": 0.6792, "step": 41840 }, { "epoch": 1.77, "grad_norm": 4.462030509091536, "learning_rate": 4.298486235607121e-06, "loss": 0.6854, "step": 41845 }, { "epoch": 1.77, "grad_norm": 4.810799553620261, "learning_rate": 4.2972673063169285e-06, "loss": 0.6486, "step": 41850 }, { "epoch": 1.77, "grad_norm": 4.687485354310291, "learning_rate": 4.296048419631349e-06, "loss": 0.6905, "step": 41855 }, { "epoch": 1.77, "grad_norm": 4.940863919819981, "learning_rate": 4.29482957562428e-06, "loss": 0.6779, "step": 41860 }, { "epoch": 1.77, "grad_norm": 4.7574506375981285, "learning_rate": 4.293610774369617e-06, "loss": 0.6892, "step": 41865 }, { "epoch": 1.77, "grad_norm": 4.859548398044227, "learning_rate": 4.292392015941253e-06, "loss": 0.6961, "step": 41870 }, { "epoch": 1.77, "grad_norm": 5.819842460526834, "learning_rate": 4.291173300413075e-06, "loss": 0.7269, "step": 41875 }, { "epoch": 1.77, "grad_norm": 4.490555457291418, "learning_rate": 4.289954627858973e-06, "loss": 0.6737, "step": 41880 }, { "epoch": 1.77, "grad_norm": 4.863252744797067, "learning_rate": 4.2887359983528295e-06, "loss": 0.6704, "step": 41885 }, { "epoch": 1.77, "grad_norm": 4.432766473447019, "learning_rate": 4.287517411968529e-06, "loss": 0.7065, "step": 41890 }, { "epoch": 1.77, "grad_norm": 5.811268726667623, "learning_rate": 4.2862988687799484e-06, "loss": 0.6977, "step": 41895 }, { "epoch": 1.77, "grad_norm": 6.471372115842373, "learning_rate": 4.285080368860964e-06, "loss": 0.7074, "step": 41900 }, { "epoch": 1.77, "grad_norm": 6.064357412941498, "learning_rate": 4.283861912285452e-06, "loss": 0.6836, "step": 41905 }, { "epoch": 1.77, "grad_norm": 5.737171949375136, "learning_rate": 4.282643499127282e-06, "loss": 0.7065, "step": 41910 }, { "epoch": 1.77, "grad_norm": 5.288568100906577, "learning_rate": 4.2814251294603245e-06, "loss": 0.7151, "step": 41915 }, { "epoch": 1.77, "grad_norm": 4.220457335673189, "learning_rate": 4.280206803358445e-06, "loss": 0.6631, "step": 41920 }, { "epoch": 1.77, "grad_norm": 4.575144700356605, "learning_rate": 4.278988520895505e-06, "loss": 0.6862, "step": 41925 }, { "epoch": 1.77, "grad_norm": 5.005245709547172, "learning_rate": 4.277770282145369e-06, "loss": 0.7374, "step": 41930 }, { "epoch": 1.77, "grad_norm": 4.299086854987855, "learning_rate": 4.276552087181892e-06, "loss": 0.6783, "step": 41935 }, { "epoch": 1.77, "grad_norm": 4.5816918177444865, "learning_rate": 4.275333936078932e-06, "loss": 0.6681, "step": 41940 }, { "epoch": 1.78, "grad_norm": 4.206306678317147, "learning_rate": 4.2741158289103416e-06, "loss": 0.6974, "step": 41945 }, { "epoch": 1.78, "grad_norm": 5.373012306092978, "learning_rate": 4.27289776574997e-06, "loss": 0.7036, "step": 41950 }, { "epoch": 1.78, "grad_norm": 4.269530211442046, "learning_rate": 4.271679746671664e-06, "loss": 0.718, "step": 41955 }, { "epoch": 1.78, "grad_norm": 4.679785523190371, "learning_rate": 4.2704617717492705e-06, "loss": 0.7078, "step": 41960 }, { "epoch": 1.78, "grad_norm": 4.631037808795886, "learning_rate": 4.269243841056631e-06, "loss": 0.677, "step": 41965 }, { "epoch": 1.78, "grad_norm": 5.121768362462444, "learning_rate": 4.268025954667584e-06, "loss": 0.6861, "step": 41970 }, { "epoch": 1.78, "grad_norm": 10.513819952364718, "learning_rate": 4.266808112655968e-06, "loss": 0.7119, "step": 41975 }, { "epoch": 1.78, "grad_norm": 5.281636669906912, "learning_rate": 4.265590315095615e-06, "loss": 0.7243, "step": 41980 }, { "epoch": 1.78, "grad_norm": 4.639357720002547, "learning_rate": 4.264372562060359e-06, "loss": 0.6819, "step": 41985 }, { "epoch": 1.78, "grad_norm": 7.321326010370948, "learning_rate": 4.263154853624028e-06, "loss": 0.6715, "step": 41990 }, { "epoch": 1.78, "grad_norm": 6.537412296080619, "learning_rate": 4.261937189860447e-06, "loss": 0.7035, "step": 41995 }, { "epoch": 1.78, "grad_norm": 3.9775277436946994, "learning_rate": 4.260719570843441e-06, "loss": 0.6385, "step": 42000 }, { "epoch": 1.78, "grad_norm": 4.998980405852894, "learning_rate": 4.25950199664683e-06, "loss": 0.6954, "step": 42005 }, { "epoch": 1.78, "grad_norm": 6.21519075230146, "learning_rate": 4.258284467344431e-06, "loss": 0.669, "step": 42010 }, { "epoch": 1.78, "grad_norm": 4.804256399748408, "learning_rate": 4.257066983010061e-06, "loss": 0.709, "step": 42015 }, { "epoch": 1.78, "grad_norm": 4.702268189519116, "learning_rate": 4.255849543717529e-06, "loss": 0.6648, "step": 42020 }, { "epoch": 1.78, "grad_norm": 7.332881912935103, "learning_rate": 4.254632149540649e-06, "loss": 0.7021, "step": 42025 }, { "epoch": 1.78, "grad_norm": 6.074398822331957, "learning_rate": 4.253414800553225e-06, "loss": 0.6661, "step": 42030 }, { "epoch": 1.78, "grad_norm": 6.531264449761033, "learning_rate": 4.252197496829064e-06, "loss": 0.6817, "step": 42035 }, { "epoch": 1.78, "grad_norm": 4.731946287279799, "learning_rate": 4.250980238441967e-06, "loss": 0.6703, "step": 42040 }, { "epoch": 1.78, "grad_norm": 5.005358749709967, "learning_rate": 4.249763025465729e-06, "loss": 0.6715, "step": 42045 }, { "epoch": 1.78, "grad_norm": 6.309545467515519, "learning_rate": 4.2485458579741504e-06, "loss": 0.6743, "step": 42050 }, { "epoch": 1.78, "grad_norm": 6.5921692257259235, "learning_rate": 4.247328736041022e-06, "loss": 0.6833, "step": 42055 }, { "epoch": 1.78, "grad_norm": 5.207482283506148, "learning_rate": 4.2461116597401365e-06, "loss": 0.7019, "step": 42060 }, { "epoch": 1.78, "grad_norm": 4.81628216055002, "learning_rate": 4.24489462914528e-06, "loss": 0.683, "step": 42065 }, { "epoch": 1.78, "grad_norm": 7.206246918770953, "learning_rate": 4.243677644330237e-06, "loss": 0.7101, "step": 42070 }, { "epoch": 1.78, "grad_norm": 7.875733211150892, "learning_rate": 4.242460705368792e-06, "loss": 0.6966, "step": 42075 }, { "epoch": 1.78, "grad_norm": 5.014781819657245, "learning_rate": 4.241243812334722e-06, "loss": 0.6427, "step": 42080 }, { "epoch": 1.78, "grad_norm": 13.81215859906448, "learning_rate": 4.240026965301806e-06, "loss": 0.7045, "step": 42085 }, { "epoch": 1.78, "grad_norm": 5.53422817954452, "learning_rate": 4.238810164343816e-06, "loss": 0.7183, "step": 42090 }, { "epoch": 1.78, "grad_norm": 19.773675846800426, "learning_rate": 4.237593409534522e-06, "loss": 0.6958, "step": 42095 }, { "epoch": 1.78, "grad_norm": 5.030200045497101, "learning_rate": 4.236376700947696e-06, "loss": 0.6603, "step": 42100 }, { "epoch": 1.78, "grad_norm": 8.65824640530128, "learning_rate": 4.235160038657099e-06, "loss": 0.6604, "step": 42105 }, { "epoch": 1.78, "grad_norm": 4.885639185714243, "learning_rate": 4.233943422736498e-06, "loss": 0.7285, "step": 42110 }, { "epoch": 1.78, "grad_norm": 10.270268632635593, "learning_rate": 4.232726853259651e-06, "loss": 0.683, "step": 42115 }, { "epoch": 1.78, "grad_norm": 9.545263576809146, "learning_rate": 4.231510330300315e-06, "loss": 0.7042, "step": 42120 }, { "epoch": 1.78, "grad_norm": 9.130343490756728, "learning_rate": 4.2302938539322415e-06, "loss": 0.6602, "step": 42125 }, { "epoch": 1.78, "grad_norm": 6.253400470017544, "learning_rate": 4.229077424229187e-06, "loss": 0.7072, "step": 42130 }, { "epoch": 1.78, "grad_norm": 4.746128899357891, "learning_rate": 4.227861041264896e-06, "loss": 0.6675, "step": 42135 }, { "epoch": 1.78, "grad_norm": 7.960438129249485, "learning_rate": 4.2266447051131155e-06, "loss": 0.7239, "step": 42140 }, { "epoch": 1.78, "grad_norm": 7.976974691785344, "learning_rate": 4.225428415847589e-06, "loss": 0.7006, "step": 42145 }, { "epoch": 1.78, "grad_norm": 5.120069175085671, "learning_rate": 4.224212173542056e-06, "loss": 0.7032, "step": 42150 }, { "epoch": 1.78, "grad_norm": 7.72126767478556, "learning_rate": 4.222995978270254e-06, "loss": 0.6733, "step": 42155 }, { "epoch": 1.78, "grad_norm": 7.5826021749362305, "learning_rate": 4.221779830105917e-06, "loss": 0.7106, "step": 42160 }, { "epoch": 1.78, "grad_norm": 4.978620790444742, "learning_rate": 4.220563729122776e-06, "loss": 0.7018, "step": 42165 }, { "epoch": 1.78, "grad_norm": 9.401651812723802, "learning_rate": 4.219347675394561e-06, "loss": 0.7191, "step": 42170 }, { "epoch": 1.78, "grad_norm": 6.14442799549802, "learning_rate": 4.218131668994997e-06, "loss": 0.7023, "step": 42175 }, { "epoch": 1.79, "grad_norm": 4.8815002277518875, "learning_rate": 4.216915709997806e-06, "loss": 0.6612, "step": 42180 }, { "epoch": 1.79, "grad_norm": 9.441470440867683, "learning_rate": 4.21569979847671e-06, "loss": 0.6899, "step": 42185 }, { "epoch": 1.79, "grad_norm": 13.741208315742687, "learning_rate": 4.2144839345054224e-06, "loss": 0.7296, "step": 42190 }, { "epoch": 1.79, "grad_norm": 6.039423664982864, "learning_rate": 4.213268118157663e-06, "loss": 0.706, "step": 42195 }, { "epoch": 1.79, "grad_norm": 11.480485569013796, "learning_rate": 4.2120523495071395e-06, "loss": 0.6494, "step": 42200 }, { "epoch": 1.79, "grad_norm": 4.90351864993707, "learning_rate": 4.21083662862756e-06, "loss": 0.6936, "step": 42205 }, { "epoch": 1.79, "grad_norm": 10.664897595372203, "learning_rate": 4.209620955592632e-06, "loss": 0.6577, "step": 42210 }, { "epoch": 1.79, "grad_norm": 6.755547350467756, "learning_rate": 4.208405330476056e-06, "loss": 0.6862, "step": 42215 }, { "epoch": 1.79, "grad_norm": 10.955391716986584, "learning_rate": 4.207189753351535e-06, "loss": 0.671, "step": 42220 }, { "epoch": 1.79, "grad_norm": 9.943061193500395, "learning_rate": 4.205974224292764e-06, "loss": 0.6708, "step": 42225 }, { "epoch": 1.79, "grad_norm": 8.045553013170885, "learning_rate": 4.2047587433734356e-06, "loss": 0.717, "step": 42230 }, { "epoch": 1.79, "grad_norm": 4.840505266032793, "learning_rate": 4.203543310667243e-06, "loss": 0.6746, "step": 42235 }, { "epoch": 1.79, "grad_norm": 10.795634284051419, "learning_rate": 4.202327926247873e-06, "loss": 0.6936, "step": 42240 }, { "epoch": 1.79, "grad_norm": 11.753644661513787, "learning_rate": 4.201112590189011e-06, "loss": 0.6807, "step": 42245 }, { "epoch": 1.79, "grad_norm": 12.240200134571912, "learning_rate": 4.19989730256434e-06, "loss": 0.6586, "step": 42250 }, { "epoch": 1.79, "grad_norm": 9.076927703189254, "learning_rate": 4.1986820634475375e-06, "loss": 0.7056, "step": 42255 }, { "epoch": 1.79, "grad_norm": 11.94973996266796, "learning_rate": 4.1974668729122834e-06, "loss": 0.7293, "step": 42260 }, { "epoch": 1.79, "grad_norm": 7.360933031483773, "learning_rate": 4.196251731032247e-06, "loss": 0.6841, "step": 42265 }, { "epoch": 1.79, "grad_norm": 9.079093125525384, "learning_rate": 4.195036637881102e-06, "loss": 0.707, "step": 42270 }, { "epoch": 1.79, "grad_norm": 5.494899354348531, "learning_rate": 4.193821593532515e-06, "loss": 0.6813, "step": 42275 }, { "epoch": 1.79, "grad_norm": 5.022060179574725, "learning_rate": 4.1926065980601486e-06, "loss": 0.6812, "step": 42280 }, { "epoch": 1.79, "grad_norm": 4.140898430755351, "learning_rate": 4.191391651537666e-06, "loss": 0.6815, "step": 42285 }, { "epoch": 1.79, "grad_norm": 11.22738000981659, "learning_rate": 4.190176754038726e-06, "loss": 0.6835, "step": 42290 }, { "epoch": 1.79, "grad_norm": 7.892685114888502, "learning_rate": 4.188961905636984e-06, "loss": 0.6789, "step": 42295 }, { "epoch": 1.79, "grad_norm": 4.1555990938047564, "learning_rate": 4.187747106406092e-06, "loss": 0.6931, "step": 42300 }, { "epoch": 1.79, "grad_norm": 4.229148641640653, "learning_rate": 4.186532356419701e-06, "loss": 0.6846, "step": 42305 }, { "epoch": 1.79, "grad_norm": 10.700529805122455, "learning_rate": 4.185317655751457e-06, "loss": 0.6994, "step": 42310 }, { "epoch": 1.79, "grad_norm": 3.990729889829295, "learning_rate": 4.184103004475004e-06, "loss": 0.6611, "step": 42315 }, { "epoch": 1.79, "grad_norm": 5.96171242645658, "learning_rate": 4.182888402663983e-06, "loss": 0.686, "step": 42320 }, { "epoch": 1.79, "grad_norm": 5.477819894215142, "learning_rate": 4.18167385039203e-06, "loss": 0.6837, "step": 42325 }, { "epoch": 1.79, "grad_norm": 4.343856673608614, "learning_rate": 4.180459347732782e-06, "loss": 0.6719, "step": 42330 }, { "epoch": 1.79, "grad_norm": 5.015435266272118, "learning_rate": 4.179244894759868e-06, "loss": 0.678, "step": 42335 }, { "epoch": 1.79, "grad_norm": 5.320618436407309, "learning_rate": 4.178030491546921e-06, "loss": 0.6832, "step": 42340 }, { "epoch": 1.79, "grad_norm": 5.767237463003962, "learning_rate": 4.176816138167563e-06, "loss": 0.6778, "step": 42345 }, { "epoch": 1.79, "grad_norm": 4.161767250032055, "learning_rate": 4.1756018346954166e-06, "loss": 0.671, "step": 42350 }, { "epoch": 1.79, "grad_norm": 4.369859810768393, "learning_rate": 4.174387581204104e-06, "loss": 0.6783, "step": 42355 }, { "epoch": 1.79, "grad_norm": 4.8489460281304435, "learning_rate": 4.17317337776724e-06, "loss": 0.6848, "step": 42360 }, { "epoch": 1.79, "grad_norm": 6.584790571671452, "learning_rate": 4.171959224458439e-06, "loss": 0.6649, "step": 42365 }, { "epoch": 1.79, "grad_norm": 7.258603398563617, "learning_rate": 4.170745121351311e-06, "loss": 0.7257, "step": 42370 }, { "epoch": 1.79, "grad_norm": 4.492669345379982, "learning_rate": 4.1695310685194625e-06, "loss": 0.6794, "step": 42375 }, { "epoch": 1.79, "grad_norm": 7.449003496315726, "learning_rate": 4.168317066036501e-06, "loss": 0.6958, "step": 42380 }, { "epoch": 1.79, "grad_norm": 9.835507540706411, "learning_rate": 4.167103113976024e-06, "loss": 0.6741, "step": 42385 }, { "epoch": 1.79, "grad_norm": 8.32568488237581, "learning_rate": 4.165889212411633e-06, "loss": 0.6725, "step": 42390 }, { "epoch": 1.79, "grad_norm": 5.8609541056138825, "learning_rate": 4.164675361416922e-06, "loss": 0.7071, "step": 42395 }, { "epoch": 1.79, "grad_norm": 4.342518761506975, "learning_rate": 4.163461561065484e-06, "loss": 0.6874, "step": 42400 }, { "epoch": 1.79, "grad_norm": 6.9037508711612485, "learning_rate": 4.1622478114309065e-06, "loss": 0.6895, "step": 42405 }, { "epoch": 1.79, "grad_norm": 7.442259485284025, "learning_rate": 4.161034112586776e-06, "loss": 0.6978, "step": 42410 }, { "epoch": 1.8, "grad_norm": 5.055801620382749, "learning_rate": 4.159820464606677e-06, "loss": 0.6781, "step": 42415 }, { "epoch": 1.8, "grad_norm": 7.3873154365969205, "learning_rate": 4.158606867564189e-06, "loss": 0.6823, "step": 42420 }, { "epoch": 1.8, "grad_norm": 5.683709204844898, "learning_rate": 4.157393321532886e-06, "loss": 0.7027, "step": 42425 }, { "epoch": 1.8, "grad_norm": 4.944155811403085, "learning_rate": 4.156179826586346e-06, "loss": 0.6951, "step": 42430 }, { "epoch": 1.8, "grad_norm": 4.686202837427037, "learning_rate": 4.154966382798138e-06, "loss": 0.6836, "step": 42435 }, { "epoch": 1.8, "grad_norm": 4.135823246484916, "learning_rate": 4.153752990241829e-06, "loss": 0.6663, "step": 42440 }, { "epoch": 1.8, "grad_norm": 4.196341787119352, "learning_rate": 4.152539648990982e-06, "loss": 0.6704, "step": 42445 }, { "epoch": 1.8, "grad_norm": 5.765125241332584, "learning_rate": 4.1513263591191606e-06, "loss": 0.7176, "step": 42450 }, { "epoch": 1.8, "grad_norm": 5.881593694758747, "learning_rate": 4.1501131206999216e-06, "loss": 0.6802, "step": 42455 }, { "epoch": 1.8, "grad_norm": 14.225096969964921, "learning_rate": 4.148899933806821e-06, "loss": 0.6609, "step": 42460 }, { "epoch": 1.8, "grad_norm": 5.350332188253708, "learning_rate": 4.14768679851341e-06, "loss": 0.6266, "step": 42465 }, { "epoch": 1.8, "grad_norm": 8.869290750545613, "learning_rate": 4.146473714893238e-06, "loss": 0.7065, "step": 42470 }, { "epoch": 1.8, "grad_norm": 4.201482551297676, "learning_rate": 4.145260683019851e-06, "loss": 0.6707, "step": 42475 }, { "epoch": 1.8, "grad_norm": 7.641458278395227, "learning_rate": 4.144047702966792e-06, "loss": 0.6744, "step": 42480 }, { "epoch": 1.8, "grad_norm": 4.82207023762934, "learning_rate": 4.142834774807597e-06, "loss": 0.6877, "step": 42485 }, { "epoch": 1.8, "grad_norm": 6.2165422331046845, "learning_rate": 4.141621898615806e-06, "loss": 0.6964, "step": 42490 }, { "epoch": 1.8, "grad_norm": 5.110702536763148, "learning_rate": 4.140409074464951e-06, "loss": 0.709, "step": 42495 }, { "epoch": 1.8, "grad_norm": 7.103655875020122, "learning_rate": 4.139196302428561e-06, "loss": 0.6727, "step": 42500 }, { "epoch": 1.8, "grad_norm": 8.314640417506359, "learning_rate": 4.137983582580165e-06, "loss": 0.6854, "step": 42505 }, { "epoch": 1.8, "grad_norm": 4.636320212020477, "learning_rate": 4.136770914993284e-06, "loss": 0.6591, "step": 42510 }, { "epoch": 1.8, "grad_norm": 6.041677791311336, "learning_rate": 4.13555829974144e-06, "loss": 0.6663, "step": 42515 }, { "epoch": 1.8, "grad_norm": 4.913731597963763, "learning_rate": 4.13434573689815e-06, "loss": 0.6548, "step": 42520 }, { "epoch": 1.8, "grad_norm": 5.256590195802156, "learning_rate": 4.133133226536929e-06, "loss": 0.6547, "step": 42525 }, { "epoch": 1.8, "grad_norm": 7.315666503135458, "learning_rate": 4.131920768731286e-06, "loss": 0.6528, "step": 42530 }, { "epoch": 1.8, "grad_norm": 4.632496180714526, "learning_rate": 4.1307083635547296e-06, "loss": 0.7262, "step": 42535 }, { "epoch": 1.8, "grad_norm": 4.815366664152842, "learning_rate": 4.1294960110807655e-06, "loss": 0.697, "step": 42540 }, { "epoch": 1.8, "grad_norm": 6.5961466485856, "learning_rate": 4.128283711382893e-06, "loss": 0.6659, "step": 42545 }, { "epoch": 1.8, "grad_norm": 4.565851561706001, "learning_rate": 4.127071464534612e-06, "loss": 0.6818, "step": 42550 }, { "epoch": 1.8, "grad_norm": 5.257829391267469, "learning_rate": 4.125859270609418e-06, "loss": 0.6916, "step": 42555 }, { "epoch": 1.8, "grad_norm": 5.78752117274731, "learning_rate": 4.124647129680799e-06, "loss": 0.7118, "step": 42560 }, { "epoch": 1.8, "grad_norm": 4.552219681399091, "learning_rate": 4.123435041822248e-06, "loss": 0.6533, "step": 42565 }, { "epoch": 1.8, "grad_norm": 5.6464112733559775, "learning_rate": 4.1222230071072465e-06, "loss": 0.6971, "step": 42570 }, { "epoch": 1.8, "grad_norm": 7.406294171726127, "learning_rate": 4.121011025609281e-06, "loss": 0.6694, "step": 42575 }, { "epoch": 1.8, "grad_norm": 4.266505689055188, "learning_rate": 4.119799097401827e-06, "loss": 0.6923, "step": 42580 }, { "epoch": 1.8, "grad_norm": 4.269559038937698, "learning_rate": 4.11858722255836e-06, "loss": 0.6775, "step": 42585 }, { "epoch": 1.8, "grad_norm": 4.248212633041329, "learning_rate": 4.117375401152354e-06, "loss": 0.7157, "step": 42590 }, { "epoch": 1.8, "grad_norm": 12.346544454992358, "learning_rate": 4.116163633257277e-06, "loss": 0.6549, "step": 42595 }, { "epoch": 1.8, "grad_norm": 4.463070212937211, "learning_rate": 4.114951918946599e-06, "loss": 0.65, "step": 42600 }, { "epoch": 1.8, "grad_norm": 4.493145910568353, "learning_rate": 4.113740258293776e-06, "loss": 0.7506, "step": 42605 }, { "epoch": 1.8, "grad_norm": 4.272638968208005, "learning_rate": 4.112528651372271e-06, "loss": 0.6304, "step": 42610 }, { "epoch": 1.8, "grad_norm": 7.379020612799964, "learning_rate": 4.111317098255539e-06, "loss": 0.658, "step": 42615 }, { "epoch": 1.8, "grad_norm": 4.84449738803698, "learning_rate": 4.110105599017035e-06, "loss": 0.6819, "step": 42620 }, { "epoch": 1.8, "grad_norm": 9.630285092050714, "learning_rate": 4.108894153730207e-06, "loss": 0.6707, "step": 42625 }, { "epoch": 1.8, "grad_norm": 5.238051576522061, "learning_rate": 4.1076827624685e-06, "loss": 0.678, "step": 42630 }, { "epoch": 1.8, "grad_norm": 4.754876266528547, "learning_rate": 4.106471425305361e-06, "loss": 0.7228, "step": 42635 }, { "epoch": 1.8, "grad_norm": 5.109287206768923, "learning_rate": 4.1052601423142254e-06, "loss": 0.7104, "step": 42640 }, { "epoch": 1.8, "grad_norm": 3.9729781728225624, "learning_rate": 4.104048913568533e-06, "loss": 0.693, "step": 42645 }, { "epoch": 1.81, "grad_norm": 4.789082408588649, "learning_rate": 4.102837739141716e-06, "loss": 0.6758, "step": 42650 }, { "epoch": 1.81, "grad_norm": 9.015963063830426, "learning_rate": 4.101626619107203e-06, "loss": 0.6664, "step": 42655 }, { "epoch": 1.81, "grad_norm": 6.168783415548996, "learning_rate": 4.100415553538423e-06, "loss": 0.7004, "step": 42660 }, { "epoch": 1.81, "grad_norm": 4.744637980304332, "learning_rate": 4.099204542508798e-06, "loss": 0.6847, "step": 42665 }, { "epoch": 1.81, "grad_norm": 4.490416352488437, "learning_rate": 4.097993586091748e-06, "loss": 0.6569, "step": 42670 }, { "epoch": 1.81, "grad_norm": 9.61737599647957, "learning_rate": 4.096782684360691e-06, "loss": 0.69, "step": 42675 }, { "epoch": 1.81, "grad_norm": 6.987011273728578, "learning_rate": 4.095571837389038e-06, "loss": 0.6954, "step": 42680 }, { "epoch": 1.81, "grad_norm": 7.9241795801314865, "learning_rate": 4.0943610452502e-06, "loss": 0.6537, "step": 42685 }, { "epoch": 1.81, "grad_norm": 11.97891480117837, "learning_rate": 4.093150308017585e-06, "loss": 0.6662, "step": 42690 }, { "epoch": 1.81, "grad_norm": 4.49038262641353, "learning_rate": 4.091939625764595e-06, "loss": 0.7031, "step": 42695 }, { "epoch": 1.81, "grad_norm": 6.867259272331799, "learning_rate": 4.090728998564632e-06, "loss": 0.6944, "step": 42700 }, { "epoch": 1.81, "grad_norm": 5.507223375357221, "learning_rate": 4.08951842649109e-06, "loss": 0.6657, "step": 42705 }, { "epoch": 1.81, "grad_norm": 4.1195692872820855, "learning_rate": 4.088307909617365e-06, "loss": 0.6778, "step": 42710 }, { "epoch": 1.81, "grad_norm": 4.739349229460742, "learning_rate": 4.087097448016845e-06, "loss": 0.6637, "step": 42715 }, { "epoch": 1.81, "grad_norm": 4.757396881454455, "learning_rate": 4.085887041762919e-06, "loss": 0.6634, "step": 42720 }, { "epoch": 1.81, "grad_norm": 7.662222596964793, "learning_rate": 4.084676690928969e-06, "loss": 0.7626, "step": 42725 }, { "epoch": 1.81, "grad_norm": 18.697364622642063, "learning_rate": 4.083466395588374e-06, "loss": 0.6745, "step": 42730 }, { "epoch": 1.81, "grad_norm": 25.430694875802114, "learning_rate": 4.082256155814512e-06, "loss": 0.673, "step": 42735 }, { "epoch": 1.81, "grad_norm": 11.785301143460835, "learning_rate": 4.081045971680756e-06, "loss": 0.6699, "step": 42740 }, { "epoch": 1.81, "grad_norm": 12.786413668671589, "learning_rate": 4.079835843260475e-06, "loss": 0.6817, "step": 42745 }, { "epoch": 1.81, "grad_norm": 8.201387530922869, "learning_rate": 4.0786257706270385e-06, "loss": 0.7032, "step": 42750 }, { "epoch": 1.81, "grad_norm": 8.429761470594315, "learning_rate": 4.077415753853806e-06, "loss": 0.6685, "step": 42755 }, { "epoch": 1.81, "grad_norm": 9.882651472248737, "learning_rate": 4.0762057930141406e-06, "loss": 0.683, "step": 42760 }, { "epoch": 1.81, "grad_norm": 4.456997518830199, "learning_rate": 4.074995888181395e-06, "loss": 0.6675, "step": 42765 }, { "epoch": 1.81, "grad_norm": 4.445490356441053, "learning_rate": 4.073786039428925e-06, "loss": 0.6734, "step": 42770 }, { "epoch": 1.81, "grad_norm": 5.587519992125487, "learning_rate": 4.072576246830078e-06, "loss": 0.6984, "step": 42775 }, { "epoch": 1.81, "grad_norm": 8.035548520216146, "learning_rate": 4.071366510458202e-06, "loss": 0.68, "step": 42780 }, { "epoch": 1.81, "grad_norm": 4.628397413006825, "learning_rate": 4.07015683038664e-06, "loss": 0.6383, "step": 42785 }, { "epoch": 1.81, "grad_norm": 3.9451838614915125, "learning_rate": 4.068947206688728e-06, "loss": 0.6726, "step": 42790 }, { "epoch": 1.81, "grad_norm": 8.403847273027461, "learning_rate": 4.067737639437807e-06, "loss": 0.6576, "step": 42795 }, { "epoch": 1.81, "grad_norm": 4.480057100090645, "learning_rate": 4.066528128707204e-06, "loss": 0.7262, "step": 42800 }, { "epoch": 1.81, "grad_norm": 5.599297243797112, "learning_rate": 4.0653186745702535e-06, "loss": 0.7005, "step": 42805 }, { "epoch": 1.81, "grad_norm": 6.7367784025859105, "learning_rate": 4.064109277100278e-06, "loss": 0.6824, "step": 42810 }, { "epoch": 1.81, "grad_norm": 5.735311627778898, "learning_rate": 4.062899936370599e-06, "loss": 0.6903, "step": 42815 }, { "epoch": 1.81, "grad_norm": 6.05287634308941, "learning_rate": 4.061690652454538e-06, "loss": 0.6592, "step": 42820 }, { "epoch": 1.81, "grad_norm": 4.964244157658633, "learning_rate": 4.060481425425408e-06, "loss": 0.6846, "step": 42825 }, { "epoch": 1.81, "grad_norm": 4.380740471536525, "learning_rate": 4.059272255356522e-06, "loss": 0.6917, "step": 42830 }, { "epoch": 1.81, "grad_norm": 4.204287396844997, "learning_rate": 4.058063142321189e-06, "loss": 0.7035, "step": 42835 }, { "epoch": 1.81, "grad_norm": 8.181019888144954, "learning_rate": 4.0568540863927105e-06, "loss": 0.6787, "step": 42840 }, { "epoch": 1.81, "grad_norm": 4.49555499702104, "learning_rate": 4.055645087644393e-06, "loss": 0.6609, "step": 42845 }, { "epoch": 1.81, "grad_norm": 13.346615494352267, "learning_rate": 4.054436146149531e-06, "loss": 0.6923, "step": 42850 }, { "epoch": 1.81, "grad_norm": 5.785637431083596, "learning_rate": 4.053227261981422e-06, "loss": 0.6811, "step": 42855 }, { "epoch": 1.81, "grad_norm": 4.678085778572287, "learning_rate": 4.0520184352133544e-06, "loss": 0.6507, "step": 42860 }, { "epoch": 1.81, "grad_norm": 6.5644814219539, "learning_rate": 4.050809665918615e-06, "loss": 0.7023, "step": 42865 }, { "epoch": 1.81, "grad_norm": 5.75551067191254, "learning_rate": 4.049600954170492e-06, "loss": 0.7079, "step": 42870 }, { "epoch": 1.81, "grad_norm": 4.476576985681184, "learning_rate": 4.0483923000422615e-06, "loss": 0.68, "step": 42875 }, { "epoch": 1.81, "grad_norm": 5.113299853337574, "learning_rate": 4.047183703607204e-06, "loss": 0.7218, "step": 42880 }, { "epoch": 1.81, "grad_norm": 6.686470891498753, "learning_rate": 4.045975164938592e-06, "loss": 0.6609, "step": 42885 }, { "epoch": 1.82, "grad_norm": 4.31719190751155, "learning_rate": 4.0447666841096955e-06, "loss": 0.6567, "step": 42890 }, { "epoch": 1.82, "grad_norm": 4.446139312597013, "learning_rate": 4.0435582611937815e-06, "loss": 0.7131, "step": 42895 }, { "epoch": 1.82, "grad_norm": 10.955837551796545, "learning_rate": 4.042349896264112e-06, "loss": 0.6462, "step": 42900 }, { "epoch": 1.82, "grad_norm": 7.24887101546627, "learning_rate": 4.041141589393948e-06, "loss": 0.6875, "step": 42905 }, { "epoch": 1.82, "grad_norm": 11.72253193493209, "learning_rate": 4.039933340656546e-06, "loss": 0.6963, "step": 42910 }, { "epoch": 1.82, "grad_norm": 4.424768905837444, "learning_rate": 4.038725150125156e-06, "loss": 0.6916, "step": 42915 }, { "epoch": 1.82, "grad_norm": 7.381023314025799, "learning_rate": 4.0375170178730285e-06, "loss": 0.6692, "step": 42920 }, { "epoch": 1.82, "grad_norm": 5.36596692451909, "learning_rate": 4.036308943973412e-06, "loss": 0.7168, "step": 42925 }, { "epoch": 1.82, "grad_norm": 4.773857226520101, "learning_rate": 4.035100928499543e-06, "loss": 0.6344, "step": 42930 }, { "epoch": 1.82, "grad_norm": 4.048114274200787, "learning_rate": 4.033892971524663e-06, "loss": 0.6491, "step": 42935 }, { "epoch": 1.82, "grad_norm": 4.9224788482671915, "learning_rate": 4.0326850731220065e-06, "loss": 0.63, "step": 42940 }, { "epoch": 1.82, "grad_norm": 4.959831529994112, "learning_rate": 4.031477233364804e-06, "loss": 0.69, "step": 42945 }, { "epoch": 1.82, "grad_norm": 5.139299680929505, "learning_rate": 4.030269452326284e-06, "loss": 0.6691, "step": 42950 }, { "epoch": 1.82, "grad_norm": 4.4284829007798585, "learning_rate": 4.029061730079672e-06, "loss": 0.682, "step": 42955 }, { "epoch": 1.82, "grad_norm": 4.517727119276334, "learning_rate": 4.027854066698186e-06, "loss": 0.6744, "step": 42960 }, { "epoch": 1.82, "grad_norm": 6.920386661228197, "learning_rate": 4.026646462255045e-06, "loss": 0.6657, "step": 42965 }, { "epoch": 1.82, "grad_norm": 4.200026848460221, "learning_rate": 4.025438916823461e-06, "loss": 0.6633, "step": 42970 }, { "epoch": 1.82, "grad_norm": 4.4373524987347, "learning_rate": 4.024231430476645e-06, "loss": 0.6675, "step": 42975 }, { "epoch": 1.82, "grad_norm": 5.223707927261378, "learning_rate": 4.023024003287804e-06, "loss": 0.7131, "step": 42980 }, { "epoch": 1.82, "grad_norm": 4.8532090677127595, "learning_rate": 4.021816635330139e-06, "loss": 0.6608, "step": 42985 }, { "epoch": 1.82, "grad_norm": 6.928582682343386, "learning_rate": 4.020609326676852e-06, "loss": 0.6995, "step": 42990 }, { "epoch": 1.82, "grad_norm": 4.911822876874502, "learning_rate": 4.0194020774011334e-06, "loss": 0.6669, "step": 42995 }, { "epoch": 1.82, "grad_norm": 6.197969201106623, "learning_rate": 4.018194887576181e-06, "loss": 0.6882, "step": 43000 }, { "epoch": 1.82, "grad_norm": 9.395820069797729, "learning_rate": 4.01698775727518e-06, "loss": 0.7191, "step": 43005 }, { "epoch": 1.82, "grad_norm": 6.805631581243783, "learning_rate": 4.0157806865713154e-06, "loss": 0.6535, "step": 43010 }, { "epoch": 1.82, "grad_norm": 4.077682574791148, "learning_rate": 4.014573675537769e-06, "loss": 0.7014, "step": 43015 }, { "epoch": 1.82, "grad_norm": 5.724832635019642, "learning_rate": 4.013366724247717e-06, "loss": 0.7108, "step": 43020 }, { "epoch": 1.82, "grad_norm": 4.82672469863374, "learning_rate": 4.012159832774336e-06, "loss": 0.6708, "step": 43025 }, { "epoch": 1.82, "grad_norm": 5.936812031625569, "learning_rate": 4.010953001190794e-06, "loss": 0.7028, "step": 43030 }, { "epoch": 1.82, "grad_norm": 4.296884618850802, "learning_rate": 4.009746229570256e-06, "loss": 0.6843, "step": 43035 }, { "epoch": 1.82, "grad_norm": 5.000845693940138, "learning_rate": 4.0085395179858895e-06, "loss": 0.6815, "step": 43040 }, { "epoch": 1.82, "grad_norm": 4.2912817379875206, "learning_rate": 4.007332866510852e-06, "loss": 0.6436, "step": 43045 }, { "epoch": 1.82, "grad_norm": 5.217812078661837, "learning_rate": 4.0061262752182975e-06, "loss": 0.6549, "step": 43050 }, { "epoch": 1.82, "grad_norm": 4.4760541301615415, "learning_rate": 4.004919744181379e-06, "loss": 0.6672, "step": 43055 }, { "epoch": 1.82, "grad_norm": 5.287533142374523, "learning_rate": 4.003713273473246e-06, "loss": 0.6656, "step": 43060 }, { "epoch": 1.82, "grad_norm": 4.592831254266362, "learning_rate": 4.002506863167044e-06, "loss": 0.6804, "step": 43065 }, { "epoch": 1.82, "grad_norm": 6.456323013845729, "learning_rate": 4.001300513335911e-06, "loss": 0.6713, "step": 43070 }, { "epoch": 1.82, "grad_norm": 5.649686742004004, "learning_rate": 4.0000942240529856e-06, "loss": 0.6639, "step": 43075 }, { "epoch": 1.82, "grad_norm": 8.147971227156999, "learning_rate": 3.9988879953914035e-06, "loss": 0.6982, "step": 43080 }, { "epoch": 1.82, "grad_norm": 9.02233394372782, "learning_rate": 3.997681827424294e-06, "loss": 0.6887, "step": 43085 }, { "epoch": 1.82, "grad_norm": 6.04463373169278, "learning_rate": 3.996475720224782e-06, "loss": 0.7025, "step": 43090 }, { "epoch": 1.82, "grad_norm": 5.210965913924422, "learning_rate": 3.9952696738659905e-06, "loss": 0.6874, "step": 43095 }, { "epoch": 1.82, "grad_norm": 5.689904098285844, "learning_rate": 3.99406368842104e-06, "loss": 0.6906, "step": 43100 }, { "epoch": 1.82, "grad_norm": 4.590081615381318, "learning_rate": 3.992857763963043e-06, "loss": 0.6603, "step": 43105 }, { "epoch": 1.82, "grad_norm": 7.009598935772262, "learning_rate": 3.991651900565116e-06, "loss": 0.6476, "step": 43110 }, { "epoch": 1.82, "grad_norm": 4.3650277809949936, "learning_rate": 3.990446098300362e-06, "loss": 0.6724, "step": 43115 }, { "epoch": 1.82, "grad_norm": 7.460492272736713, "learning_rate": 3.989240357241888e-06, "loss": 0.6995, "step": 43120 }, { "epoch": 1.83, "grad_norm": 5.5243059050061225, "learning_rate": 3.9880346774627935e-06, "loss": 0.6766, "step": 43125 }, { "epoch": 1.83, "grad_norm": 5.386004474443567, "learning_rate": 3.986829059036176e-06, "loss": 0.7087, "step": 43130 }, { "epoch": 1.83, "grad_norm": 6.777158511017598, "learning_rate": 3.985623502035128e-06, "loss": 0.6918, "step": 43135 }, { "epoch": 1.83, "grad_norm": 4.226336800798119, "learning_rate": 3.9844180065327395e-06, "loss": 0.6321, "step": 43140 }, { "epoch": 1.83, "grad_norm": 6.376727518405943, "learning_rate": 3.983212572602095e-06, "loss": 0.6707, "step": 43145 }, { "epoch": 1.83, "grad_norm": 4.511898541315905, "learning_rate": 3.982007200316279e-06, "loss": 0.7057, "step": 43150 }, { "epoch": 1.83, "grad_norm": 8.648768887154723, "learning_rate": 3.980801889748366e-06, "loss": 0.6536, "step": 43155 }, { "epoch": 1.83, "grad_norm": 5.634724018784112, "learning_rate": 3.979596640971433e-06, "loss": 0.6667, "step": 43160 }, { "epoch": 1.83, "grad_norm": 5.420363142647904, "learning_rate": 3.97839145405855e-06, "loss": 0.6986, "step": 43165 }, { "epoch": 1.83, "grad_norm": 4.2415089473267376, "learning_rate": 3.9771863290827836e-06, "loss": 0.6753, "step": 43170 }, { "epoch": 1.83, "grad_norm": 4.747292937693537, "learning_rate": 3.975981266117197e-06, "loss": 0.6609, "step": 43175 }, { "epoch": 1.83, "grad_norm": 6.70045843305413, "learning_rate": 3.9747762652348494e-06, "loss": 0.6858, "step": 43180 }, { "epoch": 1.83, "grad_norm": 4.332969479247198, "learning_rate": 3.973571326508799e-06, "loss": 0.644, "step": 43185 }, { "epoch": 1.83, "grad_norm": 10.005201811459234, "learning_rate": 3.972366450012095e-06, "loss": 0.6785, "step": 43190 }, { "epoch": 1.83, "grad_norm": 4.62608518933242, "learning_rate": 3.971161635817785e-06, "loss": 0.6739, "step": 43195 }, { "epoch": 1.83, "grad_norm": 4.28027197222515, "learning_rate": 3.969956883998916e-06, "loss": 0.6836, "step": 43200 }, { "epoch": 1.83, "grad_norm": 5.090102103521136, "learning_rate": 3.968752194628525e-06, "loss": 0.6756, "step": 43205 }, { "epoch": 1.83, "grad_norm": 5.655699841077139, "learning_rate": 3.967547567779652e-06, "loss": 0.6905, "step": 43210 }, { "epoch": 1.83, "grad_norm": 6.669396335251718, "learning_rate": 3.966343003525329e-06, "loss": 0.663, "step": 43215 }, { "epoch": 1.83, "grad_norm": 4.479655237704938, "learning_rate": 3.965138501938584e-06, "loss": 0.6641, "step": 43220 }, { "epoch": 1.83, "grad_norm": 4.332540527831468, "learning_rate": 3.963934063092445e-06, "loss": 0.6408, "step": 43225 }, { "epoch": 1.83, "grad_norm": 6.188852296245308, "learning_rate": 3.962729687059929e-06, "loss": 0.6873, "step": 43230 }, { "epoch": 1.83, "grad_norm": 4.517221854255131, "learning_rate": 3.96152537391406e-06, "loss": 0.6708, "step": 43235 }, { "epoch": 1.83, "grad_norm": 4.433775859643354, "learning_rate": 3.9603211237278475e-06, "loss": 0.6552, "step": 43240 }, { "epoch": 1.83, "grad_norm": 7.860964607100632, "learning_rate": 3.959116936574304e-06, "loss": 0.719, "step": 43245 }, { "epoch": 1.83, "grad_norm": 6.523789542782692, "learning_rate": 3.957912812526433e-06, "loss": 0.6333, "step": 43250 }, { "epoch": 1.83, "grad_norm": 5.517129032993037, "learning_rate": 3.956708751657239e-06, "loss": 0.6723, "step": 43255 }, { "epoch": 1.83, "grad_norm": 19.522112154133893, "learning_rate": 3.9555047540397205e-06, "loss": 0.6627, "step": 43260 }, { "epoch": 1.83, "grad_norm": 6.1641965941204075, "learning_rate": 3.954300819746871e-06, "loss": 0.6737, "step": 43265 }, { "epoch": 1.83, "grad_norm": 4.60216412896515, "learning_rate": 3.953096948851684e-06, "loss": 0.6794, "step": 43270 }, { "epoch": 1.83, "grad_norm": 9.899998609851128, "learning_rate": 3.951893141427143e-06, "loss": 0.7121, "step": 43275 }, { "epoch": 1.83, "grad_norm": 4.081734460823887, "learning_rate": 3.950689397546236e-06, "loss": 0.6708, "step": 43280 }, { "epoch": 1.83, "grad_norm": 4.768901156262409, "learning_rate": 3.94948571728194e-06, "loss": 0.6351, "step": 43285 }, { "epoch": 1.83, "grad_norm": 4.768818452468032, "learning_rate": 3.948282100707228e-06, "loss": 0.6552, "step": 43290 }, { "epoch": 1.83, "grad_norm": 7.916892897619035, "learning_rate": 3.947078547895077e-06, "loss": 0.6628, "step": 43295 }, { "epoch": 1.83, "grad_norm": 4.086628922300244, "learning_rate": 3.94587505891845e-06, "loss": 0.6966, "step": 43300 }, { "epoch": 1.83, "grad_norm": 7.269542017158951, "learning_rate": 3.944671633850315e-06, "loss": 0.6754, "step": 43305 }, { "epoch": 1.83, "grad_norm": 4.1233819927685715, "learning_rate": 3.9434682727636295e-06, "loss": 0.6616, "step": 43310 }, { "epoch": 1.83, "grad_norm": 5.184346981362648, "learning_rate": 3.9422649757313495e-06, "loss": 0.6528, "step": 43315 }, { "epoch": 1.83, "grad_norm": 5.262895066599015, "learning_rate": 3.9410617428264295e-06, "loss": 0.635, "step": 43320 }, { "epoch": 1.83, "grad_norm": 5.071039072377434, "learning_rate": 3.939858574121818e-06, "loss": 0.6945, "step": 43325 }, { "epoch": 1.83, "grad_norm": 4.347479955240949, "learning_rate": 3.938655469690456e-06, "loss": 0.7107, "step": 43330 }, { "epoch": 1.83, "grad_norm": 4.6231770735983595, "learning_rate": 3.937452429605288e-06, "loss": 0.633, "step": 43335 }, { "epoch": 1.83, "grad_norm": 4.2725807508336, "learning_rate": 3.936249453939248e-06, "loss": 0.6712, "step": 43340 }, { "epoch": 1.83, "grad_norm": 8.144957472916598, "learning_rate": 3.9350465427652715e-06, "loss": 0.6687, "step": 43345 }, { "epoch": 1.83, "grad_norm": 6.118678199907555, "learning_rate": 3.933843696156287e-06, "loss": 0.6556, "step": 43350 }, { "epoch": 1.83, "grad_norm": 5.85161572709164, "learning_rate": 3.932640914185217e-06, "loss": 0.6659, "step": 43355 }, { "epoch": 1.84, "grad_norm": 6.457813280540542, "learning_rate": 3.931438196924986e-06, "loss": 0.6673, "step": 43360 }, { "epoch": 1.84, "grad_norm": 5.209759208412717, "learning_rate": 3.930235544448508e-06, "loss": 0.7048, "step": 43365 }, { "epoch": 1.84, "grad_norm": 4.454293060768661, "learning_rate": 3.929032956828698e-06, "loss": 0.6994, "step": 43370 }, { "epoch": 1.84, "grad_norm": 4.216516722951402, "learning_rate": 3.927830434138466e-06, "loss": 0.6769, "step": 43375 }, { "epoch": 1.84, "grad_norm": 7.560956924148917, "learning_rate": 3.926627976450715e-06, "loss": 0.6781, "step": 43380 }, { "epoch": 1.84, "grad_norm": 5.077301188210084, "learning_rate": 3.925425583838351e-06, "loss": 0.666, "step": 43385 }, { "epoch": 1.84, "grad_norm": 4.340286980880687, "learning_rate": 3.924223256374266e-06, "loss": 0.6578, "step": 43390 }, { "epoch": 1.84, "grad_norm": 4.683872627804867, "learning_rate": 3.923020994131357e-06, "loss": 0.6669, "step": 43395 }, { "epoch": 1.84, "grad_norm": 7.835574324865804, "learning_rate": 3.9218187971825125e-06, "loss": 0.6834, "step": 43400 }, { "epoch": 1.84, "grad_norm": 6.6837097572560955, "learning_rate": 3.920616665600618e-06, "loss": 0.6454, "step": 43405 }, { "epoch": 1.84, "grad_norm": 6.2785676195646305, "learning_rate": 3.9194145994585585e-06, "loss": 0.6662, "step": 43410 }, { "epoch": 1.84, "grad_norm": 5.007243222649783, "learning_rate": 3.918212598829207e-06, "loss": 0.6721, "step": 43415 }, { "epoch": 1.84, "grad_norm": 6.7932671210252, "learning_rate": 3.9170106637854394e-06, "loss": 0.6537, "step": 43420 }, { "epoch": 1.84, "grad_norm": 4.8618005123900705, "learning_rate": 3.915808794400124e-06, "loss": 0.6424, "step": 43425 }, { "epoch": 1.84, "grad_norm": 4.871929239660475, "learning_rate": 3.914606990746129e-06, "loss": 0.716, "step": 43430 }, { "epoch": 1.84, "grad_norm": 4.5344097898610505, "learning_rate": 3.913405252896314e-06, "loss": 0.6954, "step": 43435 }, { "epoch": 1.84, "grad_norm": 6.75999473005663, "learning_rate": 3.912203580923539e-06, "loss": 0.6686, "step": 43440 }, { "epoch": 1.84, "grad_norm": 6.1841573787862325, "learning_rate": 3.911001974900656e-06, "loss": 0.6782, "step": 43445 }, { "epoch": 1.84, "grad_norm": 6.971867231279984, "learning_rate": 3.9098004349005145e-06, "loss": 0.6682, "step": 43450 }, { "epoch": 1.84, "grad_norm": 6.428575247710614, "learning_rate": 3.908598960995963e-06, "loss": 0.656, "step": 43455 }, { "epoch": 1.84, "grad_norm": 4.208412098841864, "learning_rate": 3.907397553259841e-06, "loss": 0.6449, "step": 43460 }, { "epoch": 1.84, "grad_norm": 5.450333295555813, "learning_rate": 3.906196211764987e-06, "loss": 0.6913, "step": 43465 }, { "epoch": 1.84, "grad_norm": 5.903947897866068, "learning_rate": 3.904994936584235e-06, "loss": 0.6692, "step": 43470 }, { "epoch": 1.84, "grad_norm": 5.954170114003696, "learning_rate": 3.903793727790414e-06, "loss": 0.702, "step": 43475 }, { "epoch": 1.84, "grad_norm": 7.900158794673702, "learning_rate": 3.902592585456351e-06, "loss": 0.6546, "step": 43480 }, { "epoch": 1.84, "grad_norm": 5.49567421163392, "learning_rate": 3.901391509654866e-06, "loss": 0.7139, "step": 43485 }, { "epoch": 1.84, "grad_norm": 5.584744835739641, "learning_rate": 3.9001905004587795e-06, "loss": 0.68, "step": 43490 }, { "epoch": 1.84, "grad_norm": 6.144948128211909, "learning_rate": 3.898989557940902e-06, "loss": 0.6618, "step": 43495 }, { "epoch": 1.84, "grad_norm": 4.643356151682625, "learning_rate": 3.897788682174044e-06, "loss": 0.6659, "step": 43500 }, { "epoch": 1.84, "grad_norm": 4.580475619039265, "learning_rate": 3.8965878732310135e-06, "loss": 0.6576, "step": 43505 }, { "epoch": 1.84, "grad_norm": 4.523542944231962, "learning_rate": 3.8953871311846075e-06, "loss": 0.6855, "step": 43510 }, { "epoch": 1.84, "grad_norm": 5.120210122480535, "learning_rate": 3.894186456107628e-06, "loss": 0.681, "step": 43515 }, { "epoch": 1.84, "grad_norm": 6.773309070503352, "learning_rate": 3.892985848072866e-06, "loss": 0.649, "step": 43520 }, { "epoch": 1.84, "grad_norm": 7.667611663644984, "learning_rate": 3.8917853071531104e-06, "loss": 0.7167, "step": 43525 }, { "epoch": 1.84, "grad_norm": 4.517046760241202, "learning_rate": 3.890584833421148e-06, "loss": 0.6888, "step": 43530 }, { "epoch": 1.84, "grad_norm": 4.514075944504898, "learning_rate": 3.889384426949758e-06, "loss": 0.6695, "step": 43535 }, { "epoch": 1.84, "grad_norm": 5.237499043077662, "learning_rate": 3.888184087811721e-06, "loss": 0.6532, "step": 43540 }, { "epoch": 1.84, "grad_norm": 5.692564774375484, "learning_rate": 3.8869838160798066e-06, "loss": 0.666, "step": 43545 }, { "epoch": 1.84, "grad_norm": 4.875781791813479, "learning_rate": 3.885783611826785e-06, "loss": 0.664, "step": 43550 }, { "epoch": 1.84, "grad_norm": 4.474151908840804, "learning_rate": 3.8845834751254205e-06, "loss": 0.6678, "step": 43555 }, { "epoch": 1.84, "grad_norm": 9.552780103830502, "learning_rate": 3.883383406048474e-06, "loss": 0.673, "step": 43560 }, { "epoch": 1.84, "grad_norm": 9.408244640195775, "learning_rate": 3.882183404668703e-06, "loss": 0.6908, "step": 43565 }, { "epoch": 1.84, "grad_norm": 4.447227750290457, "learning_rate": 3.8809834710588625e-06, "loss": 0.7058, "step": 43570 }, { "epoch": 1.84, "grad_norm": 7.5794096861507745, "learning_rate": 3.8797836052916955e-06, "loss": 0.6479, "step": 43575 }, { "epoch": 1.84, "grad_norm": 9.686855068802615, "learning_rate": 3.878583807439947e-06, "loss": 0.675, "step": 43580 }, { "epoch": 1.84, "grad_norm": 4.766168367206868, "learning_rate": 3.877384077576362e-06, "loss": 0.657, "step": 43585 }, { "epoch": 1.84, "grad_norm": 7.721907734351429, "learning_rate": 3.8761844157736724e-06, "loss": 0.7091, "step": 43590 }, { "epoch": 1.85, "grad_norm": 6.027681405503157, "learning_rate": 3.874984822104611e-06, "loss": 0.6949, "step": 43595 }, { "epoch": 1.85, "grad_norm": 5.755042645828961, "learning_rate": 3.873785296641906e-06, "loss": 0.6486, "step": 43600 }, { "epoch": 1.85, "grad_norm": 8.488500755511872, "learning_rate": 3.87258583945828e-06, "loss": 0.6395, "step": 43605 }, { "epoch": 1.85, "grad_norm": 4.276374043897645, "learning_rate": 3.871386450626455e-06, "loss": 0.6966, "step": 43610 }, { "epoch": 1.85, "grad_norm": 4.534321025774247, "learning_rate": 3.870187130219146e-06, "loss": 0.7133, "step": 43615 }, { "epoch": 1.85, "grad_norm": 4.352187711296351, "learning_rate": 3.868987878309062e-06, "loss": 0.6492, "step": 43620 }, { "epoch": 1.85, "grad_norm": 5.639115451362402, "learning_rate": 3.867788694968911e-06, "loss": 0.6417, "step": 43625 }, { "epoch": 1.85, "grad_norm": 4.31656874232237, "learning_rate": 3.866589580271398e-06, "loss": 0.6604, "step": 43630 }, { "epoch": 1.85, "grad_norm": 9.536399098647603, "learning_rate": 3.865390534289219e-06, "loss": 0.6356, "step": 43635 }, { "epoch": 1.85, "grad_norm": 6.448698959558375, "learning_rate": 3.864191557095071e-06, "loss": 0.6585, "step": 43640 }, { "epoch": 1.85, "grad_norm": 5.062678890480077, "learning_rate": 3.862992648761641e-06, "loss": 0.6616, "step": 43645 }, { "epoch": 1.85, "grad_norm": 6.032453399006003, "learning_rate": 3.8617938093616194e-06, "loss": 0.6656, "step": 43650 }, { "epoch": 1.85, "grad_norm": 4.446687201332551, "learning_rate": 3.860595038967686e-06, "loss": 0.67, "step": 43655 }, { "epoch": 1.85, "grad_norm": 4.647943885302295, "learning_rate": 3.859396337652518e-06, "loss": 0.7045, "step": 43660 }, { "epoch": 1.85, "grad_norm": 4.782269670955087, "learning_rate": 3.8581977054887924e-06, "loss": 0.6533, "step": 43665 }, { "epoch": 1.85, "grad_norm": 6.865917464806391, "learning_rate": 3.856999142549175e-06, "loss": 0.6553, "step": 43670 }, { "epoch": 1.85, "grad_norm": 7.109111346277947, "learning_rate": 3.855800648906334e-06, "loss": 0.6462, "step": 43675 }, { "epoch": 1.85, "grad_norm": 5.570900878539706, "learning_rate": 3.854602224632928e-06, "loss": 0.6016, "step": 43680 }, { "epoch": 1.85, "grad_norm": 4.764911045235446, "learning_rate": 3.853403869801616e-06, "loss": 0.6391, "step": 43685 }, { "epoch": 1.85, "grad_norm": 7.394139698808642, "learning_rate": 3.85220558448505e-06, "loss": 0.6575, "step": 43690 }, { "epoch": 1.85, "grad_norm": 6.770516318324496, "learning_rate": 3.851007368755878e-06, "loss": 0.6773, "step": 43695 }, { "epoch": 1.85, "grad_norm": 7.790336293660406, "learning_rate": 3.849809222686745e-06, "loss": 0.6344, "step": 43700 }, { "epoch": 1.85, "grad_norm": 8.741139450375439, "learning_rate": 3.848611146350292e-06, "loss": 0.6501, "step": 43705 }, { "epoch": 1.85, "grad_norm": 4.439603363694406, "learning_rate": 3.847413139819151e-06, "loss": 0.6503, "step": 43710 }, { "epoch": 1.85, "grad_norm": 4.349836624360438, "learning_rate": 3.846215203165959e-06, "loss": 0.6303, "step": 43715 }, { "epoch": 1.85, "grad_norm": 4.213788028895998, "learning_rate": 3.845017336463339e-06, "loss": 0.6586, "step": 43720 }, { "epoch": 1.85, "grad_norm": 4.9169042540232635, "learning_rate": 3.8438195397839165e-06, "loss": 0.6541, "step": 43725 }, { "epoch": 1.85, "grad_norm": 7.043401929949646, "learning_rate": 3.842621813200309e-06, "loss": 0.698, "step": 43730 }, { "epoch": 1.85, "grad_norm": 4.429431065122614, "learning_rate": 3.8414241567851345e-06, "loss": 0.7068, "step": 43735 }, { "epoch": 1.85, "grad_norm": 4.470004504089073, "learning_rate": 3.840226570610998e-06, "loss": 0.6766, "step": 43740 }, { "epoch": 1.85, "grad_norm": 4.391979936181031, "learning_rate": 3.839029054750509e-06, "loss": 0.6891, "step": 43745 }, { "epoch": 1.85, "grad_norm": 4.5788332917059815, "learning_rate": 3.837831609276269e-06, "loss": 0.6633, "step": 43750 }, { "epoch": 1.85, "grad_norm": 5.850638994402555, "learning_rate": 3.8366342342608736e-06, "loss": 0.6462, "step": 43755 }, { "epoch": 1.85, "grad_norm": 10.51537997514676, "learning_rate": 3.835436929776919e-06, "loss": 0.7082, "step": 43760 }, { "epoch": 1.85, "grad_norm": 4.498857852806296, "learning_rate": 3.834239695896993e-06, "loss": 0.6733, "step": 43765 }, { "epoch": 1.85, "grad_norm": 4.8515937201451464, "learning_rate": 3.83304253269368e-06, "loss": 0.6736, "step": 43770 }, { "epoch": 1.85, "grad_norm": 5.2572020506823565, "learning_rate": 3.831845440239561e-06, "loss": 0.6869, "step": 43775 }, { "epoch": 1.85, "grad_norm": 5.020769759559667, "learning_rate": 3.830648418607212e-06, "loss": 0.6398, "step": 43780 }, { "epoch": 1.85, "grad_norm": 4.519128285080232, "learning_rate": 3.8294514678692055e-06, "loss": 0.6847, "step": 43785 }, { "epoch": 1.85, "grad_norm": 8.44723560874819, "learning_rate": 3.828254588098107e-06, "loss": 0.669, "step": 43790 }, { "epoch": 1.85, "grad_norm": 7.467611578216941, "learning_rate": 3.827057779366483e-06, "loss": 0.6415, "step": 43795 }, { "epoch": 1.85, "grad_norm": 5.36134911507406, "learning_rate": 3.825861041746891e-06, "loss": 0.67, "step": 43800 }, { "epoch": 1.85, "grad_norm": 8.740652603841294, "learning_rate": 3.824664375311884e-06, "loss": 0.6809, "step": 43805 }, { "epoch": 1.85, "grad_norm": 6.1788167619479655, "learning_rate": 3.8234677801340145e-06, "loss": 0.6629, "step": 43810 }, { "epoch": 1.85, "grad_norm": 12.6951043969471, "learning_rate": 3.822271256285828e-06, "loss": 0.6762, "step": 43815 }, { "epoch": 1.85, "grad_norm": 5.095298892695629, "learning_rate": 3.821074803839866e-06, "loss": 0.6631, "step": 43820 }, { "epoch": 1.85, "grad_norm": 9.778351563642264, "learning_rate": 3.819878422868667e-06, "loss": 0.6458, "step": 43825 }, { "epoch": 1.85, "grad_norm": 7.896191021171513, "learning_rate": 3.818682113444762e-06, "loss": 0.6694, "step": 43830 }, { "epoch": 1.86, "grad_norm": 4.931489361361997, "learning_rate": 3.8174858756406805e-06, "loss": 0.6821, "step": 43835 }, { "epoch": 1.86, "grad_norm": 5.585951505558606, "learning_rate": 3.816289709528947e-06, "loss": 0.6392, "step": 43840 }, { "epoch": 1.86, "grad_norm": 5.60279178974891, "learning_rate": 3.815093615182083e-06, "loss": 0.6395, "step": 43845 }, { "epoch": 1.86, "grad_norm": 4.579924548857887, "learning_rate": 3.8138975926726024e-06, "loss": 0.6419, "step": 43850 }, { "epoch": 1.86, "grad_norm": 5.6139756124382805, "learning_rate": 3.812701642073016e-06, "loss": 0.6603, "step": 43855 }, { "epoch": 1.86, "grad_norm": 4.5943033111072085, "learning_rate": 3.8115057634558326e-06, "loss": 0.6494, "step": 43860 }, { "epoch": 1.86, "grad_norm": 4.037318693117249, "learning_rate": 3.810309956893553e-06, "loss": 0.6683, "step": 43865 }, { "epoch": 1.86, "grad_norm": 4.522294919260756, "learning_rate": 3.809114222458678e-06, "loss": 0.6527, "step": 43870 }, { "epoch": 1.86, "grad_norm": 3.9897730305758423, "learning_rate": 3.8079185602236997e-06, "loss": 0.6681, "step": 43875 }, { "epoch": 1.86, "grad_norm": 4.15341322089785, "learning_rate": 3.806722970261106e-06, "loss": 0.6871, "step": 43880 }, { "epoch": 1.86, "grad_norm": 9.672122642144972, "learning_rate": 3.805527452643385e-06, "loss": 0.6676, "step": 43885 }, { "epoch": 1.86, "grad_norm": 4.37488663199912, "learning_rate": 3.8043320074430163e-06, "loss": 0.6385, "step": 43890 }, { "epoch": 1.86, "grad_norm": 14.776323704367, "learning_rate": 3.803136634732477e-06, "loss": 0.6288, "step": 43895 }, { "epoch": 1.86, "grad_norm": 9.85214489135842, "learning_rate": 3.801941334584236e-06, "loss": 0.6778, "step": 43900 }, { "epoch": 1.86, "grad_norm": 14.444636540141767, "learning_rate": 3.800746107070764e-06, "loss": 0.6639, "step": 43905 }, { "epoch": 1.86, "grad_norm": 4.925559453177563, "learning_rate": 3.799550952264522e-06, "loss": 0.6648, "step": 43910 }, { "epoch": 1.86, "grad_norm": 4.913675003822992, "learning_rate": 3.7983558702379702e-06, "loss": 0.682, "step": 43915 }, { "epoch": 1.86, "grad_norm": 6.432791928824231, "learning_rate": 3.797160861063563e-06, "loss": 0.6377, "step": 43920 }, { "epoch": 1.86, "grad_norm": 9.531645697526058, "learning_rate": 3.795965924813748e-06, "loss": 0.68, "step": 43925 }, { "epoch": 1.86, "grad_norm": 6.838037462924723, "learning_rate": 3.7947710615609747e-06, "loss": 0.6799, "step": 43930 }, { "epoch": 1.86, "grad_norm": 5.279029454599246, "learning_rate": 3.793576271377681e-06, "loss": 0.701, "step": 43935 }, { "epoch": 1.86, "grad_norm": 7.927936563147069, "learning_rate": 3.792381554336303e-06, "loss": 0.6824, "step": 43940 }, { "epoch": 1.86, "grad_norm": 4.805734180570522, "learning_rate": 3.791186910509276e-06, "loss": 0.6653, "step": 43945 }, { "epoch": 1.86, "grad_norm": 6.410782797008012, "learning_rate": 3.7899923399690243e-06, "loss": 0.6663, "step": 43950 }, { "epoch": 1.86, "grad_norm": 4.429877460034502, "learning_rate": 3.788797842787974e-06, "loss": 0.6458, "step": 43955 }, { "epoch": 1.86, "grad_norm": 6.775474451982006, "learning_rate": 3.787603419038543e-06, "loss": 0.6828, "step": 43960 }, { "epoch": 1.86, "grad_norm": 9.09036353715279, "learning_rate": 3.7864090687931444e-06, "loss": 0.6529, "step": 43965 }, { "epoch": 1.86, "grad_norm": 4.821073616239193, "learning_rate": 3.78521479212419e-06, "loss": 0.6391, "step": 43970 }, { "epoch": 1.86, "grad_norm": 4.401276820707356, "learning_rate": 3.784020589104083e-06, "loss": 0.6678, "step": 43975 }, { "epoch": 1.86, "grad_norm": 4.283738731204707, "learning_rate": 3.7828264598052276e-06, "loss": 0.679, "step": 43980 }, { "epoch": 1.86, "grad_norm": 5.996358048887075, "learning_rate": 3.7816324043000184e-06, "loss": 0.6631, "step": 43985 }, { "epoch": 1.86, "grad_norm": 5.193741933782854, "learning_rate": 3.780438422660847e-06, "loss": 0.6566, "step": 43990 }, { "epoch": 1.86, "grad_norm": 5.563023484650932, "learning_rate": 3.779244514960103e-06, "loss": 0.663, "step": 43995 }, { "epoch": 1.86, "grad_norm": 5.825769330158614, "learning_rate": 3.7780506812701667e-06, "loss": 0.6571, "step": 44000 }, { "epoch": 1.86, "grad_norm": 6.11848957316736, "learning_rate": 3.7768569216634188e-06, "loss": 0.6297, "step": 44005 }, { "epoch": 1.86, "grad_norm": 14.974649806379034, "learning_rate": 3.7756632362122336e-06, "loss": 0.6635, "step": 44010 }, { "epoch": 1.86, "grad_norm": 5.983737236999226, "learning_rate": 3.774469624988979e-06, "loss": 0.6724, "step": 44015 }, { "epoch": 1.86, "grad_norm": 9.271968449915201, "learning_rate": 3.773276088066021e-06, "loss": 0.6979, "step": 44020 }, { "epoch": 1.86, "grad_norm": 6.094329929676694, "learning_rate": 3.7720826255157205e-06, "loss": 0.6494, "step": 44025 }, { "epoch": 1.86, "grad_norm": 10.57010020108791, "learning_rate": 3.7708892374104345e-06, "loss": 0.676, "step": 44030 }, { "epoch": 1.86, "grad_norm": 9.515036477726131, "learning_rate": 3.769695923822513e-06, "loss": 0.6651, "step": 44035 }, { "epoch": 1.86, "grad_norm": 12.682589075822317, "learning_rate": 3.768502684824303e-06, "loss": 0.6694, "step": 44040 }, { "epoch": 1.86, "grad_norm": 5.129663953747231, "learning_rate": 3.767309520488148e-06, "loss": 0.6372, "step": 44045 }, { "epoch": 1.86, "grad_norm": 8.839351985003534, "learning_rate": 3.7661164308863856e-06, "loss": 0.6973, "step": 44050 }, { "epoch": 1.86, "grad_norm": 5.305617604360949, "learning_rate": 3.764923416091353e-06, "loss": 0.6378, "step": 44055 }, { "epoch": 1.86, "grad_norm": 7.686272101175115, "learning_rate": 3.763730476175371e-06, "loss": 0.6645, "step": 44060 }, { "epoch": 1.86, "grad_norm": 8.366691750430366, "learning_rate": 3.7625376112107707e-06, "loss": 0.6407, "step": 44065 }, { "epoch": 1.87, "grad_norm": 10.505149037502722, "learning_rate": 3.761344821269869e-06, "loss": 0.6444, "step": 44070 }, { "epoch": 1.87, "grad_norm": 4.3949564334999, "learning_rate": 3.7601521064249837e-06, "loss": 0.6406, "step": 44075 }, { "epoch": 1.87, "grad_norm": 7.372347017816067, "learning_rate": 3.758959466748423e-06, "loss": 0.6576, "step": 44080 }, { "epoch": 1.87, "grad_norm": 7.484014620770744, "learning_rate": 3.7577669023124936e-06, "loss": 0.6775, "step": 44085 }, { "epoch": 1.87, "grad_norm": 4.386541888086565, "learning_rate": 3.7565744131894986e-06, "loss": 0.6256, "step": 44090 }, { "epoch": 1.87, "grad_norm": 5.699277612708559, "learning_rate": 3.755381999451734e-06, "loss": 0.6614, "step": 44095 }, { "epoch": 1.87, "grad_norm": 4.481357837371291, "learning_rate": 3.754189661171493e-06, "loss": 0.6694, "step": 44100 }, { "epoch": 1.87, "grad_norm": 4.636109677296466, "learning_rate": 3.7529973984210635e-06, "loss": 0.6577, "step": 44105 }, { "epoch": 1.87, "grad_norm": 8.70700062560069, "learning_rate": 3.751805211272727e-06, "loss": 0.6389, "step": 44110 }, { "epoch": 1.87, "grad_norm": 4.297900817427233, "learning_rate": 3.7506130997987655e-06, "loss": 0.6212, "step": 44115 }, { "epoch": 1.87, "grad_norm": 7.3646774737952105, "learning_rate": 3.74942106407145e-06, "loss": 0.6937, "step": 44120 }, { "epoch": 1.87, "grad_norm": 4.1712802889275435, "learning_rate": 3.7482291041630535e-06, "loss": 0.6481, "step": 44125 }, { "epoch": 1.87, "grad_norm": 5.744225160207648, "learning_rate": 3.747037220145838e-06, "loss": 0.6734, "step": 44130 }, { "epoch": 1.87, "grad_norm": 4.126949492969892, "learning_rate": 3.745845412092065e-06, "loss": 0.6454, "step": 44135 }, { "epoch": 1.87, "grad_norm": 4.753670704981321, "learning_rate": 3.7446536800739917e-06, "loss": 0.6498, "step": 44140 }, { "epoch": 1.87, "grad_norm": 8.168160052396411, "learning_rate": 3.7434620241638663e-06, "loss": 0.6741, "step": 44145 }, { "epoch": 1.87, "grad_norm": 4.604387165443849, "learning_rate": 3.7422704444339385e-06, "loss": 0.6712, "step": 44150 }, { "epoch": 1.87, "grad_norm": 5.220277388809433, "learning_rate": 3.7410789409564496e-06, "loss": 0.6784, "step": 44155 }, { "epoch": 1.87, "grad_norm": 8.881509414789399, "learning_rate": 3.739887513803634e-06, "loss": 0.7064, "step": 44160 }, { "epoch": 1.87, "grad_norm": 4.439599977866932, "learning_rate": 3.7386961630477282e-06, "loss": 0.679, "step": 44165 }, { "epoch": 1.87, "grad_norm": 5.902589839024459, "learning_rate": 3.7375048887609576e-06, "loss": 0.6881, "step": 44170 }, { "epoch": 1.87, "grad_norm": 4.288170191158458, "learning_rate": 3.736313691015548e-06, "loss": 0.6701, "step": 44175 }, { "epoch": 1.87, "grad_norm": 11.135060076279958, "learning_rate": 3.7351225698837167e-06, "loss": 0.6537, "step": 44180 }, { "epoch": 1.87, "grad_norm": 12.078523652516267, "learning_rate": 3.7339315254376774e-06, "loss": 0.6709, "step": 44185 }, { "epoch": 1.87, "grad_norm": 6.331701638701411, "learning_rate": 3.732740557749641e-06, "loss": 0.652, "step": 44190 }, { "epoch": 1.87, "grad_norm": 19.129978739265965, "learning_rate": 3.7315496668918128e-06, "loss": 0.6709, "step": 44195 }, { "epoch": 1.87, "grad_norm": 6.218896040187197, "learning_rate": 3.73035885293639e-06, "loss": 0.6566, "step": 44200 }, { "epoch": 1.87, "grad_norm": 4.959770789211603, "learning_rate": 3.7291681159555716e-06, "loss": 0.6647, "step": 44205 }, { "epoch": 1.87, "grad_norm": 7.501266073937086, "learning_rate": 3.7279774560215452e-06, "loss": 0.7091, "step": 44210 }, { "epoch": 1.87, "grad_norm": 10.594362686735336, "learning_rate": 3.7267868732065005e-06, "loss": 0.651, "step": 44215 }, { "epoch": 1.87, "grad_norm": 4.268700224192172, "learning_rate": 3.725596367582618e-06, "loss": 0.6616, "step": 44220 }, { "epoch": 1.87, "grad_norm": 9.253814690426344, "learning_rate": 3.7244059392220733e-06, "loss": 0.6722, "step": 44225 }, { "epoch": 1.87, "grad_norm": 7.685008860420982, "learning_rate": 3.7232155881970376e-06, "loss": 0.6425, "step": 44230 }, { "epoch": 1.87, "grad_norm": 7.0870372356279745, "learning_rate": 3.722025314579681e-06, "loss": 0.6456, "step": 44235 }, { "epoch": 1.87, "grad_norm": 5.392549351976747, "learning_rate": 3.7208351184421653e-06, "loss": 0.7355, "step": 44240 }, { "epoch": 1.87, "grad_norm": 14.40627952625747, "learning_rate": 3.7196449998566474e-06, "loss": 0.7191, "step": 44245 }, { "epoch": 1.87, "grad_norm": 4.348079146779934, "learning_rate": 3.7184549588952824e-06, "loss": 0.6772, "step": 44250 }, { "epoch": 1.87, "grad_norm": 5.05843181151354, "learning_rate": 3.7172649956302176e-06, "loss": 0.6656, "step": 44255 }, { "epoch": 1.87, "grad_norm": 4.653324205110013, "learning_rate": 3.716075110133599e-06, "loss": 0.6825, "step": 44260 }, { "epoch": 1.87, "grad_norm": 4.567897227527614, "learning_rate": 3.7148853024775644e-06, "loss": 0.6475, "step": 44265 }, { "epoch": 1.87, "grad_norm": 7.187016626228679, "learning_rate": 3.7136955727342473e-06, "loss": 0.6466, "step": 44270 }, { "epoch": 1.87, "grad_norm": 4.41432540444829, "learning_rate": 3.7125059209757797e-06, "loss": 0.6375, "step": 44275 }, { "epoch": 1.87, "grad_norm": 11.999951802505393, "learning_rate": 3.7113163472742846e-06, "loss": 0.6819, "step": 44280 }, { "epoch": 1.87, "grad_norm": 10.410040534360478, "learning_rate": 3.7101268517018853e-06, "loss": 0.6387, "step": 44285 }, { "epoch": 1.87, "grad_norm": 4.266805942584702, "learning_rate": 3.7089374343306946e-06, "loss": 0.6571, "step": 44290 }, { "epoch": 1.87, "grad_norm": 4.219704348557099, "learning_rate": 3.7077480952328233e-06, "loss": 0.6365, "step": 44295 }, { "epoch": 1.87, "grad_norm": 4.219626629964817, "learning_rate": 3.7065588344803792e-06, "loss": 0.6455, "step": 44300 }, { "epoch": 1.88, "grad_norm": 4.4584018551882325, "learning_rate": 3.705369652145463e-06, "loss": 0.6644, "step": 44305 }, { "epoch": 1.88, "grad_norm": 4.352919390332786, "learning_rate": 3.7041805483001717e-06, "loss": 0.6953, "step": 44310 }, { "epoch": 1.88, "grad_norm": 6.600554034792909, "learning_rate": 3.702991523016597e-06, "loss": 0.6399, "step": 44315 }, { "epoch": 1.88, "grad_norm": 4.301940191767937, "learning_rate": 3.7018025763668253e-06, "loss": 0.6361, "step": 44320 }, { "epoch": 1.88, "grad_norm": 4.489247757823377, "learning_rate": 3.70061370842294e-06, "loss": 0.6323, "step": 44325 }, { "epoch": 1.88, "grad_norm": 5.43452871028988, "learning_rate": 3.6994249192570164e-06, "loss": 0.6595, "step": 44330 }, { "epoch": 1.88, "grad_norm": 5.907822086285632, "learning_rate": 3.6982362089411308e-06, "loss": 0.6404, "step": 44335 }, { "epoch": 1.88, "grad_norm": 6.0957310320126075, "learning_rate": 3.697047577547349e-06, "loss": 0.6828, "step": 44340 }, { "epoch": 1.88, "grad_norm": 4.529642248641528, "learning_rate": 3.695859025147734e-06, "loss": 0.6853, "step": 44345 }, { "epoch": 1.88, "grad_norm": 7.0217240022793375, "learning_rate": 3.6946705518143455e-06, "loss": 0.6815, "step": 44350 }, { "epoch": 1.88, "grad_norm": 4.551933143512585, "learning_rate": 3.693482157619236e-06, "loss": 0.7043, "step": 44355 }, { "epoch": 1.88, "grad_norm": 4.415377018243964, "learning_rate": 3.6922938426344545e-06, "loss": 0.6558, "step": 44360 }, { "epoch": 1.88, "grad_norm": 7.679312374934443, "learning_rate": 3.6911056069320466e-06, "loss": 0.6371, "step": 44365 }, { "epoch": 1.88, "grad_norm": 5.637995054690598, "learning_rate": 3.689917450584049e-06, "loss": 0.6328, "step": 44370 }, { "epoch": 1.88, "grad_norm": 4.204780789956788, "learning_rate": 3.6887293736624984e-06, "loss": 0.6874, "step": 44375 }, { "epoch": 1.88, "grad_norm": 5.237011429660383, "learning_rate": 3.687541376239424e-06, "loss": 0.7083, "step": 44380 }, { "epoch": 1.88, "grad_norm": 4.297947248663635, "learning_rate": 3.68635345838685e-06, "loss": 0.6457, "step": 44385 }, { "epoch": 1.88, "grad_norm": 4.76440493555294, "learning_rate": 3.6851656201767937e-06, "loss": 0.6782, "step": 44390 }, { "epoch": 1.88, "grad_norm": 6.435663587975596, "learning_rate": 3.683977861681275e-06, "loss": 0.6422, "step": 44395 }, { "epoch": 1.88, "grad_norm": 8.28543532198817, "learning_rate": 3.6827901829723014e-06, "loss": 0.6271, "step": 44400 }, { "epoch": 1.88, "grad_norm": 5.358507988917818, "learning_rate": 3.6816025841218792e-06, "loss": 0.6746, "step": 44405 }, { "epoch": 1.88, "grad_norm": 12.462795419666628, "learning_rate": 3.680415065202009e-06, "loss": 0.6763, "step": 44410 }, { "epoch": 1.88, "grad_norm": 12.17572599566084, "learning_rate": 3.679227626284686e-06, "loss": 0.6464, "step": 44415 }, { "epoch": 1.88, "grad_norm": 10.243739229431425, "learning_rate": 3.678040267441902e-06, "loss": 0.6521, "step": 44420 }, { "epoch": 1.88, "grad_norm": 8.01596663579331, "learning_rate": 3.676852988745642e-06, "loss": 0.6391, "step": 44425 }, { "epoch": 1.88, "grad_norm": 6.40865246196711, "learning_rate": 3.675665790267888e-06, "loss": 0.6379, "step": 44430 }, { "epoch": 1.88, "grad_norm": 4.074104348512498, "learning_rate": 3.674478672080617e-06, "loss": 0.6167, "step": 44435 }, { "epoch": 1.88, "grad_norm": 4.485346762435941, "learning_rate": 3.673291634255799e-06, "loss": 0.6601, "step": 44440 }, { "epoch": 1.88, "grad_norm": 6.615909477516975, "learning_rate": 3.6721046768654013e-06, "loss": 0.6495, "step": 44445 }, { "epoch": 1.88, "grad_norm": 5.371002327584805, "learning_rate": 3.670917799981385e-06, "loss": 0.6379, "step": 44450 }, { "epoch": 1.88, "grad_norm": 4.299480539305591, "learning_rate": 3.6697310036757084e-06, "loss": 0.6459, "step": 44455 }, { "epoch": 1.88, "grad_norm": 6.752574073229355, "learning_rate": 3.668544288020323e-06, "loss": 0.6721, "step": 44460 }, { "epoch": 1.88, "grad_norm": 4.193309266251985, "learning_rate": 3.6673576530871748e-06, "loss": 0.6474, "step": 44465 }, { "epoch": 1.88, "grad_norm": 7.954155210563334, "learning_rate": 3.666171098948207e-06, "loss": 0.6593, "step": 44470 }, { "epoch": 1.88, "grad_norm": 7.781614179318758, "learning_rate": 3.6649846256753564e-06, "loss": 0.6596, "step": 44475 }, { "epoch": 1.88, "grad_norm": 8.311822699128, "learning_rate": 3.663798233340555e-06, "loss": 0.6668, "step": 44480 }, { "epoch": 1.88, "grad_norm": 4.445068603690859, "learning_rate": 3.6626119220157312e-06, "loss": 0.6201, "step": 44485 }, { "epoch": 1.88, "grad_norm": 8.241377148734646, "learning_rate": 3.6614256917728065e-06, "loss": 0.6431, "step": 44490 }, { "epoch": 1.88, "grad_norm": 13.188462070226933, "learning_rate": 3.6602395426837006e-06, "loss": 0.6656, "step": 44495 }, { "epoch": 1.88, "grad_norm": 7.021710764148414, "learning_rate": 3.6590534748203246e-06, "loss": 0.6674, "step": 44500 }, { "epoch": 1.88, "grad_norm": 7.053621235001356, "learning_rate": 3.657867488254585e-06, "loss": 0.6866, "step": 44505 }, { "epoch": 1.88, "grad_norm": 5.974071741242696, "learning_rate": 3.656681583058388e-06, "loss": 0.635, "step": 44510 }, { "epoch": 1.88, "grad_norm": 4.106047868128863, "learning_rate": 3.6554957593036282e-06, "loss": 0.6644, "step": 44515 }, { "epoch": 1.88, "grad_norm": 7.690061421448045, "learning_rate": 3.6543100170622016e-06, "loss": 0.6669, "step": 44520 }, { "epoch": 1.88, "grad_norm": 4.978681224202517, "learning_rate": 3.653124356405995e-06, "loss": 0.6626, "step": 44525 }, { "epoch": 1.88, "grad_norm": 4.710980572047136, "learning_rate": 3.6519387774068893e-06, "loss": 0.6568, "step": 44530 }, { "epoch": 1.88, "grad_norm": 4.349572648026729, "learning_rate": 3.6507532801367664e-06, "loss": 0.6661, "step": 44535 }, { "epoch": 1.89, "grad_norm": 8.68165840821385, "learning_rate": 3.6495678646674963e-06, "loss": 0.6664, "step": 44540 }, { "epoch": 1.89, "grad_norm": 6.780901752604387, "learning_rate": 3.6483825310709525e-06, "loss": 0.6724, "step": 44545 }, { "epoch": 1.89, "grad_norm": 4.402791580836213, "learning_rate": 3.647197279418991e-06, "loss": 0.6882, "step": 44550 }, { "epoch": 1.89, "grad_norm": 7.905953345437235, "learning_rate": 3.6460121097834742e-06, "loss": 0.6225, "step": 44555 }, { "epoch": 1.89, "grad_norm": 4.2943104417803895, "learning_rate": 3.6448270222362546e-06, "loss": 0.662, "step": 44560 }, { "epoch": 1.89, "grad_norm": 5.5793642379574395, "learning_rate": 3.643642016849182e-06, "loss": 0.6649, "step": 44565 }, { "epoch": 1.89, "grad_norm": 5.511713610976811, "learning_rate": 3.6424570936940986e-06, "loss": 0.6294, "step": 44570 }, { "epoch": 1.89, "grad_norm": 4.32538589315091, "learning_rate": 3.6412722528428413e-06, "loss": 0.6609, "step": 44575 }, { "epoch": 1.89, "grad_norm": 23.305939413798264, "learning_rate": 3.640087494367246e-06, "loss": 0.6481, "step": 44580 }, { "epoch": 1.89, "grad_norm": 7.3670008210935745, "learning_rate": 3.6389028183391396e-06, "loss": 0.6538, "step": 44585 }, { "epoch": 1.89, "grad_norm": 5.449395938897598, "learning_rate": 3.637718224830347e-06, "loss": 0.6771, "step": 44590 }, { "epoch": 1.89, "grad_norm": 5.522107576959655, "learning_rate": 3.6365337139126857e-06, "loss": 0.6113, "step": 44595 }, { "epoch": 1.89, "grad_norm": 5.881631118254926, "learning_rate": 3.635349285657968e-06, "loss": 0.6519, "step": 44600 }, { "epoch": 1.89, "grad_norm": 4.466151157650065, "learning_rate": 3.6341649401380043e-06, "loss": 0.6635, "step": 44605 }, { "epoch": 1.89, "grad_norm": 4.51232343423555, "learning_rate": 3.6329806774245963e-06, "loss": 0.64, "step": 44610 }, { "epoch": 1.89, "grad_norm": 7.906065495845976, "learning_rate": 3.6317964975895442e-06, "loss": 0.6541, "step": 44615 }, { "epoch": 1.89, "grad_norm": 4.6029962836525495, "learning_rate": 3.63061240070464e-06, "loss": 0.6374, "step": 44620 }, { "epoch": 1.89, "grad_norm": 5.02030287257848, "learning_rate": 3.6294283868416714e-06, "loss": 0.6591, "step": 44625 }, { "epoch": 1.89, "grad_norm": 4.060862699542064, "learning_rate": 3.628244456072424e-06, "loss": 0.6484, "step": 44630 }, { "epoch": 1.89, "grad_norm": 4.563518026792678, "learning_rate": 3.627060608468673e-06, "loss": 0.6576, "step": 44635 }, { "epoch": 1.89, "grad_norm": 4.723319087263991, "learning_rate": 3.6258768441021943e-06, "loss": 0.655, "step": 44640 }, { "epoch": 1.89, "grad_norm": 4.264733957462176, "learning_rate": 3.6246931630447548e-06, "loss": 0.6289, "step": 44645 }, { "epoch": 1.89, "grad_norm": 4.200621514320712, "learning_rate": 3.6235095653681167e-06, "loss": 0.6522, "step": 44650 }, { "epoch": 1.89, "grad_norm": 4.416073621557099, "learning_rate": 3.6223260511440407e-06, "loss": 0.6263, "step": 44655 }, { "epoch": 1.89, "grad_norm": 5.716151681518997, "learning_rate": 3.6211426204442768e-06, "loss": 0.6523, "step": 44660 }, { "epoch": 1.89, "grad_norm": 6.172433992365676, "learning_rate": 3.6199592733405746e-06, "loss": 0.6721, "step": 44665 }, { "epoch": 1.89, "grad_norm": 4.273415107719127, "learning_rate": 3.6187760099046774e-06, "loss": 0.6726, "step": 44670 }, { "epoch": 1.89, "grad_norm": 6.516759959038405, "learning_rate": 3.617592830208321e-06, "loss": 0.6572, "step": 44675 }, { "epoch": 1.89, "grad_norm": 4.670927772111441, "learning_rate": 3.6164097343232402e-06, "loss": 0.6671, "step": 44680 }, { "epoch": 1.89, "grad_norm": 3.919116688751525, "learning_rate": 3.615226722321161e-06, "loss": 0.6244, "step": 44685 }, { "epoch": 1.89, "grad_norm": 4.1650510615775795, "learning_rate": 3.614043794273807e-06, "loss": 0.6586, "step": 44690 }, { "epoch": 1.89, "grad_norm": 8.816618947748804, "learning_rate": 3.6128609502528963e-06, "loss": 0.659, "step": 44695 }, { "epoch": 1.89, "grad_norm": 6.26059501547986, "learning_rate": 3.611678190330138e-06, "loss": 0.6362, "step": 44700 }, { "epoch": 1.89, "grad_norm": 4.788416578470977, "learning_rate": 3.6104955145772445e-06, "loss": 0.689, "step": 44705 }, { "epoch": 1.89, "grad_norm": 5.674565215168355, "learning_rate": 3.6093129230659143e-06, "loss": 0.6971, "step": 44710 }, { "epoch": 1.89, "grad_norm": 5.689096346233111, "learning_rate": 3.6081304158678443e-06, "loss": 0.6371, "step": 44715 }, { "epoch": 1.89, "grad_norm": 4.223490064082183, "learning_rate": 3.606947993054727e-06, "loss": 0.6484, "step": 44720 }, { "epoch": 1.89, "grad_norm": 6.940292153112864, "learning_rate": 3.60576565469825e-06, "loss": 0.6198, "step": 44725 }, { "epoch": 1.89, "grad_norm": 4.442786455891358, "learning_rate": 3.604583400870093e-06, "loss": 0.6822, "step": 44730 }, { "epoch": 1.89, "grad_norm": 5.893123389925185, "learning_rate": 3.6034012316419358e-06, "loss": 0.6654, "step": 44735 }, { "epoch": 1.89, "grad_norm": 7.219036345636989, "learning_rate": 3.6022191470854484e-06, "loss": 0.7001, "step": 44740 }, { "epoch": 1.89, "grad_norm": 8.831970149389107, "learning_rate": 3.601037147272295e-06, "loss": 0.6497, "step": 44745 }, { "epoch": 1.89, "grad_norm": 4.62155156573502, "learning_rate": 3.5998552322741404e-06, "loss": 0.63, "step": 44750 }, { "epoch": 1.89, "grad_norm": 4.7965635258314885, "learning_rate": 3.598673402162637e-06, "loss": 0.6726, "step": 44755 }, { "epoch": 1.89, "grad_norm": 4.276739326914388, "learning_rate": 3.5974916570094393e-06, "loss": 0.6413, "step": 44760 }, { "epoch": 1.89, "grad_norm": 12.388191735773463, "learning_rate": 3.5963099968861905e-06, "loss": 0.6734, "step": 44765 }, { "epoch": 1.89, "grad_norm": 8.627701816732168, "learning_rate": 3.595128421864531e-06, "loss": 0.6518, "step": 44770 }, { "epoch": 1.89, "grad_norm": 5.174378873460719, "learning_rate": 3.5939469320160987e-06, "loss": 0.6575, "step": 44775 }, { "epoch": 1.9, "grad_norm": 5.3445151023155315, "learning_rate": 3.5927655274125217e-06, "loss": 0.6134, "step": 44780 }, { "epoch": 1.9, "grad_norm": 6.764080039728375, "learning_rate": 3.5915842081254248e-06, "loss": 0.6625, "step": 44785 }, { "epoch": 1.9, "grad_norm": 4.834115723605342, "learning_rate": 3.59040297422643e-06, "loss": 0.6227, "step": 44790 }, { "epoch": 1.9, "grad_norm": 5.26725284834661, "learning_rate": 3.589221825787149e-06, "loss": 0.6419, "step": 44795 }, { "epoch": 1.9, "grad_norm": 8.352050766409466, "learning_rate": 3.588040762879195e-06, "loss": 0.6173, "step": 44800 }, { "epoch": 1.9, "grad_norm": 5.896229313998804, "learning_rate": 3.58685978557417e-06, "loss": 0.6528, "step": 44805 }, { "epoch": 1.9, "grad_norm": 5.051583581058532, "learning_rate": 3.5856788939436724e-06, "loss": 0.6394, "step": 44810 }, { "epoch": 1.9, "grad_norm": 4.277739363039084, "learning_rate": 3.5844980880592995e-06, "loss": 0.676, "step": 44815 }, { "epoch": 1.9, "grad_norm": 6.292977386185439, "learning_rate": 3.583317367992636e-06, "loss": 0.6668, "step": 44820 }, { "epoch": 1.9, "grad_norm": 9.210157182157122, "learning_rate": 3.582136733815269e-06, "loss": 0.6336, "step": 44825 }, { "epoch": 1.9, "grad_norm": 5.057567042820143, "learning_rate": 3.580956185598775e-06, "loss": 0.6555, "step": 44830 }, { "epoch": 1.9, "grad_norm": 6.791431581048354, "learning_rate": 3.579775723414727e-06, "loss": 0.6451, "step": 44835 }, { "epoch": 1.9, "grad_norm": 4.286405328547201, "learning_rate": 3.5785953473346933e-06, "loss": 0.6364, "step": 44840 }, { "epoch": 1.9, "grad_norm": 5.8697762053581055, "learning_rate": 3.577415057430237e-06, "loss": 0.6849, "step": 44845 }, { "epoch": 1.9, "grad_norm": 4.204917099644239, "learning_rate": 3.576234853772916e-06, "loss": 0.6175, "step": 44850 }, { "epoch": 1.9, "grad_norm": 7.042863141688427, "learning_rate": 3.5750547364342823e-06, "loss": 0.6385, "step": 44855 }, { "epoch": 1.9, "grad_norm": 5.691394252136263, "learning_rate": 3.5738747054858812e-06, "loss": 0.6835, "step": 44860 }, { "epoch": 1.9, "grad_norm": 6.047136578018468, "learning_rate": 3.5726947609992585e-06, "loss": 0.6381, "step": 44865 }, { "epoch": 1.9, "grad_norm": 8.160475461695407, "learning_rate": 3.571514903045947e-06, "loss": 0.6276, "step": 44870 }, { "epoch": 1.9, "grad_norm": 8.401212819277797, "learning_rate": 3.5703351316974787e-06, "loss": 0.6738, "step": 44875 }, { "epoch": 1.9, "grad_norm": 7.501935376921121, "learning_rate": 3.5691554470253796e-06, "loss": 0.6776, "step": 44880 }, { "epoch": 1.9, "grad_norm": 6.54656871780731, "learning_rate": 3.5679758491011717e-06, "loss": 0.6659, "step": 44885 }, { "epoch": 1.9, "grad_norm": 6.2961646383851875, "learning_rate": 3.5667963379963687e-06, "loss": 0.6432, "step": 44890 }, { "epoch": 1.9, "grad_norm": 4.6476604863741215, "learning_rate": 3.5656169137824836e-06, "loss": 0.6629, "step": 44895 }, { "epoch": 1.9, "grad_norm": 6.170380621099829, "learning_rate": 3.5644375765310203e-06, "loss": 0.6848, "step": 44900 }, { "epoch": 1.9, "grad_norm": 4.616275601781574, "learning_rate": 3.563258326313477e-06, "loss": 0.6457, "step": 44905 }, { "epoch": 1.9, "grad_norm": 4.251624826277175, "learning_rate": 3.5620791632013495e-06, "loss": 0.6532, "step": 44910 }, { "epoch": 1.9, "grad_norm": 6.958784897243633, "learning_rate": 3.560900087266126e-06, "loss": 0.6317, "step": 44915 }, { "epoch": 1.9, "grad_norm": 5.82456531873667, "learning_rate": 3.559721098579293e-06, "loss": 0.7084, "step": 44920 }, { "epoch": 1.9, "grad_norm": 12.857695889974073, "learning_rate": 3.558542197212326e-06, "loss": 0.6209, "step": 44925 }, { "epoch": 1.9, "grad_norm": 11.113439912938025, "learning_rate": 3.5573633832366994e-06, "loss": 0.6661, "step": 44930 }, { "epoch": 1.9, "grad_norm": 14.446699449200743, "learning_rate": 3.556184656723883e-06, "loss": 0.6422, "step": 44935 }, { "epoch": 1.9, "grad_norm": 7.59533747962382, "learning_rate": 3.555006017745336e-06, "loss": 0.6181, "step": 44940 }, { "epoch": 1.9, "grad_norm": 24.873130660378386, "learning_rate": 3.5538274663725192e-06, "loss": 0.6507, "step": 44945 }, { "epoch": 1.9, "grad_norm": 15.665247878620855, "learning_rate": 3.5526490026768833e-06, "loss": 0.6483, "step": 44950 }, { "epoch": 1.9, "grad_norm": 7.955561939850273, "learning_rate": 3.5514706267298736e-06, "loss": 0.6783, "step": 44955 }, { "epoch": 1.9, "grad_norm": 4.6593053473092985, "learning_rate": 3.5502923386029355e-06, "loss": 0.6421, "step": 44960 }, { "epoch": 1.9, "grad_norm": 4.196117810003258, "learning_rate": 3.5491141383675e-06, "loss": 0.6601, "step": 44965 }, { "epoch": 1.9, "grad_norm": 7.760959060021661, "learning_rate": 3.5479360260950023e-06, "loss": 0.6483, "step": 44970 }, { "epoch": 1.9, "grad_norm": 7.334754722085779, "learning_rate": 3.546758001856866e-06, "loss": 0.6436, "step": 44975 }, { "epoch": 1.9, "grad_norm": 8.0903609451562, "learning_rate": 3.54558006572451e-06, "loss": 0.6434, "step": 44980 }, { "epoch": 1.9, "grad_norm": 4.09644229027741, "learning_rate": 3.544402217769352e-06, "loss": 0.636, "step": 44985 }, { "epoch": 1.9, "grad_norm": 4.7306181457901495, "learning_rate": 3.543224458062799e-06, "loss": 0.6412, "step": 44990 }, { "epoch": 1.9, "grad_norm": 4.692614154806212, "learning_rate": 3.5420467866762565e-06, "loss": 0.6364, "step": 44995 }, { "epoch": 1.9, "grad_norm": 4.627130481402302, "learning_rate": 3.5408692036811234e-06, "loss": 0.6543, "step": 45000 }, { "epoch": 1.9, "grad_norm": 4.249746776839556, "learning_rate": 3.5396917091487907e-06, "loss": 0.674, "step": 45005 }, { "epoch": 1.9, "grad_norm": 6.774863664854658, "learning_rate": 3.53851430315065e-06, "loss": 0.6671, "step": 45010 }, { "epoch": 1.91, "grad_norm": 7.859105662751808, "learning_rate": 3.5373369857580807e-06, "loss": 0.6327, "step": 45015 }, { "epoch": 1.91, "grad_norm": 4.988088410636686, "learning_rate": 3.5361597570424628e-06, "loss": 0.631, "step": 45020 }, { "epoch": 1.91, "grad_norm": 9.072233309313598, "learning_rate": 3.5349826170751665e-06, "loss": 0.7023, "step": 45025 }, { "epoch": 1.91, "grad_norm": 4.913391968382866, "learning_rate": 3.5338055659275605e-06, "loss": 0.6248, "step": 45030 }, { "epoch": 1.91, "grad_norm": 8.378224479194328, "learning_rate": 3.5326286036710017e-06, "loss": 0.7001, "step": 45035 }, { "epoch": 1.91, "grad_norm": 4.092740629656298, "learning_rate": 3.531451730376849e-06, "loss": 0.6623, "step": 45040 }, { "epoch": 1.91, "grad_norm": 6.636800725818945, "learning_rate": 3.5302749461164533e-06, "loss": 0.6431, "step": 45045 }, { "epoch": 1.91, "grad_norm": 4.3715367662667175, "learning_rate": 3.5290982509611567e-06, "loss": 0.6124, "step": 45050 }, { "epoch": 1.91, "grad_norm": 5.672022547363441, "learning_rate": 3.527921644982302e-06, "loss": 0.6249, "step": 45055 }, { "epoch": 1.91, "grad_norm": 4.1904369484872515, "learning_rate": 3.52674512825122e-06, "loss": 0.6138, "step": 45060 }, { "epoch": 1.91, "grad_norm": 5.19605496824398, "learning_rate": 3.5255687008392425e-06, "loss": 0.6618, "step": 45065 }, { "epoch": 1.91, "grad_norm": 6.144042280162643, "learning_rate": 3.5243923628176914e-06, "loss": 0.6589, "step": 45070 }, { "epoch": 1.91, "grad_norm": 7.237017772517374, "learning_rate": 3.5232161142578836e-06, "loss": 0.6536, "step": 45075 }, { "epoch": 1.91, "grad_norm": 4.534056114779388, "learning_rate": 3.5220399552311346e-06, "loss": 0.618, "step": 45080 }, { "epoch": 1.91, "grad_norm": 9.307479248222899, "learning_rate": 3.52086388580875e-06, "loss": 0.6429, "step": 45085 }, { "epoch": 1.91, "grad_norm": 13.560355134535, "learning_rate": 3.5196879060620286e-06, "loss": 0.6676, "step": 45090 }, { "epoch": 1.91, "grad_norm": 9.47544758678445, "learning_rate": 3.518512016062271e-06, "loss": 0.6703, "step": 45095 }, { "epoch": 1.91, "grad_norm": 6.799166683171064, "learning_rate": 3.5173362158807643e-06, "loss": 0.6364, "step": 45100 }, { "epoch": 1.91, "grad_norm": 7.909903853368974, "learning_rate": 3.5161605055887973e-06, "loss": 0.6506, "step": 45105 }, { "epoch": 1.91, "grad_norm": 4.45293110294423, "learning_rate": 3.5149848852576474e-06, "loss": 0.6492, "step": 45110 }, { "epoch": 1.91, "grad_norm": 4.426069364358127, "learning_rate": 3.5138093549585893e-06, "loss": 0.689, "step": 45115 }, { "epoch": 1.91, "grad_norm": 6.768298832492627, "learning_rate": 3.512633914762893e-06, "loss": 0.6291, "step": 45120 }, { "epoch": 1.91, "grad_norm": 4.219609560024683, "learning_rate": 3.51145856474182e-06, "loss": 0.6611, "step": 45125 }, { "epoch": 1.91, "grad_norm": 3.9343306322600107, "learning_rate": 3.5102833049666307e-06, "loss": 0.6336, "step": 45130 }, { "epoch": 1.91, "grad_norm": 3.973331859137556, "learning_rate": 3.5091081355085763e-06, "loss": 0.6248, "step": 45135 }, { "epoch": 1.91, "grad_norm": 8.813870047716192, "learning_rate": 3.507933056438904e-06, "loss": 0.6382, "step": 45140 }, { "epoch": 1.91, "grad_norm": 6.719698739752371, "learning_rate": 3.506758067828856e-06, "loss": 0.6486, "step": 45145 }, { "epoch": 1.91, "grad_norm": 10.309332937060988, "learning_rate": 3.5055831697496667e-06, "loss": 0.6529, "step": 45150 }, { "epoch": 1.91, "grad_norm": 5.194784020741479, "learning_rate": 3.5044083622725688e-06, "loss": 0.6329, "step": 45155 }, { "epoch": 1.91, "grad_norm": 6.807824937213841, "learning_rate": 3.5032336454687867e-06, "loss": 0.6419, "step": 45160 }, { "epoch": 1.91, "grad_norm": 6.25104281768087, "learning_rate": 3.5020590194095383e-06, "loss": 0.6267, "step": 45165 }, { "epoch": 1.91, "grad_norm": 5.501850128628913, "learning_rate": 3.5008844841660408e-06, "loss": 0.667, "step": 45170 }, { "epoch": 1.91, "grad_norm": 4.338320866381703, "learning_rate": 3.4997100398095003e-06, "loss": 0.642, "step": 45175 }, { "epoch": 1.91, "grad_norm": 4.480245221234186, "learning_rate": 3.4985356864111215e-06, "loss": 0.6134, "step": 45180 }, { "epoch": 1.91, "grad_norm": 6.434761186358677, "learning_rate": 3.4973614240421018e-06, "loss": 0.6542, "step": 45185 }, { "epoch": 1.91, "grad_norm": 4.922162208983262, "learning_rate": 3.496187252773633e-06, "loss": 0.6635, "step": 45190 }, { "epoch": 1.91, "grad_norm": 4.633068683052903, "learning_rate": 3.4950131726769e-06, "loss": 0.6623, "step": 45195 }, { "epoch": 1.91, "grad_norm": 5.310601094445128, "learning_rate": 3.4938391838230855e-06, "loss": 0.6409, "step": 45200 }, { "epoch": 1.91, "grad_norm": 4.279081427197244, "learning_rate": 3.492665286283366e-06, "loss": 0.6248, "step": 45205 }, { "epoch": 1.91, "grad_norm": 4.3555152370178245, "learning_rate": 3.4914914801289086e-06, "loss": 0.6935, "step": 45210 }, { "epoch": 1.91, "grad_norm": 4.59167451299066, "learning_rate": 3.49031776543088e-06, "loss": 0.64, "step": 45215 }, { "epoch": 1.91, "grad_norm": 4.4084231211790605, "learning_rate": 3.4891441422604377e-06, "loss": 0.6525, "step": 45220 }, { "epoch": 1.91, "grad_norm": 5.705064326137762, "learning_rate": 3.4879706106887367e-06, "loss": 0.671, "step": 45225 }, { "epoch": 1.91, "grad_norm": 5.291866420477239, "learning_rate": 3.4867971707869236e-06, "loss": 0.6444, "step": 45230 }, { "epoch": 1.91, "grad_norm": 4.3499356752475595, "learning_rate": 3.48562382262614e-06, "loss": 0.6001, "step": 45235 }, { "epoch": 1.91, "grad_norm": 4.680347670419772, "learning_rate": 3.4844505662775243e-06, "loss": 0.6516, "step": 45240 }, { "epoch": 1.91, "grad_norm": 4.374486625621403, "learning_rate": 3.483277401812205e-06, "loss": 0.6618, "step": 45245 }, { "epoch": 1.92, "grad_norm": 4.885142577134379, "learning_rate": 3.48210432930131e-06, "loss": 0.6421, "step": 45250 }, { "epoch": 1.92, "grad_norm": 5.50862123955264, "learning_rate": 3.480931348815959e-06, "loss": 0.6344, "step": 45255 }, { "epoch": 1.92, "grad_norm": 5.34030285205759, "learning_rate": 3.479758460427265e-06, "loss": 0.6208, "step": 45260 }, { "epoch": 1.92, "grad_norm": 5.980381917399348, "learning_rate": 3.478585664206338e-06, "loss": 0.6389, "step": 45265 }, { "epoch": 1.92, "grad_norm": 4.441674112278496, "learning_rate": 3.4774129602242803e-06, "loss": 0.6372, "step": 45270 }, { "epoch": 1.92, "grad_norm": 6.384317833165869, "learning_rate": 3.47624034855219e-06, "loss": 0.6896, "step": 45275 }, { "epoch": 1.92, "grad_norm": 4.996378917166695, "learning_rate": 3.4750678292611594e-06, "loss": 0.621, "step": 45280 }, { "epoch": 1.92, "grad_norm": 4.3055117919830215, "learning_rate": 3.473895402422274e-06, "loss": 0.6587, "step": 45285 }, { "epoch": 1.92, "grad_norm": 4.550251939016165, "learning_rate": 3.472723068106616e-06, "loss": 0.6374, "step": 45290 }, { "epoch": 1.92, "grad_norm": 5.217032013369853, "learning_rate": 3.471550826385258e-06, "loss": 0.6458, "step": 45295 }, { "epoch": 1.92, "grad_norm": 4.275140809942955, "learning_rate": 3.4703786773292725e-06, "loss": 0.657, "step": 45300 }, { "epoch": 1.92, "grad_norm": 5.014511494782284, "learning_rate": 3.4692066210097218e-06, "loss": 0.6436, "step": 45305 }, { "epoch": 1.92, "grad_norm": 7.6458925905416155, "learning_rate": 3.4680346574976644e-06, "loss": 0.6379, "step": 45310 }, { "epoch": 1.92, "grad_norm": 4.500046154213909, "learning_rate": 3.4668627868641537e-06, "loss": 0.6312, "step": 45315 }, { "epoch": 1.92, "grad_norm": 8.834121992633179, "learning_rate": 3.4656910091802346e-06, "loss": 0.6666, "step": 45320 }, { "epoch": 1.92, "grad_norm": 4.752444694831217, "learning_rate": 3.4645193245169517e-06, "loss": 0.6232, "step": 45325 }, { "epoch": 1.92, "grad_norm": 9.42214176923696, "learning_rate": 3.4633477329453395e-06, "loss": 0.6427, "step": 45330 }, { "epoch": 1.92, "grad_norm": 10.973104035260567, "learning_rate": 3.4621762345364264e-06, "loss": 0.6038, "step": 45335 }, { "epoch": 1.92, "grad_norm": 8.567708285093062, "learning_rate": 3.4610048293612397e-06, "loss": 0.6484, "step": 45340 }, { "epoch": 1.92, "grad_norm": 5.635953752822431, "learning_rate": 3.459833517490797e-06, "loss": 0.622, "step": 45345 }, { "epoch": 1.92, "grad_norm": 5.468031644076369, "learning_rate": 3.4586622989961127e-06, "loss": 0.6426, "step": 45350 }, { "epoch": 1.92, "grad_norm": 4.714235821334394, "learning_rate": 3.4574911739481907e-06, "loss": 0.6617, "step": 45355 }, { "epoch": 1.92, "grad_norm": 4.248707188603817, "learning_rate": 3.456320142418036e-06, "loss": 0.6336, "step": 45360 }, { "epoch": 1.92, "grad_norm": 16.237906247248514, "learning_rate": 3.4551492044766444e-06, "loss": 0.6956, "step": 45365 }, { "epoch": 1.92, "grad_norm": 4.042196472785983, "learning_rate": 3.453978360195005e-06, "loss": 0.6261, "step": 45370 }, { "epoch": 1.92, "grad_norm": 9.417801576332172, "learning_rate": 3.4528076096441044e-06, "loss": 0.6393, "step": 45375 }, { "epoch": 1.92, "grad_norm": 4.158103615600723, "learning_rate": 3.45163695289492e-06, "loss": 0.6699, "step": 45380 }, { "epoch": 1.92, "grad_norm": 14.036393587692878, "learning_rate": 3.4504663900184277e-06, "loss": 0.6537, "step": 45385 }, { "epoch": 1.92, "grad_norm": 6.903788158320299, "learning_rate": 3.4492959210855936e-06, "loss": 0.6255, "step": 45390 }, { "epoch": 1.92, "grad_norm": 7.238431591391574, "learning_rate": 3.448125546167379e-06, "loss": 0.6237, "step": 45395 }, { "epoch": 1.92, "grad_norm": 4.449758078284561, "learning_rate": 3.446955265334742e-06, "loss": 0.6502, "step": 45400 }, { "epoch": 1.92, "grad_norm": 6.462202171715268, "learning_rate": 3.445785078658632e-06, "loss": 0.6199, "step": 45405 }, { "epoch": 1.92, "grad_norm": 4.490308446448917, "learning_rate": 3.4446149862099955e-06, "loss": 0.6344, "step": 45410 }, { "epoch": 1.92, "grad_norm": 10.267292511315063, "learning_rate": 3.443444988059771e-06, "loss": 0.6591, "step": 45415 }, { "epoch": 1.92, "grad_norm": 4.593758709757272, "learning_rate": 3.442275084278891e-06, "loss": 0.6395, "step": 45420 }, { "epoch": 1.92, "grad_norm": 5.51366980802667, "learning_rate": 3.441105274938285e-06, "loss": 0.6146, "step": 45425 }, { "epoch": 1.92, "grad_norm": 4.301590409914633, "learning_rate": 3.4399355601088735e-06, "loss": 0.6242, "step": 45430 }, { "epoch": 1.92, "grad_norm": 8.204318838542918, "learning_rate": 3.4387659398615746e-06, "loss": 0.6259, "step": 45435 }, { "epoch": 1.92, "grad_norm": 4.503210359347232, "learning_rate": 3.437596414267298e-06, "loss": 0.6417, "step": 45440 }, { "epoch": 1.92, "grad_norm": 8.75473444726216, "learning_rate": 3.436426983396947e-06, "loss": 0.677, "step": 45445 }, { "epoch": 1.92, "grad_norm": 7.024313056666088, "learning_rate": 3.4352576473214238e-06, "loss": 0.6319, "step": 45450 }, { "epoch": 1.92, "grad_norm": 4.181929513580396, "learning_rate": 3.43408840611162e-06, "loss": 0.6577, "step": 45455 }, { "epoch": 1.92, "grad_norm": 4.491345230249925, "learning_rate": 3.4329192598384236e-06, "loss": 0.6504, "step": 45460 }, { "epoch": 1.92, "grad_norm": 8.44920034533202, "learning_rate": 3.4317502085727172e-06, "loss": 0.6447, "step": 45465 }, { "epoch": 1.92, "grad_norm": 9.877218677330802, "learning_rate": 3.430581252385374e-06, "loss": 0.6111, "step": 45470 }, { "epoch": 1.92, "grad_norm": 4.661850280878438, "learning_rate": 3.4294123913472694e-06, "loss": 0.6126, "step": 45475 }, { "epoch": 1.92, "grad_norm": 9.080635964206005, "learning_rate": 3.428243625529263e-06, "loss": 0.6175, "step": 45480 }, { "epoch": 1.93, "grad_norm": 4.2005051644733005, "learning_rate": 3.4270749550022176e-06, "loss": 0.6249, "step": 45485 }, { "epoch": 1.93, "grad_norm": 4.485702976987765, "learning_rate": 3.4259063798369842e-06, "loss": 0.6747, "step": 45490 }, { "epoch": 1.93, "grad_norm": 4.399072599858354, "learning_rate": 3.4247379001044085e-06, "loss": 0.6516, "step": 45495 }, { "epoch": 1.93, "grad_norm": 4.0871058696232145, "learning_rate": 3.423569515875336e-06, "loss": 0.6362, "step": 45500 }, { "epoch": 1.93, "grad_norm": 4.857536226276318, "learning_rate": 3.4224012272205982e-06, "loss": 0.6536, "step": 45505 }, { "epoch": 1.93, "grad_norm": 6.300340740463707, "learning_rate": 3.421233034211029e-06, "loss": 0.6256, "step": 45510 }, { "epoch": 1.93, "grad_norm": 5.338711691489341, "learning_rate": 3.420064936917451e-06, "loss": 0.6338, "step": 45515 }, { "epoch": 1.93, "grad_norm": 4.272017307438853, "learning_rate": 3.4188969354106805e-06, "loss": 0.6657, "step": 45520 }, { "epoch": 1.93, "grad_norm": 5.450867501709167, "learning_rate": 3.4177290297615314e-06, "loss": 0.604, "step": 45525 }, { "epoch": 1.93, "grad_norm": 4.512853510261402, "learning_rate": 3.416561220040811e-06, "loss": 0.6549, "step": 45530 }, { "epoch": 1.93, "grad_norm": 4.36361541965981, "learning_rate": 3.41539350631932e-06, "loss": 0.6046, "step": 45535 }, { "epoch": 1.93, "grad_norm": 7.454954899309723, "learning_rate": 3.4142258886678513e-06, "loss": 0.6334, "step": 45540 }, { "epoch": 1.93, "grad_norm": 4.772077205290527, "learning_rate": 3.413058367157198e-06, "loss": 0.6318, "step": 45545 }, { "epoch": 1.93, "grad_norm": 4.503724298710208, "learning_rate": 3.4118909418581405e-06, "loss": 0.597, "step": 45550 }, { "epoch": 1.93, "grad_norm": 12.623475782035229, "learning_rate": 3.4107236128414573e-06, "loss": 0.6733, "step": 45555 }, { "epoch": 1.93, "grad_norm": 7.097557427822642, "learning_rate": 3.4095563801779206e-06, "loss": 0.6495, "step": 45560 }, { "epoch": 1.93, "grad_norm": 21.7313838759738, "learning_rate": 3.4083892439382947e-06, "loss": 0.6339, "step": 45565 }, { "epoch": 1.93, "grad_norm": 6.393503216674844, "learning_rate": 3.407222204193341e-06, "loss": 0.6165, "step": 45570 }, { "epoch": 1.93, "grad_norm": 5.440694518791359, "learning_rate": 3.4060552610138135e-06, "loss": 0.666, "step": 45575 }, { "epoch": 1.93, "grad_norm": 10.732716293410824, "learning_rate": 3.4048884144704606e-06, "loss": 0.6778, "step": 45580 }, { "epoch": 1.93, "grad_norm": 5.939427483936214, "learning_rate": 3.4037216646340253e-06, "loss": 0.6379, "step": 45585 }, { "epoch": 1.93, "grad_norm": 5.9883694612238685, "learning_rate": 3.402555011575242e-06, "loss": 0.6469, "step": 45590 }, { "epoch": 1.93, "grad_norm": 4.176765190840058, "learning_rate": 3.4013884553648445e-06, "loss": 0.6567, "step": 45595 }, { "epoch": 1.93, "grad_norm": 5.542131464514124, "learning_rate": 3.4002219960735545e-06, "loss": 0.6235, "step": 45600 }, { "epoch": 1.93, "grad_norm": 4.194349170981911, "learning_rate": 3.3990556337720935e-06, "loss": 0.6105, "step": 45605 }, { "epoch": 1.93, "grad_norm": 4.404356559462518, "learning_rate": 3.3978893685311744e-06, "loss": 0.6318, "step": 45610 }, { "epoch": 1.93, "grad_norm": 4.750007107627502, "learning_rate": 3.3967232004215017e-06, "loss": 0.6178, "step": 45615 }, { "epoch": 1.93, "grad_norm": 4.69505036307874, "learning_rate": 3.39555712951378e-06, "loss": 0.6368, "step": 45620 }, { "epoch": 1.93, "grad_norm": 4.117163220933203, "learning_rate": 3.394391155878704e-06, "loss": 0.6416, "step": 45625 }, { "epoch": 1.93, "grad_norm": 4.112462350359201, "learning_rate": 3.3932252795869615e-06, "loss": 0.674, "step": 45630 }, { "epoch": 1.93, "grad_norm": 4.132069219570511, "learning_rate": 3.392059500709238e-06, "loss": 0.6729, "step": 45635 }, { "epoch": 1.93, "grad_norm": 7.203002296058865, "learning_rate": 3.39089381931621e-06, "loss": 0.6618, "step": 45640 }, { "epoch": 1.93, "grad_norm": 4.579340664910821, "learning_rate": 3.389728235478551e-06, "loss": 0.6486, "step": 45645 }, { "epoch": 1.93, "grad_norm": 5.260551560755507, "learning_rate": 3.388562749266926e-06, "loss": 0.6392, "step": 45650 }, { "epoch": 1.93, "grad_norm": 7.766336994927626, "learning_rate": 3.387397360751994e-06, "loss": 0.6226, "step": 45655 }, { "epoch": 1.93, "grad_norm": 4.0340342118630845, "learning_rate": 3.3862320700044104e-06, "loss": 0.6266, "step": 45660 }, { "epoch": 1.93, "grad_norm": 4.095916294720115, "learning_rate": 3.3850668770948224e-06, "loss": 0.6492, "step": 45665 }, { "epoch": 1.93, "grad_norm": 4.473676532956748, "learning_rate": 3.3839017820938735e-06, "loss": 0.6551, "step": 45670 }, { "epoch": 1.93, "grad_norm": 5.131206676174092, "learning_rate": 3.3827367850722004e-06, "loss": 0.6401, "step": 45675 }, { "epoch": 1.93, "grad_norm": 7.578780078412708, "learning_rate": 3.3815718861004304e-06, "loss": 0.673, "step": 45680 }, { "epoch": 1.93, "grad_norm": 8.040065874460522, "learning_rate": 3.3804070852491895e-06, "loss": 0.6498, "step": 45685 }, { "epoch": 1.93, "grad_norm": 5.011626671836225, "learning_rate": 3.379242382589097e-06, "loss": 0.6073, "step": 45690 }, { "epoch": 1.93, "grad_norm": 11.913637730490201, "learning_rate": 3.378077778190765e-06, "loss": 0.6561, "step": 45695 }, { "epoch": 1.93, "grad_norm": 7.848933547400499, "learning_rate": 3.376913272124799e-06, "loss": 0.6157, "step": 45700 }, { "epoch": 1.93, "grad_norm": 5.492708959668048, "learning_rate": 3.3757488644618e-06, "loss": 0.6128, "step": 45705 }, { "epoch": 1.93, "grad_norm": 7.8988249480678805, "learning_rate": 3.3745845552723633e-06, "loss": 0.6337, "step": 45710 }, { "epoch": 1.93, "grad_norm": 4.567005021576079, "learning_rate": 3.3734203446270774e-06, "loss": 0.6387, "step": 45715 }, { "epoch": 1.93, "grad_norm": 4.9329980037692085, "learning_rate": 3.372256232596526e-06, "loss": 0.6169, "step": 45720 }, { "epoch": 1.94, "grad_norm": 4.269886785569335, "learning_rate": 3.3710922192512825e-06, "loss": 0.6546, "step": 45725 }, { "epoch": 1.94, "grad_norm": 6.919019736793612, "learning_rate": 3.369928304661921e-06, "loss": 0.6254, "step": 45730 }, { "epoch": 1.94, "grad_norm": 7.66442794497914, "learning_rate": 3.3687644888990033e-06, "loss": 0.638, "step": 45735 }, { "epoch": 1.94, "grad_norm": 15.493949347183731, "learning_rate": 3.367600772033091e-06, "loss": 0.6299, "step": 45740 }, { "epoch": 1.94, "grad_norm": 4.354606585995278, "learning_rate": 3.366437154134736e-06, "loss": 0.6197, "step": 45745 }, { "epoch": 1.94, "grad_norm": 6.921038050777262, "learning_rate": 3.365273635274483e-06, "loss": 0.6411, "step": 45750 }, { "epoch": 1.94, "grad_norm": 5.9344316667966535, "learning_rate": 3.3641102155228756e-06, "loss": 0.6316, "step": 45755 }, { "epoch": 1.94, "grad_norm": 4.4496752183477275, "learning_rate": 3.3629468949504456e-06, "loss": 0.6299, "step": 45760 }, { "epoch": 1.94, "grad_norm": 5.235743501385427, "learning_rate": 3.3617836736277244e-06, "loss": 0.6454, "step": 45765 }, { "epoch": 1.94, "grad_norm": 16.034535703715164, "learning_rate": 3.3606205516252346e-06, "loss": 0.647, "step": 45770 }, { "epoch": 1.94, "grad_norm": 9.108209852288871, "learning_rate": 3.359457529013489e-06, "loss": 0.6352, "step": 45775 }, { "epoch": 1.94, "grad_norm": 6.967952029220819, "learning_rate": 3.3582946058630032e-06, "loss": 0.6236, "step": 45780 }, { "epoch": 1.94, "grad_norm": 5.250529989577205, "learning_rate": 3.3571317822442783e-06, "loss": 0.6348, "step": 45785 }, { "epoch": 1.94, "grad_norm": 4.32677807080038, "learning_rate": 3.3559690582278158e-06, "loss": 0.6225, "step": 45790 }, { "epoch": 1.94, "grad_norm": 5.972028537489398, "learning_rate": 3.354806433884106e-06, "loss": 0.6627, "step": 45795 }, { "epoch": 1.94, "grad_norm": 4.7083080350903685, "learning_rate": 3.353643909283635e-06, "loss": 0.6438, "step": 45800 }, { "epoch": 1.94, "grad_norm": 11.592423877122028, "learning_rate": 3.3524814844968855e-06, "loss": 0.6853, "step": 45805 }, { "epoch": 1.94, "grad_norm": 10.427975091157588, "learning_rate": 3.3513191595943297e-06, "loss": 0.6357, "step": 45810 }, { "epoch": 1.94, "grad_norm": 9.880817789950196, "learning_rate": 3.350156934646438e-06, "loss": 0.627, "step": 45815 }, { "epoch": 1.94, "grad_norm": 12.047327347134507, "learning_rate": 3.3489948097236712e-06, "loss": 0.6876, "step": 45820 }, { "epoch": 1.94, "grad_norm": 4.44083308623586, "learning_rate": 3.347832784896485e-06, "loss": 0.6063, "step": 45825 }, { "epoch": 1.94, "grad_norm": 5.801802607179371, "learning_rate": 3.3466708602353323e-06, "loss": 0.6282, "step": 45830 }, { "epoch": 1.94, "grad_norm": 6.735548984809331, "learning_rate": 3.3455090358106536e-06, "loss": 0.5939, "step": 45835 }, { "epoch": 1.94, "grad_norm": 4.962240227191571, "learning_rate": 3.3443473116928917e-06, "loss": 0.6186, "step": 45840 }, { "epoch": 1.94, "grad_norm": 4.225505360361397, "learning_rate": 3.3431856879524715e-06, "loss": 0.6178, "step": 45845 }, { "epoch": 1.94, "grad_norm": 7.397160821929835, "learning_rate": 3.342024164659825e-06, "loss": 0.6234, "step": 45850 }, { "epoch": 1.94, "grad_norm": 4.032425510398778, "learning_rate": 3.340862741885368e-06, "loss": 0.6018, "step": 45855 }, { "epoch": 1.94, "grad_norm": 5.670062510645949, "learning_rate": 3.339701419699517e-06, "loss": 0.6264, "step": 45860 }, { "epoch": 1.94, "grad_norm": 11.067726178184103, "learning_rate": 3.3385401981726785e-06, "loss": 0.6366, "step": 45865 }, { "epoch": 1.94, "grad_norm": 4.28763534656678, "learning_rate": 3.3373790773752524e-06, "loss": 0.618, "step": 45870 }, { "epoch": 1.94, "grad_norm": 10.574772222727425, "learning_rate": 3.336218057377637e-06, "loss": 0.6456, "step": 45875 }, { "epoch": 1.94, "grad_norm": 4.214404047278807, "learning_rate": 3.3350571382502184e-06, "loss": 0.6253, "step": 45880 }, { "epoch": 1.94, "grad_norm": 10.8430885459688, "learning_rate": 3.3338963200633824e-06, "loss": 0.646, "step": 45885 }, { "epoch": 1.94, "grad_norm": 6.5060356945676965, "learning_rate": 3.332735602887505e-06, "loss": 0.6154, "step": 45890 }, { "epoch": 1.94, "grad_norm": 4.5533715988187495, "learning_rate": 3.331574986792956e-06, "loss": 0.6329, "step": 45895 }, { "epoch": 1.94, "grad_norm": 6.515450463660592, "learning_rate": 3.3304144718501017e-06, "loss": 0.6429, "step": 45900 }, { "epoch": 1.94, "grad_norm": 8.21207190775567, "learning_rate": 3.3292540581292996e-06, "loss": 0.6332, "step": 45905 }, { "epoch": 1.94, "grad_norm": 5.196766114786937, "learning_rate": 3.3280937457009033e-06, "loss": 0.6482, "step": 45910 }, { "epoch": 1.94, "grad_norm": 4.36533027224504, "learning_rate": 3.3269335346352584e-06, "loss": 0.6967, "step": 45915 }, { "epoch": 1.94, "grad_norm": 4.236632562202655, "learning_rate": 3.325773425002704e-06, "loss": 0.6276, "step": 45920 }, { "epoch": 1.94, "grad_norm": 3.9829729003756187, "learning_rate": 3.3246134168735763e-06, "loss": 0.6111, "step": 45925 }, { "epoch": 1.94, "grad_norm": 4.809042970021163, "learning_rate": 3.323453510318202e-06, "loss": 0.6435, "step": 45930 }, { "epoch": 1.94, "grad_norm": 4.290893326992605, "learning_rate": 3.3222937054069022e-06, "loss": 0.6469, "step": 45935 }, { "epoch": 1.94, "grad_norm": 7.044868727381685, "learning_rate": 3.321134002209994e-06, "loss": 0.6816, "step": 45940 }, { "epoch": 1.94, "grad_norm": 4.363964059078036, "learning_rate": 3.319974400797785e-06, "loss": 0.6354, "step": 45945 }, { "epoch": 1.94, "grad_norm": 4.494400399115878, "learning_rate": 3.3188149012405807e-06, "loss": 0.6524, "step": 45950 }, { "epoch": 1.94, "grad_norm": 4.215074240108514, "learning_rate": 3.3176555036086767e-06, "loss": 0.5911, "step": 45955 }, { "epoch": 1.95, "grad_norm": 4.883991756835394, "learning_rate": 3.3164962079723627e-06, "loss": 0.6514, "step": 45960 }, { "epoch": 1.95, "grad_norm": 10.079120046684297, "learning_rate": 3.315337014401926e-06, "loss": 0.6155, "step": 45965 }, { "epoch": 1.95, "grad_norm": 4.406521268089119, "learning_rate": 3.314177922967643e-06, "loss": 0.6617, "step": 45970 }, { "epoch": 1.95, "grad_norm": 6.9655004200961494, "learning_rate": 3.3130189337397874e-06, "loss": 0.6425, "step": 45975 }, { "epoch": 1.95, "grad_norm": 10.389588515092377, "learning_rate": 3.3118600467886246e-06, "loss": 0.6239, "step": 45980 }, { "epoch": 1.95, "grad_norm": 21.67557341637365, "learning_rate": 3.3107012621844135e-06, "loss": 0.6373, "step": 45985 }, { "epoch": 1.95, "grad_norm": 7.9094337528096395, "learning_rate": 3.3095425799974103e-06, "loss": 0.6548, "step": 45990 }, { "epoch": 1.95, "grad_norm": 4.223563564680734, "learning_rate": 3.3083840002978595e-06, "loss": 0.6191, "step": 45995 }, { "epoch": 1.95, "grad_norm": 4.430397517965408, "learning_rate": 3.307225523156008e-06, "loss": 0.642, "step": 46000 }, { "epoch": 1.95, "grad_norm": 7.396713176379317, "learning_rate": 3.306067148642083e-06, "loss": 0.641, "step": 46005 }, { "epoch": 1.95, "grad_norm": 4.514384050287509, "learning_rate": 3.3049088768263182e-06, "loss": 0.65, "step": 46010 }, { "epoch": 1.95, "grad_norm": 4.86124879340204, "learning_rate": 3.3037507077789345e-06, "loss": 0.6375, "step": 46015 }, { "epoch": 1.95, "grad_norm": 4.790318871201607, "learning_rate": 3.302592641570149e-06, "loss": 0.6196, "step": 46020 }, { "epoch": 1.95, "grad_norm": 5.821584714640911, "learning_rate": 3.3014346782701727e-06, "loss": 0.5791, "step": 46025 }, { "epoch": 1.95, "grad_norm": 4.840158443681289, "learning_rate": 3.300276817949207e-06, "loss": 0.6014, "step": 46030 }, { "epoch": 1.95, "grad_norm": 5.82757733318781, "learning_rate": 3.2991190606774527e-06, "loss": 0.6567, "step": 46035 }, { "epoch": 1.95, "grad_norm": 6.672958695752343, "learning_rate": 3.2979614065250986e-06, "loss": 0.6507, "step": 46040 }, { "epoch": 1.95, "grad_norm": 4.889145133827618, "learning_rate": 3.2968038555623317e-06, "loss": 0.6342, "step": 46045 }, { "epoch": 1.95, "grad_norm": 6.467186230525887, "learning_rate": 3.29564640785933e-06, "loss": 0.6154, "step": 46050 }, { "epoch": 1.95, "grad_norm": 4.500253827801541, "learning_rate": 3.2944890634862653e-06, "loss": 0.6428, "step": 46055 }, { "epoch": 1.95, "grad_norm": 4.839399150012095, "learning_rate": 3.293331822513306e-06, "loss": 0.6356, "step": 46060 }, { "epoch": 1.95, "grad_norm": 5.578976660232285, "learning_rate": 3.2921746850106095e-06, "loss": 0.6553, "step": 46065 }, { "epoch": 1.95, "grad_norm": 5.862085021592242, "learning_rate": 3.291017651048333e-06, "loss": 0.6477, "step": 46070 }, { "epoch": 1.95, "grad_norm": 4.458583766146514, "learning_rate": 3.2898607206966226e-06, "loss": 0.6231, "step": 46075 }, { "epoch": 1.95, "grad_norm": 4.740639885305135, "learning_rate": 3.2887038940256176e-06, "loss": 0.6436, "step": 46080 }, { "epoch": 1.95, "grad_norm": 6.760726312295329, "learning_rate": 3.2875471711054564e-06, "loss": 0.6388, "step": 46085 }, { "epoch": 1.95, "grad_norm": 4.347443661246086, "learning_rate": 3.2863905520062643e-06, "loss": 0.6423, "step": 46090 }, { "epoch": 1.95, "grad_norm": 5.4424496483677665, "learning_rate": 3.2852340367981673e-06, "loss": 0.6261, "step": 46095 }, { "epoch": 1.95, "grad_norm": 4.888106935293909, "learning_rate": 3.284077625551279e-06, "loss": 0.6441, "step": 46100 }, { "epoch": 1.95, "grad_norm": 4.888889296232943, "learning_rate": 3.2829213183357088e-06, "loss": 0.6348, "step": 46105 }, { "epoch": 1.95, "grad_norm": 6.72029219062407, "learning_rate": 3.2817651152215625e-06, "loss": 0.6307, "step": 46110 }, { "epoch": 1.95, "grad_norm": 4.326520750235513, "learning_rate": 3.2806090162789352e-06, "loss": 0.6721, "step": 46115 }, { "epoch": 1.95, "grad_norm": 6.687512741651495, "learning_rate": 3.279453021577919e-06, "loss": 0.6616, "step": 46120 }, { "epoch": 1.95, "grad_norm": 8.069785976189747, "learning_rate": 3.2782971311885985e-06, "loss": 0.6561, "step": 46125 }, { "epoch": 1.95, "grad_norm": 5.284538894788858, "learning_rate": 3.2771413451810506e-06, "loss": 0.5985, "step": 46130 }, { "epoch": 1.95, "grad_norm": 5.755525355905471, "learning_rate": 3.275985663625349e-06, "loss": 0.5981, "step": 46135 }, { "epoch": 1.95, "grad_norm": 4.408298168423517, "learning_rate": 3.2748300865915573e-06, "loss": 0.6329, "step": 46140 }, { "epoch": 1.95, "grad_norm": 4.266761951064814, "learning_rate": 3.2736746141497365e-06, "loss": 0.5762, "step": 46145 }, { "epoch": 1.95, "grad_norm": 5.114212000773811, "learning_rate": 3.2725192463699397e-06, "loss": 0.5831, "step": 46150 }, { "epoch": 1.95, "grad_norm": 4.592012214619939, "learning_rate": 3.2713639833222105e-06, "loss": 0.6433, "step": 46155 }, { "epoch": 1.95, "grad_norm": 6.065914483302372, "learning_rate": 3.270208825076594e-06, "loss": 0.648, "step": 46160 }, { "epoch": 1.95, "grad_norm": 4.251480447530903, "learning_rate": 3.2690537717031194e-06, "loss": 0.6444, "step": 46165 }, { "epoch": 1.95, "grad_norm": 4.252766212017174, "learning_rate": 3.267898823271817e-06, "loss": 0.6661, "step": 46170 }, { "epoch": 1.95, "grad_norm": 5.5036786748066415, "learning_rate": 3.2667439798527046e-06, "loss": 0.6909, "step": 46175 }, { "epoch": 1.95, "grad_norm": 4.414793603928873, "learning_rate": 3.2655892415158007e-06, "loss": 0.6533, "step": 46180 }, { "epoch": 1.95, "grad_norm": 4.205093549194959, "learning_rate": 3.2644346083311113e-06, "loss": 0.6648, "step": 46185 }, { "epoch": 1.95, "grad_norm": 6.246429880134112, "learning_rate": 3.2632800803686405e-06, "loss": 0.6459, "step": 46190 }, { "epoch": 1.96, "grad_norm": 4.201117143664794, "learning_rate": 3.2621256576983823e-06, "loss": 0.6116, "step": 46195 }, { "epoch": 1.96, "grad_norm": 5.996890794660394, "learning_rate": 3.2609713403903255e-06, "loss": 0.6677, "step": 46200 }, { "epoch": 1.96, "grad_norm": 5.136461825442796, "learning_rate": 3.259817128514455e-06, "loss": 0.6279, "step": 46205 }, { "epoch": 1.96, "grad_norm": 5.54420595395502, "learning_rate": 3.2586630221407457e-06, "loss": 0.6056, "step": 46210 }, { "epoch": 1.96, "grad_norm": 4.180766402213014, "learning_rate": 3.257509021339168e-06, "loss": 0.6007, "step": 46215 }, { "epoch": 1.96, "grad_norm": 4.74045489944561, "learning_rate": 3.2563551261796865e-06, "loss": 0.6278, "step": 46220 }, { "epoch": 1.96, "grad_norm": 5.837661691052112, "learning_rate": 3.255201336732256e-06, "loss": 0.6158, "step": 46225 }, { "epoch": 1.96, "grad_norm": 4.538245831689887, "learning_rate": 3.25404765306683e-06, "loss": 0.6305, "step": 46230 }, { "epoch": 1.96, "grad_norm": 5.759702231750561, "learning_rate": 3.2528940752533522e-06, "loss": 0.6457, "step": 46235 }, { "epoch": 1.96, "grad_norm": 5.126159591537414, "learning_rate": 3.2517406033617595e-06, "loss": 0.6193, "step": 46240 }, { "epoch": 1.96, "grad_norm": 4.2848869354476005, "learning_rate": 3.250587237461985e-06, "loss": 0.6235, "step": 46245 }, { "epoch": 1.96, "grad_norm": 4.889217353836298, "learning_rate": 3.2494339776239525e-06, "loss": 0.6515, "step": 46250 }, { "epoch": 1.96, "grad_norm": 4.5114275920757425, "learning_rate": 3.2482808239175826e-06, "loss": 0.6171, "step": 46255 }, { "epoch": 1.96, "grad_norm": 5.991292830232938, "learning_rate": 3.247127776412786e-06, "loss": 0.6414, "step": 46260 }, { "epoch": 1.96, "grad_norm": 4.977066022495548, "learning_rate": 3.2459748351794683e-06, "loss": 0.6235, "step": 46265 }, { "epoch": 1.96, "grad_norm": 10.262167364400819, "learning_rate": 3.2448220002875313e-06, "loss": 0.6562, "step": 46270 }, { "epoch": 1.96, "grad_norm": 6.655723918808195, "learning_rate": 3.2436692718068654e-06, "loss": 0.6272, "step": 46275 }, { "epoch": 1.96, "grad_norm": 9.384576239066247, "learning_rate": 3.242516649807358e-06, "loss": 0.6388, "step": 46280 }, { "epoch": 1.96, "grad_norm": 4.608291571213547, "learning_rate": 3.2413641343588897e-06, "loss": 0.6475, "step": 46285 }, { "epoch": 1.96, "grad_norm": 4.7114517458120035, "learning_rate": 3.2402117255313327e-06, "loss": 0.636, "step": 46290 }, { "epoch": 1.96, "grad_norm": 6.264695212115154, "learning_rate": 3.2390594233945568e-06, "loss": 0.6274, "step": 46295 }, { "epoch": 1.96, "grad_norm": 4.172386351747783, "learning_rate": 3.2379072280184195e-06, "loss": 0.6317, "step": 46300 }, { "epoch": 1.96, "grad_norm": 4.613526803223688, "learning_rate": 3.236755139472777e-06, "loss": 0.6202, "step": 46305 }, { "epoch": 1.96, "grad_norm": 6.651683753453762, "learning_rate": 3.235603157827477e-06, "loss": 0.6415, "step": 46310 }, { "epoch": 1.96, "grad_norm": 4.0517205975773445, "learning_rate": 3.23445128315236e-06, "loss": 0.6424, "step": 46315 }, { "epoch": 1.96, "grad_norm": 7.649946827934103, "learning_rate": 3.233299515517262e-06, "loss": 0.6412, "step": 46320 }, { "epoch": 1.96, "grad_norm": 4.261054805422806, "learning_rate": 3.2321478549920104e-06, "loss": 0.651, "step": 46325 }, { "epoch": 1.96, "grad_norm": 4.7366286658296595, "learning_rate": 3.230996301646427e-06, "loss": 0.6163, "step": 46330 }, { "epoch": 1.96, "grad_norm": 6.2007178344769285, "learning_rate": 3.2298448555503255e-06, "loss": 0.6251, "step": 46335 }, { "epoch": 1.96, "grad_norm": 4.486332633224456, "learning_rate": 3.228693516773517e-06, "loss": 0.596, "step": 46340 }, { "epoch": 1.96, "grad_norm": 4.055566125001305, "learning_rate": 3.2275422853858027e-06, "loss": 0.662, "step": 46345 }, { "epoch": 1.96, "grad_norm": 4.657242884892401, "learning_rate": 3.226391161456979e-06, "loss": 0.601, "step": 46350 }, { "epoch": 1.96, "grad_norm": 4.235320775098148, "learning_rate": 3.2252401450568354e-06, "loss": 0.6186, "step": 46355 }, { "epoch": 1.96, "grad_norm": 4.699293184880735, "learning_rate": 3.224089236255153e-06, "loss": 0.644, "step": 46360 }, { "epoch": 1.96, "grad_norm": 7.835230626583173, "learning_rate": 3.2229384351217096e-06, "loss": 0.6231, "step": 46365 }, { "epoch": 1.96, "grad_norm": 8.280775743768032, "learning_rate": 3.2217877417262745e-06, "loss": 0.6559, "step": 46370 }, { "epoch": 1.96, "grad_norm": 5.055690437459158, "learning_rate": 3.220637156138611e-06, "loss": 0.6385, "step": 46375 }, { "epoch": 1.96, "grad_norm": 5.225587105394276, "learning_rate": 3.219486678428475e-06, "loss": 0.6543, "step": 46380 }, { "epoch": 1.96, "grad_norm": 6.991628963972974, "learning_rate": 3.2183363086656164e-06, "loss": 0.6144, "step": 46385 }, { "epoch": 1.96, "grad_norm": 5.120547908226655, "learning_rate": 3.2171860469197802e-06, "loss": 0.6511, "step": 46390 }, { "epoch": 1.96, "grad_norm": 8.24200400520017, "learning_rate": 3.2160358932607018e-06, "loss": 0.6215, "step": 46395 }, { "epoch": 1.96, "grad_norm": 4.511490936426109, "learning_rate": 3.2148858477581135e-06, "loss": 0.6366, "step": 46400 }, { "epoch": 1.96, "grad_norm": 4.18077413080865, "learning_rate": 3.213735910481738e-06, "loss": 0.6431, "step": 46405 }, { "epoch": 1.96, "grad_norm": 6.568808169848295, "learning_rate": 3.2125860815012912e-06, "loss": 0.6414, "step": 46410 }, { "epoch": 1.96, "grad_norm": 4.480480991063598, "learning_rate": 3.211436360886486e-06, "loss": 0.6051, "step": 46415 }, { "epoch": 1.96, "grad_norm": 5.687428771859702, "learning_rate": 3.2102867487070254e-06, "loss": 0.6195, "step": 46420 }, { "epoch": 1.96, "grad_norm": 10.440458962487813, "learning_rate": 3.2091372450326085e-06, "loss": 0.625, "step": 46425 }, { "epoch": 1.96, "grad_norm": 5.138163854485248, "learning_rate": 3.2079878499329254e-06, "loss": 0.6189, "step": 46430 }, { "epoch": 1.97, "grad_norm": 4.332813901169031, "learning_rate": 3.2068385634776594e-06, "loss": 0.6063, "step": 46435 }, { "epoch": 1.97, "grad_norm": 5.111931666593924, "learning_rate": 3.20568938573649e-06, "loss": 0.6404, "step": 46440 }, { "epoch": 1.97, "grad_norm": 12.894188818141188, "learning_rate": 3.2045403167790866e-06, "loss": 0.6378, "step": 46445 }, { "epoch": 1.97, "grad_norm": 11.732185759794607, "learning_rate": 3.2033913566751175e-06, "loss": 0.6172, "step": 46450 }, { "epoch": 1.97, "grad_norm": 5.723825270650447, "learning_rate": 3.202242505494237e-06, "loss": 0.6224, "step": 46455 }, { "epoch": 1.97, "grad_norm": 8.039691356704054, "learning_rate": 3.2010937633060966e-06, "loss": 0.6371, "step": 46460 }, { "epoch": 1.97, "grad_norm": 7.334629584991646, "learning_rate": 3.199945130180345e-06, "loss": 0.6262, "step": 46465 }, { "epoch": 1.97, "grad_norm": 7.8948631683887385, "learning_rate": 3.198796606186616e-06, "loss": 0.6206, "step": 46470 }, { "epoch": 1.97, "grad_norm": 7.265846080712185, "learning_rate": 3.197648191394544e-06, "loss": 0.6473, "step": 46475 }, { "epoch": 1.97, "grad_norm": 7.16205564117438, "learning_rate": 3.1964998858737536e-06, "loss": 0.5934, "step": 46480 }, { "epoch": 1.97, "grad_norm": 4.0936364925505995, "learning_rate": 3.195351689693863e-06, "loss": 0.6069, "step": 46485 }, { "epoch": 1.97, "grad_norm": 9.474963773750005, "learning_rate": 3.1942036029244827e-06, "loss": 0.6083, "step": 46490 }, { "epoch": 1.97, "grad_norm": 4.922126300218031, "learning_rate": 3.193055625635219e-06, "loss": 0.6358, "step": 46495 }, { "epoch": 1.97, "grad_norm": 4.261451480397849, "learning_rate": 3.1919077578956704e-06, "loss": 0.6325, "step": 46500 }, { "epoch": 1.97, "grad_norm": 4.8411999635717295, "learning_rate": 3.190759999775428e-06, "loss": 0.5961, "step": 46505 }, { "epoch": 1.97, "grad_norm": 11.582305485153292, "learning_rate": 3.189612351344078e-06, "loss": 0.6252, "step": 46510 }, { "epoch": 1.97, "grad_norm": 4.634585402936844, "learning_rate": 3.188464812671198e-06, "loss": 0.6465, "step": 46515 }, { "epoch": 1.97, "grad_norm": 5.522395089026834, "learning_rate": 3.1873173838263603e-06, "loss": 0.6153, "step": 46520 }, { "epoch": 1.97, "grad_norm": 4.254912837329435, "learning_rate": 3.186170064879131e-06, "loss": 0.62, "step": 46525 }, { "epoch": 1.97, "grad_norm": 4.1235950846996134, "learning_rate": 3.185022855899067e-06, "loss": 0.6173, "step": 46530 }, { "epoch": 1.97, "grad_norm": 3.9879173494100844, "learning_rate": 3.183875756955721e-06, "loss": 0.6285, "step": 46535 }, { "epoch": 1.97, "grad_norm": 5.057685357645161, "learning_rate": 3.182728768118639e-06, "loss": 0.6052, "step": 46540 }, { "epoch": 1.97, "grad_norm": 4.438452022472454, "learning_rate": 3.181581889457358e-06, "loss": 0.6082, "step": 46545 }, { "epoch": 1.97, "grad_norm": 4.211037911354311, "learning_rate": 3.1804351210414118e-06, "loss": 0.6132, "step": 46550 }, { "epoch": 1.97, "grad_norm": 5.417088172029665, "learning_rate": 3.179288462940323e-06, "loss": 0.6437, "step": 46555 }, { "epoch": 1.97, "grad_norm": 4.34058702002519, "learning_rate": 3.1781419152236135e-06, "loss": 0.6434, "step": 46560 }, { "epoch": 1.97, "grad_norm": 6.1262098205843145, "learning_rate": 3.1769954779607927e-06, "loss": 0.5811, "step": 46565 }, { "epoch": 1.97, "grad_norm": 6.593190620281481, "learning_rate": 3.175849151221365e-06, "loss": 0.6192, "step": 46570 }, { "epoch": 1.97, "grad_norm": 7.50319941357607, "learning_rate": 3.1747029350748316e-06, "loss": 0.6816, "step": 46575 }, { "epoch": 1.97, "grad_norm": 7.145322094430233, "learning_rate": 3.1735568295906805e-06, "loss": 0.6226, "step": 46580 }, { "epoch": 1.97, "grad_norm": 8.430310278295654, "learning_rate": 3.172410834838401e-06, "loss": 0.6448, "step": 46585 }, { "epoch": 1.97, "grad_norm": 8.479118272701115, "learning_rate": 3.1712649508874688e-06, "loss": 0.6426, "step": 46590 }, { "epoch": 1.97, "grad_norm": 5.027445150246773, "learning_rate": 3.170119177807355e-06, "loss": 0.6086, "step": 46595 }, { "epoch": 1.97, "grad_norm": 12.2505219023973, "learning_rate": 3.168973515667526e-06, "loss": 0.6211, "step": 46600 }, { "epoch": 1.97, "grad_norm": 8.601870945224483, "learning_rate": 3.167827964537439e-06, "loss": 0.6435, "step": 46605 }, { "epoch": 1.97, "grad_norm": 8.981888257153422, "learning_rate": 3.166682524486546e-06, "loss": 0.6527, "step": 46610 }, { "epoch": 1.97, "grad_norm": 4.741489578123719, "learning_rate": 3.1655371955842917e-06, "loss": 0.6023, "step": 46615 }, { "epoch": 1.97, "grad_norm": 16.638431468401823, "learning_rate": 3.1643919779001133e-06, "loss": 0.621, "step": 46620 }, { "epoch": 1.97, "grad_norm": 4.896211754000867, "learning_rate": 3.1632468715034424e-06, "loss": 0.5929, "step": 46625 }, { "epoch": 1.97, "grad_norm": 4.595735014594119, "learning_rate": 3.1621018764637025e-06, "loss": 0.6311, "step": 46630 }, { "epoch": 1.97, "grad_norm": 4.558233563644817, "learning_rate": 3.1609569928503124e-06, "loss": 0.6301, "step": 46635 }, { "epoch": 1.97, "grad_norm": 4.35762206616698, "learning_rate": 3.1598122207326844e-06, "loss": 0.6305, "step": 46640 }, { "epoch": 1.97, "grad_norm": 4.125061682513744, "learning_rate": 3.1586675601802185e-06, "loss": 0.6185, "step": 46645 }, { "epoch": 1.97, "grad_norm": 4.6341609068490595, "learning_rate": 3.157523011262318e-06, "loss": 0.5991, "step": 46650 }, { "epoch": 1.97, "grad_norm": 4.888579897394806, "learning_rate": 3.1563785740483677e-06, "loss": 0.6515, "step": 46655 }, { "epoch": 1.97, "grad_norm": 11.847300158494878, "learning_rate": 3.1552342486077546e-06, "loss": 0.6052, "step": 46660 }, { "epoch": 1.97, "grad_norm": 4.763821469586007, "learning_rate": 3.154090035009854e-06, "loss": 0.6205, "step": 46665 }, { "epoch": 1.98, "grad_norm": 10.162321763741094, "learning_rate": 3.152945933324039e-06, "loss": 0.5883, "step": 46670 }, { "epoch": 1.98, "grad_norm": 4.387937504583639, "learning_rate": 3.151801943619669e-06, "loss": 0.6429, "step": 46675 }, { "epoch": 1.98, "grad_norm": 4.684205240756767, "learning_rate": 3.1506580659661046e-06, "loss": 0.6434, "step": 46680 }, { "epoch": 1.98, "grad_norm": 12.421610626633672, "learning_rate": 3.1495143004326944e-06, "loss": 0.6181, "step": 46685 }, { "epoch": 1.98, "grad_norm": 4.429681014119205, "learning_rate": 3.148370647088781e-06, "loss": 0.616, "step": 46690 }, { "epoch": 1.98, "grad_norm": 8.777285286288175, "learning_rate": 3.147227106003701e-06, "loss": 0.6412, "step": 46695 }, { "epoch": 1.98, "grad_norm": 12.826352553655871, "learning_rate": 3.146083677246784e-06, "loss": 0.6221, "step": 46700 }, { "epoch": 1.98, "grad_norm": 11.31903637921935, "learning_rate": 3.144940360887353e-06, "loss": 0.5993, "step": 46705 }, { "epoch": 1.98, "grad_norm": 8.270590648377889, "learning_rate": 3.143797156994724e-06, "loss": 0.6465, "step": 46710 }, { "epoch": 1.98, "grad_norm": 9.339541665265655, "learning_rate": 3.1426540656382047e-06, "loss": 0.6034, "step": 46715 }, { "epoch": 1.98, "grad_norm": 8.945666816275763, "learning_rate": 3.1415110868870994e-06, "loss": 0.6473, "step": 46720 }, { "epoch": 1.98, "grad_norm": 8.212152578476717, "learning_rate": 3.1403682208107015e-06, "loss": 0.5978, "step": 46725 }, { "epoch": 1.98, "grad_norm": 9.036458940961808, "learning_rate": 3.139225467478302e-06, "loss": 0.654, "step": 46730 }, { "epoch": 1.98, "grad_norm": 8.971596171819852, "learning_rate": 3.138082826959181e-06, "loss": 0.6805, "step": 46735 }, { "epoch": 1.98, "grad_norm": 4.925432467243013, "learning_rate": 3.136940299322613e-06, "loss": 0.5843, "step": 46740 }, { "epoch": 1.98, "grad_norm": 10.761712516313626, "learning_rate": 3.135797884637867e-06, "loss": 0.637, "step": 46745 }, { "epoch": 1.98, "grad_norm": 7.491754094052424, "learning_rate": 3.134655582974204e-06, "loss": 0.6178, "step": 46750 }, { "epoch": 1.98, "grad_norm": 8.868578846769974, "learning_rate": 3.133513394400879e-06, "loss": 0.6232, "step": 46755 }, { "epoch": 1.98, "grad_norm": 4.225187227322583, "learning_rate": 3.1323713189871396e-06, "loss": 0.6425, "step": 46760 }, { "epoch": 1.98, "grad_norm": 4.440747971591411, "learning_rate": 3.131229356802224e-06, "loss": 0.6294, "step": 46765 }, { "epoch": 1.98, "grad_norm": 4.634832900460518, "learning_rate": 3.130087507915369e-06, "loss": 0.6102, "step": 46770 }, { "epoch": 1.98, "grad_norm": 4.800312612927686, "learning_rate": 3.1289457723958005e-06, "loss": 0.6147, "step": 46775 }, { "epoch": 1.98, "grad_norm": 8.16424808361495, "learning_rate": 3.1278041503127376e-06, "loss": 0.6233, "step": 46780 }, { "epoch": 1.98, "grad_norm": 4.263129373261356, "learning_rate": 3.1266626417353957e-06, "loss": 0.6241, "step": 46785 }, { "epoch": 1.98, "grad_norm": 12.457306859331743, "learning_rate": 3.125521246732978e-06, "loss": 0.6176, "step": 46790 }, { "epoch": 1.98, "grad_norm": 5.4080910944801275, "learning_rate": 3.1243799653746877e-06, "loss": 0.6343, "step": 46795 }, { "epoch": 1.98, "grad_norm": 8.777589661068728, "learning_rate": 3.1232387977297143e-06, "loss": 0.6109, "step": 46800 }, { "epoch": 1.98, "grad_norm": 4.32213921654426, "learning_rate": 3.1220977438672433e-06, "loss": 0.6375, "step": 46805 }, { "epoch": 1.98, "grad_norm": 7.580621833082404, "learning_rate": 3.1209568038564574e-06, "loss": 0.6875, "step": 46810 }, { "epoch": 1.98, "grad_norm": 4.604946762870719, "learning_rate": 3.1198159777665237e-06, "loss": 0.6364, "step": 46815 }, { "epoch": 1.98, "grad_norm": 4.283053467752125, "learning_rate": 3.1186752656666097e-06, "loss": 0.6329, "step": 46820 }, { "epoch": 1.98, "grad_norm": 4.094788332827578, "learning_rate": 3.1175346676258712e-06, "loss": 0.6542, "step": 46825 }, { "epoch": 1.98, "grad_norm": 4.40663322536381, "learning_rate": 3.116394183713462e-06, "loss": 0.6215, "step": 46830 }, { "epoch": 1.98, "grad_norm": 6.025858793450196, "learning_rate": 3.1152538139985245e-06, "loss": 0.6319, "step": 46835 }, { "epoch": 1.98, "grad_norm": 9.977742969996442, "learning_rate": 3.114113558550197e-06, "loss": 0.6087, "step": 46840 }, { "epoch": 1.98, "grad_norm": 5.727026109270043, "learning_rate": 3.112973417437609e-06, "loss": 0.5802, "step": 46845 }, { "epoch": 1.98, "grad_norm": 5.11985387639585, "learning_rate": 3.1118333907298843e-06, "loss": 0.6445, "step": 46850 }, { "epoch": 1.98, "grad_norm": 13.697498840480531, "learning_rate": 3.1106934784961394e-06, "loss": 0.599, "step": 46855 }, { "epoch": 1.98, "grad_norm": 5.421122232118819, "learning_rate": 3.109553680805484e-06, "loss": 0.6134, "step": 46860 }, { "epoch": 1.98, "grad_norm": 4.2473322106608675, "learning_rate": 3.1084139977270207e-06, "loss": 0.6099, "step": 46865 }, { "epoch": 1.98, "grad_norm": 6.245696152850468, "learning_rate": 3.1072744293298446e-06, "loss": 0.6463, "step": 46870 }, { "epoch": 1.98, "grad_norm": 12.45783626369971, "learning_rate": 3.1061349756830437e-06, "loss": 0.597, "step": 46875 }, { "epoch": 1.98, "grad_norm": 7.461279453089836, "learning_rate": 3.104995636855702e-06, "loss": 0.6219, "step": 46880 }, { "epoch": 1.98, "grad_norm": 4.676007299294277, "learning_rate": 3.103856412916892e-06, "loss": 0.6447, "step": 46885 }, { "epoch": 1.98, "grad_norm": 5.2950696905504415, "learning_rate": 3.1027173039356827e-06, "loss": 0.6514, "step": 46890 }, { "epoch": 1.98, "grad_norm": 7.398479795081675, "learning_rate": 3.1015783099811357e-06, "loss": 0.6138, "step": 46895 }, { "epoch": 1.98, "grad_norm": 5.044479169464052, "learning_rate": 3.100439431122302e-06, "loss": 0.6054, "step": 46900 }, { "epoch": 1.99, "grad_norm": 11.804052362360174, "learning_rate": 3.0993006674282316e-06, "loss": 0.6449, "step": 46905 }, { "epoch": 1.99, "grad_norm": 10.975069219231203, "learning_rate": 3.0981620189679616e-06, "loss": 0.6155, "step": 46910 }, { "epoch": 1.99, "grad_norm": 4.229553714779008, "learning_rate": 3.0970234858105276e-06, "loss": 0.6426, "step": 46915 }, { "epoch": 1.99, "grad_norm": 12.128456260333069, "learning_rate": 3.0958850680249548e-06, "loss": 0.6324, "step": 46920 }, { "epoch": 1.99, "grad_norm": 5.458938698591031, "learning_rate": 3.0947467656802597e-06, "loss": 0.6175, "step": 46925 }, { "epoch": 1.99, "grad_norm": 8.15558306401921, "learning_rate": 3.0936085788454574e-06, "loss": 0.6006, "step": 46930 }, { "epoch": 1.99, "grad_norm": 9.598198601089736, "learning_rate": 3.0924705075895513e-06, "loss": 0.6499, "step": 46935 }, { "epoch": 1.99, "grad_norm": 4.767678372819063, "learning_rate": 3.0913325519815396e-06, "loss": 0.6308, "step": 46940 }, { "epoch": 1.99, "grad_norm": 5.900721759057035, "learning_rate": 3.090194712090413e-06, "loss": 0.6418, "step": 46945 }, { "epoch": 1.99, "grad_norm": 8.82450697601192, "learning_rate": 3.089056987985155e-06, "loss": 0.6155, "step": 46950 }, { "epoch": 1.99, "grad_norm": 4.728015496275644, "learning_rate": 3.0879193797347433e-06, "loss": 0.6167, "step": 46955 }, { "epoch": 1.99, "grad_norm": 4.742949349784359, "learning_rate": 3.086781887408147e-06, "loss": 0.6079, "step": 46960 }, { "epoch": 1.99, "grad_norm": 4.5879537730708915, "learning_rate": 3.0856445110743305e-06, "loss": 0.6121, "step": 46965 }, { "epoch": 1.99, "grad_norm": 4.3228519702352, "learning_rate": 3.0845072508022487e-06, "loss": 0.6126, "step": 46970 }, { "epoch": 1.99, "grad_norm": 8.6610079079695, "learning_rate": 3.083370106660849e-06, "loss": 0.6267, "step": 46975 }, { "epoch": 1.99, "grad_norm": 8.529583406288785, "learning_rate": 3.0822330787190736e-06, "loss": 0.6532, "step": 46980 }, { "epoch": 1.99, "grad_norm": 6.686243257580459, "learning_rate": 3.081096167045858e-06, "loss": 0.618, "step": 46985 }, { "epoch": 1.99, "grad_norm": 7.865513355628833, "learning_rate": 3.0799593717101307e-06, "loss": 0.6138, "step": 46990 }, { "epoch": 1.99, "grad_norm": 10.96422710705633, "learning_rate": 3.078822692780809e-06, "loss": 0.611, "step": 46995 }, { "epoch": 1.99, "grad_norm": 8.794855510431477, "learning_rate": 3.0776861303268104e-06, "loss": 0.6108, "step": 47000 }, { "epoch": 1.99, "grad_norm": 6.947990803639829, "learning_rate": 3.0765496844170374e-06, "loss": 0.6406, "step": 47005 }, { "epoch": 1.99, "grad_norm": 6.211926810267644, "learning_rate": 3.0754133551203923e-06, "loss": 0.649, "step": 47010 }, { "epoch": 1.99, "grad_norm": 5.8510144909303285, "learning_rate": 3.074277142505767e-06, "loss": 0.6171, "step": 47015 }, { "epoch": 1.99, "grad_norm": 5.4852303674809, "learning_rate": 3.073141046642045e-06, "loss": 0.6159, "step": 47020 }, { "epoch": 1.99, "grad_norm": 4.810037219446173, "learning_rate": 3.0720050675981063e-06, "loss": 0.6219, "step": 47025 }, { "epoch": 1.99, "grad_norm": 12.041243225205607, "learning_rate": 3.07086920544282e-06, "loss": 0.6427, "step": 47030 }, { "epoch": 1.99, "grad_norm": 4.929232061116423, "learning_rate": 3.069733460245053e-06, "loss": 0.6298, "step": 47035 }, { "epoch": 1.99, "grad_norm": 8.549853995408155, "learning_rate": 3.0685978320736604e-06, "loss": 0.6161, "step": 47040 }, { "epoch": 1.99, "grad_norm": 5.801933256377474, "learning_rate": 3.067462320997491e-06, "loss": 0.6165, "step": 47045 }, { "epoch": 1.99, "grad_norm": 11.999573797118748, "learning_rate": 3.066326927085389e-06, "loss": 0.6374, "step": 47050 }, { "epoch": 1.99, "grad_norm": 6.627220885243319, "learning_rate": 3.0651916504061895e-06, "loss": 0.6213, "step": 47055 }, { "epoch": 1.99, "grad_norm": 5.775374496663006, "learning_rate": 3.064056491028722e-06, "loss": 0.6486, "step": 47060 }, { "epoch": 1.99, "grad_norm": 4.979715372933483, "learning_rate": 3.062921449021806e-06, "loss": 0.6071, "step": 47065 }, { "epoch": 1.99, "grad_norm": 5.297321311676706, "learning_rate": 3.0617865244542565e-06, "loss": 0.6069, "step": 47070 }, { "epoch": 1.99, "grad_norm": 11.398372254633383, "learning_rate": 3.0606517173948824e-06, "loss": 0.5869, "step": 47075 }, { "epoch": 1.99, "grad_norm": 8.213627464121108, "learning_rate": 3.0595170279124808e-06, "loss": 0.5774, "step": 47080 }, { "epoch": 1.99, "grad_norm": 7.847612780284359, "learning_rate": 3.0583824560758456e-06, "loss": 0.6361, "step": 47085 }, { "epoch": 1.99, "grad_norm": 5.230833664308463, "learning_rate": 3.0572480019537643e-06, "loss": 0.6126, "step": 47090 }, { "epoch": 1.99, "grad_norm": 4.87824339017857, "learning_rate": 3.0561136656150126e-06, "loss": 0.6206, "step": 47095 }, { "epoch": 1.99, "grad_norm": 6.034740510501605, "learning_rate": 3.0549794471283645e-06, "loss": 0.6209, "step": 47100 }, { "epoch": 1.99, "grad_norm": 4.2982236032587195, "learning_rate": 3.0538453465625826e-06, "loss": 0.6171, "step": 47105 }, { "epoch": 1.99, "grad_norm": 6.429817191328955, "learning_rate": 3.0527113639864248e-06, "loss": 0.6423, "step": 47110 }, { "epoch": 1.99, "grad_norm": 5.750921463542665, "learning_rate": 3.051577499468642e-06, "loss": 0.6386, "step": 47115 }, { "epoch": 1.99, "grad_norm": 4.561544068931166, "learning_rate": 3.0504437530779747e-06, "loss": 0.6201, "step": 47120 }, { "epoch": 1.99, "grad_norm": 4.249087963466114, "learning_rate": 3.0493101248831607e-06, "loss": 0.6402, "step": 47125 }, { "epoch": 1.99, "grad_norm": 4.099603054673989, "learning_rate": 3.0481766149529286e-06, "loss": 0.6406, "step": 47130 }, { "epoch": 1.99, "grad_norm": 4.376779125790554, "learning_rate": 3.047043223356e-06, "loss": 0.6189, "step": 47135 }, { "epoch": 2.0, "grad_norm": 4.488704818790552, "learning_rate": 3.0459099501610856e-06, "loss": 0.5909, "step": 47140 }, { "epoch": 2.0, "grad_norm": 10.966580928447296, "learning_rate": 3.044776795436897e-06, "loss": 0.5981, "step": 47145 }, { "epoch": 2.0, "grad_norm": 4.322300020749453, "learning_rate": 3.0436437592521308e-06, "loss": 0.5779, "step": 47150 }, { "epoch": 2.0, "grad_norm": 5.84558432774676, "learning_rate": 3.04251084167548e-06, "loss": 0.5873, "step": 47155 }, { "epoch": 2.0, "grad_norm": 4.497170623213144, "learning_rate": 3.0413780427756323e-06, "loss": 0.6044, "step": 47160 }, { "epoch": 2.0, "grad_norm": 4.704093094519641, "learning_rate": 3.0402453626212637e-06, "loss": 0.6012, "step": 47165 }, { "epoch": 2.0, "grad_norm": 7.978748573025106, "learning_rate": 3.039112801281047e-06, "loss": 0.5737, "step": 47170 }, { "epoch": 2.0, "grad_norm": 4.53526559971311, "learning_rate": 3.0379803588236443e-06, "loss": 0.6037, "step": 47175 }, { "epoch": 2.0, "grad_norm": 3.954032451193737, "learning_rate": 3.0368480353177133e-06, "loss": 0.5919, "step": 47180 }, { "epoch": 2.0, "grad_norm": 4.161137178889893, "learning_rate": 3.035715830831904e-06, "loss": 0.6486, "step": 47185 }, { "epoch": 2.0, "grad_norm": 4.82406453908264, "learning_rate": 3.034583745434857e-06, "loss": 0.6545, "step": 47190 }, { "epoch": 2.0, "grad_norm": 4.232416342902947, "learning_rate": 3.033451779195209e-06, "loss": 0.6171, "step": 47195 }, { "epoch": 2.0, "grad_norm": 6.53915098172456, "learning_rate": 3.0323199321815866e-06, "loss": 0.6451, "step": 47200 }, { "epoch": 2.0, "grad_norm": 4.242573088464062, "learning_rate": 3.0311882044626094e-06, "loss": 0.6336, "step": 47205 }, { "epoch": 2.0, "grad_norm": 4.681813637211751, "learning_rate": 3.030056596106894e-06, "loss": 0.6199, "step": 47210 }, { "epoch": 2.0, "grad_norm": 4.287132884725201, "learning_rate": 3.0289251071830434e-06, "loss": 0.6114, "step": 47215 }, { "epoch": 2.0, "grad_norm": 7.937589762642504, "learning_rate": 3.027793737759658e-06, "loss": 0.6609, "step": 47220 }, { "epoch": 2.0, "grad_norm": 5.1908081433656745, "learning_rate": 3.0266624879053297e-06, "loss": 0.6242, "step": 47225 }, { "epoch": 2.0, "grad_norm": 4.440861334593197, "learning_rate": 3.02553135768864e-06, "loss": 0.6008, "step": 47230 }, { "epoch": 2.0, "grad_norm": 5.804001209664816, "learning_rate": 3.0244003471781703e-06, "loss": 0.651, "step": 47235 }, { "epoch": 2.0, "grad_norm": 6.856443484067078, "learning_rate": 3.023269456442487e-06, "loss": 0.5801, "step": 47240 }, { "epoch": 2.0, "grad_norm": 4.520627380087969, "learning_rate": 3.0221386855501545e-06, "loss": 0.6238, "step": 47245 }, { "epoch": 2.0, "grad_norm": 4.76037385146742, "learning_rate": 3.021008034569727e-06, "loss": 0.6219, "step": 47250 }, { "epoch": 2.0, "grad_norm": 4.448442979736266, "learning_rate": 3.0198775035697532e-06, "loss": 0.6403, "step": 47255 }, { "epoch": 2.0, "eval_loss": 0.8787976503372192, "eval_runtime": 36.9564, "eval_samples_per_second": 31.632, "eval_steps_per_second": 3.978, "step": 47257 }, { "epoch": 2.0, "grad_norm": 5.406558505351557, "learning_rate": 3.0187470926187747e-06, "loss": 0.4693, "step": 47260 }, { "epoch": 2.0, "grad_norm": 3.7125083707601454, "learning_rate": 3.017616801785322e-06, "loss": 0.3843, "step": 47265 }, { "epoch": 2.0, "grad_norm": 5.479499005769949, "learning_rate": 3.0164866311379246e-06, "loss": 0.428, "step": 47270 }, { "epoch": 2.0, "grad_norm": 5.055686527703872, "learning_rate": 3.0153565807451e-06, "loss": 0.3967, "step": 47275 }, { "epoch": 2.0, "grad_norm": 4.282920902201872, "learning_rate": 3.01422665067536e-06, "loss": 0.4125, "step": 47280 }, { "epoch": 2.0, "grad_norm": 4.34626229991146, "learning_rate": 3.013096840997209e-06, "loss": 0.3679, "step": 47285 }, { "epoch": 2.0, "grad_norm": 3.8383342170736694, "learning_rate": 3.0119671517791425e-06, "loss": 0.4228, "step": 47290 }, { "epoch": 2.0, "grad_norm": 8.587227508857335, "learning_rate": 3.010837583089656e-06, "loss": 0.4017, "step": 47295 }, { "epoch": 2.0, "grad_norm": 4.801434011877231, "learning_rate": 3.0097081349972236e-06, "loss": 0.3895, "step": 47300 }, { "epoch": 2.0, "grad_norm": 4.063233546911788, "learning_rate": 3.008578807570326e-06, "loss": 0.4089, "step": 47305 }, { "epoch": 2.0, "grad_norm": 4.4089489198017775, "learning_rate": 3.007449600877428e-06, "loss": 0.4324, "step": 47310 }, { "epoch": 2.0, "grad_norm": 5.267716439156547, "learning_rate": 3.0063205149869924e-06, "loss": 0.4019, "step": 47315 }, { "epoch": 2.0, "grad_norm": 4.694874467645835, "learning_rate": 3.005191549967471e-06, "loss": 0.3914, "step": 47320 }, { "epoch": 2.0, "grad_norm": 7.624501837834187, "learning_rate": 3.0040627058873097e-06, "loss": 0.4038, "step": 47325 }, { "epoch": 2.0, "grad_norm": 5.608713061113824, "learning_rate": 3.0029339828149477e-06, "loss": 0.3948, "step": 47330 }, { "epoch": 2.0, "grad_norm": 4.1808167089937465, "learning_rate": 3.001805380818815e-06, "loss": 0.4122, "step": 47335 }, { "epoch": 2.0, "grad_norm": 4.349164693119417, "learning_rate": 3.0006768999673375e-06, "loss": 0.3734, "step": 47340 }, { "epoch": 2.0, "grad_norm": 4.148108784119843, "learning_rate": 2.9995485403289306e-06, "loss": 0.3989, "step": 47345 }, { "epoch": 2.0, "grad_norm": 4.757899254225337, "learning_rate": 2.9984203019720014e-06, "loss": 0.3911, "step": 47350 }, { "epoch": 2.0, "grad_norm": 3.967542362222487, "learning_rate": 2.997292184964955e-06, "loss": 0.3816, "step": 47355 }, { "epoch": 2.0, "grad_norm": 3.9765544164382205, "learning_rate": 2.996164189376183e-06, "loss": 0.4181, "step": 47360 }, { "epoch": 2.0, "grad_norm": 4.575063373682107, "learning_rate": 2.9950363152740747e-06, "loss": 0.4045, "step": 47365 }, { "epoch": 2.0, "grad_norm": 3.8164892321274517, "learning_rate": 2.993908562727008e-06, "loss": 0.3978, "step": 47370 }, { "epoch": 2.0, "grad_norm": 5.123280446546897, "learning_rate": 2.9927809318033565e-06, "loss": 0.4334, "step": 47375 }, { "epoch": 2.01, "grad_norm": 14.684753100301338, "learning_rate": 2.9916534225714846e-06, "loss": 0.3957, "step": 47380 }, { "epoch": 2.01, "grad_norm": 4.653272048105198, "learning_rate": 2.9905260350997505e-06, "loss": 0.3888, "step": 47385 }, { "epoch": 2.01, "grad_norm": 6.271621887287846, "learning_rate": 2.9893987694565023e-06, "loss": 0.3797, "step": 47390 }, { "epoch": 2.01, "grad_norm": 4.443935735667931, "learning_rate": 2.9882716257100856e-06, "loss": 0.4037, "step": 47395 }, { "epoch": 2.01, "grad_norm": 5.038228259834561, "learning_rate": 2.9871446039288337e-06, "loss": 0.4026, "step": 47400 }, { "epoch": 2.01, "grad_norm": 5.022961434538157, "learning_rate": 2.9860177041810763e-06, "loss": 0.4348, "step": 47405 }, { "epoch": 2.01, "grad_norm": 4.382533210689673, "learning_rate": 2.984890926535133e-06, "loss": 0.3571, "step": 47410 }, { "epoch": 2.01, "grad_norm": 6.715919155041624, "learning_rate": 2.9837642710593162e-06, "loss": 0.4028, "step": 47415 }, { "epoch": 2.01, "grad_norm": 4.976456413190658, "learning_rate": 2.982637737821933e-06, "loss": 0.4097, "step": 47420 }, { "epoch": 2.01, "grad_norm": 4.544803547755578, "learning_rate": 2.981511326891281e-06, "loss": 0.4091, "step": 47425 }, { "epoch": 2.01, "grad_norm": 5.1863930014457384, "learning_rate": 2.9803850383356525e-06, "loss": 0.3616, "step": 47430 }, { "epoch": 2.01, "grad_norm": 6.813989691485184, "learning_rate": 2.9792588722233307e-06, "loss": 0.4137, "step": 47435 }, { "epoch": 2.01, "grad_norm": 4.411035134292161, "learning_rate": 2.9781328286225895e-06, "loss": 0.4104, "step": 47440 }, { "epoch": 2.01, "grad_norm": 4.286110766397366, "learning_rate": 2.9770069076017007e-06, "loss": 0.3881, "step": 47445 }, { "epoch": 2.01, "grad_norm": 4.1930344244648055, "learning_rate": 2.975881109228923e-06, "loss": 0.392, "step": 47450 }, { "epoch": 2.01, "grad_norm": 4.048276347620639, "learning_rate": 2.974755433572514e-06, "loss": 0.3951, "step": 47455 }, { "epoch": 2.01, "grad_norm": 4.498821357179391, "learning_rate": 2.9736298807007145e-06, "loss": 0.3726, "step": 47460 }, { "epoch": 2.01, "grad_norm": 4.4904755563052365, "learning_rate": 2.9725044506817685e-06, "loss": 0.4013, "step": 47465 }, { "epoch": 2.01, "grad_norm": 3.966527817370967, "learning_rate": 2.9713791435839033e-06, "loss": 0.413, "step": 47470 }, { "epoch": 2.01, "grad_norm": 3.941165482959042, "learning_rate": 2.970253959475347e-06, "loss": 0.396, "step": 47475 }, { "epoch": 2.01, "grad_norm": 4.236806687808002, "learning_rate": 2.969128898424314e-06, "loss": 0.3768, "step": 47480 }, { "epoch": 2.01, "grad_norm": 6.643563266388056, "learning_rate": 2.968003960499013e-06, "loss": 0.4003, "step": 47485 }, { "epoch": 2.01, "grad_norm": 5.31465131212337, "learning_rate": 2.9668791457676473e-06, "loss": 0.4034, "step": 47490 }, { "epoch": 2.01, "grad_norm": 5.784586985494391, "learning_rate": 2.9657544542984086e-06, "loss": 0.3918, "step": 47495 }, { "epoch": 2.01, "grad_norm": 3.647443403147733, "learning_rate": 2.9646298861594867e-06, "loss": 0.3783, "step": 47500 }, { "epoch": 2.01, "grad_norm": 3.830395460228566, "learning_rate": 2.96350544141906e-06, "loss": 0.4024, "step": 47505 }, { "epoch": 2.01, "grad_norm": 4.073805944001501, "learning_rate": 2.962381120145298e-06, "loss": 0.39, "step": 47510 }, { "epoch": 2.01, "grad_norm": 5.097842659651942, "learning_rate": 2.961256922406368e-06, "loss": 0.428, "step": 47515 }, { "epoch": 2.01, "grad_norm": 4.2687924730478075, "learning_rate": 2.9601328482704237e-06, "loss": 0.3887, "step": 47520 }, { "epoch": 2.01, "grad_norm": 3.8454766960321742, "learning_rate": 2.9590088978056185e-06, "loss": 0.3798, "step": 47525 }, { "epoch": 2.01, "grad_norm": 7.77419988240331, "learning_rate": 2.957885071080091e-06, "loss": 0.3894, "step": 47530 }, { "epoch": 2.01, "grad_norm": 6.34451387447682, "learning_rate": 2.9567613681619754e-06, "loss": 0.3796, "step": 47535 }, { "epoch": 2.01, "grad_norm": 5.5956675373415585, "learning_rate": 2.955637789119401e-06, "loss": 0.3945, "step": 47540 }, { "epoch": 2.01, "grad_norm": 6.617278596110945, "learning_rate": 2.954514334020484e-06, "loss": 0.3968, "step": 47545 }, { "epoch": 2.01, "grad_norm": 4.067198771902128, "learning_rate": 2.953391002933339e-06, "loss": 0.3977, "step": 47550 }, { "epoch": 2.01, "grad_norm": 8.695716488099286, "learning_rate": 2.9522677959260694e-06, "loss": 0.4003, "step": 47555 }, { "epoch": 2.01, "grad_norm": 6.982660863579801, "learning_rate": 2.9511447130667702e-06, "loss": 0.3887, "step": 47560 }, { "epoch": 2.01, "grad_norm": 6.674794757926067, "learning_rate": 2.9500217544235326e-06, "loss": 0.3941, "step": 47565 }, { "epoch": 2.01, "grad_norm": 4.66708020258648, "learning_rate": 2.948898920064437e-06, "loss": 0.3925, "step": 47570 }, { "epoch": 2.01, "grad_norm": 7.6983697439890335, "learning_rate": 2.947776210057559e-06, "loss": 0.3829, "step": 47575 }, { "epoch": 2.01, "grad_norm": 3.9040654156844323, "learning_rate": 2.9466536244709647e-06, "loss": 0.4048, "step": 47580 }, { "epoch": 2.01, "grad_norm": 4.6199754344666895, "learning_rate": 2.945531163372711e-06, "loss": 0.3724, "step": 47585 }, { "epoch": 2.01, "grad_norm": 4.420611029965048, "learning_rate": 2.9444088268308535e-06, "loss": 0.3792, "step": 47590 }, { "epoch": 2.01, "grad_norm": 5.570304254007392, "learning_rate": 2.943286614913432e-06, "loss": 0.3974, "step": 47595 }, { "epoch": 2.01, "grad_norm": 5.712081437431294, "learning_rate": 2.942164527688486e-06, "loss": 0.3844, "step": 47600 }, { "epoch": 2.01, "grad_norm": 4.715592466413024, "learning_rate": 2.941042565224043e-06, "loss": 0.4118, "step": 47605 }, { "epoch": 2.01, "grad_norm": 6.168183477980882, "learning_rate": 2.9399207275881237e-06, "loss": 0.3862, "step": 47610 }, { "epoch": 2.02, "grad_norm": 4.071783064772214, "learning_rate": 2.9387990148487433e-06, "loss": 0.41, "step": 47615 }, { "epoch": 2.02, "grad_norm": 4.90123964207469, "learning_rate": 2.9376774270739085e-06, "loss": 0.3961, "step": 47620 }, { "epoch": 2.02, "grad_norm": 4.348298075396367, "learning_rate": 2.9365559643316154e-06, "loss": 0.3712, "step": 47625 }, { "epoch": 2.02, "grad_norm": 4.296818125288639, "learning_rate": 2.935434626689855e-06, "loss": 0.4092, "step": 47630 }, { "epoch": 2.02, "grad_norm": 3.977071053657004, "learning_rate": 2.9343134142166123e-06, "loss": 0.3871, "step": 47635 }, { "epoch": 2.02, "grad_norm": 8.71759936216889, "learning_rate": 2.933192326979862e-06, "loss": 0.3992, "step": 47640 }, { "epoch": 2.02, "grad_norm": 3.842694774223993, "learning_rate": 2.932071365047574e-06, "loss": 0.3774, "step": 47645 }, { "epoch": 2.02, "grad_norm": 5.259355052271828, "learning_rate": 2.930950528487708e-06, "loss": 0.3621, "step": 47650 }, { "epoch": 2.02, "grad_norm": 5.18225565917172, "learning_rate": 2.929829817368216e-06, "loss": 0.3843, "step": 47655 }, { "epoch": 2.02, "grad_norm": 3.925843051594825, "learning_rate": 2.928709231757045e-06, "loss": 0.3704, "step": 47660 }, { "epoch": 2.02, "grad_norm": 3.997059658212725, "learning_rate": 2.9275887717221318e-06, "loss": 0.3616, "step": 47665 }, { "epoch": 2.02, "grad_norm": 6.273733940560973, "learning_rate": 2.9264684373314067e-06, "loss": 0.3791, "step": 47670 }, { "epoch": 2.02, "grad_norm": 10.010193607783602, "learning_rate": 2.925348228652793e-06, "loss": 0.3829, "step": 47675 }, { "epoch": 2.02, "grad_norm": 4.502786673034213, "learning_rate": 2.924228145754204e-06, "loss": 0.3866, "step": 47680 }, { "epoch": 2.02, "grad_norm": 4.064099294271605, "learning_rate": 2.9231081887035494e-06, "loss": 0.4221, "step": 47685 }, { "epoch": 2.02, "grad_norm": 6.656586130220156, "learning_rate": 2.921988357568727e-06, "loss": 0.3767, "step": 47690 }, { "epoch": 2.02, "grad_norm": 4.545874618160335, "learning_rate": 2.920868652417629e-06, "loss": 0.3984, "step": 47695 }, { "epoch": 2.02, "grad_norm": 7.857123149454434, "learning_rate": 2.919749073318141e-06, "loss": 0.4016, "step": 47700 }, { "epoch": 2.02, "grad_norm": 4.342110253228481, "learning_rate": 2.91862962033814e-06, "loss": 0.397, "step": 47705 }, { "epoch": 2.02, "grad_norm": 8.202751623725467, "learning_rate": 2.9175102935454926e-06, "loss": 0.4101, "step": 47710 }, { "epoch": 2.02, "grad_norm": 4.432775513127382, "learning_rate": 2.916391093008062e-06, "loss": 0.386, "step": 47715 }, { "epoch": 2.02, "grad_norm": 3.7077636641253324, "learning_rate": 2.9152720187937035e-06, "loss": 0.3842, "step": 47720 }, { "epoch": 2.02, "grad_norm": 3.9534942903313044, "learning_rate": 2.9141530709702604e-06, "loss": 0.4235, "step": 47725 }, { "epoch": 2.02, "grad_norm": 4.2377277916748, "learning_rate": 2.9130342496055717e-06, "loss": 0.4148, "step": 47730 }, { "epoch": 2.02, "grad_norm": 4.215758128269374, "learning_rate": 2.9119155547674698e-06, "loss": 0.39, "step": 47735 }, { "epoch": 2.02, "grad_norm": 4.0780200293333735, "learning_rate": 2.9107969865237794e-06, "loss": 0.4015, "step": 47740 }, { "epoch": 2.02, "grad_norm": 4.17709725900854, "learning_rate": 2.9096785449423114e-06, "loss": 0.3853, "step": 47745 }, { "epoch": 2.02, "grad_norm": 6.800077962782613, "learning_rate": 2.9085602300908756e-06, "loss": 0.42, "step": 47750 }, { "epoch": 2.02, "grad_norm": 3.8035581374777836, "learning_rate": 2.907442042037275e-06, "loss": 0.3845, "step": 47755 }, { "epoch": 2.02, "grad_norm": 5.46898508667038, "learning_rate": 2.9063239808492973e-06, "loss": 0.4063, "step": 47760 }, { "epoch": 2.02, "grad_norm": 3.927107264142804, "learning_rate": 2.9052060465947296e-06, "loss": 0.3929, "step": 47765 }, { "epoch": 2.02, "grad_norm": 4.466200293620653, "learning_rate": 2.904088239341351e-06, "loss": 0.3707, "step": 47770 }, { "epoch": 2.02, "grad_norm": 4.110460981541154, "learning_rate": 2.902970559156927e-06, "loss": 0.42, "step": 47775 }, { "epoch": 2.02, "grad_norm": 5.286904665721644, "learning_rate": 2.9018530061092225e-06, "loss": 0.3996, "step": 47780 }, { "epoch": 2.02, "grad_norm": 5.816565170761535, "learning_rate": 2.900735580265988e-06, "loss": 0.4084, "step": 47785 }, { "epoch": 2.02, "grad_norm": 4.30919406035898, "learning_rate": 2.8996182816949735e-06, "loss": 0.4203, "step": 47790 }, { "epoch": 2.02, "grad_norm": 4.464092131797559, "learning_rate": 2.8985011104639143e-06, "loss": 0.3971, "step": 47795 }, { "epoch": 2.02, "grad_norm": 3.8381139630388628, "learning_rate": 2.8973840666405424e-06, "loss": 0.3726, "step": 47800 }, { "epoch": 2.02, "grad_norm": 4.819212224980174, "learning_rate": 2.896267150292583e-06, "loss": 0.4025, "step": 47805 }, { "epoch": 2.02, "grad_norm": 9.015863328472255, "learning_rate": 2.8951503614877475e-06, "loss": 0.4059, "step": 47810 }, { "epoch": 2.02, "grad_norm": 24.369607105062965, "learning_rate": 2.894033700293746e-06, "loss": 0.4389, "step": 47815 }, { "epoch": 2.02, "grad_norm": 4.289849586678744, "learning_rate": 2.892917166778277e-06, "loss": 0.3554, "step": 47820 }, { "epoch": 2.02, "grad_norm": 4.31593109655073, "learning_rate": 2.891800761009036e-06, "loss": 0.3947, "step": 47825 }, { "epoch": 2.02, "grad_norm": 3.640556423552882, "learning_rate": 2.890684483053703e-06, "loss": 0.3721, "step": 47830 }, { "epoch": 2.02, "grad_norm": 4.729831882273159, "learning_rate": 2.8895683329799563e-06, "loss": 0.3697, "step": 47835 }, { "epoch": 2.02, "grad_norm": 4.028984164154781, "learning_rate": 2.8884523108554672e-06, "loss": 0.4006, "step": 47840 }, { "epoch": 2.02, "grad_norm": 3.892084887615599, "learning_rate": 2.8873364167478935e-06, "loss": 0.3685, "step": 47845 }, { "epoch": 2.03, "grad_norm": 4.9450597584169, "learning_rate": 2.8862206507248893e-06, "loss": 0.387, "step": 47850 }, { "epoch": 2.03, "grad_norm": 4.97601429158614, "learning_rate": 2.8851050128541024e-06, "loss": 0.3925, "step": 47855 }, { "epoch": 2.03, "grad_norm": 4.661187136926373, "learning_rate": 2.883989503203167e-06, "loss": 0.3878, "step": 47860 }, { "epoch": 2.03, "grad_norm": 12.622871963056166, "learning_rate": 2.882874121839715e-06, "loss": 0.4059, "step": 47865 }, { "epoch": 2.03, "grad_norm": 4.401097743443097, "learning_rate": 2.881758868831369e-06, "loss": 0.358, "step": 47870 }, { "epoch": 2.03, "grad_norm": 6.711589675533919, "learning_rate": 2.8806437442457457e-06, "loss": 0.4128, "step": 47875 }, { "epoch": 2.03, "grad_norm": 4.2245819868186905, "learning_rate": 2.879528748150447e-06, "loss": 0.3654, "step": 47880 }, { "epoch": 2.03, "grad_norm": 4.060696159350697, "learning_rate": 2.8784138806130746e-06, "loss": 0.4016, "step": 47885 }, { "epoch": 2.03, "grad_norm": 3.8312903214940777, "learning_rate": 2.877299141701221e-06, "loss": 0.3616, "step": 47890 }, { "epoch": 2.03, "grad_norm": 6.765022755362178, "learning_rate": 2.8761845314824653e-06, "loss": 0.4007, "step": 47895 }, { "epoch": 2.03, "grad_norm": 6.807441965558301, "learning_rate": 2.8750700500243867e-06, "loss": 0.398, "step": 47900 }, { "epoch": 2.03, "grad_norm": 4.180072075119044, "learning_rate": 2.8739556973945527e-06, "loss": 0.4146, "step": 47905 }, { "epoch": 2.03, "grad_norm": 4.082194429377859, "learning_rate": 2.8728414736605208e-06, "loss": 0.3929, "step": 47910 }, { "epoch": 2.03, "grad_norm": 6.099911697422608, "learning_rate": 2.871727378889845e-06, "loss": 0.3859, "step": 47915 }, { "epoch": 2.03, "grad_norm": 4.542975038218331, "learning_rate": 2.8706134131500683e-06, "loss": 0.396, "step": 47920 }, { "epoch": 2.03, "grad_norm": 4.0134557152627215, "learning_rate": 2.869499576508731e-06, "loss": 0.383, "step": 47925 }, { "epoch": 2.03, "grad_norm": 8.532318568264783, "learning_rate": 2.8683858690333555e-06, "loss": 0.3989, "step": 47930 }, { "epoch": 2.03, "grad_norm": 5.299556875424699, "learning_rate": 2.867272290791467e-06, "loss": 0.3632, "step": 47935 }, { "epoch": 2.03, "grad_norm": 13.551811332320883, "learning_rate": 2.8661588418505793e-06, "loss": 0.3953, "step": 47940 }, { "epoch": 2.03, "grad_norm": 5.49397292238517, "learning_rate": 2.8650455222781947e-06, "loss": 0.3662, "step": 47945 }, { "epoch": 2.03, "grad_norm": 5.16883034192617, "learning_rate": 2.86393233214181e-06, "loss": 0.3831, "step": 47950 }, { "epoch": 2.03, "grad_norm": 3.9712516729283847, "learning_rate": 2.8628192715089165e-06, "loss": 0.3712, "step": 47955 }, { "epoch": 2.03, "grad_norm": 4.117031164377064, "learning_rate": 2.8617063404469964e-06, "loss": 0.3997, "step": 47960 }, { "epoch": 2.03, "grad_norm": 4.252525765986604, "learning_rate": 2.8605935390235206e-06, "loss": 0.3911, "step": 47965 }, { "epoch": 2.03, "grad_norm": 4.875140145132414, "learning_rate": 2.8594808673059572e-06, "loss": 0.4097, "step": 47970 }, { "epoch": 2.03, "grad_norm": 4.485034875311755, "learning_rate": 2.858368325361765e-06, "loss": 0.4076, "step": 47975 }, { "epoch": 2.03, "grad_norm": 4.501660928661121, "learning_rate": 2.857255913258391e-06, "loss": 0.3784, "step": 47980 }, { "epoch": 2.03, "grad_norm": 6.4661822513601805, "learning_rate": 2.8561436310632793e-06, "loss": 0.3823, "step": 47985 }, { "epoch": 2.03, "grad_norm": 9.04588973113391, "learning_rate": 2.855031478843865e-06, "loss": 0.382, "step": 47990 }, { "epoch": 2.03, "grad_norm": 5.195991934620597, "learning_rate": 2.8539194566675752e-06, "loss": 0.4198, "step": 47995 }, { "epoch": 2.03, "grad_norm": 3.840824558497126, "learning_rate": 2.852807564601825e-06, "loss": 0.4005, "step": 48000 }, { "epoch": 2.03, "grad_norm": 4.170442593148155, "learning_rate": 2.8516958027140284e-06, "loss": 0.4006, "step": 48005 }, { "epoch": 2.03, "grad_norm": 4.359858673157745, "learning_rate": 2.850584171071589e-06, "loss": 0.4029, "step": 48010 }, { "epoch": 2.03, "grad_norm": 4.277054442590462, "learning_rate": 2.8494726697418974e-06, "loss": 0.3779, "step": 48015 }, { "epoch": 2.03, "grad_norm": 4.403124936217612, "learning_rate": 2.8483612987923435e-06, "loss": 0.3809, "step": 48020 }, { "epoch": 2.03, "grad_norm": 3.9906868772275375, "learning_rate": 2.8472500582903084e-06, "loss": 0.3762, "step": 48025 }, { "epoch": 2.03, "grad_norm": 4.614173722470552, "learning_rate": 2.8461389483031586e-06, "loss": 0.3806, "step": 48030 }, { "epoch": 2.03, "grad_norm": 4.640790744750531, "learning_rate": 2.8450279688982603e-06, "loss": 0.3797, "step": 48035 }, { "epoch": 2.03, "grad_norm": 4.1857230043837115, "learning_rate": 2.8439171201429684e-06, "loss": 0.4142, "step": 48040 }, { "epoch": 2.03, "grad_norm": 5.231838896996696, "learning_rate": 2.842806402104633e-06, "loss": 0.372, "step": 48045 }, { "epoch": 2.03, "grad_norm": 4.126745866215367, "learning_rate": 2.841695814850589e-06, "loss": 0.3977, "step": 48050 }, { "epoch": 2.03, "grad_norm": 5.054999098469555, "learning_rate": 2.84058535844817e-06, "loss": 0.3869, "step": 48055 }, { "epoch": 2.03, "grad_norm": 4.070293201148474, "learning_rate": 2.8394750329647025e-06, "loss": 0.3824, "step": 48060 }, { "epoch": 2.03, "grad_norm": 3.99780252063637, "learning_rate": 2.8383648384674973e-06, "loss": 0.393, "step": 48065 }, { "epoch": 2.03, "grad_norm": 3.9781577660559595, "learning_rate": 2.837254775023864e-06, "loss": 0.3669, "step": 48070 }, { "epoch": 2.03, "grad_norm": 5.236269854909774, "learning_rate": 2.836144842701106e-06, "loss": 0.4292, "step": 48075 }, { "epoch": 2.03, "grad_norm": 5.110770039245152, "learning_rate": 2.835035041566511e-06, "loss": 0.3921, "step": 48080 }, { "epoch": 2.04, "grad_norm": 5.773325234350164, "learning_rate": 2.833925371687363e-06, "loss": 0.3582, "step": 48085 }, { "epoch": 2.04, "grad_norm": 5.288653456996363, "learning_rate": 2.832815833130942e-06, "loss": 0.4091, "step": 48090 }, { "epoch": 2.04, "grad_norm": 3.924388218237496, "learning_rate": 2.831706425964511e-06, "loss": 0.3531, "step": 48095 }, { "epoch": 2.04, "grad_norm": 5.042470814114546, "learning_rate": 2.830597150255333e-06, "loss": 0.4236, "step": 48100 }, { "epoch": 2.04, "grad_norm": 3.7041070601447075, "learning_rate": 2.829488006070661e-06, "loss": 0.3924, "step": 48105 }, { "epoch": 2.04, "grad_norm": 4.3330071802938175, "learning_rate": 2.8283789934777383e-06, "loss": 0.392, "step": 48110 }, { "epoch": 2.04, "grad_norm": 4.175850130255339, "learning_rate": 2.8272701125437986e-06, "loss": 0.4072, "step": 48115 }, { "epoch": 2.04, "grad_norm": 3.984236367039781, "learning_rate": 2.8261613633360708e-06, "loss": 0.383, "step": 48120 }, { "epoch": 2.04, "grad_norm": 4.155621785129845, "learning_rate": 2.8250527459217775e-06, "loss": 0.3698, "step": 48125 }, { "epoch": 2.04, "grad_norm": 5.017320981212969, "learning_rate": 2.823944260368131e-06, "loss": 0.3719, "step": 48130 }, { "epoch": 2.04, "grad_norm": 5.088609117737862, "learning_rate": 2.822835906742333e-06, "loss": 0.3972, "step": 48135 }, { "epoch": 2.04, "grad_norm": 4.473783393776434, "learning_rate": 2.8217276851115805e-06, "loss": 0.3805, "step": 48140 }, { "epoch": 2.04, "grad_norm": 4.028859481683899, "learning_rate": 2.820619595543064e-06, "loss": 0.3896, "step": 48145 }, { "epoch": 2.04, "grad_norm": 4.23202135574207, "learning_rate": 2.81951163810396e-06, "loss": 0.3766, "step": 48150 }, { "epoch": 2.04, "grad_norm": 4.1775451629764415, "learning_rate": 2.818403812861442e-06, "loss": 0.3866, "step": 48155 }, { "epoch": 2.04, "grad_norm": 3.7352576058573073, "learning_rate": 2.8172961198826775e-06, "loss": 0.3768, "step": 48160 }, { "epoch": 2.04, "grad_norm": 4.651892688530061, "learning_rate": 2.816188559234818e-06, "loss": 0.3727, "step": 48165 }, { "epoch": 2.04, "grad_norm": 4.331925586133346, "learning_rate": 2.8150811309850124e-06, "loss": 0.3939, "step": 48170 }, { "epoch": 2.04, "grad_norm": 4.165468639382936, "learning_rate": 2.813973835200403e-06, "loss": 0.3807, "step": 48175 }, { "epoch": 2.04, "grad_norm": 4.434481766797425, "learning_rate": 2.812866671948122e-06, "loss": 0.3909, "step": 48180 }, { "epoch": 2.04, "grad_norm": 5.294882379298864, "learning_rate": 2.811759641295291e-06, "loss": 0.3986, "step": 48185 }, { "epoch": 2.04, "grad_norm": 4.400347963344931, "learning_rate": 2.810652743309027e-06, "loss": 0.4312, "step": 48190 }, { "epoch": 2.04, "grad_norm": 3.7200872540214545, "learning_rate": 2.8095459780564404e-06, "loss": 0.3877, "step": 48195 }, { "epoch": 2.04, "grad_norm": 3.5877589160667362, "learning_rate": 2.8084393456046265e-06, "loss": 0.3736, "step": 48200 }, { "epoch": 2.04, "grad_norm": 4.699831064953315, "learning_rate": 2.8073328460206804e-06, "loss": 0.4073, "step": 48205 }, { "epoch": 2.04, "grad_norm": 4.434159599386055, "learning_rate": 2.8062264793716865e-06, "loss": 0.4155, "step": 48210 }, { "epoch": 2.04, "grad_norm": 4.0739280884905735, "learning_rate": 2.8051202457247173e-06, "loss": 0.369, "step": 48215 }, { "epoch": 2.04, "grad_norm": 3.944469726262241, "learning_rate": 2.8040141451468426e-06, "loss": 0.3648, "step": 48220 }, { "epoch": 2.04, "grad_norm": 4.767980413361959, "learning_rate": 2.8029081777051213e-06, "loss": 0.3872, "step": 48225 }, { "epoch": 2.04, "grad_norm": 4.217062336095852, "learning_rate": 2.8018023434666073e-06, "loss": 0.3766, "step": 48230 }, { "epoch": 2.04, "grad_norm": 5.801639687007457, "learning_rate": 2.8006966424983413e-06, "loss": 0.3897, "step": 48235 }, { "epoch": 2.04, "grad_norm": 4.292093732864242, "learning_rate": 2.7995910748673586e-06, "loss": 0.3776, "step": 48240 }, { "epoch": 2.04, "grad_norm": 4.035631103481674, "learning_rate": 2.79848564064069e-06, "loss": 0.3778, "step": 48245 }, { "epoch": 2.04, "grad_norm": 5.05459240264595, "learning_rate": 2.7973803398853506e-06, "loss": 0.3736, "step": 48250 }, { "epoch": 2.04, "grad_norm": 4.231650065518774, "learning_rate": 2.796275172668352e-06, "loss": 0.3745, "step": 48255 }, { "epoch": 2.04, "grad_norm": 4.223637167027155, "learning_rate": 2.7951701390567014e-06, "loss": 0.3812, "step": 48260 }, { "epoch": 2.04, "grad_norm": 4.853259572625339, "learning_rate": 2.7940652391173906e-06, "loss": 0.4185, "step": 48265 }, { "epoch": 2.04, "grad_norm": 4.108730016375332, "learning_rate": 2.7929604729174044e-06, "loss": 0.3968, "step": 48270 }, { "epoch": 2.04, "grad_norm": 4.195627575098824, "learning_rate": 2.7918558405237232e-06, "loss": 0.3659, "step": 48275 }, { "epoch": 2.04, "grad_norm": 4.99909738886082, "learning_rate": 2.7907513420033207e-06, "loss": 0.3948, "step": 48280 }, { "epoch": 2.04, "grad_norm": 5.04313488262699, "learning_rate": 2.7896469774231548e-06, "loss": 0.4016, "step": 48285 }, { "epoch": 2.04, "grad_norm": 4.94339342572917, "learning_rate": 2.7885427468501813e-06, "loss": 0.3799, "step": 48290 }, { "epoch": 2.04, "grad_norm": 3.6890326192917984, "learning_rate": 2.787438650351347e-06, "loss": 0.3845, "step": 48295 }, { "epoch": 2.04, "grad_norm": 6.355299529250054, "learning_rate": 2.7863346879935926e-06, "loss": 0.4236, "step": 48300 }, { "epoch": 2.04, "grad_norm": 5.87706970564047, "learning_rate": 2.7852308598438437e-06, "loss": 0.4043, "step": 48305 }, { "epoch": 2.04, "grad_norm": 7.240536451779711, "learning_rate": 2.784127165969024e-06, "loss": 0.3466, "step": 48310 }, { "epoch": 2.04, "grad_norm": 6.003576672084176, "learning_rate": 2.7830236064360485e-06, "loss": 0.4018, "step": 48315 }, { "epoch": 2.04, "grad_norm": 3.870110644213413, "learning_rate": 2.781920181311819e-06, "loss": 0.402, "step": 48320 }, { "epoch": 2.05, "grad_norm": 5.426806177657531, "learning_rate": 2.780816890663236e-06, "loss": 0.374, "step": 48325 }, { "epoch": 2.05, "grad_norm": 4.344707046671713, "learning_rate": 2.77971373455719e-06, "loss": 0.4182, "step": 48330 }, { "epoch": 2.05, "grad_norm": 3.514210849108016, "learning_rate": 2.778610713060559e-06, "loss": 0.394, "step": 48335 }, { "epoch": 2.05, "grad_norm": 4.592807150078073, "learning_rate": 2.777507826240216e-06, "loss": 0.374, "step": 48340 }, { "epoch": 2.05, "grad_norm": 4.569838100045995, "learning_rate": 2.7764050741630278e-06, "loss": 0.4111, "step": 48345 }, { "epoch": 2.05, "grad_norm": 4.264369401870004, "learning_rate": 2.7753024568958508e-06, "loss": 0.393, "step": 48350 }, { "epoch": 2.05, "grad_norm": 4.69277871852729, "learning_rate": 2.774199974505532e-06, "loss": 0.4024, "step": 48355 }, { "epoch": 2.05, "grad_norm": 3.9145106005925556, "learning_rate": 2.773097627058912e-06, "loss": 0.3963, "step": 48360 }, { "epoch": 2.05, "grad_norm": 3.656961185937571, "learning_rate": 2.7719954146228254e-06, "loss": 0.3946, "step": 48365 }, { "epoch": 2.05, "grad_norm": 4.321301624594661, "learning_rate": 2.7708933372640925e-06, "loss": 0.3788, "step": 48370 }, { "epoch": 2.05, "grad_norm": 5.02214173024217, "learning_rate": 2.7697913950495298e-06, "loss": 0.3725, "step": 48375 }, { "epoch": 2.05, "grad_norm": 3.9796984419359673, "learning_rate": 2.7686895880459475e-06, "loss": 0.3787, "step": 48380 }, { "epoch": 2.05, "grad_norm": 4.258481266938924, "learning_rate": 2.7675879163201414e-06, "loss": 0.4112, "step": 48385 }, { "epoch": 2.05, "grad_norm": 3.901298453053275, "learning_rate": 2.766486379938904e-06, "loss": 0.3796, "step": 48390 }, { "epoch": 2.05, "grad_norm": 4.312260190227359, "learning_rate": 2.7653849789690203e-06, "loss": 0.3675, "step": 48395 }, { "epoch": 2.05, "grad_norm": 4.291482365128579, "learning_rate": 2.764283713477261e-06, "loss": 0.4042, "step": 48400 }, { "epoch": 2.05, "grad_norm": 4.42267969844581, "learning_rate": 2.7631825835303948e-06, "loss": 0.3847, "step": 48405 }, { "epoch": 2.05, "grad_norm": 4.403607079143448, "learning_rate": 2.762081589195179e-06, "loss": 0.4052, "step": 48410 }, { "epoch": 2.05, "grad_norm": 4.185346648236187, "learning_rate": 2.7609807305383675e-06, "loss": 0.3743, "step": 48415 }, { "epoch": 2.05, "grad_norm": 4.34310104095834, "learning_rate": 2.759880007626697e-06, "loss": 0.3935, "step": 48420 }, { "epoch": 2.05, "grad_norm": 3.7844747328018946, "learning_rate": 2.7587794205269024e-06, "loss": 0.3797, "step": 48425 }, { "epoch": 2.05, "grad_norm": 4.299545195137125, "learning_rate": 2.7576789693057125e-06, "loss": 0.3884, "step": 48430 }, { "epoch": 2.05, "grad_norm": 4.963007533460066, "learning_rate": 2.7565786540298413e-06, "loss": 0.4162, "step": 48435 }, { "epoch": 2.05, "grad_norm": 4.617904577702721, "learning_rate": 2.7554784747659962e-06, "loss": 0.3868, "step": 48440 }, { "epoch": 2.05, "grad_norm": 5.551583945865222, "learning_rate": 2.7543784315808796e-06, "loss": 0.38, "step": 48445 }, { "epoch": 2.05, "grad_norm": 4.624987964056512, "learning_rate": 2.7532785245411863e-06, "loss": 0.3734, "step": 48450 }, { "epoch": 2.05, "grad_norm": 4.329332866545534, "learning_rate": 2.7521787537135956e-06, "loss": 0.4195, "step": 48455 }, { "epoch": 2.05, "grad_norm": 4.507290462139366, "learning_rate": 2.751079119164787e-06, "loss": 0.3637, "step": 48460 }, { "epoch": 2.05, "grad_norm": 4.053213369785701, "learning_rate": 2.7499796209614283e-06, "loss": 0.3878, "step": 48465 }, { "epoch": 2.05, "grad_norm": 5.040708612867266, "learning_rate": 2.7488802591701754e-06, "loss": 0.426, "step": 48470 }, { "epoch": 2.05, "grad_norm": 4.169649148186469, "learning_rate": 2.7477810338576817e-06, "loss": 0.405, "step": 48475 }, { "epoch": 2.05, "grad_norm": 3.9586866184857987, "learning_rate": 2.7466819450905893e-06, "loss": 0.3906, "step": 48480 }, { "epoch": 2.05, "grad_norm": 4.167320348648449, "learning_rate": 2.7455829929355353e-06, "loss": 0.374, "step": 48485 }, { "epoch": 2.05, "grad_norm": 5.058896872929868, "learning_rate": 2.7444841774591425e-06, "loss": 0.3902, "step": 48490 }, { "epoch": 2.05, "grad_norm": 4.612775650778324, "learning_rate": 2.7433854987280295e-06, "loss": 0.3887, "step": 48495 }, { "epoch": 2.05, "grad_norm": 4.373570165706974, "learning_rate": 2.742286956808809e-06, "loss": 0.3901, "step": 48500 }, { "epoch": 2.05, "grad_norm": 4.4190735712246, "learning_rate": 2.7411885517680782e-06, "loss": 0.3956, "step": 48505 }, { "epoch": 2.05, "grad_norm": 4.301074962439415, "learning_rate": 2.7400902836724313e-06, "loss": 0.3909, "step": 48510 }, { "epoch": 2.05, "grad_norm": 4.001962786369771, "learning_rate": 2.738992152588456e-06, "loss": 0.3926, "step": 48515 }, { "epoch": 2.05, "grad_norm": 5.789725858477086, "learning_rate": 2.7378941585827246e-06, "loss": 0.4055, "step": 48520 }, { "epoch": 2.05, "grad_norm": 3.5982589723521654, "learning_rate": 2.736796301721807e-06, "loss": 0.3787, "step": 48525 }, { "epoch": 2.05, "grad_norm": 5.404394226673754, "learning_rate": 2.735698582072264e-06, "loss": 0.4263, "step": 48530 }, { "epoch": 2.05, "grad_norm": 4.32801112748244, "learning_rate": 2.7346009997006474e-06, "loss": 0.3711, "step": 48535 }, { "epoch": 2.05, "grad_norm": 4.021115381284519, "learning_rate": 2.733503554673497e-06, "loss": 0.392, "step": 48540 }, { "epoch": 2.05, "grad_norm": 4.054467299470257, "learning_rate": 2.73240624705735e-06, "loss": 0.3923, "step": 48545 }, { "epoch": 2.05, "grad_norm": 4.642101356379261, "learning_rate": 2.731309076918735e-06, "loss": 0.3778, "step": 48550 }, { "epoch": 2.05, "grad_norm": 5.90088598323868, "learning_rate": 2.7302120443241665e-06, "loss": 0.3997, "step": 48555 }, { "epoch": 2.06, "grad_norm": 5.950109874480378, "learning_rate": 2.7291151493401545e-06, "loss": 0.3865, "step": 48560 }, { "epoch": 2.06, "grad_norm": 3.5499005039360556, "learning_rate": 2.7280183920332056e-06, "loss": 0.3666, "step": 48565 }, { "epoch": 2.06, "grad_norm": 3.956143559045745, "learning_rate": 2.726921772469806e-06, "loss": 0.4051, "step": 48570 }, { "epoch": 2.06, "grad_norm": 3.6392363595031822, "learning_rate": 2.725825290716444e-06, "loss": 0.3691, "step": 48575 }, { "epoch": 2.06, "grad_norm": 4.893745130583176, "learning_rate": 2.7247289468395965e-06, "loss": 0.3983, "step": 48580 }, { "epoch": 2.06, "grad_norm": 6.5570696978935015, "learning_rate": 2.723632740905732e-06, "loss": 0.4088, "step": 48585 }, { "epoch": 2.06, "grad_norm": 9.698394165038511, "learning_rate": 2.7225366729813095e-06, "loss": 0.3877, "step": 48590 }, { "epoch": 2.06, "grad_norm": 5.450383148938726, "learning_rate": 2.7214407431327783e-06, "loss": 0.3782, "step": 48595 }, { "epoch": 2.06, "grad_norm": 3.944339661267222, "learning_rate": 2.7203449514265833e-06, "loss": 0.3916, "step": 48600 }, { "epoch": 2.06, "grad_norm": 3.775718101785444, "learning_rate": 2.7192492979291606e-06, "loss": 0.3764, "step": 48605 }, { "epoch": 2.06, "grad_norm": 4.800730545085146, "learning_rate": 2.7181537827069328e-06, "loss": 0.3919, "step": 48610 }, { "epoch": 2.06, "grad_norm": 3.966451241805173, "learning_rate": 2.7170584058263196e-06, "loss": 0.3913, "step": 48615 }, { "epoch": 2.06, "grad_norm": 6.93660263204623, "learning_rate": 2.715963167353733e-06, "loss": 0.4016, "step": 48620 }, { "epoch": 2.06, "grad_norm": 7.698625819136889, "learning_rate": 2.7148680673555695e-06, "loss": 0.3889, "step": 48625 }, { "epoch": 2.06, "grad_norm": 6.232238118570845, "learning_rate": 2.713773105898224e-06, "loss": 0.3714, "step": 48630 }, { "epoch": 2.06, "grad_norm": 8.823150222903811, "learning_rate": 2.7126782830480827e-06, "loss": 0.3978, "step": 48635 }, { "epoch": 2.06, "grad_norm": 4.042317024595186, "learning_rate": 2.7115835988715177e-06, "loss": 0.3769, "step": 48640 }, { "epoch": 2.06, "grad_norm": 4.003916360256172, "learning_rate": 2.710489053434898e-06, "loss": 0.3805, "step": 48645 }, { "epoch": 2.06, "grad_norm": 4.520191329108922, "learning_rate": 2.709394646804584e-06, "loss": 0.3653, "step": 48650 }, { "epoch": 2.06, "grad_norm": 4.049544885302632, "learning_rate": 2.7083003790469262e-06, "loss": 0.3794, "step": 48655 }, { "epoch": 2.06, "grad_norm": 3.964407085722664, "learning_rate": 2.707206250228265e-06, "loss": 0.3751, "step": 48660 }, { "epoch": 2.06, "grad_norm": 4.330884655345749, "learning_rate": 2.706112260414935e-06, "loss": 0.3828, "step": 48665 }, { "epoch": 2.06, "grad_norm": 4.1978876822049935, "learning_rate": 2.705018409673263e-06, "loss": 0.4039, "step": 48670 }, { "epoch": 2.06, "grad_norm": 3.8487417803422064, "learning_rate": 2.703924698069563e-06, "loss": 0.3695, "step": 48675 }, { "epoch": 2.06, "grad_norm": 3.4957794396976087, "learning_rate": 2.7028311256701457e-06, "loss": 0.3739, "step": 48680 }, { "epoch": 2.06, "grad_norm": 3.901395195338321, "learning_rate": 2.7017376925413123e-06, "loss": 0.3523, "step": 48685 }, { "epoch": 2.06, "grad_norm": 4.272880650340702, "learning_rate": 2.7006443987493513e-06, "loss": 0.3918, "step": 48690 }, { "epoch": 2.06, "grad_norm": 4.552460420620594, "learning_rate": 2.699551244360547e-06, "loss": 0.3615, "step": 48695 }, { "epoch": 2.06, "grad_norm": 6.09029001793615, "learning_rate": 2.6984582294411763e-06, "loss": 0.3888, "step": 48700 }, { "epoch": 2.06, "grad_norm": 4.178009153647491, "learning_rate": 2.697365354057503e-06, "loss": 0.3941, "step": 48705 }, { "epoch": 2.06, "grad_norm": 4.30048215653837, "learning_rate": 2.696272618275785e-06, "loss": 0.3769, "step": 48710 }, { "epoch": 2.06, "grad_norm": 4.094727503777293, "learning_rate": 2.6951800221622714e-06, "loss": 0.4215, "step": 48715 }, { "epoch": 2.06, "grad_norm": 3.8872646963910995, "learning_rate": 2.6940875657832068e-06, "loss": 0.3962, "step": 48720 }, { "epoch": 2.06, "grad_norm": 4.159269793740176, "learning_rate": 2.6929952492048184e-06, "loss": 0.3791, "step": 48725 }, { "epoch": 2.06, "grad_norm": 4.013258462664164, "learning_rate": 2.6919030724933333e-06, "loss": 0.408, "step": 48730 }, { "epoch": 2.06, "grad_norm": 4.282336799805424, "learning_rate": 2.6908110357149674e-06, "loss": 0.425, "step": 48735 }, { "epoch": 2.06, "grad_norm": 5.50757335706558, "learning_rate": 2.6897191389359253e-06, "loss": 0.4026, "step": 48740 }, { "epoch": 2.06, "grad_norm": 6.971045764146771, "learning_rate": 2.688627382222406e-06, "loss": 0.3961, "step": 48745 }, { "epoch": 2.06, "grad_norm": 3.661024779287403, "learning_rate": 2.687535765640602e-06, "loss": 0.3937, "step": 48750 }, { "epoch": 2.06, "grad_norm": 5.04324946428947, "learning_rate": 2.686444289256693e-06, "loss": 0.3865, "step": 48755 }, { "epoch": 2.06, "grad_norm": 7.579909932334052, "learning_rate": 2.6853529531368505e-06, "loss": 0.3963, "step": 48760 }, { "epoch": 2.06, "grad_norm": 4.347597687549111, "learning_rate": 2.6842617573472403e-06, "loss": 0.3717, "step": 48765 }, { "epoch": 2.06, "grad_norm": 3.8794700100244914, "learning_rate": 2.68317070195402e-06, "loss": 0.3488, "step": 48770 }, { "epoch": 2.06, "grad_norm": 4.178796846730626, "learning_rate": 2.682079787023334e-06, "loss": 0.3866, "step": 48775 }, { "epoch": 2.06, "grad_norm": 4.16766330119346, "learning_rate": 2.680989012621323e-06, "loss": 0.3815, "step": 48780 }, { "epoch": 2.06, "grad_norm": 5.375902402039784, "learning_rate": 2.679898378814117e-06, "loss": 0.3849, "step": 48785 }, { "epoch": 2.06, "grad_norm": 5.36328391655569, "learning_rate": 2.67880788566784e-06, "loss": 0.3894, "step": 48790 }, { "epoch": 2.07, "grad_norm": 5.1341531474382105, "learning_rate": 2.677717533248602e-06, "loss": 0.4083, "step": 48795 }, { "epoch": 2.07, "grad_norm": 4.597446799046262, "learning_rate": 2.6766273216225093e-06, "loss": 0.3923, "step": 48800 }, { "epoch": 2.07, "grad_norm": 3.7483899072627427, "learning_rate": 2.6755372508556603e-06, "loss": 0.3751, "step": 48805 }, { "epoch": 2.07, "grad_norm": 4.689037156362096, "learning_rate": 2.6744473210141385e-06, "loss": 0.3792, "step": 48810 }, { "epoch": 2.07, "grad_norm": 4.405152396891129, "learning_rate": 2.6733575321640264e-06, "loss": 0.3683, "step": 48815 }, { "epoch": 2.07, "grad_norm": 4.601083531395233, "learning_rate": 2.6722678843713958e-06, "loss": 0.3676, "step": 48820 }, { "epoch": 2.07, "grad_norm": 4.163212067135756, "learning_rate": 2.6711783777023036e-06, "loss": 0.3734, "step": 48825 }, { "epoch": 2.07, "grad_norm": 4.3582318833968365, "learning_rate": 2.6700890122228074e-06, "loss": 0.3766, "step": 48830 }, { "epoch": 2.07, "grad_norm": 4.660450556949528, "learning_rate": 2.6689997879989516e-06, "loss": 0.3734, "step": 48835 }, { "epoch": 2.07, "grad_norm": 4.161179407442511, "learning_rate": 2.667910705096774e-06, "loss": 0.402, "step": 48840 }, { "epoch": 2.07, "grad_norm": 4.371897780123862, "learning_rate": 2.666821763582299e-06, "loss": 0.3697, "step": 48845 }, { "epoch": 2.07, "grad_norm": 5.496835219583497, "learning_rate": 2.665732963521548e-06, "loss": 0.3996, "step": 48850 }, { "epoch": 2.07, "grad_norm": 5.528942984797106, "learning_rate": 2.664644304980534e-06, "loss": 0.3725, "step": 48855 }, { "epoch": 2.07, "grad_norm": 4.577663673743399, "learning_rate": 2.663555788025254e-06, "loss": 0.3653, "step": 48860 }, { "epoch": 2.07, "grad_norm": 4.1040313893328255, "learning_rate": 2.6624674127217054e-06, "loss": 0.3763, "step": 48865 }, { "epoch": 2.07, "grad_norm": 4.294053441864696, "learning_rate": 2.6613791791358734e-06, "loss": 0.3891, "step": 48870 }, { "epoch": 2.07, "grad_norm": 4.480008742807418, "learning_rate": 2.6602910873337307e-06, "loss": 0.3832, "step": 48875 }, { "epoch": 2.07, "grad_norm": 4.0473120786164465, "learning_rate": 2.6592031373812484e-06, "loss": 0.382, "step": 48880 }, { "epoch": 2.07, "grad_norm": 4.450227310997142, "learning_rate": 2.658115329344384e-06, "loss": 0.3829, "step": 48885 }, { "epoch": 2.07, "grad_norm": 4.135922977015602, "learning_rate": 2.6570276632890917e-06, "loss": 0.3888, "step": 48890 }, { "epoch": 2.07, "grad_norm": 6.515891820904272, "learning_rate": 2.655940139281309e-06, "loss": 0.3785, "step": 48895 }, { "epoch": 2.07, "grad_norm": 4.344462821168202, "learning_rate": 2.6548527573869698e-06, "loss": 0.3946, "step": 48900 }, { "epoch": 2.07, "grad_norm": 4.109356002022949, "learning_rate": 2.653765517672003e-06, "loss": 0.378, "step": 48905 }, { "epoch": 2.07, "grad_norm": 5.2609856172907765, "learning_rate": 2.6526784202023194e-06, "loss": 0.3644, "step": 48910 }, { "epoch": 2.07, "grad_norm": 4.614365807863728, "learning_rate": 2.6515914650438314e-06, "loss": 0.4074, "step": 48915 }, { "epoch": 2.07, "grad_norm": 6.06455249508406, "learning_rate": 2.6505046522624326e-06, "loss": 0.3869, "step": 48920 }, { "epoch": 2.07, "grad_norm": 6.006835085881209, "learning_rate": 2.6494179819240184e-06, "loss": 0.3879, "step": 48925 }, { "epoch": 2.07, "grad_norm": 4.090195172031044, "learning_rate": 2.648331454094466e-06, "loss": 0.3663, "step": 48930 }, { "epoch": 2.07, "grad_norm": 4.140840771231388, "learning_rate": 2.6472450688396513e-06, "loss": 0.4142, "step": 48935 }, { "epoch": 2.07, "grad_norm": 4.115114507990791, "learning_rate": 2.6461588262254388e-06, "loss": 0.3821, "step": 48940 }, { "epoch": 2.07, "grad_norm": 4.722847534864015, "learning_rate": 2.6450727263176817e-06, "loss": 0.3778, "step": 48945 }, { "epoch": 2.07, "grad_norm": 3.827257159887921, "learning_rate": 2.6439867691822283e-06, "loss": 0.3782, "step": 48950 }, { "epoch": 2.07, "grad_norm": 7.277556924058113, "learning_rate": 2.6429009548849173e-06, "loss": 0.4098, "step": 48955 }, { "epoch": 2.07, "grad_norm": 4.058902488040372, "learning_rate": 2.6418152834915806e-06, "loss": 0.3627, "step": 48960 }, { "epoch": 2.07, "grad_norm": 4.048965176677439, "learning_rate": 2.6407297550680346e-06, "loss": 0.3868, "step": 48965 }, { "epoch": 2.07, "grad_norm": 4.762467868739015, "learning_rate": 2.639644369680094e-06, "loss": 0.3825, "step": 48970 }, { "epoch": 2.07, "grad_norm": 4.645496806393459, "learning_rate": 2.638559127393565e-06, "loss": 0.3908, "step": 48975 }, { "epoch": 2.07, "grad_norm": 4.2444726141926274, "learning_rate": 2.6374740282742383e-06, "loss": 0.3985, "step": 48980 }, { "epoch": 2.07, "grad_norm": 10.766393375915584, "learning_rate": 2.6363890723879026e-06, "loss": 0.3849, "step": 48985 }, { "epoch": 2.07, "grad_norm": 5.040058211840637, "learning_rate": 2.6353042598003363e-06, "loss": 0.3969, "step": 48990 }, { "epoch": 2.07, "grad_norm": 6.57164803620502, "learning_rate": 2.634219590577306e-06, "loss": 0.3958, "step": 48995 }, { "epoch": 2.07, "grad_norm": 4.143096539548772, "learning_rate": 2.6331350647845727e-06, "loss": 0.3926, "step": 49000 }, { "epoch": 2.07, "grad_norm": 5.546735037849133, "learning_rate": 2.632050682487891e-06, "loss": 0.3752, "step": 49005 }, { "epoch": 2.07, "grad_norm": 4.44884087597974, "learning_rate": 2.630966443752999e-06, "loss": 0.3693, "step": 49010 }, { "epoch": 2.07, "grad_norm": 4.970880980102137, "learning_rate": 2.629882348645634e-06, "loss": 0.4049, "step": 49015 }, { "epoch": 2.07, "grad_norm": 3.8810543456140656, "learning_rate": 2.62879839723152e-06, "loss": 0.3579, "step": 49020 }, { "epoch": 2.07, "grad_norm": 8.451817284269396, "learning_rate": 2.627714589576377e-06, "loss": 0.3783, "step": 49025 }, { "epoch": 2.08, "grad_norm": 6.580980237386432, "learning_rate": 2.6266309257459085e-06, "loss": 0.3731, "step": 49030 }, { "epoch": 2.08, "grad_norm": 5.348971145075655, "learning_rate": 2.6255474058058163e-06, "loss": 0.3921, "step": 49035 }, { "epoch": 2.08, "grad_norm": 4.203429737016677, "learning_rate": 2.6244640298217917e-06, "loss": 0.3601, "step": 49040 }, { "epoch": 2.08, "grad_norm": 4.588063239393658, "learning_rate": 2.6233807978595143e-06, "loss": 0.3976, "step": 49045 }, { "epoch": 2.08, "grad_norm": 12.491402518908926, "learning_rate": 2.622297709984659e-06, "loss": 0.404, "step": 49050 }, { "epoch": 2.08, "grad_norm": 13.691072458845982, "learning_rate": 2.621214766262891e-06, "loss": 0.3907, "step": 49055 }, { "epoch": 2.08, "grad_norm": 7.581929565525865, "learning_rate": 2.6201319667598633e-06, "loss": 0.378, "step": 49060 }, { "epoch": 2.08, "grad_norm": 4.747502036354879, "learning_rate": 2.619049311541224e-06, "loss": 0.3916, "step": 49065 }, { "epoch": 2.08, "grad_norm": 11.947588096786621, "learning_rate": 2.6179668006726124e-06, "loss": 0.376, "step": 49070 }, { "epoch": 2.08, "grad_norm": 6.828127093266616, "learning_rate": 2.6168844342196597e-06, "loss": 0.3843, "step": 49075 }, { "epoch": 2.08, "grad_norm": 7.896040292874367, "learning_rate": 2.6158022122479807e-06, "loss": 0.4004, "step": 49080 }, { "epoch": 2.08, "grad_norm": 4.567006714401726, "learning_rate": 2.6147201348231916e-06, "loss": 0.3537, "step": 49085 }, { "epoch": 2.08, "grad_norm": 5.807868812847661, "learning_rate": 2.6136382020108943e-06, "loss": 0.4177, "step": 49090 }, { "epoch": 2.08, "grad_norm": 4.981771887222753, "learning_rate": 2.6125564138766846e-06, "loss": 0.4008, "step": 49095 }, { "epoch": 2.08, "grad_norm": 4.236785748551839, "learning_rate": 2.611474770486146e-06, "loss": 0.3975, "step": 49100 }, { "epoch": 2.08, "grad_norm": 5.061875568407436, "learning_rate": 2.6103932719048564e-06, "loss": 0.4048, "step": 49105 }, { "epoch": 2.08, "grad_norm": 4.095892521061463, "learning_rate": 2.609311918198385e-06, "loss": 0.3802, "step": 49110 }, { "epoch": 2.08, "grad_norm": 6.063882065852188, "learning_rate": 2.608230709432289e-06, "loss": 0.4112, "step": 49115 }, { "epoch": 2.08, "grad_norm": 4.0820637444667724, "learning_rate": 2.607149645672119e-06, "loss": 0.3867, "step": 49120 }, { "epoch": 2.08, "grad_norm": 4.114988874060213, "learning_rate": 2.6060687269834207e-06, "loss": 0.3664, "step": 49125 }, { "epoch": 2.08, "grad_norm": 4.041203185459699, "learning_rate": 2.604987953431721e-06, "loss": 0.3946, "step": 49130 }, { "epoch": 2.08, "grad_norm": 3.998551344833177, "learning_rate": 2.6039073250825476e-06, "loss": 0.377, "step": 49135 }, { "epoch": 2.08, "grad_norm": 4.120079708441207, "learning_rate": 2.6028268420014146e-06, "loss": 0.3781, "step": 49140 }, { "epoch": 2.08, "grad_norm": 4.839990058698662, "learning_rate": 2.6017465042538314e-06, "loss": 0.3723, "step": 49145 }, { "epoch": 2.08, "grad_norm": 6.046238072447489, "learning_rate": 2.6006663119052913e-06, "loss": 0.4018, "step": 49150 }, { "epoch": 2.08, "grad_norm": 5.934221379241571, "learning_rate": 2.5995862650212856e-06, "loss": 0.3632, "step": 49155 }, { "epoch": 2.08, "grad_norm": 5.565988227474438, "learning_rate": 2.598506363667295e-06, "loss": 0.3711, "step": 49160 }, { "epoch": 2.08, "grad_norm": 4.125449991708251, "learning_rate": 2.5974266079087884e-06, "loss": 0.3767, "step": 49165 }, { "epoch": 2.08, "grad_norm": 5.8424417805420745, "learning_rate": 2.59634699781123e-06, "loss": 0.4194, "step": 49170 }, { "epoch": 2.08, "grad_norm": 5.678483842287691, "learning_rate": 2.5952675334400745e-06, "loss": 0.3714, "step": 49175 }, { "epoch": 2.08, "grad_norm": 4.9783348503739715, "learning_rate": 2.594188214860763e-06, "loss": 0.3954, "step": 49180 }, { "epoch": 2.08, "grad_norm": 4.362954186255089, "learning_rate": 2.593109042138733e-06, "loss": 0.3896, "step": 49185 }, { "epoch": 2.08, "grad_norm": 3.691186314651187, "learning_rate": 2.592030015339413e-06, "loss": 0.3806, "step": 49190 }, { "epoch": 2.08, "grad_norm": 4.683857388953216, "learning_rate": 2.590951134528222e-06, "loss": 0.391, "step": 49195 }, { "epoch": 2.08, "grad_norm": 5.0321210169962605, "learning_rate": 2.5898723997705657e-06, "loss": 0.3884, "step": 49200 }, { "epoch": 2.08, "grad_norm": 4.2841414946300995, "learning_rate": 2.5887938111318455e-06, "loss": 0.3768, "step": 49205 }, { "epoch": 2.08, "grad_norm": 3.9920102444496095, "learning_rate": 2.587715368677457e-06, "loss": 0.3981, "step": 49210 }, { "epoch": 2.08, "grad_norm": 3.7900466935067825, "learning_rate": 2.5866370724727784e-06, "loss": 0.3809, "step": 49215 }, { "epoch": 2.08, "grad_norm": 3.8137774098630923, "learning_rate": 2.5855589225831844e-06, "loss": 0.3873, "step": 49220 }, { "epoch": 2.08, "grad_norm": 5.4578510942446075, "learning_rate": 2.5844809190740437e-06, "loss": 0.3991, "step": 49225 }, { "epoch": 2.08, "grad_norm": 4.194832317260989, "learning_rate": 2.5834030620107078e-06, "loss": 0.3843, "step": 49230 }, { "epoch": 2.08, "grad_norm": 5.1262913405352775, "learning_rate": 2.582325351458525e-06, "loss": 0.3571, "step": 49235 }, { "epoch": 2.08, "grad_norm": 3.7949040845686275, "learning_rate": 2.5812477874828373e-06, "loss": 0.3943, "step": 49240 }, { "epoch": 2.08, "grad_norm": 7.0427948804486435, "learning_rate": 2.5801703701489713e-06, "loss": 0.3844, "step": 49245 }, { "epoch": 2.08, "grad_norm": 4.711364121142358, "learning_rate": 2.5790930995222463e-06, "loss": 0.3736, "step": 49250 }, { "epoch": 2.08, "grad_norm": 3.9603745763761222, "learning_rate": 2.578015975667976e-06, "loss": 0.377, "step": 49255 }, { "epoch": 2.08, "grad_norm": 4.119100866087835, "learning_rate": 2.5769389986514637e-06, "loss": 0.3782, "step": 49260 }, { "epoch": 2.08, "grad_norm": 4.739832962603566, "learning_rate": 2.5758621685380016e-06, "loss": 0.4062, "step": 49265 }, { "epoch": 2.09, "grad_norm": 4.53863607520641, "learning_rate": 2.574785485392875e-06, "loss": 0.3619, "step": 49270 }, { "epoch": 2.09, "grad_norm": 4.3492833110385085, "learning_rate": 2.5737089492813612e-06, "loss": 0.3997, "step": 49275 }, { "epoch": 2.09, "grad_norm": 3.946245638803383, "learning_rate": 2.5726325602687285e-06, "loss": 0.3302, "step": 49280 }, { "epoch": 2.09, "grad_norm": 4.617055321970091, "learning_rate": 2.571556318420232e-06, "loss": 0.3671, "step": 49285 }, { "epoch": 2.09, "grad_norm": 4.870217948071273, "learning_rate": 2.5704802238011226e-06, "loss": 0.3877, "step": 49290 }, { "epoch": 2.09, "grad_norm": 4.817464712010223, "learning_rate": 2.569404276476643e-06, "loss": 0.4373, "step": 49295 }, { "epoch": 2.09, "grad_norm": 4.80271860260757, "learning_rate": 2.5683284765120204e-06, "loss": 0.3877, "step": 49300 }, { "epoch": 2.09, "grad_norm": 3.8786578448434077, "learning_rate": 2.5672528239724794e-06, "loss": 0.3765, "step": 49305 }, { "epoch": 2.09, "grad_norm": 4.840724826597111, "learning_rate": 2.5661773189232365e-06, "loss": 0.3767, "step": 49310 }, { "epoch": 2.09, "grad_norm": 3.9607700841651874, "learning_rate": 2.56510196142949e-06, "loss": 0.3947, "step": 49315 }, { "epoch": 2.09, "grad_norm": 5.392638736642099, "learning_rate": 2.564026751556441e-06, "loss": 0.3965, "step": 49320 }, { "epoch": 2.09, "grad_norm": 6.844300602395313, "learning_rate": 2.562951689369274e-06, "loss": 0.4104, "step": 49325 }, { "epoch": 2.09, "grad_norm": 5.5335264704752785, "learning_rate": 2.5618767749331696e-06, "loss": 0.3938, "step": 49330 }, { "epoch": 2.09, "grad_norm": 6.3056748648663214, "learning_rate": 2.5608020083132935e-06, "loss": 0.4149, "step": 49335 }, { "epoch": 2.09, "grad_norm": 4.602749764289394, "learning_rate": 2.5597273895748054e-06, "loss": 0.3902, "step": 49340 }, { "epoch": 2.09, "grad_norm": 3.602597555011603, "learning_rate": 2.5586529187828603e-06, "loss": 0.3698, "step": 49345 }, { "epoch": 2.09, "grad_norm": 4.4850721070595, "learning_rate": 2.557578596002595e-06, "loss": 0.3848, "step": 49350 }, { "epoch": 2.09, "grad_norm": 3.9814981383252155, "learning_rate": 2.556504421299145e-06, "loss": 0.3772, "step": 49355 }, { "epoch": 2.09, "grad_norm": 4.164175277475107, "learning_rate": 2.555430394737637e-06, "loss": 0.3438, "step": 49360 }, { "epoch": 2.09, "grad_norm": 5.72081236370228, "learning_rate": 2.554356516383182e-06, "loss": 0.4124, "step": 49365 }, { "epoch": 2.09, "grad_norm": 6.148949610442225, "learning_rate": 2.5532827863008862e-06, "loss": 0.4052, "step": 49370 }, { "epoch": 2.09, "grad_norm": 5.784977006109457, "learning_rate": 2.5522092045558487e-06, "loss": 0.3867, "step": 49375 }, { "epoch": 2.09, "grad_norm": 5.812493989430617, "learning_rate": 2.5511357712131587e-06, "loss": 0.37, "step": 49380 }, { "epoch": 2.09, "grad_norm": 4.110668952923217, "learning_rate": 2.5500624863378916e-06, "loss": 0.3836, "step": 49385 }, { "epoch": 2.09, "grad_norm": 4.351742140320441, "learning_rate": 2.548989349995119e-06, "loss": 0.3675, "step": 49390 }, { "epoch": 2.09, "grad_norm": 4.306976478065809, "learning_rate": 2.547916362249905e-06, "loss": 0.4087, "step": 49395 }, { "epoch": 2.09, "grad_norm": 4.863035877359422, "learning_rate": 2.5468435231672995e-06, "loss": 0.3838, "step": 49400 }, { "epoch": 2.09, "grad_norm": 4.050191686008576, "learning_rate": 2.545770832812342e-06, "loss": 0.3926, "step": 49405 }, { "epoch": 2.09, "grad_norm": 9.555796270167855, "learning_rate": 2.5446982912500706e-06, "loss": 0.3983, "step": 49410 }, { "epoch": 2.09, "grad_norm": 6.1891855453256985, "learning_rate": 2.543625898545511e-06, "loss": 0.3832, "step": 49415 }, { "epoch": 2.09, "grad_norm": 4.2132842778285164, "learning_rate": 2.5425536547636763e-06, "loss": 0.3744, "step": 49420 }, { "epoch": 2.09, "grad_norm": 4.603209848717021, "learning_rate": 2.5414815599695743e-06, "loss": 0.3965, "step": 49425 }, { "epoch": 2.09, "grad_norm": 3.995517650234465, "learning_rate": 2.5404096142282057e-06, "loss": 0.4101, "step": 49430 }, { "epoch": 2.09, "grad_norm": 6.309811905580369, "learning_rate": 2.5393378176045556e-06, "loss": 0.4008, "step": 49435 }, { "epoch": 2.09, "grad_norm": 4.606835131566553, "learning_rate": 2.5382661701636046e-06, "loss": 0.369, "step": 49440 }, { "epoch": 2.09, "grad_norm": 4.509785120845635, "learning_rate": 2.5371946719703257e-06, "loss": 0.3814, "step": 49445 }, { "epoch": 2.09, "grad_norm": 4.3606537445326925, "learning_rate": 2.53612332308968e-06, "loss": 0.3469, "step": 49450 }, { "epoch": 2.09, "grad_norm": 4.143428631512332, "learning_rate": 2.5350521235866188e-06, "loss": 0.3928, "step": 49455 }, { "epoch": 2.09, "grad_norm": 4.397818090890851, "learning_rate": 2.5339810735260863e-06, "loss": 0.3969, "step": 49460 }, { "epoch": 2.09, "grad_norm": 5.570348234496646, "learning_rate": 2.532910172973019e-06, "loss": 0.3865, "step": 49465 }, { "epoch": 2.09, "grad_norm": 4.299430828387545, "learning_rate": 2.531839421992339e-06, "loss": 0.377, "step": 49470 }, { "epoch": 2.09, "grad_norm": 4.637197922302898, "learning_rate": 2.5307688206489644e-06, "loss": 0.3854, "step": 49475 }, { "epoch": 2.09, "grad_norm": 5.952569899156878, "learning_rate": 2.5296983690078054e-06, "loss": 0.3943, "step": 49480 }, { "epoch": 2.09, "grad_norm": 5.744601122212498, "learning_rate": 2.528628067133756e-06, "loss": 0.3721, "step": 49485 }, { "epoch": 2.09, "grad_norm": 4.205104201256576, "learning_rate": 2.527557915091706e-06, "loss": 0.3624, "step": 49490 }, { "epoch": 2.09, "grad_norm": 6.051912784040933, "learning_rate": 2.526487912946538e-06, "loss": 0.3788, "step": 49495 }, { "epoch": 2.09, "grad_norm": 4.115852812523828, "learning_rate": 2.5254180607631236e-06, "loss": 0.3864, "step": 49500 }, { "epoch": 2.1, "grad_norm": 3.7616855038523815, "learning_rate": 2.5243483586063206e-06, "loss": 0.3686, "step": 49505 }, { "epoch": 2.1, "grad_norm": 4.280639664026874, "learning_rate": 2.5232788065409847e-06, "loss": 0.3966, "step": 49510 }, { "epoch": 2.1, "grad_norm": 4.297314428993249, "learning_rate": 2.522209404631961e-06, "loss": 0.4087, "step": 49515 }, { "epoch": 2.1, "grad_norm": 3.9481993530155446, "learning_rate": 2.521140152944081e-06, "loss": 0.3625, "step": 49520 }, { "epoch": 2.1, "grad_norm": 5.786545880772812, "learning_rate": 2.5200710515421717e-06, "loss": 0.393, "step": 49525 }, { "epoch": 2.1, "grad_norm": 4.251369715162034, "learning_rate": 2.5190021004910516e-06, "loss": 0.3987, "step": 49530 }, { "epoch": 2.1, "grad_norm": 3.957075682599359, "learning_rate": 2.517933299855524e-06, "loss": 0.3978, "step": 49535 }, { "epoch": 2.1, "grad_norm": 3.9582525929647363, "learning_rate": 2.5168646497003895e-06, "loss": 0.3675, "step": 49540 }, { "epoch": 2.1, "grad_norm": 6.983028463144516, "learning_rate": 2.515796150090439e-06, "loss": 0.3666, "step": 49545 }, { "epoch": 2.1, "grad_norm": 4.6195673020099015, "learning_rate": 2.514727801090448e-06, "loss": 0.3957, "step": 49550 }, { "epoch": 2.1, "grad_norm": 4.6001685427389, "learning_rate": 2.5136596027651905e-06, "loss": 0.3647, "step": 49555 }, { "epoch": 2.1, "grad_norm": 3.721425414917315, "learning_rate": 2.512591555179429e-06, "loss": 0.3775, "step": 49560 }, { "epoch": 2.1, "grad_norm": 4.670864627243677, "learning_rate": 2.5115236583979145e-06, "loss": 0.4264, "step": 49565 }, { "epoch": 2.1, "grad_norm": 4.676794173034663, "learning_rate": 2.510455912485389e-06, "loss": 0.371, "step": 49570 }, { "epoch": 2.1, "grad_norm": 4.43474250570863, "learning_rate": 2.509388317506588e-06, "loss": 0.4064, "step": 49575 }, { "epoch": 2.1, "grad_norm": 4.224012140003875, "learning_rate": 2.508320873526237e-06, "loss": 0.3953, "step": 49580 }, { "epoch": 2.1, "grad_norm": 4.32758956943303, "learning_rate": 2.5072535806090542e-06, "loss": 0.4153, "step": 49585 }, { "epoch": 2.1, "grad_norm": 3.560613241540756, "learning_rate": 2.506186438819742e-06, "loss": 0.3663, "step": 49590 }, { "epoch": 2.1, "grad_norm": 3.9372993658854702, "learning_rate": 2.5051194482230003e-06, "loss": 0.3548, "step": 49595 }, { "epoch": 2.1, "grad_norm": 4.158865974125944, "learning_rate": 2.5040526088835192e-06, "loss": 0.3692, "step": 49600 }, { "epoch": 2.1, "grad_norm": 3.8617992983720666, "learning_rate": 2.5029859208659753e-06, "loss": 0.3936, "step": 49605 }, { "epoch": 2.1, "grad_norm": 5.270589598654198, "learning_rate": 2.5019193842350387e-06, "loss": 0.394, "step": 49610 }, { "epoch": 2.1, "grad_norm": 4.225232404923594, "learning_rate": 2.5008529990553743e-06, "loss": 0.3847, "step": 49615 }, { "epoch": 2.1, "grad_norm": 4.218055627216, "learning_rate": 2.4997867653916286e-06, "loss": 0.3978, "step": 49620 }, { "epoch": 2.1, "grad_norm": 4.2932021414246915, "learning_rate": 2.498720683308447e-06, "loss": 0.3676, "step": 49625 }, { "epoch": 2.1, "grad_norm": 3.9182320791886864, "learning_rate": 2.4976547528704627e-06, "loss": 0.3799, "step": 49630 }, { "epoch": 2.1, "grad_norm": 4.4105785979391126, "learning_rate": 2.4965889741423015e-06, "loss": 0.3755, "step": 49635 }, { "epoch": 2.1, "grad_norm": 5.8279685439912345, "learning_rate": 2.4955233471885747e-06, "loss": 0.4094, "step": 49640 }, { "epoch": 2.1, "grad_norm": 4.303876463827186, "learning_rate": 2.4944578720738906e-06, "loss": 0.3712, "step": 49645 }, { "epoch": 2.1, "grad_norm": 7.364460746140534, "learning_rate": 2.4933925488628473e-06, "loss": 0.3781, "step": 49650 }, { "epoch": 2.1, "grad_norm": 4.492039106257755, "learning_rate": 2.492327377620028e-06, "loss": 0.3781, "step": 49655 }, { "epoch": 2.1, "grad_norm": 5.224287065740411, "learning_rate": 2.4912623584100137e-06, "loss": 0.3828, "step": 49660 }, { "epoch": 2.1, "grad_norm": 4.627652283633303, "learning_rate": 2.490197491297374e-06, "loss": 0.3707, "step": 49665 }, { "epoch": 2.1, "grad_norm": 3.796643170941925, "learning_rate": 2.4891327763466668e-06, "loss": 0.3893, "step": 49670 }, { "epoch": 2.1, "grad_norm": 4.38920195627582, "learning_rate": 2.4880682136224427e-06, "loss": 0.4081, "step": 49675 }, { "epoch": 2.1, "grad_norm": 5.559810741139777, "learning_rate": 2.4870038031892437e-06, "loss": 0.3735, "step": 49680 }, { "epoch": 2.1, "grad_norm": 4.470549831519377, "learning_rate": 2.4859395451116038e-06, "loss": 0.3817, "step": 49685 }, { "epoch": 2.1, "grad_norm": 4.0109098537896255, "learning_rate": 2.4848754394540415e-06, "loss": 0.3935, "step": 49690 }, { "epoch": 2.1, "grad_norm": 4.763284059541512, "learning_rate": 2.483811486281073e-06, "loss": 0.3634, "step": 49695 }, { "epoch": 2.1, "grad_norm": 4.136611052150635, "learning_rate": 2.482747685657205e-06, "loss": 0.3724, "step": 49700 }, { "epoch": 2.1, "grad_norm": 4.444272159245875, "learning_rate": 2.481684037646927e-06, "loss": 0.3839, "step": 49705 }, { "epoch": 2.1, "grad_norm": 4.449399016862127, "learning_rate": 2.4806205423147284e-06, "loss": 0.368, "step": 49710 }, { "epoch": 2.1, "grad_norm": 5.908726942079969, "learning_rate": 2.479557199725087e-06, "loss": 0.393, "step": 49715 }, { "epoch": 2.1, "grad_norm": 4.240950711045852, "learning_rate": 2.478494009942467e-06, "loss": 0.3554, "step": 49720 }, { "epoch": 2.1, "grad_norm": 4.0333119426723885, "learning_rate": 2.4774309730313294e-06, "loss": 0.3765, "step": 49725 }, { "epoch": 2.1, "grad_norm": 4.146407327590279, "learning_rate": 2.4763680890561196e-06, "loss": 0.4146, "step": 49730 }, { "epoch": 2.1, "grad_norm": 3.8480734753855494, "learning_rate": 2.475305358081281e-06, "loss": 0.3645, "step": 49735 }, { "epoch": 2.11, "grad_norm": 6.657829288946752, "learning_rate": 2.4742427801712393e-06, "loss": 0.3883, "step": 49740 }, { "epoch": 2.11, "grad_norm": 4.131902751353832, "learning_rate": 2.473180355390418e-06, "loss": 0.4095, "step": 49745 }, { "epoch": 2.11, "grad_norm": 4.147268937645226, "learning_rate": 2.4721180838032296e-06, "loss": 0.3763, "step": 49750 }, { "epoch": 2.11, "grad_norm": 4.084015032056912, "learning_rate": 2.4710559654740773e-06, "loss": 0.3596, "step": 49755 }, { "epoch": 2.11, "grad_norm": 6.080179734460404, "learning_rate": 2.46999400046735e-06, "loss": 0.3781, "step": 49760 }, { "epoch": 2.11, "grad_norm": 4.020642133077635, "learning_rate": 2.468932188847435e-06, "loss": 0.4006, "step": 49765 }, { "epoch": 2.11, "grad_norm": 3.7836456282610076, "learning_rate": 2.467870530678707e-06, "loss": 0.3662, "step": 49770 }, { "epoch": 2.11, "grad_norm": 5.271451236352591, "learning_rate": 2.4668090260255285e-06, "loss": 0.3862, "step": 49775 }, { "epoch": 2.11, "grad_norm": 6.404612159231934, "learning_rate": 2.4657476749522568e-06, "loss": 0.3779, "step": 49780 }, { "epoch": 2.11, "grad_norm": 7.126695836322258, "learning_rate": 2.464686477523241e-06, "loss": 0.4025, "step": 49785 }, { "epoch": 2.11, "grad_norm": 4.2224258109326485, "learning_rate": 2.463625433802814e-06, "loss": 0.3666, "step": 49790 }, { "epoch": 2.11, "grad_norm": 4.71235947766091, "learning_rate": 2.4625645438553057e-06, "loss": 0.3843, "step": 49795 }, { "epoch": 2.11, "grad_norm": 3.7269993203341003, "learning_rate": 2.461503807745035e-06, "loss": 0.3565, "step": 49800 }, { "epoch": 2.11, "grad_norm": 4.586395355051144, "learning_rate": 2.4604432255363126e-06, "loss": 0.3468, "step": 49805 }, { "epoch": 2.11, "grad_norm": 4.763742919086334, "learning_rate": 2.4593827972934354e-06, "loss": 0.3985, "step": 49810 }, { "epoch": 2.11, "grad_norm": 5.631216144703485, "learning_rate": 2.4583225230806955e-06, "loss": 0.3819, "step": 49815 }, { "epoch": 2.11, "grad_norm": 5.316063192548243, "learning_rate": 2.457262402962376e-06, "loss": 0.3811, "step": 49820 }, { "epoch": 2.11, "grad_norm": 4.080869557647586, "learning_rate": 2.4562024370027454e-06, "loss": 0.3716, "step": 49825 }, { "epoch": 2.11, "grad_norm": 4.8456964305603085, "learning_rate": 2.4551426252660682e-06, "loss": 0.3696, "step": 49830 }, { "epoch": 2.11, "grad_norm": 4.268892212304196, "learning_rate": 2.454082967816599e-06, "loss": 0.3847, "step": 49835 }, { "epoch": 2.11, "grad_norm": 3.8704386788553897, "learning_rate": 2.4530234647185788e-06, "loss": 0.3505, "step": 49840 }, { "epoch": 2.11, "grad_norm": 4.32426826315786, "learning_rate": 2.4519641160362427e-06, "loss": 0.3984, "step": 49845 }, { "epoch": 2.11, "grad_norm": 4.027185671168084, "learning_rate": 2.4509049218338193e-06, "loss": 0.3804, "step": 49850 }, { "epoch": 2.11, "grad_norm": 4.217048525038172, "learning_rate": 2.44984588217552e-06, "loss": 0.3965, "step": 49855 }, { "epoch": 2.11, "grad_norm": 4.13185634949303, "learning_rate": 2.448786997125553e-06, "loss": 0.3649, "step": 49860 }, { "epoch": 2.11, "grad_norm": 3.9757673531192483, "learning_rate": 2.4477282667481157e-06, "loss": 0.3882, "step": 49865 }, { "epoch": 2.11, "grad_norm": 4.46467420358684, "learning_rate": 2.446669691107397e-06, "loss": 0.3924, "step": 49870 }, { "epoch": 2.11, "grad_norm": 5.2558098145137375, "learning_rate": 2.4456112702675726e-06, "loss": 0.4079, "step": 49875 }, { "epoch": 2.11, "grad_norm": 4.675542984130239, "learning_rate": 2.444553004292812e-06, "loss": 0.3686, "step": 49880 }, { "epoch": 2.11, "grad_norm": 4.974364702495152, "learning_rate": 2.443494893247278e-06, "loss": 0.3768, "step": 49885 }, { "epoch": 2.11, "grad_norm": 4.569564416705734, "learning_rate": 2.442436937195118e-06, "loss": 0.402, "step": 49890 }, { "epoch": 2.11, "grad_norm": 3.9497043939560132, "learning_rate": 2.4413791362004713e-06, "loss": 0.3743, "step": 49895 }, { "epoch": 2.11, "grad_norm": 4.249458144394923, "learning_rate": 2.44032149032747e-06, "loss": 0.3724, "step": 49900 }, { "epoch": 2.11, "grad_norm": 5.162657587320998, "learning_rate": 2.43926399964024e-06, "loss": 0.3865, "step": 49905 }, { "epoch": 2.11, "grad_norm": 4.017711903993835, "learning_rate": 2.4382066642028884e-06, "loss": 0.3867, "step": 49910 }, { "epoch": 2.11, "grad_norm": 3.97519195441277, "learning_rate": 2.4371494840795206e-06, "loss": 0.3525, "step": 49915 }, { "epoch": 2.11, "grad_norm": 4.391077735217333, "learning_rate": 2.436092459334232e-06, "loss": 0.3822, "step": 49920 }, { "epoch": 2.11, "grad_norm": 6.260823648024386, "learning_rate": 2.4350355900311044e-06, "loss": 0.3691, "step": 49925 }, { "epoch": 2.11, "grad_norm": 4.771614016617556, "learning_rate": 2.433978876234213e-06, "loss": 0.3525, "step": 49930 }, { "epoch": 2.11, "grad_norm": 4.895896132051187, "learning_rate": 2.432922318007624e-06, "loss": 0.378, "step": 49935 }, { "epoch": 2.11, "grad_norm": 4.771203482913908, "learning_rate": 2.431865915415395e-06, "loss": 0.3774, "step": 49940 }, { "epoch": 2.11, "grad_norm": 3.532206117323641, "learning_rate": 2.4308096685215694e-06, "loss": 0.3684, "step": 49945 }, { "epoch": 2.11, "grad_norm": 4.318721255751485, "learning_rate": 2.4297535773901854e-06, "loss": 0.3712, "step": 49950 }, { "epoch": 2.11, "grad_norm": 8.96153361146327, "learning_rate": 2.428697642085273e-06, "loss": 0.3716, "step": 49955 }, { "epoch": 2.11, "grad_norm": 4.444379912833598, "learning_rate": 2.427641862670847e-06, "loss": 0.3775, "step": 49960 }, { "epoch": 2.11, "grad_norm": 5.428909623541166, "learning_rate": 2.4265862392109175e-06, "loss": 0.3653, "step": 49965 }, { "epoch": 2.11, "grad_norm": 4.3561449934485825, "learning_rate": 2.425530771769486e-06, "loss": 0.3805, "step": 49970 }, { "epoch": 2.12, "grad_norm": 5.883735944730505, "learning_rate": 2.4244754604105384e-06, "loss": 0.3868, "step": 49975 }, { "epoch": 2.12, "grad_norm": 6.384482524497086, "learning_rate": 2.4234203051980577e-06, "loss": 0.3926, "step": 49980 }, { "epoch": 2.12, "grad_norm": 4.225179001660537, "learning_rate": 2.422365306196014e-06, "loss": 0.3626, "step": 49985 }, { "epoch": 2.12, "grad_norm": 5.162008239120738, "learning_rate": 2.421310463468371e-06, "loss": 0.3775, "step": 49990 }, { "epoch": 2.12, "grad_norm": 5.16745171513817, "learning_rate": 2.420255777079077e-06, "loss": 0.3818, "step": 49995 }, { "epoch": 2.12, "grad_norm": 4.503633308041489, "learning_rate": 2.4192012470920763e-06, "loss": 0.3794, "step": 50000 }, { "epoch": 2.12, "grad_norm": 4.046081256510451, "learning_rate": 2.4181468735713037e-06, "loss": 0.3868, "step": 50005 }, { "epoch": 2.12, "grad_norm": 4.6688619739946144, "learning_rate": 2.417092656580679e-06, "loss": 0.3847, "step": 50010 }, { "epoch": 2.12, "grad_norm": 7.7663393019947735, "learning_rate": 2.4160385961841188e-06, "loss": 0.3732, "step": 50015 }, { "epoch": 2.12, "grad_norm": 7.877827143175252, "learning_rate": 2.414984692445529e-06, "loss": 0.3826, "step": 50020 }, { "epoch": 2.12, "grad_norm": 4.211286575101403, "learning_rate": 2.4139309454288013e-06, "loss": 0.3855, "step": 50025 }, { "epoch": 2.12, "grad_norm": 4.9243580697673135, "learning_rate": 2.4128773551978224e-06, "loss": 0.3993, "step": 50030 }, { "epoch": 2.12, "grad_norm": 4.9718384765724535, "learning_rate": 2.4118239218164684e-06, "loss": 0.3924, "step": 50035 }, { "epoch": 2.12, "grad_norm": 4.039118369219979, "learning_rate": 2.410770645348609e-06, "loss": 0.3518, "step": 50040 }, { "epoch": 2.12, "grad_norm": 4.082315817751445, "learning_rate": 2.4097175258580964e-06, "loss": 0.3622, "step": 50045 }, { "epoch": 2.12, "grad_norm": 5.146929517895016, "learning_rate": 2.4086645634087826e-06, "loss": 0.4051, "step": 50050 }, { "epoch": 2.12, "grad_norm": 3.845863747071786, "learning_rate": 2.4076117580645004e-06, "loss": 0.3896, "step": 50055 }, { "epoch": 2.12, "grad_norm": 4.354437884313686, "learning_rate": 2.406559109889084e-06, "loss": 0.3772, "step": 50060 }, { "epoch": 2.12, "grad_norm": 3.94783565526839, "learning_rate": 2.4055066189463473e-06, "loss": 0.376, "step": 50065 }, { "epoch": 2.12, "grad_norm": 3.8024253464910562, "learning_rate": 2.4044542853001018e-06, "loss": 0.3586, "step": 50070 }, { "epoch": 2.12, "grad_norm": 4.892252759936716, "learning_rate": 2.4034021090141498e-06, "loss": 0.3747, "step": 50075 }, { "epoch": 2.12, "grad_norm": 4.301806944445829, "learning_rate": 2.4023500901522767e-06, "loss": 0.3639, "step": 50080 }, { "epoch": 2.12, "grad_norm": 4.2826572367742495, "learning_rate": 2.4012982287782667e-06, "loss": 0.3558, "step": 50085 }, { "epoch": 2.12, "grad_norm": 4.297069211754986, "learning_rate": 2.400246524955892e-06, "loss": 0.408, "step": 50090 }, { "epoch": 2.12, "grad_norm": 4.075429312802378, "learning_rate": 2.399194978748911e-06, "loss": 0.3618, "step": 50095 }, { "epoch": 2.12, "grad_norm": 5.141510757619812, "learning_rate": 2.3981435902210776e-06, "loss": 0.404, "step": 50100 }, { "epoch": 2.12, "grad_norm": 4.4452110645637335, "learning_rate": 2.397092359436134e-06, "loss": 0.3802, "step": 50105 }, { "epoch": 2.12, "grad_norm": 4.219697315086441, "learning_rate": 2.396041286457816e-06, "loss": 0.3735, "step": 50110 }, { "epoch": 2.12, "grad_norm": 3.9661014527545277, "learning_rate": 2.394990371349842e-06, "loss": 0.3714, "step": 50115 }, { "epoch": 2.12, "grad_norm": 3.6277987578383777, "learning_rate": 2.3939396141759287e-06, "loss": 0.3893, "step": 50120 }, { "epoch": 2.12, "grad_norm": 4.167748381548549, "learning_rate": 2.392889014999782e-06, "loss": 0.3742, "step": 50125 }, { "epoch": 2.12, "grad_norm": 5.597812997401016, "learning_rate": 2.391838573885093e-06, "loss": 0.3837, "step": 50130 }, { "epoch": 2.12, "grad_norm": 4.461404742437002, "learning_rate": 2.3907882908955486e-06, "loss": 0.3763, "step": 50135 }, { "epoch": 2.12, "grad_norm": 5.692538058787547, "learning_rate": 2.3897381660948265e-06, "loss": 0.3839, "step": 50140 }, { "epoch": 2.12, "grad_norm": 5.369217026815814, "learning_rate": 2.388688199546589e-06, "loss": 0.3962, "step": 50145 }, { "epoch": 2.12, "grad_norm": 4.089548058217305, "learning_rate": 2.3876383913144934e-06, "loss": 0.3924, "step": 50150 }, { "epoch": 2.12, "grad_norm": 4.172585048318543, "learning_rate": 2.3865887414621897e-06, "loss": 0.407, "step": 50155 }, { "epoch": 2.12, "grad_norm": 4.110677182727945, "learning_rate": 2.3855392500533103e-06, "loss": 0.3696, "step": 50160 }, { "epoch": 2.12, "grad_norm": 4.427670251872314, "learning_rate": 2.384489917151485e-06, "loss": 0.3987, "step": 50165 }, { "epoch": 2.12, "grad_norm": 4.506669351712094, "learning_rate": 2.383440742820332e-06, "loss": 0.4063, "step": 50170 }, { "epoch": 2.12, "grad_norm": 5.569927951381089, "learning_rate": 2.3823917271234614e-06, "loss": 0.3862, "step": 50175 }, { "epoch": 2.12, "grad_norm": 4.400852669837989, "learning_rate": 2.381342870124468e-06, "loss": 0.3584, "step": 50180 }, { "epoch": 2.12, "grad_norm": 4.193714869002925, "learning_rate": 2.3802941718869426e-06, "loss": 0.3441, "step": 50185 }, { "epoch": 2.12, "grad_norm": 6.03284537146811, "learning_rate": 2.3792456324744667e-06, "loss": 0.375, "step": 50190 }, { "epoch": 2.12, "grad_norm": 6.061948575366682, "learning_rate": 2.378197251950607e-06, "loss": 0.3722, "step": 50195 }, { "epoch": 2.12, "grad_norm": 5.704703005457088, "learning_rate": 2.377149030378925e-06, "loss": 0.354, "step": 50200 }, { "epoch": 2.12, "grad_norm": 3.715195587032702, "learning_rate": 2.3761009678229736e-06, "loss": 0.3666, "step": 50205 }, { "epoch": 2.12, "grad_norm": 4.235779860225067, "learning_rate": 2.375053064346291e-06, "loss": 0.3873, "step": 50210 }, { "epoch": 2.13, "grad_norm": 5.2137413878824574, "learning_rate": 2.3740053200124076e-06, "loss": 0.3672, "step": 50215 }, { "epoch": 2.13, "grad_norm": 3.5808093551978804, "learning_rate": 2.3729577348848464e-06, "loss": 0.3783, "step": 50220 }, { "epoch": 2.13, "grad_norm": 4.452500082790417, "learning_rate": 2.3719103090271213e-06, "loss": 0.3502, "step": 50225 }, { "epoch": 2.13, "grad_norm": 5.1752993322630045, "learning_rate": 2.3708630425027314e-06, "loss": 0.3686, "step": 50230 }, { "epoch": 2.13, "grad_norm": 3.861356559290214, "learning_rate": 2.3698159353751704e-06, "loss": 0.3771, "step": 50235 }, { "epoch": 2.13, "grad_norm": 4.556076181390272, "learning_rate": 2.3687689877079217e-06, "loss": 0.3767, "step": 50240 }, { "epoch": 2.13, "grad_norm": 4.476052008178158, "learning_rate": 2.367722199564461e-06, "loss": 0.366, "step": 50245 }, { "epoch": 2.13, "grad_norm": 7.236177133365301, "learning_rate": 2.3666755710082473e-06, "loss": 0.3688, "step": 50250 }, { "epoch": 2.13, "grad_norm": 8.376589189582377, "learning_rate": 2.3656291021027376e-06, "loss": 0.4004, "step": 50255 }, { "epoch": 2.13, "grad_norm": 6.20965374969848, "learning_rate": 2.3645827929113774e-06, "loss": 0.3895, "step": 50260 }, { "epoch": 2.13, "grad_norm": 5.001237307254817, "learning_rate": 2.3635366434975975e-06, "loss": 0.3271, "step": 50265 }, { "epoch": 2.13, "grad_norm": 4.562536140586596, "learning_rate": 2.362490653924825e-06, "loss": 0.3966, "step": 50270 }, { "epoch": 2.13, "grad_norm": 5.593750425558845, "learning_rate": 2.361444824256477e-06, "loss": 0.3866, "step": 50275 }, { "epoch": 2.13, "grad_norm": 4.133215604047289, "learning_rate": 2.3603991545559556e-06, "loss": 0.3795, "step": 50280 }, { "epoch": 2.13, "grad_norm": 4.653949473834183, "learning_rate": 2.3593536448866578e-06, "loss": 0.3706, "step": 50285 }, { "epoch": 2.13, "grad_norm": 4.157975445181617, "learning_rate": 2.3583082953119708e-06, "loss": 0.3726, "step": 50290 }, { "epoch": 2.13, "grad_norm": 4.042881334685599, "learning_rate": 2.3572631058952716e-06, "loss": 0.3481, "step": 50295 }, { "epoch": 2.13, "grad_norm": 4.167037811031089, "learning_rate": 2.356218076699925e-06, "loss": 0.3775, "step": 50300 }, { "epoch": 2.13, "grad_norm": 7.668433576834251, "learning_rate": 2.355173207789288e-06, "loss": 0.3797, "step": 50305 }, { "epoch": 2.13, "grad_norm": 3.896701667204212, "learning_rate": 2.354128499226711e-06, "loss": 0.351, "step": 50310 }, { "epoch": 2.13, "grad_norm": 3.668711073387136, "learning_rate": 2.3530839510755277e-06, "loss": 0.35, "step": 50315 }, { "epoch": 2.13, "grad_norm": 6.853118180994254, "learning_rate": 2.3520395633990674e-06, "loss": 0.3861, "step": 50320 }, { "epoch": 2.13, "grad_norm": 4.320294182639153, "learning_rate": 2.3509953362606503e-06, "loss": 0.3755, "step": 50325 }, { "epoch": 2.13, "grad_norm": 4.573031600142931, "learning_rate": 2.3499512697235814e-06, "loss": 0.3953, "step": 50330 }, { "epoch": 2.13, "grad_norm": 4.212673635948971, "learning_rate": 2.348907363851161e-06, "loss": 0.3511, "step": 50335 }, { "epoch": 2.13, "grad_norm": 4.067905488706705, "learning_rate": 2.3478636187066783e-06, "loss": 0.3819, "step": 50340 }, { "epoch": 2.13, "grad_norm": 5.609118523630749, "learning_rate": 2.346820034353414e-06, "loss": 0.3591, "step": 50345 }, { "epoch": 2.13, "grad_norm": 4.779343776010606, "learning_rate": 2.3457766108546343e-06, "loss": 0.4008, "step": 50350 }, { "epoch": 2.13, "grad_norm": 6.120065677225579, "learning_rate": 2.3447333482736e-06, "loss": 0.3653, "step": 50355 }, { "epoch": 2.13, "grad_norm": 12.66347312711895, "learning_rate": 2.3436902466735632e-06, "loss": 0.3893, "step": 50360 }, { "epoch": 2.13, "grad_norm": 3.9585936718256085, "learning_rate": 2.3426473061177606e-06, "loss": 0.3819, "step": 50365 }, { "epoch": 2.13, "grad_norm": 4.278607432923751, "learning_rate": 2.3416045266694265e-06, "loss": 0.4004, "step": 50370 }, { "epoch": 2.13, "grad_norm": 5.233860298690419, "learning_rate": 2.3405619083917773e-06, "loss": 0.4086, "step": 50375 }, { "epoch": 2.13, "grad_norm": 4.044596122486876, "learning_rate": 2.3395194513480278e-06, "loss": 0.3893, "step": 50380 }, { "epoch": 2.13, "grad_norm": 4.3305520126800126, "learning_rate": 2.338477155601375e-06, "loss": 0.4049, "step": 50385 }, { "epoch": 2.13, "grad_norm": 4.292684482287587, "learning_rate": 2.337435021215013e-06, "loss": 0.4026, "step": 50390 }, { "epoch": 2.13, "grad_norm": 3.9413607865978926, "learning_rate": 2.3363930482521253e-06, "loss": 0.3502, "step": 50395 }, { "epoch": 2.13, "grad_norm": 4.657990302429634, "learning_rate": 2.3353512367758786e-06, "loss": 0.366, "step": 50400 }, { "epoch": 2.13, "grad_norm": 4.1175086497455995, "learning_rate": 2.334309586849437e-06, "loss": 0.3646, "step": 50405 }, { "epoch": 2.13, "grad_norm": 7.08414716030964, "learning_rate": 2.3332680985359557e-06, "loss": 0.3891, "step": 50410 }, { "epoch": 2.13, "grad_norm": 3.901863339868657, "learning_rate": 2.332226771898572e-06, "loss": 0.3711, "step": 50415 }, { "epoch": 2.13, "grad_norm": 4.163917329710865, "learning_rate": 2.331185607000421e-06, "loss": 0.3752, "step": 50420 }, { "epoch": 2.13, "grad_norm": 4.435059414610699, "learning_rate": 2.330144603904626e-06, "loss": 0.3592, "step": 50425 }, { "epoch": 2.13, "grad_norm": 4.042912333737766, "learning_rate": 2.3291037626743006e-06, "loss": 0.4071, "step": 50430 }, { "epoch": 2.13, "grad_norm": 6.695729774111038, "learning_rate": 2.3280630833725452e-06, "loss": 0.3786, "step": 50435 }, { "epoch": 2.13, "grad_norm": 4.693785131950566, "learning_rate": 2.3270225660624545e-06, "loss": 0.3792, "step": 50440 }, { "epoch": 2.13, "grad_norm": 4.591554681300314, "learning_rate": 2.325982210807114e-06, "loss": 0.3509, "step": 50445 }, { "epoch": 2.14, "grad_norm": 4.269179696655977, "learning_rate": 2.3249420176695938e-06, "loss": 0.3836, "step": 50450 }, { "epoch": 2.14, "grad_norm": 4.908920454745094, "learning_rate": 2.3239019867129588e-06, "loss": 0.4186, "step": 50455 }, { "epoch": 2.14, "grad_norm": 4.286860635930135, "learning_rate": 2.322862118000266e-06, "loss": 0.4171, "step": 50460 }, { "epoch": 2.14, "grad_norm": 5.299826387909762, "learning_rate": 2.3218224115945553e-06, "loss": 0.3799, "step": 50465 }, { "epoch": 2.14, "grad_norm": 4.282772459292189, "learning_rate": 2.3207828675588627e-06, "loss": 0.3711, "step": 50470 }, { "epoch": 2.14, "grad_norm": 3.9776379064244893, "learning_rate": 2.3197434859562135e-06, "loss": 0.366, "step": 50475 }, { "epoch": 2.14, "grad_norm": 3.846682034736626, "learning_rate": 2.318704266849623e-06, "loss": 0.3854, "step": 50480 }, { "epoch": 2.14, "grad_norm": 4.700855002782642, "learning_rate": 2.317665210302093e-06, "loss": 0.3739, "step": 50485 }, { "epoch": 2.14, "grad_norm": 4.3009172930782515, "learning_rate": 2.3166263163766206e-06, "loss": 0.3961, "step": 50490 }, { "epoch": 2.14, "grad_norm": 6.079679362055096, "learning_rate": 2.3155875851361924e-06, "loss": 0.3788, "step": 50495 }, { "epoch": 2.14, "grad_norm": 6.275290053587404, "learning_rate": 2.3145490166437793e-06, "loss": 0.3711, "step": 50500 }, { "epoch": 2.14, "grad_norm": 5.038353232740384, "learning_rate": 2.3135106109623494e-06, "loss": 0.3698, "step": 50505 }, { "epoch": 2.14, "grad_norm": 7.3784742231605875, "learning_rate": 2.31247236815486e-06, "loss": 0.38, "step": 50510 }, { "epoch": 2.14, "grad_norm": 8.677001944116634, "learning_rate": 2.3114342882842523e-06, "loss": 0.4145, "step": 50515 }, { "epoch": 2.14, "grad_norm": 6.423570557876477, "learning_rate": 2.3103963714134647e-06, "loss": 0.3844, "step": 50520 }, { "epoch": 2.14, "grad_norm": 6.983621066241579, "learning_rate": 2.3093586176054223e-06, "loss": 0.3792, "step": 50525 }, { "epoch": 2.14, "grad_norm": 6.064275571815297, "learning_rate": 2.3083210269230434e-06, "loss": 0.3717, "step": 50530 }, { "epoch": 2.14, "grad_norm": 6.699126525000278, "learning_rate": 2.3072835994292312e-06, "loss": 0.3931, "step": 50535 }, { "epoch": 2.14, "grad_norm": 4.006200984998097, "learning_rate": 2.306246335186882e-06, "loss": 0.383, "step": 50540 }, { "epoch": 2.14, "grad_norm": 3.774542194307398, "learning_rate": 2.3052092342588823e-06, "loss": 0.3783, "step": 50545 }, { "epoch": 2.14, "grad_norm": 6.5839880311907875, "learning_rate": 2.304172296708111e-06, "loss": 0.3876, "step": 50550 }, { "epoch": 2.14, "grad_norm": 6.259050357289544, "learning_rate": 2.30313552259743e-06, "loss": 0.3884, "step": 50555 }, { "epoch": 2.14, "grad_norm": 3.976538532929298, "learning_rate": 2.302098911989699e-06, "loss": 0.3681, "step": 50560 }, { "epoch": 2.14, "grad_norm": 4.345438457859317, "learning_rate": 2.301062464947766e-06, "loss": 0.3793, "step": 50565 }, { "epoch": 2.14, "grad_norm": 3.9192401196155164, "learning_rate": 2.300026181534464e-06, "loss": 0.3724, "step": 50570 }, { "epoch": 2.14, "grad_norm": 4.092022787303198, "learning_rate": 2.298990061812621e-06, "loss": 0.3669, "step": 50575 }, { "epoch": 2.14, "grad_norm": 3.9344946740917663, "learning_rate": 2.2979541058450565e-06, "loss": 0.38, "step": 50580 }, { "epoch": 2.14, "grad_norm": 3.9973193778708347, "learning_rate": 2.296918313694574e-06, "loss": 0.3936, "step": 50585 }, { "epoch": 2.14, "grad_norm": 4.208261080433807, "learning_rate": 2.2958826854239714e-06, "loss": 0.3653, "step": 50590 }, { "epoch": 2.14, "grad_norm": 3.9184189305912267, "learning_rate": 2.2948472210960367e-06, "loss": 0.3411, "step": 50595 }, { "epoch": 2.14, "grad_norm": 4.52101815736275, "learning_rate": 2.293811920773548e-06, "loss": 0.3773, "step": 50600 }, { "epoch": 2.14, "grad_norm": 4.0849819119092645, "learning_rate": 2.29277678451927e-06, "loss": 0.3729, "step": 50605 }, { "epoch": 2.14, "grad_norm": 4.506825155390957, "learning_rate": 2.2917418123959604e-06, "loss": 0.3888, "step": 50610 }, { "epoch": 2.14, "grad_norm": 4.504393941969743, "learning_rate": 2.29070700446637e-06, "loss": 0.4016, "step": 50615 }, { "epoch": 2.14, "grad_norm": 4.0558706310688555, "learning_rate": 2.2896723607932315e-06, "loss": 0.3742, "step": 50620 }, { "epoch": 2.14, "grad_norm": 4.465244562518348, "learning_rate": 2.288637881439274e-06, "loss": 0.3708, "step": 50625 }, { "epoch": 2.14, "grad_norm": 4.634756276872223, "learning_rate": 2.287603566467217e-06, "loss": 0.3962, "step": 50630 }, { "epoch": 2.14, "grad_norm": 4.1301317656661025, "learning_rate": 2.286569415939764e-06, "loss": 0.371, "step": 50635 }, { "epoch": 2.14, "grad_norm": 4.582733968358318, "learning_rate": 2.285535429919615e-06, "loss": 0.3978, "step": 50640 }, { "epoch": 2.14, "grad_norm": 3.6912932808825785, "learning_rate": 2.2845016084694572e-06, "loss": 0.3909, "step": 50645 }, { "epoch": 2.14, "grad_norm": 4.3816800423906015, "learning_rate": 2.28346795165197e-06, "loss": 0.4069, "step": 50650 }, { "epoch": 2.14, "grad_norm": 4.113223028788747, "learning_rate": 2.282434459529817e-06, "loss": 0.3459, "step": 50655 }, { "epoch": 2.14, "grad_norm": 4.434120460227427, "learning_rate": 2.281401132165658e-06, "loss": 0.3679, "step": 50660 }, { "epoch": 2.14, "grad_norm": 4.066238234409624, "learning_rate": 2.280367969622142e-06, "loss": 0.3603, "step": 50665 }, { "epoch": 2.14, "grad_norm": 4.028461449172315, "learning_rate": 2.2793349719619036e-06, "loss": 0.3916, "step": 50670 }, { "epoch": 2.14, "grad_norm": 3.757003421187071, "learning_rate": 2.2783021392475717e-06, "loss": 0.3617, "step": 50675 }, { "epoch": 2.14, "grad_norm": 4.070517854659836, "learning_rate": 2.2772694715417658e-06, "loss": 0.4051, "step": 50680 }, { "epoch": 2.15, "grad_norm": 4.311564882763757, "learning_rate": 2.2762369689070903e-06, "loss": 0.367, "step": 50685 }, { "epoch": 2.15, "grad_norm": 4.3411079329766435, "learning_rate": 2.2752046314061436e-06, "loss": 0.3862, "step": 50690 }, { "epoch": 2.15, "grad_norm": 3.6180860993451187, "learning_rate": 2.274172459101517e-06, "loss": 0.4064, "step": 50695 }, { "epoch": 2.15, "grad_norm": 4.726395907339224, "learning_rate": 2.2731404520557836e-06, "loss": 0.3684, "step": 50700 }, { "epoch": 2.15, "grad_norm": 4.465517734535609, "learning_rate": 2.2721086103315117e-06, "loss": 0.4041, "step": 50705 }, { "epoch": 2.15, "grad_norm": 5.149715113016545, "learning_rate": 2.2710769339912588e-06, "loss": 0.353, "step": 50710 }, { "epoch": 2.15, "grad_norm": 3.901392965811322, "learning_rate": 2.270045423097575e-06, "loss": 0.3764, "step": 50715 }, { "epoch": 2.15, "grad_norm": 4.447604627125826, "learning_rate": 2.269014077712994e-06, "loss": 0.4183, "step": 50720 }, { "epoch": 2.15, "grad_norm": 3.8174424379871974, "learning_rate": 2.2679828979000456e-06, "loss": 0.3595, "step": 50725 }, { "epoch": 2.15, "grad_norm": 4.462026469959688, "learning_rate": 2.266951883721246e-06, "loss": 0.385, "step": 50730 }, { "epoch": 2.15, "grad_norm": 4.180264338912058, "learning_rate": 2.2659210352391055e-06, "loss": 0.378, "step": 50735 }, { "epoch": 2.15, "grad_norm": 4.881279386411326, "learning_rate": 2.264890352516117e-06, "loss": 0.372, "step": 50740 }, { "epoch": 2.15, "grad_norm": 3.8155366830853468, "learning_rate": 2.26385983561477e-06, "loss": 0.3555, "step": 50745 }, { "epoch": 2.15, "grad_norm": 4.734346289277913, "learning_rate": 2.262829484597544e-06, "loss": 0.3884, "step": 50750 }, { "epoch": 2.15, "grad_norm": 4.043472371146545, "learning_rate": 2.2617992995269013e-06, "loss": 0.3557, "step": 50755 }, { "epoch": 2.15, "grad_norm": 4.119800570632844, "learning_rate": 2.2607692804653014e-06, "loss": 0.3768, "step": 50760 }, { "epoch": 2.15, "grad_norm": 5.601288320000802, "learning_rate": 2.2597394274751932e-06, "loss": 0.3474, "step": 50765 }, { "epoch": 2.15, "grad_norm": 7.678334891168863, "learning_rate": 2.2587097406190102e-06, "loss": 0.3673, "step": 50770 }, { "epoch": 2.15, "grad_norm": 7.976398426379458, "learning_rate": 2.257680219959181e-06, "loss": 0.3924, "step": 50775 }, { "epoch": 2.15, "grad_norm": 4.53633594368111, "learning_rate": 2.256650865558122e-06, "loss": 0.3677, "step": 50780 }, { "epoch": 2.15, "grad_norm": 3.757943587039481, "learning_rate": 2.2556216774782427e-06, "loss": 0.3646, "step": 50785 }, { "epoch": 2.15, "grad_norm": 4.809714445900774, "learning_rate": 2.2545926557819344e-06, "loss": 0.3599, "step": 50790 }, { "epoch": 2.15, "grad_norm": 4.583126285015245, "learning_rate": 2.2535638005315867e-06, "loss": 0.3843, "step": 50795 }, { "epoch": 2.15, "grad_norm": 4.017375823145854, "learning_rate": 2.2525351117895783e-06, "loss": 0.3919, "step": 50800 }, { "epoch": 2.15, "grad_norm": 5.046980258206454, "learning_rate": 2.251506589618271e-06, "loss": 0.3607, "step": 50805 }, { "epoch": 2.15, "grad_norm": 7.1304231819622155, "learning_rate": 2.2504782340800234e-06, "loss": 0.3802, "step": 50810 }, { "epoch": 2.15, "grad_norm": 4.543585660689358, "learning_rate": 2.249450045237183e-06, "loss": 0.3627, "step": 50815 }, { "epoch": 2.15, "grad_norm": 5.948765733882174, "learning_rate": 2.2484220231520825e-06, "loss": 0.3711, "step": 50820 }, { "epoch": 2.15, "grad_norm": 5.788088857808289, "learning_rate": 2.24739416788705e-06, "loss": 0.3746, "step": 50825 }, { "epoch": 2.15, "grad_norm": 5.339496384859222, "learning_rate": 2.246366479504401e-06, "loss": 0.3833, "step": 50830 }, { "epoch": 2.15, "grad_norm": 4.135418584638771, "learning_rate": 2.245338958066443e-06, "loss": 0.3586, "step": 50835 }, { "epoch": 2.15, "grad_norm": 4.430434323466136, "learning_rate": 2.244311603635468e-06, "loss": 0.3588, "step": 50840 }, { "epoch": 2.15, "grad_norm": 4.216925922270115, "learning_rate": 2.243284416273763e-06, "loss": 0.376, "step": 50845 }, { "epoch": 2.15, "grad_norm": 4.076710326214254, "learning_rate": 2.2422573960436063e-06, "loss": 0.3867, "step": 50850 }, { "epoch": 2.15, "grad_norm": 4.498541750925796, "learning_rate": 2.24123054300726e-06, "loss": 0.3587, "step": 50855 }, { "epoch": 2.15, "grad_norm": 5.3990030999973095, "learning_rate": 2.240203857226978e-06, "loss": 0.3843, "step": 50860 }, { "epoch": 2.15, "grad_norm": 4.20784503874404, "learning_rate": 2.239177338765007e-06, "loss": 0.3928, "step": 50865 }, { "epoch": 2.15, "grad_norm": 4.9201851433335735, "learning_rate": 2.238150987683583e-06, "loss": 0.4107, "step": 50870 }, { "epoch": 2.15, "grad_norm": 4.360094697411207, "learning_rate": 2.237124804044928e-06, "loss": 0.3517, "step": 50875 }, { "epoch": 2.15, "grad_norm": 4.044257098536264, "learning_rate": 2.236098787911257e-06, "loss": 0.3773, "step": 50880 }, { "epoch": 2.15, "grad_norm": 4.034921453634541, "learning_rate": 2.2350729393447778e-06, "loss": 0.349, "step": 50885 }, { "epoch": 2.15, "grad_norm": 4.6596562738585545, "learning_rate": 2.23404725840768e-06, "loss": 0.3859, "step": 50890 }, { "epoch": 2.15, "grad_norm": 4.3763265822479225, "learning_rate": 2.233021745162149e-06, "loss": 0.4168, "step": 50895 }, { "epoch": 2.15, "grad_norm": 4.018084908790607, "learning_rate": 2.23199639967036e-06, "loss": 0.357, "step": 50900 }, { "epoch": 2.15, "grad_norm": 5.034819798944364, "learning_rate": 2.230971221994477e-06, "loss": 0.3469, "step": 50905 }, { "epoch": 2.15, "grad_norm": 4.575473937712187, "learning_rate": 2.229946212196651e-06, "loss": 0.362, "step": 50910 }, { "epoch": 2.15, "grad_norm": 4.347200211362484, "learning_rate": 2.228921370339026e-06, "loss": 0.364, "step": 50915 }, { "epoch": 2.16, "grad_norm": 4.121996669574978, "learning_rate": 2.2278966964837386e-06, "loss": 0.3688, "step": 50920 }, { "epoch": 2.16, "grad_norm": 4.0957359069168495, "learning_rate": 2.2268721906929063e-06, "loss": 0.38, "step": 50925 }, { "epoch": 2.16, "grad_norm": 3.775776662087176, "learning_rate": 2.225847853028645e-06, "loss": 0.358, "step": 50930 }, { "epoch": 2.16, "grad_norm": 6.002876685241673, "learning_rate": 2.224823683553059e-06, "loss": 0.3885, "step": 50935 }, { "epoch": 2.16, "grad_norm": 5.82532733256446, "learning_rate": 2.2237996823282364e-06, "loss": 0.4085, "step": 50940 }, { "epoch": 2.16, "grad_norm": 4.061653621125829, "learning_rate": 2.2227758494162617e-06, "loss": 0.3669, "step": 50945 }, { "epoch": 2.16, "grad_norm": 4.324036788469857, "learning_rate": 2.221752184879206e-06, "loss": 0.3667, "step": 50950 }, { "epoch": 2.16, "grad_norm": 6.86716899218463, "learning_rate": 2.220728688779134e-06, "loss": 0.3756, "step": 50955 }, { "epoch": 2.16, "grad_norm": 5.573622293763051, "learning_rate": 2.2197053611780933e-06, "loss": 0.3425, "step": 50960 }, { "epoch": 2.16, "grad_norm": 5.3927309024147405, "learning_rate": 2.2186822021381266e-06, "loss": 0.3745, "step": 50965 }, { "epoch": 2.16, "grad_norm": 4.738554777109576, "learning_rate": 2.2176592117212675e-06, "loss": 0.3652, "step": 50970 }, { "epoch": 2.16, "grad_norm": 4.131962176264198, "learning_rate": 2.2166363899895325e-06, "loss": 0.3824, "step": 50975 }, { "epoch": 2.16, "grad_norm": 4.606439973467614, "learning_rate": 2.2156137370049346e-06, "loss": 0.3589, "step": 50980 }, { "epoch": 2.16, "grad_norm": 4.69896633364378, "learning_rate": 2.214591252829476e-06, "loss": 0.3777, "step": 50985 }, { "epoch": 2.16, "grad_norm": 4.235984561991574, "learning_rate": 2.2135689375251436e-06, "loss": 0.369, "step": 50990 }, { "epoch": 2.16, "grad_norm": 4.812624575996112, "learning_rate": 2.212546791153918e-06, "loss": 0.3656, "step": 50995 }, { "epoch": 2.16, "grad_norm": 3.88585961869959, "learning_rate": 2.211524813777773e-06, "loss": 0.3738, "step": 51000 }, { "epoch": 2.16, "grad_norm": 4.921244715278705, "learning_rate": 2.2105030054586616e-06, "loss": 0.3916, "step": 51005 }, { "epoch": 2.16, "grad_norm": 4.2388940789529626, "learning_rate": 2.2094813662585366e-06, "loss": 0.3536, "step": 51010 }, { "epoch": 2.16, "grad_norm": 4.4243669129879954, "learning_rate": 2.208459896239337e-06, "loss": 0.3792, "step": 51015 }, { "epoch": 2.16, "grad_norm": 4.655373553583169, "learning_rate": 2.2074385954629947e-06, "loss": 0.355, "step": 51020 }, { "epoch": 2.16, "grad_norm": 3.8428372723257587, "learning_rate": 2.206417463991421e-06, "loss": 0.3686, "step": 51025 }, { "epoch": 2.16, "grad_norm": 4.845743450724909, "learning_rate": 2.2053965018865276e-06, "loss": 0.3832, "step": 51030 }, { "epoch": 2.16, "grad_norm": 4.0654617722646, "learning_rate": 2.2043757092102123e-06, "loss": 0.385, "step": 51035 }, { "epoch": 2.16, "grad_norm": 5.408820900111711, "learning_rate": 2.2033550860243648e-06, "loss": 0.3547, "step": 51040 }, { "epoch": 2.16, "grad_norm": 3.8739037114327792, "learning_rate": 2.202334632390859e-06, "loss": 0.3475, "step": 51045 }, { "epoch": 2.16, "grad_norm": 7.678318964065676, "learning_rate": 2.2013143483715637e-06, "loss": 0.3682, "step": 51050 }, { "epoch": 2.16, "grad_norm": 5.594879435767553, "learning_rate": 2.2002942340283375e-06, "loss": 0.3845, "step": 51055 }, { "epoch": 2.16, "grad_norm": 6.884627620035937, "learning_rate": 2.1992742894230227e-06, "loss": 0.3879, "step": 51060 }, { "epoch": 2.16, "grad_norm": 4.202578743256359, "learning_rate": 2.1982545146174584e-06, "loss": 0.3713, "step": 51065 }, { "epoch": 2.16, "grad_norm": 4.664514550360664, "learning_rate": 2.197234909673472e-06, "loss": 0.3505, "step": 51070 }, { "epoch": 2.16, "grad_norm": 4.122267390009706, "learning_rate": 2.196215474652875e-06, "loss": 0.3818, "step": 51075 }, { "epoch": 2.16, "grad_norm": 4.992298514568472, "learning_rate": 2.1951962096174755e-06, "loss": 0.3745, "step": 51080 }, { "epoch": 2.16, "grad_norm": 4.122598217497837, "learning_rate": 2.1941771146290675e-06, "loss": 0.3715, "step": 51085 }, { "epoch": 2.16, "grad_norm": 3.4436339058779866, "learning_rate": 2.1931581897494385e-06, "loss": 0.3582, "step": 51090 }, { "epoch": 2.16, "grad_norm": 4.039835660364556, "learning_rate": 2.1921394350403587e-06, "loss": 0.3436, "step": 51095 }, { "epoch": 2.16, "grad_norm": 4.402455971415569, "learning_rate": 2.191120850563594e-06, "loss": 0.3904, "step": 51100 }, { "epoch": 2.16, "grad_norm": 5.0670076649141595, "learning_rate": 2.1901024363808997e-06, "loss": 0.3579, "step": 51105 }, { "epoch": 2.16, "grad_norm": 3.9168415397135177, "learning_rate": 2.1890841925540165e-06, "loss": 0.3836, "step": 51110 }, { "epoch": 2.16, "grad_norm": 5.204152111030827, "learning_rate": 2.188066119144679e-06, "loss": 0.3855, "step": 51115 }, { "epoch": 2.16, "grad_norm": 4.369177487586138, "learning_rate": 2.187048216214611e-06, "loss": 0.3774, "step": 51120 }, { "epoch": 2.16, "grad_norm": 3.9763553558216485, "learning_rate": 2.186030483825523e-06, "loss": 0.3592, "step": 51125 }, { "epoch": 2.16, "grad_norm": 4.507869513798007, "learning_rate": 2.1850129220391176e-06, "loss": 0.3745, "step": 51130 }, { "epoch": 2.16, "grad_norm": 4.088523272742548, "learning_rate": 2.1839955309170862e-06, "loss": 0.3691, "step": 51135 }, { "epoch": 2.16, "grad_norm": 4.197804250986726, "learning_rate": 2.1829783105211137e-06, "loss": 0.3611, "step": 51140 }, { "epoch": 2.16, "grad_norm": 4.114975302622314, "learning_rate": 2.181961260912866e-06, "loss": 0.3696, "step": 51145 }, { "epoch": 2.16, "grad_norm": 4.20858623205499, "learning_rate": 2.1809443821540067e-06, "loss": 0.3618, "step": 51150 }, { "epoch": 2.16, "grad_norm": 4.78329479625587, "learning_rate": 2.179927674306187e-06, "loss": 0.3681, "step": 51155 }, { "epoch": 2.17, "grad_norm": 5.032861455970911, "learning_rate": 2.1789111374310445e-06, "loss": 0.3831, "step": 51160 }, { "epoch": 2.17, "grad_norm": 5.835971674965703, "learning_rate": 2.1778947715902093e-06, "loss": 0.3808, "step": 51165 }, { "epoch": 2.17, "grad_norm": 10.612648986202283, "learning_rate": 2.176878576845304e-06, "loss": 0.3745, "step": 51170 }, { "epoch": 2.17, "grad_norm": 8.823951336420272, "learning_rate": 2.175862553257932e-06, "loss": 0.3443, "step": 51175 }, { "epoch": 2.17, "grad_norm": 6.5224714912929596, "learning_rate": 2.1748467008896966e-06, "loss": 0.3724, "step": 51180 }, { "epoch": 2.17, "grad_norm": 4.434440456786207, "learning_rate": 2.1738310198021824e-06, "loss": 0.3877, "step": 51185 }, { "epoch": 2.17, "grad_norm": 4.130317387170686, "learning_rate": 2.172815510056971e-06, "loss": 0.3638, "step": 51190 }, { "epoch": 2.17, "grad_norm": 5.090067295015865, "learning_rate": 2.171800171715625e-06, "loss": 0.4122, "step": 51195 }, { "epoch": 2.17, "grad_norm": 6.258493522486268, "learning_rate": 2.170785004839704e-06, "loss": 0.3623, "step": 51200 }, { "epoch": 2.17, "grad_norm": 14.981679850780715, "learning_rate": 2.1697700094907543e-06, "loss": 0.3704, "step": 51205 }, { "epoch": 2.17, "grad_norm": 9.409403803041403, "learning_rate": 2.168755185730314e-06, "loss": 0.3731, "step": 51210 }, { "epoch": 2.17, "grad_norm": 5.4132859461817295, "learning_rate": 2.1677405336199053e-06, "loss": 0.3483, "step": 51215 }, { "epoch": 2.17, "grad_norm": 8.94575902133439, "learning_rate": 2.166726053221046e-06, "loss": 0.3691, "step": 51220 }, { "epoch": 2.17, "grad_norm": 3.8243649025030644, "learning_rate": 2.1657117445952415e-06, "loss": 0.3857, "step": 51225 }, { "epoch": 2.17, "grad_norm": 4.105115141071039, "learning_rate": 2.164697607803984e-06, "loss": 0.3868, "step": 51230 }, { "epoch": 2.17, "grad_norm": 4.023860583391261, "learning_rate": 2.1636836429087582e-06, "loss": 0.354, "step": 51235 }, { "epoch": 2.17, "grad_norm": 4.114160904750753, "learning_rate": 2.162669849971041e-06, "loss": 0.3569, "step": 51240 }, { "epoch": 2.17, "grad_norm": 4.134121297964744, "learning_rate": 2.1616562290522907e-06, "loss": 0.3624, "step": 51245 }, { "epoch": 2.17, "grad_norm": 5.93559317758632, "learning_rate": 2.160642780213963e-06, "loss": 0.3807, "step": 51250 }, { "epoch": 2.17, "grad_norm": 5.320209098793835, "learning_rate": 2.1596295035175e-06, "loss": 0.381, "step": 51255 }, { "epoch": 2.17, "grad_norm": 4.430699333227567, "learning_rate": 2.158616399024335e-06, "loss": 0.374, "step": 51260 }, { "epoch": 2.17, "grad_norm": 3.9457623870660394, "learning_rate": 2.1576034667958865e-06, "loss": 0.3673, "step": 51265 }, { "epoch": 2.17, "grad_norm": 3.8085622861495594, "learning_rate": 2.1565907068935674e-06, "loss": 0.3806, "step": 51270 }, { "epoch": 2.17, "grad_norm": 4.803521610420573, "learning_rate": 2.15557811937878e-06, "loss": 0.3944, "step": 51275 }, { "epoch": 2.17, "grad_norm": 4.124632698326712, "learning_rate": 2.154565704312911e-06, "loss": 0.3551, "step": 51280 }, { "epoch": 2.17, "grad_norm": 4.00429236004617, "learning_rate": 2.1535534617573415e-06, "loss": 0.3573, "step": 51285 }, { "epoch": 2.17, "grad_norm": 4.457261112581591, "learning_rate": 2.152541391773443e-06, "loss": 0.3747, "step": 51290 }, { "epoch": 2.17, "grad_norm": 3.899646483049162, "learning_rate": 2.1515294944225713e-06, "loss": 0.3738, "step": 51295 }, { "epoch": 2.17, "grad_norm": 4.814709750417389, "learning_rate": 2.1505177697660756e-06, "loss": 0.383, "step": 51300 }, { "epoch": 2.17, "grad_norm": 5.956381493431635, "learning_rate": 2.1495062178652953e-06, "loss": 0.3913, "step": 51305 }, { "epoch": 2.17, "grad_norm": 4.2358200718095045, "learning_rate": 2.148494838781555e-06, "loss": 0.384, "step": 51310 }, { "epoch": 2.17, "grad_norm": 4.021572536853781, "learning_rate": 2.147483632576174e-06, "loss": 0.3854, "step": 51315 }, { "epoch": 2.17, "grad_norm": 4.145993945556708, "learning_rate": 2.1464725993104574e-06, "loss": 0.3481, "step": 51320 }, { "epoch": 2.17, "grad_norm": 4.253283009564978, "learning_rate": 2.145461739045704e-06, "loss": 0.346, "step": 51325 }, { "epoch": 2.17, "grad_norm": 4.0624195530810505, "learning_rate": 2.144451051843194e-06, "loss": 0.3512, "step": 51330 }, { "epoch": 2.17, "grad_norm": 6.586653923156549, "learning_rate": 2.143440537764207e-06, "loss": 0.3729, "step": 51335 }, { "epoch": 2.17, "grad_norm": 3.7158730649238367, "learning_rate": 2.142430196870007e-06, "loss": 0.3714, "step": 51340 }, { "epoch": 2.17, "grad_norm": 4.472858670509909, "learning_rate": 2.141420029221847e-06, "loss": 0.3669, "step": 51345 }, { "epoch": 2.17, "grad_norm": 5.750748239236262, "learning_rate": 2.140410034880968e-06, "loss": 0.3583, "step": 51350 }, { "epoch": 2.17, "grad_norm": 4.497897233037699, "learning_rate": 2.1394002139086064e-06, "loss": 0.3833, "step": 51355 }, { "epoch": 2.17, "grad_norm": 3.7936110680022295, "learning_rate": 2.1383905663659847e-06, "loss": 0.3954, "step": 51360 }, { "epoch": 2.17, "grad_norm": 4.101175928254679, "learning_rate": 2.1373810923143124e-06, "loss": 0.3698, "step": 51365 }, { "epoch": 2.17, "grad_norm": 4.003769428948448, "learning_rate": 2.1363717918147926e-06, "loss": 0.373, "step": 51370 }, { "epoch": 2.17, "grad_norm": 4.394080987851491, "learning_rate": 2.135362664928617e-06, "loss": 0.3977, "step": 51375 }, { "epoch": 2.17, "grad_norm": 5.823549119322144, "learning_rate": 2.134353711716964e-06, "loss": 0.3531, "step": 51380 }, { "epoch": 2.17, "grad_norm": 4.054676946497687, "learning_rate": 2.1333449322410037e-06, "loss": 0.3506, "step": 51385 }, { "epoch": 2.17, "grad_norm": 4.88462917993755, "learning_rate": 2.1323363265618964e-06, "loss": 0.3877, "step": 51390 }, { "epoch": 2.18, "grad_norm": 4.266221650146213, "learning_rate": 2.131327894740792e-06, "loss": 0.3616, "step": 51395 }, { "epoch": 2.18, "grad_norm": 4.206376712803528, "learning_rate": 2.1303196368388267e-06, "loss": 0.3667, "step": 51400 }, { "epoch": 2.18, "grad_norm": 4.255717954921978, "learning_rate": 2.1293115529171284e-06, "loss": 0.3778, "step": 51405 }, { "epoch": 2.18, "grad_norm": 7.041399534268732, "learning_rate": 2.1283036430368165e-06, "loss": 0.3717, "step": 51410 }, { "epoch": 2.18, "grad_norm": 3.8543742200587174, "learning_rate": 2.127295907258994e-06, "loss": 0.3398, "step": 51415 }, { "epoch": 2.18, "grad_norm": 4.5582613575096165, "learning_rate": 2.1262883456447588e-06, "loss": 0.3634, "step": 51420 }, { "epoch": 2.18, "grad_norm": 4.1305461447214205, "learning_rate": 2.1252809582551983e-06, "loss": 0.3972, "step": 51425 }, { "epoch": 2.18, "grad_norm": 3.746268164953898, "learning_rate": 2.1242737451513835e-06, "loss": 0.3691, "step": 51430 }, { "epoch": 2.18, "grad_norm": 3.924606015904837, "learning_rate": 2.123266706394381e-06, "loss": 0.3735, "step": 51435 }, { "epoch": 2.18, "grad_norm": 5.236784904148432, "learning_rate": 2.122259842045244e-06, "loss": 0.374, "step": 51440 }, { "epoch": 2.18, "grad_norm": 6.921410838405361, "learning_rate": 2.1212531521650188e-06, "loss": 0.3513, "step": 51445 }, { "epoch": 2.18, "grad_norm": 4.51288889518615, "learning_rate": 2.120246636814733e-06, "loss": 0.3709, "step": 51450 }, { "epoch": 2.18, "grad_norm": 4.794559015941084, "learning_rate": 2.119240296055411e-06, "loss": 0.3642, "step": 51455 }, { "epoch": 2.18, "grad_norm": 5.17300180789268, "learning_rate": 2.1182341299480657e-06, "loss": 0.3804, "step": 51460 }, { "epoch": 2.18, "grad_norm": 4.499227865092043, "learning_rate": 2.1172281385536953e-06, "loss": 0.373, "step": 51465 }, { "epoch": 2.18, "grad_norm": 4.605381574162916, "learning_rate": 2.116222321933291e-06, "loss": 0.3648, "step": 51470 }, { "epoch": 2.18, "grad_norm": 4.078348104362696, "learning_rate": 2.1152166801478352e-06, "loss": 0.3582, "step": 51475 }, { "epoch": 2.18, "grad_norm": 4.276052317327856, "learning_rate": 2.1142112132582925e-06, "loss": 0.3834, "step": 51480 }, { "epoch": 2.18, "grad_norm": 3.8700669775199654, "learning_rate": 2.113205921325624e-06, "loss": 0.3629, "step": 51485 }, { "epoch": 2.18, "grad_norm": 4.145527827304661, "learning_rate": 2.1122008044107765e-06, "loss": 0.3657, "step": 51490 }, { "epoch": 2.18, "grad_norm": 4.200229295045798, "learning_rate": 2.1111958625746905e-06, "loss": 0.3587, "step": 51495 }, { "epoch": 2.18, "grad_norm": 4.109583220687445, "learning_rate": 2.1101910958782877e-06, "loss": 0.3503, "step": 51500 }, { "epoch": 2.18, "grad_norm": 4.0092540391647296, "learning_rate": 2.109186504382489e-06, "loss": 0.3936, "step": 51505 }, { "epoch": 2.18, "grad_norm": 3.978861233971614, "learning_rate": 2.108182088148195e-06, "loss": 0.3537, "step": 51510 }, { "epoch": 2.18, "grad_norm": 4.261005355654024, "learning_rate": 2.1071778472363045e-06, "loss": 0.3833, "step": 51515 }, { "epoch": 2.18, "grad_norm": 4.613900951337735, "learning_rate": 2.106173781707699e-06, "loss": 0.3589, "step": 51520 }, { "epoch": 2.18, "grad_norm": 3.4904199785574037, "learning_rate": 2.1051698916232527e-06, "loss": 0.3376, "step": 51525 }, { "epoch": 2.18, "grad_norm": 4.105349777872789, "learning_rate": 2.1041661770438303e-06, "loss": 0.3703, "step": 51530 }, { "epoch": 2.18, "grad_norm": 4.11274391101578, "learning_rate": 2.1031626380302813e-06, "loss": 0.3728, "step": 51535 }, { "epoch": 2.18, "grad_norm": 3.8544893034824677, "learning_rate": 2.1021592746434484e-06, "loss": 0.3814, "step": 51540 }, { "epoch": 2.18, "grad_norm": 4.195565490792251, "learning_rate": 2.1011560869441644e-06, "loss": 0.3633, "step": 51545 }, { "epoch": 2.18, "grad_norm": 3.700153869312193, "learning_rate": 2.1001530749932466e-06, "loss": 0.3774, "step": 51550 }, { "epoch": 2.18, "grad_norm": 4.054351867797535, "learning_rate": 2.099150238851506e-06, "loss": 0.3513, "step": 51555 }, { "epoch": 2.18, "grad_norm": 3.962867752971523, "learning_rate": 2.0981475785797435e-06, "loss": 0.3855, "step": 51560 }, { "epoch": 2.18, "grad_norm": 3.7072800348990236, "learning_rate": 2.097145094238744e-06, "loss": 0.3415, "step": 51565 }, { "epoch": 2.18, "grad_norm": 4.105364614842066, "learning_rate": 2.096142785889286e-06, "loss": 0.3705, "step": 51570 }, { "epoch": 2.18, "grad_norm": 4.118636932486681, "learning_rate": 2.0951406535921383e-06, "loss": 0.3635, "step": 51575 }, { "epoch": 2.18, "grad_norm": 4.043424260924906, "learning_rate": 2.0941386974080573e-06, "loss": 0.3726, "step": 51580 }, { "epoch": 2.18, "grad_norm": 4.815175529899832, "learning_rate": 2.093136917397786e-06, "loss": 0.3739, "step": 51585 }, { "epoch": 2.18, "grad_norm": 3.888037287389602, "learning_rate": 2.0921353136220614e-06, "loss": 0.3719, "step": 51590 }, { "epoch": 2.18, "grad_norm": 4.183301608276359, "learning_rate": 2.0911338861416087e-06, "loss": 0.3587, "step": 51595 }, { "epoch": 2.18, "grad_norm": 3.8824926396972694, "learning_rate": 2.090132635017138e-06, "loss": 0.3829, "step": 51600 }, { "epoch": 2.18, "grad_norm": 4.962896790676248, "learning_rate": 2.089131560309355e-06, "loss": 0.3563, "step": 51605 }, { "epoch": 2.18, "grad_norm": 4.083195457470332, "learning_rate": 2.088130662078953e-06, "loss": 0.3662, "step": 51610 }, { "epoch": 2.18, "grad_norm": 4.370295758380475, "learning_rate": 2.0871299403866096e-06, "loss": 0.4075, "step": 51615 }, { "epoch": 2.18, "grad_norm": 4.536411881897502, "learning_rate": 2.086129395292998e-06, "loss": 0.367, "step": 51620 }, { "epoch": 2.18, "grad_norm": 4.081032954210912, "learning_rate": 2.085129026858778e-06, "loss": 0.3824, "step": 51625 }, { "epoch": 2.19, "grad_norm": 4.9554990247378115, "learning_rate": 2.084128835144601e-06, "loss": 0.3952, "step": 51630 }, { "epoch": 2.19, "grad_norm": 3.9446208025323477, "learning_rate": 2.0831288202111017e-06, "loss": 0.3861, "step": 51635 }, { "epoch": 2.19, "grad_norm": 4.083792622722573, "learning_rate": 2.08212898211891e-06, "loss": 0.362, "step": 51640 }, { "epoch": 2.19, "grad_norm": 6.14534859097579, "learning_rate": 2.0811293209286453e-06, "loss": 0.3723, "step": 51645 }, { "epoch": 2.19, "grad_norm": 3.983637357190407, "learning_rate": 2.0801298367009105e-06, "loss": 0.3804, "step": 51650 }, { "epoch": 2.19, "grad_norm": 4.212778105253001, "learning_rate": 2.0791305294963027e-06, "loss": 0.4083, "step": 51655 }, { "epoch": 2.19, "grad_norm": 4.239878358727457, "learning_rate": 2.078131399375409e-06, "loss": 0.3703, "step": 51660 }, { "epoch": 2.19, "grad_norm": 4.5231199356800325, "learning_rate": 2.077132446398802e-06, "loss": 0.3615, "step": 51665 }, { "epoch": 2.19, "grad_norm": 4.995279890712481, "learning_rate": 2.076133670627043e-06, "loss": 0.3972, "step": 51670 }, { "epoch": 2.19, "grad_norm": 4.156443819449974, "learning_rate": 2.0751350721206874e-06, "loss": 0.3746, "step": 51675 }, { "epoch": 2.19, "grad_norm": 4.785049446921461, "learning_rate": 2.074136650940278e-06, "loss": 0.3823, "step": 51680 }, { "epoch": 2.19, "grad_norm": 4.21327953631408, "learning_rate": 2.0731384071463446e-06, "loss": 0.3769, "step": 51685 }, { "epoch": 2.19, "grad_norm": 4.185043372646848, "learning_rate": 2.0721403407994075e-06, "loss": 0.3533, "step": 51690 }, { "epoch": 2.19, "grad_norm": 4.084063701857908, "learning_rate": 2.071142451959977e-06, "loss": 0.3874, "step": 51695 }, { "epoch": 2.19, "grad_norm": 4.756882068372619, "learning_rate": 2.0701447406885543e-06, "loss": 0.3908, "step": 51700 }, { "epoch": 2.19, "grad_norm": 3.97596081544535, "learning_rate": 2.0691472070456242e-06, "loss": 0.3525, "step": 51705 }, { "epoch": 2.19, "grad_norm": 4.981541293731341, "learning_rate": 2.0681498510916653e-06, "loss": 0.3682, "step": 51710 }, { "epoch": 2.19, "grad_norm": 4.050526559263437, "learning_rate": 2.0671526728871473e-06, "loss": 0.3304, "step": 51715 }, { "epoch": 2.19, "grad_norm": 4.222969853161382, "learning_rate": 2.066155672492522e-06, "loss": 0.3582, "step": 51720 }, { "epoch": 2.19, "grad_norm": 4.7534704966933, "learning_rate": 2.0651588499682364e-06, "loss": 0.3639, "step": 51725 }, { "epoch": 2.19, "grad_norm": 4.187612091421166, "learning_rate": 2.064162205374726e-06, "loss": 0.3812, "step": 51730 }, { "epoch": 2.19, "grad_norm": 5.227192675561793, "learning_rate": 2.063165738772412e-06, "loss": 0.3715, "step": 51735 }, { "epoch": 2.19, "grad_norm": 3.7752378694010176, "learning_rate": 2.062169450221708e-06, "loss": 0.3637, "step": 51740 }, { "epoch": 2.19, "grad_norm": 3.725752789852882, "learning_rate": 2.0611733397830174e-06, "loss": 0.3698, "step": 51745 }, { "epoch": 2.19, "grad_norm": 4.4193213644975735, "learning_rate": 2.0601774075167317e-06, "loss": 0.3795, "step": 51750 }, { "epoch": 2.19, "grad_norm": 3.990852042380881, "learning_rate": 2.059181653483229e-06, "loss": 0.3555, "step": 51755 }, { "epoch": 2.19, "grad_norm": 3.869377026977483, "learning_rate": 2.05818607774288e-06, "loss": 0.3612, "step": 51760 }, { "epoch": 2.19, "grad_norm": 5.236327299799827, "learning_rate": 2.0571906803560453e-06, "loss": 0.3494, "step": 51765 }, { "epoch": 2.19, "grad_norm": 5.026272768522512, "learning_rate": 2.0561954613830696e-06, "loss": 0.359, "step": 51770 }, { "epoch": 2.19, "grad_norm": 3.9085209536453, "learning_rate": 2.055200420884292e-06, "loss": 0.3829, "step": 51775 }, { "epoch": 2.19, "grad_norm": 3.9944548526479524, "learning_rate": 2.0542055589200406e-06, "loss": 0.3728, "step": 51780 }, { "epoch": 2.19, "grad_norm": 4.447119160316467, "learning_rate": 2.0532108755506263e-06, "loss": 0.3623, "step": 51785 }, { "epoch": 2.19, "grad_norm": 4.659615341526252, "learning_rate": 2.0522163708363573e-06, "loss": 0.3822, "step": 51790 }, { "epoch": 2.19, "grad_norm": 4.1250720666244, "learning_rate": 2.051222044837527e-06, "loss": 0.364, "step": 51795 }, { "epoch": 2.19, "grad_norm": 5.188667157657558, "learning_rate": 2.0502278976144197e-06, "loss": 0.3905, "step": 51800 }, { "epoch": 2.19, "grad_norm": 5.084043544631529, "learning_rate": 2.0492339292273046e-06, "loss": 0.4074, "step": 51805 }, { "epoch": 2.19, "grad_norm": 8.513666690445548, "learning_rate": 2.0482401397364445e-06, "loss": 0.3927, "step": 51810 }, { "epoch": 2.19, "grad_norm": 4.088418700873688, "learning_rate": 2.0472465292020917e-06, "loss": 0.3549, "step": 51815 }, { "epoch": 2.19, "grad_norm": 4.95418219143137, "learning_rate": 2.046253097684482e-06, "loss": 0.3718, "step": 51820 }, { "epoch": 2.19, "grad_norm": 4.243837819608366, "learning_rate": 2.0452598452438473e-06, "loss": 0.3715, "step": 51825 }, { "epoch": 2.19, "grad_norm": 4.110171573977456, "learning_rate": 2.044266771940406e-06, "loss": 0.3808, "step": 51830 }, { "epoch": 2.19, "grad_norm": 4.120271922361783, "learning_rate": 2.0432738778343634e-06, "loss": 0.3843, "step": 51835 }, { "epoch": 2.19, "grad_norm": 7.992287059383662, "learning_rate": 2.042281162985915e-06, "loss": 0.365, "step": 51840 }, { "epoch": 2.19, "grad_norm": 4.114072674781159, "learning_rate": 2.041288627455248e-06, "loss": 0.3639, "step": 51845 }, { "epoch": 2.19, "grad_norm": 3.8843517415091005, "learning_rate": 2.0402962713025376e-06, "loss": 0.3706, "step": 51850 }, { "epoch": 2.19, "grad_norm": 4.735312003394535, "learning_rate": 2.0393040945879446e-06, "loss": 0.3848, "step": 51855 }, { "epoch": 2.19, "grad_norm": 3.826196607110661, "learning_rate": 2.038312097371623e-06, "loss": 0.3639, "step": 51860 }, { "epoch": 2.2, "grad_norm": 4.204349597294005, "learning_rate": 2.0373202797137167e-06, "loss": 0.3785, "step": 51865 }, { "epoch": 2.2, "grad_norm": 3.8561300614769127, "learning_rate": 2.0363286416743533e-06, "loss": 0.3535, "step": 51870 }, { "epoch": 2.2, "grad_norm": 4.165358976097341, "learning_rate": 2.0353371833136547e-06, "loss": 0.3773, "step": 51875 }, { "epoch": 2.2, "grad_norm": 4.0839575592203285, "learning_rate": 2.03434590469173e-06, "loss": 0.4007, "step": 51880 }, { "epoch": 2.2, "grad_norm": 3.935437244851414, "learning_rate": 2.033354805868679e-06, "loss": 0.3479, "step": 51885 }, { "epoch": 2.2, "grad_norm": 4.756361962332113, "learning_rate": 2.0323638869045866e-06, "loss": 0.3827, "step": 51890 }, { "epoch": 2.2, "grad_norm": 4.4512864180963705, "learning_rate": 2.0313731478595295e-06, "loss": 0.3841, "step": 51895 }, { "epoch": 2.2, "grad_norm": 4.270440806479402, "learning_rate": 2.0303825887935763e-06, "loss": 0.3587, "step": 51900 }, { "epoch": 2.2, "grad_norm": 3.721564408715685, "learning_rate": 2.0293922097667774e-06, "loss": 0.3632, "step": 51905 }, { "epoch": 2.2, "grad_norm": 4.474115041100587, "learning_rate": 2.0284020108391782e-06, "loss": 0.3851, "step": 51910 }, { "epoch": 2.2, "grad_norm": 5.0845033704801565, "learning_rate": 2.0274119920708146e-06, "loss": 0.3777, "step": 51915 }, { "epoch": 2.2, "grad_norm": 3.7185346015757257, "learning_rate": 2.0264221535217032e-06, "loss": 0.3654, "step": 51920 }, { "epoch": 2.2, "grad_norm": 4.454504208450664, "learning_rate": 2.0254324952518584e-06, "loss": 0.3672, "step": 51925 }, { "epoch": 2.2, "grad_norm": 4.7530132158441925, "learning_rate": 2.0244430173212793e-06, "loss": 0.3425, "step": 51930 }, { "epoch": 2.2, "grad_norm": 4.063953293364503, "learning_rate": 2.0234537197899575e-06, "loss": 0.3606, "step": 51935 }, { "epoch": 2.2, "grad_norm": 4.1000237722405, "learning_rate": 2.022464602717866e-06, "loss": 0.3486, "step": 51940 }, { "epoch": 2.2, "grad_norm": 4.412934234675003, "learning_rate": 2.021475666164976e-06, "loss": 0.3774, "step": 51945 }, { "epoch": 2.2, "grad_norm": 5.134408646621687, "learning_rate": 2.0204869101912445e-06, "loss": 0.341, "step": 51950 }, { "epoch": 2.2, "grad_norm": 6.4598563774912305, "learning_rate": 2.0194983348566133e-06, "loss": 0.3673, "step": 51955 }, { "epoch": 2.2, "grad_norm": 3.9002987481566875, "learning_rate": 2.0185099402210183e-06, "loss": 0.3573, "step": 51960 }, { "epoch": 2.2, "grad_norm": 6.175541955717893, "learning_rate": 2.017521726344386e-06, "loss": 0.3904, "step": 51965 }, { "epoch": 2.2, "grad_norm": 6.6204911772146, "learning_rate": 2.0165336932866235e-06, "loss": 0.3711, "step": 51970 }, { "epoch": 2.2, "grad_norm": 4.579069146498737, "learning_rate": 2.0155458411076356e-06, "loss": 0.3619, "step": 51975 }, { "epoch": 2.2, "grad_norm": 6.7153989030271255, "learning_rate": 2.014558169867312e-06, "loss": 0.3554, "step": 51980 }, { "epoch": 2.2, "grad_norm": 4.013134493082191, "learning_rate": 2.013570679625535e-06, "loss": 0.3647, "step": 51985 }, { "epoch": 2.2, "grad_norm": 4.352710991603538, "learning_rate": 2.0125833704421703e-06, "loss": 0.385, "step": 51990 }, { "epoch": 2.2, "grad_norm": 4.5270311850192515, "learning_rate": 2.011596242377075e-06, "loss": 0.3601, "step": 51995 }, { "epoch": 2.2, "grad_norm": 6.137427258235416, "learning_rate": 2.0106092954900965e-06, "loss": 0.3773, "step": 52000 }, { "epoch": 2.2, "grad_norm": 5.676219143585936, "learning_rate": 2.0096225298410725e-06, "loss": 0.3668, "step": 52005 }, { "epoch": 2.2, "grad_norm": 5.1243149611227405, "learning_rate": 2.0086359454898247e-06, "loss": 0.3847, "step": 52010 }, { "epoch": 2.2, "grad_norm": 3.6235355890623606, "learning_rate": 2.0076495424961677e-06, "loss": 0.3746, "step": 52015 }, { "epoch": 2.2, "grad_norm": 4.012469569119401, "learning_rate": 2.006663320919907e-06, "loss": 0.3769, "step": 52020 }, { "epoch": 2.2, "grad_norm": 5.044297928019615, "learning_rate": 2.0056772808208303e-06, "loss": 0.3825, "step": 52025 }, { "epoch": 2.2, "grad_norm": 4.015911371663097, "learning_rate": 2.0046914222587195e-06, "loss": 0.3829, "step": 52030 }, { "epoch": 2.2, "grad_norm": 3.991488613149469, "learning_rate": 2.0037057452933465e-06, "loss": 0.3796, "step": 52035 }, { "epoch": 2.2, "grad_norm": 3.6813101717331835, "learning_rate": 2.0027202499844666e-06, "loss": 0.3514, "step": 52040 }, { "epoch": 2.2, "grad_norm": 4.919994615143753, "learning_rate": 2.0017349363918297e-06, "loss": 0.3446, "step": 52045 }, { "epoch": 2.2, "grad_norm": 4.889472870652354, "learning_rate": 2.000749804575171e-06, "loss": 0.347, "step": 52050 }, { "epoch": 2.2, "grad_norm": 4.071881291099214, "learning_rate": 1.99976485459422e-06, "loss": 0.3852, "step": 52055 }, { "epoch": 2.2, "grad_norm": 4.013320510136518, "learning_rate": 1.998780086508686e-06, "loss": 0.3703, "step": 52060 }, { "epoch": 2.2, "grad_norm": 4.030597314352833, "learning_rate": 1.9977955003782747e-06, "loss": 0.3894, "step": 52065 }, { "epoch": 2.2, "grad_norm": 5.834619976275307, "learning_rate": 1.9968110962626807e-06, "loss": 0.3567, "step": 52070 }, { "epoch": 2.2, "grad_norm": 4.362719555751729, "learning_rate": 1.9958268742215824e-06, "loss": 0.3876, "step": 52075 }, { "epoch": 2.2, "grad_norm": 4.44066083565376, "learning_rate": 1.9948428343146512e-06, "loss": 0.3733, "step": 52080 }, { "epoch": 2.2, "grad_norm": 4.995028036302467, "learning_rate": 1.993858976601549e-06, "loss": 0.369, "step": 52085 }, { "epoch": 2.2, "grad_norm": 3.906914702349831, "learning_rate": 1.9928753011419205e-06, "loss": 0.3622, "step": 52090 }, { "epoch": 2.2, "grad_norm": 5.093732474817955, "learning_rate": 1.9918918079954047e-06, "loss": 0.3404, "step": 52095 }, { "epoch": 2.2, "grad_norm": 4.741861705574664, "learning_rate": 1.990908497221628e-06, "loss": 0.3388, "step": 52100 }, { "epoch": 2.21, "grad_norm": 3.782002767313741, "learning_rate": 1.9899253688802073e-06, "loss": 0.3953, "step": 52105 }, { "epoch": 2.21, "grad_norm": 5.10076632090282, "learning_rate": 1.988942423030743e-06, "loss": 0.3632, "step": 52110 }, { "epoch": 2.21, "grad_norm": 4.474426153669389, "learning_rate": 1.9879596597328306e-06, "loss": 0.3667, "step": 52115 }, { "epoch": 2.21, "grad_norm": 4.359230629121613, "learning_rate": 1.986977079046053e-06, "loss": 0.3622, "step": 52120 }, { "epoch": 2.21, "grad_norm": 4.275009392004105, "learning_rate": 1.9859946810299784e-06, "loss": 0.3973, "step": 52125 }, { "epoch": 2.21, "grad_norm": 4.749293205796794, "learning_rate": 1.9850124657441683e-06, "loss": 0.3339, "step": 52130 }, { "epoch": 2.21, "grad_norm": 3.9336687103217254, "learning_rate": 1.984030433248173e-06, "loss": 0.3668, "step": 52135 }, { "epoch": 2.21, "grad_norm": 6.19449399804296, "learning_rate": 1.9830485836015273e-06, "loss": 0.3712, "step": 52140 }, { "epoch": 2.21, "grad_norm": 5.617219363891496, "learning_rate": 1.982066916863759e-06, "loss": 0.3695, "step": 52145 }, { "epoch": 2.21, "grad_norm": 5.0899390001602205, "learning_rate": 1.9810854330943857e-06, "loss": 0.3569, "step": 52150 }, { "epoch": 2.21, "grad_norm": 3.7095346688128936, "learning_rate": 1.9801041323529104e-06, "loss": 0.3636, "step": 52155 }, { "epoch": 2.21, "grad_norm": 3.9853679060097043, "learning_rate": 1.9791230146988243e-06, "loss": 0.3669, "step": 52160 }, { "epoch": 2.21, "grad_norm": 3.96138389585488, "learning_rate": 1.978142080191611e-06, "loss": 0.3826, "step": 52165 }, { "epoch": 2.21, "grad_norm": 3.747042031105779, "learning_rate": 1.9771613288907447e-06, "loss": 0.3809, "step": 52170 }, { "epoch": 2.21, "grad_norm": 4.046840603569988, "learning_rate": 1.976180760855681e-06, "loss": 0.3893, "step": 52175 }, { "epoch": 2.21, "grad_norm": 4.8691562524940775, "learning_rate": 1.9752003761458703e-06, "loss": 0.3622, "step": 52180 }, { "epoch": 2.21, "grad_norm": 5.062471620183667, "learning_rate": 1.974220174820752e-06, "loss": 0.3564, "step": 52185 }, { "epoch": 2.21, "grad_norm": 4.025426498823294, "learning_rate": 1.9732401569397526e-06, "loss": 0.3632, "step": 52190 }, { "epoch": 2.21, "grad_norm": 3.9743812865491153, "learning_rate": 1.9722603225622855e-06, "loss": 0.3693, "step": 52195 }, { "epoch": 2.21, "grad_norm": 4.068860289679473, "learning_rate": 1.971280671747756e-06, "loss": 0.3713, "step": 52200 }, { "epoch": 2.21, "grad_norm": 3.679569382675868, "learning_rate": 1.97030120455556e-06, "loss": 0.3657, "step": 52205 }, { "epoch": 2.21, "grad_norm": 3.949474841144683, "learning_rate": 1.9693219210450763e-06, "loss": 0.3496, "step": 52210 }, { "epoch": 2.21, "grad_norm": 4.2565806285270265, "learning_rate": 1.968342821275677e-06, "loss": 0.3591, "step": 52215 }, { "epoch": 2.21, "grad_norm": 4.446382583126833, "learning_rate": 1.967363905306725e-06, "loss": 0.359, "step": 52220 }, { "epoch": 2.21, "grad_norm": 3.9600743225860504, "learning_rate": 1.9663851731975635e-06, "loss": 0.3688, "step": 52225 }, { "epoch": 2.21, "grad_norm": 4.3558852888730355, "learning_rate": 1.9654066250075344e-06, "loss": 0.3674, "step": 52230 }, { "epoch": 2.21, "grad_norm": 4.248378836482522, "learning_rate": 1.9644282607959617e-06, "loss": 0.3731, "step": 52235 }, { "epoch": 2.21, "grad_norm": 4.339411432024546, "learning_rate": 1.9634500806221644e-06, "loss": 0.3641, "step": 52240 }, { "epoch": 2.21, "grad_norm": 4.419979276318935, "learning_rate": 1.9624720845454424e-06, "loss": 0.3585, "step": 52245 }, { "epoch": 2.21, "grad_norm": 4.087273234751124, "learning_rate": 1.9614942726250906e-06, "loss": 0.3808, "step": 52250 }, { "epoch": 2.21, "grad_norm": 4.006498356702361, "learning_rate": 1.9605166449203927e-06, "loss": 0.3615, "step": 52255 }, { "epoch": 2.21, "grad_norm": 3.899700520635498, "learning_rate": 1.9595392014906157e-06, "loss": 0.3629, "step": 52260 }, { "epoch": 2.21, "grad_norm": 4.0693599621914895, "learning_rate": 1.958561942395021e-06, "loss": 0.3893, "step": 52265 }, { "epoch": 2.21, "grad_norm": 3.824579001211704, "learning_rate": 1.957584867692859e-06, "loss": 0.3862, "step": 52270 }, { "epoch": 2.21, "grad_norm": 4.718757857068727, "learning_rate": 1.956607977443362e-06, "loss": 0.3705, "step": 52275 }, { "epoch": 2.21, "grad_norm": 7.00674977584657, "learning_rate": 1.9556312717057597e-06, "loss": 0.3662, "step": 52280 }, { "epoch": 2.21, "grad_norm": 7.961240252252757, "learning_rate": 1.9546547505392653e-06, "loss": 0.3709, "step": 52285 }, { "epoch": 2.21, "grad_norm": 4.3828703940668365, "learning_rate": 1.953678414003085e-06, "loss": 0.3584, "step": 52290 }, { "epoch": 2.21, "grad_norm": 5.086542332194679, "learning_rate": 1.9527022621564075e-06, "loss": 0.3574, "step": 52295 }, { "epoch": 2.21, "grad_norm": 6.943109211727939, "learning_rate": 1.9517262950584153e-06, "loss": 0.3669, "step": 52300 }, { "epoch": 2.21, "grad_norm": 5.714653688908005, "learning_rate": 1.9507505127682814e-06, "loss": 0.3804, "step": 52305 }, { "epoch": 2.21, "grad_norm": 3.7338210356256787, "learning_rate": 1.9497749153451597e-06, "loss": 0.3616, "step": 52310 }, { "epoch": 2.21, "grad_norm": 4.276891112505889, "learning_rate": 1.9487995028482023e-06, "loss": 0.3457, "step": 52315 }, { "epoch": 2.21, "grad_norm": 4.179078575684295, "learning_rate": 1.9478242753365413e-06, "loss": 0.3973, "step": 52320 }, { "epoch": 2.21, "grad_norm": 3.823555683691143, "learning_rate": 1.9468492328693057e-06, "loss": 0.3654, "step": 52325 }, { "epoch": 2.21, "grad_norm": 3.886830111675473, "learning_rate": 1.945874375505606e-06, "loss": 0.3463, "step": 52330 }, { "epoch": 2.21, "grad_norm": 4.678463769405813, "learning_rate": 1.944899703304547e-06, "loss": 0.3605, "step": 52335 }, { "epoch": 2.22, "grad_norm": 4.314466737027587, "learning_rate": 1.9439252163252214e-06, "loss": 0.3504, "step": 52340 }, { "epoch": 2.22, "grad_norm": 4.158457508563852, "learning_rate": 1.942950914626707e-06, "loss": 0.387, "step": 52345 }, { "epoch": 2.22, "grad_norm": 4.052021047208834, "learning_rate": 1.9419767982680725e-06, "loss": 0.3497, "step": 52350 }, { "epoch": 2.22, "grad_norm": 4.191669331438492, "learning_rate": 1.941002867308378e-06, "loss": 0.3601, "step": 52355 }, { "epoch": 2.22, "grad_norm": 4.289202422339016, "learning_rate": 1.9400291218066703e-06, "loss": 0.3631, "step": 52360 }, { "epoch": 2.22, "grad_norm": 4.163567958282461, "learning_rate": 1.9390555618219818e-06, "loss": 0.3589, "step": 52365 }, { "epoch": 2.22, "grad_norm": 4.764923268161796, "learning_rate": 1.938082187413339e-06, "loss": 0.3346, "step": 52370 }, { "epoch": 2.22, "grad_norm": 6.141513764941518, "learning_rate": 1.937108998639755e-06, "loss": 0.3725, "step": 52375 }, { "epoch": 2.22, "grad_norm": 8.46875793632904, "learning_rate": 1.9361359955602287e-06, "loss": 0.3733, "step": 52380 }, { "epoch": 2.22, "grad_norm": 4.904148435574873, "learning_rate": 1.935163178233752e-06, "loss": 0.3613, "step": 52385 }, { "epoch": 2.22, "grad_norm": 7.285022073967542, "learning_rate": 1.934190546719306e-06, "loss": 0.3497, "step": 52390 }, { "epoch": 2.22, "grad_norm": 5.109518472245119, "learning_rate": 1.933218101075855e-06, "loss": 0.3472, "step": 52395 }, { "epoch": 2.22, "grad_norm": 4.113303233106136, "learning_rate": 1.932245841362357e-06, "loss": 0.3752, "step": 52400 }, { "epoch": 2.22, "grad_norm": 3.8783738037222895, "learning_rate": 1.931273767637757e-06, "loss": 0.3413, "step": 52405 }, { "epoch": 2.22, "grad_norm": 4.477465744155155, "learning_rate": 1.930301879960991e-06, "loss": 0.3575, "step": 52410 }, { "epoch": 2.22, "grad_norm": 4.476051325252788, "learning_rate": 1.9293301783909784e-06, "loss": 0.3685, "step": 52415 }, { "epoch": 2.22, "grad_norm": 4.391132174108271, "learning_rate": 1.928358662986632e-06, "loss": 0.3618, "step": 52420 }, { "epoch": 2.22, "grad_norm": 4.545256176396989, "learning_rate": 1.927387333806854e-06, "loss": 0.3623, "step": 52425 }, { "epoch": 2.22, "grad_norm": 5.175017610564468, "learning_rate": 1.926416190910529e-06, "loss": 0.3542, "step": 52430 }, { "epoch": 2.22, "grad_norm": 4.123149654600157, "learning_rate": 1.925445234356537e-06, "loss": 0.3397, "step": 52435 }, { "epoch": 2.22, "grad_norm": 4.025658209469571, "learning_rate": 1.9244744642037462e-06, "loss": 0.3607, "step": 52440 }, { "epoch": 2.22, "grad_norm": 5.551473052274857, "learning_rate": 1.923503880511007e-06, "loss": 0.4065, "step": 52445 }, { "epoch": 2.22, "grad_norm": 3.9309635901021744, "learning_rate": 1.9225334833371655e-06, "loss": 0.3594, "step": 52450 }, { "epoch": 2.22, "grad_norm": 4.325724890301639, "learning_rate": 1.921563272741056e-06, "loss": 0.3896, "step": 52455 }, { "epoch": 2.22, "grad_norm": 3.818908116741205, "learning_rate": 1.9205932487814954e-06, "loss": 0.3511, "step": 52460 }, { "epoch": 2.22, "grad_norm": 4.583659448863868, "learning_rate": 1.919623411517296e-06, "loss": 0.3845, "step": 52465 }, { "epoch": 2.22, "grad_norm": 4.349346092745814, "learning_rate": 1.9186537610072555e-06, "loss": 0.362, "step": 52470 }, { "epoch": 2.22, "grad_norm": 4.080288114784697, "learning_rate": 1.917684297310164e-06, "loss": 0.3517, "step": 52475 }, { "epoch": 2.22, "grad_norm": 5.598254865537337, "learning_rate": 1.9167150204847913e-06, "loss": 0.355, "step": 52480 }, { "epoch": 2.22, "grad_norm": 4.15440874555474, "learning_rate": 1.915745930589904e-06, "loss": 0.3446, "step": 52485 }, { "epoch": 2.22, "grad_norm": 3.948977984371894, "learning_rate": 1.914777027684257e-06, "loss": 0.3546, "step": 52490 }, { "epoch": 2.22, "grad_norm": 3.7714640450693815, "learning_rate": 1.913808311826592e-06, "loss": 0.3823, "step": 52495 }, { "epoch": 2.22, "grad_norm": 4.237498032240751, "learning_rate": 1.9128397830756368e-06, "loss": 0.3623, "step": 52500 }, { "epoch": 2.22, "grad_norm": 4.289082304931002, "learning_rate": 1.9118714414901122e-06, "loss": 0.3836, "step": 52505 }, { "epoch": 2.22, "grad_norm": 4.078741156773504, "learning_rate": 1.9109032871287274e-06, "loss": 0.342, "step": 52510 }, { "epoch": 2.22, "grad_norm": 4.552989142599369, "learning_rate": 1.909935320050175e-06, "loss": 0.3742, "step": 52515 }, { "epoch": 2.22, "grad_norm": 4.98392139774444, "learning_rate": 1.908967540313143e-06, "loss": 0.371, "step": 52520 }, { "epoch": 2.22, "grad_norm": 3.86080492200221, "learning_rate": 1.907999947976305e-06, "loss": 0.3549, "step": 52525 }, { "epoch": 2.22, "grad_norm": 5.007939545921851, "learning_rate": 1.9070325430983212e-06, "loss": 0.3861, "step": 52530 }, { "epoch": 2.22, "grad_norm": 6.767143685456613, "learning_rate": 1.9060653257378432e-06, "loss": 0.3664, "step": 52535 }, { "epoch": 2.22, "grad_norm": 9.70787830221513, "learning_rate": 1.9050982959535114e-06, "loss": 0.3548, "step": 52540 }, { "epoch": 2.22, "grad_norm": 5.398269057966513, "learning_rate": 1.9041314538039545e-06, "loss": 0.3629, "step": 52545 }, { "epoch": 2.22, "grad_norm": 4.047948213816526, "learning_rate": 1.9031647993477874e-06, "loss": 0.375, "step": 52550 }, { "epoch": 2.22, "grad_norm": 4.492654768160743, "learning_rate": 1.9021983326436166e-06, "loss": 0.372, "step": 52555 }, { "epoch": 2.22, "grad_norm": 4.096637076860877, "learning_rate": 1.9012320537500373e-06, "loss": 0.3885, "step": 52560 }, { "epoch": 2.22, "grad_norm": 4.863173821493066, "learning_rate": 1.9002659627256292e-06, "loss": 0.3618, "step": 52565 }, { "epoch": 2.22, "grad_norm": 3.8221833842937722, "learning_rate": 1.899300059628965e-06, "loss": 0.3713, "step": 52570 }, { "epoch": 2.23, "grad_norm": 3.6380844127172853, "learning_rate": 1.898334344518607e-06, "loss": 0.3418, "step": 52575 }, { "epoch": 2.23, "grad_norm": 4.377294278632141, "learning_rate": 1.8973688174530996e-06, "loss": 0.3504, "step": 52580 }, { "epoch": 2.23, "grad_norm": 3.8239141479937873, "learning_rate": 1.8964034784909819e-06, "loss": 0.3501, "step": 52585 }, { "epoch": 2.23, "grad_norm": 3.877907379924693, "learning_rate": 1.895438327690779e-06, "loss": 0.371, "step": 52590 }, { "epoch": 2.23, "grad_norm": 3.803271010009943, "learning_rate": 1.894473365111007e-06, "loss": 0.3533, "step": 52595 }, { "epoch": 2.23, "grad_norm": 3.866671104008564, "learning_rate": 1.8935085908101658e-06, "loss": 0.3488, "step": 52600 }, { "epoch": 2.23, "grad_norm": 4.021100002240936, "learning_rate": 1.8925440048467486e-06, "loss": 0.3724, "step": 52605 }, { "epoch": 2.23, "grad_norm": 4.167187498662469, "learning_rate": 1.8915796072792363e-06, "loss": 0.3599, "step": 52610 }, { "epoch": 2.23, "grad_norm": 4.562233497884677, "learning_rate": 1.8906153981660946e-06, "loss": 0.376, "step": 52615 }, { "epoch": 2.23, "grad_norm": 3.947660863818793, "learning_rate": 1.8896513775657826e-06, "loss": 0.3504, "step": 52620 }, { "epoch": 2.23, "grad_norm": 4.128357593299075, "learning_rate": 1.888687545536747e-06, "loss": 0.3467, "step": 52625 }, { "epoch": 2.23, "grad_norm": 4.928489975796033, "learning_rate": 1.8877239021374195e-06, "loss": 0.3698, "step": 52630 }, { "epoch": 2.23, "grad_norm": 3.7889570286838437, "learning_rate": 1.8867604474262247e-06, "loss": 0.3717, "step": 52635 }, { "epoch": 2.23, "grad_norm": 3.822087568848517, "learning_rate": 1.8857971814615744e-06, "loss": 0.3613, "step": 52640 }, { "epoch": 2.23, "grad_norm": 4.288559002450357, "learning_rate": 1.8848341043018685e-06, "loss": 0.3748, "step": 52645 }, { "epoch": 2.23, "grad_norm": 4.43144202621119, "learning_rate": 1.8838712160054929e-06, "loss": 0.3811, "step": 52650 }, { "epoch": 2.23, "grad_norm": 4.536662797303458, "learning_rate": 1.8829085166308265e-06, "loss": 0.3725, "step": 52655 }, { "epoch": 2.23, "grad_norm": 4.048189720593763, "learning_rate": 1.881946006236235e-06, "loss": 0.3602, "step": 52660 }, { "epoch": 2.23, "grad_norm": 4.693163203436942, "learning_rate": 1.8809836848800744e-06, "loss": 0.3797, "step": 52665 }, { "epoch": 2.23, "grad_norm": 4.59335958281409, "learning_rate": 1.8800215526206844e-06, "loss": 0.3682, "step": 52670 }, { "epoch": 2.23, "grad_norm": 3.5897458105273437, "learning_rate": 1.879059609516397e-06, "loss": 0.3652, "step": 52675 }, { "epoch": 2.23, "grad_norm": 4.279809588318355, "learning_rate": 1.878097855625534e-06, "loss": 0.3526, "step": 52680 }, { "epoch": 2.23, "grad_norm": 3.855734402805208, "learning_rate": 1.8771362910064006e-06, "loss": 0.3832, "step": 52685 }, { "epoch": 2.23, "grad_norm": 3.907301637185064, "learning_rate": 1.8761749157172953e-06, "loss": 0.362, "step": 52690 }, { "epoch": 2.23, "grad_norm": 5.213336188221828, "learning_rate": 1.8752137298165052e-06, "loss": 0.3691, "step": 52695 }, { "epoch": 2.23, "grad_norm": 4.269830686902421, "learning_rate": 1.8742527333623e-06, "loss": 0.3613, "step": 52700 }, { "epoch": 2.23, "grad_norm": 4.033303111413465, "learning_rate": 1.8732919264129451e-06, "loss": 0.3572, "step": 52705 }, { "epoch": 2.23, "grad_norm": 4.1794721454088535, "learning_rate": 1.872331309026692e-06, "loss": 0.3565, "step": 52710 }, { "epoch": 2.23, "grad_norm": 3.933123449945412, "learning_rate": 1.871370881261777e-06, "loss": 0.3746, "step": 52715 }, { "epoch": 2.23, "grad_norm": 3.8003110831257105, "learning_rate": 1.8704106431764296e-06, "loss": 0.3603, "step": 52720 }, { "epoch": 2.23, "grad_norm": 4.212211974765563, "learning_rate": 1.8694505948288673e-06, "loss": 0.3612, "step": 52725 }, { "epoch": 2.23, "grad_norm": 4.298843631015071, "learning_rate": 1.8684907362772947e-06, "loss": 0.3493, "step": 52730 }, { "epoch": 2.23, "grad_norm": 4.191727846833544, "learning_rate": 1.8675310675799036e-06, "loss": 0.3454, "step": 52735 }, { "epoch": 2.23, "grad_norm": 3.9403184134836104, "learning_rate": 1.8665715887948766e-06, "loss": 0.3759, "step": 52740 }, { "epoch": 2.23, "grad_norm": 4.090940384661897, "learning_rate": 1.865612299980386e-06, "loss": 0.3797, "step": 52745 }, { "epoch": 2.23, "grad_norm": 4.1649462186129025, "learning_rate": 1.864653201194588e-06, "loss": 0.3755, "step": 52750 }, { "epoch": 2.23, "grad_norm": 3.703855388222147, "learning_rate": 1.8636942924956298e-06, "loss": 0.3573, "step": 52755 }, { "epoch": 2.23, "grad_norm": 6.791083060422642, "learning_rate": 1.862735573941651e-06, "loss": 0.3492, "step": 52760 }, { "epoch": 2.23, "grad_norm": 4.237379850746144, "learning_rate": 1.8617770455907707e-06, "loss": 0.3456, "step": 52765 }, { "epoch": 2.23, "grad_norm": 5.025506823848462, "learning_rate": 1.8608187075011047e-06, "loss": 0.3641, "step": 52770 }, { "epoch": 2.23, "grad_norm": 3.662202097187108, "learning_rate": 1.859860559730754e-06, "loss": 0.3625, "step": 52775 }, { "epoch": 2.23, "grad_norm": 4.390596568070467, "learning_rate": 1.8589026023378093e-06, "loss": 0.3587, "step": 52780 }, { "epoch": 2.23, "grad_norm": 5.23930610963032, "learning_rate": 1.8579448353803459e-06, "loss": 0.3738, "step": 52785 }, { "epoch": 2.23, "grad_norm": 7.842964638300032, "learning_rate": 1.8569872589164317e-06, "loss": 0.3529, "step": 52790 }, { "epoch": 2.23, "grad_norm": 4.009353901865072, "learning_rate": 1.8560298730041237e-06, "loss": 0.3383, "step": 52795 }, { "epoch": 2.23, "grad_norm": 4.3152747534048865, "learning_rate": 1.8550726777014633e-06, "loss": 0.3524, "step": 52800 }, { "epoch": 2.23, "grad_norm": 5.217652354038226, "learning_rate": 1.8541156730664817e-06, "loss": 0.3632, "step": 52805 }, { "epoch": 2.24, "grad_norm": 4.181123017580436, "learning_rate": 1.8531588591571998e-06, "loss": 0.3787, "step": 52810 }, { "epoch": 2.24, "grad_norm": 3.834009309587241, "learning_rate": 1.852202236031629e-06, "loss": 0.392, "step": 52815 }, { "epoch": 2.24, "grad_norm": 4.431219782249487, "learning_rate": 1.8512458037477626e-06, "loss": 0.3963, "step": 52820 }, { "epoch": 2.24, "grad_norm": 3.8475311036454407, "learning_rate": 1.850289562363588e-06, "loss": 0.3562, "step": 52825 }, { "epoch": 2.24, "grad_norm": 3.9014367275676376, "learning_rate": 1.8493335119370814e-06, "loss": 0.3483, "step": 52830 }, { "epoch": 2.24, "grad_norm": 4.718757910136919, "learning_rate": 1.848377652526202e-06, "loss": 0.393, "step": 52835 }, { "epoch": 2.24, "grad_norm": 3.8967628671227765, "learning_rate": 1.847421984188902e-06, "loss": 0.3461, "step": 52840 }, { "epoch": 2.24, "grad_norm": 4.0272393495300856, "learning_rate": 1.8464665069831205e-06, "loss": 0.3456, "step": 52845 }, { "epoch": 2.24, "grad_norm": 4.147982336466948, "learning_rate": 1.8455112209667875e-06, "loss": 0.3658, "step": 52850 }, { "epoch": 2.24, "grad_norm": 4.015118316576194, "learning_rate": 1.8445561261978156e-06, "loss": 0.3353, "step": 52855 }, { "epoch": 2.24, "grad_norm": 4.796401477746836, "learning_rate": 1.843601222734111e-06, "loss": 0.383, "step": 52860 }, { "epoch": 2.24, "grad_norm": 4.188307939391202, "learning_rate": 1.8426465106335685e-06, "loss": 0.3621, "step": 52865 }, { "epoch": 2.24, "grad_norm": 4.5333895220065905, "learning_rate": 1.8416919899540663e-06, "loss": 0.3486, "step": 52870 }, { "epoch": 2.24, "grad_norm": 3.9254561126277734, "learning_rate": 1.8407376607534754e-06, "loss": 0.3623, "step": 52875 }, { "epoch": 2.24, "grad_norm": 3.8879028391273223, "learning_rate": 1.839783523089656e-06, "loss": 0.345, "step": 52880 }, { "epoch": 2.24, "grad_norm": 4.235122960089528, "learning_rate": 1.838829577020451e-06, "loss": 0.3504, "step": 52885 }, { "epoch": 2.24, "grad_norm": 4.021847320569711, "learning_rate": 1.837875822603698e-06, "loss": 0.3809, "step": 52890 }, { "epoch": 2.24, "grad_norm": 4.389994328576631, "learning_rate": 1.8369222598972187e-06, "loss": 0.364, "step": 52895 }, { "epoch": 2.24, "grad_norm": 4.414931627394944, "learning_rate": 1.8359688889588278e-06, "loss": 0.3325, "step": 52900 }, { "epoch": 2.24, "grad_norm": 4.1916310647214745, "learning_rate": 1.8350157098463212e-06, "loss": 0.3668, "step": 52905 }, { "epoch": 2.24, "grad_norm": 4.161141304947531, "learning_rate": 1.8340627226174896e-06, "loss": 0.392, "step": 52910 }, { "epoch": 2.24, "grad_norm": 3.8422776779919365, "learning_rate": 1.8331099273301112e-06, "loss": 0.3777, "step": 52915 }, { "epoch": 2.24, "grad_norm": 4.486709164494434, "learning_rate": 1.8321573240419476e-06, "loss": 0.3639, "step": 52920 }, { "epoch": 2.24, "grad_norm": 3.87158017382092, "learning_rate": 1.8312049128107545e-06, "loss": 0.3641, "step": 52925 }, { "epoch": 2.24, "grad_norm": 5.217925218782692, "learning_rate": 1.8302526936942749e-06, "loss": 0.3785, "step": 52930 }, { "epoch": 2.24, "grad_norm": 4.355501497210609, "learning_rate": 1.829300666750236e-06, "loss": 0.3365, "step": 52935 }, { "epoch": 2.24, "grad_norm": 4.063589917072802, "learning_rate": 1.8283488320363585e-06, "loss": 0.3552, "step": 52940 }, { "epoch": 2.24, "grad_norm": 4.122445580458788, "learning_rate": 1.8273971896103488e-06, "loss": 0.3463, "step": 52945 }, { "epoch": 2.24, "grad_norm": 4.936382037431601, "learning_rate": 1.826445739529904e-06, "loss": 0.378, "step": 52950 }, { "epoch": 2.24, "grad_norm": 5.101368925141458, "learning_rate": 1.8254944818527038e-06, "loss": 0.3489, "step": 52955 }, { "epoch": 2.24, "grad_norm": 4.304381004406447, "learning_rate": 1.8245434166364246e-06, "loss": 0.3741, "step": 52960 }, { "epoch": 2.24, "grad_norm": 5.911301933481199, "learning_rate": 1.8235925439387225e-06, "loss": 0.3409, "step": 52965 }, { "epoch": 2.24, "grad_norm": 5.0932602824018005, "learning_rate": 1.8226418638172498e-06, "loss": 0.3627, "step": 52970 }, { "epoch": 2.24, "grad_norm": 3.853821227355291, "learning_rate": 1.8216913763296401e-06, "loss": 0.3594, "step": 52975 }, { "epoch": 2.24, "grad_norm": 4.39686085366871, "learning_rate": 1.8207410815335203e-06, "loss": 0.3309, "step": 52980 }, { "epoch": 2.24, "grad_norm": 6.102895459780268, "learning_rate": 1.8197909794865054e-06, "loss": 0.3576, "step": 52985 }, { "epoch": 2.24, "grad_norm": 3.8446937586439263, "learning_rate": 1.818841070246194e-06, "loss": 0.3737, "step": 52990 }, { "epoch": 2.24, "grad_norm": 4.616285824894671, "learning_rate": 1.8178913538701782e-06, "loss": 0.3572, "step": 52995 }, { "epoch": 2.24, "grad_norm": 4.119704979213325, "learning_rate": 1.816941830416038e-06, "loss": 0.378, "step": 53000 }, { "epoch": 2.24, "grad_norm": 4.173626965155049, "learning_rate": 1.815992499941337e-06, "loss": 0.3658, "step": 53005 }, { "epoch": 2.24, "grad_norm": 3.924961337143304, "learning_rate": 1.8150433625036324e-06, "loss": 0.3634, "step": 53010 }, { "epoch": 2.24, "grad_norm": 4.119049885767718, "learning_rate": 1.814094418160468e-06, "loss": 0.3996, "step": 53015 }, { "epoch": 2.24, "grad_norm": 5.358812973288078, "learning_rate": 1.8131456669693736e-06, "loss": 0.3529, "step": 53020 }, { "epoch": 2.24, "grad_norm": 4.580032077984705, "learning_rate": 1.8121971089878698e-06, "loss": 0.3714, "step": 53025 }, { "epoch": 2.24, "grad_norm": 3.948483304481978, "learning_rate": 1.8112487442734656e-06, "loss": 0.3765, "step": 53030 }, { "epoch": 2.24, "grad_norm": 3.8361594368064384, "learning_rate": 1.8103005728836587e-06, "loss": 0.3595, "step": 53035 }, { "epoch": 2.24, "grad_norm": 4.116040656169031, "learning_rate": 1.8093525948759311e-06, "loss": 0.3774, "step": 53040 }, { "epoch": 2.24, "grad_norm": 4.421928918688815, "learning_rate": 1.8084048103077573e-06, "loss": 0.3633, "step": 53045 }, { "epoch": 2.25, "grad_norm": 4.128638318632504, "learning_rate": 1.8074572192366002e-06, "loss": 0.3545, "step": 53050 }, { "epoch": 2.25, "grad_norm": 3.958646606347638, "learning_rate": 1.8065098217199061e-06, "loss": 0.3324, "step": 53055 }, { "epoch": 2.25, "grad_norm": 3.815161411482528, "learning_rate": 1.8055626178151154e-06, "loss": 0.3731, "step": 53060 }, { "epoch": 2.25, "grad_norm": 3.8586910485184274, "learning_rate": 1.8046156075796545e-06, "loss": 0.3431, "step": 53065 }, { "epoch": 2.25, "grad_norm": 4.020434190253334, "learning_rate": 1.803668791070936e-06, "loss": 0.3692, "step": 53070 }, { "epoch": 2.25, "grad_norm": 5.139718093080879, "learning_rate": 1.802722168346363e-06, "loss": 0.367, "step": 53075 }, { "epoch": 2.25, "grad_norm": 3.5604930248365037, "learning_rate": 1.801775739463328e-06, "loss": 0.3629, "step": 53080 }, { "epoch": 2.25, "grad_norm": 4.296991113426738, "learning_rate": 1.8008295044792102e-06, "loss": 0.3661, "step": 53085 }, { "epoch": 2.25, "grad_norm": 4.323002687686565, "learning_rate": 1.7998834634513745e-06, "loss": 0.3569, "step": 53090 }, { "epoch": 2.25, "grad_norm": 3.6347778054673068, "learning_rate": 1.798937616437178e-06, "loss": 0.3454, "step": 53095 }, { "epoch": 2.25, "grad_norm": 4.013264099885689, "learning_rate": 1.7979919634939663e-06, "loss": 0.3725, "step": 53100 }, { "epoch": 2.25, "grad_norm": 4.597132063916742, "learning_rate": 1.797046504679069e-06, "loss": 0.337, "step": 53105 }, { "epoch": 2.25, "grad_norm": 4.0651751369632505, "learning_rate": 1.7961012400498073e-06, "loss": 0.3589, "step": 53110 }, { "epoch": 2.25, "grad_norm": 3.8606636170674413, "learning_rate": 1.7951561696634912e-06, "loss": 0.3635, "step": 53115 }, { "epoch": 2.25, "grad_norm": 3.9580716053831773, "learning_rate": 1.7942112935774153e-06, "loss": 0.3663, "step": 53120 }, { "epoch": 2.25, "grad_norm": 4.088798366856965, "learning_rate": 1.7932666118488674e-06, "loss": 0.3581, "step": 53125 }, { "epoch": 2.25, "grad_norm": 4.343914207042321, "learning_rate": 1.7923221245351168e-06, "loss": 0.3606, "step": 53130 }, { "epoch": 2.25, "grad_norm": 4.813265053430516, "learning_rate": 1.7913778316934293e-06, "loss": 0.3442, "step": 53135 }, { "epoch": 2.25, "grad_norm": 3.884118493906515, "learning_rate": 1.7904337333810512e-06, "loss": 0.3262, "step": 53140 }, { "epoch": 2.25, "grad_norm": 4.248908872095434, "learning_rate": 1.789489829655221e-06, "loss": 0.3877, "step": 53145 }, { "epoch": 2.25, "grad_norm": 3.39241482898308, "learning_rate": 1.7885461205731664e-06, "loss": 0.3351, "step": 53150 }, { "epoch": 2.25, "grad_norm": 4.386977583539001, "learning_rate": 1.787602606192102e-06, "loss": 0.3398, "step": 53155 }, { "epoch": 2.25, "grad_norm": 4.181755995726041, "learning_rate": 1.7866592865692272e-06, "loss": 0.3642, "step": 53160 }, { "epoch": 2.25, "grad_norm": 4.402468719281442, "learning_rate": 1.7857161617617352e-06, "loss": 0.3671, "step": 53165 }, { "epoch": 2.25, "grad_norm": 4.515984720811214, "learning_rate": 1.7847732318268063e-06, "loss": 0.3666, "step": 53170 }, { "epoch": 2.25, "grad_norm": 4.184225551302388, "learning_rate": 1.783830496821603e-06, "loss": 0.3667, "step": 53175 }, { "epoch": 2.25, "grad_norm": 4.137313444840027, "learning_rate": 1.7828879568032835e-06, "loss": 0.3715, "step": 53180 }, { "epoch": 2.25, "grad_norm": 4.241981363542599, "learning_rate": 1.7819456118289928e-06, "loss": 0.3709, "step": 53185 }, { "epoch": 2.25, "grad_norm": 4.437289420148114, "learning_rate": 1.7810034619558592e-06, "loss": 0.3566, "step": 53190 }, { "epoch": 2.25, "grad_norm": 4.647701738805234, "learning_rate": 1.7800615072410032e-06, "loss": 0.3565, "step": 53195 }, { "epoch": 2.25, "grad_norm": 4.720576199353198, "learning_rate": 1.7791197477415333e-06, "loss": 0.3913, "step": 53200 }, { "epoch": 2.25, "grad_norm": 4.453086032747109, "learning_rate": 1.7781781835145479e-06, "loss": 0.3895, "step": 53205 }, { "epoch": 2.25, "grad_norm": 4.116628271761173, "learning_rate": 1.777236814617127e-06, "loss": 0.3938, "step": 53210 }, { "epoch": 2.25, "grad_norm": 4.885827225269734, "learning_rate": 1.7762956411063454e-06, "loss": 0.3683, "step": 53215 }, { "epoch": 2.25, "grad_norm": 4.0550655185288145, "learning_rate": 1.7753546630392644e-06, "loss": 0.3717, "step": 53220 }, { "epoch": 2.25, "grad_norm": 4.56891051689047, "learning_rate": 1.7744138804729305e-06, "loss": 0.3645, "step": 53225 }, { "epoch": 2.25, "grad_norm": 3.9699939071558257, "learning_rate": 1.7734732934643816e-06, "loss": 0.3348, "step": 53230 }, { "epoch": 2.25, "grad_norm": 4.097134850863129, "learning_rate": 1.7725329020706443e-06, "loss": 0.3631, "step": 53235 }, { "epoch": 2.25, "grad_norm": 4.038300638656339, "learning_rate": 1.7715927063487293e-06, "loss": 0.3229, "step": 53240 }, { "epoch": 2.25, "grad_norm": 4.633620662524091, "learning_rate": 1.7706527063556384e-06, "loss": 0.3886, "step": 53245 }, { "epoch": 2.25, "grad_norm": 4.097900884047093, "learning_rate": 1.7697129021483616e-06, "loss": 0.3472, "step": 53250 }, { "epoch": 2.25, "grad_norm": 4.195587040045473, "learning_rate": 1.7687732937838787e-06, "loss": 0.3725, "step": 53255 }, { "epoch": 2.25, "grad_norm": 4.299414692616899, "learning_rate": 1.7678338813191508e-06, "loss": 0.3914, "step": 53260 }, { "epoch": 2.25, "grad_norm": 3.7194205865236696, "learning_rate": 1.766894664811134e-06, "loss": 0.3432, "step": 53265 }, { "epoch": 2.25, "grad_norm": 4.278337404970941, "learning_rate": 1.7659556443167725e-06, "loss": 0.3571, "step": 53270 }, { "epoch": 2.25, "grad_norm": 3.933569454614378, "learning_rate": 1.7650168198929919e-06, "loss": 0.3578, "step": 53275 }, { "epoch": 2.25, "grad_norm": 4.194446581644329, "learning_rate": 1.7640781915967126e-06, "loss": 0.3421, "step": 53280 }, { "epoch": 2.26, "grad_norm": 4.33961023645128, "learning_rate": 1.7631397594848425e-06, "loss": 0.3732, "step": 53285 }, { "epoch": 2.26, "grad_norm": 6.134170479546423, "learning_rate": 1.7622015236142742e-06, "loss": 0.3758, "step": 53290 }, { "epoch": 2.26, "grad_norm": 4.3237712335239005, "learning_rate": 1.761263484041889e-06, "loss": 0.3495, "step": 53295 }, { "epoch": 2.26, "grad_norm": 3.7699836841176357, "learning_rate": 1.7603256408245584e-06, "loss": 0.3116, "step": 53300 }, { "epoch": 2.26, "grad_norm": 4.0464377712993755, "learning_rate": 1.7593879940191433e-06, "loss": 0.3389, "step": 53305 }, { "epoch": 2.26, "grad_norm": 4.3543430064408915, "learning_rate": 1.7584505436824867e-06, "loss": 0.3798, "step": 53310 }, { "epoch": 2.26, "grad_norm": 4.242050900284283, "learning_rate": 1.7575132898714253e-06, "loss": 0.3731, "step": 53315 }, { "epoch": 2.26, "grad_norm": 5.507600767474456, "learning_rate": 1.7565762326427843e-06, "loss": 0.3511, "step": 53320 }, { "epoch": 2.26, "grad_norm": 4.376689386220052, "learning_rate": 1.7556393720533703e-06, "loss": 0.3666, "step": 53325 }, { "epoch": 2.26, "grad_norm": 4.063002731053875, "learning_rate": 1.7547027081599848e-06, "loss": 0.3545, "step": 53330 }, { "epoch": 2.26, "grad_norm": 3.9913007451936484, "learning_rate": 1.7537662410194145e-06, "loss": 0.3344, "step": 53335 }, { "epoch": 2.26, "grad_norm": 3.9030341251991154, "learning_rate": 1.7528299706884367e-06, "loss": 0.3767, "step": 53340 }, { "epoch": 2.26, "grad_norm": 4.0020783429591, "learning_rate": 1.7518938972238114e-06, "loss": 0.3446, "step": 53345 }, { "epoch": 2.26, "grad_norm": 4.2720730395942725, "learning_rate": 1.7509580206822912e-06, "loss": 0.3699, "step": 53350 }, { "epoch": 2.26, "grad_norm": 4.695300767476802, "learning_rate": 1.750022341120618e-06, "loss": 0.3562, "step": 53355 }, { "epoch": 2.26, "grad_norm": 3.920804392162004, "learning_rate": 1.749086858595515e-06, "loss": 0.349, "step": 53360 }, { "epoch": 2.26, "grad_norm": 3.9897560495512487, "learning_rate": 1.7481515731637006e-06, "loss": 0.3567, "step": 53365 }, { "epoch": 2.26, "grad_norm": 4.600349154542182, "learning_rate": 1.7472164848818785e-06, "loss": 0.3642, "step": 53370 }, { "epoch": 2.26, "grad_norm": 4.027130459451585, "learning_rate": 1.7462815938067385e-06, "loss": 0.3519, "step": 53375 }, { "epoch": 2.26, "grad_norm": 5.1685124606383415, "learning_rate": 1.7453468999949607e-06, "loss": 0.3565, "step": 53380 }, { "epoch": 2.26, "grad_norm": 6.5330676899466695, "learning_rate": 1.744412403503214e-06, "loss": 0.3754, "step": 53385 }, { "epoch": 2.26, "grad_norm": 5.22537501307672, "learning_rate": 1.7434781043881549e-06, "loss": 0.368, "step": 53390 }, { "epoch": 2.26, "grad_norm": 4.25519038338382, "learning_rate": 1.7425440027064244e-06, "loss": 0.3507, "step": 53395 }, { "epoch": 2.26, "grad_norm": 4.196358007450328, "learning_rate": 1.7416100985146561e-06, "loss": 0.3561, "step": 53400 }, { "epoch": 2.26, "grad_norm": 4.330358392893522, "learning_rate": 1.7406763918694714e-06, "loss": 0.3702, "step": 53405 }, { "epoch": 2.26, "grad_norm": 3.7685175680526726, "learning_rate": 1.7397428828274743e-06, "loss": 0.3679, "step": 53410 }, { "epoch": 2.26, "grad_norm": 4.337856249867497, "learning_rate": 1.738809571445263e-06, "loss": 0.3404, "step": 53415 }, { "epoch": 2.26, "grad_norm": 4.43433399364858, "learning_rate": 1.7378764577794227e-06, "loss": 0.3406, "step": 53420 }, { "epoch": 2.26, "grad_norm": 5.479431797562968, "learning_rate": 1.7369435418865226e-06, "loss": 0.3673, "step": 53425 }, { "epoch": 2.26, "grad_norm": 6.1348623849809885, "learning_rate": 1.7360108238231233e-06, "loss": 0.3483, "step": 53430 }, { "epoch": 2.26, "grad_norm": 4.241709787499552, "learning_rate": 1.7350783036457742e-06, "loss": 0.3483, "step": 53435 }, { "epoch": 2.26, "grad_norm": 3.9106016096357057, "learning_rate": 1.7341459814110112e-06, "loss": 0.358, "step": 53440 }, { "epoch": 2.26, "grad_norm": 4.09202162984694, "learning_rate": 1.7332138571753565e-06, "loss": 0.3698, "step": 53445 }, { "epoch": 2.26, "grad_norm": 4.026678978131323, "learning_rate": 1.7322819309953243e-06, "loss": 0.3405, "step": 53450 }, { "epoch": 2.26, "grad_norm": 3.720973626143717, "learning_rate": 1.7313502029274116e-06, "loss": 0.3489, "step": 53455 }, { "epoch": 2.26, "grad_norm": 4.028549137275551, "learning_rate": 1.7304186730281097e-06, "loss": 0.3794, "step": 53460 }, { "epoch": 2.26, "grad_norm": 4.154253964684513, "learning_rate": 1.729487341353891e-06, "loss": 0.3486, "step": 53465 }, { "epoch": 2.26, "grad_norm": 4.297762908427482, "learning_rate": 1.7285562079612212e-06, "loss": 0.3736, "step": 53470 }, { "epoch": 2.26, "grad_norm": 5.529379423898424, "learning_rate": 1.727625272906554e-06, "loss": 0.3435, "step": 53475 }, { "epoch": 2.26, "grad_norm": 6.0056473662193595, "learning_rate": 1.7266945362463257e-06, "loss": 0.3606, "step": 53480 }, { "epoch": 2.26, "grad_norm": 6.851484476646318, "learning_rate": 1.7257639980369657e-06, "loss": 0.3703, "step": 53485 }, { "epoch": 2.26, "grad_norm": 4.737436852150534, "learning_rate": 1.7248336583348917e-06, "loss": 0.3383, "step": 53490 }, { "epoch": 2.26, "grad_norm": 4.116955231639267, "learning_rate": 1.723903517196504e-06, "loss": 0.3493, "step": 53495 }, { "epoch": 2.26, "grad_norm": 3.947191587972658, "learning_rate": 1.7229735746781961e-06, "loss": 0.3716, "step": 53500 }, { "epoch": 2.26, "grad_norm": 3.9423658542145104, "learning_rate": 1.7220438308363474e-06, "loss": 0.3646, "step": 53505 }, { "epoch": 2.26, "grad_norm": 5.534652848145454, "learning_rate": 1.7211142857273273e-06, "loss": 0.3395, "step": 53510 }, { "epoch": 2.26, "grad_norm": 5.417820561507296, "learning_rate": 1.7201849394074882e-06, "loss": 0.3439, "step": 53515 }, { "epoch": 2.27, "grad_norm": 3.6940709494381223, "learning_rate": 1.7192557919331754e-06, "loss": 0.349, "step": 53520 }, { "epoch": 2.27, "grad_norm": 4.517813302195003, "learning_rate": 1.7183268433607215e-06, "loss": 0.371, "step": 53525 }, { "epoch": 2.27, "grad_norm": 3.756244759348609, "learning_rate": 1.717398093746443e-06, "loss": 0.346, "step": 53530 }, { "epoch": 2.27, "grad_norm": 3.666612902245065, "learning_rate": 1.7164695431466493e-06, "loss": 0.357, "step": 53535 }, { "epoch": 2.27, "grad_norm": 4.142042160920247, "learning_rate": 1.7155411916176363e-06, "loss": 0.3703, "step": 53540 }, { "epoch": 2.27, "grad_norm": 4.771070128081749, "learning_rate": 1.7146130392156851e-06, "loss": 0.3289, "step": 53545 }, { "epoch": 2.27, "grad_norm": 4.952037272240819, "learning_rate": 1.7136850859970673e-06, "loss": 0.3618, "step": 53550 }, { "epoch": 2.27, "grad_norm": 3.9875735536578905, "learning_rate": 1.712757332018043e-06, "loss": 0.3435, "step": 53555 }, { "epoch": 2.27, "grad_norm": 4.248130150239056, "learning_rate": 1.7118297773348603e-06, "loss": 0.3745, "step": 53560 }, { "epoch": 2.27, "grad_norm": 4.693447740966035, "learning_rate": 1.7109024220037508e-06, "loss": 0.3574, "step": 53565 }, { "epoch": 2.27, "grad_norm": 4.001178525810628, "learning_rate": 1.7099752660809393e-06, "loss": 0.3604, "step": 53570 }, { "epoch": 2.27, "grad_norm": 4.15567987386065, "learning_rate": 1.709048309622638e-06, "loss": 0.3608, "step": 53575 }, { "epoch": 2.27, "grad_norm": 5.458772579463961, "learning_rate": 1.708121552685042e-06, "loss": 0.3659, "step": 53580 }, { "epoch": 2.27, "grad_norm": 3.6441195864043547, "learning_rate": 1.70719499532434e-06, "loss": 0.364, "step": 53585 }, { "epoch": 2.27, "grad_norm": 4.60218399901867, "learning_rate": 1.7062686375967075e-06, "loss": 0.3646, "step": 53590 }, { "epoch": 2.27, "grad_norm": 5.474015576600486, "learning_rate": 1.705342479558304e-06, "loss": 0.3669, "step": 53595 }, { "epoch": 2.27, "grad_norm": 3.960015999850594, "learning_rate": 1.7044165212652814e-06, "loss": 0.3586, "step": 53600 }, { "epoch": 2.27, "grad_norm": 5.120008156315747, "learning_rate": 1.703490762773779e-06, "loss": 0.3485, "step": 53605 }, { "epoch": 2.27, "grad_norm": 4.671993652432476, "learning_rate": 1.7025652041399216e-06, "loss": 0.3586, "step": 53610 }, { "epoch": 2.27, "grad_norm": 4.8846380423301134, "learning_rate": 1.7016398454198212e-06, "loss": 0.367, "step": 53615 }, { "epoch": 2.27, "grad_norm": 5.633219516723773, "learning_rate": 1.7007146866695817e-06, "loss": 0.3408, "step": 53620 }, { "epoch": 2.27, "grad_norm": 4.177864678116925, "learning_rate": 1.6997897279452936e-06, "loss": 0.3755, "step": 53625 }, { "epoch": 2.27, "grad_norm": 3.7608927920769686, "learning_rate": 1.6988649693030318e-06, "loss": 0.3813, "step": 53630 }, { "epoch": 2.27, "grad_norm": 4.253026317541189, "learning_rate": 1.697940410798863e-06, "loss": 0.362, "step": 53635 }, { "epoch": 2.27, "grad_norm": 4.555835182519368, "learning_rate": 1.6970160524888407e-06, "loss": 0.3417, "step": 53640 }, { "epoch": 2.27, "grad_norm": 3.9950158001306777, "learning_rate": 1.6960918944290071e-06, "loss": 0.3737, "step": 53645 }, { "epoch": 2.27, "grad_norm": 5.436047335095203, "learning_rate": 1.6951679366753887e-06, "loss": 0.3515, "step": 53650 }, { "epoch": 2.27, "grad_norm": 4.410036756666341, "learning_rate": 1.6942441792840037e-06, "loss": 0.3437, "step": 53655 }, { "epoch": 2.27, "grad_norm": 3.7153311080252975, "learning_rate": 1.6933206223108577e-06, "loss": 0.4044, "step": 53660 }, { "epoch": 2.27, "grad_norm": 4.24268534774769, "learning_rate": 1.6923972658119414e-06, "loss": 0.3631, "step": 53665 }, { "epoch": 2.27, "grad_norm": 3.904438638145542, "learning_rate": 1.6914741098432357e-06, "loss": 0.3463, "step": 53670 }, { "epoch": 2.27, "grad_norm": 3.955096463219155, "learning_rate": 1.6905511544607105e-06, "loss": 0.3788, "step": 53675 }, { "epoch": 2.27, "grad_norm": 4.4841771134210315, "learning_rate": 1.6896283997203195e-06, "loss": 0.3582, "step": 53680 }, { "epoch": 2.27, "grad_norm": 4.439059606654289, "learning_rate": 1.688705845678007e-06, "loss": 0.3702, "step": 53685 }, { "epoch": 2.27, "grad_norm": 4.117388715887991, "learning_rate": 1.6877834923897057e-06, "loss": 0.3728, "step": 53690 }, { "epoch": 2.27, "grad_norm": 4.202590326056345, "learning_rate": 1.686861339911336e-06, "loss": 0.365, "step": 53695 }, { "epoch": 2.27, "grad_norm": 4.358550415251707, "learning_rate": 1.685939388298803e-06, "loss": 0.3402, "step": 53700 }, { "epoch": 2.27, "grad_norm": 4.000296034883441, "learning_rate": 1.6850176376080025e-06, "loss": 0.3448, "step": 53705 }, { "epoch": 2.27, "grad_norm": 4.139867775382686, "learning_rate": 1.6840960878948198e-06, "loss": 0.3394, "step": 53710 }, { "epoch": 2.27, "grad_norm": 6.22044320979231, "learning_rate": 1.6831747392151226e-06, "loss": 0.3514, "step": 53715 }, { "epoch": 2.27, "grad_norm": 5.450863336664558, "learning_rate": 1.6822535916247707e-06, "loss": 0.3701, "step": 53720 }, { "epoch": 2.27, "grad_norm": 4.044017424452533, "learning_rate": 1.6813326451796125e-06, "loss": 0.3698, "step": 53725 }, { "epoch": 2.27, "grad_norm": 4.088624488258786, "learning_rate": 1.680411899935479e-06, "loss": 0.358, "step": 53730 }, { "epoch": 2.27, "grad_norm": 4.889949864823278, "learning_rate": 1.679491355948194e-06, "loss": 0.3497, "step": 53735 }, { "epoch": 2.27, "grad_norm": 5.89628743852143, "learning_rate": 1.6785710132735665e-06, "loss": 0.3449, "step": 53740 }, { "epoch": 2.27, "grad_norm": 4.331833851205384, "learning_rate": 1.6776508719673972e-06, "loss": 0.3403, "step": 53745 }, { "epoch": 2.27, "grad_norm": 4.432662568383738, "learning_rate": 1.6767309320854675e-06, "loss": 0.3597, "step": 53750 }, { "epoch": 2.28, "grad_norm": 4.005956186508215, "learning_rate": 1.675811193683552e-06, "loss": 0.3582, "step": 53755 }, { "epoch": 2.28, "grad_norm": 4.165452087193429, "learning_rate": 1.674891656817414e-06, "loss": 0.3514, "step": 53760 }, { "epoch": 2.28, "grad_norm": 4.541740515105331, "learning_rate": 1.6739723215427989e-06, "loss": 0.3393, "step": 53765 }, { "epoch": 2.28, "grad_norm": 4.393627239289327, "learning_rate": 1.6730531879154466e-06, "loss": 0.3717, "step": 53770 }, { "epoch": 2.28, "grad_norm": 4.177844615958607, "learning_rate": 1.6721342559910776e-06, "loss": 0.3664, "step": 53775 }, { "epoch": 2.28, "grad_norm": 3.8877402778645456, "learning_rate": 1.671215525825408e-06, "loss": 0.3792, "step": 53780 }, { "epoch": 2.28, "grad_norm": 4.082938698169374, "learning_rate": 1.670296997474134e-06, "loss": 0.3672, "step": 53785 }, { "epoch": 2.28, "grad_norm": 4.761175571373643, "learning_rate": 1.6693786709929456e-06, "loss": 0.3511, "step": 53790 }, { "epoch": 2.28, "grad_norm": 4.957586432011262, "learning_rate": 1.6684605464375186e-06, "loss": 0.353, "step": 53795 }, { "epoch": 2.28, "grad_norm": 4.862241561782925, "learning_rate": 1.667542623863514e-06, "loss": 0.3819, "step": 53800 }, { "epoch": 2.28, "grad_norm": 3.7832819532305297, "learning_rate": 1.666624903326584e-06, "loss": 0.3572, "step": 53805 }, { "epoch": 2.28, "grad_norm": 3.809256140248226, "learning_rate": 1.6657073848823674e-06, "loss": 0.3318, "step": 53810 }, { "epoch": 2.28, "grad_norm": 4.678544404311857, "learning_rate": 1.664790068586492e-06, "loss": 0.3448, "step": 53815 }, { "epoch": 2.28, "grad_norm": 3.968567111848225, "learning_rate": 1.6638729544945692e-06, "loss": 0.3687, "step": 53820 }, { "epoch": 2.28, "grad_norm": 4.1825118829528165, "learning_rate": 1.662956042662202e-06, "loss": 0.3569, "step": 53825 }, { "epoch": 2.28, "grad_norm": 4.373804070411063, "learning_rate": 1.6620393331449824e-06, "loss": 0.356, "step": 53830 }, { "epoch": 2.28, "grad_norm": 4.730073924046188, "learning_rate": 1.661122825998484e-06, "loss": 0.3503, "step": 53835 }, { "epoch": 2.28, "grad_norm": 4.324773873405594, "learning_rate": 1.6602065212782737e-06, "loss": 0.3527, "step": 53840 }, { "epoch": 2.28, "grad_norm": 3.9049361054855707, "learning_rate": 1.6592904190399056e-06, "loss": 0.3586, "step": 53845 }, { "epoch": 2.28, "grad_norm": 4.529696152015119, "learning_rate": 1.6583745193389172e-06, "loss": 0.3777, "step": 53850 }, { "epoch": 2.28, "grad_norm": 6.295943552280052, "learning_rate": 1.657458822230839e-06, "loss": 0.3517, "step": 53855 }, { "epoch": 2.28, "grad_norm": 3.8195071766793327, "learning_rate": 1.6565433277711884e-06, "loss": 0.3741, "step": 53860 }, { "epoch": 2.28, "grad_norm": 5.976281948370772, "learning_rate": 1.655628036015466e-06, "loss": 0.3712, "step": 53865 }, { "epoch": 2.28, "grad_norm": 3.848172801210396, "learning_rate": 1.6547129470191642e-06, "loss": 0.3372, "step": 53870 }, { "epoch": 2.28, "grad_norm": 3.9888022135396084, "learning_rate": 1.6537980608377625e-06, "loss": 0.34, "step": 53875 }, { "epoch": 2.28, "grad_norm": 3.884714561540338, "learning_rate": 1.6528833775267294e-06, "loss": 0.3566, "step": 53880 }, { "epoch": 2.28, "grad_norm": 4.861421660821195, "learning_rate": 1.651968897141516e-06, "loss": 0.3564, "step": 53885 }, { "epoch": 2.28, "grad_norm": 4.029863730122377, "learning_rate": 1.651054619737567e-06, "loss": 0.3521, "step": 53890 }, { "epoch": 2.28, "grad_norm": 4.0401136785269784, "learning_rate": 1.650140545370313e-06, "loss": 0.3689, "step": 53895 }, { "epoch": 2.28, "grad_norm": 5.992637725002927, "learning_rate": 1.649226674095169e-06, "loss": 0.3786, "step": 53900 }, { "epoch": 2.28, "grad_norm": 4.603948815781125, "learning_rate": 1.6483130059675418e-06, "loss": 0.3807, "step": 53905 }, { "epoch": 2.28, "grad_norm": 4.314905509092832, "learning_rate": 1.6473995410428256e-06, "loss": 0.3813, "step": 53910 }, { "epoch": 2.28, "grad_norm": 3.742272897624811, "learning_rate": 1.6464862793763987e-06, "loss": 0.3423, "step": 53915 }, { "epoch": 2.28, "grad_norm": 4.140519683103931, "learning_rate": 1.6455732210236302e-06, "loss": 0.354, "step": 53920 }, { "epoch": 2.28, "grad_norm": 3.9568868787192897, "learning_rate": 1.6446603660398763e-06, "loss": 0.3618, "step": 53925 }, { "epoch": 2.28, "grad_norm": 4.632563943908224, "learning_rate": 1.6437477144804831e-06, "loss": 0.3693, "step": 53930 }, { "epoch": 2.28, "grad_norm": 4.511488474185682, "learning_rate": 1.6428352664007796e-06, "loss": 0.3894, "step": 53935 }, { "epoch": 2.28, "grad_norm": 3.9434512065821616, "learning_rate": 1.6419230218560834e-06, "loss": 0.3592, "step": 53940 }, { "epoch": 2.28, "grad_norm": 3.8746032384581377, "learning_rate": 1.6410109809017028e-06, "loss": 0.3417, "step": 53945 }, { "epoch": 2.28, "grad_norm": 4.348400461265312, "learning_rate": 1.6400991435929336e-06, "loss": 0.3358, "step": 53950 }, { "epoch": 2.28, "grad_norm": 6.545580126116169, "learning_rate": 1.639187509985055e-06, "loss": 0.3457, "step": 53955 }, { "epoch": 2.28, "grad_norm": 6.090312521635988, "learning_rate": 1.6382760801333381e-06, "loss": 0.3776, "step": 53960 }, { "epoch": 2.28, "grad_norm": 4.490438032829979, "learning_rate": 1.637364854093042e-06, "loss": 0.3778, "step": 53965 }, { "epoch": 2.28, "grad_norm": 6.061629522302544, "learning_rate": 1.636453831919408e-06, "loss": 0.3633, "step": 53970 }, { "epoch": 2.28, "grad_norm": 4.938051934754005, "learning_rate": 1.6355430136676702e-06, "loss": 0.3592, "step": 53975 }, { "epoch": 2.28, "grad_norm": 6.940753640065751, "learning_rate": 1.6346323993930513e-06, "loss": 0.3467, "step": 53980 }, { "epoch": 2.28, "grad_norm": 4.1890036538453215, "learning_rate": 1.6337219891507549e-06, "loss": 0.3504, "step": 53985 }, { "epoch": 2.28, "grad_norm": 3.9636722645967186, "learning_rate": 1.6328117829959795e-06, "loss": 0.3724, "step": 53990 }, { "epoch": 2.29, "grad_norm": 4.440576461666571, "learning_rate": 1.6319017809839066e-06, "loss": 0.3636, "step": 53995 }, { "epoch": 2.29, "grad_norm": 3.980594556174794, "learning_rate": 1.6309919831697092e-06, "loss": 0.353, "step": 54000 }, { "epoch": 2.29, "grad_norm": 4.38553438441278, "learning_rate": 1.6300823896085428e-06, "loss": 0.3745, "step": 54005 }, { "epoch": 2.29, "grad_norm": 3.89829446284253, "learning_rate": 1.6291730003555545e-06, "loss": 0.3358, "step": 54010 }, { "epoch": 2.29, "grad_norm": 4.086969881960062, "learning_rate": 1.6282638154658798e-06, "loss": 0.3262, "step": 54015 }, { "epoch": 2.29, "grad_norm": 4.195947151810937, "learning_rate": 1.6273548349946366e-06, "loss": 0.3579, "step": 54020 }, { "epoch": 2.29, "grad_norm": 5.698918289129321, "learning_rate": 1.626446058996935e-06, "loss": 0.3538, "step": 54025 }, { "epoch": 2.29, "grad_norm": 4.583908630926258, "learning_rate": 1.625537487527874e-06, "loss": 0.3641, "step": 54030 }, { "epoch": 2.29, "grad_norm": 4.0732035707624, "learning_rate": 1.624629120642533e-06, "loss": 0.3575, "step": 54035 }, { "epoch": 2.29, "grad_norm": 4.12736534965869, "learning_rate": 1.6237209583959862e-06, "loss": 0.3829, "step": 54040 }, { "epoch": 2.29, "grad_norm": 4.341394177900301, "learning_rate": 1.622813000843293e-06, "loss": 0.3453, "step": 54045 }, { "epoch": 2.29, "grad_norm": 4.4463287160370895, "learning_rate": 1.6219052480395008e-06, "loss": 0.3461, "step": 54050 }, { "epoch": 2.29, "grad_norm": 4.736034861548534, "learning_rate": 1.6209977000396415e-06, "loss": 0.3475, "step": 54055 }, { "epoch": 2.29, "grad_norm": 4.941732501968647, "learning_rate": 1.620090356898738e-06, "loss": 0.3634, "step": 54060 }, { "epoch": 2.29, "grad_norm": 4.113572760744246, "learning_rate": 1.6191832186718026e-06, "loss": 0.363, "step": 54065 }, { "epoch": 2.29, "grad_norm": 4.316465409997266, "learning_rate": 1.6182762854138284e-06, "loss": 0.3669, "step": 54070 }, { "epoch": 2.29, "grad_norm": 4.902919805053425, "learning_rate": 1.6173695571798016e-06, "loss": 0.3724, "step": 54075 }, { "epoch": 2.29, "grad_norm": 3.5987229966565293, "learning_rate": 1.6164630340246962e-06, "loss": 0.3544, "step": 54080 }, { "epoch": 2.29, "grad_norm": 4.638539649125324, "learning_rate": 1.6155567160034692e-06, "loss": 0.3779, "step": 54085 }, { "epoch": 2.29, "grad_norm": 5.080423649524689, "learning_rate": 1.614650603171069e-06, "loss": 0.3598, "step": 54090 }, { "epoch": 2.29, "grad_norm": 4.096004492737438, "learning_rate": 1.6137446955824327e-06, "loss": 0.3701, "step": 54095 }, { "epoch": 2.29, "grad_norm": 3.9395172309290376, "learning_rate": 1.612838993292481e-06, "loss": 0.3298, "step": 54100 }, { "epoch": 2.29, "grad_norm": 4.510374501224488, "learning_rate": 1.6119334963561218e-06, "loss": 0.375, "step": 54105 }, { "epoch": 2.29, "grad_norm": 3.7503732736481483, "learning_rate": 1.6110282048282554e-06, "loss": 0.3841, "step": 54110 }, { "epoch": 2.29, "grad_norm": 6.05969682723419, "learning_rate": 1.610123118763766e-06, "loss": 0.3658, "step": 54115 }, { "epoch": 2.29, "grad_norm": 3.92240969821514, "learning_rate": 1.6092182382175287e-06, "loss": 0.3448, "step": 54120 }, { "epoch": 2.29, "grad_norm": 3.969431447082445, "learning_rate": 1.6083135632444002e-06, "loss": 0.3707, "step": 54125 }, { "epoch": 2.29, "grad_norm": 4.865708207745226, "learning_rate": 1.6074090938992294e-06, "loss": 0.3731, "step": 54130 }, { "epoch": 2.29, "grad_norm": 4.678214546814403, "learning_rate": 1.606504830236854e-06, "loss": 0.3281, "step": 54135 }, { "epoch": 2.29, "grad_norm": 4.147521254906464, "learning_rate": 1.605600772312093e-06, "loss": 0.3434, "step": 54140 }, { "epoch": 2.29, "grad_norm": 4.892202008027043, "learning_rate": 1.6046969201797585e-06, "loss": 0.3651, "step": 54145 }, { "epoch": 2.29, "grad_norm": 5.662867757518447, "learning_rate": 1.6037932738946504e-06, "loss": 0.3849, "step": 54150 }, { "epoch": 2.29, "grad_norm": 4.533982260557702, "learning_rate": 1.6028898335115512e-06, "loss": 0.3697, "step": 54155 }, { "epoch": 2.29, "grad_norm": 3.9329376652717856, "learning_rate": 1.601986599085234e-06, "loss": 0.3381, "step": 54160 }, { "epoch": 2.29, "grad_norm": 4.928528047824139, "learning_rate": 1.6010835706704619e-06, "loss": 0.3533, "step": 54165 }, { "epoch": 2.29, "grad_norm": 4.274240210819913, "learning_rate": 1.60018074832198e-06, "loss": 0.321, "step": 54170 }, { "epoch": 2.29, "grad_norm": 3.707991744920607, "learning_rate": 1.5992781320945244e-06, "loss": 0.3396, "step": 54175 }, { "epoch": 2.29, "grad_norm": 3.676013712742363, "learning_rate": 1.5983757220428182e-06, "loss": 0.3376, "step": 54180 }, { "epoch": 2.29, "grad_norm": 4.939713354876155, "learning_rate": 1.5974735182215745e-06, "loss": 0.3757, "step": 54185 }, { "epoch": 2.29, "grad_norm": 4.242562341679641, "learning_rate": 1.5965715206854865e-06, "loss": 0.3601, "step": 54190 }, { "epoch": 2.29, "grad_norm": 4.066942725453382, "learning_rate": 1.5956697294892426e-06, "loss": 0.3715, "step": 54195 }, { "epoch": 2.29, "grad_norm": 4.109300405520384, "learning_rate": 1.5947681446875168e-06, "loss": 0.3469, "step": 54200 }, { "epoch": 2.29, "grad_norm": 4.252417062689107, "learning_rate": 1.593866766334966e-06, "loss": 0.3737, "step": 54205 }, { "epoch": 2.29, "grad_norm": 4.2258718928596775, "learning_rate": 1.5929655944862405e-06, "loss": 0.3621, "step": 54210 }, { "epoch": 2.29, "grad_norm": 4.036467177114945, "learning_rate": 1.5920646291959768e-06, "loss": 0.3386, "step": 54215 }, { "epoch": 2.29, "grad_norm": 3.9361625880776874, "learning_rate": 1.5911638705187943e-06, "loss": 0.352, "step": 54220 }, { "epoch": 2.29, "grad_norm": 3.953098875507561, "learning_rate": 1.5902633185093058e-06, "loss": 0.3347, "step": 54225 }, { "epoch": 2.3, "grad_norm": 4.964339110373232, "learning_rate": 1.5893629732221084e-06, "loss": 0.3668, "step": 54230 }, { "epoch": 2.3, "grad_norm": 4.647972259228924, "learning_rate": 1.5884628347117887e-06, "loss": 0.367, "step": 54235 }, { "epoch": 2.3, "grad_norm": 3.7886093819231967, "learning_rate": 1.587562903032917e-06, "loss": 0.3539, "step": 54240 }, { "epoch": 2.3, "grad_norm": 4.171963771955341, "learning_rate": 1.5866631782400549e-06, "loss": 0.3662, "step": 54245 }, { "epoch": 2.3, "grad_norm": 4.8748684689839, "learning_rate": 1.5857636603877518e-06, "loss": 0.3691, "step": 54250 }, { "epoch": 2.3, "grad_norm": 3.9899482962934925, "learning_rate": 1.5848643495305389e-06, "loss": 0.3676, "step": 54255 }, { "epoch": 2.3, "grad_norm": 4.861892248603292, "learning_rate": 1.5839652457229433e-06, "loss": 0.3909, "step": 54260 }, { "epoch": 2.3, "grad_norm": 3.832836269589828, "learning_rate": 1.5830663490194703e-06, "loss": 0.3186, "step": 54265 }, { "epoch": 2.3, "grad_norm": 3.576846228157823, "learning_rate": 1.5821676594746211e-06, "loss": 0.3345, "step": 54270 }, { "epoch": 2.3, "grad_norm": 4.243372339194075, "learning_rate": 1.5812691771428778e-06, "loss": 0.3501, "step": 54275 }, { "epoch": 2.3, "grad_norm": 4.799899103952548, "learning_rate": 1.5803709020787144e-06, "loss": 0.3661, "step": 54280 }, { "epoch": 2.3, "grad_norm": 3.9924482757247732, "learning_rate": 1.5794728343365916e-06, "loss": 0.3308, "step": 54285 }, { "epoch": 2.3, "grad_norm": 3.9579642517354436, "learning_rate": 1.5785749739709538e-06, "loss": 0.3516, "step": 54290 }, { "epoch": 2.3, "grad_norm": 3.6627928235415723, "learning_rate": 1.5776773210362373e-06, "loss": 0.3574, "step": 54295 }, { "epoch": 2.3, "grad_norm": 4.621492241620413, "learning_rate": 1.576779875586864e-06, "loss": 0.3765, "step": 54300 }, { "epoch": 2.3, "grad_norm": 4.273438159161337, "learning_rate": 1.5758826376772452e-06, "loss": 0.3492, "step": 54305 }, { "epoch": 2.3, "grad_norm": 5.888323589021636, "learning_rate": 1.5749856073617743e-06, "loss": 0.3863, "step": 54310 }, { "epoch": 2.3, "grad_norm": 6.8475785737430135, "learning_rate": 1.574088784694837e-06, "loss": 0.3925, "step": 54315 }, { "epoch": 2.3, "grad_norm": 4.2768154266216465, "learning_rate": 1.5731921697308072e-06, "loss": 0.3478, "step": 54320 }, { "epoch": 2.3, "grad_norm": 4.644933878248935, "learning_rate": 1.5722957625240403e-06, "loss": 0.3456, "step": 54325 }, { "epoch": 2.3, "grad_norm": 3.9362747564959126, "learning_rate": 1.5713995631288853e-06, "loss": 0.3455, "step": 54330 }, { "epoch": 2.3, "grad_norm": 4.198280074306882, "learning_rate": 1.5705035715996764e-06, "loss": 0.3642, "step": 54335 }, { "epoch": 2.3, "grad_norm": 3.5689660725406824, "learning_rate": 1.5696077879907329e-06, "loss": 0.3526, "step": 54340 }, { "epoch": 2.3, "grad_norm": 4.383629727664212, "learning_rate": 1.5687122123563647e-06, "loss": 0.3638, "step": 54345 }, { "epoch": 2.3, "grad_norm": 4.111614594638951, "learning_rate": 1.5678168447508673e-06, "loss": 0.3539, "step": 54350 }, { "epoch": 2.3, "grad_norm": 3.748408307940443, "learning_rate": 1.566921685228527e-06, "loss": 0.3457, "step": 54355 }, { "epoch": 2.3, "grad_norm": 3.9945781072614888, "learning_rate": 1.5660267338436107e-06, "loss": 0.3314, "step": 54360 }, { "epoch": 2.3, "grad_norm": 5.040786609556089, "learning_rate": 1.5651319906503782e-06, "loss": 0.3486, "step": 54365 }, { "epoch": 2.3, "grad_norm": 4.258217828415274, "learning_rate": 1.564237455703077e-06, "loss": 0.3925, "step": 54370 }, { "epoch": 2.3, "grad_norm": 5.030547960070974, "learning_rate": 1.563343129055937e-06, "loss": 0.3454, "step": 54375 }, { "epoch": 2.3, "grad_norm": 5.711019105295505, "learning_rate": 1.56244901076318e-06, "loss": 0.3578, "step": 54380 }, { "epoch": 2.3, "grad_norm": 4.293216098227058, "learning_rate": 1.5615551008790158e-06, "loss": 0.3568, "step": 54385 }, { "epoch": 2.3, "grad_norm": 4.345646803166752, "learning_rate": 1.5606613994576358e-06, "loss": 0.3434, "step": 54390 }, { "epoch": 2.3, "grad_norm": 3.870418942169404, "learning_rate": 1.559767906553224e-06, "loss": 0.3383, "step": 54395 }, { "epoch": 2.3, "grad_norm": 3.917107018287111, "learning_rate": 1.5588746222199508e-06, "loss": 0.3568, "step": 54400 }, { "epoch": 2.3, "grad_norm": 4.608625122301406, "learning_rate": 1.5579815465119746e-06, "loss": 0.3297, "step": 54405 }, { "epoch": 2.3, "grad_norm": 4.237061293918931, "learning_rate": 1.5570886794834366e-06, "loss": 0.3553, "step": 54410 }, { "epoch": 2.3, "grad_norm": 4.890301608714642, "learning_rate": 1.5561960211884709e-06, "loss": 0.3636, "step": 54415 }, { "epoch": 2.3, "grad_norm": 4.672545805304668, "learning_rate": 1.5553035716811975e-06, "loss": 0.3545, "step": 54420 }, { "epoch": 2.3, "grad_norm": 4.285513502410438, "learning_rate": 1.5544113310157222e-06, "loss": 0.3621, "step": 54425 }, { "epoch": 2.3, "grad_norm": 4.195289254714602, "learning_rate": 1.5535192992461372e-06, "loss": 0.3431, "step": 54430 }, { "epoch": 2.3, "grad_norm": 3.975971656082313, "learning_rate": 1.5526274764265248e-06, "loss": 0.3564, "step": 54435 }, { "epoch": 2.3, "grad_norm": 4.6232849208679685, "learning_rate": 1.5517358626109557e-06, "loss": 0.3481, "step": 54440 }, { "epoch": 2.3, "grad_norm": 3.8645433259674706, "learning_rate": 1.550844457853482e-06, "loss": 0.3708, "step": 54445 }, { "epoch": 2.3, "grad_norm": 5.030236020665636, "learning_rate": 1.5499532622081497e-06, "loss": 0.3477, "step": 54450 }, { "epoch": 2.3, "grad_norm": 3.8824498299346897, "learning_rate": 1.5490622757289898e-06, "loss": 0.3585, "step": 54455 }, { "epoch": 2.3, "grad_norm": 4.55262600883059, "learning_rate": 1.5481714984700175e-06, "loss": 0.335, "step": 54460 }, { "epoch": 2.31, "grad_norm": 5.217657656721012, "learning_rate": 1.5472809304852398e-06, "loss": 0.3586, "step": 54465 }, { "epoch": 2.31, "grad_norm": 4.9833721421229535, "learning_rate": 1.5463905718286503e-06, "loss": 0.3441, "step": 54470 }, { "epoch": 2.31, "grad_norm": 4.768387392257179, "learning_rate": 1.5455004225542258e-06, "loss": 0.3505, "step": 54475 }, { "epoch": 2.31, "grad_norm": 5.126358559302379, "learning_rate": 1.5446104827159352e-06, "loss": 0.3399, "step": 54480 }, { "epoch": 2.31, "grad_norm": 4.1187375614476265, "learning_rate": 1.5437207523677332e-06, "loss": 0.3527, "step": 54485 }, { "epoch": 2.31, "grad_norm": 3.905465351404854, "learning_rate": 1.5428312315635623e-06, "loss": 0.3341, "step": 54490 }, { "epoch": 2.31, "grad_norm": 4.426213750926166, "learning_rate": 1.5419419203573493e-06, "loss": 0.3896, "step": 54495 }, { "epoch": 2.31, "grad_norm": 4.271299589331983, "learning_rate": 1.5410528188030117e-06, "loss": 0.3403, "step": 54500 }, { "epoch": 2.31, "grad_norm": 4.613707296371552, "learning_rate": 1.5401639269544538e-06, "loss": 0.3567, "step": 54505 }, { "epoch": 2.31, "grad_norm": 4.09305132112167, "learning_rate": 1.5392752448655652e-06, "loss": 0.3676, "step": 54510 }, { "epoch": 2.31, "grad_norm": 5.488485063581074, "learning_rate": 1.5383867725902236e-06, "loss": 0.3362, "step": 54515 }, { "epoch": 2.31, "grad_norm": 3.8484101083788125, "learning_rate": 1.5374985101822976e-06, "loss": 0.3652, "step": 54520 }, { "epoch": 2.31, "grad_norm": 3.8554772214689526, "learning_rate": 1.536610457695636e-06, "loss": 0.3479, "step": 54525 }, { "epoch": 2.31, "grad_norm": 4.712726932185907, "learning_rate": 1.535722615184081e-06, "loss": 0.3596, "step": 54530 }, { "epoch": 2.31, "grad_norm": 3.846021487830665, "learning_rate": 1.534834982701459e-06, "loss": 0.3692, "step": 54535 }, { "epoch": 2.31, "grad_norm": 4.242382626604983, "learning_rate": 1.533947560301587e-06, "loss": 0.3297, "step": 54540 }, { "epoch": 2.31, "grad_norm": 3.68054097154349, "learning_rate": 1.5330603480382627e-06, "loss": 0.3594, "step": 54545 }, { "epoch": 2.31, "grad_norm": 4.1638448492734765, "learning_rate": 1.5321733459652776e-06, "loss": 0.3747, "step": 54550 }, { "epoch": 2.31, "grad_norm": 3.8901708019677828, "learning_rate": 1.5312865541364091e-06, "loss": 0.3532, "step": 54555 }, { "epoch": 2.31, "grad_norm": 5.184278415049343, "learning_rate": 1.530399972605418e-06, "loss": 0.3414, "step": 54560 }, { "epoch": 2.31, "grad_norm": 5.194221233850725, "learning_rate": 1.529513601426057e-06, "loss": 0.3571, "step": 54565 }, { "epoch": 2.31, "grad_norm": 4.057713950237074, "learning_rate": 1.5286274406520647e-06, "loss": 0.3403, "step": 54570 }, { "epoch": 2.31, "grad_norm": 4.290901052679981, "learning_rate": 1.527741490337164e-06, "loss": 0.3651, "step": 54575 }, { "epoch": 2.31, "grad_norm": 3.8386264888140786, "learning_rate": 1.52685575053507e-06, "loss": 0.3461, "step": 54580 }, { "epoch": 2.31, "grad_norm": 4.030908843819734, "learning_rate": 1.5259702212994803e-06, "loss": 0.3616, "step": 54585 }, { "epoch": 2.31, "grad_norm": 3.58900924208917, "learning_rate": 1.5250849026840847e-06, "loss": 0.3449, "step": 54590 }, { "epoch": 2.31, "grad_norm": 4.165105511076327, "learning_rate": 1.524199794742554e-06, "loss": 0.3585, "step": 54595 }, { "epoch": 2.31, "grad_norm": 3.949481408332809, "learning_rate": 1.5233148975285511e-06, "loss": 0.3779, "step": 54600 }, { "epoch": 2.31, "grad_norm": 3.8051735822646995, "learning_rate": 1.5224302110957256e-06, "loss": 0.3275, "step": 54605 }, { "epoch": 2.31, "grad_norm": 5.234490750159869, "learning_rate": 1.5215457354977142e-06, "loss": 0.3627, "step": 54610 }, { "epoch": 2.31, "grad_norm": 4.696538255496593, "learning_rate": 1.5206614707881367e-06, "loss": 0.3456, "step": 54615 }, { "epoch": 2.31, "grad_norm": 4.142745589436863, "learning_rate": 1.519777417020606e-06, "loss": 0.3626, "step": 54620 }, { "epoch": 2.31, "grad_norm": 4.455452016624465, "learning_rate": 1.518893574248721e-06, "loss": 0.3481, "step": 54625 }, { "epoch": 2.31, "grad_norm": 4.756356387535101, "learning_rate": 1.5180099425260625e-06, "loss": 0.3673, "step": 54630 }, { "epoch": 2.31, "grad_norm": 4.132544145561781, "learning_rate": 1.5171265219062048e-06, "loss": 0.3539, "step": 54635 }, { "epoch": 2.31, "grad_norm": 4.224546957526565, "learning_rate": 1.516243312442709e-06, "loss": 0.3725, "step": 54640 }, { "epoch": 2.31, "grad_norm": 4.417291184044669, "learning_rate": 1.515360314189117e-06, "loss": 0.3601, "step": 54645 }, { "epoch": 2.31, "grad_norm": 6.015782224684628, "learning_rate": 1.5144775271989653e-06, "loss": 0.3319, "step": 54650 }, { "epoch": 2.31, "grad_norm": 4.818772574066071, "learning_rate": 1.513594951525774e-06, "loss": 0.3899, "step": 54655 }, { "epoch": 2.31, "grad_norm": 4.155469987480383, "learning_rate": 1.5127125872230536e-06, "loss": 0.3472, "step": 54660 }, { "epoch": 2.31, "grad_norm": 5.129356873051151, "learning_rate": 1.5118304343442947e-06, "loss": 0.3573, "step": 54665 }, { "epoch": 2.31, "grad_norm": 5.24383810235975, "learning_rate": 1.5109484929429818e-06, "loss": 0.3569, "step": 54670 }, { "epoch": 2.31, "grad_norm": 4.729161233369576, "learning_rate": 1.5100667630725856e-06, "loss": 0.3514, "step": 54675 }, { "epoch": 2.31, "grad_norm": 4.661766609408095, "learning_rate": 1.5091852447865607e-06, "loss": 0.3387, "step": 54680 }, { "epoch": 2.31, "grad_norm": 7.3151684908094445, "learning_rate": 1.508303938138352e-06, "loss": 0.3551, "step": 54685 }, { "epoch": 2.31, "grad_norm": 4.029977165359439, "learning_rate": 1.5074228431813914e-06, "loss": 0.3425, "step": 54690 }, { "epoch": 2.31, "grad_norm": 4.311875253260442, "learning_rate": 1.5065419599690944e-06, "loss": 0.3704, "step": 54695 }, { "epoch": 2.32, "grad_norm": 4.870749532162497, "learning_rate": 1.5056612885548688e-06, "loss": 0.387, "step": 54700 }, { "epoch": 2.32, "grad_norm": 4.842078782439271, "learning_rate": 1.5047808289921056e-06, "loss": 0.3701, "step": 54705 }, { "epoch": 2.32, "grad_norm": 6.040159327264844, "learning_rate": 1.5039005813341873e-06, "loss": 0.3776, "step": 54710 }, { "epoch": 2.32, "grad_norm": 4.793420440112233, "learning_rate": 1.5030205456344772e-06, "loss": 0.3645, "step": 54715 }, { "epoch": 2.32, "grad_norm": 5.0452165662670065, "learning_rate": 1.5021407219463302e-06, "loss": 0.374, "step": 54720 }, { "epoch": 2.32, "grad_norm": 5.5105079348272366, "learning_rate": 1.50126111032309e-06, "loss": 0.3564, "step": 54725 }, { "epoch": 2.32, "grad_norm": 3.900807702448875, "learning_rate": 1.5003817108180807e-06, "loss": 0.3663, "step": 54730 }, { "epoch": 2.32, "grad_norm": 3.933343415261964, "learning_rate": 1.4995025234846205e-06, "loss": 0.3652, "step": 54735 }, { "epoch": 2.32, "grad_norm": 4.457793374976396, "learning_rate": 1.4986235483760126e-06, "loss": 0.3318, "step": 54740 }, { "epoch": 2.32, "grad_norm": 4.7322497060818405, "learning_rate": 1.4977447855455451e-06, "loss": 0.3618, "step": 54745 }, { "epoch": 2.32, "grad_norm": 4.280188274763539, "learning_rate": 1.4968662350464935e-06, "loss": 0.3522, "step": 54750 }, { "epoch": 2.32, "grad_norm": 3.7128033553288846, "learning_rate": 1.4959878969321228e-06, "loss": 0.3415, "step": 54755 }, { "epoch": 2.32, "grad_norm": 4.490439323355305, "learning_rate": 1.4951097712556867e-06, "loss": 0.3298, "step": 54760 }, { "epoch": 2.32, "grad_norm": 4.189145788249495, "learning_rate": 1.4942318580704195e-06, "loss": 0.3571, "step": 54765 }, { "epoch": 2.32, "grad_norm": 5.145431798540984, "learning_rate": 1.4933541574295484e-06, "loss": 0.3365, "step": 54770 }, { "epoch": 2.32, "grad_norm": 3.5612836306860545, "learning_rate": 1.492476669386287e-06, "loss": 0.3381, "step": 54775 }, { "epoch": 2.32, "grad_norm": 4.323649033873212, "learning_rate": 1.4915993939938317e-06, "loss": 0.3385, "step": 54780 }, { "epoch": 2.32, "grad_norm": 4.740094399127605, "learning_rate": 1.4907223313053704e-06, "loss": 0.3505, "step": 54785 }, { "epoch": 2.32, "grad_norm": 4.251059386330171, "learning_rate": 1.4898454813740775e-06, "loss": 0.333, "step": 54790 }, { "epoch": 2.32, "grad_norm": 5.115704031736812, "learning_rate": 1.4889688442531154e-06, "loss": 0.3244, "step": 54795 }, { "epoch": 2.32, "grad_norm": 6.894299328325798, "learning_rate": 1.4880924199956287e-06, "loss": 0.3859, "step": 54800 }, { "epoch": 2.32, "grad_norm": 5.623584526767408, "learning_rate": 1.4872162086547538e-06, "loss": 0.3363, "step": 54805 }, { "epoch": 2.32, "grad_norm": 4.340472513680994, "learning_rate": 1.4863402102836144e-06, "loss": 0.3438, "step": 54810 }, { "epoch": 2.32, "grad_norm": 4.562430085590519, "learning_rate": 1.4854644249353161e-06, "loss": 0.3354, "step": 54815 }, { "epoch": 2.32, "grad_norm": 3.675050383311123, "learning_rate": 1.4845888526629582e-06, "loss": 0.3632, "step": 54820 }, { "epoch": 2.32, "grad_norm": 4.310334601145407, "learning_rate": 1.4837134935196247e-06, "loss": 0.3641, "step": 54825 }, { "epoch": 2.32, "grad_norm": 3.7025482435075645, "learning_rate": 1.4828383475583824e-06, "loss": 0.3679, "step": 54830 }, { "epoch": 2.32, "grad_norm": 4.293721810385579, "learning_rate": 1.481963414832291e-06, "loss": 0.3686, "step": 54835 }, { "epoch": 2.32, "grad_norm": 3.9702048547296207, "learning_rate": 1.4810886953943953e-06, "loss": 0.3234, "step": 54840 }, { "epoch": 2.32, "grad_norm": 3.8790463707830636, "learning_rate": 1.4802141892977284e-06, "loss": 0.3703, "step": 54845 }, { "epoch": 2.32, "grad_norm": 3.8246093269370887, "learning_rate": 1.4793398965953054e-06, "loss": 0.3558, "step": 54850 }, { "epoch": 2.32, "grad_norm": 3.9693095388012964, "learning_rate": 1.4784658173401344e-06, "loss": 0.3722, "step": 54855 }, { "epoch": 2.32, "grad_norm": 4.559594673145694, "learning_rate": 1.4775919515852093e-06, "loss": 0.358, "step": 54860 }, { "epoch": 2.32, "grad_norm": 3.5047566984407292, "learning_rate": 1.4767182993835073e-06, "loss": 0.3264, "step": 54865 }, { "epoch": 2.32, "grad_norm": 4.015975326022307, "learning_rate": 1.475844860787996e-06, "loss": 0.3599, "step": 54870 }, { "epoch": 2.32, "grad_norm": 6.612837539950845, "learning_rate": 1.4749716358516319e-06, "loss": 0.3724, "step": 54875 }, { "epoch": 2.32, "grad_norm": 4.5090104021344315, "learning_rate": 1.4740986246273525e-06, "loss": 0.3566, "step": 54880 }, { "epoch": 2.32, "grad_norm": 3.848175949299008, "learning_rate": 1.4732258271680877e-06, "loss": 0.3396, "step": 54885 }, { "epoch": 2.32, "grad_norm": 4.145539148320909, "learning_rate": 1.4723532435267528e-06, "loss": 0.3664, "step": 54890 }, { "epoch": 2.32, "grad_norm": 6.2972418476206355, "learning_rate": 1.4714808737562513e-06, "loss": 0.3325, "step": 54895 }, { "epoch": 2.32, "grad_norm": 3.965644271616134, "learning_rate": 1.4706087179094691e-06, "loss": 0.3274, "step": 54900 }, { "epoch": 2.32, "grad_norm": 4.040050757815286, "learning_rate": 1.4697367760392855e-06, "loss": 0.3478, "step": 54905 }, { "epoch": 2.32, "grad_norm": 3.7491681622397164, "learning_rate": 1.4688650481985611e-06, "loss": 0.3473, "step": 54910 }, { "epoch": 2.32, "grad_norm": 3.9314535362925818, "learning_rate": 1.467993534440149e-06, "loss": 0.3621, "step": 54915 }, { "epoch": 2.32, "grad_norm": 4.161599878075956, "learning_rate": 1.4671222348168835e-06, "loss": 0.3239, "step": 54920 }, { "epoch": 2.32, "grad_norm": 3.9503418436398356, "learning_rate": 1.4662511493815906e-06, "loss": 0.3513, "step": 54925 }, { "epoch": 2.32, "grad_norm": 5.302651321008852, "learning_rate": 1.465380278187083e-06, "loss": 0.334, "step": 54930 }, { "epoch": 2.32, "grad_norm": 3.749175738056873, "learning_rate": 1.4645096212861559e-06, "loss": 0.334, "step": 54935 }, { "epoch": 2.33, "grad_norm": 3.9269051051274264, "learning_rate": 1.4636391787315968e-06, "loss": 0.367, "step": 54940 }, { "epoch": 2.33, "grad_norm": 4.499298834293157, "learning_rate": 1.462768950576179e-06, "loss": 0.3291, "step": 54945 }, { "epoch": 2.33, "grad_norm": 4.058165514177957, "learning_rate": 1.4618989368726583e-06, "loss": 0.3569, "step": 54950 }, { "epoch": 2.33, "grad_norm": 4.229824167909258, "learning_rate": 1.461029137673784e-06, "loss": 0.3643, "step": 54955 }, { "epoch": 2.33, "grad_norm": 3.964847259972748, "learning_rate": 1.4601595530322882e-06, "loss": 0.3739, "step": 54960 }, { "epoch": 2.33, "grad_norm": 4.812806702163245, "learning_rate": 1.4592901830008932e-06, "loss": 0.3725, "step": 54965 }, { "epoch": 2.33, "grad_norm": 3.903635657175875, "learning_rate": 1.4584210276323036e-06, "loss": 0.3636, "step": 54970 }, { "epoch": 2.33, "grad_norm": 4.013918381004673, "learning_rate": 1.4575520869792153e-06, "loss": 0.3402, "step": 54975 }, { "epoch": 2.33, "grad_norm": 3.981856069566361, "learning_rate": 1.4566833610943104e-06, "loss": 0.3348, "step": 54980 }, { "epoch": 2.33, "grad_norm": 5.0819493093990955, "learning_rate": 1.455814850030255e-06, "loss": 0.3851, "step": 54985 }, { "epoch": 2.33, "grad_norm": 3.9158626436302235, "learning_rate": 1.4549465538397057e-06, "loss": 0.343, "step": 54990 }, { "epoch": 2.33, "grad_norm": 4.1561099321644015, "learning_rate": 1.4540784725753054e-06, "loss": 0.3607, "step": 54995 }, { "epoch": 2.33, "grad_norm": 4.330581672876071, "learning_rate": 1.453210606289681e-06, "loss": 0.3564, "step": 55000 }, { "epoch": 2.33, "grad_norm": 3.7598439176710032, "learning_rate": 1.4523429550354506e-06, "loss": 0.3496, "step": 55005 }, { "epoch": 2.33, "grad_norm": 4.110514276993395, "learning_rate": 1.451475518865218e-06, "loss": 0.3598, "step": 55010 }, { "epoch": 2.33, "grad_norm": 4.425936554800969, "learning_rate": 1.450608297831571e-06, "loss": 0.3385, "step": 55015 }, { "epoch": 2.33, "grad_norm": 4.409645223191734, "learning_rate": 1.4497412919870873e-06, "loss": 0.3342, "step": 55020 }, { "epoch": 2.33, "grad_norm": 3.7035321145067583, "learning_rate": 1.448874501384332e-06, "loss": 0.3493, "step": 55025 }, { "epoch": 2.33, "grad_norm": 3.729207651077628, "learning_rate": 1.4480079260758568e-06, "loss": 0.3634, "step": 55030 }, { "epoch": 2.33, "grad_norm": 4.041527196712236, "learning_rate": 1.4471415661141969e-06, "loss": 0.3683, "step": 55035 }, { "epoch": 2.33, "grad_norm": 4.231534648775785, "learning_rate": 1.446275421551878e-06, "loss": 0.3411, "step": 55040 }, { "epoch": 2.33, "grad_norm": 4.770910874341322, "learning_rate": 1.4454094924414148e-06, "loss": 0.3637, "step": 55045 }, { "epoch": 2.33, "grad_norm": 5.266759415682012, "learning_rate": 1.4445437788353016e-06, "loss": 0.374, "step": 55050 }, { "epoch": 2.33, "grad_norm": 6.3309693671857845, "learning_rate": 1.4436782807860267e-06, "loss": 0.3582, "step": 55055 }, { "epoch": 2.33, "grad_norm": 4.347056760015068, "learning_rate": 1.442812998346063e-06, "loss": 0.3393, "step": 55060 }, { "epoch": 2.33, "grad_norm": 3.9543437865546567, "learning_rate": 1.4419479315678698e-06, "loss": 0.3544, "step": 55065 }, { "epoch": 2.33, "grad_norm": 4.370479074613875, "learning_rate": 1.4410830805038905e-06, "loss": 0.3627, "step": 55070 }, { "epoch": 2.33, "grad_norm": 3.7904289230250656, "learning_rate": 1.4402184452065616e-06, "loss": 0.3285, "step": 55075 }, { "epoch": 2.33, "grad_norm": 3.2537987230363425, "learning_rate": 1.439354025728304e-06, "loss": 0.3517, "step": 55080 }, { "epoch": 2.33, "grad_norm": 4.0295388926119005, "learning_rate": 1.4384898221215221e-06, "loss": 0.3308, "step": 55085 }, { "epoch": 2.33, "grad_norm": 4.903153173011288, "learning_rate": 1.4376258344386118e-06, "loss": 0.3471, "step": 55090 }, { "epoch": 2.33, "grad_norm": 4.50862363989649, "learning_rate": 1.4367620627319533e-06, "loss": 0.331, "step": 55095 }, { "epoch": 2.33, "grad_norm": 4.079267157926809, "learning_rate": 1.4358985070539171e-06, "loss": 0.337, "step": 55100 }, { "epoch": 2.33, "grad_norm": 3.9588658514421446, "learning_rate": 1.4350351674568542e-06, "loss": 0.3365, "step": 55105 }, { "epoch": 2.33, "grad_norm": 3.6230614158507906, "learning_rate": 1.4341720439931083e-06, "loss": 0.3528, "step": 55110 }, { "epoch": 2.33, "grad_norm": 3.7213048380296483, "learning_rate": 1.4333091367150093e-06, "loss": 0.3281, "step": 55115 }, { "epoch": 2.33, "grad_norm": 3.8826770753095894, "learning_rate": 1.43244644567487e-06, "loss": 0.3671, "step": 55120 }, { "epoch": 2.33, "grad_norm": 7.502305015558864, "learning_rate": 1.431583970924994e-06, "loss": 0.3777, "step": 55125 }, { "epoch": 2.33, "grad_norm": 3.6638167148785827, "learning_rate": 1.430721712517672e-06, "loss": 0.3219, "step": 55130 }, { "epoch": 2.33, "grad_norm": 4.2067003247315675, "learning_rate": 1.4298596705051775e-06, "loss": 0.3492, "step": 55135 }, { "epoch": 2.33, "grad_norm": 3.898883888290832, "learning_rate": 1.428997844939775e-06, "loss": 0.3371, "step": 55140 }, { "epoch": 2.33, "grad_norm": 4.840320847532583, "learning_rate": 1.4281362358737145e-06, "loss": 0.3557, "step": 55145 }, { "epoch": 2.33, "grad_norm": 4.1757951472849815, "learning_rate": 1.4272748433592343e-06, "loss": 0.3533, "step": 55150 }, { "epoch": 2.33, "grad_norm": 5.265764544492705, "learning_rate": 1.4264136674485551e-06, "loss": 0.3611, "step": 55155 }, { "epoch": 2.33, "grad_norm": 4.538951808797188, "learning_rate": 1.4255527081938885e-06, "loss": 0.3682, "step": 55160 }, { "epoch": 2.33, "grad_norm": 4.2395222661775165, "learning_rate": 1.4246919656474346e-06, "loss": 0.3556, "step": 55165 }, { "epoch": 2.33, "grad_norm": 4.303842668172034, "learning_rate": 1.4238314398613735e-06, "loss": 0.3612, "step": 55170 }, { "epoch": 2.34, "grad_norm": 3.7933471389283695, "learning_rate": 1.422971130887878e-06, "loss": 0.3445, "step": 55175 }, { "epoch": 2.34, "grad_norm": 3.579030712320996, "learning_rate": 1.4221110387791082e-06, "loss": 0.3525, "step": 55180 }, { "epoch": 2.34, "grad_norm": 3.6831803856637624, "learning_rate": 1.421251163587205e-06, "loss": 0.3682, "step": 55185 }, { "epoch": 2.34, "grad_norm": 4.1399608102221945, "learning_rate": 1.4203915053643031e-06, "loss": 0.3834, "step": 55190 }, { "epoch": 2.34, "grad_norm": 5.4401983960482445, "learning_rate": 1.4195320641625193e-06, "loss": 0.3522, "step": 55195 }, { "epoch": 2.34, "grad_norm": 3.845814885900577, "learning_rate": 1.4186728400339616e-06, "loss": 0.3396, "step": 55200 }, { "epoch": 2.34, "grad_norm": 5.975743911665738, "learning_rate": 1.4178138330307194e-06, "loss": 0.3515, "step": 55205 }, { "epoch": 2.34, "grad_norm": 3.9030223366957415, "learning_rate": 1.4169550432048723e-06, "loss": 0.3529, "step": 55210 }, { "epoch": 2.34, "grad_norm": 3.7824323296751756, "learning_rate": 1.4160964706084883e-06, "loss": 0.3579, "step": 55215 }, { "epoch": 2.34, "grad_norm": 3.8097082542178446, "learning_rate": 1.4152381152936173e-06, "loss": 0.3266, "step": 55220 }, { "epoch": 2.34, "grad_norm": 3.933591243195999, "learning_rate": 1.4143799773123e-06, "loss": 0.3575, "step": 55225 }, { "epoch": 2.34, "grad_norm": 5.146356553603192, "learning_rate": 1.4135220567165641e-06, "loss": 0.3375, "step": 55230 }, { "epoch": 2.34, "grad_norm": 3.9988722874556673, "learning_rate": 1.4126643535584222e-06, "loss": 0.3352, "step": 55235 }, { "epoch": 2.34, "grad_norm": 4.448258150133473, "learning_rate": 1.4118068678898716e-06, "loss": 0.3545, "step": 55240 }, { "epoch": 2.34, "grad_norm": 4.409290774615804, "learning_rate": 1.410949599762902e-06, "loss": 0.3135, "step": 55245 }, { "epoch": 2.34, "grad_norm": 4.287618183791643, "learning_rate": 1.410092549229487e-06, "loss": 0.3642, "step": 55250 }, { "epoch": 2.34, "grad_norm": 4.102636634431103, "learning_rate": 1.4092357163415848e-06, "loss": 0.3587, "step": 55255 }, { "epoch": 2.34, "grad_norm": 4.830941851288035, "learning_rate": 1.408379101151145e-06, "loss": 0.3533, "step": 55260 }, { "epoch": 2.34, "grad_norm": 3.494206083549135, "learning_rate": 1.4075227037101003e-06, "loss": 0.3411, "step": 55265 }, { "epoch": 2.34, "grad_norm": 4.115211419964185, "learning_rate": 1.4066665240703731e-06, "loss": 0.3359, "step": 55270 }, { "epoch": 2.34, "grad_norm": 3.8486596680118907, "learning_rate": 1.405810562283869e-06, "loss": 0.3437, "step": 55275 }, { "epoch": 2.34, "grad_norm": 3.71487928916392, "learning_rate": 1.4049548184024837e-06, "loss": 0.3198, "step": 55280 }, { "epoch": 2.34, "grad_norm": 3.8722078192122, "learning_rate": 1.4040992924780995e-06, "loss": 0.3162, "step": 55285 }, { "epoch": 2.34, "grad_norm": 5.194733275513015, "learning_rate": 1.4032439845625818e-06, "loss": 0.3285, "step": 55290 }, { "epoch": 2.34, "grad_norm": 4.255646733236831, "learning_rate": 1.4023888947077863e-06, "loss": 0.3656, "step": 55295 }, { "epoch": 2.34, "grad_norm": 3.9646102654367352, "learning_rate": 1.4015340229655566e-06, "loss": 0.3501, "step": 55300 }, { "epoch": 2.34, "grad_norm": 3.827318706598373, "learning_rate": 1.400679369387718e-06, "loss": 0.3277, "step": 55305 }, { "epoch": 2.34, "grad_norm": 4.181024183654609, "learning_rate": 1.3998249340260873e-06, "loss": 0.3528, "step": 55310 }, { "epoch": 2.34, "grad_norm": 4.62937205311957, "learning_rate": 1.3989707169324678e-06, "loss": 0.3559, "step": 55315 }, { "epoch": 2.34, "grad_norm": 5.076289702041772, "learning_rate": 1.3981167181586447e-06, "loss": 0.3045, "step": 55320 }, { "epoch": 2.34, "grad_norm": 4.390021454149438, "learning_rate": 1.3972629377563956e-06, "loss": 0.3547, "step": 55325 }, { "epoch": 2.34, "grad_norm": 4.207551287030898, "learning_rate": 1.3964093757774821e-06, "loss": 0.3782, "step": 55330 }, { "epoch": 2.34, "grad_norm": 5.0926156674937415, "learning_rate": 1.395556032273655e-06, "loss": 0.3542, "step": 55335 }, { "epoch": 2.34, "grad_norm": 4.293898468006087, "learning_rate": 1.3947029072966466e-06, "loss": 0.3447, "step": 55340 }, { "epoch": 2.34, "grad_norm": 4.603413405753714, "learning_rate": 1.3938500008981815e-06, "loss": 0.3292, "step": 55345 }, { "epoch": 2.34, "grad_norm": 6.188241163373703, "learning_rate": 1.3929973131299702e-06, "loss": 0.3717, "step": 55350 }, { "epoch": 2.34, "grad_norm": 4.106368986001545, "learning_rate": 1.3921448440437059e-06, "loss": 0.3455, "step": 55355 }, { "epoch": 2.34, "grad_norm": 4.487163671429729, "learning_rate": 1.3912925936910716e-06, "loss": 0.3369, "step": 55360 }, { "epoch": 2.34, "grad_norm": 4.0320571019137175, "learning_rate": 1.3904405621237393e-06, "loss": 0.362, "step": 55365 }, { "epoch": 2.34, "grad_norm": 4.756532346662424, "learning_rate": 1.3895887493933618e-06, "loss": 0.3423, "step": 55370 }, { "epoch": 2.34, "grad_norm": 4.008639388891662, "learning_rate": 1.3887371555515838e-06, "loss": 0.3584, "step": 55375 }, { "epoch": 2.34, "grad_norm": 6.150479953739124, "learning_rate": 1.387885780650035e-06, "loss": 0.3468, "step": 55380 }, { "epoch": 2.34, "grad_norm": 6.8201034489847805, "learning_rate": 1.3870346247403327e-06, "loss": 0.3613, "step": 55385 }, { "epoch": 2.34, "grad_norm": 3.9871027200222926, "learning_rate": 1.3861836878740787e-06, "loss": 0.3293, "step": 55390 }, { "epoch": 2.34, "grad_norm": 4.263023714690012, "learning_rate": 1.3853329701028616e-06, "loss": 0.333, "step": 55395 }, { "epoch": 2.34, "grad_norm": 4.351444783155533, "learning_rate": 1.384482471478259e-06, "loss": 0.3608, "step": 55400 }, { "epoch": 2.34, "grad_norm": 3.944162345614734, "learning_rate": 1.383632192051836e-06, "loss": 0.3649, "step": 55405 }, { "epoch": 2.35, "grad_norm": 3.7974114056083184, "learning_rate": 1.382782131875139e-06, "loss": 0.3388, "step": 55410 }, { "epoch": 2.35, "grad_norm": 4.735181743867907, "learning_rate": 1.381932290999707e-06, "loss": 0.3941, "step": 55415 }, { "epoch": 2.35, "grad_norm": 3.8026001460057346, "learning_rate": 1.3810826694770635e-06, "loss": 0.3227, "step": 55420 }, { "epoch": 2.35, "grad_norm": 4.212846050476077, "learning_rate": 1.3802332673587172e-06, "loss": 0.3846, "step": 55425 }, { "epoch": 2.35, "grad_norm": 4.566705643787886, "learning_rate": 1.3793840846961649e-06, "loss": 0.3623, "step": 55430 }, { "epoch": 2.35, "grad_norm": 3.8995268184608, "learning_rate": 1.3785351215408926e-06, "loss": 0.3407, "step": 55435 }, { "epoch": 2.35, "grad_norm": 3.6167877910947106, "learning_rate": 1.3776863779443668e-06, "loss": 0.3639, "step": 55440 }, { "epoch": 2.35, "grad_norm": 3.785471208077401, "learning_rate": 1.3768378539580456e-06, "loss": 0.3403, "step": 55445 }, { "epoch": 2.35, "grad_norm": 4.939465010723387, "learning_rate": 1.3759895496333737e-06, "loss": 0.3681, "step": 55450 }, { "epoch": 2.35, "grad_norm": 4.730612907304052, "learning_rate": 1.375141465021782e-06, "loss": 0.3424, "step": 55455 }, { "epoch": 2.35, "grad_norm": 3.87422294839232, "learning_rate": 1.3742936001746842e-06, "loss": 0.3845, "step": 55460 }, { "epoch": 2.35, "grad_norm": 4.110447528294041, "learning_rate": 1.3734459551434853e-06, "loss": 0.336, "step": 55465 }, { "epoch": 2.35, "grad_norm": 5.129127653410787, "learning_rate": 1.3725985299795774e-06, "loss": 0.3473, "step": 55470 }, { "epoch": 2.35, "grad_norm": 4.5655910557715105, "learning_rate": 1.3717513247343345e-06, "loss": 0.3434, "step": 55475 }, { "epoch": 2.35, "grad_norm": 4.05958024109856, "learning_rate": 1.3709043394591215e-06, "loss": 0.3436, "step": 55480 }, { "epoch": 2.35, "grad_norm": 3.957447208747008, "learning_rate": 1.3700575742052897e-06, "loss": 0.3729, "step": 55485 }, { "epoch": 2.35, "grad_norm": 4.010910135203399, "learning_rate": 1.3692110290241733e-06, "loss": 0.3346, "step": 55490 }, { "epoch": 2.35, "grad_norm": 3.9818323022813673, "learning_rate": 1.3683647039670978e-06, "loss": 0.3143, "step": 55495 }, { "epoch": 2.35, "grad_norm": 3.8843343325320614, "learning_rate": 1.3675185990853723e-06, "loss": 0.3683, "step": 55500 }, { "epoch": 2.35, "grad_norm": 3.9518874183869803, "learning_rate": 1.366672714430296e-06, "loss": 0.3643, "step": 55505 }, { "epoch": 2.35, "grad_norm": 5.388592137358785, "learning_rate": 1.3658270500531496e-06, "loss": 0.3639, "step": 55510 }, { "epoch": 2.35, "grad_norm": 4.087657438327674, "learning_rate": 1.364981606005204e-06, "loss": 0.353, "step": 55515 }, { "epoch": 2.35, "grad_norm": 4.395500165851745, "learning_rate": 1.3641363823377184e-06, "loss": 0.3432, "step": 55520 }, { "epoch": 2.35, "grad_norm": 4.020965168443106, "learning_rate": 1.363291379101932e-06, "loss": 0.355, "step": 55525 }, { "epoch": 2.35, "grad_norm": 5.031467185067379, "learning_rate": 1.3624465963490774e-06, "loss": 0.3268, "step": 55530 }, { "epoch": 2.35, "grad_norm": 4.180448758465576, "learning_rate": 1.3616020341303721e-06, "loss": 0.3456, "step": 55535 }, { "epoch": 2.35, "grad_norm": 4.081239417991336, "learning_rate": 1.3607576924970167e-06, "loss": 0.3433, "step": 55540 }, { "epoch": 2.35, "grad_norm": 4.377178175391782, "learning_rate": 1.3599135715002031e-06, "loss": 0.3518, "step": 55545 }, { "epoch": 2.35, "grad_norm": 3.9698833285195647, "learning_rate": 1.3590696711911072e-06, "loss": 0.3378, "step": 55550 }, { "epoch": 2.35, "grad_norm": 4.092839442359719, "learning_rate": 1.3582259916208952e-06, "loss": 0.344, "step": 55555 }, { "epoch": 2.35, "grad_norm": 4.296058623961259, "learning_rate": 1.3573825328407114e-06, "loss": 0.353, "step": 55560 }, { "epoch": 2.35, "grad_norm": 3.9908689704850016, "learning_rate": 1.3565392949016948e-06, "loss": 0.3323, "step": 55565 }, { "epoch": 2.35, "grad_norm": 4.145262859862436, "learning_rate": 1.3556962778549687e-06, "loss": 0.3625, "step": 55570 }, { "epoch": 2.35, "grad_norm": 4.779388984786787, "learning_rate": 1.354853481751644e-06, "loss": 0.3272, "step": 55575 }, { "epoch": 2.35, "grad_norm": 4.329013288163415, "learning_rate": 1.3540109066428142e-06, "loss": 0.3505, "step": 55580 }, { "epoch": 2.35, "grad_norm": 4.0504946029171665, "learning_rate": 1.3531685525795635e-06, "loss": 0.3115, "step": 55585 }, { "epoch": 2.35, "grad_norm": 3.7605621964633253, "learning_rate": 1.3523264196129626e-06, "loss": 0.3076, "step": 55590 }, { "epoch": 2.35, "grad_norm": 3.8509597011257197, "learning_rate": 1.3514845077940653e-06, "loss": 0.3619, "step": 55595 }, { "epoch": 2.35, "grad_norm": 4.5116914437079565, "learning_rate": 1.350642817173915e-06, "loss": 0.3848, "step": 55600 }, { "epoch": 2.35, "grad_norm": 4.303479192892619, "learning_rate": 1.3498013478035426e-06, "loss": 0.3346, "step": 55605 }, { "epoch": 2.35, "grad_norm": 4.267675582147344, "learning_rate": 1.3489600997339607e-06, "loss": 0.3668, "step": 55610 }, { "epoch": 2.35, "grad_norm": 5.172539554338578, "learning_rate": 1.3481190730161742e-06, "loss": 0.3695, "step": 55615 }, { "epoch": 2.35, "grad_norm": 4.483649949038961, "learning_rate": 1.347278267701172e-06, "loss": 0.3254, "step": 55620 }, { "epoch": 2.35, "grad_norm": 3.997130439257425, "learning_rate": 1.346437683839928e-06, "loss": 0.3705, "step": 55625 }, { "epoch": 2.35, "grad_norm": 3.921495149472675, "learning_rate": 1.345597321483405e-06, "loss": 0.3438, "step": 55630 }, { "epoch": 2.35, "grad_norm": 4.066636128029731, "learning_rate": 1.3447571806825526e-06, "loss": 0.3367, "step": 55635 }, { "epoch": 2.35, "grad_norm": 3.8966105533045448, "learning_rate": 1.3439172614883067e-06, "loss": 0.3496, "step": 55640 }, { "epoch": 2.35, "grad_norm": 3.7508693037144214, "learning_rate": 1.3430775639515858e-06, "loss": 0.3449, "step": 55645 }, { "epoch": 2.36, "grad_norm": 4.33855871275355, "learning_rate": 1.342238088123301e-06, "loss": 0.3559, "step": 55650 }, { "epoch": 2.36, "grad_norm": 4.0971856127482456, "learning_rate": 1.3413988340543477e-06, "loss": 0.3479, "step": 55655 }, { "epoch": 2.36, "grad_norm": 4.46747640623213, "learning_rate": 1.340559801795605e-06, "loss": 0.3593, "step": 55660 }, { "epoch": 2.36, "grad_norm": 4.831264526982916, "learning_rate": 1.3397209913979425e-06, "loss": 0.3475, "step": 55665 }, { "epoch": 2.36, "grad_norm": 4.162810804648072, "learning_rate": 1.338882402912216e-06, "loss": 0.3626, "step": 55670 }, { "epoch": 2.36, "grad_norm": 4.739472971462204, "learning_rate": 1.3380440363892637e-06, "loss": 0.36, "step": 55675 }, { "epoch": 2.36, "grad_norm": 6.142191384800508, "learning_rate": 1.3372058918799147e-06, "loss": 0.3675, "step": 55680 }, { "epoch": 2.36, "grad_norm": 4.112393245707808, "learning_rate": 1.3363679694349834e-06, "loss": 0.3438, "step": 55685 }, { "epoch": 2.36, "grad_norm": 3.7682394535471095, "learning_rate": 1.3355302691052724e-06, "loss": 0.3467, "step": 55690 }, { "epoch": 2.36, "grad_norm": 4.102171217715294, "learning_rate": 1.3346927909415652e-06, "loss": 0.3492, "step": 55695 }, { "epoch": 2.36, "grad_norm": 3.7037749956670356, "learning_rate": 1.333855534994638e-06, "loss": 0.362, "step": 55700 }, { "epoch": 2.36, "grad_norm": 7.785678508909988, "learning_rate": 1.333018501315252e-06, "loss": 0.3692, "step": 55705 }, { "epoch": 2.36, "grad_norm": 4.245461029426809, "learning_rate": 1.3321816899541512e-06, "loss": 0.3528, "step": 55710 }, { "epoch": 2.36, "grad_norm": 5.164128537186574, "learning_rate": 1.331345100962072e-06, "loss": 0.3468, "step": 55715 }, { "epoch": 2.36, "grad_norm": 6.9848189691815366, "learning_rate": 1.3305087343897316e-06, "loss": 0.3772, "step": 55720 }, { "epoch": 2.36, "grad_norm": 4.523771692658554, "learning_rate": 1.3296725902878394e-06, "loss": 0.3539, "step": 55725 }, { "epoch": 2.36, "grad_norm": 4.019773474493285, "learning_rate": 1.3288366687070852e-06, "loss": 0.3397, "step": 55730 }, { "epoch": 2.36, "grad_norm": 4.270038306108495, "learning_rate": 1.32800096969815e-06, "loss": 0.3803, "step": 55735 }, { "epoch": 2.36, "grad_norm": 4.225382394729628, "learning_rate": 1.3271654933117012e-06, "loss": 0.3192, "step": 55740 }, { "epoch": 2.36, "grad_norm": 4.593876420552302, "learning_rate": 1.3263302395983886e-06, "loss": 0.3548, "step": 55745 }, { "epoch": 2.36, "grad_norm": 4.390086747140461, "learning_rate": 1.3254952086088518e-06, "loss": 0.3474, "step": 55750 }, { "epoch": 2.36, "grad_norm": 4.453808160952599, "learning_rate": 1.3246604003937176e-06, "loss": 0.3428, "step": 55755 }, { "epoch": 2.36, "grad_norm": 4.921844285521916, "learning_rate": 1.3238258150035983e-06, "loss": 0.3603, "step": 55760 }, { "epoch": 2.36, "grad_norm": 4.365298250537008, "learning_rate": 1.3229914524890902e-06, "loss": 0.3534, "step": 55765 }, { "epoch": 2.36, "grad_norm": 3.817963196687594, "learning_rate": 1.3221573129007793e-06, "loss": 0.3447, "step": 55770 }, { "epoch": 2.36, "grad_norm": 4.229219626446148, "learning_rate": 1.321323396289238e-06, "loss": 0.3524, "step": 55775 }, { "epoch": 2.36, "grad_norm": 4.422963194825038, "learning_rate": 1.3204897027050224e-06, "loss": 0.3764, "step": 55780 }, { "epoch": 2.36, "grad_norm": 4.489940431164984, "learning_rate": 1.3196562321986778e-06, "loss": 0.3433, "step": 55785 }, { "epoch": 2.36, "grad_norm": 4.410909456564164, "learning_rate": 1.3188229848207363e-06, "loss": 0.3363, "step": 55790 }, { "epoch": 2.36, "grad_norm": 3.6294574542768974, "learning_rate": 1.3179899606217122e-06, "loss": 0.3291, "step": 55795 }, { "epoch": 2.36, "grad_norm": 4.112139769441018, "learning_rate": 1.3171571596521116e-06, "loss": 0.3564, "step": 55800 }, { "epoch": 2.36, "grad_norm": 5.732627052020113, "learning_rate": 1.3163245819624237e-06, "loss": 0.3487, "step": 55805 }, { "epoch": 2.36, "grad_norm": 4.45956805501037, "learning_rate": 1.315492227603128e-06, "loss": 0.3471, "step": 55810 }, { "epoch": 2.36, "grad_norm": 4.177961859652178, "learning_rate": 1.3146600966246832e-06, "loss": 0.3994, "step": 55815 }, { "epoch": 2.36, "grad_norm": 4.049830124000791, "learning_rate": 1.3138281890775417e-06, "loss": 0.3345, "step": 55820 }, { "epoch": 2.36, "grad_norm": 4.9092876170129225, "learning_rate": 1.3129965050121402e-06, "loss": 0.3518, "step": 55825 }, { "epoch": 2.36, "grad_norm": 6.0184514845303365, "learning_rate": 1.3121650444788992e-06, "loss": 0.3433, "step": 55830 }, { "epoch": 2.36, "grad_norm": 3.684731485243696, "learning_rate": 1.3113338075282284e-06, "loss": 0.3503, "step": 55835 }, { "epoch": 2.36, "grad_norm": 5.227962320291398, "learning_rate": 1.3105027942105247e-06, "loss": 0.3487, "step": 55840 }, { "epoch": 2.36, "grad_norm": 3.9294827476025485, "learning_rate": 1.309672004576168e-06, "loss": 0.3484, "step": 55845 }, { "epoch": 2.36, "grad_norm": 4.401911422461429, "learning_rate": 1.3088414386755272e-06, "loss": 0.3219, "step": 55850 }, { "epoch": 2.36, "grad_norm": 4.111762800385693, "learning_rate": 1.308011096558957e-06, "loss": 0.3649, "step": 55855 }, { "epoch": 2.36, "grad_norm": 4.467962077468306, "learning_rate": 1.3071809782768008e-06, "loss": 0.3347, "step": 55860 }, { "epoch": 2.36, "grad_norm": 5.588413416092084, "learning_rate": 1.3063510838793824e-06, "loss": 0.3681, "step": 55865 }, { "epoch": 2.36, "grad_norm": 5.021150635520996, "learning_rate": 1.305521413417018e-06, "loss": 0.3125, "step": 55870 }, { "epoch": 2.36, "grad_norm": 4.391011618769206, "learning_rate": 1.3046919669400093e-06, "loss": 0.3583, "step": 55875 }, { "epoch": 2.36, "grad_norm": 4.64237703987566, "learning_rate": 1.3038627444986425e-06, "loss": 0.3331, "step": 55880 }, { "epoch": 2.37, "grad_norm": 3.7865438235615665, "learning_rate": 1.3030337461431874e-06, "loss": 0.3329, "step": 55885 }, { "epoch": 2.37, "grad_norm": 4.186188349926038, "learning_rate": 1.3022049719239073e-06, "loss": 0.3562, "step": 55890 }, { "epoch": 2.37, "grad_norm": 4.78490752133425, "learning_rate": 1.301376421891049e-06, "loss": 0.3578, "step": 55895 }, { "epoch": 2.37, "grad_norm": 3.918538994360132, "learning_rate": 1.300548096094842e-06, "loss": 0.3183, "step": 55900 }, { "epoch": 2.37, "grad_norm": 3.9138612279561666, "learning_rate": 1.2997199945855066e-06, "loss": 0.3275, "step": 55905 }, { "epoch": 2.37, "grad_norm": 3.923359895247955, "learning_rate": 1.29889211741325e-06, "loss": 0.3566, "step": 55910 }, { "epoch": 2.37, "grad_norm": 4.032891921983772, "learning_rate": 1.2980644646282614e-06, "loss": 0.3785, "step": 55915 }, { "epoch": 2.37, "grad_norm": 3.853533660744576, "learning_rate": 1.2972370362807191e-06, "loss": 0.3153, "step": 55920 }, { "epoch": 2.37, "grad_norm": 4.745807634438993, "learning_rate": 1.2964098324207902e-06, "loss": 0.3432, "step": 55925 }, { "epoch": 2.37, "grad_norm": 6.417016321105122, "learning_rate": 1.2955828530986225e-06, "loss": 0.3715, "step": 55930 }, { "epoch": 2.37, "grad_norm": 4.764829857995478, "learning_rate": 1.2947560983643549e-06, "loss": 0.3669, "step": 55935 }, { "epoch": 2.37, "grad_norm": 5.782572627433298, "learning_rate": 1.2939295682681108e-06, "loss": 0.3393, "step": 55940 }, { "epoch": 2.37, "grad_norm": 3.9253579246168524, "learning_rate": 1.2931032628600016e-06, "loss": 0.3351, "step": 55945 }, { "epoch": 2.37, "grad_norm": 4.218643236891965, "learning_rate": 1.2922771821901213e-06, "loss": 0.3566, "step": 55950 }, { "epoch": 2.37, "grad_norm": 4.86567523556772, "learning_rate": 1.2914513263085536e-06, "loss": 0.3456, "step": 55955 }, { "epoch": 2.37, "grad_norm": 3.550604741656876, "learning_rate": 1.29062569526537e-06, "loss": 0.3247, "step": 55960 }, { "epoch": 2.37, "grad_norm": 4.13616965405224, "learning_rate": 1.2898002891106225e-06, "loss": 0.3499, "step": 55965 }, { "epoch": 2.37, "grad_norm": 4.791048857521803, "learning_rate": 1.2889751078943553e-06, "loss": 0.3529, "step": 55970 }, { "epoch": 2.37, "grad_norm": 3.850706860275869, "learning_rate": 1.2881501516665966e-06, "loss": 0.3288, "step": 55975 }, { "epoch": 2.37, "grad_norm": 4.25230556934984, "learning_rate": 1.28732542047736e-06, "loss": 0.3476, "step": 55980 }, { "epoch": 2.37, "grad_norm": 4.208737602593975, "learning_rate": 1.2865009143766466e-06, "loss": 0.3352, "step": 55985 }, { "epoch": 2.37, "grad_norm": 4.021362939462503, "learning_rate": 1.2856766334144444e-06, "loss": 0.3421, "step": 55990 }, { "epoch": 2.37, "grad_norm": 4.00083711490453, "learning_rate": 1.2848525776407283e-06, "loss": 0.3668, "step": 55995 }, { "epoch": 2.37, "grad_norm": 4.676075614243951, "learning_rate": 1.2840287471054557e-06, "loss": 0.3721, "step": 56000 }, { "epoch": 2.37, "grad_norm": 3.82609149913964, "learning_rate": 1.283205141858575e-06, "loss": 0.38, "step": 56005 }, { "epoch": 2.37, "grad_norm": 3.81295511228173, "learning_rate": 1.2823817619500195e-06, "loss": 0.365, "step": 56010 }, { "epoch": 2.37, "grad_norm": 3.9665553329486913, "learning_rate": 1.2815586074297053e-06, "loss": 0.3077, "step": 56015 }, { "epoch": 2.37, "grad_norm": 4.052871564458969, "learning_rate": 1.28073567834754e-06, "loss": 0.3665, "step": 56020 }, { "epoch": 2.37, "grad_norm": 3.8782914665998804, "learning_rate": 1.2799129747534166e-06, "loss": 0.3661, "step": 56025 }, { "epoch": 2.37, "grad_norm": 3.938404687773065, "learning_rate": 1.2790904966972102e-06, "loss": 0.3485, "step": 56030 }, { "epoch": 2.37, "grad_norm": 4.014476909953917, "learning_rate": 1.2782682442287865e-06, "loss": 0.3525, "step": 56035 }, { "epoch": 2.37, "grad_norm": 3.737326330683595, "learning_rate": 1.277446217397998e-06, "loss": 0.3539, "step": 56040 }, { "epoch": 2.37, "grad_norm": 4.278939190998384, "learning_rate": 1.2766244162546804e-06, "loss": 0.3429, "step": 56045 }, { "epoch": 2.37, "grad_norm": 5.531774304321063, "learning_rate": 1.275802840848655e-06, "loss": 0.3494, "step": 56050 }, { "epoch": 2.37, "grad_norm": 5.298112082051563, "learning_rate": 1.2749814912297337e-06, "loss": 0.3615, "step": 56055 }, { "epoch": 2.37, "grad_norm": 5.205917929398763, "learning_rate": 1.274160367447712e-06, "loss": 0.3476, "step": 56060 }, { "epoch": 2.37, "grad_norm": 4.3174209372800485, "learning_rate": 1.2733394695523738e-06, "loss": 0.3408, "step": 56065 }, { "epoch": 2.37, "grad_norm": 3.556242124937779, "learning_rate": 1.2725187975934849e-06, "loss": 0.3255, "step": 56070 }, { "epoch": 2.37, "grad_norm": 4.336639195586417, "learning_rate": 1.271698351620802e-06, "loss": 0.3662, "step": 56075 }, { "epoch": 2.37, "grad_norm": 4.180398745519801, "learning_rate": 1.2708781316840674e-06, "loss": 0.3499, "step": 56080 }, { "epoch": 2.37, "grad_norm": 4.082706949604593, "learning_rate": 1.2700581378330052e-06, "loss": 0.345, "step": 56085 }, { "epoch": 2.37, "grad_norm": 4.454175050134992, "learning_rate": 1.2692383701173316e-06, "loss": 0.3257, "step": 56090 }, { "epoch": 2.37, "grad_norm": 3.8894647293107556, "learning_rate": 1.2684188285867477e-06, "loss": 0.3632, "step": 56095 }, { "epoch": 2.37, "grad_norm": 4.0870018333469025, "learning_rate": 1.267599513290937e-06, "loss": 0.3608, "step": 56100 }, { "epoch": 2.37, "grad_norm": 3.949174027432825, "learning_rate": 1.2667804242795734e-06, "loss": 0.3415, "step": 56105 }, { "epoch": 2.37, "grad_norm": 4.10937404443264, "learning_rate": 1.265961561602317e-06, "loss": 0.3446, "step": 56110 }, { "epoch": 2.37, "grad_norm": 4.057028937983714, "learning_rate": 1.2651429253088127e-06, "loss": 0.3552, "step": 56115 }, { "epoch": 2.38, "grad_norm": 4.061570314602765, "learning_rate": 1.2643245154486899e-06, "loss": 0.3477, "step": 56120 }, { "epoch": 2.38, "grad_norm": 4.82220118583394, "learning_rate": 1.2635063320715685e-06, "loss": 0.3545, "step": 56125 }, { "epoch": 2.38, "grad_norm": 3.768856531618555, "learning_rate": 1.2626883752270536e-06, "loss": 0.3607, "step": 56130 }, { "epoch": 2.38, "grad_norm": 4.117609707521339, "learning_rate": 1.2618706449647317e-06, "loss": 0.3363, "step": 56135 }, { "epoch": 2.38, "grad_norm": 4.519244719455889, "learning_rate": 1.261053141334182e-06, "loss": 0.3462, "step": 56140 }, { "epoch": 2.38, "grad_norm": 3.763057681384074, "learning_rate": 1.2602358643849683e-06, "loss": 0.3537, "step": 56145 }, { "epoch": 2.38, "grad_norm": 3.8336662345472563, "learning_rate": 1.2594188141666368e-06, "loss": 0.3431, "step": 56150 }, { "epoch": 2.38, "grad_norm": 4.149541099161768, "learning_rate": 1.2586019907287244e-06, "loss": 0.3275, "step": 56155 }, { "epoch": 2.38, "grad_norm": 4.316995669464308, "learning_rate": 1.2577853941207547e-06, "loss": 0.3666, "step": 56160 }, { "epoch": 2.38, "grad_norm": 3.66280877743433, "learning_rate": 1.2569690243922312e-06, "loss": 0.3264, "step": 56165 }, { "epoch": 2.38, "grad_norm": 4.282029869352085, "learning_rate": 1.2561528815926505e-06, "loss": 0.3387, "step": 56170 }, { "epoch": 2.38, "grad_norm": 3.954316961126753, "learning_rate": 1.255336965771493e-06, "loss": 0.3419, "step": 56175 }, { "epoch": 2.38, "grad_norm": 3.9916603811696194, "learning_rate": 1.2545212769782262e-06, "loss": 0.3437, "step": 56180 }, { "epoch": 2.38, "grad_norm": 4.294647069778002, "learning_rate": 1.2537058152623005e-06, "loss": 0.3579, "step": 56185 }, { "epoch": 2.38, "grad_norm": 4.664136498279568, "learning_rate": 1.2528905806731555e-06, "loss": 0.3308, "step": 56190 }, { "epoch": 2.38, "grad_norm": 4.073167455128477, "learning_rate": 1.2520755732602196e-06, "loss": 0.3278, "step": 56195 }, { "epoch": 2.38, "grad_norm": 4.272783277771394, "learning_rate": 1.251260793072901e-06, "loss": 0.3382, "step": 56200 }, { "epoch": 2.38, "grad_norm": 4.363907824497373, "learning_rate": 1.2504462401605972e-06, "loss": 0.3269, "step": 56205 }, { "epoch": 2.38, "grad_norm": 5.312327297114382, "learning_rate": 1.2496319145726932e-06, "loss": 0.3322, "step": 56210 }, { "epoch": 2.38, "grad_norm": 4.569899863667851, "learning_rate": 1.2488178163585601e-06, "loss": 0.3572, "step": 56215 }, { "epoch": 2.38, "grad_norm": 4.047102384079045, "learning_rate": 1.2480039455675525e-06, "loss": 0.3433, "step": 56220 }, { "epoch": 2.38, "grad_norm": 3.84011461636553, "learning_rate": 1.2471903022490134e-06, "loss": 0.3188, "step": 56225 }, { "epoch": 2.38, "grad_norm": 4.5966779973840906, "learning_rate": 1.2463768864522735e-06, "loss": 0.3444, "step": 56230 }, { "epoch": 2.38, "grad_norm": 4.0845417939107875, "learning_rate": 1.245563698226645e-06, "loss": 0.3368, "step": 56235 }, { "epoch": 2.38, "grad_norm": 4.050081355770733, "learning_rate": 1.2447507376214302e-06, "loss": 0.3512, "step": 56240 }, { "epoch": 2.38, "grad_norm": 4.214356694193335, "learning_rate": 1.2439380046859174e-06, "loss": 0.3599, "step": 56245 }, { "epoch": 2.38, "grad_norm": 3.8158448569828205, "learning_rate": 1.2431254994693804e-06, "loss": 0.3275, "step": 56250 }, { "epoch": 2.38, "grad_norm": 3.9101584457008234, "learning_rate": 1.2423132220210766e-06, "loss": 0.3328, "step": 56255 }, { "epoch": 2.38, "grad_norm": 4.163220685716269, "learning_rate": 1.2415011723902536e-06, "loss": 0.3479, "step": 56260 }, { "epoch": 2.38, "grad_norm": 5.317419706805609, "learning_rate": 1.2406893506261453e-06, "loss": 0.3417, "step": 56265 }, { "epoch": 2.38, "grad_norm": 4.400425551107368, "learning_rate": 1.2398777567779663e-06, "loss": 0.3362, "step": 56270 }, { "epoch": 2.38, "grad_norm": 4.556054707705937, "learning_rate": 1.2390663908949235e-06, "loss": 0.3435, "step": 56275 }, { "epoch": 2.38, "grad_norm": 3.4800779373837205, "learning_rate": 1.2382552530262081e-06, "loss": 0.3359, "step": 56280 }, { "epoch": 2.38, "grad_norm": 4.028650576657266, "learning_rate": 1.2374443432209954e-06, "loss": 0.3427, "step": 56285 }, { "epoch": 2.38, "grad_norm": 6.1591874444541075, "learning_rate": 1.236633661528449e-06, "loss": 0.3519, "step": 56290 }, { "epoch": 2.38, "grad_norm": 4.923034852127373, "learning_rate": 1.2358232079977179e-06, "loss": 0.3326, "step": 56295 }, { "epoch": 2.38, "grad_norm": 8.29779257531179, "learning_rate": 1.23501298267794e-06, "loss": 0.3533, "step": 56300 }, { "epoch": 2.38, "grad_norm": 4.818434293798176, "learning_rate": 1.234202985618233e-06, "loss": 0.3429, "step": 56305 }, { "epoch": 2.38, "grad_norm": 4.2591512145228565, "learning_rate": 1.2333932168677066e-06, "loss": 0.3652, "step": 56310 }, { "epoch": 2.38, "grad_norm": 3.6263671184116357, "learning_rate": 1.2325836764754563e-06, "loss": 0.3539, "step": 56315 }, { "epoch": 2.38, "grad_norm": 4.070523711018789, "learning_rate": 1.2317743644905583e-06, "loss": 0.3439, "step": 56320 }, { "epoch": 2.38, "grad_norm": 3.7271284536070377, "learning_rate": 1.2309652809620814e-06, "loss": 0.335, "step": 56325 }, { "epoch": 2.38, "grad_norm": 4.272686173320991, "learning_rate": 1.2301564259390786e-06, "loss": 0.3516, "step": 56330 }, { "epoch": 2.38, "grad_norm": 3.9364050290880144, "learning_rate": 1.229347799470586e-06, "loss": 0.319, "step": 56335 }, { "epoch": 2.38, "grad_norm": 4.460413442485901, "learning_rate": 1.2285394016056295e-06, "loss": 0.3512, "step": 56340 }, { "epoch": 2.38, "grad_norm": 4.17413430368539, "learning_rate": 1.22773123239322e-06, "loss": 0.3603, "step": 56345 }, { "epoch": 2.38, "grad_norm": 4.236662996057952, "learning_rate": 1.2269232918823554e-06, "loss": 0.3965, "step": 56350 }, { "epoch": 2.39, "grad_norm": 3.7770242104213736, "learning_rate": 1.2261155801220166e-06, "loss": 0.3292, "step": 56355 }, { "epoch": 2.39, "grad_norm": 3.663359595479212, "learning_rate": 1.2253080971611736e-06, "loss": 0.3214, "step": 56360 }, { "epoch": 2.39, "grad_norm": 4.299667672535685, "learning_rate": 1.224500843048783e-06, "loss": 0.3423, "step": 56365 }, { "epoch": 2.39, "grad_norm": 4.8169810599133935, "learning_rate": 1.223693817833786e-06, "loss": 0.3537, "step": 56370 }, { "epoch": 2.39, "grad_norm": 4.279985295007791, "learning_rate": 1.2228870215651074e-06, "loss": 0.3581, "step": 56375 }, { "epoch": 2.39, "grad_norm": 3.8059500991932085, "learning_rate": 1.222080454291663e-06, "loss": 0.3096, "step": 56380 }, { "epoch": 2.39, "grad_norm": 4.183620710461634, "learning_rate": 1.2212741160623537e-06, "loss": 0.3648, "step": 56385 }, { "epoch": 2.39, "grad_norm": 4.460469143650533, "learning_rate": 1.2204680069260628e-06, "loss": 0.3401, "step": 56390 }, { "epoch": 2.39, "grad_norm": 3.747481355269418, "learning_rate": 1.2196621269316638e-06, "loss": 0.3394, "step": 56395 }, { "epoch": 2.39, "grad_norm": 4.454252294094227, "learning_rate": 1.2188564761280159e-06, "loss": 0.3656, "step": 56400 }, { "epoch": 2.39, "grad_norm": 3.5021637823003036, "learning_rate": 1.2180510545639612e-06, "loss": 0.3592, "step": 56405 }, { "epoch": 2.39, "grad_norm": 3.8405870111259075, "learning_rate": 1.2172458622883303e-06, "loss": 0.3625, "step": 56410 }, { "epoch": 2.39, "grad_norm": 4.367901583823025, "learning_rate": 1.2164408993499404e-06, "loss": 0.3456, "step": 56415 }, { "epoch": 2.39, "grad_norm": 4.068501791686017, "learning_rate": 1.2156361657975957e-06, "loss": 0.3619, "step": 56420 }, { "epoch": 2.39, "grad_norm": 4.456982934607577, "learning_rate": 1.2148316616800815e-06, "loss": 0.3716, "step": 56425 }, { "epoch": 2.39, "grad_norm": 4.01996956647707, "learning_rate": 1.2140273870461737e-06, "loss": 0.3459, "step": 56430 }, { "epoch": 2.39, "grad_norm": 4.100836662742025, "learning_rate": 1.2132233419446354e-06, "loss": 0.348, "step": 56435 }, { "epoch": 2.39, "grad_norm": 4.216336199715205, "learning_rate": 1.2124195264242101e-06, "loss": 0.3527, "step": 56440 }, { "epoch": 2.39, "grad_norm": 4.7200618430058805, "learning_rate": 1.211615940533632e-06, "loss": 0.3659, "step": 56445 }, { "epoch": 2.39, "grad_norm": 7.882473699789652, "learning_rate": 1.2108125843216223e-06, "loss": 0.3448, "step": 56450 }, { "epoch": 2.39, "grad_norm": 6.606248741664309, "learning_rate": 1.210009457836882e-06, "loss": 0.3697, "step": 56455 }, { "epoch": 2.39, "grad_norm": 5.876830757682971, "learning_rate": 1.209206561128105e-06, "loss": 0.3246, "step": 56460 }, { "epoch": 2.39, "grad_norm": 6.807438903928233, "learning_rate": 1.2084038942439697e-06, "loss": 0.342, "step": 56465 }, { "epoch": 2.39, "grad_norm": 5.5609946872091225, "learning_rate": 1.2076014572331356e-06, "loss": 0.333, "step": 56470 }, { "epoch": 2.39, "grad_norm": 4.731673141284543, "learning_rate": 1.2067992501442549e-06, "loss": 0.3551, "step": 56475 }, { "epoch": 2.39, "grad_norm": 4.184057591087504, "learning_rate": 1.2059972730259623e-06, "loss": 0.3445, "step": 56480 }, { "epoch": 2.39, "grad_norm": 4.4390993490406245, "learning_rate": 1.2051955259268805e-06, "loss": 0.3263, "step": 56485 }, { "epoch": 2.39, "grad_norm": 3.918815012539708, "learning_rate": 1.2043940088956147e-06, "loss": 0.3436, "step": 56490 }, { "epoch": 2.39, "grad_norm": 3.7354162698167492, "learning_rate": 1.2035927219807604e-06, "loss": 0.3631, "step": 56495 }, { "epoch": 2.39, "grad_norm": 3.913744920111048, "learning_rate": 1.2027916652308975e-06, "loss": 0.3288, "step": 56500 }, { "epoch": 2.39, "grad_norm": 4.308056665449875, "learning_rate": 1.2019908386945894e-06, "loss": 0.3466, "step": 56505 }, { "epoch": 2.39, "grad_norm": 4.473366687726201, "learning_rate": 1.2011902424203897e-06, "loss": 0.3456, "step": 56510 }, { "epoch": 2.39, "grad_norm": 4.75336022461932, "learning_rate": 1.2003898764568372e-06, "loss": 0.3521, "step": 56515 }, { "epoch": 2.39, "grad_norm": 4.141934935456006, "learning_rate": 1.1995897408524527e-06, "loss": 0.3312, "step": 56520 }, { "epoch": 2.39, "grad_norm": 4.137933199754448, "learning_rate": 1.1987898356557493e-06, "loss": 0.3309, "step": 56525 }, { "epoch": 2.39, "grad_norm": 4.926767726938109, "learning_rate": 1.19799016091522e-06, "loss": 0.356, "step": 56530 }, { "epoch": 2.39, "grad_norm": 4.743462704212318, "learning_rate": 1.1971907166793496e-06, "loss": 0.328, "step": 56535 }, { "epoch": 2.39, "grad_norm": 4.662777440762261, "learning_rate": 1.1963915029966028e-06, "loss": 0.3705, "step": 56540 }, { "epoch": 2.39, "grad_norm": 4.254489488529074, "learning_rate": 1.1955925199154356e-06, "loss": 0.3516, "step": 56545 }, { "epoch": 2.39, "grad_norm": 4.103481329540277, "learning_rate": 1.1947937674842874e-06, "loss": 0.368, "step": 56550 }, { "epoch": 2.39, "grad_norm": 3.785153847180542, "learning_rate": 1.1939952457515858e-06, "loss": 0.3235, "step": 56555 }, { "epoch": 2.39, "grad_norm": 3.8150842846257937, "learning_rate": 1.19319695476574e-06, "loss": 0.3236, "step": 56560 }, { "epoch": 2.39, "grad_norm": 3.7591610191463336, "learning_rate": 1.1923988945751502e-06, "loss": 0.3154, "step": 56565 }, { "epoch": 2.39, "grad_norm": 3.473851978568733, "learning_rate": 1.1916010652282005e-06, "loss": 0.3331, "step": 56570 }, { "epoch": 2.39, "grad_norm": 5.5208392503754045, "learning_rate": 1.1908034667732593e-06, "loss": 0.3529, "step": 56575 }, { "epoch": 2.39, "grad_norm": 3.8793528002780695, "learning_rate": 1.190006099258683e-06, "loss": 0.3544, "step": 56580 }, { "epoch": 2.39, "grad_norm": 4.265155831274544, "learning_rate": 1.1892089627328157e-06, "loss": 0.3466, "step": 56585 }, { "epoch": 2.39, "grad_norm": 4.9854106775049, "learning_rate": 1.1884120572439827e-06, "loss": 0.3462, "step": 56590 }, { "epoch": 2.4, "grad_norm": 3.8964229123107885, "learning_rate": 1.1876153828404995e-06, "loss": 0.3599, "step": 56595 }, { "epoch": 2.4, "grad_norm": 4.374570532710603, "learning_rate": 1.1868189395706658e-06, "loss": 0.3576, "step": 56600 }, { "epoch": 2.4, "grad_norm": 7.799192059631832, "learning_rate": 1.186022727482769e-06, "loss": 0.3649, "step": 56605 }, { "epoch": 2.4, "grad_norm": 5.223432559321248, "learning_rate": 1.1852267466250783e-06, "loss": 0.3044, "step": 56610 }, { "epoch": 2.4, "grad_norm": 4.4106607448899435, "learning_rate": 1.1844309970458534e-06, "loss": 0.3278, "step": 56615 }, { "epoch": 2.4, "grad_norm": 4.595113762495233, "learning_rate": 1.183635478793339e-06, "loss": 0.3299, "step": 56620 }, { "epoch": 2.4, "grad_norm": 3.604172959403088, "learning_rate": 1.1828401919157628e-06, "loss": 0.3157, "step": 56625 }, { "epoch": 2.4, "grad_norm": 3.7892305894727145, "learning_rate": 1.1820451364613423e-06, "loss": 0.3393, "step": 56630 }, { "epoch": 2.4, "grad_norm": 5.9769497888625605, "learning_rate": 1.1812503124782803e-06, "loss": 0.3272, "step": 56635 }, { "epoch": 2.4, "grad_norm": 3.880632401665747, "learning_rate": 1.1804557200147615e-06, "loss": 0.3051, "step": 56640 }, { "epoch": 2.4, "grad_norm": 4.017454781649578, "learning_rate": 1.179661359118962e-06, "loss": 0.3418, "step": 56645 }, { "epoch": 2.4, "grad_norm": 4.63058356214561, "learning_rate": 1.1788672298390413e-06, "loss": 0.3342, "step": 56650 }, { "epoch": 2.4, "grad_norm": 4.154782479950823, "learning_rate": 1.1780733322231458e-06, "loss": 0.3748, "step": 56655 }, { "epoch": 2.4, "grad_norm": 4.0865863330178716, "learning_rate": 1.1772796663194052e-06, "loss": 0.3528, "step": 56660 }, { "epoch": 2.4, "grad_norm": 4.491277325552022, "learning_rate": 1.1764862321759384e-06, "loss": 0.3639, "step": 56665 }, { "epoch": 2.4, "grad_norm": 4.155548428228232, "learning_rate": 1.1756930298408499e-06, "loss": 0.3213, "step": 56670 }, { "epoch": 2.4, "grad_norm": 3.882242553454759, "learning_rate": 1.1749000593622267e-06, "loss": 0.3585, "step": 56675 }, { "epoch": 2.4, "grad_norm": 4.260700333042391, "learning_rate": 1.174107320788146e-06, "loss": 0.3658, "step": 56680 }, { "epoch": 2.4, "grad_norm": 4.078747061421399, "learning_rate": 1.17331481416667e-06, "loss": 0.356, "step": 56685 }, { "epoch": 2.4, "grad_norm": 3.9670077186311135, "learning_rate": 1.172522539545845e-06, "loss": 0.3635, "step": 56690 }, { "epoch": 2.4, "grad_norm": 4.023438981837194, "learning_rate": 1.1717304969737031e-06, "loss": 0.3508, "step": 56695 }, { "epoch": 2.4, "grad_norm": 3.823090998694218, "learning_rate": 1.1709386864982646e-06, "loss": 0.3637, "step": 56700 }, { "epoch": 2.4, "grad_norm": 3.854170596248708, "learning_rate": 1.170147108167536e-06, "loss": 0.3736, "step": 56705 }, { "epoch": 2.4, "grad_norm": 3.654391944424784, "learning_rate": 1.1693557620295053e-06, "loss": 0.345, "step": 56710 }, { "epoch": 2.4, "grad_norm": 4.238560403086151, "learning_rate": 1.1685646481321515e-06, "loss": 0.3376, "step": 56715 }, { "epoch": 2.4, "grad_norm": 4.158361449649931, "learning_rate": 1.1677737665234373e-06, "loss": 0.3488, "step": 56720 }, { "epoch": 2.4, "grad_norm": 4.544770183822335, "learning_rate": 1.166983117251313e-06, "loss": 0.3585, "step": 56725 }, { "epoch": 2.4, "grad_norm": 4.6140877954686355, "learning_rate": 1.1661927003637097e-06, "loss": 0.3253, "step": 56730 }, { "epoch": 2.4, "grad_norm": 3.6973941741159484, "learning_rate": 1.1654025159085508e-06, "loss": 0.3446, "step": 56735 }, { "epoch": 2.4, "grad_norm": 5.437665175818403, "learning_rate": 1.1646125639337435e-06, "loss": 0.3398, "step": 56740 }, { "epoch": 2.4, "grad_norm": 4.2111065634403655, "learning_rate": 1.1638228444871774e-06, "loss": 0.3508, "step": 56745 }, { "epoch": 2.4, "grad_norm": 4.589798529588758, "learning_rate": 1.1630333576167324e-06, "loss": 0.3353, "step": 56750 }, { "epoch": 2.4, "grad_norm": 3.503344616811239, "learning_rate": 1.1622441033702742e-06, "loss": 0.3392, "step": 56755 }, { "epoch": 2.4, "grad_norm": 4.7342863826040205, "learning_rate": 1.1614550817956498e-06, "loss": 0.3568, "step": 56760 }, { "epoch": 2.4, "grad_norm": 4.4872082940221, "learning_rate": 1.1606662929406975e-06, "loss": 0.3531, "step": 56765 }, { "epoch": 2.4, "grad_norm": 4.354552228237745, "learning_rate": 1.1598777368532398e-06, "loss": 0.3226, "step": 56770 }, { "epoch": 2.4, "grad_norm": 3.7670790249664683, "learning_rate": 1.159089413581082e-06, "loss": 0.3539, "step": 56775 }, { "epoch": 2.4, "grad_norm": 4.022191338765902, "learning_rate": 1.1583013231720197e-06, "loss": 0.3609, "step": 56780 }, { "epoch": 2.4, "grad_norm": 4.467486150618449, "learning_rate": 1.1575134656738318e-06, "loss": 0.3562, "step": 56785 }, { "epoch": 2.4, "grad_norm": 4.626950937858999, "learning_rate": 1.1567258411342851e-06, "loss": 0.3273, "step": 56790 }, { "epoch": 2.4, "grad_norm": 3.840100248745007, "learning_rate": 1.1559384496011294e-06, "loss": 0.313, "step": 56795 }, { "epoch": 2.4, "grad_norm": 4.612093788714251, "learning_rate": 1.155151291122102e-06, "loss": 0.3399, "step": 56800 }, { "epoch": 2.4, "grad_norm": 4.945557251560856, "learning_rate": 1.1543643657449277e-06, "loss": 0.3526, "step": 56805 }, { "epoch": 2.4, "grad_norm": 4.4700650585877755, "learning_rate": 1.1535776735173127e-06, "loss": 0.3461, "step": 56810 }, { "epoch": 2.4, "grad_norm": 3.9393605197990986, "learning_rate": 1.1527912144869536e-06, "loss": 0.3218, "step": 56815 }, { "epoch": 2.4, "grad_norm": 5.3111464394638555, "learning_rate": 1.1520049887015322e-06, "loss": 0.353, "step": 56820 }, { "epoch": 2.4, "grad_norm": 4.342555915373743, "learning_rate": 1.1512189962087123e-06, "loss": 0.3217, "step": 56825 }, { "epoch": 2.41, "grad_norm": 4.864373750013248, "learning_rate": 1.1504332370561477e-06, "loss": 0.3392, "step": 56830 }, { "epoch": 2.41, "grad_norm": 4.806553216063425, "learning_rate": 1.1496477112914767e-06, "loss": 0.3553, "step": 56835 }, { "epoch": 2.41, "grad_norm": 3.854469413987072, "learning_rate": 1.1488624189623249e-06, "loss": 0.3476, "step": 56840 }, { "epoch": 2.41, "grad_norm": 4.275119593561502, "learning_rate": 1.1480773601162992e-06, "loss": 0.3511, "step": 56845 }, { "epoch": 2.41, "grad_norm": 3.970161339987659, "learning_rate": 1.1472925348009984e-06, "loss": 0.3305, "step": 56850 }, { "epoch": 2.41, "grad_norm": 4.142120702832089, "learning_rate": 1.1465079430640008e-06, "loss": 0.3397, "step": 56855 }, { "epoch": 2.41, "grad_norm": 4.017284552602058, "learning_rate": 1.145723584952878e-06, "loss": 0.3456, "step": 56860 }, { "epoch": 2.41, "grad_norm": 3.6748169488463605, "learning_rate": 1.1449394605151792e-06, "loss": 0.3224, "step": 56865 }, { "epoch": 2.41, "grad_norm": 3.542752211364065, "learning_rate": 1.1441555697984453e-06, "loss": 0.3047, "step": 56870 }, { "epoch": 2.41, "grad_norm": 4.369654380259122, "learning_rate": 1.143371912850203e-06, "loss": 0.3082, "step": 56875 }, { "epoch": 2.41, "grad_norm": 4.897751019700783, "learning_rate": 1.1425884897179607e-06, "loss": 0.3325, "step": 56880 }, { "epoch": 2.41, "grad_norm": 4.472942981051483, "learning_rate": 1.1418053004492152e-06, "loss": 0.3615, "step": 56885 }, { "epoch": 2.41, "grad_norm": 4.615938007710609, "learning_rate": 1.1410223450914514e-06, "loss": 0.3681, "step": 56890 }, { "epoch": 2.41, "grad_norm": 4.307134665168528, "learning_rate": 1.1402396236921343e-06, "loss": 0.3337, "step": 56895 }, { "epoch": 2.41, "grad_norm": 5.7385537536736795, "learning_rate": 1.1394571362987195e-06, "loss": 0.3574, "step": 56900 }, { "epoch": 2.41, "grad_norm": 4.496709311583137, "learning_rate": 1.1386748829586468e-06, "loss": 0.3265, "step": 56905 }, { "epoch": 2.41, "grad_norm": 3.9961132678714324, "learning_rate": 1.1378928637193438e-06, "loss": 0.3483, "step": 56910 }, { "epoch": 2.41, "grad_norm": 5.167455808327841, "learning_rate": 1.1371110786282185e-06, "loss": 0.3259, "step": 56915 }, { "epoch": 2.41, "grad_norm": 3.6139097669997216, "learning_rate": 1.1363295277326703e-06, "loss": 0.324, "step": 56920 }, { "epoch": 2.41, "grad_norm": 4.175104134970415, "learning_rate": 1.135548211080083e-06, "loss": 0.3471, "step": 56925 }, { "epoch": 2.41, "grad_norm": 4.055510692413433, "learning_rate": 1.1347671287178235e-06, "loss": 0.3219, "step": 56930 }, { "epoch": 2.41, "grad_norm": 4.069856583483291, "learning_rate": 1.1339862806932478e-06, "loss": 0.3349, "step": 56935 }, { "epoch": 2.41, "grad_norm": 3.8233800481673206, "learning_rate": 1.1332056670536974e-06, "loss": 0.339, "step": 56940 }, { "epoch": 2.41, "grad_norm": 3.9253397380128967, "learning_rate": 1.1324252878464958e-06, "loss": 0.329, "step": 56945 }, { "epoch": 2.41, "grad_norm": 4.634972787058376, "learning_rate": 1.1316451431189567e-06, "loss": 0.3455, "step": 56950 }, { "epoch": 2.41, "grad_norm": 4.200900944922554, "learning_rate": 1.1308652329183778e-06, "loss": 0.3607, "step": 56955 }, { "epoch": 2.41, "grad_norm": 3.918487083661899, "learning_rate": 1.1300855572920445e-06, "loss": 0.3495, "step": 56960 }, { "epoch": 2.41, "grad_norm": 4.113599656671094, "learning_rate": 1.1293061162872232e-06, "loss": 0.3224, "step": 56965 }, { "epoch": 2.41, "grad_norm": 4.549136236937687, "learning_rate": 1.128526909951171e-06, "loss": 0.3433, "step": 56970 }, { "epoch": 2.41, "grad_norm": 4.150902529307994, "learning_rate": 1.127747938331129e-06, "loss": 0.3144, "step": 56975 }, { "epoch": 2.41, "grad_norm": 4.039845476152868, "learning_rate": 1.1269692014743227e-06, "loss": 0.3368, "step": 56980 }, { "epoch": 2.41, "grad_norm": 3.6695552906140594, "learning_rate": 1.1261906994279652e-06, "loss": 0.3142, "step": 56985 }, { "epoch": 2.41, "grad_norm": 4.881461509640357, "learning_rate": 1.1254124322392562e-06, "loss": 0.378, "step": 56990 }, { "epoch": 2.41, "grad_norm": 4.350278752790158, "learning_rate": 1.124634399955377e-06, "loss": 0.3489, "step": 56995 }, { "epoch": 2.41, "grad_norm": 4.156181304261934, "learning_rate": 1.1238566026234993e-06, "loss": 0.3305, "step": 57000 }, { "epoch": 2.41, "grad_norm": 4.019306395727137, "learning_rate": 1.1230790402907776e-06, "loss": 0.3508, "step": 57005 }, { "epoch": 2.41, "grad_norm": 4.168698725812384, "learning_rate": 1.1223017130043574e-06, "loss": 0.3589, "step": 57010 }, { "epoch": 2.41, "grad_norm": 4.554145242819516, "learning_rate": 1.1215246208113584e-06, "loss": 0.3187, "step": 57015 }, { "epoch": 2.41, "grad_norm": 4.451892932916253, "learning_rate": 1.1207477637588982e-06, "loss": 0.3697, "step": 57020 }, { "epoch": 2.41, "grad_norm": 3.9727925653567415, "learning_rate": 1.1199711418940739e-06, "loss": 0.3418, "step": 57025 }, { "epoch": 2.41, "grad_norm": 3.6742176836623375, "learning_rate": 1.1191947552639714e-06, "loss": 0.3522, "step": 57030 }, { "epoch": 2.41, "grad_norm": 3.9566178267034964, "learning_rate": 1.1184186039156585e-06, "loss": 0.3449, "step": 57035 }, { "epoch": 2.41, "grad_norm": 3.6715870793680265, "learning_rate": 1.117642687896192e-06, "loss": 0.3304, "step": 57040 }, { "epoch": 2.41, "grad_norm": 4.299037989487199, "learning_rate": 1.116867007252615e-06, "loss": 0.3465, "step": 57045 }, { "epoch": 2.41, "grad_norm": 5.842260670438582, "learning_rate": 1.1160915620319519e-06, "loss": 0.3149, "step": 57050 }, { "epoch": 2.41, "grad_norm": 4.327095322186797, "learning_rate": 1.1153163522812166e-06, "loss": 0.3645, "step": 57055 }, { "epoch": 2.41, "grad_norm": 4.009791496917735, "learning_rate": 1.1145413780474102e-06, "loss": 0.3038, "step": 57060 }, { "epoch": 2.42, "grad_norm": 4.219170096938655, "learning_rate": 1.113766639377513e-06, "loss": 0.3463, "step": 57065 }, { "epoch": 2.42, "grad_norm": 4.17106153268378, "learning_rate": 1.1129921363184976e-06, "loss": 0.3369, "step": 57070 }, { "epoch": 2.42, "grad_norm": 5.176857000090576, "learning_rate": 1.11221786891732e-06, "loss": 0.3354, "step": 57075 }, { "epoch": 2.42, "grad_norm": 4.39145399245273, "learning_rate": 1.1114438372209208e-06, "loss": 0.346, "step": 57080 }, { "epoch": 2.42, "grad_norm": 4.798230384768873, "learning_rate": 1.1106700412762273e-06, "loss": 0.3407, "step": 57085 }, { "epoch": 2.42, "grad_norm": 4.676647171519071, "learning_rate": 1.1098964811301528e-06, "loss": 0.3621, "step": 57090 }, { "epoch": 2.42, "grad_norm": 4.095255275659979, "learning_rate": 1.1091231568295974e-06, "loss": 0.3391, "step": 57095 }, { "epoch": 2.42, "grad_norm": 4.207240671239678, "learning_rate": 1.108350068421442e-06, "loss": 0.341, "step": 57100 }, { "epoch": 2.42, "grad_norm": 5.349139686535518, "learning_rate": 1.1075772159525593e-06, "loss": 0.3391, "step": 57105 }, { "epoch": 2.42, "grad_norm": 3.674538039444013, "learning_rate": 1.1068045994698062e-06, "loss": 0.3356, "step": 57110 }, { "epoch": 2.42, "grad_norm": 3.82336304763771, "learning_rate": 1.1060322190200206e-06, "loss": 0.3316, "step": 57115 }, { "epoch": 2.42, "grad_norm": 4.14540455272532, "learning_rate": 1.105260074650032e-06, "loss": 0.3588, "step": 57120 }, { "epoch": 2.42, "grad_norm": 3.9799566895007277, "learning_rate": 1.1044881664066538e-06, "loss": 0.3468, "step": 57125 }, { "epoch": 2.42, "grad_norm": 4.323147081653709, "learning_rate": 1.1037164943366819e-06, "loss": 0.3781, "step": 57130 }, { "epoch": 2.42, "grad_norm": 4.177913963804417, "learning_rate": 1.1029450584869017e-06, "loss": 0.3465, "step": 57135 }, { "epoch": 2.42, "grad_norm": 3.943590613600536, "learning_rate": 1.1021738589040843e-06, "loss": 0.3086, "step": 57140 }, { "epoch": 2.42, "grad_norm": 4.5844919474233095, "learning_rate": 1.1014028956349854e-06, "loss": 0.3309, "step": 57145 }, { "epoch": 2.42, "grad_norm": 4.505840638835024, "learning_rate": 1.1006321687263438e-06, "loss": 0.3451, "step": 57150 }, { "epoch": 2.42, "grad_norm": 3.920962168656925, "learning_rate": 1.0998616782248878e-06, "loss": 0.3523, "step": 57155 }, { "epoch": 2.42, "grad_norm": 4.278549877096805, "learning_rate": 1.0990914241773314e-06, "loss": 0.3304, "step": 57160 }, { "epoch": 2.42, "grad_norm": 4.279815723608131, "learning_rate": 1.0983214066303705e-06, "loss": 0.3192, "step": 57165 }, { "epoch": 2.42, "grad_norm": 5.332700203661917, "learning_rate": 1.0975516256306913e-06, "loss": 0.3359, "step": 57170 }, { "epoch": 2.42, "grad_norm": 4.058974227370347, "learning_rate": 1.0967820812249602e-06, "loss": 0.3475, "step": 57175 }, { "epoch": 2.42, "grad_norm": 3.920267009674776, "learning_rate": 1.0960127734598358e-06, "loss": 0.3496, "step": 57180 }, { "epoch": 2.42, "grad_norm": 3.950268933823067, "learning_rate": 1.0952437023819556e-06, "loss": 0.3415, "step": 57185 }, { "epoch": 2.42, "grad_norm": 4.303089897269296, "learning_rate": 1.0944748680379485e-06, "loss": 0.3223, "step": 57190 }, { "epoch": 2.42, "grad_norm": 4.522371916364081, "learning_rate": 1.0937062704744273e-06, "loss": 0.3361, "step": 57195 }, { "epoch": 2.42, "grad_norm": 4.1440315944873385, "learning_rate": 1.0929379097379867e-06, "loss": 0.3302, "step": 57200 }, { "epoch": 2.42, "grad_norm": 3.4479956726598364, "learning_rate": 1.0921697858752123e-06, "loss": 0.3173, "step": 57205 }, { "epoch": 2.42, "grad_norm": 3.9852513369908555, "learning_rate": 1.091401898932673e-06, "loss": 0.3142, "step": 57210 }, { "epoch": 2.42, "grad_norm": 4.6148319317635105, "learning_rate": 1.090634248956925e-06, "loss": 0.3568, "step": 57215 }, { "epoch": 2.42, "grad_norm": 4.153989987777079, "learning_rate": 1.0898668359945052e-06, "loss": 0.3581, "step": 57220 }, { "epoch": 2.42, "grad_norm": 4.043643162056907, "learning_rate": 1.0890996600919417e-06, "loss": 0.3175, "step": 57225 }, { "epoch": 2.42, "grad_norm": 4.769632091804468, "learning_rate": 1.0883327212957473e-06, "loss": 0.3437, "step": 57230 }, { "epoch": 2.42, "grad_norm": 4.1185154700071855, "learning_rate": 1.0875660196524162e-06, "loss": 0.3705, "step": 57235 }, { "epoch": 2.42, "grad_norm": 3.8860089998557124, "learning_rate": 1.086799555208433e-06, "loss": 0.343, "step": 57240 }, { "epoch": 2.42, "grad_norm": 4.422458694447115, "learning_rate": 1.0860333280102676e-06, "loss": 0.337, "step": 57245 }, { "epoch": 2.42, "grad_norm": 4.081317646390591, "learning_rate": 1.085267338104371e-06, "loss": 0.3443, "step": 57250 }, { "epoch": 2.42, "grad_norm": 4.629507468223407, "learning_rate": 1.0845015855371844e-06, "loss": 0.366, "step": 57255 }, { "epoch": 2.42, "grad_norm": 7.457854345957066, "learning_rate": 1.083736070355133e-06, "loss": 0.3448, "step": 57260 }, { "epoch": 2.42, "grad_norm": 4.645145445076018, "learning_rate": 1.08297079260463e-06, "loss": 0.3301, "step": 57265 }, { "epoch": 2.42, "grad_norm": 3.4120901532230996, "learning_rate": 1.0822057523320677e-06, "loss": 0.3042, "step": 57270 }, { "epoch": 2.42, "grad_norm": 4.314199996264397, "learning_rate": 1.0814409495838314e-06, "loss": 0.3383, "step": 57275 }, { "epoch": 2.42, "grad_norm": 3.847019796796359, "learning_rate": 1.0806763844062884e-06, "loss": 0.3256, "step": 57280 }, { "epoch": 2.42, "grad_norm": 4.168166630142644, "learning_rate": 1.0799120568457899e-06, "loss": 0.3228, "step": 57285 }, { "epoch": 2.42, "grad_norm": 5.122705712166449, "learning_rate": 1.079147966948677e-06, "loss": 0.3518, "step": 57290 }, { "epoch": 2.42, "grad_norm": 4.3011995455462815, "learning_rate": 1.078384114761275e-06, "loss": 0.3421, "step": 57295 }, { "epoch": 2.43, "grad_norm": 3.8623773092730533, "learning_rate": 1.0776205003298906e-06, "loss": 0.3476, "step": 57300 }, { "epoch": 2.43, "grad_norm": 5.079811980004402, "learning_rate": 1.076857123700822e-06, "loss": 0.3434, "step": 57305 }, { "epoch": 2.43, "grad_norm": 4.671007921855317, "learning_rate": 1.0760939849203512e-06, "loss": 0.366, "step": 57310 }, { "epoch": 2.43, "grad_norm": 3.971629722778755, "learning_rate": 1.0753310840347425e-06, "loss": 0.2955, "step": 57315 }, { "epoch": 2.43, "grad_norm": 4.056073541879393, "learning_rate": 1.0745684210902503e-06, "loss": 0.327, "step": 57320 }, { "epoch": 2.43, "grad_norm": 4.010705608054268, "learning_rate": 1.073805996133111e-06, "loss": 0.3525, "step": 57325 }, { "epoch": 2.43, "grad_norm": 4.006245451207343, "learning_rate": 1.0730438092095512e-06, "loss": 0.3459, "step": 57330 }, { "epoch": 2.43, "grad_norm": 3.833995367390576, "learning_rate": 1.0722818603657776e-06, "loss": 0.3537, "step": 57335 }, { "epoch": 2.43, "grad_norm": 4.18623663348344, "learning_rate": 1.0715201496479844e-06, "loss": 0.3384, "step": 57340 }, { "epoch": 2.43, "grad_norm": 4.031412104001203, "learning_rate": 1.0707586771023526e-06, "loss": 0.3422, "step": 57345 }, { "epoch": 2.43, "grad_norm": 4.10377783078606, "learning_rate": 1.0699974427750493e-06, "loss": 0.3578, "step": 57350 }, { "epoch": 2.43, "grad_norm": 3.8387256636012395, "learning_rate": 1.069236446712224e-06, "loss": 0.3487, "step": 57355 }, { "epoch": 2.43, "grad_norm": 3.8569554352473374, "learning_rate": 1.068475688960014e-06, "loss": 0.312, "step": 57360 }, { "epoch": 2.43, "grad_norm": 3.9106425919117127, "learning_rate": 1.0677151695645443e-06, "loss": 0.3204, "step": 57365 }, { "epoch": 2.43, "grad_norm": 5.623182944435181, "learning_rate": 1.066954888571919e-06, "loss": 0.3383, "step": 57370 }, { "epoch": 2.43, "grad_norm": 6.396277386889992, "learning_rate": 1.066194846028234e-06, "loss": 0.3266, "step": 57375 }, { "epoch": 2.43, "grad_norm": 3.708517370015805, "learning_rate": 1.065435041979569e-06, "loss": 0.307, "step": 57380 }, { "epoch": 2.43, "grad_norm": 3.776082362017705, "learning_rate": 1.0646754764719868e-06, "loss": 0.3255, "step": 57385 }, { "epoch": 2.43, "grad_norm": 4.651993225058832, "learning_rate": 1.0639161495515382e-06, "loss": 0.3476, "step": 57390 }, { "epoch": 2.43, "grad_norm": 4.041922501312812, "learning_rate": 1.06315706126426e-06, "loss": 0.3297, "step": 57395 }, { "epoch": 2.43, "grad_norm": 4.05497325401343, "learning_rate": 1.0623982116561737e-06, "loss": 0.3708, "step": 57400 }, { "epoch": 2.43, "grad_norm": 4.1239151536547904, "learning_rate": 1.0616396007732837e-06, "loss": 0.3367, "step": 57405 }, { "epoch": 2.43, "grad_norm": 4.176437728071655, "learning_rate": 1.0608812286615844e-06, "loss": 0.3412, "step": 57410 }, { "epoch": 2.43, "grad_norm": 3.8292579727100775, "learning_rate": 1.0601230953670544e-06, "loss": 0.3287, "step": 57415 }, { "epoch": 2.43, "grad_norm": 4.018676745502739, "learning_rate": 1.0593652009356541e-06, "loss": 0.3396, "step": 57420 }, { "epoch": 2.43, "grad_norm": 3.757641193806856, "learning_rate": 1.0586075454133344e-06, "loss": 0.3205, "step": 57425 }, { "epoch": 2.43, "grad_norm": 3.854202971176708, "learning_rate": 1.0578501288460307e-06, "loss": 0.327, "step": 57430 }, { "epoch": 2.43, "grad_norm": 5.048296795232814, "learning_rate": 1.0570929512796608e-06, "loss": 0.3136, "step": 57435 }, { "epoch": 2.43, "grad_norm": 4.4253108170231386, "learning_rate": 1.0563360127601307e-06, "loss": 0.3087, "step": 57440 }, { "epoch": 2.43, "grad_norm": 4.344310223586198, "learning_rate": 1.0555793133333313e-06, "loss": 0.3226, "step": 57445 }, { "epoch": 2.43, "grad_norm": 5.147417267918798, "learning_rate": 1.0548228530451416e-06, "loss": 0.3477, "step": 57450 }, { "epoch": 2.43, "grad_norm": 4.442360431712468, "learning_rate": 1.0540666319414195e-06, "loss": 0.3122, "step": 57455 }, { "epoch": 2.43, "grad_norm": 4.676397447636436, "learning_rate": 1.0533106500680147e-06, "loss": 0.3635, "step": 57460 }, { "epoch": 2.43, "grad_norm": 4.754240180536058, "learning_rate": 1.0525549074707613e-06, "loss": 0.3475, "step": 57465 }, { "epoch": 2.43, "grad_norm": 4.002888257892067, "learning_rate": 1.0517994041954742e-06, "loss": 0.3425, "step": 57470 }, { "epoch": 2.43, "grad_norm": 4.301489663317671, "learning_rate": 1.05104414028796e-06, "loss": 0.3327, "step": 57475 }, { "epoch": 2.43, "grad_norm": 4.011149077297497, "learning_rate": 1.0502891157940092e-06, "loss": 0.3588, "step": 57480 }, { "epoch": 2.43, "grad_norm": 4.696351906337958, "learning_rate": 1.049534330759393e-06, "loss": 0.3518, "step": 57485 }, { "epoch": 2.43, "grad_norm": 4.04155955519014, "learning_rate": 1.0487797852298742e-06, "loss": 0.3415, "step": 57490 }, { "epoch": 2.43, "grad_norm": 4.975640996381281, "learning_rate": 1.0480254792511996e-06, "loss": 0.3081, "step": 57495 }, { "epoch": 2.43, "grad_norm": 3.7245848930658427, "learning_rate": 1.0472714128690992e-06, "loss": 0.3249, "step": 57500 }, { "epoch": 2.43, "grad_norm": 3.7477953543032463, "learning_rate": 1.0465175861292887e-06, "loss": 0.3507, "step": 57505 }, { "epoch": 2.43, "grad_norm": 4.0224121271880255, "learning_rate": 1.0457639990774715e-06, "loss": 0.3422, "step": 57510 }, { "epoch": 2.43, "grad_norm": 4.721288576188212, "learning_rate": 1.0450106517593355e-06, "loss": 0.343, "step": 57515 }, { "epoch": 2.43, "grad_norm": 4.158340769514438, "learning_rate": 1.0442575442205556e-06, "loss": 0.32, "step": 57520 }, { "epoch": 2.43, "grad_norm": 10.097333264311956, "learning_rate": 1.0435046765067869e-06, "loss": 0.3354, "step": 57525 }, { "epoch": 2.43, "grad_norm": 3.88933683718144, "learning_rate": 1.0427520486636754e-06, "loss": 0.3374, "step": 57530 }, { "epoch": 2.43, "grad_norm": 5.088273658952878, "learning_rate": 1.0419996607368521e-06, "loss": 0.3453, "step": 57535 }, { "epoch": 2.44, "grad_norm": 4.142753100821465, "learning_rate": 1.0412475127719296e-06, "loss": 0.288, "step": 57540 }, { "epoch": 2.44, "grad_norm": 4.08556700288829, "learning_rate": 1.0404956048145094e-06, "loss": 0.3279, "step": 57545 }, { "epoch": 2.44, "grad_norm": 4.286816117138864, "learning_rate": 1.0397439369101791e-06, "loss": 0.3048, "step": 57550 }, { "epoch": 2.44, "grad_norm": 4.164524582619652, "learning_rate": 1.0389925091045073e-06, "loss": 0.3646, "step": 57555 }, { "epoch": 2.44, "grad_norm": 3.794924632062213, "learning_rate": 1.0382413214430515e-06, "loss": 0.338, "step": 57560 }, { "epoch": 2.44, "grad_norm": 3.899638909029547, "learning_rate": 1.0374903739713549e-06, "loss": 0.324, "step": 57565 }, { "epoch": 2.44, "grad_norm": 4.109486744421262, "learning_rate": 1.0367396667349466e-06, "loss": 0.356, "step": 57570 }, { "epoch": 2.44, "grad_norm": 4.160693008928198, "learning_rate": 1.035989199779337e-06, "loss": 0.3433, "step": 57575 }, { "epoch": 2.44, "grad_norm": 4.24856131113691, "learning_rate": 1.035238973150025e-06, "loss": 0.3618, "step": 57580 }, { "epoch": 2.44, "grad_norm": 4.078613817089558, "learning_rate": 1.0344889868924973e-06, "loss": 0.3472, "step": 57585 }, { "epoch": 2.44, "grad_norm": 3.726913556723607, "learning_rate": 1.0337392410522207e-06, "loss": 0.3559, "step": 57590 }, { "epoch": 2.44, "grad_norm": 3.598044912764058, "learning_rate": 1.03298973567465e-06, "loss": 0.3411, "step": 57595 }, { "epoch": 2.44, "grad_norm": 3.728965349914194, "learning_rate": 1.032240470805228e-06, "loss": 0.3453, "step": 57600 }, { "epoch": 2.44, "grad_norm": 3.7912803911807837, "learning_rate": 1.0314914464893776e-06, "loss": 0.359, "step": 57605 }, { "epoch": 2.44, "grad_norm": 3.9649811004033784, "learning_rate": 1.0307426627725115e-06, "loss": 0.3065, "step": 57610 }, { "epoch": 2.44, "grad_norm": 4.998160224698893, "learning_rate": 1.0299941197000268e-06, "loss": 0.3439, "step": 57615 }, { "epoch": 2.44, "grad_norm": 4.157528765418892, "learning_rate": 1.0292458173173037e-06, "loss": 0.3434, "step": 57620 }, { "epoch": 2.44, "grad_norm": 4.126924245463731, "learning_rate": 1.0284977556697102e-06, "loss": 0.3657, "step": 57625 }, { "epoch": 2.44, "grad_norm": 3.3799710129198606, "learning_rate": 1.0277499348025993e-06, "loss": 0.3178, "step": 57630 }, { "epoch": 2.44, "grad_norm": 4.198746943937231, "learning_rate": 1.0270023547613106e-06, "loss": 0.3293, "step": 57635 }, { "epoch": 2.44, "grad_norm": 3.9167831767502874, "learning_rate": 1.0262550155911648e-06, "loss": 0.3106, "step": 57640 }, { "epoch": 2.44, "grad_norm": 4.114667366858954, "learning_rate": 1.0255079173374727e-06, "loss": 0.3523, "step": 57645 }, { "epoch": 2.44, "grad_norm": 4.158913337625498, "learning_rate": 1.0247610600455294e-06, "loss": 0.3596, "step": 57650 }, { "epoch": 2.44, "grad_norm": 4.176295388025411, "learning_rate": 1.024014443760612e-06, "loss": 0.3534, "step": 57655 }, { "epoch": 2.44, "grad_norm": 3.934322932853985, "learning_rate": 1.0232680685279889e-06, "loss": 0.3221, "step": 57660 }, { "epoch": 2.44, "grad_norm": 3.956755526343446, "learning_rate": 1.0225219343929078e-06, "loss": 0.3128, "step": 57665 }, { "epoch": 2.44, "grad_norm": 7.917637726100076, "learning_rate": 1.0217760414006067e-06, "loss": 0.3568, "step": 57670 }, { "epoch": 2.44, "grad_norm": 4.308814510640868, "learning_rate": 1.0210303895963047e-06, "loss": 0.3322, "step": 57675 }, { "epoch": 2.44, "grad_norm": 4.201381113507919, "learning_rate": 1.02028497902521e-06, "loss": 0.3514, "step": 57680 }, { "epoch": 2.44, "grad_norm": 3.788767981748645, "learning_rate": 1.0195398097325154e-06, "loss": 0.3315, "step": 57685 }, { "epoch": 2.44, "grad_norm": 4.390397389469206, "learning_rate": 1.0187948817633963e-06, "loss": 0.3234, "step": 57690 }, { "epoch": 2.44, "grad_norm": 4.030540324968917, "learning_rate": 1.0180501951630163e-06, "loss": 0.3272, "step": 57695 }, { "epoch": 2.44, "grad_norm": 4.186967476322787, "learning_rate": 1.0173057499765237e-06, "loss": 0.313, "step": 57700 }, { "epoch": 2.44, "grad_norm": 4.7722700856943385, "learning_rate": 1.0165615462490535e-06, "loss": 0.3264, "step": 57705 }, { "epoch": 2.44, "grad_norm": 4.712247172111406, "learning_rate": 1.0158175840257218e-06, "loss": 0.3365, "step": 57710 }, { "epoch": 2.44, "grad_norm": 4.216025199584915, "learning_rate": 1.0150738633516345e-06, "loss": 0.3357, "step": 57715 }, { "epoch": 2.44, "grad_norm": 5.912224589842646, "learning_rate": 1.0143303842718816e-06, "loss": 0.3367, "step": 57720 }, { "epoch": 2.44, "grad_norm": 3.683108902872549, "learning_rate": 1.0135871468315361e-06, "loss": 0.328, "step": 57725 }, { "epoch": 2.44, "grad_norm": 3.9188250624071963, "learning_rate": 1.0128441510756598e-06, "loss": 0.3188, "step": 57730 }, { "epoch": 2.44, "grad_norm": 4.380345490609457, "learning_rate": 1.0121013970492993e-06, "loss": 0.3269, "step": 57735 }, { "epoch": 2.44, "grad_norm": 4.2589191332529905, "learning_rate": 1.0113588847974831e-06, "loss": 0.3494, "step": 57740 }, { "epoch": 2.44, "grad_norm": 4.363706637948588, "learning_rate": 1.010616614365229e-06, "loss": 0.3336, "step": 57745 }, { "epoch": 2.44, "grad_norm": 3.8149506810636016, "learning_rate": 1.0098745857975377e-06, "loss": 0.3549, "step": 57750 }, { "epoch": 2.44, "grad_norm": 3.5876270672835022, "learning_rate": 1.009132799139399e-06, "loss": 0.3352, "step": 57755 }, { "epoch": 2.44, "grad_norm": 4.863643630747743, "learning_rate": 1.0083912544357815e-06, "loss": 0.3339, "step": 57760 }, { "epoch": 2.44, "grad_norm": 4.1185300834446155, "learning_rate": 1.0076499517316452e-06, "loss": 0.3359, "step": 57765 }, { "epoch": 2.44, "grad_norm": 4.494898866158834, "learning_rate": 1.0069088910719332e-06, "loss": 0.3536, "step": 57770 }, { "epoch": 2.45, "grad_norm": 3.81819663192064, "learning_rate": 1.0061680725015716e-06, "loss": 0.329, "step": 57775 }, { "epoch": 2.45, "grad_norm": 5.106686848080117, "learning_rate": 1.0054274960654758e-06, "loss": 0.353, "step": 57780 }, { "epoch": 2.45, "grad_norm": 4.540619469094639, "learning_rate": 1.0046871618085463e-06, "loss": 0.3633, "step": 57785 }, { "epoch": 2.45, "grad_norm": 4.170156793140971, "learning_rate": 1.0039470697756636e-06, "loss": 0.337, "step": 57790 }, { "epoch": 2.45, "grad_norm": 3.783237611291634, "learning_rate": 1.0032072200116993e-06, "loss": 0.338, "step": 57795 }, { "epoch": 2.45, "grad_norm": 3.841812060669276, "learning_rate": 1.0024676125615091e-06, "loss": 0.3297, "step": 57800 }, { "epoch": 2.45, "grad_norm": 4.307104071591766, "learning_rate": 1.001728247469933e-06, "loss": 0.3347, "step": 57805 }, { "epoch": 2.45, "grad_norm": 4.672964346078582, "learning_rate": 1.0009891247817949e-06, "loss": 0.3353, "step": 57810 }, { "epoch": 2.45, "grad_norm": 5.529553607824022, "learning_rate": 1.0002502445419066e-06, "loss": 0.3676, "step": 57815 }, { "epoch": 2.45, "grad_norm": 4.589727959893476, "learning_rate": 9.995116067950654e-07, "loss": 0.3516, "step": 57820 }, { "epoch": 2.45, "grad_norm": 5.051004716091924, "learning_rate": 9.987732115860521e-07, "loss": 0.3179, "step": 57825 }, { "epoch": 2.45, "grad_norm": 4.015515864329743, "learning_rate": 9.980350589596315e-07, "loss": 0.3207, "step": 57830 }, { "epoch": 2.45, "grad_norm": 4.111480734761745, "learning_rate": 9.972971489605572e-07, "loss": 0.3558, "step": 57835 }, { "epoch": 2.45, "grad_norm": 4.34980478291329, "learning_rate": 9.965594816335677e-07, "loss": 0.3442, "step": 57840 }, { "epoch": 2.45, "grad_norm": 4.030695302400033, "learning_rate": 9.95822057023383e-07, "loss": 0.3362, "step": 57845 }, { "epoch": 2.45, "grad_norm": 4.0218469246048825, "learning_rate": 9.950848751747123e-07, "loss": 0.3321, "step": 57850 }, { "epoch": 2.45, "grad_norm": 5.084027852516597, "learning_rate": 9.9434793613225e-07, "loss": 0.3481, "step": 57855 }, { "epoch": 2.45, "grad_norm": 4.918143296888962, "learning_rate": 9.936112399406717e-07, "loss": 0.3273, "step": 57860 }, { "epoch": 2.45, "grad_norm": 3.8605849080771644, "learning_rate": 9.928747866446432e-07, "loss": 0.3569, "step": 57865 }, { "epoch": 2.45, "grad_norm": 4.562713887654944, "learning_rate": 9.921385762888126e-07, "loss": 0.3239, "step": 57870 }, { "epoch": 2.45, "grad_norm": 6.868384103138348, "learning_rate": 9.914026089178158e-07, "loss": 0.3381, "step": 57875 }, { "epoch": 2.45, "grad_norm": 5.955612731139122, "learning_rate": 9.906668845762702e-07, "loss": 0.3303, "step": 57880 }, { "epoch": 2.45, "grad_norm": 5.144102524507659, "learning_rate": 9.899314033087816e-07, "loss": 0.3496, "step": 57885 }, { "epoch": 2.45, "grad_norm": 4.104408580151624, "learning_rate": 9.891961651599407e-07, "loss": 0.3294, "step": 57890 }, { "epoch": 2.45, "grad_norm": 4.114335058040739, "learning_rate": 9.884611701743214e-07, "loss": 0.3657, "step": 57895 }, { "epoch": 2.45, "grad_norm": 3.884242465242713, "learning_rate": 9.877264183964847e-07, "loss": 0.332, "step": 57900 }, { "epoch": 2.45, "grad_norm": 4.638497784630729, "learning_rate": 9.869919098709786e-07, "loss": 0.326, "step": 57905 }, { "epoch": 2.45, "grad_norm": 3.9003398205687696, "learning_rate": 9.8625764464233e-07, "loss": 0.3542, "step": 57910 }, { "epoch": 2.45, "grad_norm": 4.503952794337806, "learning_rate": 9.85523622755058e-07, "loss": 0.3478, "step": 57915 }, { "epoch": 2.45, "grad_norm": 3.990727130641454, "learning_rate": 9.847898442536653e-07, "loss": 0.3239, "step": 57920 }, { "epoch": 2.45, "grad_norm": 4.119265451764556, "learning_rate": 9.840563091826356e-07, "loss": 0.3126, "step": 57925 }, { "epoch": 2.45, "grad_norm": 4.10291623220106, "learning_rate": 9.833230175864421e-07, "loss": 0.3438, "step": 57930 }, { "epoch": 2.45, "grad_norm": 4.011869773449787, "learning_rate": 9.82589969509543e-07, "loss": 0.3451, "step": 57935 }, { "epoch": 2.45, "grad_norm": 3.997151637525354, "learning_rate": 9.818571649963815e-07, "loss": 0.3231, "step": 57940 }, { "epoch": 2.45, "grad_norm": 9.557629859936185, "learning_rate": 9.81124604091383e-07, "loss": 0.3356, "step": 57945 }, { "epoch": 2.45, "grad_norm": 3.921715430224236, "learning_rate": 9.803922868389621e-07, "loss": 0.319, "step": 57950 }, { "epoch": 2.45, "grad_norm": 4.2303711254001435, "learning_rate": 9.796602132835175e-07, "loss": 0.3398, "step": 57955 }, { "epoch": 2.45, "grad_norm": 3.6323853353094595, "learning_rate": 9.789283834694314e-07, "loss": 0.2968, "step": 57960 }, { "epoch": 2.45, "grad_norm": 4.311430514944957, "learning_rate": 9.781967974410722e-07, "loss": 0.302, "step": 57965 }, { "epoch": 2.45, "grad_norm": 5.043047224285803, "learning_rate": 9.77465455242796e-07, "loss": 0.3449, "step": 57970 }, { "epoch": 2.45, "grad_norm": 3.9690928373540038, "learning_rate": 9.76734356918939e-07, "loss": 0.3133, "step": 57975 }, { "epoch": 2.45, "grad_norm": 4.048784510867277, "learning_rate": 9.760035025138287e-07, "loss": 0.3408, "step": 57980 }, { "epoch": 2.45, "grad_norm": 4.421545900895566, "learning_rate": 9.752728920717714e-07, "loss": 0.3474, "step": 57985 }, { "epoch": 2.45, "grad_norm": 4.014967560969097, "learning_rate": 9.74542525637065e-07, "loss": 0.3394, "step": 57990 }, { "epoch": 2.45, "grad_norm": 4.091823324036924, "learning_rate": 9.738124032539863e-07, "loss": 0.3306, "step": 57995 }, { "epoch": 2.45, "grad_norm": 3.8277986429628736, "learning_rate": 9.730825249668025e-07, "loss": 0.3106, "step": 58000 }, { "epoch": 2.45, "grad_norm": 4.211996454142268, "learning_rate": 9.723528908197627e-07, "loss": 0.3227, "step": 58005 }, { "epoch": 2.46, "grad_norm": 3.7358848196516883, "learning_rate": 9.716235008571057e-07, "loss": 0.3329, "step": 58010 }, { "epoch": 2.46, "grad_norm": 4.022325646444267, "learning_rate": 9.708943551230477e-07, "loss": 0.3216, "step": 58015 }, { "epoch": 2.46, "grad_norm": 4.343900784186914, "learning_rate": 9.701654536617972e-07, "loss": 0.3365, "step": 58020 }, { "epoch": 2.46, "grad_norm": 3.9269734842288497, "learning_rate": 9.69436796517546e-07, "loss": 0.3285, "step": 58025 }, { "epoch": 2.46, "grad_norm": 4.007266108796683, "learning_rate": 9.687083837344686e-07, "loss": 0.3744, "step": 58030 }, { "epoch": 2.46, "grad_norm": 4.3212273849333895, "learning_rate": 9.679802153567269e-07, "loss": 0.334, "step": 58035 }, { "epoch": 2.46, "grad_norm": 3.9806066433569924, "learning_rate": 9.672522914284698e-07, "loss": 0.335, "step": 58040 }, { "epoch": 2.46, "grad_norm": 5.6812938298816436, "learning_rate": 9.665246119938255e-07, "loss": 0.3446, "step": 58045 }, { "epoch": 2.46, "grad_norm": 4.059877876749104, "learning_rate": 9.657971770969132e-07, "loss": 0.3464, "step": 58050 }, { "epoch": 2.46, "grad_norm": 4.710145415616947, "learning_rate": 9.650699867818352e-07, "loss": 0.3226, "step": 58055 }, { "epoch": 2.46, "grad_norm": 4.26468005775309, "learning_rate": 9.643430410926796e-07, "loss": 0.3404, "step": 58060 }, { "epoch": 2.46, "grad_norm": 4.758440218495642, "learning_rate": 9.636163400735171e-07, "loss": 0.3179, "step": 58065 }, { "epoch": 2.46, "grad_norm": 5.176114235529649, "learning_rate": 9.628898837684058e-07, "loss": 0.34, "step": 58070 }, { "epoch": 2.46, "grad_norm": 3.720959094192976, "learning_rate": 9.621636722213911e-07, "loss": 0.3297, "step": 58075 }, { "epoch": 2.46, "grad_norm": 4.198156584241073, "learning_rate": 9.614377054764972e-07, "loss": 0.3628, "step": 58080 }, { "epoch": 2.46, "grad_norm": 3.7058224136965374, "learning_rate": 9.607119835777395e-07, "loss": 0.3496, "step": 58085 }, { "epoch": 2.46, "grad_norm": 4.186234773337296, "learning_rate": 9.599865065691177e-07, "loss": 0.3472, "step": 58090 }, { "epoch": 2.46, "grad_norm": 4.301061000952912, "learning_rate": 9.592612744946122e-07, "loss": 0.323, "step": 58095 }, { "epoch": 2.46, "grad_norm": 3.9779878522924412, "learning_rate": 9.585362873981934e-07, "loss": 0.3483, "step": 58100 }, { "epoch": 2.46, "grad_norm": 4.594277868331492, "learning_rate": 9.578115453238146e-07, "loss": 0.3302, "step": 58105 }, { "epoch": 2.46, "grad_norm": 3.9350733920100303, "learning_rate": 9.570870483154166e-07, "loss": 0.3312, "step": 58110 }, { "epoch": 2.46, "grad_norm": 3.6554657083400315, "learning_rate": 9.563627964169208e-07, "loss": 0.3166, "step": 58115 }, { "epoch": 2.46, "grad_norm": 3.693416019850289, "learning_rate": 9.55638789672238e-07, "loss": 0.3074, "step": 58120 }, { "epoch": 2.46, "grad_norm": 4.463928052541967, "learning_rate": 9.549150281252633e-07, "loss": 0.3326, "step": 58125 }, { "epoch": 2.46, "grad_norm": 5.370003220954758, "learning_rate": 9.54191511819874e-07, "loss": 0.3788, "step": 58130 }, { "epoch": 2.46, "grad_norm": 4.208975837731788, "learning_rate": 9.534682407999363e-07, "loss": 0.3286, "step": 58135 }, { "epoch": 2.46, "grad_norm": 3.772740363982837, "learning_rate": 9.527452151093003e-07, "loss": 0.3068, "step": 58140 }, { "epoch": 2.46, "grad_norm": 4.344465173934658, "learning_rate": 9.520224347918011e-07, "loss": 0.3276, "step": 58145 }, { "epoch": 2.46, "grad_norm": 4.7909641611039735, "learning_rate": 9.512998998912565e-07, "loss": 0.3632, "step": 58150 }, { "epoch": 2.46, "grad_norm": 4.020885442292411, "learning_rate": 9.50577610451473e-07, "loss": 0.3348, "step": 58155 }, { "epoch": 2.46, "grad_norm": 4.2546838652340355, "learning_rate": 9.498555665162423e-07, "loss": 0.3195, "step": 58160 }, { "epoch": 2.46, "grad_norm": 3.8440447611500277, "learning_rate": 9.491337681293372e-07, "loss": 0.3193, "step": 58165 }, { "epoch": 2.46, "grad_norm": 5.114406331294704, "learning_rate": 9.484122153345193e-07, "loss": 0.3688, "step": 58170 }, { "epoch": 2.46, "grad_norm": 3.5736438617384545, "learning_rate": 9.476909081755348e-07, "loss": 0.3284, "step": 58175 }, { "epoch": 2.46, "grad_norm": 4.775883518369523, "learning_rate": 9.469698466961153e-07, "loss": 0.3473, "step": 58180 }, { "epoch": 2.46, "grad_norm": 4.1274448455720165, "learning_rate": 9.462490309399741e-07, "loss": 0.3255, "step": 58185 }, { "epoch": 2.46, "grad_norm": 3.95530638738191, "learning_rate": 9.455284609508131e-07, "loss": 0.3115, "step": 58190 }, { "epoch": 2.46, "grad_norm": 3.895430299413554, "learning_rate": 9.448081367723205e-07, "loss": 0.335, "step": 58195 }, { "epoch": 2.46, "grad_norm": 4.455174541729543, "learning_rate": 9.440880584481637e-07, "loss": 0.3426, "step": 58200 }, { "epoch": 2.46, "grad_norm": 4.070652128340231, "learning_rate": 9.433682260220011e-07, "loss": 0.3334, "step": 58205 }, { "epoch": 2.46, "grad_norm": 4.011371457504796, "learning_rate": 9.426486395374757e-07, "loss": 0.3299, "step": 58210 }, { "epoch": 2.46, "grad_norm": 3.8882679379722207, "learning_rate": 9.419292990382101e-07, "loss": 0.3501, "step": 58215 }, { "epoch": 2.46, "grad_norm": 4.4677649919736595, "learning_rate": 9.412102045678173e-07, "loss": 0.3261, "step": 58220 }, { "epoch": 2.46, "grad_norm": 3.9680582153818484, "learning_rate": 9.404913561698964e-07, "loss": 0.3285, "step": 58225 }, { "epoch": 2.46, "grad_norm": 4.122767508663721, "learning_rate": 9.397727538880247e-07, "loss": 0.3396, "step": 58230 }, { "epoch": 2.46, "grad_norm": 4.1124101755867635, "learning_rate": 9.390543977657718e-07, "loss": 0.323, "step": 58235 }, { "epoch": 2.46, "grad_norm": 4.291380443769415, "learning_rate": 9.383362878466884e-07, "loss": 0.3665, "step": 58240 }, { "epoch": 2.47, "grad_norm": 3.7092245249342137, "learning_rate": 9.376184241743136e-07, "loss": 0.3219, "step": 58245 }, { "epoch": 2.47, "grad_norm": 3.687090341626141, "learning_rate": 9.369008067921659e-07, "loss": 0.3458, "step": 58250 }, { "epoch": 2.47, "grad_norm": 3.973464197846426, "learning_rate": 9.361834357437544e-07, "loss": 0.3368, "step": 58255 }, { "epoch": 2.47, "grad_norm": 5.208980161266249, "learning_rate": 9.35466311072572e-07, "loss": 0.3184, "step": 58260 }, { "epoch": 2.47, "grad_norm": 3.6889042926387217, "learning_rate": 9.34749432822093e-07, "loss": 0.3148, "step": 58265 }, { "epoch": 2.47, "grad_norm": 5.180494058250353, "learning_rate": 9.340328010357819e-07, "loss": 0.3359, "step": 58270 }, { "epoch": 2.47, "grad_norm": 4.893159481283603, "learning_rate": 9.333164157570862e-07, "loss": 0.3124, "step": 58275 }, { "epoch": 2.47, "grad_norm": 5.644463092156722, "learning_rate": 9.326002770294362e-07, "loss": 0.354, "step": 58280 }, { "epoch": 2.47, "grad_norm": 4.666878066985316, "learning_rate": 9.318843848962506e-07, "loss": 0.3507, "step": 58285 }, { "epoch": 2.47, "grad_norm": 5.590830100358635, "learning_rate": 9.311687394009317e-07, "loss": 0.3462, "step": 58290 }, { "epoch": 2.47, "grad_norm": 4.595728307105326, "learning_rate": 9.304533405868682e-07, "loss": 0.3155, "step": 58295 }, { "epoch": 2.47, "grad_norm": 4.620419596737383, "learning_rate": 9.297381884974305e-07, "loss": 0.3509, "step": 58300 }, { "epoch": 2.47, "grad_norm": 3.52902342051602, "learning_rate": 9.290232831759783e-07, "loss": 0.3291, "step": 58305 }, { "epoch": 2.47, "grad_norm": 3.831226808705749, "learning_rate": 9.283086246658512e-07, "loss": 0.3199, "step": 58310 }, { "epoch": 2.47, "grad_norm": 4.346870057432061, "learning_rate": 9.275942130103805e-07, "loss": 0.3573, "step": 58315 }, { "epoch": 2.47, "grad_norm": 3.932522509004938, "learning_rate": 9.26880048252875e-07, "loss": 0.3355, "step": 58320 }, { "epoch": 2.47, "grad_norm": 4.47507555533065, "learning_rate": 9.261661304366349e-07, "loss": 0.3332, "step": 58325 }, { "epoch": 2.47, "grad_norm": 4.36904969334475, "learning_rate": 9.254524596049436e-07, "loss": 0.315, "step": 58330 }, { "epoch": 2.47, "grad_norm": 4.066510037309596, "learning_rate": 9.247390358010666e-07, "loss": 0.3097, "step": 58335 }, { "epoch": 2.47, "grad_norm": 3.846180841540975, "learning_rate": 9.240258590682577e-07, "loss": 0.3436, "step": 58340 }, { "epoch": 2.47, "grad_norm": 4.120987707775423, "learning_rate": 9.233129294497556e-07, "loss": 0.3184, "step": 58345 }, { "epoch": 2.47, "grad_norm": 3.4204227091229478, "learning_rate": 9.226002469887818e-07, "loss": 0.301, "step": 58350 }, { "epoch": 2.47, "grad_norm": 4.316774541312869, "learning_rate": 9.21887811728544e-07, "loss": 0.3174, "step": 58355 }, { "epoch": 2.47, "grad_norm": 3.897440013264647, "learning_rate": 9.21175623712236e-07, "loss": 0.3193, "step": 58360 }, { "epoch": 2.47, "grad_norm": 4.558413012499301, "learning_rate": 9.204636829830366e-07, "loss": 0.3292, "step": 58365 }, { "epoch": 2.47, "grad_norm": 4.46212629300126, "learning_rate": 9.197519895841062e-07, "loss": 0.3283, "step": 58370 }, { "epoch": 2.47, "grad_norm": 3.7081494631482386, "learning_rate": 9.190405435585942e-07, "loss": 0.3201, "step": 58375 }, { "epoch": 2.47, "grad_norm": 4.821804232453492, "learning_rate": 9.183293449496338e-07, "loss": 0.3316, "step": 58380 }, { "epoch": 2.47, "grad_norm": 4.008789381846802, "learning_rate": 9.176183938003413e-07, "loss": 0.3184, "step": 58385 }, { "epoch": 2.47, "grad_norm": 4.954433012395198, "learning_rate": 9.16907690153821e-07, "loss": 0.3239, "step": 58390 }, { "epoch": 2.47, "grad_norm": 5.41426769054615, "learning_rate": 9.161972340531611e-07, "loss": 0.3532, "step": 58395 }, { "epoch": 2.47, "grad_norm": 4.485994223007647, "learning_rate": 9.154870255414323e-07, "loss": 0.3359, "step": 58400 }, { "epoch": 2.47, "grad_norm": 4.35881063486017, "learning_rate": 9.147770646616938e-07, "loss": 0.3144, "step": 58405 }, { "epoch": 2.47, "grad_norm": 5.091901587978427, "learning_rate": 9.140673514569887e-07, "loss": 0.3263, "step": 58410 }, { "epoch": 2.47, "grad_norm": 3.7238183652070664, "learning_rate": 9.133578859703457e-07, "loss": 0.3433, "step": 58415 }, { "epoch": 2.47, "grad_norm": 4.432782061707567, "learning_rate": 9.126486682447755e-07, "loss": 0.3423, "step": 58420 }, { "epoch": 2.47, "grad_norm": 4.11916193849522, "learning_rate": 9.119396983232765e-07, "loss": 0.3232, "step": 58425 }, { "epoch": 2.47, "grad_norm": 3.8425905287039237, "learning_rate": 9.112309762488331e-07, "loss": 0.3469, "step": 58430 }, { "epoch": 2.47, "grad_norm": 4.2075187448700415, "learning_rate": 9.105225020644109e-07, "loss": 0.3435, "step": 58435 }, { "epoch": 2.47, "grad_norm": 4.2003156453723935, "learning_rate": 9.09814275812963e-07, "loss": 0.3452, "step": 58440 }, { "epoch": 2.47, "grad_norm": 4.195084230276883, "learning_rate": 9.091062975374287e-07, "loss": 0.3138, "step": 58445 }, { "epoch": 2.47, "grad_norm": 4.1021101967404014, "learning_rate": 9.083985672807283e-07, "loss": 0.3376, "step": 58450 }, { "epoch": 2.47, "grad_norm": 3.8959783252176603, "learning_rate": 9.076910850857706e-07, "loss": 0.3413, "step": 58455 }, { "epoch": 2.47, "grad_norm": 4.798082719633067, "learning_rate": 9.069838509954499e-07, "loss": 0.2872, "step": 58460 }, { "epoch": 2.47, "grad_norm": 4.074653191472527, "learning_rate": 9.062768650526399e-07, "loss": 0.3615, "step": 58465 }, { "epoch": 2.47, "grad_norm": 4.256795681957427, "learning_rate": 9.055701273002071e-07, "loss": 0.3365, "step": 58470 }, { "epoch": 2.47, "grad_norm": 5.007933346692168, "learning_rate": 9.04863637780995e-07, "loss": 0.3233, "step": 58475 }, { "epoch": 2.47, "grad_norm": 4.192174292196861, "learning_rate": 9.041573965378381e-07, "loss": 0.3163, "step": 58480 }, { "epoch": 2.48, "grad_norm": 4.3235963385365705, "learning_rate": 9.03451403613555e-07, "loss": 0.3533, "step": 58485 }, { "epoch": 2.48, "grad_norm": 4.000500032147201, "learning_rate": 9.027456590509453e-07, "loss": 0.3455, "step": 58490 }, { "epoch": 2.48, "grad_norm": 4.551394077317539, "learning_rate": 9.020401628927972e-07, "loss": 0.332, "step": 58495 }, { "epoch": 2.48, "grad_norm": 5.288404178687894, "learning_rate": 9.013349151818845e-07, "loss": 0.3075, "step": 58500 }, { "epoch": 2.48, "grad_norm": 4.104596820504148, "learning_rate": 9.006299159609616e-07, "loss": 0.3333, "step": 58505 }, { "epoch": 2.48, "grad_norm": 3.778141298216287, "learning_rate": 8.999251652727714e-07, "loss": 0.3337, "step": 58510 }, { "epoch": 2.48, "grad_norm": 4.342982785724166, "learning_rate": 8.992206631600431e-07, "loss": 0.3033, "step": 58515 }, { "epoch": 2.48, "grad_norm": 3.8216353786389456, "learning_rate": 8.985164096654853e-07, "loss": 0.3295, "step": 58520 }, { "epoch": 2.48, "grad_norm": 4.054447736403844, "learning_rate": 8.97812404831796e-07, "loss": 0.3246, "step": 58525 }, { "epoch": 2.48, "grad_norm": 3.72847822466765, "learning_rate": 8.971086487016589e-07, "loss": 0.3436, "step": 58530 }, { "epoch": 2.48, "grad_norm": 5.505367402810515, "learning_rate": 8.964051413177377e-07, "loss": 0.3244, "step": 58535 }, { "epoch": 2.48, "grad_norm": 4.880147906450789, "learning_rate": 8.957018827226849e-07, "loss": 0.335, "step": 58540 }, { "epoch": 2.48, "grad_norm": 3.8977944968886624, "learning_rate": 8.949988729591375e-07, "loss": 0.3612, "step": 58545 }, { "epoch": 2.48, "grad_norm": 4.336580404163079, "learning_rate": 8.94296112069718e-07, "loss": 0.3239, "step": 58550 }, { "epoch": 2.48, "grad_norm": 3.8582104762199414, "learning_rate": 8.935936000970303e-07, "loss": 0.3248, "step": 58555 }, { "epoch": 2.48, "grad_norm": 5.076738611723542, "learning_rate": 8.928913370836667e-07, "loss": 0.317, "step": 58560 }, { "epoch": 2.48, "grad_norm": 3.9274700203818815, "learning_rate": 8.92189323072205e-07, "loss": 0.3261, "step": 58565 }, { "epoch": 2.48, "grad_norm": 3.9777906919751045, "learning_rate": 8.914875581052029e-07, "loss": 0.3443, "step": 58570 }, { "epoch": 2.48, "grad_norm": 3.7821678402872556, "learning_rate": 8.907860422252085e-07, "loss": 0.3316, "step": 58575 }, { "epoch": 2.48, "grad_norm": 3.943707057062974, "learning_rate": 8.900847754747533e-07, "loss": 0.3002, "step": 58580 }, { "epoch": 2.48, "grad_norm": 4.1933186838592595, "learning_rate": 8.89383757896351e-07, "loss": 0.3053, "step": 58585 }, { "epoch": 2.48, "grad_norm": 5.446519441990898, "learning_rate": 8.886829895325033e-07, "loss": 0.334, "step": 58590 }, { "epoch": 2.48, "grad_norm": 4.00209566047743, "learning_rate": 8.879824704256956e-07, "loss": 0.3259, "step": 58595 }, { "epoch": 2.48, "grad_norm": 3.935041893481826, "learning_rate": 8.872822006183996e-07, "loss": 0.3122, "step": 58600 }, { "epoch": 2.48, "grad_norm": 4.380111008563508, "learning_rate": 8.865821801530677e-07, "loss": 0.3192, "step": 58605 }, { "epoch": 2.48, "grad_norm": 3.8277054381220643, "learning_rate": 8.858824090721424e-07, "loss": 0.3362, "step": 58610 }, { "epoch": 2.48, "grad_norm": 3.8100739264799737, "learning_rate": 8.851828874180491e-07, "loss": 0.3097, "step": 58615 }, { "epoch": 2.48, "grad_norm": 3.768931206364091, "learning_rate": 8.844836152331954e-07, "loss": 0.3316, "step": 58620 }, { "epoch": 2.48, "grad_norm": 3.7458001777082153, "learning_rate": 8.837845925599775e-07, "loss": 0.3182, "step": 58625 }, { "epoch": 2.48, "grad_norm": 3.948371023352602, "learning_rate": 8.83085819440776e-07, "loss": 0.3382, "step": 58630 }, { "epoch": 2.48, "grad_norm": 4.135767218043399, "learning_rate": 8.823872959179547e-07, "loss": 0.3225, "step": 58635 }, { "epoch": 2.48, "grad_norm": 3.893008466657662, "learning_rate": 8.81689022033862e-07, "loss": 0.3167, "step": 58640 }, { "epoch": 2.48, "grad_norm": 4.719960746100174, "learning_rate": 8.809909978308323e-07, "loss": 0.331, "step": 58645 }, { "epoch": 2.48, "grad_norm": 3.87942159322901, "learning_rate": 8.802932233511874e-07, "loss": 0.3454, "step": 58650 }, { "epoch": 2.48, "grad_norm": 3.8344993831730423, "learning_rate": 8.795956986372278e-07, "loss": 0.3557, "step": 58655 }, { "epoch": 2.48, "grad_norm": 4.054187803961821, "learning_rate": 8.788984237312437e-07, "loss": 0.33, "step": 58660 }, { "epoch": 2.48, "grad_norm": 4.190706683770032, "learning_rate": 8.782013986755094e-07, "loss": 0.3282, "step": 58665 }, { "epoch": 2.48, "grad_norm": 8.16308410311078, "learning_rate": 8.775046235122847e-07, "loss": 0.3263, "step": 58670 }, { "epoch": 2.48, "grad_norm": 3.8777335600294487, "learning_rate": 8.768080982838096e-07, "loss": 0.3231, "step": 58675 }, { "epoch": 2.48, "grad_norm": 5.084343449050413, "learning_rate": 8.761118230323146e-07, "loss": 0.3229, "step": 58680 }, { "epoch": 2.48, "grad_norm": 6.077679419743137, "learning_rate": 8.754157978000133e-07, "loss": 0.3362, "step": 58685 }, { "epoch": 2.48, "grad_norm": 4.277839053969695, "learning_rate": 8.747200226291019e-07, "loss": 0.3361, "step": 58690 }, { "epoch": 2.48, "grad_norm": 4.334987602613929, "learning_rate": 8.740244975617639e-07, "loss": 0.3436, "step": 58695 }, { "epoch": 2.48, "grad_norm": 4.445038226272411, "learning_rate": 8.733292226401691e-07, "loss": 0.3232, "step": 58700 }, { "epoch": 2.48, "grad_norm": 4.08888295856013, "learning_rate": 8.726341979064662e-07, "loss": 0.3215, "step": 58705 }, { "epoch": 2.48, "grad_norm": 3.357013434606589, "learning_rate": 8.719394234027945e-07, "loss": 0.3073, "step": 58710 }, { "epoch": 2.48, "grad_norm": 3.804892504339479, "learning_rate": 8.712448991712757e-07, "loss": 0.3333, "step": 58715 }, { "epoch": 2.49, "grad_norm": 3.6189021674492063, "learning_rate": 8.70550625254018e-07, "loss": 0.3127, "step": 58720 }, { "epoch": 2.49, "grad_norm": 4.138400164281322, "learning_rate": 8.698566016931114e-07, "loss": 0.3423, "step": 58725 }, { "epoch": 2.49, "grad_norm": 4.006190563873847, "learning_rate": 8.691628285306336e-07, "loss": 0.3462, "step": 58730 }, { "epoch": 2.49, "grad_norm": 4.260099285734865, "learning_rate": 8.684693058086463e-07, "loss": 0.3378, "step": 58735 }, { "epoch": 2.49, "grad_norm": 3.981250927784904, "learning_rate": 8.677760335691942e-07, "loss": 0.3259, "step": 58740 }, { "epoch": 2.49, "grad_norm": 4.099213153900137, "learning_rate": 8.670830118543094e-07, "loss": 0.3342, "step": 58745 }, { "epoch": 2.49, "grad_norm": 4.3514594057971925, "learning_rate": 8.663902407060093e-07, "loss": 0.3037, "step": 58750 }, { "epoch": 2.49, "grad_norm": 3.7539554067145207, "learning_rate": 8.656977201662908e-07, "loss": 0.3225, "step": 58755 }, { "epoch": 2.49, "grad_norm": 3.6137390912871874, "learning_rate": 8.650054502771421e-07, "loss": 0.3421, "step": 58760 }, { "epoch": 2.49, "grad_norm": 4.4318814724480315, "learning_rate": 8.643134310805346e-07, "loss": 0.3131, "step": 58765 }, { "epoch": 2.49, "grad_norm": 4.515224671825227, "learning_rate": 8.636216626184196e-07, "loss": 0.3269, "step": 58770 }, { "epoch": 2.49, "grad_norm": 4.23263990082479, "learning_rate": 8.629301449327399e-07, "loss": 0.36, "step": 58775 }, { "epoch": 2.49, "grad_norm": 3.8923881496274224, "learning_rate": 8.622388780654183e-07, "loss": 0.3457, "step": 58780 }, { "epoch": 2.49, "grad_norm": 3.834103722103489, "learning_rate": 8.615478620583678e-07, "loss": 0.3164, "step": 58785 }, { "epoch": 2.49, "grad_norm": 3.7258403880280575, "learning_rate": 8.608570969534797e-07, "loss": 0.3223, "step": 58790 }, { "epoch": 2.49, "grad_norm": 4.192899136925136, "learning_rate": 8.60166582792632e-07, "loss": 0.3226, "step": 58795 }, { "epoch": 2.49, "grad_norm": 3.6977824554796874, "learning_rate": 8.594763196176903e-07, "loss": 0.3016, "step": 58800 }, { "epoch": 2.49, "grad_norm": 4.0325258340937635, "learning_rate": 8.587863074705044e-07, "loss": 0.3451, "step": 58805 }, { "epoch": 2.49, "grad_norm": 4.577778187756743, "learning_rate": 8.580965463929053e-07, "loss": 0.3273, "step": 58810 }, { "epoch": 2.49, "grad_norm": 4.141649443153325, "learning_rate": 8.574070364267117e-07, "loss": 0.3389, "step": 58815 }, { "epoch": 2.49, "grad_norm": 4.2485709666449205, "learning_rate": 8.567177776137286e-07, "loss": 0.3187, "step": 58820 }, { "epoch": 2.49, "grad_norm": 3.964900119715837, "learning_rate": 8.560287699957409e-07, "loss": 0.3346, "step": 58825 }, { "epoch": 2.49, "grad_norm": 4.051902085781984, "learning_rate": 8.553400136145224e-07, "loss": 0.332, "step": 58830 }, { "epoch": 2.49, "grad_norm": 3.8768151973188063, "learning_rate": 8.546515085118317e-07, "loss": 0.3162, "step": 58835 }, { "epoch": 2.49, "grad_norm": 4.117639526938961, "learning_rate": 8.539632547294086e-07, "loss": 0.3452, "step": 58840 }, { "epoch": 2.49, "grad_norm": 4.158855921756477, "learning_rate": 8.532752523089799e-07, "loss": 0.346, "step": 58845 }, { "epoch": 2.49, "grad_norm": 3.731733355289314, "learning_rate": 8.525875012922591e-07, "loss": 0.3284, "step": 58850 }, { "epoch": 2.49, "grad_norm": 4.214768251587851, "learning_rate": 8.519000017209422e-07, "loss": 0.3315, "step": 58855 }, { "epoch": 2.49, "grad_norm": 4.041421851831155, "learning_rate": 8.512127536367082e-07, "loss": 0.3264, "step": 58860 }, { "epoch": 2.49, "grad_norm": 3.995042913375904, "learning_rate": 8.505257570812247e-07, "loss": 0.3497, "step": 58865 }, { "epoch": 2.49, "grad_norm": 4.060052209606571, "learning_rate": 8.498390120961425e-07, "loss": 0.3695, "step": 58870 }, { "epoch": 2.49, "grad_norm": 4.775867109854676, "learning_rate": 8.491525187230959e-07, "loss": 0.3194, "step": 58875 }, { "epoch": 2.49, "grad_norm": 4.038957450063596, "learning_rate": 8.484662770037045e-07, "loss": 0.3582, "step": 58880 }, { "epoch": 2.49, "grad_norm": 3.89067126182076, "learning_rate": 8.477802869795759e-07, "loss": 0.3292, "step": 58885 }, { "epoch": 2.49, "grad_norm": 4.854609023700863, "learning_rate": 8.47094548692296e-07, "loss": 0.3359, "step": 58890 }, { "epoch": 2.49, "grad_norm": 3.966840882256013, "learning_rate": 8.464090621834414e-07, "loss": 0.3294, "step": 58895 }, { "epoch": 2.49, "grad_norm": 4.429258963801496, "learning_rate": 8.457238274945701e-07, "loss": 0.3444, "step": 58900 }, { "epoch": 2.49, "grad_norm": 4.156591986012652, "learning_rate": 8.450388446672275e-07, "loss": 0.3405, "step": 58905 }, { "epoch": 2.49, "grad_norm": 4.067304446123939, "learning_rate": 8.443541137429401e-07, "loss": 0.313, "step": 58910 }, { "epoch": 2.49, "grad_norm": 4.039966795203793, "learning_rate": 8.436696347632217e-07, "loss": 0.3352, "step": 58915 }, { "epoch": 2.49, "grad_norm": 4.228929101556266, "learning_rate": 8.429854077695715e-07, "loss": 0.3345, "step": 58920 }, { "epoch": 2.49, "grad_norm": 4.089246390736494, "learning_rate": 8.423014328034701e-07, "loss": 0.3427, "step": 58925 }, { "epoch": 2.49, "grad_norm": 3.807227350861328, "learning_rate": 8.41617709906386e-07, "loss": 0.3442, "step": 58930 }, { "epoch": 2.49, "grad_norm": 3.8634331730545606, "learning_rate": 8.409342391197722e-07, "loss": 0.332, "step": 58935 }, { "epoch": 2.49, "grad_norm": 3.8915481839237933, "learning_rate": 8.402510204850634e-07, "loss": 0.3052, "step": 58940 }, { "epoch": 2.49, "grad_norm": 4.370919345732317, "learning_rate": 8.395680540436824e-07, "loss": 0.3541, "step": 58945 }, { "epoch": 2.49, "grad_norm": 4.365954224798414, "learning_rate": 8.388853398370356e-07, "loss": 0.3251, "step": 58950 }, { "epoch": 2.5, "grad_norm": 3.7711686927919352, "learning_rate": 8.382028779065154e-07, "loss": 0.3547, "step": 58955 }, { "epoch": 2.5, "grad_norm": 3.825913535969419, "learning_rate": 8.375206682934939e-07, "loss": 0.3424, "step": 58960 }, { "epoch": 2.5, "grad_norm": 4.008513274760299, "learning_rate": 8.368387110393334e-07, "loss": 0.3438, "step": 58965 }, { "epoch": 2.5, "grad_norm": 4.781910330100673, "learning_rate": 8.361570061853785e-07, "loss": 0.3439, "step": 58970 }, { "epoch": 2.5, "grad_norm": 3.9399810172314, "learning_rate": 8.354755537729603e-07, "loss": 0.3402, "step": 58975 }, { "epoch": 2.5, "grad_norm": 4.170777503352065, "learning_rate": 8.347943538433916e-07, "loss": 0.3364, "step": 58980 }, { "epoch": 2.5, "grad_norm": 4.238833583444122, "learning_rate": 8.341134064379718e-07, "loss": 0.3021, "step": 58985 }, { "epoch": 2.5, "grad_norm": 3.8788168354739323, "learning_rate": 8.334327115979867e-07, "loss": 0.3066, "step": 58990 }, { "epoch": 2.5, "grad_norm": 4.555126485950414, "learning_rate": 8.327522693647016e-07, "loss": 0.3234, "step": 58995 }, { "epoch": 2.5, "grad_norm": 4.104557432455638, "learning_rate": 8.320720797793718e-07, "loss": 0.3456, "step": 59000 }, { "epoch": 2.5, "grad_norm": 3.951570563823209, "learning_rate": 8.313921428832361e-07, "loss": 0.3294, "step": 59005 }, { "epoch": 2.5, "grad_norm": 5.173219356345165, "learning_rate": 8.307124587175141e-07, "loss": 0.3394, "step": 59010 }, { "epoch": 2.5, "grad_norm": 4.6202244587136345, "learning_rate": 8.300330273234147e-07, "loss": 0.3427, "step": 59015 }, { "epoch": 2.5, "grad_norm": 4.429648675125979, "learning_rate": 8.293538487421299e-07, "loss": 0.3133, "step": 59020 }, { "epoch": 2.5, "grad_norm": 5.050367722234799, "learning_rate": 8.286749230148378e-07, "loss": 0.3245, "step": 59025 }, { "epoch": 2.5, "grad_norm": 4.0372871422143035, "learning_rate": 8.279962501826965e-07, "loss": 0.3546, "step": 59030 }, { "epoch": 2.5, "grad_norm": 3.4138522404107308, "learning_rate": 8.273178302868534e-07, "loss": 0.3045, "step": 59035 }, { "epoch": 2.5, "grad_norm": 4.306120606640471, "learning_rate": 8.26639663368441e-07, "loss": 0.3262, "step": 59040 }, { "epoch": 2.5, "grad_norm": 3.68966676374268, "learning_rate": 8.259617494685706e-07, "loss": 0.3307, "step": 59045 }, { "epoch": 2.5, "grad_norm": 4.095915542373566, "learning_rate": 8.252840886283453e-07, "loss": 0.3417, "step": 59050 }, { "epoch": 2.5, "grad_norm": 3.49749356746126, "learning_rate": 8.246066808888492e-07, "loss": 0.3165, "step": 59055 }, { "epoch": 2.5, "grad_norm": 3.9747725210497684, "learning_rate": 8.239295262911501e-07, "loss": 0.3125, "step": 59060 }, { "epoch": 2.5, "grad_norm": 5.025760428429545, "learning_rate": 8.232526248763028e-07, "loss": 0.3277, "step": 59065 }, { "epoch": 2.5, "grad_norm": 4.388603525408128, "learning_rate": 8.225759766853469e-07, "loss": 0.3377, "step": 59070 }, { "epoch": 2.5, "grad_norm": 3.961473987771514, "learning_rate": 8.218995817593034e-07, "loss": 0.3214, "step": 59075 }, { "epoch": 2.5, "grad_norm": 4.07421745834712, "learning_rate": 8.212234401391817e-07, "loss": 0.3318, "step": 59080 }, { "epoch": 2.5, "grad_norm": 4.069505368616721, "learning_rate": 8.205475518659734e-07, "loss": 0.3032, "step": 59085 }, { "epoch": 2.5, "grad_norm": 3.7108747535814857, "learning_rate": 8.198719169806568e-07, "loss": 0.3397, "step": 59090 }, { "epoch": 2.5, "grad_norm": 3.96862032196223, "learning_rate": 8.191965355241921e-07, "loss": 0.3257, "step": 59095 }, { "epoch": 2.5, "grad_norm": 4.316274806786525, "learning_rate": 8.185214075375269e-07, "loss": 0.3519, "step": 59100 }, { "epoch": 2.5, "grad_norm": 3.9830456047835145, "learning_rate": 8.178465330615926e-07, "loss": 0.3157, "step": 59105 }, { "epoch": 2.5, "grad_norm": 4.087839912675959, "learning_rate": 8.171719121373028e-07, "loss": 0.3188, "step": 59110 }, { "epoch": 2.5, "grad_norm": 3.843163064948869, "learning_rate": 8.164975448055606e-07, "loss": 0.3111, "step": 59115 }, { "epoch": 2.5, "grad_norm": 4.5514400925150404, "learning_rate": 8.158234311072477e-07, "loss": 0.3445, "step": 59120 }, { "epoch": 2.5, "grad_norm": 3.8350963836785006, "learning_rate": 8.151495710832368e-07, "loss": 0.318, "step": 59125 }, { "epoch": 2.5, "grad_norm": 3.649791019276955, "learning_rate": 8.144759647743794e-07, "loss": 0.3437, "step": 59130 }, { "epoch": 2.5, "grad_norm": 3.7371911867332406, "learning_rate": 8.138026122215148e-07, "loss": 0.3546, "step": 59135 }, { "epoch": 2.5, "grad_norm": 3.860935362757238, "learning_rate": 8.131295134654688e-07, "loss": 0.3558, "step": 59140 }, { "epoch": 2.5, "grad_norm": 4.476222308172504, "learning_rate": 8.124566685470458e-07, "loss": 0.3368, "step": 59145 }, { "epoch": 2.5, "grad_norm": 4.178226295157558, "learning_rate": 8.117840775070406e-07, "loss": 0.3059, "step": 59150 }, { "epoch": 2.5, "grad_norm": 4.239808204584898, "learning_rate": 8.111117403862301e-07, "loss": 0.3525, "step": 59155 }, { "epoch": 2.5, "grad_norm": 4.217491164551992, "learning_rate": 8.10439657225377e-07, "loss": 0.3346, "step": 59160 }, { "epoch": 2.5, "grad_norm": 3.941996504721705, "learning_rate": 8.097678280652255e-07, "loss": 0.3076, "step": 59165 }, { "epoch": 2.5, "grad_norm": 3.9255223873200253, "learning_rate": 8.090962529465079e-07, "loss": 0.3207, "step": 59170 }, { "epoch": 2.5, "grad_norm": 5.647812240194351, "learning_rate": 8.084249319099413e-07, "loss": 0.3231, "step": 59175 }, { "epoch": 2.5, "grad_norm": 4.7071907829865, "learning_rate": 8.077538649962224e-07, "loss": 0.3184, "step": 59180 }, { "epoch": 2.5, "grad_norm": 4.17655956924807, "learning_rate": 8.070830522460388e-07, "loss": 0.3338, "step": 59185 }, { "epoch": 2.51, "grad_norm": 4.2911878007222635, "learning_rate": 8.0641249370006e-07, "loss": 0.314, "step": 59190 }, { "epoch": 2.51, "grad_norm": 3.8579700572713893, "learning_rate": 8.057421893989376e-07, "loss": 0.3257, "step": 59195 }, { "epoch": 2.51, "grad_norm": 4.5675712924895455, "learning_rate": 8.050721393833122e-07, "loss": 0.3308, "step": 59200 }, { "epoch": 2.51, "grad_norm": 4.062517413885641, "learning_rate": 8.044023436938059e-07, "loss": 0.3216, "step": 59205 }, { "epoch": 2.51, "grad_norm": 3.753771273018451, "learning_rate": 8.037328023710284e-07, "loss": 0.3563, "step": 59210 }, { "epoch": 2.51, "grad_norm": 4.263291098167929, "learning_rate": 8.030635154555693e-07, "loss": 0.3145, "step": 59215 }, { "epoch": 2.51, "grad_norm": 3.9372479836477265, "learning_rate": 8.02394482988007e-07, "loss": 0.2997, "step": 59220 }, { "epoch": 2.51, "grad_norm": 3.818830632240378, "learning_rate": 8.017257050089039e-07, "loss": 0.3232, "step": 59225 }, { "epoch": 2.51, "grad_norm": 3.7747768088828217, "learning_rate": 8.010571815588037e-07, "loss": 0.301, "step": 59230 }, { "epoch": 2.51, "grad_norm": 3.660699962398027, "learning_rate": 8.003889126782377e-07, "loss": 0.334, "step": 59235 }, { "epoch": 2.51, "grad_norm": 6.282923676286064, "learning_rate": 7.997208984077232e-07, "loss": 0.3383, "step": 59240 }, { "epoch": 2.51, "grad_norm": 5.386209908148228, "learning_rate": 7.99053138787757e-07, "loss": 0.3411, "step": 59245 }, { "epoch": 2.51, "grad_norm": 4.016248386394783, "learning_rate": 7.983856338588247e-07, "loss": 0.3381, "step": 59250 }, { "epoch": 2.51, "grad_norm": 4.515148939304655, "learning_rate": 7.97718383661395e-07, "loss": 0.3502, "step": 59255 }, { "epoch": 2.51, "grad_norm": 5.41189848342285, "learning_rate": 7.970513882359233e-07, "loss": 0.3668, "step": 59260 }, { "epoch": 2.51, "grad_norm": 5.628244596276695, "learning_rate": 7.963846476228438e-07, "loss": 0.3408, "step": 59265 }, { "epoch": 2.51, "grad_norm": 4.027123714180529, "learning_rate": 7.957181618625815e-07, "loss": 0.3279, "step": 59270 }, { "epoch": 2.51, "grad_norm": 4.746128053515588, "learning_rate": 7.95051930995544e-07, "loss": 0.3133, "step": 59275 }, { "epoch": 2.51, "grad_norm": 4.203785334787506, "learning_rate": 7.943859550621213e-07, "loss": 0.3359, "step": 59280 }, { "epoch": 2.51, "grad_norm": 3.8900463386404294, "learning_rate": 7.937202341026896e-07, "loss": 0.3364, "step": 59285 }, { "epoch": 2.51, "grad_norm": 4.209176673434223, "learning_rate": 7.930547681576095e-07, "loss": 0.3116, "step": 59290 }, { "epoch": 2.51, "grad_norm": 4.538853158616472, "learning_rate": 7.923895572672286e-07, "loss": 0.3323, "step": 59295 }, { "epoch": 2.51, "grad_norm": 3.817678060019137, "learning_rate": 7.917246014718738e-07, "loss": 0.3158, "step": 59300 }, { "epoch": 2.51, "grad_norm": 4.141449220825571, "learning_rate": 7.910599008118603e-07, "loss": 0.3247, "step": 59305 }, { "epoch": 2.51, "grad_norm": 4.601890778492797, "learning_rate": 7.903954553274878e-07, "loss": 0.3108, "step": 59310 }, { "epoch": 2.51, "grad_norm": 4.32745497788374, "learning_rate": 7.897312650590383e-07, "loss": 0.3374, "step": 59315 }, { "epoch": 2.51, "grad_norm": 4.530612648154769, "learning_rate": 7.890673300467805e-07, "loss": 0.321, "step": 59320 }, { "epoch": 2.51, "grad_norm": 4.654232621100237, "learning_rate": 7.884036503309673e-07, "loss": 0.325, "step": 59325 }, { "epoch": 2.51, "grad_norm": 3.604454785358741, "learning_rate": 7.877402259518357e-07, "loss": 0.3025, "step": 59330 }, { "epoch": 2.51, "grad_norm": 4.462611038736459, "learning_rate": 7.87077056949605e-07, "loss": 0.3387, "step": 59335 }, { "epoch": 2.51, "grad_norm": 3.6975954169822334, "learning_rate": 7.864141433644834e-07, "loss": 0.3188, "step": 59340 }, { "epoch": 2.51, "grad_norm": 3.85150669142777, "learning_rate": 7.857514852366621e-07, "loss": 0.3292, "step": 59345 }, { "epoch": 2.51, "grad_norm": 4.1904791124107, "learning_rate": 7.850890826063129e-07, "loss": 0.3145, "step": 59350 }, { "epoch": 2.51, "grad_norm": 4.862036027955601, "learning_rate": 7.844269355135975e-07, "loss": 0.3405, "step": 59355 }, { "epoch": 2.51, "grad_norm": 3.793644227102505, "learning_rate": 7.83765043998661e-07, "loss": 0.3191, "step": 59360 }, { "epoch": 2.51, "grad_norm": 3.654141634392695, "learning_rate": 7.831034081016292e-07, "loss": 0.3233, "step": 59365 }, { "epoch": 2.51, "grad_norm": 4.228846983712715, "learning_rate": 7.824420278626166e-07, "loss": 0.3392, "step": 59370 }, { "epoch": 2.51, "grad_norm": 4.885658199814358, "learning_rate": 7.817809033217216e-07, "loss": 0.3366, "step": 59375 }, { "epoch": 2.51, "grad_norm": 3.7403193210688674, "learning_rate": 7.811200345190245e-07, "loss": 0.3211, "step": 59380 }, { "epoch": 2.51, "grad_norm": 4.06335013644027, "learning_rate": 7.804594214945921e-07, "loss": 0.3225, "step": 59385 }, { "epoch": 2.51, "grad_norm": 3.97689899189851, "learning_rate": 7.797990642884762e-07, "loss": 0.3485, "step": 59390 }, { "epoch": 2.51, "grad_norm": 3.7404999482310384, "learning_rate": 7.791389629407131e-07, "loss": 0.3408, "step": 59395 }, { "epoch": 2.51, "grad_norm": 3.9631399690064155, "learning_rate": 7.784791174913209e-07, "loss": 0.3429, "step": 59400 }, { "epoch": 2.51, "grad_norm": 4.089356190587559, "learning_rate": 7.778195279803041e-07, "loss": 0.3537, "step": 59405 }, { "epoch": 2.51, "grad_norm": 3.9490251066258133, "learning_rate": 7.771601944476542e-07, "loss": 0.3266, "step": 59410 }, { "epoch": 2.51, "grad_norm": 3.7236914292901915, "learning_rate": 7.765011169333419e-07, "loss": 0.3049, "step": 59415 }, { "epoch": 2.51, "grad_norm": 4.017895535717673, "learning_rate": 7.758422954773259e-07, "loss": 0.3476, "step": 59420 }, { "epoch": 2.51, "grad_norm": 3.46868801887529, "learning_rate": 7.751837301195503e-07, "loss": 0.3081, "step": 59425 }, { "epoch": 2.52, "grad_norm": 3.9508890663922847, "learning_rate": 7.745254208999392e-07, "loss": 0.3129, "step": 59430 }, { "epoch": 2.52, "grad_norm": 3.9121141955990617, "learning_rate": 7.738673678584052e-07, "loss": 0.3261, "step": 59435 }, { "epoch": 2.52, "grad_norm": 4.10480361176652, "learning_rate": 7.732095710348453e-07, "loss": 0.3107, "step": 59440 }, { "epoch": 2.52, "grad_norm": 4.762556270795881, "learning_rate": 7.725520304691386e-07, "loss": 0.3226, "step": 59445 }, { "epoch": 2.52, "grad_norm": 3.8332173670209815, "learning_rate": 7.718947462011489e-07, "loss": 0.3469, "step": 59450 }, { "epoch": 2.52, "grad_norm": 4.2459475179733985, "learning_rate": 7.712377182707265e-07, "loss": 0.3491, "step": 59455 }, { "epoch": 2.52, "grad_norm": 4.81613155242911, "learning_rate": 7.70580946717705e-07, "loss": 0.3293, "step": 59460 }, { "epoch": 2.52, "grad_norm": 3.98676709773683, "learning_rate": 7.699244315819037e-07, "loss": 0.331, "step": 59465 }, { "epoch": 2.52, "grad_norm": 3.9048908827302204, "learning_rate": 7.692681729031232e-07, "loss": 0.3317, "step": 59470 }, { "epoch": 2.52, "grad_norm": 6.000047929831132, "learning_rate": 7.686121707211508e-07, "loss": 0.3131, "step": 59475 }, { "epoch": 2.52, "grad_norm": 8.068356453698133, "learning_rate": 7.679564250757598e-07, "loss": 0.3238, "step": 59480 }, { "epoch": 2.52, "grad_norm": 4.566372471123929, "learning_rate": 7.673009360067041e-07, "loss": 0.3403, "step": 59485 }, { "epoch": 2.52, "grad_norm": 4.837527413814982, "learning_rate": 7.666457035537245e-07, "loss": 0.3701, "step": 59490 }, { "epoch": 2.52, "grad_norm": 4.274115320114632, "learning_rate": 7.65990727756547e-07, "loss": 0.3202, "step": 59495 }, { "epoch": 2.52, "grad_norm": 3.737252534009414, "learning_rate": 7.653360086548795e-07, "loss": 0.3079, "step": 59500 }, { "epoch": 2.52, "grad_norm": 3.943777546452165, "learning_rate": 7.646815462884155e-07, "loss": 0.3459, "step": 59505 }, { "epoch": 2.52, "grad_norm": 3.9796797568370885, "learning_rate": 7.640273406968346e-07, "loss": 0.3401, "step": 59510 }, { "epoch": 2.52, "grad_norm": 3.933766080622509, "learning_rate": 7.633733919197989e-07, "loss": 0.3084, "step": 59515 }, { "epoch": 2.52, "grad_norm": 3.7959885694883204, "learning_rate": 7.627196999969544e-07, "loss": 0.3202, "step": 59520 }, { "epoch": 2.52, "grad_norm": 3.7109861952781817, "learning_rate": 7.620662649679333e-07, "loss": 0.3464, "step": 59525 }, { "epoch": 2.52, "grad_norm": 3.971425611942858, "learning_rate": 7.614130868723518e-07, "loss": 0.3172, "step": 59530 }, { "epoch": 2.52, "grad_norm": 4.494658310844765, "learning_rate": 7.607601657498092e-07, "loss": 0.3266, "step": 59535 }, { "epoch": 2.52, "grad_norm": 4.800318541240934, "learning_rate": 7.601075016398901e-07, "loss": 0.3114, "step": 59540 }, { "epoch": 2.52, "grad_norm": 4.143851173815291, "learning_rate": 7.59455094582166e-07, "loss": 0.3295, "step": 59545 }, { "epoch": 2.52, "grad_norm": 5.356375795086613, "learning_rate": 7.588029446161871e-07, "loss": 0.311, "step": 59550 }, { "epoch": 2.52, "grad_norm": 4.886496575388588, "learning_rate": 7.581510517814927e-07, "loss": 0.3316, "step": 59555 }, { "epoch": 2.52, "grad_norm": 4.021070934546926, "learning_rate": 7.574994161176053e-07, "loss": 0.3194, "step": 59560 }, { "epoch": 2.52, "grad_norm": 4.043773580526641, "learning_rate": 7.568480376640325e-07, "loss": 0.3177, "step": 59565 }, { "epoch": 2.52, "grad_norm": 3.9416867631654218, "learning_rate": 7.561969164602639e-07, "loss": 0.3182, "step": 59570 }, { "epoch": 2.52, "grad_norm": 3.8505498081259755, "learning_rate": 7.555460525457753e-07, "loss": 0.3064, "step": 59575 }, { "epoch": 2.52, "grad_norm": 4.29734800243156, "learning_rate": 7.548954459600288e-07, "loss": 0.3386, "step": 59580 }, { "epoch": 2.52, "grad_norm": 4.305180239732112, "learning_rate": 7.542450967424653e-07, "loss": 0.3274, "step": 59585 }, { "epoch": 2.52, "grad_norm": 4.489416697282077, "learning_rate": 7.535950049325153e-07, "loss": 0.3702, "step": 59590 }, { "epoch": 2.52, "grad_norm": 3.838327834158462, "learning_rate": 7.529451705695934e-07, "loss": 0.3032, "step": 59595 }, { "epoch": 2.52, "grad_norm": 4.074615635275481, "learning_rate": 7.522955936930959e-07, "loss": 0.3484, "step": 59600 }, { "epoch": 2.52, "grad_norm": 5.506027377207432, "learning_rate": 7.51646274342403e-07, "loss": 0.3056, "step": 59605 }, { "epoch": 2.52, "grad_norm": 3.5953751044098814, "learning_rate": 7.509972125568826e-07, "loss": 0.3325, "step": 59610 }, { "epoch": 2.52, "grad_norm": 3.8354525936069988, "learning_rate": 7.503484083758866e-07, "loss": 0.311, "step": 59615 }, { "epoch": 2.52, "grad_norm": 4.277667863256289, "learning_rate": 7.49699861838748e-07, "loss": 0.3285, "step": 59620 }, { "epoch": 2.52, "grad_norm": 3.686741883579355, "learning_rate": 7.490515729847864e-07, "loss": 0.3247, "step": 59625 }, { "epoch": 2.52, "grad_norm": 3.878912618454519, "learning_rate": 7.48403541853307e-07, "loss": 0.3078, "step": 59630 }, { "epoch": 2.52, "grad_norm": 3.894726990131706, "learning_rate": 7.477557684835979e-07, "loss": 0.3269, "step": 59635 }, { "epoch": 2.52, "grad_norm": 3.6242062664600985, "learning_rate": 7.471082529149304e-07, "loss": 0.3132, "step": 59640 }, { "epoch": 2.52, "grad_norm": 4.260740747796143, "learning_rate": 7.46460995186562e-07, "loss": 0.3068, "step": 59645 }, { "epoch": 2.52, "grad_norm": 3.8246068797947563, "learning_rate": 7.458139953377358e-07, "loss": 0.3363, "step": 59650 }, { "epoch": 2.52, "grad_norm": 9.066831689625904, "learning_rate": 7.451672534076748e-07, "loss": 0.3205, "step": 59655 }, { "epoch": 2.52, "grad_norm": 4.257545554073458, "learning_rate": 7.4452076943559e-07, "loss": 0.302, "step": 59660 }, { "epoch": 2.53, "grad_norm": 3.939807365835118, "learning_rate": 7.43874543460677e-07, "loss": 0.3426, "step": 59665 }, { "epoch": 2.53, "grad_norm": 4.123730007789098, "learning_rate": 7.432285755221125e-07, "loss": 0.3031, "step": 59670 }, { "epoch": 2.53, "grad_norm": 4.211775339917534, "learning_rate": 7.425828656590611e-07, "loss": 0.3058, "step": 59675 }, { "epoch": 2.53, "grad_norm": 4.220743859827115, "learning_rate": 7.419374139106706e-07, "loss": 0.3407, "step": 59680 }, { "epoch": 2.53, "grad_norm": 3.9959640779286496, "learning_rate": 7.41292220316071e-07, "loss": 0.3233, "step": 59685 }, { "epoch": 2.53, "grad_norm": 4.572689592631259, "learning_rate": 7.406472849143797e-07, "loss": 0.3314, "step": 59690 }, { "epoch": 2.53, "grad_norm": 3.9873545757146704, "learning_rate": 7.400026077446975e-07, "loss": 0.3104, "step": 59695 }, { "epoch": 2.53, "grad_norm": 3.7262835903178937, "learning_rate": 7.393581888461099e-07, "loss": 0.3364, "step": 59700 }, { "epoch": 2.53, "grad_norm": 4.159143267379357, "learning_rate": 7.387140282576843e-07, "loss": 0.323, "step": 59705 }, { "epoch": 2.53, "grad_norm": 4.610566803796951, "learning_rate": 7.380701260184748e-07, "loss": 0.3545, "step": 59710 }, { "epoch": 2.53, "grad_norm": 4.089682756389737, "learning_rate": 7.374264821675208e-07, "loss": 0.3343, "step": 59715 }, { "epoch": 2.53, "grad_norm": 4.487867920004437, "learning_rate": 7.367830967438427e-07, "loss": 0.3039, "step": 59720 }, { "epoch": 2.53, "grad_norm": 4.2051118155817875, "learning_rate": 7.361399697864474e-07, "loss": 0.3081, "step": 59725 }, { "epoch": 2.53, "grad_norm": 4.346930805827001, "learning_rate": 7.35497101334327e-07, "loss": 0.3387, "step": 59730 }, { "epoch": 2.53, "grad_norm": 4.009230476535528, "learning_rate": 7.34854491426455e-07, "loss": 0.3386, "step": 59735 }, { "epoch": 2.53, "grad_norm": 3.609355700472187, "learning_rate": 7.342121401017915e-07, "loss": 0.2886, "step": 59740 }, { "epoch": 2.53, "grad_norm": 4.475290506412041, "learning_rate": 7.335700473992808e-07, "loss": 0.3414, "step": 59745 }, { "epoch": 2.53, "grad_norm": 3.89405597160021, "learning_rate": 7.329282133578524e-07, "loss": 0.3209, "step": 59750 }, { "epoch": 2.53, "grad_norm": 3.59622157491749, "learning_rate": 7.322866380164157e-07, "loss": 0.3181, "step": 59755 }, { "epoch": 2.53, "grad_norm": 3.4941735148486455, "learning_rate": 7.316453214138697e-07, "loss": 0.3039, "step": 59760 }, { "epoch": 2.53, "grad_norm": 3.8936420126764717, "learning_rate": 7.310042635890957e-07, "loss": 0.3336, "step": 59765 }, { "epoch": 2.53, "grad_norm": 4.6232250331611535, "learning_rate": 7.303634645809588e-07, "loss": 0.3172, "step": 59770 }, { "epoch": 2.53, "grad_norm": 4.153706896362985, "learning_rate": 7.297229244283072e-07, "loss": 0.353, "step": 59775 }, { "epoch": 2.53, "grad_norm": 3.603243054085999, "learning_rate": 7.29082643169976e-07, "loss": 0.3125, "step": 59780 }, { "epoch": 2.53, "grad_norm": 4.910458029321549, "learning_rate": 7.284426208447848e-07, "loss": 0.3393, "step": 59785 }, { "epoch": 2.53, "grad_norm": 5.738434661618399, "learning_rate": 7.278028574915341e-07, "loss": 0.3431, "step": 59790 }, { "epoch": 2.53, "grad_norm": 6.37977270422054, "learning_rate": 7.271633531490124e-07, "loss": 0.3273, "step": 59795 }, { "epoch": 2.53, "grad_norm": 3.924155628682069, "learning_rate": 7.265241078559915e-07, "loss": 0.3412, "step": 59800 }, { "epoch": 2.53, "grad_norm": 5.146740524690065, "learning_rate": 7.258851216512247e-07, "loss": 0.3151, "step": 59805 }, { "epoch": 2.53, "grad_norm": 3.765253508446272, "learning_rate": 7.252463945734529e-07, "loss": 0.3619, "step": 59810 }, { "epoch": 2.53, "grad_norm": 3.5618826053339374, "learning_rate": 7.246079266614009e-07, "loss": 0.3434, "step": 59815 }, { "epoch": 2.53, "grad_norm": 4.8401029248856, "learning_rate": 7.23969717953778e-07, "loss": 0.3263, "step": 59820 }, { "epoch": 2.53, "grad_norm": 3.9385262366393894, "learning_rate": 7.233317684892744e-07, "loss": 0.3128, "step": 59825 }, { "epoch": 2.53, "grad_norm": 4.4044455110737815, "learning_rate": 7.226940783065678e-07, "loss": 0.3334, "step": 59830 }, { "epoch": 2.53, "grad_norm": 4.447380879835691, "learning_rate": 7.220566474443219e-07, "loss": 0.333, "step": 59835 }, { "epoch": 2.53, "grad_norm": 3.8620156284140963, "learning_rate": 7.214194759411786e-07, "loss": 0.3078, "step": 59840 }, { "epoch": 2.53, "grad_norm": 4.2676883677847455, "learning_rate": 7.207825638357702e-07, "loss": 0.344, "step": 59845 }, { "epoch": 2.53, "grad_norm": 4.540381203139076, "learning_rate": 7.201459111667108e-07, "loss": 0.3354, "step": 59850 }, { "epoch": 2.53, "grad_norm": 4.203504785503325, "learning_rate": 7.19509517972597e-07, "loss": 0.3445, "step": 59855 }, { "epoch": 2.53, "grad_norm": 3.8016333589226843, "learning_rate": 7.188733842920126e-07, "loss": 0.3269, "step": 59860 }, { "epoch": 2.53, "grad_norm": 3.769434168850976, "learning_rate": 7.182375101635241e-07, "loss": 0.3251, "step": 59865 }, { "epoch": 2.53, "grad_norm": 4.669461653292791, "learning_rate": 7.176018956256847e-07, "loss": 0.3286, "step": 59870 }, { "epoch": 2.53, "grad_norm": 4.020252337155452, "learning_rate": 7.169665407170268e-07, "loss": 0.3549, "step": 59875 }, { "epoch": 2.53, "grad_norm": 4.166632616432261, "learning_rate": 7.163314454760711e-07, "loss": 0.309, "step": 59880 }, { "epoch": 2.53, "grad_norm": 5.002179838041498, "learning_rate": 7.156966099413231e-07, "loss": 0.3126, "step": 59885 }, { "epoch": 2.53, "grad_norm": 3.9766786984249065, "learning_rate": 7.150620341512693e-07, "loss": 0.3307, "step": 59890 }, { "epoch": 2.53, "grad_norm": 4.108463688772603, "learning_rate": 7.14427718144382e-07, "loss": 0.3342, "step": 59895 }, { "epoch": 2.54, "grad_norm": 3.899877322435653, "learning_rate": 7.137936619591201e-07, "loss": 0.3166, "step": 59900 }, { "epoch": 2.54, "grad_norm": 4.124973819499981, "learning_rate": 7.131598656339217e-07, "loss": 0.3171, "step": 59905 }, { "epoch": 2.54, "grad_norm": 4.450194601809639, "learning_rate": 7.125263292072132e-07, "loss": 0.3271, "step": 59910 }, { "epoch": 2.54, "grad_norm": 3.754796725347709, "learning_rate": 7.118930527174056e-07, "loss": 0.3003, "step": 59915 }, { "epoch": 2.54, "grad_norm": 4.481548791741152, "learning_rate": 7.112600362028904e-07, "loss": 0.3151, "step": 59920 }, { "epoch": 2.54, "grad_norm": 5.153841670059852, "learning_rate": 7.106272797020469e-07, "loss": 0.3541, "step": 59925 }, { "epoch": 2.54, "grad_norm": 4.361985739266365, "learning_rate": 7.099947832532355e-07, "loss": 0.3356, "step": 59930 }, { "epoch": 2.54, "grad_norm": 3.5394792780967017, "learning_rate": 7.093625468948051e-07, "loss": 0.3181, "step": 59935 }, { "epoch": 2.54, "grad_norm": 3.8176454656652252, "learning_rate": 7.087305706650837e-07, "loss": 0.3101, "step": 59940 }, { "epoch": 2.54, "grad_norm": 6.190022450319625, "learning_rate": 7.080988546023876e-07, "loss": 0.3098, "step": 59945 }, { "epoch": 2.54, "grad_norm": 5.484092122461223, "learning_rate": 7.074673987450159e-07, "loss": 0.3036, "step": 59950 }, { "epoch": 2.54, "grad_norm": 4.0591760240222134, "learning_rate": 7.068362031312526e-07, "loss": 0.2872, "step": 59955 }, { "epoch": 2.54, "grad_norm": 3.7822418378374296, "learning_rate": 7.062052677993636e-07, "loss": 0.3316, "step": 59960 }, { "epoch": 2.54, "grad_norm": 4.430190769410036, "learning_rate": 7.055745927876012e-07, "loss": 0.3436, "step": 59965 }, { "epoch": 2.54, "grad_norm": 4.29267852134061, "learning_rate": 7.049441781342026e-07, "loss": 0.3219, "step": 59970 }, { "epoch": 2.54, "grad_norm": 3.7735698878523243, "learning_rate": 7.04314023877386e-07, "loss": 0.3183, "step": 59975 }, { "epoch": 2.54, "grad_norm": 3.7169112206559247, "learning_rate": 7.036841300553565e-07, "loss": 0.3088, "step": 59980 }, { "epoch": 2.54, "grad_norm": 4.381551916456472, "learning_rate": 7.030544967063047e-07, "loss": 0.334, "step": 59985 }, { "epoch": 2.54, "grad_norm": 3.5381773910357204, "learning_rate": 7.024251238683999e-07, "loss": 0.3288, "step": 59990 }, { "epoch": 2.54, "grad_norm": 4.044473688624339, "learning_rate": 7.017960115798012e-07, "loss": 0.2832, "step": 59995 }, { "epoch": 2.54, "grad_norm": 3.7722178718590356, "learning_rate": 7.011671598786496e-07, "loss": 0.3438, "step": 60000 }, { "epoch": 2.54, "grad_norm": 3.8971499955084368, "learning_rate": 7.005385688030713e-07, "loss": 0.3268, "step": 60005 }, { "epoch": 2.54, "grad_norm": 4.225998919441519, "learning_rate": 6.999102383911744e-07, "loss": 0.3316, "step": 60010 }, { "epoch": 2.54, "grad_norm": 3.9899434860295386, "learning_rate": 6.992821686810536e-07, "loss": 0.3079, "step": 60015 }, { "epoch": 2.54, "grad_norm": 3.964539794338357, "learning_rate": 6.986543597107875e-07, "loss": 0.2986, "step": 60020 }, { "epoch": 2.54, "grad_norm": 4.230036763899095, "learning_rate": 6.980268115184363e-07, "loss": 0.3292, "step": 60025 }, { "epoch": 2.54, "grad_norm": 3.9557055495543554, "learning_rate": 6.973995241420478e-07, "loss": 0.3359, "step": 60030 }, { "epoch": 2.54, "grad_norm": 3.8611554006504347, "learning_rate": 6.967724976196532e-07, "loss": 0.302, "step": 60035 }, { "epoch": 2.54, "grad_norm": 3.4551201427357174, "learning_rate": 6.961457319892656e-07, "loss": 0.3282, "step": 60040 }, { "epoch": 2.54, "grad_norm": 4.686132249473026, "learning_rate": 6.95519227288885e-07, "loss": 0.328, "step": 60045 }, { "epoch": 2.54, "grad_norm": 4.498405263329421, "learning_rate": 6.948929835564944e-07, "loss": 0.3385, "step": 60050 }, { "epoch": 2.54, "grad_norm": 4.323771158421139, "learning_rate": 6.942670008300622e-07, "loss": 0.3295, "step": 60055 }, { "epoch": 2.54, "grad_norm": 3.985424843898281, "learning_rate": 6.936412791475377e-07, "loss": 0.3095, "step": 60060 }, { "epoch": 2.54, "grad_norm": 3.917734481631152, "learning_rate": 6.930158185468578e-07, "loss": 0.3202, "step": 60065 }, { "epoch": 2.54, "grad_norm": 5.261622182783867, "learning_rate": 6.92390619065943e-07, "loss": 0.3162, "step": 60070 }, { "epoch": 2.54, "grad_norm": 3.7118307254708016, "learning_rate": 6.917656807426953e-07, "loss": 0.3317, "step": 60075 }, { "epoch": 2.54, "grad_norm": 3.819649872673034, "learning_rate": 6.911410036150041e-07, "loss": 0.3355, "step": 60080 }, { "epoch": 2.54, "grad_norm": 4.092356305981138, "learning_rate": 6.905165877207426e-07, "loss": 0.3375, "step": 60085 }, { "epoch": 2.54, "grad_norm": 4.200441489623247, "learning_rate": 6.898924330977669e-07, "loss": 0.337, "step": 60090 }, { "epoch": 2.54, "grad_norm": 3.7366249124796984, "learning_rate": 6.892685397839161e-07, "loss": 0.3423, "step": 60095 }, { "epoch": 2.54, "grad_norm": 3.697677287923659, "learning_rate": 6.886449078170154e-07, "loss": 0.3209, "step": 60100 }, { "epoch": 2.54, "grad_norm": 5.341719427270338, "learning_rate": 6.880215372348759e-07, "loss": 0.3078, "step": 60105 }, { "epoch": 2.54, "grad_norm": 4.178229741452269, "learning_rate": 6.873984280752882e-07, "loss": 0.339, "step": 60110 }, { "epoch": 2.54, "grad_norm": 4.1482601100558245, "learning_rate": 6.867755803760306e-07, "loss": 0.354, "step": 60115 }, { "epoch": 2.54, "grad_norm": 3.5744507712474616, "learning_rate": 6.861529941748646e-07, "loss": 0.3025, "step": 60120 }, { "epoch": 2.54, "grad_norm": 3.913959936649277, "learning_rate": 6.855306695095365e-07, "loss": 0.3315, "step": 60125 }, { "epoch": 2.54, "grad_norm": 3.754446097277773, "learning_rate": 6.849086064177745e-07, "loss": 0.3268, "step": 60130 }, { "epoch": 2.55, "grad_norm": 3.8403367197732172, "learning_rate": 6.84286804937293e-07, "loss": 0.2969, "step": 60135 }, { "epoch": 2.55, "grad_norm": 4.014147750276571, "learning_rate": 6.836652651057912e-07, "loss": 0.33, "step": 60140 }, { "epoch": 2.55, "grad_norm": 4.0223688230333075, "learning_rate": 6.83043986960949e-07, "loss": 0.3282, "step": 60145 }, { "epoch": 2.55, "grad_norm": 3.8905603103908595, "learning_rate": 6.824229705404339e-07, "loss": 0.3137, "step": 60150 }, { "epoch": 2.55, "grad_norm": 4.463797114164437, "learning_rate": 6.818022158818976e-07, "loss": 0.3246, "step": 60155 }, { "epoch": 2.55, "grad_norm": 3.700214886931591, "learning_rate": 6.811817230229723e-07, "loss": 0.3259, "step": 60160 }, { "epoch": 2.55, "grad_norm": 4.245794609574286, "learning_rate": 6.805614920012771e-07, "loss": 0.3266, "step": 60165 }, { "epoch": 2.55, "grad_norm": 4.5247086041512965, "learning_rate": 6.799415228544159e-07, "loss": 0.334, "step": 60170 }, { "epoch": 2.55, "grad_norm": 4.562493537765349, "learning_rate": 6.793218156199754e-07, "loss": 0.2912, "step": 60175 }, { "epoch": 2.55, "grad_norm": 3.93124551508199, "learning_rate": 6.787023703355261e-07, "loss": 0.3128, "step": 60180 }, { "epoch": 2.55, "grad_norm": 3.950683794164515, "learning_rate": 6.780831870386223e-07, "loss": 0.3315, "step": 60185 }, { "epoch": 2.55, "grad_norm": 3.9486667646716604, "learning_rate": 6.774642657668063e-07, "loss": 0.3162, "step": 60190 }, { "epoch": 2.55, "grad_norm": 3.7056213473545903, "learning_rate": 6.768456065575973e-07, "loss": 0.3262, "step": 60195 }, { "epoch": 2.55, "grad_norm": 3.558391670058067, "learning_rate": 6.762272094485056e-07, "loss": 0.3475, "step": 60200 }, { "epoch": 2.55, "grad_norm": 5.948417283770093, "learning_rate": 6.756090744770233e-07, "loss": 0.3098, "step": 60205 }, { "epoch": 2.55, "grad_norm": 4.125841875489312, "learning_rate": 6.749912016806231e-07, "loss": 0.3268, "step": 60210 }, { "epoch": 2.55, "grad_norm": 3.930962517424942, "learning_rate": 6.743735910967675e-07, "loss": 0.314, "step": 60215 }, { "epoch": 2.55, "grad_norm": 5.176779330992543, "learning_rate": 6.737562427629002e-07, "loss": 0.3311, "step": 60220 }, { "epoch": 2.55, "grad_norm": 4.205975105134719, "learning_rate": 6.731391567164475e-07, "loss": 0.3275, "step": 60225 }, { "epoch": 2.55, "grad_norm": 3.6712143285179266, "learning_rate": 6.725223329948221e-07, "loss": 0.3345, "step": 60230 }, { "epoch": 2.55, "grad_norm": 4.359628457642211, "learning_rate": 6.719057716354211e-07, "loss": 0.352, "step": 60235 }, { "epoch": 2.55, "grad_norm": 3.7099754110848906, "learning_rate": 6.712894726756253e-07, "loss": 0.3243, "step": 60240 }, { "epoch": 2.55, "grad_norm": 4.165017573973444, "learning_rate": 6.706734361527972e-07, "loss": 0.3329, "step": 60245 }, { "epoch": 2.55, "grad_norm": 3.8265183801083773, "learning_rate": 6.70057662104287e-07, "loss": 0.3213, "step": 60250 }, { "epoch": 2.55, "grad_norm": 4.133524904188468, "learning_rate": 6.694421505674259e-07, "loss": 0.3117, "step": 60255 }, { "epoch": 2.55, "grad_norm": 4.001863664423481, "learning_rate": 6.688269015795318e-07, "loss": 0.3244, "step": 60260 }, { "epoch": 2.55, "grad_norm": 4.103951122714365, "learning_rate": 6.682119151779037e-07, "loss": 0.3277, "step": 60265 }, { "epoch": 2.55, "grad_norm": 4.3006487258510395, "learning_rate": 6.675971913998274e-07, "loss": 0.3192, "step": 60270 }, { "epoch": 2.55, "grad_norm": 5.069164023364594, "learning_rate": 6.669827302825732e-07, "loss": 0.3284, "step": 60275 }, { "epoch": 2.55, "grad_norm": 4.106503874231339, "learning_rate": 6.663685318633917e-07, "loss": 0.2937, "step": 60280 }, { "epoch": 2.55, "grad_norm": 4.4129892148103975, "learning_rate": 6.657545961795209e-07, "loss": 0.322, "step": 60285 }, { "epoch": 2.55, "grad_norm": 3.835251287329939, "learning_rate": 6.651409232681832e-07, "loss": 0.3465, "step": 60290 }, { "epoch": 2.55, "grad_norm": 4.719365725411388, "learning_rate": 6.645275131665812e-07, "loss": 0.3107, "step": 60295 }, { "epoch": 2.55, "grad_norm": 4.291617929473293, "learning_rate": 6.639143659119057e-07, "loss": 0.3175, "step": 60300 }, { "epoch": 2.55, "grad_norm": 4.181532040920078, "learning_rate": 6.633014815413297e-07, "loss": 0.3311, "step": 60305 }, { "epoch": 2.55, "grad_norm": 3.817873502640602, "learning_rate": 6.626888600920118e-07, "loss": 0.3037, "step": 60310 }, { "epoch": 2.55, "grad_norm": 4.0174940148699445, "learning_rate": 6.620765016010916e-07, "loss": 0.3261, "step": 60315 }, { "epoch": 2.55, "grad_norm": 4.466116156338231, "learning_rate": 6.614644061056947e-07, "loss": 0.3427, "step": 60320 }, { "epoch": 2.55, "grad_norm": 4.102721518392326, "learning_rate": 6.608525736429327e-07, "loss": 0.3137, "step": 60325 }, { "epoch": 2.55, "grad_norm": 4.212619003126551, "learning_rate": 6.602410042498969e-07, "loss": 0.3274, "step": 60330 }, { "epoch": 2.55, "grad_norm": 4.404479645135486, "learning_rate": 6.596296979636657e-07, "loss": 0.3209, "step": 60335 }, { "epoch": 2.55, "grad_norm": 3.9788117396100326, "learning_rate": 6.590186548213024e-07, "loss": 0.3384, "step": 60340 }, { "epoch": 2.55, "grad_norm": 4.192032664025439, "learning_rate": 6.584078748598494e-07, "loss": 0.3294, "step": 60345 }, { "epoch": 2.55, "grad_norm": 3.8187910469163495, "learning_rate": 6.577973581163388e-07, "loss": 0.317, "step": 60350 }, { "epoch": 2.55, "grad_norm": 4.799317871144328, "learning_rate": 6.571871046277839e-07, "loss": 0.312, "step": 60355 }, { "epoch": 2.55, "grad_norm": 4.389165936300695, "learning_rate": 6.565771144311839e-07, "loss": 0.3158, "step": 60360 }, { "epoch": 2.55, "grad_norm": 3.9038180814624943, "learning_rate": 6.559673875635181e-07, "loss": 0.3445, "step": 60365 }, { "epoch": 2.55, "grad_norm": 3.9438443292973284, "learning_rate": 6.553579240617542e-07, "loss": 0.3311, "step": 60370 }, { "epoch": 2.56, "grad_norm": 3.866632270946839, "learning_rate": 6.547487239628425e-07, "loss": 0.3479, "step": 60375 }, { "epoch": 2.56, "grad_norm": 4.571714511295955, "learning_rate": 6.541397873037159e-07, "loss": 0.3459, "step": 60380 }, { "epoch": 2.56, "grad_norm": 4.365948555736455, "learning_rate": 6.535311141212924e-07, "loss": 0.3493, "step": 60385 }, { "epoch": 2.56, "grad_norm": 4.4014852589521976, "learning_rate": 6.529227044524755e-07, "loss": 0.3168, "step": 60390 }, { "epoch": 2.56, "grad_norm": 3.8956098317614547, "learning_rate": 6.52314558334149e-07, "loss": 0.3168, "step": 60395 }, { "epoch": 2.56, "grad_norm": 4.285001775537623, "learning_rate": 6.517066758031848e-07, "loss": 0.2866, "step": 60400 }, { "epoch": 2.56, "grad_norm": 3.948400570147473, "learning_rate": 6.510990568964365e-07, "loss": 0.3124, "step": 60405 }, { "epoch": 2.56, "grad_norm": 4.69799336869235, "learning_rate": 6.504917016507439e-07, "loss": 0.3291, "step": 60410 }, { "epoch": 2.56, "grad_norm": 3.823576171610237, "learning_rate": 6.49884610102926e-07, "loss": 0.3275, "step": 60415 }, { "epoch": 2.56, "grad_norm": 4.18842494997157, "learning_rate": 6.4927778228979e-07, "loss": 0.3262, "step": 60420 }, { "epoch": 2.56, "grad_norm": 3.564782326829167, "learning_rate": 6.48671218248127e-07, "loss": 0.3576, "step": 60425 }, { "epoch": 2.56, "grad_norm": 3.835360849719144, "learning_rate": 6.480649180147114e-07, "loss": 0.3103, "step": 60430 }, { "epoch": 2.56, "grad_norm": 3.884120624893983, "learning_rate": 6.474588816263e-07, "loss": 0.3305, "step": 60435 }, { "epoch": 2.56, "grad_norm": 4.367480096268666, "learning_rate": 6.468531091196362e-07, "loss": 0.3151, "step": 60440 }, { "epoch": 2.56, "grad_norm": 4.441609670349472, "learning_rate": 6.462476005314461e-07, "loss": 0.3092, "step": 60445 }, { "epoch": 2.56, "grad_norm": 3.6918592951755786, "learning_rate": 6.456423558984387e-07, "loss": 0.3177, "step": 60450 }, { "epoch": 2.56, "grad_norm": 4.252760210918517, "learning_rate": 6.450373752573091e-07, "loss": 0.3366, "step": 60455 }, { "epoch": 2.56, "grad_norm": 5.002615904312581, "learning_rate": 6.444326586447369e-07, "loss": 0.3303, "step": 60460 }, { "epoch": 2.56, "grad_norm": 4.951713687011071, "learning_rate": 6.438282060973816e-07, "loss": 0.3438, "step": 60465 }, { "epoch": 2.56, "grad_norm": 4.045528159872285, "learning_rate": 6.432240176518906e-07, "loss": 0.3103, "step": 60470 }, { "epoch": 2.56, "grad_norm": 3.911730912075422, "learning_rate": 6.42620093344894e-07, "loss": 0.3399, "step": 60475 }, { "epoch": 2.56, "grad_norm": 4.246180504770331, "learning_rate": 6.420164332130069e-07, "loss": 0.3217, "step": 60480 }, { "epoch": 2.56, "grad_norm": 4.059635217073218, "learning_rate": 6.414130372928263e-07, "loss": 0.3253, "step": 60485 }, { "epoch": 2.56, "grad_norm": 3.8893273338119156, "learning_rate": 6.408099056209338e-07, "loss": 0.336, "step": 60490 }, { "epoch": 2.56, "grad_norm": 4.3870252524615925, "learning_rate": 6.402070382338976e-07, "loss": 0.3187, "step": 60495 }, { "epoch": 2.56, "grad_norm": 4.078630820728053, "learning_rate": 6.396044351682656e-07, "loss": 0.3179, "step": 60500 }, { "epoch": 2.56, "grad_norm": 4.165571015583123, "learning_rate": 6.390020964605725e-07, "loss": 0.3171, "step": 60505 }, { "epoch": 2.56, "grad_norm": 4.4324970650196125, "learning_rate": 6.38400022147338e-07, "loss": 0.3259, "step": 60510 }, { "epoch": 2.56, "grad_norm": 4.219860905718792, "learning_rate": 6.377982122650611e-07, "loss": 0.3285, "step": 60515 }, { "epoch": 2.56, "grad_norm": 3.8666632629621827, "learning_rate": 6.3719666685023e-07, "loss": 0.3179, "step": 60520 }, { "epoch": 2.56, "grad_norm": 4.281638383729655, "learning_rate": 6.365953859393143e-07, "loss": 0.3438, "step": 60525 }, { "epoch": 2.56, "grad_norm": 3.813085313500639, "learning_rate": 6.359943695687665e-07, "loss": 0.3076, "step": 60530 }, { "epoch": 2.56, "grad_norm": 4.352900150053343, "learning_rate": 6.353936177750258e-07, "loss": 0.3131, "step": 60535 }, { "epoch": 2.56, "grad_norm": 4.055747717232485, "learning_rate": 6.347931305945138e-07, "loss": 0.3121, "step": 60540 }, { "epoch": 2.56, "grad_norm": 4.010891312602815, "learning_rate": 6.341929080636366e-07, "loss": 0.3227, "step": 60545 }, { "epoch": 2.56, "grad_norm": 4.165844590153781, "learning_rate": 6.335929502187832e-07, "loss": 0.2912, "step": 60550 }, { "epoch": 2.56, "grad_norm": 3.9258213587435296, "learning_rate": 6.32993257096327e-07, "loss": 0.3155, "step": 60555 }, { "epoch": 2.56, "grad_norm": 3.7237915316879797, "learning_rate": 6.323938287326275e-07, "loss": 0.3274, "step": 60560 }, { "epoch": 2.56, "grad_norm": 4.237922448911811, "learning_rate": 6.317946651640239e-07, "loss": 0.3225, "step": 60565 }, { "epoch": 2.56, "grad_norm": 3.5828062616197167, "learning_rate": 6.311957664268437e-07, "loss": 0.3252, "step": 60570 }, { "epoch": 2.56, "grad_norm": 3.9295260623137436, "learning_rate": 6.30597132557394e-07, "loss": 0.3348, "step": 60575 }, { "epoch": 2.56, "grad_norm": 4.357972980430638, "learning_rate": 6.299987635919713e-07, "loss": 0.3287, "step": 60580 }, { "epoch": 2.56, "grad_norm": 3.6546876633669587, "learning_rate": 6.2940065956685e-07, "loss": 0.3234, "step": 60585 }, { "epoch": 2.56, "grad_norm": 3.733188457374905, "learning_rate": 6.288028205182928e-07, "loss": 0.332, "step": 60590 }, { "epoch": 2.56, "grad_norm": 4.0485185225083775, "learning_rate": 6.282052464825461e-07, "loss": 0.3012, "step": 60595 }, { "epoch": 2.56, "grad_norm": 4.410965373173383, "learning_rate": 6.276079374958361e-07, "loss": 0.3165, "step": 60600 }, { "epoch": 2.56, "grad_norm": 4.354633261072752, "learning_rate": 6.270108935943781e-07, "loss": 0.3163, "step": 60605 }, { "epoch": 2.57, "grad_norm": 3.741932092281292, "learning_rate": 6.264141148143682e-07, "loss": 0.3221, "step": 60610 }, { "epoch": 2.57, "grad_norm": 4.455577958032577, "learning_rate": 6.258176011919886e-07, "loss": 0.3111, "step": 60615 }, { "epoch": 2.57, "grad_norm": 3.934174313430105, "learning_rate": 6.252213527634026e-07, "loss": 0.3213, "step": 60620 }, { "epoch": 2.57, "grad_norm": 3.9296083249142515, "learning_rate": 6.246253695647597e-07, "loss": 0.3479, "step": 60625 }, { "epoch": 2.57, "grad_norm": 6.046134828368785, "learning_rate": 6.240296516321937e-07, "loss": 0.3295, "step": 60630 }, { "epoch": 2.57, "grad_norm": 4.22772554274616, "learning_rate": 6.234341990018184e-07, "loss": 0.3084, "step": 60635 }, { "epoch": 2.57, "grad_norm": 3.6969121250247663, "learning_rate": 6.228390117097366e-07, "loss": 0.3001, "step": 60640 }, { "epoch": 2.57, "grad_norm": 4.476640043833009, "learning_rate": 6.222440897920329e-07, "loss": 0.3175, "step": 60645 }, { "epoch": 2.57, "grad_norm": 3.711755542255426, "learning_rate": 6.216494332847739e-07, "loss": 0.2996, "step": 60650 }, { "epoch": 2.57, "grad_norm": 3.850102395876614, "learning_rate": 6.21055042224013e-07, "loss": 0.315, "step": 60655 }, { "epoch": 2.57, "grad_norm": 3.7612764079786003, "learning_rate": 6.204609166457864e-07, "loss": 0.328, "step": 60660 }, { "epoch": 2.57, "grad_norm": 5.0275154471976835, "learning_rate": 6.198670565861148e-07, "loss": 0.3201, "step": 60665 }, { "epoch": 2.57, "grad_norm": 4.7683288682066465, "learning_rate": 6.192734620810009e-07, "loss": 0.3475, "step": 60670 }, { "epoch": 2.57, "grad_norm": 4.140457978702257, "learning_rate": 6.186801331664327e-07, "loss": 0.3212, "step": 60675 }, { "epoch": 2.57, "grad_norm": 4.090453073693936, "learning_rate": 6.180870698783836e-07, "loss": 0.341, "step": 60680 }, { "epoch": 2.57, "grad_norm": 3.9042863321041086, "learning_rate": 6.174942722528071e-07, "loss": 0.3379, "step": 60685 }, { "epoch": 2.57, "grad_norm": 4.7792951036320535, "learning_rate": 6.169017403256439e-07, "loss": 0.3221, "step": 60690 }, { "epoch": 2.57, "grad_norm": 3.941908723011603, "learning_rate": 6.163094741328179e-07, "loss": 0.3131, "step": 60695 }, { "epoch": 2.57, "grad_norm": 4.64483322839673, "learning_rate": 6.157174737102356e-07, "loss": 0.3187, "step": 60700 }, { "epoch": 2.57, "grad_norm": 5.310391032095249, "learning_rate": 6.151257390937882e-07, "loss": 0.3206, "step": 60705 }, { "epoch": 2.57, "grad_norm": 3.8853871469043915, "learning_rate": 6.145342703193507e-07, "loss": 0.3115, "step": 60710 }, { "epoch": 2.57, "grad_norm": 3.950711456034995, "learning_rate": 6.139430674227842e-07, "loss": 0.3108, "step": 60715 }, { "epoch": 2.57, "grad_norm": 3.3138678705462388, "learning_rate": 6.133521304399287e-07, "loss": 0.3127, "step": 60720 }, { "epoch": 2.57, "grad_norm": 4.442805937571479, "learning_rate": 6.127614594066123e-07, "loss": 0.3181, "step": 60725 }, { "epoch": 2.57, "grad_norm": 4.26935102954461, "learning_rate": 6.121710543586467e-07, "loss": 0.3206, "step": 60730 }, { "epoch": 2.57, "grad_norm": 4.016064853362011, "learning_rate": 6.115809153318253e-07, "loss": 0.3367, "step": 60735 }, { "epoch": 2.57, "grad_norm": 4.428061863475383, "learning_rate": 6.109910423619253e-07, "loss": 0.3186, "step": 60740 }, { "epoch": 2.57, "grad_norm": 4.70480221007726, "learning_rate": 6.104014354847104e-07, "loss": 0.3555, "step": 60745 }, { "epoch": 2.57, "grad_norm": 4.213475491497634, "learning_rate": 6.098120947359271e-07, "loss": 0.328, "step": 60750 }, { "epoch": 2.57, "grad_norm": 4.7294433310255055, "learning_rate": 6.092230201513039e-07, "loss": 0.3101, "step": 60755 }, { "epoch": 2.57, "grad_norm": 3.584011838725251, "learning_rate": 6.086342117665556e-07, "loss": 0.2997, "step": 60760 }, { "epoch": 2.57, "grad_norm": 4.464617787179817, "learning_rate": 6.080456696173808e-07, "loss": 0.3495, "step": 60765 }, { "epoch": 2.57, "grad_norm": 4.772956784899947, "learning_rate": 6.074573937394596e-07, "loss": 0.3072, "step": 60770 }, { "epoch": 2.57, "grad_norm": 4.500191507016388, "learning_rate": 6.068693841684576e-07, "loss": 0.3147, "step": 60775 }, { "epoch": 2.57, "grad_norm": 3.8066797977423907, "learning_rate": 6.062816409400241e-07, "loss": 0.3245, "step": 60780 }, { "epoch": 2.57, "grad_norm": 4.474648180608742, "learning_rate": 6.056941640897939e-07, "loss": 0.3099, "step": 60785 }, { "epoch": 2.57, "grad_norm": 4.113143690635657, "learning_rate": 6.051069536533816e-07, "loss": 0.3279, "step": 60790 }, { "epoch": 2.57, "grad_norm": 3.7988892753298784, "learning_rate": 6.045200096663895e-07, "loss": 0.3208, "step": 60795 }, { "epoch": 2.57, "grad_norm": 3.447266577623724, "learning_rate": 6.039333321644026e-07, "loss": 0.3162, "step": 60800 }, { "epoch": 2.57, "grad_norm": 3.6227665853562665, "learning_rate": 6.033469211829873e-07, "loss": 0.3274, "step": 60805 }, { "epoch": 2.57, "grad_norm": 4.127141914998595, "learning_rate": 6.027607767576982e-07, "loss": 0.3228, "step": 60810 }, { "epoch": 2.57, "grad_norm": 3.7711423883518465, "learning_rate": 6.021748989240712e-07, "loss": 0.2997, "step": 60815 }, { "epoch": 2.57, "grad_norm": 3.899470346777691, "learning_rate": 6.015892877176249e-07, "loss": 0.3158, "step": 60820 }, { "epoch": 2.57, "grad_norm": 3.865217404064876, "learning_rate": 6.01003943173864e-07, "loss": 0.2958, "step": 60825 }, { "epoch": 2.57, "grad_norm": 4.0068408926721775, "learning_rate": 6.004188653282777e-07, "loss": 0.3161, "step": 60830 }, { "epoch": 2.57, "grad_norm": 3.8812480211843075, "learning_rate": 5.99834054216335e-07, "loss": 0.3208, "step": 60835 }, { "epoch": 2.57, "grad_norm": 3.652016014701967, "learning_rate": 5.992495098734929e-07, "loss": 0.314, "step": 60840 }, { "epoch": 2.58, "grad_norm": 3.9303718850930034, "learning_rate": 5.9866523233519e-07, "loss": 0.339, "step": 60845 }, { "epoch": 2.58, "grad_norm": 4.0378803941598616, "learning_rate": 5.980812216368503e-07, "loss": 0.3184, "step": 60850 }, { "epoch": 2.58, "grad_norm": 4.442180802541784, "learning_rate": 5.974974778138787e-07, "loss": 0.3321, "step": 60855 }, { "epoch": 2.58, "grad_norm": 3.894805942569527, "learning_rate": 5.969140009016672e-07, "loss": 0.3189, "step": 60860 }, { "epoch": 2.58, "grad_norm": 3.596451518289577, "learning_rate": 5.963307909355914e-07, "loss": 0.3171, "step": 60865 }, { "epoch": 2.58, "grad_norm": 4.267617797822112, "learning_rate": 5.957478479510071e-07, "loss": 0.2916, "step": 60870 }, { "epoch": 2.58, "grad_norm": 4.033338327596204, "learning_rate": 5.951651719832575e-07, "loss": 0.3218, "step": 60875 }, { "epoch": 2.58, "grad_norm": 4.035040744182014, "learning_rate": 5.945827630676698e-07, "loss": 0.2873, "step": 60880 }, { "epoch": 2.58, "grad_norm": 3.8642122514784707, "learning_rate": 5.940006212395511e-07, "loss": 0.3119, "step": 60885 }, { "epoch": 2.58, "grad_norm": 3.9148160156933494, "learning_rate": 5.934187465341972e-07, "loss": 0.3213, "step": 60890 }, { "epoch": 2.58, "grad_norm": 3.886421574667727, "learning_rate": 5.928371389868853e-07, "loss": 0.3073, "step": 60895 }, { "epoch": 2.58, "grad_norm": 3.846833879232973, "learning_rate": 5.922557986328759e-07, "loss": 0.3076, "step": 60900 }, { "epoch": 2.58, "grad_norm": 3.8847042948503945, "learning_rate": 5.916747255074129e-07, "loss": 0.3366, "step": 60905 }, { "epoch": 2.58, "grad_norm": 4.039904694906965, "learning_rate": 5.910939196457266e-07, "loss": 0.3227, "step": 60910 }, { "epoch": 2.58, "grad_norm": 3.9748669288165823, "learning_rate": 5.905133810830288e-07, "loss": 0.3139, "step": 60915 }, { "epoch": 2.58, "grad_norm": 4.369120696143879, "learning_rate": 5.899331098545174e-07, "loss": 0.3174, "step": 60920 }, { "epoch": 2.58, "grad_norm": 3.618535040952674, "learning_rate": 5.8935310599537e-07, "loss": 0.3271, "step": 60925 }, { "epoch": 2.58, "grad_norm": 4.663677158971989, "learning_rate": 5.887733695407516e-07, "loss": 0.3272, "step": 60930 }, { "epoch": 2.58, "grad_norm": 4.52064709775734, "learning_rate": 5.881939005258114e-07, "loss": 0.3122, "step": 60935 }, { "epoch": 2.58, "grad_norm": 4.13976370701866, "learning_rate": 5.876146989856784e-07, "loss": 0.3281, "step": 60940 }, { "epoch": 2.58, "grad_norm": 3.5769002043863187, "learning_rate": 5.870357649554692e-07, "loss": 0.275, "step": 60945 }, { "epoch": 2.58, "grad_norm": 4.740256851770981, "learning_rate": 5.864570984702838e-07, "loss": 0.3206, "step": 60950 }, { "epoch": 2.58, "grad_norm": 3.9770483037016437, "learning_rate": 5.858786995652033e-07, "loss": 0.3071, "step": 60955 }, { "epoch": 2.58, "grad_norm": 3.7273865980644203, "learning_rate": 5.853005682752949e-07, "loss": 0.2977, "step": 60960 }, { "epoch": 2.58, "grad_norm": 3.7275253649024123, "learning_rate": 5.847227046356085e-07, "loss": 0.3604, "step": 60965 }, { "epoch": 2.58, "grad_norm": 3.6034041413820472, "learning_rate": 5.841451086811806e-07, "loss": 0.3144, "step": 60970 }, { "epoch": 2.58, "grad_norm": 4.041026484457373, "learning_rate": 5.835677804470263e-07, "loss": 0.3005, "step": 60975 }, { "epoch": 2.58, "grad_norm": 3.738534395948023, "learning_rate": 5.829907199681484e-07, "loss": 0.3207, "step": 60980 }, { "epoch": 2.58, "grad_norm": 4.568046800380762, "learning_rate": 5.824139272795332e-07, "loss": 0.3177, "step": 60985 }, { "epoch": 2.58, "grad_norm": 3.7897766213372606, "learning_rate": 5.818374024161483e-07, "loss": 0.3435, "step": 60990 }, { "epoch": 2.58, "grad_norm": 4.639531932957799, "learning_rate": 5.812611454129474e-07, "loss": 0.3421, "step": 60995 }, { "epoch": 2.58, "grad_norm": 4.028132244094959, "learning_rate": 5.806851563048688e-07, "loss": 0.3338, "step": 61000 }, { "epoch": 2.58, "grad_norm": 3.895145479158649, "learning_rate": 5.801094351268305e-07, "loss": 0.28, "step": 61005 }, { "epoch": 2.58, "grad_norm": 3.803586040843032, "learning_rate": 5.795339819137375e-07, "loss": 0.3066, "step": 61010 }, { "epoch": 2.58, "grad_norm": 3.840450104568355, "learning_rate": 5.789587967004789e-07, "loss": 0.3223, "step": 61015 }, { "epoch": 2.58, "grad_norm": 3.701161083602059, "learning_rate": 5.783838795219266e-07, "loss": 0.2987, "step": 61020 }, { "epoch": 2.58, "grad_norm": 4.012175348374319, "learning_rate": 5.778092304129346e-07, "loss": 0.3263, "step": 61025 }, { "epoch": 2.58, "grad_norm": 3.87569263241528, "learning_rate": 5.772348494083424e-07, "loss": 0.3188, "step": 61030 }, { "epoch": 2.58, "grad_norm": 4.11302596015302, "learning_rate": 5.766607365429749e-07, "loss": 0.3448, "step": 61035 }, { "epoch": 2.58, "grad_norm": 3.9631778006363074, "learning_rate": 5.760868918516372e-07, "loss": 0.3247, "step": 61040 }, { "epoch": 2.58, "grad_norm": 4.127014182169294, "learning_rate": 5.755133153691194e-07, "loss": 0.3345, "step": 61045 }, { "epoch": 2.58, "grad_norm": 4.150535034538587, "learning_rate": 5.749400071301975e-07, "loss": 0.3033, "step": 61050 }, { "epoch": 2.58, "grad_norm": 4.028437573369199, "learning_rate": 5.743669671696279e-07, "loss": 0.3393, "step": 61055 }, { "epoch": 2.58, "grad_norm": 4.039452774202473, "learning_rate": 5.737941955221538e-07, "loss": 0.3255, "step": 61060 }, { "epoch": 2.58, "grad_norm": 3.7849163705972186, "learning_rate": 5.732216922224987e-07, "loss": 0.3053, "step": 61065 }, { "epoch": 2.58, "grad_norm": 4.222992448549588, "learning_rate": 5.726494573053742e-07, "loss": 0.3319, "step": 61070 }, { "epoch": 2.58, "grad_norm": 3.9712264275483493, "learning_rate": 5.72077490805471e-07, "loss": 0.3053, "step": 61075 }, { "epoch": 2.59, "grad_norm": 3.9590354745895087, "learning_rate": 5.715057927574663e-07, "loss": 0.325, "step": 61080 }, { "epoch": 2.59, "grad_norm": 4.070422239819677, "learning_rate": 5.709343631960218e-07, "loss": 0.3587, "step": 61085 }, { "epoch": 2.59, "grad_norm": 3.620214439414734, "learning_rate": 5.703632021557797e-07, "loss": 0.3028, "step": 61090 }, { "epoch": 2.59, "grad_norm": 3.7393405551422014, "learning_rate": 5.697923096713687e-07, "loss": 0.346, "step": 61095 }, { "epoch": 2.59, "grad_norm": 4.0526063955932825, "learning_rate": 5.692216857774008e-07, "loss": 0.3114, "step": 61100 }, { "epoch": 2.59, "grad_norm": 4.360772158423643, "learning_rate": 5.686513305084712e-07, "loss": 0.3196, "step": 61105 }, { "epoch": 2.59, "grad_norm": 3.842795953319503, "learning_rate": 5.680812438991578e-07, "loss": 0.3238, "step": 61110 }, { "epoch": 2.59, "grad_norm": 3.547614277255096, "learning_rate": 5.675114259840236e-07, "loss": 0.3194, "step": 61115 }, { "epoch": 2.59, "grad_norm": 3.8166903139690835, "learning_rate": 5.66941876797617e-07, "loss": 0.3417, "step": 61120 }, { "epoch": 2.59, "grad_norm": 4.235668872700056, "learning_rate": 5.663725963744648e-07, "loss": 0.318, "step": 61125 }, { "epoch": 2.59, "grad_norm": 4.087943928064198, "learning_rate": 5.658035847490828e-07, "loss": 0.3224, "step": 61130 }, { "epoch": 2.59, "grad_norm": 5.822427054717882, "learning_rate": 5.652348419559689e-07, "loss": 0.329, "step": 61135 }, { "epoch": 2.59, "grad_norm": 3.8128348472130504, "learning_rate": 5.646663680296022e-07, "loss": 0.328, "step": 61140 }, { "epoch": 2.59, "grad_norm": 3.6042068546496724, "learning_rate": 5.640981630044495e-07, "loss": 0.3261, "step": 61145 }, { "epoch": 2.59, "grad_norm": 3.6707972949487186, "learning_rate": 5.635302269149584e-07, "loss": 0.3167, "step": 61150 }, { "epoch": 2.59, "grad_norm": 4.930946828205894, "learning_rate": 5.629625597955623e-07, "loss": 0.3256, "step": 61155 }, { "epoch": 2.59, "grad_norm": 4.183062901373014, "learning_rate": 5.623951616806761e-07, "loss": 0.3083, "step": 61160 }, { "epoch": 2.59, "grad_norm": 4.1168953992130115, "learning_rate": 5.618280326046999e-07, "loss": 0.3468, "step": 61165 }, { "epoch": 2.59, "grad_norm": 4.023259171251964, "learning_rate": 5.612611726020173e-07, "loss": 0.3206, "step": 61170 }, { "epoch": 2.59, "grad_norm": 3.5965053580108663, "learning_rate": 5.606945817069947e-07, "loss": 0.3114, "step": 61175 }, { "epoch": 2.59, "grad_norm": 3.490143774222751, "learning_rate": 5.601282599539831e-07, "loss": 0.3104, "step": 61180 }, { "epoch": 2.59, "grad_norm": 3.8279880389206884, "learning_rate": 5.595622073773177e-07, "loss": 0.3237, "step": 61185 }, { "epoch": 2.59, "grad_norm": 3.9198491567649505, "learning_rate": 5.589964240113155e-07, "loss": 0.3125, "step": 61190 }, { "epoch": 2.59, "grad_norm": 4.0852472350888425, "learning_rate": 5.584309098902784e-07, "loss": 0.3136, "step": 61195 }, { "epoch": 2.59, "grad_norm": 4.032288826492062, "learning_rate": 5.578656650484921e-07, "loss": 0.3324, "step": 61200 }, { "epoch": 2.59, "grad_norm": 4.381239856800773, "learning_rate": 5.573006895202266e-07, "loss": 0.3302, "step": 61205 }, { "epoch": 2.59, "grad_norm": 3.817670769410906, "learning_rate": 5.567359833397335e-07, "loss": 0.3391, "step": 61210 }, { "epoch": 2.59, "grad_norm": 3.9761984129207595, "learning_rate": 5.561715465412493e-07, "loss": 0.3247, "step": 61215 }, { "epoch": 2.59, "grad_norm": 4.157059663593086, "learning_rate": 5.556073791589955e-07, "loss": 0.3281, "step": 61220 }, { "epoch": 2.59, "grad_norm": 3.689968599966529, "learning_rate": 5.550434812271749e-07, "loss": 0.3344, "step": 61225 }, { "epoch": 2.59, "grad_norm": 4.435767451859413, "learning_rate": 5.544798527799739e-07, "loss": 0.3226, "step": 61230 }, { "epoch": 2.59, "grad_norm": 4.088049484016113, "learning_rate": 5.539164938515646e-07, "loss": 0.3157, "step": 61235 }, { "epoch": 2.59, "grad_norm": 3.9985024859747322, "learning_rate": 5.533534044761035e-07, "loss": 0.3096, "step": 61240 }, { "epoch": 2.59, "grad_norm": 3.9110266732988332, "learning_rate": 5.527905846877257e-07, "loss": 0.3188, "step": 61245 }, { "epoch": 2.59, "grad_norm": 3.5825818521903154, "learning_rate": 5.522280345205556e-07, "loss": 0.3104, "step": 61250 }, { "epoch": 2.59, "grad_norm": 4.104909381589157, "learning_rate": 5.516657540086995e-07, "loss": 0.3325, "step": 61255 }, { "epoch": 2.59, "grad_norm": 4.0815074274619505, "learning_rate": 5.511037431862443e-07, "loss": 0.3336, "step": 61260 }, { "epoch": 2.59, "grad_norm": 4.020535704740354, "learning_rate": 5.505420020872648e-07, "loss": 0.3184, "step": 61265 }, { "epoch": 2.59, "grad_norm": 4.162962251168534, "learning_rate": 5.499805307458173e-07, "loss": 0.3078, "step": 61270 }, { "epoch": 2.59, "grad_norm": 3.794050945093777, "learning_rate": 5.494193291959432e-07, "loss": 0.3067, "step": 61275 }, { "epoch": 2.59, "grad_norm": 4.455721477503858, "learning_rate": 5.488583974716644e-07, "loss": 0.3161, "step": 61280 }, { "epoch": 2.59, "grad_norm": 4.33710986213934, "learning_rate": 5.482977356069902e-07, "loss": 0.3105, "step": 61285 }, { "epoch": 2.59, "grad_norm": 4.4768296477460545, "learning_rate": 5.477373436359118e-07, "loss": 0.3034, "step": 61290 }, { "epoch": 2.59, "grad_norm": 5.5509595221800785, "learning_rate": 5.471772215924032e-07, "loss": 0.3076, "step": 61295 }, { "epoch": 2.59, "grad_norm": 4.145174398713696, "learning_rate": 5.466173695104232e-07, "loss": 0.3173, "step": 61300 }, { "epoch": 2.59, "grad_norm": 4.839010359210999, "learning_rate": 5.460577874239153e-07, "loss": 0.3008, "step": 61305 }, { "epoch": 2.59, "grad_norm": 4.3915965453445915, "learning_rate": 5.45498475366803e-07, "loss": 0.3067, "step": 61310 }, { "epoch": 2.59, "grad_norm": 3.747774104315277, "learning_rate": 5.449394333729974e-07, "loss": 0.3275, "step": 61315 }, { "epoch": 2.6, "grad_norm": 4.583037674297164, "learning_rate": 5.443806614763908e-07, "loss": 0.3529, "step": 61320 }, { "epoch": 2.6, "grad_norm": 4.143469671257063, "learning_rate": 5.438221597108612e-07, "loss": 0.3181, "step": 61325 }, { "epoch": 2.6, "grad_norm": 3.888963159957272, "learning_rate": 5.432639281102669e-07, "loss": 0.3103, "step": 61330 }, { "epoch": 2.6, "grad_norm": 4.032944697459095, "learning_rate": 5.427059667084533e-07, "loss": 0.298, "step": 61335 }, { "epoch": 2.6, "grad_norm": 4.917368241201488, "learning_rate": 5.421482755392482e-07, "loss": 0.3171, "step": 61340 }, { "epoch": 2.6, "grad_norm": 4.219228617506832, "learning_rate": 5.415908546364612e-07, "loss": 0.3195, "step": 61345 }, { "epoch": 2.6, "grad_norm": 3.730917153037885, "learning_rate": 5.410337040338882e-07, "loss": 0.316, "step": 61350 }, { "epoch": 2.6, "grad_norm": 3.70030821660783, "learning_rate": 5.40476823765308e-07, "loss": 0.3196, "step": 61355 }, { "epoch": 2.6, "grad_norm": 3.961794813221685, "learning_rate": 5.399202138644816e-07, "loss": 0.3134, "step": 61360 }, { "epoch": 2.6, "grad_norm": 3.9567151687211712, "learning_rate": 5.393638743651547e-07, "loss": 0.2996, "step": 61365 }, { "epoch": 2.6, "grad_norm": 3.7629407546595504, "learning_rate": 5.388078053010576e-07, "loss": 0.3001, "step": 61370 }, { "epoch": 2.6, "grad_norm": 4.172999905615919, "learning_rate": 5.382520067059017e-07, "loss": 0.3212, "step": 61375 }, { "epoch": 2.6, "grad_norm": 3.8332256006061374, "learning_rate": 5.376964786133854e-07, "loss": 0.3279, "step": 61380 }, { "epoch": 2.6, "grad_norm": 4.065914314714507, "learning_rate": 5.371412210571858e-07, "loss": 0.2902, "step": 61385 }, { "epoch": 2.6, "grad_norm": 4.871140565630022, "learning_rate": 5.365862340709698e-07, "loss": 0.3377, "step": 61390 }, { "epoch": 2.6, "grad_norm": 4.214846082012037, "learning_rate": 5.360315176883818e-07, "loss": 0.3163, "step": 61395 }, { "epoch": 2.6, "grad_norm": 3.610035993270218, "learning_rate": 5.354770719430535e-07, "loss": 0.3291, "step": 61400 }, { "epoch": 2.6, "grad_norm": 4.056680274821778, "learning_rate": 5.349228968685999e-07, "loss": 0.307, "step": 61405 }, { "epoch": 2.6, "grad_norm": 4.064196285707383, "learning_rate": 5.343689924986201e-07, "loss": 0.2844, "step": 61410 }, { "epoch": 2.6, "grad_norm": 3.713762737796012, "learning_rate": 5.338153588666928e-07, "loss": 0.3033, "step": 61415 }, { "epoch": 2.6, "grad_norm": 4.859532337199477, "learning_rate": 5.332619960063851e-07, "loss": 0.3321, "step": 61420 }, { "epoch": 2.6, "grad_norm": 4.514041757146596, "learning_rate": 5.327089039512462e-07, "loss": 0.2973, "step": 61425 }, { "epoch": 2.6, "grad_norm": 3.955936615061935, "learning_rate": 5.321560827348066e-07, "loss": 0.3107, "step": 61430 }, { "epoch": 2.6, "grad_norm": 4.051360880785011, "learning_rate": 5.316035323905827e-07, "loss": 0.3334, "step": 61435 }, { "epoch": 2.6, "grad_norm": 3.508834619724927, "learning_rate": 5.310512529520761e-07, "loss": 0.3136, "step": 61440 }, { "epoch": 2.6, "grad_norm": 4.582404413204896, "learning_rate": 5.304992444527673e-07, "loss": 0.3258, "step": 61445 }, { "epoch": 2.6, "grad_norm": 4.653254716716395, "learning_rate": 5.29947506926124e-07, "loss": 0.3258, "step": 61450 }, { "epoch": 2.6, "grad_norm": 4.23980459570075, "learning_rate": 5.293960404055964e-07, "loss": 0.3507, "step": 61455 }, { "epoch": 2.6, "grad_norm": 3.9653649188136666, "learning_rate": 5.288448449246192e-07, "loss": 0.3267, "step": 61460 }, { "epoch": 2.6, "grad_norm": 4.439453074226438, "learning_rate": 5.282939205166082e-07, "loss": 0.34, "step": 61465 }, { "epoch": 2.6, "grad_norm": 3.6593351934282534, "learning_rate": 5.277432672149646e-07, "loss": 0.2928, "step": 61470 }, { "epoch": 2.6, "grad_norm": 4.229136717602049, "learning_rate": 5.271928850530738e-07, "loss": 0.3075, "step": 61475 }, { "epoch": 2.6, "grad_norm": 4.011319211210055, "learning_rate": 5.266427740643032e-07, "loss": 0.3181, "step": 61480 }, { "epoch": 2.6, "grad_norm": 3.9782550573193665, "learning_rate": 5.260929342820037e-07, "loss": 0.3391, "step": 61485 }, { "epoch": 2.6, "grad_norm": 4.269395641288928, "learning_rate": 5.255433657395126e-07, "loss": 0.3107, "step": 61490 }, { "epoch": 2.6, "grad_norm": 3.93890289764734, "learning_rate": 5.249940684701465e-07, "loss": 0.3218, "step": 61495 }, { "epoch": 2.6, "grad_norm": 3.976579031612839, "learning_rate": 5.244450425072084e-07, "loss": 0.3276, "step": 61500 }, { "epoch": 2.6, "grad_norm": 3.891678475478426, "learning_rate": 5.238962878839838e-07, "loss": 0.3103, "step": 61505 }, { "epoch": 2.6, "grad_norm": 3.651118941853109, "learning_rate": 5.233478046337437e-07, "loss": 0.3057, "step": 61510 }, { "epoch": 2.6, "grad_norm": 4.078771878913234, "learning_rate": 5.227995927897389e-07, "loss": 0.3119, "step": 61515 }, { "epoch": 2.6, "grad_norm": 5.719823506994609, "learning_rate": 5.222516523852067e-07, "loss": 0.3334, "step": 61520 }, { "epoch": 2.6, "grad_norm": 4.029061818008321, "learning_rate": 5.217039834533682e-07, "loss": 0.3285, "step": 61525 }, { "epoch": 2.6, "grad_norm": 4.251631040390349, "learning_rate": 5.211565860274249e-07, "loss": 0.3053, "step": 61530 }, { "epoch": 2.6, "grad_norm": 4.617490820412938, "learning_rate": 5.206094601405647e-07, "loss": 0.3193, "step": 61535 }, { "epoch": 2.6, "grad_norm": 4.123861243760195, "learning_rate": 5.2006260582596e-07, "loss": 0.3232, "step": 61540 }, { "epoch": 2.6, "grad_norm": 4.258286951590188, "learning_rate": 5.195160231167628e-07, "loss": 0.2974, "step": 61545 }, { "epoch": 2.6, "grad_norm": 4.026935032155439, "learning_rate": 5.189697120461107e-07, "loss": 0.3156, "step": 61550 }, { "epoch": 2.61, "grad_norm": 4.394538596533563, "learning_rate": 5.184236726471259e-07, "loss": 0.3025, "step": 61555 }, { "epoch": 2.61, "grad_norm": 4.3138110842901805, "learning_rate": 5.178779049529137e-07, "loss": 0.3346, "step": 61560 }, { "epoch": 2.61, "grad_norm": 4.082881525756429, "learning_rate": 5.173324089965603e-07, "loss": 0.3098, "step": 61565 }, { "epoch": 2.61, "grad_norm": 4.091433780955253, "learning_rate": 5.167871848111395e-07, "loss": 0.3039, "step": 61570 }, { "epoch": 2.61, "grad_norm": 4.332971502348048, "learning_rate": 5.16242232429705e-07, "loss": 0.3211, "step": 61575 }, { "epoch": 2.61, "grad_norm": 3.853558979089911, "learning_rate": 5.156975518852986e-07, "loss": 0.3284, "step": 61580 }, { "epoch": 2.61, "grad_norm": 4.002871294151511, "learning_rate": 5.151531432109391e-07, "loss": 0.3071, "step": 61585 }, { "epoch": 2.61, "grad_norm": 4.306157195120202, "learning_rate": 5.146090064396342e-07, "loss": 0.3355, "step": 61590 }, { "epoch": 2.61, "grad_norm": 4.278748564462806, "learning_rate": 5.140651416043735e-07, "loss": 0.3114, "step": 61595 }, { "epoch": 2.61, "grad_norm": 4.538938550701009, "learning_rate": 5.13521548738129e-07, "loss": 0.3364, "step": 61600 }, { "epoch": 2.61, "grad_norm": 3.704198544622109, "learning_rate": 5.129782278738576e-07, "loss": 0.3255, "step": 61605 }, { "epoch": 2.61, "grad_norm": 4.2296578085479215, "learning_rate": 5.124351790445003e-07, "loss": 0.3224, "step": 61610 }, { "epoch": 2.61, "grad_norm": 4.052205664895545, "learning_rate": 5.118924022829791e-07, "loss": 0.2997, "step": 61615 }, { "epoch": 2.61, "grad_norm": 5.296221797076509, "learning_rate": 5.113498976222004e-07, "loss": 0.322, "step": 61620 }, { "epoch": 2.61, "grad_norm": 4.486936528064822, "learning_rate": 5.108076650950561e-07, "loss": 0.3316, "step": 61625 }, { "epoch": 2.61, "grad_norm": 3.8211108030274903, "learning_rate": 5.102657047344212e-07, "loss": 0.3078, "step": 61630 }, { "epoch": 2.61, "grad_norm": 3.784780053334359, "learning_rate": 5.097240165731499e-07, "loss": 0.3067, "step": 61635 }, { "epoch": 2.61, "grad_norm": 4.00107472647597, "learning_rate": 5.091826006440854e-07, "loss": 0.3211, "step": 61640 }, { "epoch": 2.61, "grad_norm": 7.819110920355072, "learning_rate": 5.086414569800524e-07, "loss": 0.344, "step": 61645 }, { "epoch": 2.61, "grad_norm": 4.397941167638235, "learning_rate": 5.081005856138571e-07, "loss": 0.3329, "step": 61650 }, { "epoch": 2.61, "grad_norm": 3.7534630370329203, "learning_rate": 5.075599865782921e-07, "loss": 0.2901, "step": 61655 }, { "epoch": 2.61, "grad_norm": 3.8749462115055944, "learning_rate": 5.070196599061333e-07, "loss": 0.3156, "step": 61660 }, { "epoch": 2.61, "grad_norm": 3.606854872207813, "learning_rate": 5.064796056301369e-07, "loss": 0.2865, "step": 61665 }, { "epoch": 2.61, "grad_norm": 4.200039837918513, "learning_rate": 5.05939823783046e-07, "loss": 0.3222, "step": 61670 }, { "epoch": 2.61, "grad_norm": 5.186989163023474, "learning_rate": 5.054003143975871e-07, "loss": 0.3225, "step": 61675 }, { "epoch": 2.61, "grad_norm": 4.357362557753834, "learning_rate": 5.048610775064666e-07, "loss": 0.3414, "step": 61680 }, { "epoch": 2.61, "grad_norm": 3.7122388095019168, "learning_rate": 5.043221131423781e-07, "loss": 0.3165, "step": 61685 }, { "epoch": 2.61, "grad_norm": 4.306950235764491, "learning_rate": 5.037834213379972e-07, "loss": 0.3, "step": 61690 }, { "epoch": 2.61, "grad_norm": 4.1521129283972105, "learning_rate": 5.032450021259849e-07, "loss": 0.3212, "step": 61695 }, { "epoch": 2.61, "grad_norm": 3.9560971402412024, "learning_rate": 5.027068555389813e-07, "loss": 0.3494, "step": 61700 }, { "epoch": 2.61, "grad_norm": 3.7487499340118404, "learning_rate": 5.021689816096148e-07, "loss": 0.3022, "step": 61705 }, { "epoch": 2.61, "grad_norm": 3.715152816470811, "learning_rate": 5.016313803704931e-07, "loss": 0.3001, "step": 61710 }, { "epoch": 2.61, "grad_norm": 4.616085660599245, "learning_rate": 5.010940518542112e-07, "loss": 0.3374, "step": 61715 }, { "epoch": 2.61, "grad_norm": 3.730181293782424, "learning_rate": 5.005569960933443e-07, "loss": 0.3168, "step": 61720 }, { "epoch": 2.61, "grad_norm": 4.202024059702223, "learning_rate": 5.000202131204529e-07, "loss": 0.2923, "step": 61725 }, { "epoch": 2.61, "grad_norm": 3.8509993657846255, "learning_rate": 4.994837029680821e-07, "loss": 0.2968, "step": 61730 }, { "epoch": 2.61, "grad_norm": 3.9658950088168803, "learning_rate": 4.989474656687571e-07, "loss": 0.3152, "step": 61735 }, { "epoch": 2.61, "grad_norm": 3.777446708358338, "learning_rate": 4.984115012549884e-07, "loss": 0.3259, "step": 61740 }, { "epoch": 2.61, "grad_norm": 4.024418791367168, "learning_rate": 4.978758097592717e-07, "loss": 0.3006, "step": 61745 }, { "epoch": 2.61, "grad_norm": 4.1609474324050515, "learning_rate": 4.973403912140823e-07, "loss": 0.3405, "step": 61750 }, { "epoch": 2.61, "grad_norm": 4.02304442653169, "learning_rate": 4.968052456518818e-07, "loss": 0.3143, "step": 61755 }, { "epoch": 2.61, "grad_norm": 3.6323390945395433, "learning_rate": 4.962703731051144e-07, "loss": 0.3193, "step": 61760 }, { "epoch": 2.61, "grad_norm": 4.257151533242243, "learning_rate": 4.957357736062097e-07, "loss": 0.3005, "step": 61765 }, { "epoch": 2.61, "grad_norm": 4.827600218900033, "learning_rate": 4.95201447187576e-07, "loss": 0.3434, "step": 61770 }, { "epoch": 2.61, "grad_norm": 3.9067172250515707, "learning_rate": 4.946673938816088e-07, "loss": 0.3251, "step": 61775 }, { "epoch": 2.61, "grad_norm": 3.5134945546421132, "learning_rate": 4.941336137206882e-07, "loss": 0.303, "step": 61780 }, { "epoch": 2.61, "grad_norm": 3.8412196427141563, "learning_rate": 4.936001067371732e-07, "loss": 0.3223, "step": 61785 }, { "epoch": 2.62, "grad_norm": 5.7360459099370615, "learning_rate": 4.930668729634091e-07, "loss": 0.3131, "step": 61790 }, { "epoch": 2.62, "grad_norm": 3.8565568441411173, "learning_rate": 4.925339124317264e-07, "loss": 0.3335, "step": 61795 }, { "epoch": 2.62, "grad_norm": 4.26537055671759, "learning_rate": 4.920012251744344e-07, "loss": 0.3063, "step": 61800 }, { "epoch": 2.62, "grad_norm": 5.873356813153911, "learning_rate": 4.914688112238292e-07, "loss": 0.3327, "step": 61805 }, { "epoch": 2.62, "grad_norm": 4.843880213304912, "learning_rate": 4.909366706121899e-07, "loss": 0.3271, "step": 61810 }, { "epoch": 2.62, "grad_norm": 5.487797344896647, "learning_rate": 4.90404803371779e-07, "loss": 0.3279, "step": 61815 }, { "epoch": 2.62, "grad_norm": 3.813643277797878, "learning_rate": 4.898732095348407e-07, "loss": 0.322, "step": 61820 }, { "epoch": 2.62, "grad_norm": 4.300010933164801, "learning_rate": 4.893418891336043e-07, "loss": 0.3063, "step": 61825 }, { "epoch": 2.62, "grad_norm": 5.326386207261732, "learning_rate": 4.88810842200284e-07, "loss": 0.327, "step": 61830 }, { "epoch": 2.62, "grad_norm": 4.123818365226369, "learning_rate": 4.88280068767073e-07, "loss": 0.324, "step": 61835 }, { "epoch": 2.62, "grad_norm": 3.8449714680780556, "learning_rate": 4.877495688661521e-07, "loss": 0.291, "step": 61840 }, { "epoch": 2.62, "grad_norm": 4.05860815753287, "learning_rate": 4.872193425296839e-07, "loss": 0.3342, "step": 61845 }, { "epoch": 2.62, "grad_norm": 3.80730820226469, "learning_rate": 4.866893897898139e-07, "loss": 0.3223, "step": 61850 }, { "epoch": 2.62, "grad_norm": 3.802247311625388, "learning_rate": 4.861597106786715e-07, "loss": 0.3211, "step": 61855 }, { "epoch": 2.62, "grad_norm": 3.8366309225363158, "learning_rate": 4.856303052283695e-07, "loss": 0.3027, "step": 61860 }, { "epoch": 2.62, "grad_norm": 4.097382181051562, "learning_rate": 4.85101173471006e-07, "loss": 0.3263, "step": 61865 }, { "epoch": 2.62, "grad_norm": 3.8980764405068578, "learning_rate": 4.845723154386589e-07, "loss": 0.3338, "step": 61870 }, { "epoch": 2.62, "grad_norm": 4.321895141840432, "learning_rate": 4.840437311633911e-07, "loss": 0.3164, "step": 61875 }, { "epoch": 2.62, "grad_norm": 4.754000467843632, "learning_rate": 4.835154206772497e-07, "loss": 0.3078, "step": 61880 }, { "epoch": 2.62, "grad_norm": 3.542726878933037, "learning_rate": 4.829873840122651e-07, "loss": 0.3162, "step": 61885 }, { "epoch": 2.62, "grad_norm": 3.9475740788495264, "learning_rate": 4.824596212004495e-07, "loss": 0.3065, "step": 61890 }, { "epoch": 2.62, "grad_norm": 3.863871108321877, "learning_rate": 4.819321322738002e-07, "loss": 0.3153, "step": 61895 }, { "epoch": 2.62, "grad_norm": 4.9835604851027, "learning_rate": 4.814049172642982e-07, "loss": 0.2873, "step": 61900 }, { "epoch": 2.62, "grad_norm": 4.179191854270572, "learning_rate": 4.80877976203905e-07, "loss": 0.3248, "step": 61905 }, { "epoch": 2.62, "grad_norm": 4.135033390382629, "learning_rate": 4.803513091245693e-07, "loss": 0.3227, "step": 61910 }, { "epoch": 2.62, "grad_norm": 4.050966570963871, "learning_rate": 4.798249160582208e-07, "loss": 0.3117, "step": 61915 }, { "epoch": 2.62, "grad_norm": 3.9028896479163047, "learning_rate": 4.792987970367724e-07, "loss": 0.3066, "step": 61920 }, { "epoch": 2.62, "grad_norm": 4.02754580911077, "learning_rate": 4.787729520921219e-07, "loss": 0.3389, "step": 61925 }, { "epoch": 2.62, "grad_norm": 4.783000975423128, "learning_rate": 4.782473812561494e-07, "loss": 0.3241, "step": 61930 }, { "epoch": 2.62, "grad_norm": 3.9338731064580923, "learning_rate": 4.7772208456072e-07, "loss": 0.3288, "step": 61935 }, { "epoch": 2.62, "grad_norm": 4.48857932430121, "learning_rate": 4.771970620376787e-07, "loss": 0.304, "step": 61940 }, { "epoch": 2.62, "grad_norm": 3.877747580483727, "learning_rate": 4.7667231371885724e-07, "loss": 0.3267, "step": 61945 }, { "epoch": 2.62, "grad_norm": 3.525515266412173, "learning_rate": 4.761478396360708e-07, "loss": 0.3426, "step": 61950 }, { "epoch": 2.62, "grad_norm": 3.8893811360417962, "learning_rate": 4.7562363982111394e-07, "loss": 0.3163, "step": 61955 }, { "epoch": 2.62, "grad_norm": 4.129005840373028, "learning_rate": 4.75099714305769e-07, "loss": 0.2966, "step": 61960 }, { "epoch": 2.62, "grad_norm": 4.081183530936069, "learning_rate": 4.745760631218005e-07, "loss": 0.3152, "step": 61965 }, { "epoch": 2.62, "grad_norm": 4.14841009531825, "learning_rate": 4.740526863009548e-07, "loss": 0.3092, "step": 61970 }, { "epoch": 2.62, "grad_norm": 4.026489780859911, "learning_rate": 4.7352958387496264e-07, "loss": 0.314, "step": 61975 }, { "epoch": 2.62, "grad_norm": 5.434235289941069, "learning_rate": 4.730067558755402e-07, "loss": 0.2846, "step": 61980 }, { "epoch": 2.62, "grad_norm": 4.186504864393021, "learning_rate": 4.7248420233438177e-07, "loss": 0.31, "step": 61985 }, { "epoch": 2.62, "grad_norm": 3.987838830645109, "learning_rate": 4.7196192328317067e-07, "loss": 0.3193, "step": 61990 }, { "epoch": 2.62, "grad_norm": 4.06604996020392, "learning_rate": 4.7143991875357007e-07, "loss": 0.3044, "step": 61995 }, { "epoch": 2.62, "grad_norm": 4.4786265278390625, "learning_rate": 4.709181887772285e-07, "loss": 0.3143, "step": 62000 }, { "epoch": 2.62, "grad_norm": 4.0293313270815645, "learning_rate": 4.7039673338577563e-07, "loss": 0.287, "step": 62005 }, { "epoch": 2.62, "grad_norm": 4.184571140639369, "learning_rate": 4.6987555261082673e-07, "loss": 0.3172, "step": 62010 }, { "epoch": 2.62, "grad_norm": 4.2418463732659095, "learning_rate": 4.6935464648397935e-07, "loss": 0.3148, "step": 62015 }, { "epoch": 2.62, "grad_norm": 4.648134298010513, "learning_rate": 4.688340150368142e-07, "loss": 0.3159, "step": 62020 }, { "epoch": 2.63, "grad_norm": 4.4068946381351, "learning_rate": 4.6831365830089514e-07, "loss": 0.2947, "step": 62025 }, { "epoch": 2.63, "grad_norm": 3.9851286632081186, "learning_rate": 4.677935763077718e-07, "loss": 0.3148, "step": 62030 }, { "epoch": 2.63, "grad_norm": 4.154976559697933, "learning_rate": 4.672737690889734e-07, "loss": 0.3138, "step": 62035 }, { "epoch": 2.63, "grad_norm": 4.367260197703903, "learning_rate": 4.6675423667601417e-07, "loss": 0.3191, "step": 62040 }, { "epoch": 2.63, "grad_norm": 3.7986328193506735, "learning_rate": 4.6623497910039227e-07, "loss": 0.3235, "step": 62045 }, { "epoch": 2.63, "grad_norm": 3.6624905167990454, "learning_rate": 4.657159963935903e-07, "loss": 0.3849, "step": 62050 }, { "epoch": 2.63, "grad_norm": 4.0803212857036435, "learning_rate": 4.651972885870698e-07, "loss": 0.2947, "step": 62055 }, { "epoch": 2.63, "grad_norm": 4.978560572543455, "learning_rate": 4.6467885571228057e-07, "loss": 0.3035, "step": 62060 }, { "epoch": 2.63, "grad_norm": 4.940676377197255, "learning_rate": 4.641606978006524e-07, "loss": 0.3075, "step": 62065 }, { "epoch": 2.63, "grad_norm": 4.15025134143643, "learning_rate": 4.63642814883602e-07, "loss": 0.3069, "step": 62070 }, { "epoch": 2.63, "grad_norm": 4.071293092827934, "learning_rate": 4.6312520699252406e-07, "loss": 0.3374, "step": 62075 }, { "epoch": 2.63, "grad_norm": 4.284741222171689, "learning_rate": 4.6260787415880137e-07, "loss": 0.3004, "step": 62080 }, { "epoch": 2.63, "grad_norm": 4.162697807570442, "learning_rate": 4.620908164137988e-07, "loss": 0.3184, "step": 62085 }, { "epoch": 2.63, "grad_norm": 3.979645905454209, "learning_rate": 4.615740337888619e-07, "loss": 0.3148, "step": 62090 }, { "epoch": 2.63, "grad_norm": 3.891892827953397, "learning_rate": 4.610575263153233e-07, "loss": 0.3107, "step": 62095 }, { "epoch": 2.63, "grad_norm": 4.000297329638286, "learning_rate": 4.60541294024498e-07, "loss": 0.3228, "step": 62100 }, { "epoch": 2.63, "grad_norm": 4.532291744061456, "learning_rate": 4.6002533694768147e-07, "loss": 0.3406, "step": 62105 }, { "epoch": 2.63, "grad_norm": 3.812773912051182, "learning_rate": 4.595096551161554e-07, "loss": 0.2894, "step": 62110 }, { "epoch": 2.63, "grad_norm": 3.7747853287298923, "learning_rate": 4.5899424856118533e-07, "loss": 0.3076, "step": 62115 }, { "epoch": 2.63, "grad_norm": 4.166983895910034, "learning_rate": 4.584791173140185e-07, "loss": 0.3063, "step": 62120 }, { "epoch": 2.63, "grad_norm": 4.213401972157103, "learning_rate": 4.579642614058843e-07, "loss": 0.3316, "step": 62125 }, { "epoch": 2.63, "grad_norm": 4.473739050296158, "learning_rate": 4.5744968086799793e-07, "loss": 0.312, "step": 62130 }, { "epoch": 2.63, "grad_norm": 3.3238967528751817, "learning_rate": 4.5693537573155813e-07, "loss": 0.2782, "step": 62135 }, { "epoch": 2.63, "grad_norm": 4.205583338534464, "learning_rate": 4.5642134602774337e-07, "loss": 0.3461, "step": 62140 }, { "epoch": 2.63, "grad_norm": 3.9367353994930103, "learning_rate": 4.5590759178771873e-07, "loss": 0.3199, "step": 62145 }, { "epoch": 2.63, "grad_norm": 3.6753713253600466, "learning_rate": 4.553941130426326e-07, "loss": 0.3126, "step": 62150 }, { "epoch": 2.63, "grad_norm": 3.7247961111282244, "learning_rate": 4.54880909823614e-07, "loss": 0.3091, "step": 62155 }, { "epoch": 2.63, "grad_norm": 3.6897391032153757, "learning_rate": 4.54367982161778e-07, "loss": 0.3022, "step": 62160 }, { "epoch": 2.63, "grad_norm": 3.6982490355415982, "learning_rate": 4.5385533008822146e-07, "loss": 0.2961, "step": 62165 }, { "epoch": 2.63, "grad_norm": 4.0672506624954865, "learning_rate": 4.5334295363402616e-07, "loss": 0.3072, "step": 62170 }, { "epoch": 2.63, "grad_norm": 4.260266438889107, "learning_rate": 4.5283085283025384e-07, "loss": 0.3465, "step": 62175 }, { "epoch": 2.63, "grad_norm": 3.8681760165837944, "learning_rate": 4.523190277079531e-07, "loss": 0.2965, "step": 62180 }, { "epoch": 2.63, "grad_norm": 4.414103981966088, "learning_rate": 4.518074782981546e-07, "loss": 0.3062, "step": 62185 }, { "epoch": 2.63, "grad_norm": 3.891465929709592, "learning_rate": 4.512962046318714e-07, "loss": 0.2996, "step": 62190 }, { "epoch": 2.63, "grad_norm": 4.317033430660098, "learning_rate": 4.507852067401003e-07, "loss": 0.2992, "step": 62195 }, { "epoch": 2.63, "grad_norm": 4.086105384469176, "learning_rate": 4.5027448465382207e-07, "loss": 0.3161, "step": 62200 }, { "epoch": 2.63, "grad_norm": 4.250221577729695, "learning_rate": 4.497640384040003e-07, "loss": 0.3432, "step": 62205 }, { "epoch": 2.63, "grad_norm": 3.6493664409687074, "learning_rate": 4.492538680215813e-07, "loss": 0.3026, "step": 62210 }, { "epoch": 2.63, "grad_norm": 4.135977039658859, "learning_rate": 4.4874397353749597e-07, "loss": 0.3065, "step": 62215 }, { "epoch": 2.63, "grad_norm": 5.232765021489601, "learning_rate": 4.482343549826579e-07, "loss": 0.3174, "step": 62220 }, { "epoch": 2.63, "grad_norm": 4.463472978189227, "learning_rate": 4.477250123879623e-07, "loss": 0.308, "step": 62225 }, { "epoch": 2.63, "grad_norm": 4.274657271005043, "learning_rate": 4.4721594578429005e-07, "loss": 0.3294, "step": 62230 }, { "epoch": 2.63, "grad_norm": 4.127685598654136, "learning_rate": 4.4670715520250484e-07, "loss": 0.3092, "step": 62235 }, { "epoch": 2.63, "grad_norm": 4.505085828176142, "learning_rate": 4.4619864067345255e-07, "loss": 0.3208, "step": 62240 }, { "epoch": 2.63, "grad_norm": 4.150739956785816, "learning_rate": 4.4569040222796234e-07, "loss": 0.3365, "step": 62245 }, { "epoch": 2.63, "grad_norm": 3.5715358713377645, "learning_rate": 4.4518243989684793e-07, "loss": 0.3076, "step": 62250 }, { "epoch": 2.63, "grad_norm": 4.004925192446083, "learning_rate": 4.44674753710907e-07, "loss": 0.285, "step": 62255 }, { "epoch": 2.63, "grad_norm": 3.880839020667359, "learning_rate": 4.441673437009164e-07, "loss": 0.2977, "step": 62260 }, { "epoch": 2.64, "grad_norm": 3.7802019264526927, "learning_rate": 4.4366020989764005e-07, "loss": 0.3073, "step": 62265 }, { "epoch": 2.64, "grad_norm": 3.3868838708714994, "learning_rate": 4.4315335233182487e-07, "loss": 0.3323, "step": 62270 }, { "epoch": 2.64, "grad_norm": 4.783471177471014, "learning_rate": 4.4264677103419863e-07, "loss": 0.3217, "step": 62275 }, { "epoch": 2.64, "grad_norm": 3.8928707398943856, "learning_rate": 4.421404660354739e-07, "loss": 0.2982, "step": 62280 }, { "epoch": 2.64, "grad_norm": 4.032453782623777, "learning_rate": 4.416344373663489e-07, "loss": 0.3218, "step": 62285 }, { "epoch": 2.64, "grad_norm": 3.576640811038284, "learning_rate": 4.4112868505749917e-07, "loss": 0.3027, "step": 62290 }, { "epoch": 2.64, "grad_norm": 3.6051552283845214, "learning_rate": 4.40623209139589e-07, "loss": 0.3115, "step": 62295 }, { "epoch": 2.64, "grad_norm": 4.218690241005833, "learning_rate": 4.401180096432639e-07, "loss": 0.3025, "step": 62300 }, { "epoch": 2.64, "grad_norm": 4.192772501321442, "learning_rate": 4.396130865991527e-07, "loss": 0.3263, "step": 62305 }, { "epoch": 2.64, "grad_norm": 3.7606415646585747, "learning_rate": 4.391084400378665e-07, "loss": 0.3391, "step": 62310 }, { "epoch": 2.64, "grad_norm": 4.177950437824774, "learning_rate": 4.3860406999000074e-07, "loss": 0.3063, "step": 62315 }, { "epoch": 2.64, "grad_norm": 3.7171736505639994, "learning_rate": 4.3809997648613545e-07, "loss": 0.3046, "step": 62320 }, { "epoch": 2.64, "grad_norm": 3.6741382501235718, "learning_rate": 4.3759615955682955e-07, "loss": 0.2893, "step": 62325 }, { "epoch": 2.64, "grad_norm": 4.270355098223836, "learning_rate": 4.3709261923263027e-07, "loss": 0.3148, "step": 62330 }, { "epoch": 2.64, "grad_norm": 3.915009653193231, "learning_rate": 4.3658935554406537e-07, "loss": 0.3018, "step": 62335 }, { "epoch": 2.64, "grad_norm": 3.720301480927801, "learning_rate": 4.3608636852164486e-07, "loss": 0.3082, "step": 62340 }, { "epoch": 2.64, "grad_norm": 3.87274674045287, "learning_rate": 4.355836581958645e-07, "loss": 0.299, "step": 62345 }, { "epoch": 2.64, "grad_norm": 4.084986336560197, "learning_rate": 4.350812245972025e-07, "loss": 0.3072, "step": 62350 }, { "epoch": 2.64, "grad_norm": 4.216755447824875, "learning_rate": 4.345790677561207e-07, "loss": 0.3353, "step": 62355 }, { "epoch": 2.64, "grad_norm": 3.9103749153226413, "learning_rate": 4.3407718770306026e-07, "loss": 0.3117, "step": 62360 }, { "epoch": 2.64, "grad_norm": 4.064086601364142, "learning_rate": 4.3357558446845085e-07, "loss": 0.296, "step": 62365 }, { "epoch": 2.64, "grad_norm": 3.588534171076538, "learning_rate": 4.3307425808270296e-07, "loss": 0.3187, "step": 62370 }, { "epoch": 2.64, "grad_norm": 4.211333261488694, "learning_rate": 4.325732085762113e-07, "loss": 0.3282, "step": 62375 }, { "epoch": 2.64, "grad_norm": 4.667269424607841, "learning_rate": 4.3207243597935153e-07, "loss": 0.3167, "step": 62380 }, { "epoch": 2.64, "grad_norm": 4.582452102109443, "learning_rate": 4.315719403224844e-07, "loss": 0.314, "step": 62385 }, { "epoch": 2.64, "grad_norm": 4.107428418292896, "learning_rate": 4.3107172163595446e-07, "loss": 0.2944, "step": 62390 }, { "epoch": 2.64, "grad_norm": 3.5976620529439676, "learning_rate": 4.305717799500875e-07, "loss": 0.3222, "step": 62395 }, { "epoch": 2.64, "grad_norm": 3.942093710922755, "learning_rate": 4.3007211529519366e-07, "loss": 0.3243, "step": 62400 }, { "epoch": 2.64, "grad_norm": 4.127776951655821, "learning_rate": 4.295727277015671e-07, "loss": 0.3204, "step": 62405 }, { "epoch": 2.64, "grad_norm": 3.5753943665623837, "learning_rate": 4.2907361719948246e-07, "loss": 0.2937, "step": 62410 }, { "epoch": 2.64, "grad_norm": 4.125975312288947, "learning_rate": 4.2857478381920004e-07, "loss": 0.3092, "step": 62415 }, { "epoch": 2.64, "grad_norm": 4.307459394067735, "learning_rate": 4.280762275909633e-07, "loss": 0.3401, "step": 62420 }, { "epoch": 2.64, "grad_norm": 3.680277844240787, "learning_rate": 4.275779485449988e-07, "loss": 0.3043, "step": 62425 }, { "epoch": 2.64, "grad_norm": 3.8945985928468043, "learning_rate": 4.2707994671151334e-07, "loss": 0.3194, "step": 62430 }, { "epoch": 2.64, "grad_norm": 4.193478380339177, "learning_rate": 4.265822221207011e-07, "loss": 0.3408, "step": 62435 }, { "epoch": 2.64, "grad_norm": 3.9947280895547292, "learning_rate": 4.260847748027386e-07, "loss": 0.3068, "step": 62440 }, { "epoch": 2.64, "grad_norm": 3.706864588725747, "learning_rate": 4.255876047877816e-07, "loss": 0.3, "step": 62445 }, { "epoch": 2.64, "grad_norm": 4.183828890630293, "learning_rate": 4.250907121059744e-07, "loss": 0.3128, "step": 62450 }, { "epoch": 2.64, "grad_norm": 4.145990129283243, "learning_rate": 4.245940967874418e-07, "loss": 0.3389, "step": 62455 }, { "epoch": 2.64, "grad_norm": 4.108575491707238, "learning_rate": 4.240977588622913e-07, "loss": 0.3046, "step": 62460 }, { "epoch": 2.64, "grad_norm": 4.006368525070137, "learning_rate": 4.2360169836061504e-07, "loss": 0.3299, "step": 62465 }, { "epoch": 2.64, "grad_norm": 3.7961833393325795, "learning_rate": 4.231059153124872e-07, "loss": 0.3277, "step": 62470 }, { "epoch": 2.64, "grad_norm": 4.12929652250622, "learning_rate": 4.226104097479672e-07, "loss": 0.3365, "step": 62475 }, { "epoch": 2.64, "grad_norm": 4.0271903468062495, "learning_rate": 4.221151816970942e-07, "loss": 0.3146, "step": 62480 }, { "epoch": 2.64, "grad_norm": 3.777805053933496, "learning_rate": 4.216202311898926e-07, "loss": 0.2952, "step": 62485 }, { "epoch": 2.64, "grad_norm": 4.5376265744807736, "learning_rate": 4.2112555825637167e-07, "loss": 0.3228, "step": 62490 }, { "epoch": 2.64, "grad_norm": 3.8143661922808745, "learning_rate": 4.2063116292651974e-07, "loss": 0.341, "step": 62495 }, { "epoch": 2.65, "grad_norm": 3.9101203009994405, "learning_rate": 4.201370452303111e-07, "loss": 0.2957, "step": 62500 }, { "epoch": 2.65, "grad_norm": 3.5829591916765864, "learning_rate": 4.1964320519770395e-07, "loss": 0.2879, "step": 62505 }, { "epoch": 2.65, "grad_norm": 4.907673467667554, "learning_rate": 4.191496428586367e-07, "loss": 0.2917, "step": 62510 }, { "epoch": 2.65, "grad_norm": 4.035933862704518, "learning_rate": 4.1865635824303365e-07, "loss": 0.3192, "step": 62515 }, { "epoch": 2.65, "grad_norm": 4.188700353930271, "learning_rate": 4.1816335138080035e-07, "loss": 0.2923, "step": 62520 }, { "epoch": 2.65, "grad_norm": 3.9489801907466973, "learning_rate": 4.1767062230182784e-07, "loss": 0.3267, "step": 62525 }, { "epoch": 2.65, "grad_norm": 4.0380631671401135, "learning_rate": 4.171781710359868e-07, "loss": 0.3062, "step": 62530 }, { "epoch": 2.65, "grad_norm": 4.139472944979526, "learning_rate": 4.166859976131338e-07, "loss": 0.3284, "step": 62535 }, { "epoch": 2.65, "grad_norm": 4.447480325181174, "learning_rate": 4.161941020631094e-07, "loss": 0.3094, "step": 62540 }, { "epoch": 2.65, "grad_norm": 4.301603010620997, "learning_rate": 4.157024844157337e-07, "loss": 0.3178, "step": 62545 }, { "epoch": 2.65, "grad_norm": 3.8473966544862317, "learning_rate": 4.1521114470081223e-07, "loss": 0.3019, "step": 62550 }, { "epoch": 2.65, "grad_norm": 4.102813216240773, "learning_rate": 4.1472008294813515e-07, "loss": 0.3093, "step": 62555 }, { "epoch": 2.65, "grad_norm": 4.1421077011691185, "learning_rate": 4.1422929918747303e-07, "loss": 0.3395, "step": 62560 }, { "epoch": 2.65, "grad_norm": 4.447333841541427, "learning_rate": 4.1373879344858035e-07, "loss": 0.2888, "step": 62565 }, { "epoch": 2.65, "grad_norm": 3.9110970265115625, "learning_rate": 4.132485657611951e-07, "loss": 0.2902, "step": 62570 }, { "epoch": 2.65, "grad_norm": 5.009399336059135, "learning_rate": 4.1275861615503954e-07, "loss": 0.3266, "step": 62575 }, { "epoch": 2.65, "grad_norm": 3.6052795707777734, "learning_rate": 4.12268944659816e-07, "loss": 0.2812, "step": 62580 }, { "epoch": 2.65, "grad_norm": 4.252989943955905, "learning_rate": 4.117795513052125e-07, "loss": 0.328, "step": 62585 }, { "epoch": 2.65, "grad_norm": 4.212473609643261, "learning_rate": 4.1129043612090137e-07, "loss": 0.3145, "step": 62590 }, { "epoch": 2.65, "grad_norm": 3.6820735534044653, "learning_rate": 4.1080159913653274e-07, "loss": 0.3187, "step": 62595 }, { "epoch": 2.65, "grad_norm": 4.230818392896048, "learning_rate": 4.1031304038174625e-07, "loss": 0.3033, "step": 62600 }, { "epoch": 2.65, "grad_norm": 3.8304496603345872, "learning_rate": 4.098247598861599e-07, "loss": 0.2981, "step": 62605 }, { "epoch": 2.65, "grad_norm": 4.000488891883417, "learning_rate": 4.0933675767937884e-07, "loss": 0.3309, "step": 62610 }, { "epoch": 2.65, "grad_norm": 3.9304357191152843, "learning_rate": 4.0884903379098673e-07, "loss": 0.3119, "step": 62615 }, { "epoch": 2.65, "grad_norm": 4.10233754043791, "learning_rate": 4.083615882505543e-07, "loss": 0.3594, "step": 62620 }, { "epoch": 2.65, "grad_norm": 4.261915961266912, "learning_rate": 4.078744210876345e-07, "loss": 0.3436, "step": 62625 }, { "epoch": 2.65, "grad_norm": 3.8868360895943073, "learning_rate": 4.073875323317617e-07, "loss": 0.348, "step": 62630 }, { "epoch": 2.65, "grad_norm": 4.018408617594063, "learning_rate": 4.069009220124542e-07, "loss": 0.3261, "step": 62635 }, { "epoch": 2.65, "grad_norm": 3.857177456550557, "learning_rate": 4.064145901592154e-07, "loss": 0.3114, "step": 62640 }, { "epoch": 2.65, "grad_norm": 3.8878469000594262, "learning_rate": 4.059285368015281e-07, "loss": 0.3083, "step": 62645 }, { "epoch": 2.65, "grad_norm": 4.1309302922725335, "learning_rate": 4.0544276196886113e-07, "loss": 0.3515, "step": 62650 }, { "epoch": 2.65, "grad_norm": 4.117796456344412, "learning_rate": 4.049572656906664e-07, "loss": 0.2964, "step": 62655 }, { "epoch": 2.65, "grad_norm": 4.245049819102319, "learning_rate": 4.0447204799637765e-07, "loss": 0.3412, "step": 62660 }, { "epoch": 2.65, "grad_norm": 3.868061169510937, "learning_rate": 4.039871089154118e-07, "loss": 0.2927, "step": 62665 }, { "epoch": 2.65, "grad_norm": 3.9487228630287237, "learning_rate": 4.0350244847716936e-07, "loss": 0.3212, "step": 62670 }, { "epoch": 2.65, "grad_norm": 3.9362236426391983, "learning_rate": 4.030180667110351e-07, "loss": 0.3151, "step": 62675 }, { "epoch": 2.65, "grad_norm": 4.022362415444539, "learning_rate": 4.025339636463743e-07, "loss": 0.3107, "step": 62680 }, { "epoch": 2.65, "grad_norm": 3.657344414422595, "learning_rate": 4.020501393125359e-07, "loss": 0.3043, "step": 62685 }, { "epoch": 2.65, "grad_norm": 4.317213174352339, "learning_rate": 4.0156659373885465e-07, "loss": 0.3298, "step": 62690 }, { "epoch": 2.65, "grad_norm": 4.030750161798941, "learning_rate": 4.0108332695464603e-07, "loss": 0.3125, "step": 62695 }, { "epoch": 2.65, "grad_norm": 3.919019740430411, "learning_rate": 4.006003389892077e-07, "loss": 0.2974, "step": 62700 }, { "epoch": 2.65, "grad_norm": 4.4595626926307785, "learning_rate": 4.001176298718229e-07, "loss": 0.3342, "step": 62705 }, { "epoch": 2.65, "grad_norm": 3.913374992107964, "learning_rate": 3.9963519963175825e-07, "loss": 0.3183, "step": 62710 }, { "epoch": 2.65, "grad_norm": 3.8654010721597842, "learning_rate": 3.9915304829825916e-07, "loss": 0.2976, "step": 62715 }, { "epoch": 2.65, "grad_norm": 3.6358854877686295, "learning_rate": 3.986711759005585e-07, "loss": 0.3069, "step": 62720 }, { "epoch": 2.65, "grad_norm": 3.883880699745248, "learning_rate": 3.981895824678711e-07, "loss": 0.3011, "step": 62725 }, { "epoch": 2.65, "grad_norm": 4.019832031041032, "learning_rate": 3.977082680293953e-07, "loss": 0.3278, "step": 62730 }, { "epoch": 2.66, "grad_norm": 3.964238755025775, "learning_rate": 3.972272326143095e-07, "loss": 0.3213, "step": 62735 }, { "epoch": 2.66, "grad_norm": 4.036516533172175, "learning_rate": 3.9674647625177867e-07, "loss": 0.3157, "step": 62740 }, { "epoch": 2.66, "grad_norm": 3.471826685983796, "learning_rate": 3.962659989709505e-07, "loss": 0.3112, "step": 62745 }, { "epoch": 2.66, "grad_norm": 4.549004953113974, "learning_rate": 3.9578580080095296e-07, "loss": 0.2975, "step": 62750 }, { "epoch": 2.66, "grad_norm": 6.373782728759738, "learning_rate": 3.95305881770901e-07, "loss": 0.2901, "step": 62755 }, { "epoch": 2.66, "grad_norm": 4.2960755236821235, "learning_rate": 3.948262419098897e-07, "loss": 0.3105, "step": 62760 }, { "epoch": 2.66, "grad_norm": 4.458788581526812, "learning_rate": 3.9434688124699803e-07, "loss": 0.3079, "step": 62765 }, { "epoch": 2.66, "grad_norm": 4.007021741412846, "learning_rate": 3.938677998112889e-07, "loss": 0.3127, "step": 62770 }, { "epoch": 2.66, "grad_norm": 4.466896579803888, "learning_rate": 3.933889976318067e-07, "loss": 0.312, "step": 62775 }, { "epoch": 2.66, "grad_norm": 4.223018166272255, "learning_rate": 3.929104747375817e-07, "loss": 0.33, "step": 62780 }, { "epoch": 2.66, "grad_norm": 3.822980934469118, "learning_rate": 3.924322311576223e-07, "loss": 0.3094, "step": 62785 }, { "epoch": 2.66, "grad_norm": 3.7504172250582966, "learning_rate": 3.919542669209253e-07, "loss": 0.3043, "step": 62790 }, { "epoch": 2.66, "grad_norm": 3.3690442015275064, "learning_rate": 3.9147658205646867e-07, "loss": 0.2906, "step": 62795 }, { "epoch": 2.66, "grad_norm": 3.4881108828009033, "learning_rate": 3.9099917659321086e-07, "loss": 0.2937, "step": 62800 }, { "epoch": 2.66, "grad_norm": 3.9622058354861496, "learning_rate": 3.9052205056009705e-07, "loss": 0.2916, "step": 62805 }, { "epoch": 2.66, "grad_norm": 3.8285423444192466, "learning_rate": 3.900452039860542e-07, "loss": 0.278, "step": 62810 }, { "epoch": 2.66, "grad_norm": 3.786064232446165, "learning_rate": 3.895686368999907e-07, "loss": 0.3251, "step": 62815 }, { "epoch": 2.66, "grad_norm": 4.559570286536799, "learning_rate": 3.890923493308002e-07, "loss": 0.3033, "step": 62820 }, { "epoch": 2.66, "grad_norm": 3.613033224864597, "learning_rate": 3.8861634130735957e-07, "loss": 0.2866, "step": 62825 }, { "epoch": 2.66, "grad_norm": 3.328184832963255, "learning_rate": 3.881406128585263e-07, "loss": 0.3178, "step": 62830 }, { "epoch": 2.66, "grad_norm": 4.212968339442758, "learning_rate": 3.876651640131429e-07, "loss": 0.3337, "step": 62835 }, { "epoch": 2.66, "grad_norm": 4.082110291055248, "learning_rate": 3.8718999480003516e-07, "loss": 0.3207, "step": 62840 }, { "epoch": 2.66, "grad_norm": 3.5040417113813787, "learning_rate": 3.867151052480106e-07, "loss": 0.3092, "step": 62845 }, { "epoch": 2.66, "grad_norm": 3.7191139762989667, "learning_rate": 3.862404953858595e-07, "loss": 0.2999, "step": 62850 }, { "epoch": 2.66, "grad_norm": 3.5432629042874906, "learning_rate": 3.857661652423572e-07, "loss": 0.2978, "step": 62855 }, { "epoch": 2.66, "grad_norm": 3.7260448657606315, "learning_rate": 3.8529211484626016e-07, "loss": 0.3208, "step": 62860 }, { "epoch": 2.66, "grad_norm": 4.143889934351701, "learning_rate": 3.8481834422631037e-07, "loss": 0.3165, "step": 62865 }, { "epoch": 2.66, "grad_norm": 3.595413338474495, "learning_rate": 3.843448534112287e-07, "loss": 0.3279, "step": 62870 }, { "epoch": 2.66, "grad_norm": 4.113120506968053, "learning_rate": 3.838716424297229e-07, "loss": 0.3237, "step": 62875 }, { "epoch": 2.66, "grad_norm": 4.366969188580846, "learning_rate": 3.8339871131048314e-07, "loss": 0.3086, "step": 62880 }, { "epoch": 2.66, "grad_norm": 3.849661488717415, "learning_rate": 3.8292606008217993e-07, "loss": 0.2953, "step": 62885 }, { "epoch": 2.66, "grad_norm": 4.13492382678597, "learning_rate": 3.824536887734698e-07, "loss": 0.3214, "step": 62890 }, { "epoch": 2.66, "grad_norm": 3.6930570783544154, "learning_rate": 3.8198159741299146e-07, "loss": 0.2991, "step": 62895 }, { "epoch": 2.66, "grad_norm": 3.652318367684894, "learning_rate": 3.8150978602936594e-07, "loss": 0.3125, "step": 62900 }, { "epoch": 2.66, "grad_norm": 4.965424330921914, "learning_rate": 3.81038254651197e-07, "loss": 0.3394, "step": 62905 }, { "epoch": 2.66, "grad_norm": 3.663769557283041, "learning_rate": 3.8056700330707407e-07, "loss": 0.3188, "step": 62910 }, { "epoch": 2.66, "grad_norm": 3.8631574569840006, "learning_rate": 3.80096032025567e-07, "loss": 0.3049, "step": 62915 }, { "epoch": 2.66, "grad_norm": 3.3772366127057785, "learning_rate": 3.7962534083522796e-07, "loss": 0.2999, "step": 62920 }, { "epoch": 2.66, "grad_norm": 4.700627785795629, "learning_rate": 3.7915492976459524e-07, "loss": 0.2935, "step": 62925 }, { "epoch": 2.66, "grad_norm": 3.769597669478493, "learning_rate": 3.786847988421888e-07, "loss": 0.3141, "step": 62930 }, { "epoch": 2.66, "grad_norm": 4.476550767849943, "learning_rate": 3.782149480965097e-07, "loss": 0.3171, "step": 62935 }, { "epoch": 2.66, "grad_norm": 3.9883081859016882, "learning_rate": 3.777453775560441e-07, "loss": 0.3319, "step": 62940 }, { "epoch": 2.66, "grad_norm": 4.322442805168393, "learning_rate": 3.77276087249262e-07, "loss": 0.3, "step": 62945 }, { "epoch": 2.66, "grad_norm": 3.5312706288985742, "learning_rate": 3.768070772046134e-07, "loss": 0.3276, "step": 62950 }, { "epoch": 2.66, "grad_norm": 3.7259883954948725, "learning_rate": 3.763383474505333e-07, "loss": 0.3094, "step": 62955 }, { "epoch": 2.66, "grad_norm": 4.675865446565234, "learning_rate": 3.758698980154396e-07, "loss": 0.3445, "step": 62960 }, { "epoch": 2.66, "grad_norm": 4.257036900331993, "learning_rate": 3.754017289277345e-07, "loss": 0.3151, "step": 62965 }, { "epoch": 2.67, "grad_norm": 4.141267654892652, "learning_rate": 3.7493384021579926e-07, "loss": 0.3053, "step": 62970 }, { "epoch": 2.67, "grad_norm": 4.517845425439684, "learning_rate": 3.7446623190800167e-07, "loss": 0.3399, "step": 62975 }, { "epoch": 2.67, "grad_norm": 4.547274188659153, "learning_rate": 3.7399890403269244e-07, "loss": 0.2901, "step": 62980 }, { "epoch": 2.67, "grad_norm": 3.9424105440736965, "learning_rate": 3.735318566182022e-07, "loss": 0.3101, "step": 62985 }, { "epoch": 2.67, "grad_norm": 3.71816535066981, "learning_rate": 3.730650896928484e-07, "loss": 0.3335, "step": 62990 }, { "epoch": 2.67, "grad_norm": 3.774555322403104, "learning_rate": 3.7259860328492934e-07, "loss": 0.3266, "step": 62995 }, { "epoch": 2.67, "grad_norm": 3.836153236432657, "learning_rate": 3.72132397422727e-07, "loss": 0.309, "step": 63000 }, { "epoch": 2.67, "grad_norm": 4.660229316598106, "learning_rate": 3.7166647213450423e-07, "loss": 0.3095, "step": 63005 }, { "epoch": 2.67, "grad_norm": 4.079337364428318, "learning_rate": 3.7120082744851016e-07, "loss": 0.3323, "step": 63010 }, { "epoch": 2.67, "grad_norm": 4.388071970382545, "learning_rate": 3.707354633929766e-07, "loss": 0.2991, "step": 63015 }, { "epoch": 2.67, "grad_norm": 4.247074231390211, "learning_rate": 3.702703799961149e-07, "loss": 0.3331, "step": 63020 }, { "epoch": 2.67, "grad_norm": 3.9172282293462968, "learning_rate": 3.6980557728612254e-07, "loss": 0.299, "step": 63025 }, { "epoch": 2.67, "grad_norm": 3.763376038016525, "learning_rate": 3.693410552911797e-07, "loss": 0.3275, "step": 63030 }, { "epoch": 2.67, "grad_norm": 3.7089181044183257, "learning_rate": 3.6887681403944885e-07, "loss": 0.3016, "step": 63035 }, { "epoch": 2.67, "grad_norm": 3.515039029522563, "learning_rate": 3.684128535590753e-07, "loss": 0.3105, "step": 63040 }, { "epoch": 2.67, "grad_norm": 3.823857735537007, "learning_rate": 3.6794917387818706e-07, "loss": 0.3155, "step": 63045 }, { "epoch": 2.67, "grad_norm": 3.638622436570413, "learning_rate": 3.6748577502489725e-07, "loss": 0.29, "step": 63050 }, { "epoch": 2.67, "grad_norm": 3.7307567709265133, "learning_rate": 3.670226570272989e-07, "loss": 0.3276, "step": 63055 }, { "epoch": 2.67, "grad_norm": 3.9707330344181617, "learning_rate": 3.6655981991346955e-07, "loss": 0.2909, "step": 63060 }, { "epoch": 2.67, "grad_norm": 4.25895628680597, "learning_rate": 3.6609726371147067e-07, "loss": 0.3084, "step": 63065 }, { "epoch": 2.67, "grad_norm": 3.6790490376620313, "learning_rate": 3.6563498844934485e-07, "loss": 0.2935, "step": 63070 }, { "epoch": 2.67, "grad_norm": 4.160279738610442, "learning_rate": 3.651729941551185e-07, "loss": 0.2873, "step": 63075 }, { "epoch": 2.67, "grad_norm": 4.290986744296786, "learning_rate": 3.6471128085680096e-07, "loss": 0.3229, "step": 63080 }, { "epoch": 2.67, "grad_norm": 3.5576104524744623, "learning_rate": 3.642498485823859e-07, "loss": 0.2902, "step": 63085 }, { "epoch": 2.67, "grad_norm": 3.7227335358086036, "learning_rate": 3.637886973598465e-07, "loss": 0.3142, "step": 63090 }, { "epoch": 2.67, "grad_norm": 4.347242945399834, "learning_rate": 3.63327827217142e-07, "loss": 0.3258, "step": 63095 }, { "epoch": 2.67, "grad_norm": 3.8316713591159774, "learning_rate": 3.628672381822146e-07, "loss": 0.3038, "step": 63100 }, { "epoch": 2.67, "grad_norm": 3.7631134696392308, "learning_rate": 3.6240693028298636e-07, "loss": 0.3046, "step": 63105 }, { "epoch": 2.67, "grad_norm": 4.629145535827151, "learning_rate": 3.619469035473655e-07, "loss": 0.3093, "step": 63110 }, { "epoch": 2.67, "grad_norm": 3.848838485693634, "learning_rate": 3.614871580032425e-07, "loss": 0.3139, "step": 63115 }, { "epoch": 2.67, "grad_norm": 4.462028056040803, "learning_rate": 3.6102769367848953e-07, "loss": 0.3112, "step": 63120 }, { "epoch": 2.67, "grad_norm": 4.308744281050955, "learning_rate": 3.605685106009632e-07, "loss": 0.3188, "step": 63125 }, { "epoch": 2.67, "grad_norm": 4.050373853463839, "learning_rate": 3.601096087985023e-07, "loss": 0.3085, "step": 63130 }, { "epoch": 2.67, "grad_norm": 4.28989871063134, "learning_rate": 3.596509882989285e-07, "loss": 0.3254, "step": 63135 }, { "epoch": 2.67, "grad_norm": 4.617329027302648, "learning_rate": 3.5919264913004624e-07, "loss": 0.3167, "step": 63140 }, { "epoch": 2.67, "grad_norm": 4.34690333924876, "learning_rate": 3.5873459131964383e-07, "loss": 0.2956, "step": 63145 }, { "epoch": 2.67, "grad_norm": 3.8602412605825775, "learning_rate": 3.582768148954924e-07, "loss": 0.3126, "step": 63150 }, { "epoch": 2.67, "grad_norm": 3.558462619116576, "learning_rate": 3.578193198853447e-07, "loss": 0.3045, "step": 63155 }, { "epoch": 2.67, "grad_norm": 4.089247466433942, "learning_rate": 3.573621063169375e-07, "loss": 0.3014, "step": 63160 }, { "epoch": 2.67, "grad_norm": 4.448606070304286, "learning_rate": 3.5690517421799076e-07, "loss": 0.3129, "step": 63165 }, { "epoch": 2.67, "grad_norm": 4.005898398982488, "learning_rate": 3.564485236162074e-07, "loss": 0.3048, "step": 63170 }, { "epoch": 2.67, "grad_norm": 3.750109998091521, "learning_rate": 3.559921545392708e-07, "loss": 0.3027, "step": 63175 }, { "epoch": 2.67, "grad_norm": 3.8583589959766167, "learning_rate": 3.5553606701485055e-07, "loss": 0.3218, "step": 63180 }, { "epoch": 2.67, "grad_norm": 4.062017841310653, "learning_rate": 3.5508026107059836e-07, "loss": 0.3051, "step": 63185 }, { "epoch": 2.67, "grad_norm": 3.8276387556936613, "learning_rate": 3.5462473673414766e-07, "loss": 0.3146, "step": 63190 }, { "epoch": 2.67, "grad_norm": 4.213257576198637, "learning_rate": 3.541694940331153e-07, "loss": 0.3366, "step": 63195 }, { "epoch": 2.67, "grad_norm": 7.909023961073213, "learning_rate": 3.537145329951025e-07, "loss": 0.3113, "step": 63200 }, { "epoch": 2.67, "grad_norm": 3.5756925536496453, "learning_rate": 3.532598536476911e-07, "loss": 0.3067, "step": 63205 }, { "epoch": 2.68, "grad_norm": 3.7428387732383315, "learning_rate": 3.528054560184474e-07, "loss": 0.2989, "step": 63210 }, { "epoch": 2.68, "grad_norm": 4.387162293554796, "learning_rate": 3.523513401349199e-07, "loss": 0.3185, "step": 63215 }, { "epoch": 2.68, "grad_norm": 3.724452529211642, "learning_rate": 3.51897506024641e-07, "loss": 0.3233, "step": 63220 }, { "epoch": 2.68, "grad_norm": 4.2459704624738634, "learning_rate": 3.5144395371512485e-07, "loss": 0.2926, "step": 63225 }, { "epoch": 2.68, "grad_norm": 3.941283983722673, "learning_rate": 3.509906832338683e-07, "loss": 0.2853, "step": 63230 }, { "epoch": 2.68, "grad_norm": 3.839492283560509, "learning_rate": 3.5053769460835385e-07, "loss": 0.3072, "step": 63235 }, { "epoch": 2.68, "grad_norm": 4.720361842743651, "learning_rate": 3.500849878660423e-07, "loss": 0.3099, "step": 63240 }, { "epoch": 2.68, "grad_norm": 3.7045334456504424, "learning_rate": 3.4963256303438167e-07, "loss": 0.3233, "step": 63245 }, { "epoch": 2.68, "grad_norm": 4.410965785152744, "learning_rate": 3.491804201408011e-07, "loss": 0.309, "step": 63250 }, { "epoch": 2.68, "grad_norm": 4.983955286713831, "learning_rate": 3.4872855921271156e-07, "loss": 0.329, "step": 63255 }, { "epoch": 2.68, "grad_norm": 4.298398960441399, "learning_rate": 3.482769802775082e-07, "loss": 0.3159, "step": 63260 }, { "epoch": 2.68, "grad_norm": 4.444943039756403, "learning_rate": 3.4782568336256983e-07, "loss": 0.3312, "step": 63265 }, { "epoch": 2.68, "grad_norm": 3.202554289875888, "learning_rate": 3.473746684952578e-07, "loss": 0.3295, "step": 63270 }, { "epoch": 2.68, "grad_norm": 4.0608905995582845, "learning_rate": 3.469239357029136e-07, "loss": 0.3257, "step": 63275 }, { "epoch": 2.68, "grad_norm": 4.1242594056199335, "learning_rate": 3.4647348501286537e-07, "loss": 0.3149, "step": 63280 }, { "epoch": 2.68, "grad_norm": 4.037876476697423, "learning_rate": 3.4602331645242347e-07, "loss": 0.337, "step": 63285 }, { "epoch": 2.68, "grad_norm": 3.9340345575230233, "learning_rate": 3.4557343004887776e-07, "loss": 0.3228, "step": 63290 }, { "epoch": 2.68, "grad_norm": 4.161195654822754, "learning_rate": 3.451238258295053e-07, "loss": 0.3221, "step": 63295 }, { "epoch": 2.68, "grad_norm": 3.6699141417986887, "learning_rate": 3.4467450382156484e-07, "loss": 0.3085, "step": 63300 }, { "epoch": 2.68, "grad_norm": 3.6659597530951906, "learning_rate": 3.442254640522952e-07, "loss": 0.322, "step": 63305 }, { "epoch": 2.68, "grad_norm": 3.752612471594262, "learning_rate": 3.437767065489223e-07, "loss": 0.3128, "step": 63310 }, { "epoch": 2.68, "grad_norm": 5.923503439919269, "learning_rate": 3.433282313386516e-07, "loss": 0.3333, "step": 63315 }, { "epoch": 2.68, "grad_norm": 3.5581217984930817, "learning_rate": 3.4288003844867467e-07, "loss": 0.2896, "step": 63320 }, { "epoch": 2.68, "grad_norm": 3.981903419644975, "learning_rate": 3.424321279061632e-07, "loss": 0.341, "step": 63325 }, { "epoch": 2.68, "grad_norm": 3.8096554574919606, "learning_rate": 3.4198449973827154e-07, "loss": 0.3409, "step": 63330 }, { "epoch": 2.68, "grad_norm": 4.15027510271367, "learning_rate": 3.4153715397213905e-07, "loss": 0.3024, "step": 63335 }, { "epoch": 2.68, "grad_norm": 3.869499713850801, "learning_rate": 3.410900906348874e-07, "loss": 0.297, "step": 63340 }, { "epoch": 2.68, "grad_norm": 3.852368621188546, "learning_rate": 3.4064330975361993e-07, "loss": 0.2935, "step": 63345 }, { "epoch": 2.68, "grad_norm": 4.466918705146553, "learning_rate": 3.4019681135542383e-07, "loss": 0.2983, "step": 63350 }, { "epoch": 2.68, "grad_norm": 3.704616046684038, "learning_rate": 3.397505954673702e-07, "loss": 0.2914, "step": 63355 }, { "epoch": 2.68, "grad_norm": 3.632388038180807, "learning_rate": 3.393046621165097e-07, "loss": 0.3156, "step": 63360 }, { "epoch": 2.68, "grad_norm": 4.287795553208689, "learning_rate": 3.3885901132987896e-07, "loss": 0.295, "step": 63365 }, { "epoch": 2.68, "grad_norm": 4.189436966609815, "learning_rate": 3.3841364313449756e-07, "loss": 0.3345, "step": 63370 }, { "epoch": 2.68, "grad_norm": 4.065564924882823, "learning_rate": 3.379685575573649e-07, "loss": 0.3313, "step": 63375 }, { "epoch": 2.68, "grad_norm": 3.7660807688813756, "learning_rate": 3.3752375462546615e-07, "loss": 0.3046, "step": 63380 }, { "epoch": 2.68, "grad_norm": 3.4195753906266653, "learning_rate": 3.370792343657692e-07, "loss": 0.2795, "step": 63385 }, { "epoch": 2.68, "grad_norm": 3.8846844275227705, "learning_rate": 3.366349968052224e-07, "loss": 0.3082, "step": 63390 }, { "epoch": 2.68, "grad_norm": 3.9861153799117064, "learning_rate": 3.361910419707592e-07, "loss": 0.3087, "step": 63395 }, { "epoch": 2.68, "grad_norm": 3.772696980978408, "learning_rate": 3.3574736988929545e-07, "loss": 0.3099, "step": 63400 }, { "epoch": 2.68, "grad_norm": 3.881036085790811, "learning_rate": 3.353039805877306e-07, "loss": 0.3246, "step": 63405 }, { "epoch": 2.68, "grad_norm": 3.541051401992806, "learning_rate": 3.348608740929443e-07, "loss": 0.329, "step": 63410 }, { "epoch": 2.68, "grad_norm": 4.779742961823956, "learning_rate": 3.3441805043180175e-07, "loss": 0.2922, "step": 63415 }, { "epoch": 2.68, "grad_norm": 3.760066285635997, "learning_rate": 3.3397550963115034e-07, "loss": 0.2974, "step": 63420 }, { "epoch": 2.68, "grad_norm": 4.920615279935689, "learning_rate": 3.335332517178186e-07, "loss": 0.3085, "step": 63425 }, { "epoch": 2.68, "grad_norm": 3.7892340448363435, "learning_rate": 3.3309127671862063e-07, "loss": 0.2864, "step": 63430 }, { "epoch": 2.68, "grad_norm": 4.730264375575319, "learning_rate": 3.3264958466035226e-07, "loss": 0.3324, "step": 63435 }, { "epoch": 2.68, "grad_norm": 3.662611820688694, "learning_rate": 3.322081755697909e-07, "loss": 0.2951, "step": 63440 }, { "epoch": 2.69, "grad_norm": 4.747979117619223, "learning_rate": 3.317670494736985e-07, "loss": 0.3134, "step": 63445 }, { "epoch": 2.69, "grad_norm": 3.82233068396079, "learning_rate": 3.3132620639881873e-07, "loss": 0.3319, "step": 63450 }, { "epoch": 2.69, "grad_norm": 3.86455603076855, "learning_rate": 3.3088564637188013e-07, "loss": 0.2907, "step": 63455 }, { "epoch": 2.69, "grad_norm": 3.6702179813193556, "learning_rate": 3.3044536941959083e-07, "loss": 0.3038, "step": 63460 }, { "epoch": 2.69, "grad_norm": 4.726698312871764, "learning_rate": 3.3000537556864385e-07, "loss": 0.3326, "step": 63465 }, { "epoch": 2.69, "grad_norm": 3.8586134768816223, "learning_rate": 3.295656648457163e-07, "loss": 0.319, "step": 63470 }, { "epoch": 2.69, "grad_norm": 3.921426741207301, "learning_rate": 3.2912623727746395e-07, "loss": 0.3212, "step": 63475 }, { "epoch": 2.69, "grad_norm": 3.653790379306705, "learning_rate": 3.2868709289052947e-07, "loss": 0.2948, "step": 63480 }, { "epoch": 2.69, "grad_norm": 3.900115844058006, "learning_rate": 3.282482317115376e-07, "loss": 0.3087, "step": 63485 }, { "epoch": 2.69, "grad_norm": 3.876253816104976, "learning_rate": 3.278096537670944e-07, "loss": 0.2953, "step": 63490 }, { "epoch": 2.69, "grad_norm": 4.001770883004138, "learning_rate": 3.2737135908378845e-07, "loss": 0.337, "step": 63495 }, { "epoch": 2.69, "grad_norm": 4.085340234854928, "learning_rate": 3.269333476881936e-07, "loss": 0.3229, "step": 63500 }, { "epoch": 2.69, "grad_norm": 3.585882067439357, "learning_rate": 3.264956196068653e-07, "loss": 0.3253, "step": 63505 }, { "epoch": 2.69, "grad_norm": 3.9680427948082277, "learning_rate": 3.2605817486634105e-07, "loss": 0.3261, "step": 63510 }, { "epoch": 2.69, "grad_norm": 3.8694908491687556, "learning_rate": 3.2562101349314147e-07, "loss": 0.2982, "step": 63515 }, { "epoch": 2.69, "grad_norm": 3.4312982568659467, "learning_rate": 3.2518413551377193e-07, "loss": 0.288, "step": 63520 }, { "epoch": 2.69, "grad_norm": 3.80816709774872, "learning_rate": 3.2474754095471796e-07, "loss": 0.3244, "step": 63525 }, { "epoch": 2.69, "grad_norm": 3.6847682122815955, "learning_rate": 3.243112298424489e-07, "loss": 0.3007, "step": 63530 }, { "epoch": 2.69, "grad_norm": 3.8848673786384422, "learning_rate": 3.2387520220341753e-07, "loss": 0.335, "step": 63535 }, { "epoch": 2.69, "grad_norm": 3.9410146728325555, "learning_rate": 3.2343945806405875e-07, "loss": 0.3207, "step": 63540 }, { "epoch": 2.69, "grad_norm": 4.90522845987869, "learning_rate": 3.2300399745079037e-07, "loss": 0.3254, "step": 63545 }, { "epoch": 2.69, "grad_norm": 4.26840163288536, "learning_rate": 3.2256882039001237e-07, "loss": 0.3075, "step": 63550 }, { "epoch": 2.69, "grad_norm": 4.065886810523565, "learning_rate": 3.221339269081103e-07, "loss": 0.2964, "step": 63555 }, { "epoch": 2.69, "grad_norm": 5.45058027763528, "learning_rate": 3.216993170314481e-07, "loss": 0.3121, "step": 63560 }, { "epoch": 2.69, "grad_norm": 4.7060357052530755, "learning_rate": 3.2126499078637576e-07, "loss": 0.2936, "step": 63565 }, { "epoch": 2.69, "grad_norm": 4.055878240936127, "learning_rate": 3.2083094819922553e-07, "loss": 0.3215, "step": 63570 }, { "epoch": 2.69, "grad_norm": 3.8228959847870874, "learning_rate": 3.2039718929631256e-07, "loss": 0.2993, "step": 63575 }, { "epoch": 2.69, "grad_norm": 3.9934887468671945, "learning_rate": 3.199637141039336e-07, "loss": 0.3202, "step": 63580 }, { "epoch": 2.69, "grad_norm": 4.804194201095336, "learning_rate": 3.195305226483686e-07, "loss": 0.3047, "step": 63585 }, { "epoch": 2.69, "grad_norm": 4.5671889488205935, "learning_rate": 3.190976149558822e-07, "loss": 0.3186, "step": 63590 }, { "epoch": 2.69, "grad_norm": 4.040845114664043, "learning_rate": 3.186649910527184e-07, "loss": 0.3369, "step": 63595 }, { "epoch": 2.69, "grad_norm": 4.039201313262964, "learning_rate": 3.1823265096510737e-07, "loss": 0.3007, "step": 63600 }, { "epoch": 2.69, "grad_norm": 3.883774306819197, "learning_rate": 3.178005947192603e-07, "loss": 0.3034, "step": 63605 }, { "epoch": 2.69, "grad_norm": 3.800042071543404, "learning_rate": 3.173688223413707e-07, "loss": 0.3014, "step": 63610 }, { "epoch": 2.69, "grad_norm": 4.1273685394802335, "learning_rate": 3.169373338576165e-07, "loss": 0.3154, "step": 63615 }, { "epoch": 2.69, "grad_norm": 4.789935603158744, "learning_rate": 3.165061292941568e-07, "loss": 0.3107, "step": 63620 }, { "epoch": 2.69, "grad_norm": 3.7359632488317276, "learning_rate": 3.1607520867713623e-07, "loss": 0.322, "step": 63625 }, { "epoch": 2.69, "grad_norm": 4.168455923392447, "learning_rate": 3.1564457203267784e-07, "loss": 0.3198, "step": 63630 }, { "epoch": 2.69, "grad_norm": 3.690063748548547, "learning_rate": 3.152142193868907e-07, "loss": 0.298, "step": 63635 }, { "epoch": 2.69, "grad_norm": 4.042027342232877, "learning_rate": 3.1478415076586663e-07, "loss": 0.3277, "step": 63640 }, { "epoch": 2.69, "grad_norm": 3.9779070734197153, "learning_rate": 3.143543661956783e-07, "loss": 0.3189, "step": 63645 }, { "epoch": 2.69, "grad_norm": 3.8850518679923915, "learning_rate": 3.1392486570238303e-07, "loss": 0.3061, "step": 63650 }, { "epoch": 2.69, "grad_norm": 4.69665094964033, "learning_rate": 3.134956493120195e-07, "loss": 0.3263, "step": 63655 }, { "epoch": 2.69, "grad_norm": 3.5573779177008027, "learning_rate": 3.1306671705061073e-07, "loss": 0.2985, "step": 63660 }, { "epoch": 2.69, "grad_norm": 4.113197805901236, "learning_rate": 3.1263806894416047e-07, "loss": 0.3201, "step": 63665 }, { "epoch": 2.69, "grad_norm": 4.17981287054807, "learning_rate": 3.1220970501865675e-07, "loss": 0.3157, "step": 63670 }, { "epoch": 2.69, "grad_norm": 4.263370874042329, "learning_rate": 3.117816253000711e-07, "loss": 0.3044, "step": 63675 }, { "epoch": 2.7, "grad_norm": 3.7341386768023512, "learning_rate": 3.113538298143554e-07, "loss": 0.3103, "step": 63680 }, { "epoch": 2.7, "grad_norm": 3.621293475813652, "learning_rate": 3.109263185874462e-07, "loss": 0.3096, "step": 63685 }, { "epoch": 2.7, "grad_norm": 3.9956075230829926, "learning_rate": 3.1049909164526273e-07, "loss": 0.3104, "step": 63690 }, { "epoch": 2.7, "grad_norm": 4.674613369724306, "learning_rate": 3.1007214901370487e-07, "loss": 0.3124, "step": 63695 }, { "epoch": 2.7, "grad_norm": 3.8471102855492756, "learning_rate": 3.0964549071865856e-07, "loss": 0.2803, "step": 63700 }, { "epoch": 2.7, "grad_norm": 5.558912531651892, "learning_rate": 3.092191167859898e-07, "loss": 0.337, "step": 63705 }, { "epoch": 2.7, "grad_norm": 4.679136639045699, "learning_rate": 3.0879302724155016e-07, "loss": 0.3254, "step": 63710 }, { "epoch": 2.7, "grad_norm": 3.9861287914206542, "learning_rate": 3.083672221111694e-07, "loss": 0.302, "step": 63715 }, { "epoch": 2.7, "grad_norm": 3.8216308714475997, "learning_rate": 3.079417014206648e-07, "loss": 0.305, "step": 63720 }, { "epoch": 2.7, "grad_norm": 4.315489527546183, "learning_rate": 3.075164651958346e-07, "loss": 0.2937, "step": 63725 }, { "epoch": 2.7, "grad_norm": 3.762818044416501, "learning_rate": 3.0709151346245803e-07, "loss": 0.3154, "step": 63730 }, { "epoch": 2.7, "grad_norm": 3.3883082312070987, "learning_rate": 3.0666684624630016e-07, "loss": 0.293, "step": 63735 }, { "epoch": 2.7, "grad_norm": 4.106198318346511, "learning_rate": 3.0624246357310705e-07, "loss": 0.2911, "step": 63740 }, { "epoch": 2.7, "grad_norm": 4.292953244889915, "learning_rate": 3.0581836546860644e-07, "loss": 0.3344, "step": 63745 }, { "epoch": 2.7, "grad_norm": 3.8848897592786416, "learning_rate": 3.0539455195851164e-07, "loss": 0.3049, "step": 63750 }, { "epoch": 2.7, "grad_norm": 4.491949522522433, "learning_rate": 3.0497102306851656e-07, "loss": 0.2986, "step": 63755 }, { "epoch": 2.7, "grad_norm": 3.8630005720322527, "learning_rate": 3.0454777882429953e-07, "loss": 0.3098, "step": 63760 }, { "epoch": 2.7, "grad_norm": 4.057622721833372, "learning_rate": 3.0412481925151893e-07, "loss": 0.3227, "step": 63765 }, { "epoch": 2.7, "grad_norm": 3.8460417925757944, "learning_rate": 3.0370214437581814e-07, "loss": 0.3147, "step": 63770 }, { "epoch": 2.7, "grad_norm": 3.8354884947505252, "learning_rate": 3.0327975422282383e-07, "loss": 0.3095, "step": 63775 }, { "epoch": 2.7, "grad_norm": 4.317436466762637, "learning_rate": 3.028576488181428e-07, "loss": 0.3009, "step": 63780 }, { "epoch": 2.7, "grad_norm": 4.221063406665968, "learning_rate": 3.024358281873663e-07, "loss": 0.3082, "step": 63785 }, { "epoch": 2.7, "grad_norm": 3.6902844225620437, "learning_rate": 3.0201429235606983e-07, "loss": 0.2949, "step": 63790 }, { "epoch": 2.7, "grad_norm": 4.618086436097366, "learning_rate": 3.0159304134980693e-07, "loss": 0.3236, "step": 63795 }, { "epoch": 2.7, "grad_norm": 4.885467391342991, "learning_rate": 3.0117207519411883e-07, "loss": 0.3374, "step": 63800 }, { "epoch": 2.7, "grad_norm": 4.391157750167784, "learning_rate": 3.0075139391452677e-07, "loss": 0.2929, "step": 63805 }, { "epoch": 2.7, "grad_norm": 4.473366924779301, "learning_rate": 3.0033099753653704e-07, "loss": 0.2993, "step": 63810 }, { "epoch": 2.7, "grad_norm": 3.8817423115897958, "learning_rate": 2.999108860856342e-07, "loss": 0.3119, "step": 63815 }, { "epoch": 2.7, "grad_norm": 3.655230148340203, "learning_rate": 2.994910595872896e-07, "loss": 0.308, "step": 63820 }, { "epoch": 2.7, "grad_norm": 3.739944001879175, "learning_rate": 2.990715180669562e-07, "loss": 0.3047, "step": 63825 }, { "epoch": 2.7, "grad_norm": 3.7786235098532286, "learning_rate": 2.9865226155007086e-07, "loss": 0.3041, "step": 63830 }, { "epoch": 2.7, "grad_norm": 3.4775167018495274, "learning_rate": 2.9823329006204936e-07, "loss": 0.3101, "step": 63835 }, { "epoch": 2.7, "grad_norm": 3.915246478684711, "learning_rate": 2.9781460362829417e-07, "loss": 0.3078, "step": 63840 }, { "epoch": 2.7, "grad_norm": 3.965063873760154, "learning_rate": 2.9739620227418995e-07, "loss": 0.3014, "step": 63845 }, { "epoch": 2.7, "grad_norm": 4.291054859735662, "learning_rate": 2.969780860251009e-07, "loss": 0.3085, "step": 63850 }, { "epoch": 2.7, "grad_norm": 3.810278348940488, "learning_rate": 2.965602549063773e-07, "loss": 0.2977, "step": 63855 }, { "epoch": 2.7, "grad_norm": 3.57645031035686, "learning_rate": 2.9614270894335164e-07, "loss": 0.3113, "step": 63860 }, { "epoch": 2.7, "grad_norm": 3.9215543524963117, "learning_rate": 2.957254481613375e-07, "loss": 0.2989, "step": 63865 }, { "epoch": 2.7, "grad_norm": 3.7947664605093596, "learning_rate": 2.95308472585632e-07, "loss": 0.3032, "step": 63870 }, { "epoch": 2.7, "grad_norm": 3.888267428395147, "learning_rate": 2.9489178224151595e-07, "loss": 0.3177, "step": 63875 }, { "epoch": 2.7, "grad_norm": 3.872568078772845, "learning_rate": 2.9447537715425247e-07, "loss": 0.3018, "step": 63880 }, { "epoch": 2.7, "grad_norm": 4.441106636748337, "learning_rate": 2.940592573490858e-07, "loss": 0.3079, "step": 63885 }, { "epoch": 2.7, "grad_norm": 4.2215215301710725, "learning_rate": 2.936434228512447e-07, "loss": 0.3049, "step": 63890 }, { "epoch": 2.7, "grad_norm": 4.267603528270491, "learning_rate": 2.932278736859401e-07, "loss": 0.3027, "step": 63895 }, { "epoch": 2.7, "grad_norm": 4.051962348831266, "learning_rate": 2.9281260987836456e-07, "loss": 0.3294, "step": 63900 }, { "epoch": 2.7, "grad_norm": 3.438990742707771, "learning_rate": 2.923976314536953e-07, "loss": 0.2922, "step": 63905 }, { "epoch": 2.7, "grad_norm": 4.263088562824927, "learning_rate": 2.9198293843709146e-07, "loss": 0.3219, "step": 63910 }, { "epoch": 2.7, "grad_norm": 4.431371288156548, "learning_rate": 2.915685308536936e-07, "loss": 0.2958, "step": 63915 }, { "epoch": 2.71, "grad_norm": 3.875489819547162, "learning_rate": 2.911544087286261e-07, "loss": 0.3217, "step": 63920 }, { "epoch": 2.71, "grad_norm": 3.8687520081945346, "learning_rate": 2.9074057208699714e-07, "loss": 0.3095, "step": 63925 }, { "epoch": 2.71, "grad_norm": 3.772740514612205, "learning_rate": 2.903270209538961e-07, "loss": 0.3387, "step": 63930 }, { "epoch": 2.71, "grad_norm": 3.926257533006478, "learning_rate": 2.8991375535439403e-07, "loss": 0.3297, "step": 63935 }, { "epoch": 2.71, "grad_norm": 4.240385828952048, "learning_rate": 2.895007753135476e-07, "loss": 0.32, "step": 63940 }, { "epoch": 2.71, "grad_norm": 4.230984729912234, "learning_rate": 2.8908808085639396e-07, "loss": 0.3268, "step": 63945 }, { "epoch": 2.71, "grad_norm": 4.21158824567527, "learning_rate": 2.886756720079531e-07, "loss": 0.3102, "step": 63950 }, { "epoch": 2.71, "grad_norm": 3.8929001628293403, "learning_rate": 2.882635487932284e-07, "loss": 0.312, "step": 63955 }, { "epoch": 2.71, "grad_norm": 4.077681018342183, "learning_rate": 2.87851711237207e-07, "loss": 0.2767, "step": 63960 }, { "epoch": 2.71, "grad_norm": 4.0806386262009315, "learning_rate": 2.8744015936485513e-07, "loss": 0.3158, "step": 63965 }, { "epoch": 2.71, "grad_norm": 3.885086719153386, "learning_rate": 2.870288932011256e-07, "loss": 0.3579, "step": 63970 }, { "epoch": 2.71, "grad_norm": 4.149216587724372, "learning_rate": 2.8661791277095176e-07, "loss": 0.2599, "step": 63975 }, { "epoch": 2.71, "grad_norm": 3.56302701011447, "learning_rate": 2.862072180992509e-07, "loss": 0.2955, "step": 63980 }, { "epoch": 2.71, "grad_norm": 3.566227830630732, "learning_rate": 2.8579680921092036e-07, "loss": 0.3004, "step": 63985 }, { "epoch": 2.71, "grad_norm": 12.124329961324188, "learning_rate": 2.8538668613084297e-07, "loss": 0.334, "step": 63990 }, { "epoch": 2.71, "grad_norm": 4.181377837810836, "learning_rate": 2.8497684888388444e-07, "loss": 0.3674, "step": 63995 }, { "epoch": 2.71, "grad_norm": 3.7458388581051283, "learning_rate": 2.8456729749489044e-07, "loss": 0.3172, "step": 64000 }, { "epoch": 2.71, "grad_norm": 4.094997749220291, "learning_rate": 2.841580319886911e-07, "loss": 0.2993, "step": 64005 }, { "epoch": 2.71, "grad_norm": 5.3314382245235725, "learning_rate": 2.837490523900993e-07, "loss": 0.3216, "step": 64010 }, { "epoch": 2.71, "grad_norm": 3.733429516035325, "learning_rate": 2.8334035872391085e-07, "loss": 0.3118, "step": 64015 }, { "epoch": 2.71, "grad_norm": 3.53270812501522, "learning_rate": 2.829319510149031e-07, "loss": 0.315, "step": 64020 }, { "epoch": 2.71, "grad_norm": 4.171464368760084, "learning_rate": 2.825238292878357e-07, "loss": 0.3366, "step": 64025 }, { "epoch": 2.71, "grad_norm": 4.2957495826062075, "learning_rate": 2.821159935674539e-07, "loss": 0.282, "step": 64030 }, { "epoch": 2.71, "grad_norm": 4.749558518378707, "learning_rate": 2.817084438784817e-07, "loss": 0.2771, "step": 64035 }, { "epoch": 2.71, "grad_norm": 4.141913530580697, "learning_rate": 2.8130118024562833e-07, "loss": 0.3244, "step": 64040 }, { "epoch": 2.71, "grad_norm": 3.680250779544887, "learning_rate": 2.808942026935857e-07, "loss": 0.3179, "step": 64045 }, { "epoch": 2.71, "grad_norm": 3.601873442086598, "learning_rate": 2.804875112470268e-07, "loss": 0.3176, "step": 64050 }, { "epoch": 2.71, "grad_norm": 3.8325538786766886, "learning_rate": 2.8008110593060745e-07, "loss": 0.2939, "step": 64055 }, { "epoch": 2.71, "grad_norm": 4.284659842794704, "learning_rate": 2.79674986768968e-07, "loss": 0.3084, "step": 64060 }, { "epoch": 2.71, "grad_norm": 3.835472088122396, "learning_rate": 2.792691537867309e-07, "loss": 0.3154, "step": 64065 }, { "epoch": 2.71, "grad_norm": 4.130691298992677, "learning_rate": 2.788636070084988e-07, "loss": 0.3084, "step": 64070 }, { "epoch": 2.71, "grad_norm": 3.9739039036133477, "learning_rate": 2.7845834645885914e-07, "loss": 0.3288, "step": 64075 }, { "epoch": 2.71, "grad_norm": 3.558133060328146, "learning_rate": 2.780533721623835e-07, "loss": 0.2906, "step": 64080 }, { "epoch": 2.71, "grad_norm": 3.9499472539613554, "learning_rate": 2.776486841436221e-07, "loss": 0.3029, "step": 64085 }, { "epoch": 2.71, "grad_norm": 4.142574396307065, "learning_rate": 2.77244282427111e-07, "loss": 0.2922, "step": 64090 }, { "epoch": 2.71, "grad_norm": 4.312668281480161, "learning_rate": 2.768401670373683e-07, "loss": 0.2938, "step": 64095 }, { "epoch": 2.71, "grad_norm": 4.444615380743153, "learning_rate": 2.7643633799889267e-07, "loss": 0.3045, "step": 64100 }, { "epoch": 2.71, "grad_norm": 4.568110770735827, "learning_rate": 2.760327953361686e-07, "loss": 0.3121, "step": 64105 }, { "epoch": 2.71, "grad_norm": 3.574518144220236, "learning_rate": 2.7562953907366075e-07, "loss": 0.3422, "step": 64110 }, { "epoch": 2.71, "grad_norm": 3.696754118138441, "learning_rate": 2.7522656923581917e-07, "loss": 0.3015, "step": 64115 }, { "epoch": 2.71, "grad_norm": 3.490902359365186, "learning_rate": 2.748238858470725e-07, "loss": 0.2962, "step": 64120 }, { "epoch": 2.71, "grad_norm": 3.9820118415000927, "learning_rate": 2.7442148893183475e-07, "loss": 0.3385, "step": 64125 }, { "epoch": 2.71, "grad_norm": 4.107855796688776, "learning_rate": 2.74019378514504e-07, "loss": 0.3234, "step": 64130 }, { "epoch": 2.71, "grad_norm": 3.5448266934766477, "learning_rate": 2.7361755461945696e-07, "loss": 0.29, "step": 64135 }, { "epoch": 2.71, "grad_norm": 4.27660859517952, "learning_rate": 2.7321601727105463e-07, "loss": 0.357, "step": 64140 }, { "epoch": 2.71, "grad_norm": 3.967608413207996, "learning_rate": 2.728147664936426e-07, "loss": 0.2865, "step": 64145 }, { "epoch": 2.71, "grad_norm": 3.890745686730736, "learning_rate": 2.7241380231154747e-07, "loss": 0.2926, "step": 64150 }, { "epoch": 2.72, "grad_norm": 3.9010254805835896, "learning_rate": 2.7201312474907704e-07, "loss": 0.2956, "step": 64155 }, { "epoch": 2.72, "grad_norm": 4.160936068790499, "learning_rate": 2.716127338305241e-07, "loss": 0.318, "step": 64160 }, { "epoch": 2.72, "grad_norm": 3.7563094490898585, "learning_rate": 2.712126295801637e-07, "loss": 0.2836, "step": 64165 }, { "epoch": 2.72, "grad_norm": 3.989752569168498, "learning_rate": 2.7081281202225194e-07, "loss": 0.296, "step": 64170 }, { "epoch": 2.72, "grad_norm": 3.8126943237969533, "learning_rate": 2.704132811810295e-07, "loss": 0.2916, "step": 64175 }, { "epoch": 2.72, "grad_norm": 4.455004424296066, "learning_rate": 2.700140370807175e-07, "loss": 0.3167, "step": 64180 }, { "epoch": 2.72, "grad_norm": 3.9453780016771365, "learning_rate": 2.6961507974552324e-07, "loss": 0.3314, "step": 64185 }, { "epoch": 2.72, "grad_norm": 3.9898028121243305, "learning_rate": 2.6921640919963187e-07, "loss": 0.323, "step": 64190 }, { "epoch": 2.72, "grad_norm": 3.5783949299413202, "learning_rate": 2.6881802546721515e-07, "loss": 0.3147, "step": 64195 }, { "epoch": 2.72, "grad_norm": 3.9061295852107416, "learning_rate": 2.6841992857242536e-07, "loss": 0.2983, "step": 64200 }, { "epoch": 2.72, "grad_norm": 4.287263467894012, "learning_rate": 2.680221185393977e-07, "loss": 0.2979, "step": 64205 }, { "epoch": 2.72, "grad_norm": 4.571390822266338, "learning_rate": 2.6762459539225016e-07, "loss": 0.294, "step": 64210 }, { "epoch": 2.72, "grad_norm": 4.431756108399277, "learning_rate": 2.67227359155085e-07, "loss": 0.3154, "step": 64215 }, { "epoch": 2.72, "grad_norm": 4.402563928373287, "learning_rate": 2.668304098519831e-07, "loss": 0.3289, "step": 64220 }, { "epoch": 2.72, "grad_norm": 4.038070451843523, "learning_rate": 2.6643374750701166e-07, "loss": 0.2962, "step": 64225 }, { "epoch": 2.72, "grad_norm": 4.3328286436348735, "learning_rate": 2.6603737214421886e-07, "loss": 0.3235, "step": 64230 }, { "epoch": 2.72, "grad_norm": 3.69674049914878, "learning_rate": 2.6564128378763596e-07, "loss": 0.3073, "step": 64235 }, { "epoch": 2.72, "grad_norm": 4.842500347970012, "learning_rate": 2.6524548246127656e-07, "loss": 0.3345, "step": 64240 }, { "epoch": 2.72, "grad_norm": 3.8257964238814477, "learning_rate": 2.6484996818913646e-07, "loss": 0.3273, "step": 64245 }, { "epoch": 2.72, "grad_norm": 4.500263008020389, "learning_rate": 2.6445474099519585e-07, "loss": 0.3208, "step": 64250 }, { "epoch": 2.72, "grad_norm": 3.7222006523872215, "learning_rate": 2.6405980090341454e-07, "loss": 0.3129, "step": 64255 }, { "epoch": 2.72, "grad_norm": 3.9225322574897126, "learning_rate": 2.636651479377378e-07, "loss": 0.3389, "step": 64260 }, { "epoch": 2.72, "grad_norm": 3.722188585215918, "learning_rate": 2.63270782122092e-07, "loss": 0.3019, "step": 64265 }, { "epoch": 2.72, "grad_norm": 3.9322755379967314, "learning_rate": 2.628767034803853e-07, "loss": 0.3186, "step": 64270 }, { "epoch": 2.72, "grad_norm": 3.937987452862827, "learning_rate": 2.6248291203651075e-07, "loss": 0.3139, "step": 64275 }, { "epoch": 2.72, "grad_norm": 4.032752321367245, "learning_rate": 2.620894078143432e-07, "loss": 0.3148, "step": 64280 }, { "epoch": 2.72, "grad_norm": 3.82378609006923, "learning_rate": 2.61696190837738e-07, "loss": 0.3241, "step": 64285 }, { "epoch": 2.72, "grad_norm": 4.259701101212652, "learning_rate": 2.613032611305355e-07, "loss": 0.3158, "step": 64290 }, { "epoch": 2.72, "grad_norm": 4.168396664554637, "learning_rate": 2.609106187165589e-07, "loss": 0.3012, "step": 64295 }, { "epoch": 2.72, "grad_norm": 3.9894103113593054, "learning_rate": 2.6051826361961186e-07, "loss": 0.3032, "step": 64300 }, { "epoch": 2.72, "grad_norm": 5.291686441602706, "learning_rate": 2.6012619586348154e-07, "loss": 0.3263, "step": 64305 }, { "epoch": 2.72, "grad_norm": 3.987501901596429, "learning_rate": 2.597344154719378e-07, "loss": 0.3076, "step": 64310 }, { "epoch": 2.72, "grad_norm": 3.9094691174905223, "learning_rate": 2.5934292246873385e-07, "loss": 0.2997, "step": 64315 }, { "epoch": 2.72, "grad_norm": 4.545985202038435, "learning_rate": 2.589517168776051e-07, "loss": 0.3266, "step": 64320 }, { "epoch": 2.72, "grad_norm": 3.8011610258727857, "learning_rate": 2.585607987222677e-07, "loss": 0.2872, "step": 64325 }, { "epoch": 2.72, "grad_norm": 4.183545442755828, "learning_rate": 2.5817016802642257e-07, "loss": 0.3057, "step": 64330 }, { "epoch": 2.72, "grad_norm": 4.194145374456808, "learning_rate": 2.577798248137536e-07, "loss": 0.2935, "step": 64335 }, { "epoch": 2.72, "grad_norm": 3.755203623845972, "learning_rate": 2.5738976910792466e-07, "loss": 0.333, "step": 64340 }, { "epoch": 2.72, "grad_norm": 3.860562504345379, "learning_rate": 2.5700000093258405e-07, "loss": 0.2935, "step": 64345 }, { "epoch": 2.72, "grad_norm": 3.742879755861836, "learning_rate": 2.566105203113628e-07, "loss": 0.3379, "step": 64350 }, { "epoch": 2.72, "grad_norm": 4.006173990072811, "learning_rate": 2.562213272678732e-07, "loss": 0.3018, "step": 64355 }, { "epoch": 2.72, "grad_norm": 4.726284372634893, "learning_rate": 2.558324218257113e-07, "loss": 0.303, "step": 64360 }, { "epoch": 2.72, "grad_norm": 4.054091024148938, "learning_rate": 2.5544380400845503e-07, "loss": 0.3637, "step": 64365 }, { "epoch": 2.72, "grad_norm": 5.088400788246344, "learning_rate": 2.55055473839666e-07, "loss": 0.3242, "step": 64370 }, { "epoch": 2.72, "grad_norm": 4.12334607574293, "learning_rate": 2.546674313428865e-07, "loss": 0.3186, "step": 64375 }, { "epoch": 2.72, "grad_norm": 3.7972906843988308, "learning_rate": 2.5427967654164275e-07, "loss": 0.2977, "step": 64380 }, { "epoch": 2.72, "grad_norm": 4.112105611123452, "learning_rate": 2.538922094594437e-07, "loss": 0.2937, "step": 64385 }, { "epoch": 2.73, "grad_norm": 3.921388123884773, "learning_rate": 2.5350503011977943e-07, "loss": 0.3287, "step": 64390 }, { "epoch": 2.73, "grad_norm": 3.7008722767529925, "learning_rate": 2.53118138546124e-07, "loss": 0.309, "step": 64395 }, { "epoch": 2.73, "grad_norm": 3.864252584028144, "learning_rate": 2.527315347619341e-07, "loss": 0.3479, "step": 64400 }, { "epoch": 2.73, "grad_norm": 4.52684586239443, "learning_rate": 2.523452187906467e-07, "loss": 0.3011, "step": 64405 }, { "epoch": 2.73, "grad_norm": 3.97939542356285, "learning_rate": 2.519591906556845e-07, "loss": 0.3107, "step": 64410 }, { "epoch": 2.73, "grad_norm": 3.821303620613166, "learning_rate": 2.5157345038045013e-07, "loss": 0.2879, "step": 64415 }, { "epoch": 2.73, "grad_norm": 4.071821119369255, "learning_rate": 2.511879979883314e-07, "loss": 0.3146, "step": 64420 }, { "epoch": 2.73, "grad_norm": 4.123936879652426, "learning_rate": 2.5080283350269574e-07, "loss": 0.3011, "step": 64425 }, { "epoch": 2.73, "grad_norm": 3.4123132135864194, "learning_rate": 2.504179569468951e-07, "loss": 0.3009, "step": 64430 }, { "epoch": 2.73, "grad_norm": 4.218098898980717, "learning_rate": 2.5003336834426406e-07, "loss": 0.3035, "step": 64435 }, { "epoch": 2.73, "grad_norm": 4.420353727305965, "learning_rate": 2.4964906771811794e-07, "loss": 0.3019, "step": 64440 }, { "epoch": 2.73, "grad_norm": 3.6111994363737314, "learning_rate": 2.492650550917558e-07, "loss": 0.2902, "step": 64445 }, { "epoch": 2.73, "grad_norm": 3.756554923424346, "learning_rate": 2.488813304884602e-07, "loss": 0.3225, "step": 64450 }, { "epoch": 2.73, "grad_norm": 3.9324622279898627, "learning_rate": 2.4849789393149416e-07, "loss": 0.2931, "step": 64455 }, { "epoch": 2.73, "grad_norm": 3.8141876463297724, "learning_rate": 2.4811474544410575e-07, "loss": 0.3111, "step": 64460 }, { "epoch": 2.73, "grad_norm": 3.564738202726435, "learning_rate": 2.477318850495225e-07, "loss": 0.3161, "step": 64465 }, { "epoch": 2.73, "grad_norm": 4.202979457692879, "learning_rate": 2.47349312770957e-07, "loss": 0.3103, "step": 64470 }, { "epoch": 2.73, "grad_norm": 4.23549596103264, "learning_rate": 2.4696702863160284e-07, "loss": 0.3237, "step": 64475 }, { "epoch": 2.73, "grad_norm": 3.6315628170521697, "learning_rate": 2.4658503265463764e-07, "loss": 0.3069, "step": 64480 }, { "epoch": 2.73, "grad_norm": 3.9198042788291882, "learning_rate": 2.4620332486321954e-07, "loss": 0.2857, "step": 64485 }, { "epoch": 2.73, "grad_norm": 4.794952269989331, "learning_rate": 2.458219052804922e-07, "loss": 0.3216, "step": 64490 }, { "epoch": 2.73, "grad_norm": 4.056557395123123, "learning_rate": 2.454407739295783e-07, "loss": 0.3018, "step": 64495 }, { "epoch": 2.73, "grad_norm": 3.850756434170652, "learning_rate": 2.4505993083358493e-07, "loss": 0.3389, "step": 64500 }, { "epoch": 2.73, "grad_norm": 3.495598643573532, "learning_rate": 2.4467937601560246e-07, "loss": 0.2858, "step": 64505 }, { "epoch": 2.73, "grad_norm": 3.924171363916104, "learning_rate": 2.4429910949870185e-07, "loss": 0.3061, "step": 64510 }, { "epoch": 2.73, "grad_norm": 3.8522674518873288, "learning_rate": 2.4391913130593745e-07, "loss": 0.316, "step": 64515 }, { "epoch": 2.73, "grad_norm": 4.097685024304629, "learning_rate": 2.435394414603476e-07, "loss": 0.3085, "step": 64520 }, { "epoch": 2.73, "grad_norm": 3.8525635612875386, "learning_rate": 2.4316003998494987e-07, "loss": 0.3005, "step": 64525 }, { "epoch": 2.73, "grad_norm": 3.6120401223088727, "learning_rate": 2.427809269027476e-07, "loss": 0.2933, "step": 64530 }, { "epoch": 2.73, "grad_norm": 3.672660795565602, "learning_rate": 2.424021022367251e-07, "loss": 0.3233, "step": 64535 }, { "epoch": 2.73, "grad_norm": 3.445882246179231, "learning_rate": 2.4202356600984857e-07, "loss": 0.2858, "step": 64540 }, { "epoch": 2.73, "grad_norm": 3.8229755763777513, "learning_rate": 2.4164531824506845e-07, "loss": 0.3087, "step": 64545 }, { "epoch": 2.73, "grad_norm": 4.582452960183164, "learning_rate": 2.412673589653164e-07, "loss": 0.3146, "step": 64550 }, { "epoch": 2.73, "grad_norm": 4.221715476220587, "learning_rate": 2.4088968819350744e-07, "loss": 0.3159, "step": 64555 }, { "epoch": 2.73, "grad_norm": 3.8613121891840128, "learning_rate": 2.405123059525377e-07, "loss": 0.2918, "step": 64560 }, { "epoch": 2.73, "grad_norm": 3.9518270360115912, "learning_rate": 2.401352122652878e-07, "loss": 0.3185, "step": 64565 }, { "epoch": 2.73, "grad_norm": 3.8450482787372677, "learning_rate": 2.397584071546194e-07, "loss": 0.3195, "step": 64570 }, { "epoch": 2.73, "grad_norm": 4.024476699641109, "learning_rate": 2.3938189064337647e-07, "loss": 0.2673, "step": 64575 }, { "epoch": 2.73, "grad_norm": 4.292608082018545, "learning_rate": 2.3900566275438687e-07, "loss": 0.3273, "step": 64580 }, { "epoch": 2.73, "grad_norm": 3.567751739106351, "learning_rate": 2.3862972351046066e-07, "loss": 0.2948, "step": 64585 }, { "epoch": 2.73, "grad_norm": 4.673519355701563, "learning_rate": 2.3825407293438852e-07, "loss": 0.3093, "step": 64590 }, { "epoch": 2.73, "grad_norm": 4.120359961131146, "learning_rate": 2.3787871104894555e-07, "loss": 0.3291, "step": 64595 }, { "epoch": 2.73, "grad_norm": 4.067017270540312, "learning_rate": 2.3750363787688913e-07, "loss": 0.3281, "step": 64600 }, { "epoch": 2.73, "grad_norm": 3.6180085399876423, "learning_rate": 2.3712885344095937e-07, "loss": 0.2988, "step": 64605 }, { "epoch": 2.73, "grad_norm": 4.112850684993914, "learning_rate": 2.36754357763877e-07, "loss": 0.3278, "step": 64610 }, { "epoch": 2.73, "grad_norm": 4.225845852020629, "learning_rate": 2.3638015086834776e-07, "loss": 0.303, "step": 64615 }, { "epoch": 2.73, "grad_norm": 4.22012475300299, "learning_rate": 2.3600623277705847e-07, "loss": 0.3212, "step": 64620 }, { "epoch": 2.74, "grad_norm": 3.5061510264771174, "learning_rate": 2.356326035126788e-07, "loss": 0.3032, "step": 64625 }, { "epoch": 2.74, "grad_norm": 4.036888798018336, "learning_rate": 2.3525926309785952e-07, "loss": 0.2974, "step": 64630 }, { "epoch": 2.74, "grad_norm": 3.637103489756905, "learning_rate": 2.348862115552364e-07, "loss": 0.3076, "step": 64635 }, { "epoch": 2.74, "grad_norm": 3.6595033787501676, "learning_rate": 2.345134489074269e-07, "loss": 0.3082, "step": 64640 }, { "epoch": 2.74, "grad_norm": 4.310421670403091, "learning_rate": 2.341409751770285e-07, "loss": 0.3324, "step": 64645 }, { "epoch": 2.74, "grad_norm": 3.5662237564473487, "learning_rate": 2.3376879038662537e-07, "loss": 0.3265, "step": 64650 }, { "epoch": 2.74, "grad_norm": 4.02649492543004, "learning_rate": 2.3339689455878112e-07, "loss": 0.3254, "step": 64655 }, { "epoch": 2.74, "grad_norm": 3.772013746437683, "learning_rate": 2.3302528771604215e-07, "loss": 0.3189, "step": 64660 }, { "epoch": 2.74, "grad_norm": 4.33927275870982, "learning_rate": 2.3265396988093825e-07, "loss": 0.3043, "step": 64665 }, { "epoch": 2.74, "grad_norm": 3.7312077946729194, "learning_rate": 2.3228294107598194e-07, "loss": 0.3222, "step": 64670 }, { "epoch": 2.74, "grad_norm": 3.7443999843563125, "learning_rate": 2.3191220132366744e-07, "loss": 0.2795, "step": 64675 }, { "epoch": 2.74, "grad_norm": 3.9666098795834506, "learning_rate": 2.3154175064647068e-07, "loss": 0.3107, "step": 64680 }, { "epoch": 2.74, "grad_norm": 4.798754018011551, "learning_rate": 2.3117158906685143e-07, "loss": 0.312, "step": 64685 }, { "epoch": 2.74, "grad_norm": 3.98040172538192, "learning_rate": 2.3080171660725293e-07, "loss": 0.3448, "step": 64690 }, { "epoch": 2.74, "grad_norm": 6.35679047414917, "learning_rate": 2.3043213329009718e-07, "loss": 0.3238, "step": 64695 }, { "epoch": 2.74, "grad_norm": 3.9613800380209945, "learning_rate": 2.300628391377918e-07, "loss": 0.3077, "step": 64700 }, { "epoch": 2.74, "grad_norm": 4.833436524968185, "learning_rate": 2.2969383417272672e-07, "loss": 0.3556, "step": 64705 }, { "epoch": 2.74, "grad_norm": 3.7819657438978176, "learning_rate": 2.293251184172729e-07, "loss": 0.3088, "step": 64710 }, { "epoch": 2.74, "grad_norm": 4.70056909962587, "learning_rate": 2.2895669189378466e-07, "loss": 0.3223, "step": 64715 }, { "epoch": 2.74, "grad_norm": 4.172014618854703, "learning_rate": 2.2858855462459805e-07, "loss": 0.2963, "step": 64720 }, { "epoch": 2.74, "grad_norm": 3.601684356983415, "learning_rate": 2.282207066320341e-07, "loss": 0.2841, "step": 64725 }, { "epoch": 2.74, "grad_norm": 3.8215403593735107, "learning_rate": 2.278531479383922e-07, "loss": 0.333, "step": 64730 }, { "epoch": 2.74, "grad_norm": 3.8229177013126985, "learning_rate": 2.2748587856595672e-07, "loss": 0.2964, "step": 64735 }, { "epoch": 2.74, "grad_norm": 3.925452980140284, "learning_rate": 2.2711889853699542e-07, "loss": 0.2797, "step": 64740 }, { "epoch": 2.74, "grad_norm": 3.5425670316783058, "learning_rate": 2.2675220787375551e-07, "loss": 0.2958, "step": 64745 }, { "epoch": 2.74, "grad_norm": 4.876709846869579, "learning_rate": 2.263858065984692e-07, "loss": 0.3042, "step": 64750 }, { "epoch": 2.74, "grad_norm": 3.8287541255722006, "learning_rate": 2.2601969473335094e-07, "loss": 0.3377, "step": 64755 }, { "epoch": 2.74, "grad_norm": 3.6659429320113706, "learning_rate": 2.2565387230059577e-07, "loss": 0.329, "step": 64760 }, { "epoch": 2.74, "grad_norm": 4.085625882507761, "learning_rate": 2.2528833932238313e-07, "loss": 0.3334, "step": 64765 }, { "epoch": 2.74, "grad_norm": 3.582345753272541, "learning_rate": 2.2492309582087425e-07, "loss": 0.3003, "step": 64770 }, { "epoch": 2.74, "grad_norm": 3.9866021643814826, "learning_rate": 2.24558141818213e-07, "loss": 0.2933, "step": 64775 }, { "epoch": 2.74, "grad_norm": 4.087044199785954, "learning_rate": 2.241934773365251e-07, "loss": 0.2991, "step": 64780 }, { "epoch": 2.74, "grad_norm": 4.16247034728151, "learning_rate": 2.2382910239791833e-07, "loss": 0.2895, "step": 64785 }, { "epoch": 2.74, "grad_norm": 3.705156902317562, "learning_rate": 2.2346501702448452e-07, "loss": 0.2844, "step": 64790 }, { "epoch": 2.74, "grad_norm": 3.584680425294526, "learning_rate": 2.231012212382977e-07, "loss": 0.3188, "step": 64795 }, { "epoch": 2.74, "grad_norm": 4.59399050087571, "learning_rate": 2.227377150614124e-07, "loss": 0.34, "step": 64800 }, { "epoch": 2.74, "grad_norm": 3.806615009214411, "learning_rate": 2.223744985158671e-07, "loss": 0.3034, "step": 64805 }, { "epoch": 2.74, "grad_norm": 3.8082261288004027, "learning_rate": 2.2201157162368425e-07, "loss": 0.3048, "step": 64810 }, { "epoch": 2.74, "grad_norm": 4.757452731394143, "learning_rate": 2.2164893440686508e-07, "loss": 0.3348, "step": 64815 }, { "epoch": 2.74, "grad_norm": 4.405462319453972, "learning_rate": 2.2128658688739536e-07, "loss": 0.3249, "step": 64820 }, { "epoch": 2.74, "grad_norm": 4.368184522266987, "learning_rate": 2.209245290872447e-07, "loss": 0.35, "step": 64825 }, { "epoch": 2.74, "grad_norm": 4.1155682278962695, "learning_rate": 2.2056276102836228e-07, "loss": 0.3342, "step": 64830 }, { "epoch": 2.74, "grad_norm": 4.193755988449308, "learning_rate": 2.2020128273268106e-07, "loss": 0.3227, "step": 64835 }, { "epoch": 2.74, "grad_norm": 3.597634350879257, "learning_rate": 2.1984009422211738e-07, "loss": 0.2899, "step": 64840 }, { "epoch": 2.74, "grad_norm": 5.070330020651896, "learning_rate": 2.1947919551856767e-07, "loss": 0.3221, "step": 64845 }, { "epoch": 2.74, "grad_norm": 3.74117832970566, "learning_rate": 2.1911858664391272e-07, "loss": 0.3093, "step": 64850 }, { "epoch": 2.74, "grad_norm": 3.727513319194406, "learning_rate": 2.1875826762001562e-07, "loss": 0.3076, "step": 64855 }, { "epoch": 2.74, "grad_norm": 4.486668113847009, "learning_rate": 2.1839823846872165e-07, "loss": 0.3041, "step": 64860 }, { "epoch": 2.75, "grad_norm": 3.466974263282053, "learning_rate": 2.1803849921185728e-07, "loss": 0.3075, "step": 64865 }, { "epoch": 2.75, "grad_norm": 3.7686271749194256, "learning_rate": 2.1767904987123278e-07, "loss": 0.2973, "step": 64870 }, { "epoch": 2.75, "grad_norm": 4.333694984162197, "learning_rate": 2.1731989046864132e-07, "loss": 0.2977, "step": 64875 }, { "epoch": 2.75, "grad_norm": 3.9172145269028964, "learning_rate": 2.1696102102585658e-07, "loss": 0.3025, "step": 64880 }, { "epoch": 2.75, "grad_norm": 3.8777319184268446, "learning_rate": 2.1660244156463617e-07, "loss": 0.3135, "step": 64885 }, { "epoch": 2.75, "grad_norm": 3.7940391432218656, "learning_rate": 2.1624415210672044e-07, "loss": 0.2825, "step": 64890 }, { "epoch": 2.75, "grad_norm": 4.127851168880768, "learning_rate": 2.158861526738304e-07, "loss": 0.3014, "step": 64895 }, { "epoch": 2.75, "grad_norm": 3.819128409808455, "learning_rate": 2.155284432876703e-07, "loss": 0.2963, "step": 64900 }, { "epoch": 2.75, "grad_norm": 3.8351265349169963, "learning_rate": 2.1517102396992784e-07, "loss": 0.296, "step": 64905 }, { "epoch": 2.75, "grad_norm": 4.715909265681781, "learning_rate": 2.1481389474227233e-07, "loss": 0.2951, "step": 64910 }, { "epoch": 2.75, "grad_norm": 3.8858493772977396, "learning_rate": 2.1445705562635478e-07, "loss": 0.3117, "step": 64915 }, { "epoch": 2.75, "grad_norm": 3.8750300151212236, "learning_rate": 2.141005066438101e-07, "loss": 0.3129, "step": 64920 }, { "epoch": 2.75, "grad_norm": 3.95500372894167, "learning_rate": 2.1374424781625434e-07, "loss": 0.3237, "step": 64925 }, { "epoch": 2.75, "grad_norm": 4.142952344537536, "learning_rate": 2.1338827916528637e-07, "loss": 0.2912, "step": 64930 }, { "epoch": 2.75, "grad_norm": 3.6888942906917026, "learning_rate": 2.1303260071248722e-07, "loss": 0.3043, "step": 64935 }, { "epoch": 2.75, "grad_norm": 4.243586291605428, "learning_rate": 2.1267721247942186e-07, "loss": 0.3156, "step": 64940 }, { "epoch": 2.75, "grad_norm": 3.8653251124915475, "learning_rate": 2.1232211448763528e-07, "loss": 0.3046, "step": 64945 }, { "epoch": 2.75, "grad_norm": 4.200824789281138, "learning_rate": 2.1196730675865584e-07, "loss": 0.3299, "step": 64950 }, { "epoch": 2.75, "grad_norm": 4.193963979984511, "learning_rate": 2.116127893139952e-07, "loss": 0.3008, "step": 64955 }, { "epoch": 2.75, "grad_norm": 3.9576133520488224, "learning_rate": 2.1125856217514672e-07, "loss": 0.3116, "step": 64960 }, { "epoch": 2.75, "grad_norm": 4.754620424217938, "learning_rate": 2.1090462536358546e-07, "loss": 0.3351, "step": 64965 }, { "epoch": 2.75, "grad_norm": 3.912268731083, "learning_rate": 2.1055097890077038e-07, "loss": 0.2941, "step": 64970 }, { "epoch": 2.75, "grad_norm": 3.9356487192506915, "learning_rate": 2.1019762280814148e-07, "loss": 0.3755, "step": 64975 }, { "epoch": 2.75, "grad_norm": 3.8753060000683717, "learning_rate": 2.0984455710712226e-07, "loss": 0.2954, "step": 64980 }, { "epoch": 2.75, "grad_norm": 3.9960872331101203, "learning_rate": 2.0949178181911724e-07, "loss": 0.3063, "step": 64985 }, { "epoch": 2.75, "grad_norm": 3.514092569433876, "learning_rate": 2.0913929696551427e-07, "loss": 0.2872, "step": 64990 }, { "epoch": 2.75, "grad_norm": 4.106368768150705, "learning_rate": 2.0878710256768463e-07, "loss": 0.2855, "step": 64995 }, { "epoch": 2.75, "grad_norm": 3.8336283186113413, "learning_rate": 2.084351986469796e-07, "loss": 0.2803, "step": 65000 }, { "epoch": 2.75, "grad_norm": 4.489162892944125, "learning_rate": 2.0808358522473425e-07, "loss": 0.3688, "step": 65005 }, { "epoch": 2.75, "grad_norm": 4.899764268480078, "learning_rate": 2.0773226232226662e-07, "loss": 0.3455, "step": 65010 }, { "epoch": 2.75, "grad_norm": 3.7446596683305065, "learning_rate": 2.073812299608752e-07, "loss": 0.2892, "step": 65015 }, { "epoch": 2.75, "grad_norm": 4.278141365472454, "learning_rate": 2.070304881618429e-07, "loss": 0.3179, "step": 65020 }, { "epoch": 2.75, "grad_norm": 4.886493087314849, "learning_rate": 2.066800369464339e-07, "loss": 0.3118, "step": 65025 }, { "epoch": 2.75, "grad_norm": 3.872922585901304, "learning_rate": 2.0632987633589562e-07, "loss": 0.3075, "step": 65030 }, { "epoch": 2.75, "grad_norm": 3.8515226226935613, "learning_rate": 2.059800063514561e-07, "loss": 0.3153, "step": 65035 }, { "epoch": 2.75, "grad_norm": 3.768941911392484, "learning_rate": 2.0563042701432723e-07, "loss": 0.2825, "step": 65040 }, { "epoch": 2.75, "grad_norm": 4.021608317409099, "learning_rate": 2.052811383457043e-07, "loss": 0.3282, "step": 65045 }, { "epoch": 2.75, "grad_norm": 3.8046871340060346, "learning_rate": 2.0493214036676201e-07, "loss": 0.2868, "step": 65050 }, { "epoch": 2.75, "grad_norm": 3.9487338933842646, "learning_rate": 2.0458343309865957e-07, "loss": 0.3142, "step": 65055 }, { "epoch": 2.75, "grad_norm": 3.873583777965302, "learning_rate": 2.0423501656253897e-07, "loss": 0.3093, "step": 65060 }, { "epoch": 2.75, "grad_norm": 3.9082790773882095, "learning_rate": 2.0388689077952217e-07, "loss": 0.3187, "step": 65065 }, { "epoch": 2.75, "grad_norm": 3.955452043134012, "learning_rate": 2.035390557707162e-07, "loss": 0.3173, "step": 65070 }, { "epoch": 2.75, "grad_norm": 3.964901306990434, "learning_rate": 2.0319151155720806e-07, "loss": 0.3058, "step": 65075 }, { "epoch": 2.75, "grad_norm": 5.269240709116969, "learning_rate": 2.0284425816007035e-07, "loss": 0.3127, "step": 65080 }, { "epoch": 2.75, "grad_norm": 3.8676326939416543, "learning_rate": 2.0249729560035403e-07, "loss": 0.3114, "step": 65085 }, { "epoch": 2.75, "grad_norm": 3.698020681983677, "learning_rate": 2.02150623899095e-07, "loss": 0.3054, "step": 65090 }, { "epoch": 2.75, "grad_norm": 4.4125433885599525, "learning_rate": 2.0180424307731207e-07, "loss": 0.3192, "step": 65095 }, { "epoch": 2.76, "grad_norm": 3.883717733940784, "learning_rate": 2.0145815315600335e-07, "loss": 0.3104, "step": 65100 }, { "epoch": 2.76, "grad_norm": 3.842755776682679, "learning_rate": 2.0111235415615327e-07, "loss": 0.2834, "step": 65105 }, { "epoch": 2.76, "grad_norm": 3.7927718610114733, "learning_rate": 2.0076684609872443e-07, "loss": 0.3012, "step": 65110 }, { "epoch": 2.76, "grad_norm": 4.4450468139065, "learning_rate": 2.0042162900466622e-07, "loss": 0.3263, "step": 65115 }, { "epoch": 2.76, "grad_norm": 4.245525699240544, "learning_rate": 2.0007670289490577e-07, "loss": 0.2897, "step": 65120 }, { "epoch": 2.76, "grad_norm": 4.032783752746433, "learning_rate": 1.9973206779035692e-07, "loss": 0.3081, "step": 65125 }, { "epoch": 2.76, "grad_norm": 3.9465208964687735, "learning_rate": 1.993877237119135e-07, "loss": 0.3082, "step": 65130 }, { "epoch": 2.76, "grad_norm": 4.165506205537388, "learning_rate": 1.9904367068045048e-07, "loss": 0.3035, "step": 65135 }, { "epoch": 2.76, "grad_norm": 3.718943241187539, "learning_rate": 1.9869990871682898e-07, "loss": 0.3083, "step": 65140 }, { "epoch": 2.76, "grad_norm": 3.9493668626786245, "learning_rate": 1.9835643784188953e-07, "loss": 0.2886, "step": 65145 }, { "epoch": 2.76, "grad_norm": 4.065892744562885, "learning_rate": 1.9801325807645489e-07, "loss": 0.3044, "step": 65150 }, { "epoch": 2.76, "grad_norm": 4.027141162226871, "learning_rate": 1.9767036944133178e-07, "loss": 0.3158, "step": 65155 }, { "epoch": 2.76, "grad_norm": 4.056519483125561, "learning_rate": 1.97327771957308e-07, "loss": 0.3021, "step": 65160 }, { "epoch": 2.76, "grad_norm": 3.8679624695620722, "learning_rate": 1.9698546564515586e-07, "loss": 0.323, "step": 65165 }, { "epoch": 2.76, "grad_norm": 3.848463614312208, "learning_rate": 1.966434505256265e-07, "loss": 0.2999, "step": 65170 }, { "epoch": 2.76, "grad_norm": 3.9796206759044996, "learning_rate": 1.9630172661945612e-07, "loss": 0.3077, "step": 65175 }, { "epoch": 2.76, "grad_norm": 4.05645923036871, "learning_rate": 1.959602939473626e-07, "loss": 0.2726, "step": 65180 }, { "epoch": 2.76, "grad_norm": 3.719844561251929, "learning_rate": 1.9561915253004494e-07, "loss": 0.3176, "step": 65185 }, { "epoch": 2.76, "grad_norm": 3.6207441044336504, "learning_rate": 1.9527830238818656e-07, "loss": 0.3328, "step": 65190 }, { "epoch": 2.76, "grad_norm": 3.9061851623110293, "learning_rate": 1.9493774354245265e-07, "loss": 0.3066, "step": 65195 }, { "epoch": 2.76, "grad_norm": 4.3015199275416025, "learning_rate": 1.9459747601348888e-07, "loss": 0.3036, "step": 65200 }, { "epoch": 2.76, "grad_norm": 4.404087921835684, "learning_rate": 1.9425749982192488e-07, "loss": 0.2907, "step": 65205 }, { "epoch": 2.76, "grad_norm": 4.226711822729783, "learning_rate": 1.939178149883736e-07, "loss": 0.316, "step": 65210 }, { "epoch": 2.76, "grad_norm": 4.287596681112214, "learning_rate": 1.9357842153342853e-07, "loss": 0.2958, "step": 65215 }, { "epoch": 2.76, "grad_norm": 4.002288003678812, "learning_rate": 1.932393194776655e-07, "loss": 0.2882, "step": 65220 }, { "epoch": 2.76, "grad_norm": 3.8234044516707755, "learning_rate": 1.9290050884164413e-07, "loss": 0.2955, "step": 65225 }, { "epoch": 2.76, "grad_norm": 3.7499957524221186, "learning_rate": 1.9256198964590523e-07, "loss": 0.2914, "step": 65230 }, { "epoch": 2.76, "grad_norm": 5.948283643453769, "learning_rate": 1.922237619109718e-07, "loss": 0.3035, "step": 65235 }, { "epoch": 2.76, "grad_norm": 5.329173549498761, "learning_rate": 1.918858256573497e-07, "loss": 0.3262, "step": 65240 }, { "epoch": 2.76, "grad_norm": 3.921624831727337, "learning_rate": 1.9154818090552807e-07, "loss": 0.3157, "step": 65245 }, { "epoch": 2.76, "grad_norm": 3.98626161992799, "learning_rate": 1.9121082767597608e-07, "loss": 0.32, "step": 65250 }, { "epoch": 2.76, "grad_norm": 3.811663425539556, "learning_rate": 1.9087376598914632e-07, "loss": 0.3005, "step": 65255 }, { "epoch": 2.76, "grad_norm": 3.8054003858601866, "learning_rate": 1.905369958654746e-07, "loss": 0.3029, "step": 65260 }, { "epoch": 2.76, "grad_norm": 3.79523018656876, "learning_rate": 1.9020051732537904e-07, "loss": 0.322, "step": 65265 }, { "epoch": 2.76, "grad_norm": 3.9995917920528257, "learning_rate": 1.8986433038925777e-07, "loss": 0.3022, "step": 65270 }, { "epoch": 2.76, "grad_norm": 4.29626271696926, "learning_rate": 1.8952843507749287e-07, "loss": 0.311, "step": 65275 }, { "epoch": 2.76, "grad_norm": 4.409596273534369, "learning_rate": 1.8919283141044963e-07, "loss": 0.305, "step": 65280 }, { "epoch": 2.76, "grad_norm": 3.6632270873831843, "learning_rate": 1.8885751940847464e-07, "loss": 0.3001, "step": 65285 }, { "epoch": 2.76, "grad_norm": 4.954162200154615, "learning_rate": 1.8852249909189602e-07, "loss": 0.2908, "step": 65290 }, { "epoch": 2.76, "grad_norm": 3.9853858249401344, "learning_rate": 1.8818777048102588e-07, "loss": 0.2976, "step": 65295 }, { "epoch": 2.76, "grad_norm": 3.5511247496283236, "learning_rate": 1.8785333359615799e-07, "loss": 0.2792, "step": 65300 }, { "epoch": 2.76, "grad_norm": 3.80671534926733, "learning_rate": 1.8751918845756722e-07, "loss": 0.3221, "step": 65305 }, { "epoch": 2.76, "grad_norm": 4.060698877158665, "learning_rate": 1.8718533508551239e-07, "loss": 0.3086, "step": 65310 }, { "epoch": 2.76, "grad_norm": 4.121792242192534, "learning_rate": 1.8685177350023453e-07, "loss": 0.3259, "step": 65315 }, { "epoch": 2.76, "grad_norm": 4.1658854567631005, "learning_rate": 1.865185037219558e-07, "loss": 0.307, "step": 65320 }, { "epoch": 2.76, "grad_norm": 3.5710311581931364, "learning_rate": 1.8618552577088166e-07, "loss": 0.3151, "step": 65325 }, { "epoch": 2.76, "grad_norm": 4.057308731654288, "learning_rate": 1.8585283966719937e-07, "loss": 0.306, "step": 65330 }, { "epoch": 2.77, "grad_norm": 5.292362732722109, "learning_rate": 1.8552044543107938e-07, "loss": 0.318, "step": 65335 }, { "epoch": 2.77, "grad_norm": 4.00932084644621, "learning_rate": 1.851883430826723e-07, "loss": 0.3123, "step": 65340 }, { "epoch": 2.77, "grad_norm": 3.7318512459579996, "learning_rate": 1.8485653264211423e-07, "loss": 0.2803, "step": 65345 }, { "epoch": 2.77, "grad_norm": 3.9288826764931155, "learning_rate": 1.8452501412952128e-07, "loss": 0.3091, "step": 65350 }, { "epoch": 2.77, "grad_norm": 3.621948399798097, "learning_rate": 1.8419378756499184e-07, "loss": 0.3066, "step": 65355 }, { "epoch": 2.77, "grad_norm": 3.766212039988786, "learning_rate": 1.8386285296860762e-07, "loss": 0.2955, "step": 65360 }, { "epoch": 2.77, "grad_norm": 3.8817224663631773, "learning_rate": 1.8353221036043256e-07, "loss": 0.3032, "step": 65365 }, { "epoch": 2.77, "grad_norm": 3.8436266193523974, "learning_rate": 1.8320185976051175e-07, "loss": 0.3068, "step": 65370 }, { "epoch": 2.77, "grad_norm": 3.8610410563903272, "learning_rate": 1.8287180118887415e-07, "loss": 0.2979, "step": 65375 }, { "epoch": 2.77, "grad_norm": 3.917937626245474, "learning_rate": 1.825420346655299e-07, "loss": 0.3072, "step": 65380 }, { "epoch": 2.77, "grad_norm": 3.9542310461385424, "learning_rate": 1.8221256021047184e-07, "loss": 0.3187, "step": 65385 }, { "epoch": 2.77, "grad_norm": 4.5735932898633, "learning_rate": 1.818833778436746e-07, "loss": 0.297, "step": 65390 }, { "epoch": 2.77, "grad_norm": 3.7262538724710335, "learning_rate": 1.8155448758509664e-07, "loss": 0.2938, "step": 65395 }, { "epoch": 2.77, "grad_norm": 3.9927699038225, "learning_rate": 1.8122588945467644e-07, "loss": 0.3275, "step": 65400 }, { "epoch": 2.77, "grad_norm": 4.153100333832798, "learning_rate": 1.8089758347233644e-07, "loss": 0.3179, "step": 65405 }, { "epoch": 2.77, "grad_norm": 3.60220193009997, "learning_rate": 1.8056956965798068e-07, "loss": 0.2853, "step": 65410 }, { "epoch": 2.77, "grad_norm": 3.8211872084671077, "learning_rate": 1.802418480314966e-07, "loss": 0.3179, "step": 65415 }, { "epoch": 2.77, "grad_norm": 4.17942259917213, "learning_rate": 1.7991441861275161e-07, "loss": 0.3178, "step": 65420 }, { "epoch": 2.77, "grad_norm": 3.780293457962462, "learning_rate": 1.795872814215971e-07, "loss": 0.3106, "step": 65425 }, { "epoch": 2.77, "grad_norm": 3.8410367166036727, "learning_rate": 1.7926043647786716e-07, "loss": 0.3299, "step": 65430 }, { "epoch": 2.77, "grad_norm": 3.755185057538734, "learning_rate": 1.7893388380137766e-07, "loss": 0.3006, "step": 65435 }, { "epoch": 2.77, "grad_norm": 4.202531761377634, "learning_rate": 1.7860762341192495e-07, "loss": 0.3002, "step": 65440 }, { "epoch": 2.77, "grad_norm": 3.7466652284014947, "learning_rate": 1.7828165532928988e-07, "loss": 0.3022, "step": 65445 }, { "epoch": 2.77, "grad_norm": 4.282603630565211, "learning_rate": 1.7795597957323607e-07, "loss": 0.3083, "step": 65450 }, { "epoch": 2.77, "grad_norm": 3.856652818434062, "learning_rate": 1.7763059616350664e-07, "loss": 0.3113, "step": 65455 }, { "epoch": 2.77, "grad_norm": 4.520296979901218, "learning_rate": 1.773055051198297e-07, "loss": 0.3241, "step": 65460 }, { "epoch": 2.77, "grad_norm": 3.876866998330857, "learning_rate": 1.769807064619139e-07, "loss": 0.3186, "step": 65465 }, { "epoch": 2.77, "grad_norm": 3.9258771650668027, "learning_rate": 1.766562002094524e-07, "loss": 0.3082, "step": 65470 }, { "epoch": 2.77, "grad_norm": 3.8302538695348027, "learning_rate": 1.7633198638211668e-07, "loss": 0.3311, "step": 65475 }, { "epoch": 2.77, "grad_norm": 4.424954356570667, "learning_rate": 1.7600806499956435e-07, "loss": 0.315, "step": 65480 }, { "epoch": 2.77, "grad_norm": 4.050334123571713, "learning_rate": 1.756844360814336e-07, "loss": 0.2958, "step": 65485 }, { "epoch": 2.77, "grad_norm": 4.226016854547005, "learning_rate": 1.7536109964734482e-07, "loss": 0.3016, "step": 65490 }, { "epoch": 2.77, "grad_norm": 3.831013533798978, "learning_rate": 1.7503805571690123e-07, "loss": 0.3108, "step": 65495 }, { "epoch": 2.77, "grad_norm": 3.959873930872243, "learning_rate": 1.747153043096883e-07, "loss": 0.3144, "step": 65500 }, { "epoch": 2.77, "grad_norm": 3.860851315506426, "learning_rate": 1.7439284544527314e-07, "loss": 0.2884, "step": 65505 }, { "epoch": 2.77, "grad_norm": 3.7337144701626515, "learning_rate": 1.7407067914320452e-07, "loss": 0.3008, "step": 65510 }, { "epoch": 2.77, "grad_norm": 4.073854493257005, "learning_rate": 1.7374880542301576e-07, "loss": 0.293, "step": 65515 }, { "epoch": 2.77, "grad_norm": 3.7936794615524163, "learning_rate": 1.7342722430422176e-07, "loss": 0.3107, "step": 65520 }, { "epoch": 2.77, "grad_norm": 3.7129089803122053, "learning_rate": 1.7310593580631696e-07, "loss": 0.2924, "step": 65525 }, { "epoch": 2.77, "grad_norm": 3.775658664583829, "learning_rate": 1.727849399487813e-07, "loss": 0.3031, "step": 65530 }, { "epoch": 2.77, "grad_norm": 4.013941907497206, "learning_rate": 1.7246423675107649e-07, "loss": 0.3228, "step": 65535 }, { "epoch": 2.77, "grad_norm": 4.342091431439759, "learning_rate": 1.7214382623264415e-07, "loss": 0.3245, "step": 65540 }, { "epoch": 2.77, "grad_norm": 3.9285576854575237, "learning_rate": 1.7182370841291095e-07, "loss": 0.3272, "step": 65545 }, { "epoch": 2.77, "grad_norm": 3.919250390237749, "learning_rate": 1.7150388331128476e-07, "loss": 0.3042, "step": 65550 }, { "epoch": 2.77, "grad_norm": 4.135570087473103, "learning_rate": 1.7118435094715447e-07, "loss": 0.3125, "step": 65555 }, { "epoch": 2.77, "grad_norm": 4.172267531330611, "learning_rate": 1.7086511133989404e-07, "loss": 0.3081, "step": 65560 }, { "epoch": 2.77, "grad_norm": 3.9805558251982527, "learning_rate": 1.7054616450885685e-07, "loss": 0.3088, "step": 65565 }, { "epoch": 2.78, "grad_norm": 3.5592148820168257, "learning_rate": 1.7022751047338027e-07, "loss": 0.3077, "step": 65570 }, { "epoch": 2.78, "grad_norm": 3.9655760415055767, "learning_rate": 1.6990914925278325e-07, "loss": 0.3018, "step": 65575 }, { "epoch": 2.78, "grad_norm": 4.081779248704726, "learning_rate": 1.6959108086636645e-07, "loss": 0.3121, "step": 65580 }, { "epoch": 2.78, "grad_norm": 3.8991638152931314, "learning_rate": 1.6927330533341502e-07, "loss": 0.3145, "step": 65585 }, { "epoch": 2.78, "grad_norm": 3.8600334961310243, "learning_rate": 1.6895582267319355e-07, "loss": 0.286, "step": 65590 }, { "epoch": 2.78, "grad_norm": 4.563222013298887, "learning_rate": 1.6863863290494943e-07, "loss": 0.318, "step": 65595 }, { "epoch": 2.78, "grad_norm": 4.456289764630795, "learning_rate": 1.683217360479139e-07, "loss": 0.3167, "step": 65600 }, { "epoch": 2.78, "grad_norm": 4.2320522502159115, "learning_rate": 1.680051321213e-07, "loss": 0.3386, "step": 65605 }, { "epoch": 2.78, "grad_norm": 4.066092956413713, "learning_rate": 1.676888211443012e-07, "loss": 0.3123, "step": 65610 }, { "epoch": 2.78, "grad_norm": 4.081743828024168, "learning_rate": 1.673728031360955e-07, "loss": 0.3325, "step": 65615 }, { "epoch": 2.78, "grad_norm": 4.67229114479859, "learning_rate": 1.6705707811584203e-07, "loss": 0.3187, "step": 65620 }, { "epoch": 2.78, "grad_norm": 3.7243555863784104, "learning_rate": 1.6674164610268108e-07, "loss": 0.2918, "step": 65625 }, { "epoch": 2.78, "grad_norm": 3.731336288815053, "learning_rate": 1.664265071157378e-07, "loss": 0.2963, "step": 65630 }, { "epoch": 2.78, "grad_norm": 4.003264025463793, "learning_rate": 1.661116611741176e-07, "loss": 0.3506, "step": 65635 }, { "epoch": 2.78, "grad_norm": 4.009768694030956, "learning_rate": 1.6579710829690898e-07, "loss": 0.3222, "step": 65640 }, { "epoch": 2.78, "grad_norm": 3.827382315581644, "learning_rate": 1.6548284850318175e-07, "loss": 0.2984, "step": 65645 }, { "epoch": 2.78, "grad_norm": 3.873600253682237, "learning_rate": 1.6516888181198898e-07, "loss": 0.2863, "step": 65650 }, { "epoch": 2.78, "grad_norm": 3.9141922479332827, "learning_rate": 1.6485520824236544e-07, "loss": 0.3264, "step": 65655 }, { "epoch": 2.78, "grad_norm": 4.064819200594836, "learning_rate": 1.6454182781332818e-07, "loss": 0.3382, "step": 65660 }, { "epoch": 2.78, "grad_norm": 3.930905194379195, "learning_rate": 1.6422874054387638e-07, "loss": 0.2981, "step": 65665 }, { "epoch": 2.78, "grad_norm": 3.859641525842305, "learning_rate": 1.639159464529927e-07, "loss": 0.2835, "step": 65670 }, { "epoch": 2.78, "grad_norm": 3.8706396994931938, "learning_rate": 1.6360344555963913e-07, "loss": 0.3163, "step": 65675 }, { "epoch": 2.78, "grad_norm": 3.867007564868154, "learning_rate": 1.632912378827628e-07, "loss": 0.3017, "step": 65680 }, { "epoch": 2.78, "grad_norm": 3.8487713649721584, "learning_rate": 1.6297932344129186e-07, "loss": 0.3177, "step": 65685 }, { "epoch": 2.78, "grad_norm": 4.153643470848459, "learning_rate": 1.6266770225413674e-07, "loss": 0.3082, "step": 65690 }, { "epoch": 2.78, "grad_norm": 4.211692337238646, "learning_rate": 1.623563743401896e-07, "loss": 0.2948, "step": 65695 }, { "epoch": 2.78, "grad_norm": 3.618626290524091, "learning_rate": 1.6204533971832582e-07, "loss": 0.2929, "step": 65700 }, { "epoch": 2.78, "grad_norm": 3.9566671135323013, "learning_rate": 1.6173459840740258e-07, "loss": 0.3221, "step": 65705 }, { "epoch": 2.78, "grad_norm": 3.827249875879951, "learning_rate": 1.6142415042625926e-07, "loss": 0.3045, "step": 65710 }, { "epoch": 2.78, "grad_norm": 3.9594287758759457, "learning_rate": 1.6111399579371633e-07, "loss": 0.3018, "step": 65715 }, { "epoch": 2.78, "grad_norm": 4.050963687525933, "learning_rate": 1.608041345285799e-07, "loss": 0.3087, "step": 65720 }, { "epoch": 2.78, "grad_norm": 3.8859261335709756, "learning_rate": 1.6049456664963327e-07, "loss": 0.2882, "step": 65725 }, { "epoch": 2.78, "grad_norm": 3.9744083858635153, "learning_rate": 1.6018529217564584e-07, "loss": 0.3123, "step": 65730 }, { "epoch": 2.78, "grad_norm": 3.9780436089522957, "learning_rate": 1.5987631112536928e-07, "loss": 0.2863, "step": 65735 }, { "epoch": 2.78, "grad_norm": 3.979728497719229, "learning_rate": 1.5956762351753364e-07, "loss": 0.3313, "step": 65740 }, { "epoch": 2.78, "grad_norm": 3.9462244109348212, "learning_rate": 1.5925922937085503e-07, "loss": 0.3087, "step": 65745 }, { "epoch": 2.78, "grad_norm": 3.775861995879866, "learning_rate": 1.5895112870403072e-07, "loss": 0.2796, "step": 65750 }, { "epoch": 2.78, "grad_norm": 4.288523663108847, "learning_rate": 1.586433215357408e-07, "loss": 0.2976, "step": 65755 }, { "epoch": 2.78, "grad_norm": 3.7813724929829045, "learning_rate": 1.5833580788464477e-07, "loss": 0.2825, "step": 65760 }, { "epoch": 2.78, "grad_norm": 3.802954394261705, "learning_rate": 1.5802858776938658e-07, "loss": 0.3055, "step": 65765 }, { "epoch": 2.78, "grad_norm": 4.099488632156402, "learning_rate": 1.57721661208593e-07, "loss": 0.3045, "step": 65770 }, { "epoch": 2.78, "grad_norm": 3.977325451614377, "learning_rate": 1.5741502822087195e-07, "loss": 0.3127, "step": 65775 }, { "epoch": 2.78, "grad_norm": 4.164210107013999, "learning_rate": 1.57108688824813e-07, "loss": 0.289, "step": 65780 }, { "epoch": 2.78, "grad_norm": 4.019580638354942, "learning_rate": 1.5680264303898907e-07, "loss": 0.3396, "step": 65785 }, { "epoch": 2.78, "grad_norm": 3.80698606149582, "learning_rate": 1.564968908819553e-07, "loss": 0.2983, "step": 65790 }, { "epoch": 2.78, "grad_norm": 3.9040511640965962, "learning_rate": 1.5619143237224798e-07, "loss": 0.3108, "step": 65795 }, { "epoch": 2.78, "grad_norm": 3.634743205608102, "learning_rate": 1.5588626752838565e-07, "loss": 0.2774, "step": 65800 }, { "epoch": 2.78, "grad_norm": 3.8882672040483834, "learning_rate": 1.555813963688707e-07, "loss": 0.3166, "step": 65805 }, { "epoch": 2.79, "grad_norm": 4.0784562119060705, "learning_rate": 1.552768189121856e-07, "loss": 0.3114, "step": 65810 }, { "epoch": 2.79, "grad_norm": 3.9722743165088956, "learning_rate": 1.549725351767961e-07, "loss": 0.3272, "step": 65815 }, { "epoch": 2.79, "grad_norm": 4.250603616966226, "learning_rate": 1.5466854518115082e-07, "loss": 0.3242, "step": 65820 }, { "epoch": 2.79, "grad_norm": 3.868944725672153, "learning_rate": 1.5436484894367943e-07, "loss": 0.2953, "step": 65825 }, { "epoch": 2.79, "grad_norm": 3.778503149430003, "learning_rate": 1.5406144648279332e-07, "loss": 0.3113, "step": 65830 }, { "epoch": 2.79, "grad_norm": 3.9215593183432382, "learning_rate": 1.5375833781688776e-07, "loss": 0.3021, "step": 65835 }, { "epoch": 2.79, "grad_norm": 4.7266602745959005, "learning_rate": 1.5345552296433918e-07, "loss": 0.2923, "step": 65840 }, { "epoch": 2.79, "grad_norm": 4.147943985141481, "learning_rate": 1.5315300194350624e-07, "loss": 0.3376, "step": 65845 }, { "epoch": 2.79, "grad_norm": 4.088802083529507, "learning_rate": 1.528507747727298e-07, "loss": 0.2829, "step": 65850 }, { "epoch": 2.79, "grad_norm": 3.625653074701914, "learning_rate": 1.5254884147033355e-07, "loss": 0.289, "step": 65855 }, { "epoch": 2.79, "grad_norm": 3.7981901939653686, "learning_rate": 1.522472020546223e-07, "loss": 0.3024, "step": 65860 }, { "epoch": 2.79, "grad_norm": 3.4331042906156117, "learning_rate": 1.519458565438836e-07, "loss": 0.2972, "step": 65865 }, { "epoch": 2.79, "grad_norm": 3.8856951238989987, "learning_rate": 1.516448049563868e-07, "loss": 0.2938, "step": 65870 }, { "epoch": 2.79, "grad_norm": 4.02024041886062, "learning_rate": 1.51344047310385e-07, "loss": 0.3137, "step": 65875 }, { "epoch": 2.79, "grad_norm": 4.161525451083014, "learning_rate": 1.5104358362411032e-07, "loss": 0.3009, "step": 65880 }, { "epoch": 2.79, "grad_norm": 4.208350316959127, "learning_rate": 1.5074341391578095e-07, "loss": 0.301, "step": 65885 }, { "epoch": 2.79, "grad_norm": 4.246633644909555, "learning_rate": 1.504435382035946e-07, "loss": 0.3126, "step": 65890 }, { "epoch": 2.79, "grad_norm": 3.592633898423814, "learning_rate": 1.501439565057311e-07, "loss": 0.2825, "step": 65895 }, { "epoch": 2.79, "grad_norm": 4.033431196340206, "learning_rate": 1.4984466884035376e-07, "loss": 0.3091, "step": 65900 }, { "epoch": 2.79, "grad_norm": 3.4427903745678985, "learning_rate": 1.4954567522560804e-07, "loss": 0.3106, "step": 65905 }, { "epoch": 2.79, "grad_norm": 4.13457974549506, "learning_rate": 1.4924697567961998e-07, "loss": 0.2988, "step": 65910 }, { "epoch": 2.79, "grad_norm": 4.413833724534987, "learning_rate": 1.4894857022050014e-07, "loss": 0.3029, "step": 65915 }, { "epoch": 2.79, "grad_norm": 4.544897487000302, "learning_rate": 1.48650458866339e-07, "loss": 0.3283, "step": 65920 }, { "epoch": 2.79, "grad_norm": 3.650366655502041, "learning_rate": 1.4835264163521047e-07, "loss": 0.302, "step": 65925 }, { "epoch": 2.79, "grad_norm": 3.834593135339089, "learning_rate": 1.4805511854517006e-07, "loss": 0.3188, "step": 65930 }, { "epoch": 2.79, "grad_norm": 3.7003760460264923, "learning_rate": 1.4775788961425563e-07, "loss": 0.317, "step": 65935 }, { "epoch": 2.79, "grad_norm": 3.97500191055279, "learning_rate": 1.4746095486048774e-07, "loss": 0.3174, "step": 65940 }, { "epoch": 2.79, "grad_norm": 3.864482816397804, "learning_rate": 1.4716431430186918e-07, "loss": 0.3107, "step": 65945 }, { "epoch": 2.79, "grad_norm": 4.1889489305325815, "learning_rate": 1.4686796795638337e-07, "loss": 0.2941, "step": 65950 }, { "epoch": 2.79, "grad_norm": 3.9135850723917853, "learning_rate": 1.4657191584199703e-07, "loss": 0.314, "step": 65955 }, { "epoch": 2.79, "grad_norm": 4.062020471468528, "learning_rate": 1.4627615797665974e-07, "loss": 0.3186, "step": 65960 }, { "epoch": 2.79, "grad_norm": 4.443281254147498, "learning_rate": 1.4598069437830097e-07, "loss": 0.3119, "step": 65965 }, { "epoch": 2.79, "grad_norm": 4.052735967007083, "learning_rate": 1.4568552506483537e-07, "loss": 0.3122, "step": 65970 }, { "epoch": 2.79, "grad_norm": 3.965357707240167, "learning_rate": 1.4539065005415742e-07, "loss": 0.3072, "step": 65975 }, { "epoch": 2.79, "grad_norm": 4.226944156444805, "learning_rate": 1.4509606936414455e-07, "loss": 0.3092, "step": 65980 }, { "epoch": 2.79, "grad_norm": 4.071702724945526, "learning_rate": 1.4480178301265635e-07, "loss": 0.3117, "step": 65985 }, { "epoch": 2.79, "grad_norm": 5.023876085145244, "learning_rate": 1.445077910175352e-07, "loss": 0.3256, "step": 65990 }, { "epoch": 2.79, "grad_norm": 3.8785603258719106, "learning_rate": 1.4421409339660353e-07, "loss": 0.2957, "step": 65995 }, { "epoch": 2.79, "grad_norm": 3.911163729482931, "learning_rate": 1.4392069016766874e-07, "loss": 0.2872, "step": 66000 }, { "epoch": 2.79, "grad_norm": 4.028421561638905, "learning_rate": 1.4362758134851772e-07, "loss": 0.2926, "step": 66005 }, { "epoch": 2.79, "grad_norm": 4.325781685512501, "learning_rate": 1.4333476695692296e-07, "loss": 0.2872, "step": 66010 }, { "epoch": 2.79, "grad_norm": 3.9056340251082333, "learning_rate": 1.4304224701063406e-07, "loss": 0.3223, "step": 66015 }, { "epoch": 2.79, "grad_norm": 3.7310709054637488, "learning_rate": 1.427500215273875e-07, "loss": 0.2802, "step": 66020 }, { "epoch": 2.79, "grad_norm": 4.141366499944626, "learning_rate": 1.4245809052490012e-07, "loss": 0.306, "step": 66025 }, { "epoch": 2.79, "grad_norm": 3.739957568213342, "learning_rate": 1.4216645402087003e-07, "loss": 0.3028, "step": 66030 }, { "epoch": 2.79, "grad_norm": 3.752018530157752, "learning_rate": 1.4187511203297865e-07, "loss": 0.3123, "step": 66035 }, { "epoch": 2.79, "grad_norm": 4.146440336732898, "learning_rate": 1.4158406457888908e-07, "loss": 0.2967, "step": 66040 }, { "epoch": 2.8, "grad_norm": 4.3917037196166975, "learning_rate": 1.412933116762466e-07, "loss": 0.3172, "step": 66045 }, { "epoch": 2.8, "grad_norm": 3.966434214472327, "learning_rate": 1.4100285334267883e-07, "loss": 0.2909, "step": 66050 }, { "epoch": 2.8, "grad_norm": 4.032815589787488, "learning_rate": 1.4071268959579554e-07, "loss": 0.2995, "step": 66055 }, { "epoch": 2.8, "grad_norm": 3.6738601771493418, "learning_rate": 1.4042282045318877e-07, "loss": 0.3349, "step": 66060 }, { "epoch": 2.8, "grad_norm": 4.014133544085371, "learning_rate": 1.4013324593243173e-07, "loss": 0.296, "step": 66065 }, { "epoch": 2.8, "grad_norm": 3.4958918166402295, "learning_rate": 1.398439660510803e-07, "loss": 0.3116, "step": 66070 }, { "epoch": 2.8, "grad_norm": 3.639524101433475, "learning_rate": 1.395549808266744e-07, "loss": 0.2999, "step": 66075 }, { "epoch": 2.8, "grad_norm": 3.8281172121019464, "learning_rate": 1.3926629027673222e-07, "loss": 0.3099, "step": 66080 }, { "epoch": 2.8, "grad_norm": 3.8134126758102402, "learning_rate": 1.3897789441875753e-07, "loss": 0.2731, "step": 66085 }, { "epoch": 2.8, "grad_norm": 4.665191917005747, "learning_rate": 1.3868979327023358e-07, "loss": 0.3197, "step": 66090 }, { "epoch": 2.8, "grad_norm": 4.7703843925263225, "learning_rate": 1.3840198684862915e-07, "loss": 0.3023, "step": 66095 }, { "epoch": 2.8, "grad_norm": 3.8900945243153995, "learning_rate": 1.3811447517139086e-07, "loss": 0.3041, "step": 66100 }, { "epoch": 2.8, "grad_norm": 4.7938056430600655, "learning_rate": 1.3782725825595146e-07, "loss": 0.2771, "step": 66105 }, { "epoch": 2.8, "grad_norm": 3.9852064453687115, "learning_rate": 1.375403361197236e-07, "loss": 0.3366, "step": 66110 }, { "epoch": 2.8, "grad_norm": 3.8460223738589363, "learning_rate": 1.372537087801018e-07, "loss": 0.3116, "step": 66115 }, { "epoch": 2.8, "grad_norm": 3.822705939614552, "learning_rate": 1.3696737625446376e-07, "loss": 0.2989, "step": 66120 }, { "epoch": 2.8, "grad_norm": 4.217222978473458, "learning_rate": 1.3668133856016953e-07, "loss": 0.3085, "step": 66125 }, { "epoch": 2.8, "grad_norm": 3.694765075114718, "learning_rate": 1.3639559571456074e-07, "loss": 0.3018, "step": 66130 }, { "epoch": 2.8, "grad_norm": 3.8911314611795067, "learning_rate": 1.3611014773496024e-07, "loss": 0.2812, "step": 66135 }, { "epoch": 2.8, "grad_norm": 3.9126210855348806, "learning_rate": 1.3582499463867416e-07, "loss": 0.3296, "step": 66140 }, { "epoch": 2.8, "grad_norm": 4.439518418745524, "learning_rate": 1.3554013644299145e-07, "loss": 0.3112, "step": 66145 }, { "epoch": 2.8, "grad_norm": 3.939802094505862, "learning_rate": 1.3525557316518113e-07, "loss": 0.3211, "step": 66150 }, { "epoch": 2.8, "grad_norm": 3.8076097102150306, "learning_rate": 1.3497130482249598e-07, "loss": 0.2915, "step": 66155 }, { "epoch": 2.8, "grad_norm": 3.4456904535647266, "learning_rate": 1.346873314321706e-07, "loss": 0.2933, "step": 66160 }, { "epoch": 2.8, "grad_norm": 4.447273713019155, "learning_rate": 1.344036530114201e-07, "loss": 0.3326, "step": 66165 }, { "epoch": 2.8, "grad_norm": 3.7500368607705625, "learning_rate": 1.3412026957744463e-07, "loss": 0.3048, "step": 66170 }, { "epoch": 2.8, "grad_norm": 3.6303736126457324, "learning_rate": 1.3383718114742373e-07, "loss": 0.2961, "step": 66175 }, { "epoch": 2.8, "grad_norm": 3.719380208876276, "learning_rate": 1.3355438773852202e-07, "loss": 0.3075, "step": 66180 }, { "epoch": 2.8, "grad_norm": 3.6397414587303296, "learning_rate": 1.3327188936788194e-07, "loss": 0.2915, "step": 66185 }, { "epoch": 2.8, "grad_norm": 4.090318334292168, "learning_rate": 1.329896860526325e-07, "loss": 0.2946, "step": 66190 }, { "epoch": 2.8, "grad_norm": 3.8903856233679655, "learning_rate": 1.327077778098823e-07, "loss": 0.3034, "step": 66195 }, { "epoch": 2.8, "grad_norm": 3.6919065182351027, "learning_rate": 1.324261646567221e-07, "loss": 0.3183, "step": 66200 }, { "epoch": 2.8, "grad_norm": 4.409039021198208, "learning_rate": 1.3214484661022543e-07, "loss": 0.304, "step": 66205 }, { "epoch": 2.8, "grad_norm": 3.59498819413163, "learning_rate": 1.318638236874481e-07, "loss": 0.2906, "step": 66210 }, { "epoch": 2.8, "grad_norm": 3.9360898193845855, "learning_rate": 1.315830959054276e-07, "loss": 0.2923, "step": 66215 }, { "epoch": 2.8, "grad_norm": 3.924996466679918, "learning_rate": 1.313026632811837e-07, "loss": 0.3068, "step": 66220 }, { "epoch": 2.8, "grad_norm": 3.4859392814785477, "learning_rate": 1.310225258317177e-07, "loss": 0.3051, "step": 66225 }, { "epoch": 2.8, "grad_norm": 3.703131483792007, "learning_rate": 1.30742683574015e-07, "loss": 0.3141, "step": 66230 }, { "epoch": 2.8, "grad_norm": 4.034385462896406, "learning_rate": 1.3046313652503918e-07, "loss": 0.2771, "step": 66235 }, { "epoch": 2.8, "grad_norm": 3.814564600304197, "learning_rate": 1.301838847017406e-07, "loss": 0.2934, "step": 66240 }, { "epoch": 2.8, "grad_norm": 4.406916117055063, "learning_rate": 1.299049281210485e-07, "loss": 0.3217, "step": 66245 }, { "epoch": 2.8, "grad_norm": 4.3186557228139755, "learning_rate": 1.2962626679987543e-07, "loss": 0.3265, "step": 66250 }, { "epoch": 2.8, "grad_norm": 3.8366800017247673, "learning_rate": 1.2934790075511516e-07, "loss": 0.3018, "step": 66255 }, { "epoch": 2.8, "grad_norm": 4.264645999545481, "learning_rate": 1.290698300036447e-07, "loss": 0.2936, "step": 66260 }, { "epoch": 2.8, "grad_norm": 3.7968885158637278, "learning_rate": 1.287920545623228e-07, "loss": 0.3119, "step": 66265 }, { "epoch": 2.8, "grad_norm": 3.6999912154949843, "learning_rate": 1.2851457444798988e-07, "loss": 0.3171, "step": 66270 }, { "epoch": 2.8, "grad_norm": 4.896802516960357, "learning_rate": 1.2823738967746912e-07, "loss": 0.331, "step": 66275 }, { "epoch": 2.81, "grad_norm": 3.49203861117748, "learning_rate": 1.2796050026756546e-07, "loss": 0.274, "step": 66280 }, { "epoch": 2.81, "grad_norm": 3.6950060753647525, "learning_rate": 1.2768390623506487e-07, "loss": 0.2977, "step": 66285 }, { "epoch": 2.81, "grad_norm": 4.522275482796303, "learning_rate": 1.274076075967373e-07, "loss": 0.3094, "step": 66290 }, { "epoch": 2.81, "grad_norm": 4.437475305388126, "learning_rate": 1.2713160436933437e-07, "loss": 0.2871, "step": 66295 }, { "epoch": 2.81, "grad_norm": 4.047906103524821, "learning_rate": 1.2685589656958818e-07, "loss": 0.34, "step": 66300 }, { "epoch": 2.81, "grad_norm": 4.626234582023741, "learning_rate": 1.2658048421421487e-07, "loss": 0.3084, "step": 66305 }, { "epoch": 2.81, "grad_norm": 4.250559563406509, "learning_rate": 1.2630536731991106e-07, "loss": 0.2964, "step": 66310 }, { "epoch": 2.81, "grad_norm": 3.8933911771135854, "learning_rate": 1.2603054590335785e-07, "loss": 0.3189, "step": 66315 }, { "epoch": 2.81, "grad_norm": 4.580270398139942, "learning_rate": 1.2575601998121577e-07, "loss": 0.2813, "step": 66320 }, { "epoch": 2.81, "grad_norm": 4.473838338085486, "learning_rate": 1.2548178957012826e-07, "loss": 0.3129, "step": 66325 }, { "epoch": 2.81, "grad_norm": 4.219292715687132, "learning_rate": 1.2520785468672192e-07, "loss": 0.3014, "step": 66330 }, { "epoch": 2.81, "grad_norm": 3.6997243931386485, "learning_rate": 1.2493421534760354e-07, "loss": 0.3042, "step": 66335 }, { "epoch": 2.81, "grad_norm": 3.6345014932830244, "learning_rate": 1.2466087156936368e-07, "loss": 0.3027, "step": 66340 }, { "epoch": 2.81, "grad_norm": 3.8904124113843737, "learning_rate": 1.2438782336857525e-07, "loss": 0.3091, "step": 66345 }, { "epoch": 2.81, "grad_norm": 3.8721156041205114, "learning_rate": 1.2411507076179108e-07, "loss": 0.2981, "step": 66350 }, { "epoch": 2.81, "grad_norm": 3.8044772393915336, "learning_rate": 1.238426137655474e-07, "loss": 0.3002, "step": 66355 }, { "epoch": 2.81, "grad_norm": 3.9387966053478314, "learning_rate": 1.2357045239636323e-07, "loss": 0.2817, "step": 66360 }, { "epoch": 2.81, "grad_norm": 4.355239811357412, "learning_rate": 1.232985866707387e-07, "loss": 0.3168, "step": 66365 }, { "epoch": 2.81, "grad_norm": 4.0207966569729265, "learning_rate": 1.2302701660515558e-07, "loss": 0.3299, "step": 66370 }, { "epoch": 2.81, "grad_norm": 3.4585672106122747, "learning_rate": 1.2275574221607912e-07, "loss": 0.2923, "step": 66375 }, { "epoch": 2.81, "grad_norm": 3.4851600903793774, "learning_rate": 1.2248476351995609e-07, "loss": 0.2884, "step": 66380 }, { "epoch": 2.81, "grad_norm": 3.6264083475493236, "learning_rate": 1.2221408053321392e-07, "loss": 0.3101, "step": 66385 }, { "epoch": 2.81, "grad_norm": 4.371861768326324, "learning_rate": 1.2194369327226397e-07, "loss": 0.293, "step": 66390 }, { "epoch": 2.81, "grad_norm": 3.6737664898381173, "learning_rate": 1.2167360175349975e-07, "loss": 0.2697, "step": 66395 }, { "epoch": 2.81, "grad_norm": 4.006439456031335, "learning_rate": 1.2140380599329537e-07, "loss": 0.3001, "step": 66400 }, { "epoch": 2.81, "grad_norm": 3.7863118098481534, "learning_rate": 1.211343060080078e-07, "loss": 0.3427, "step": 66405 }, { "epoch": 2.81, "grad_norm": 7.956955039903079, "learning_rate": 1.208651018139756e-07, "loss": 0.3299, "step": 66410 }, { "epoch": 2.81, "grad_norm": 3.740787110781312, "learning_rate": 1.2059619342752126e-07, "loss": 0.2937, "step": 66415 }, { "epoch": 2.81, "grad_norm": 3.710710325984871, "learning_rate": 1.2032758086494621e-07, "loss": 0.3005, "step": 66420 }, { "epoch": 2.81, "grad_norm": 4.17434892568975, "learning_rate": 1.2005926414253633e-07, "loss": 0.2888, "step": 66425 }, { "epoch": 2.81, "grad_norm": 4.156747286732518, "learning_rate": 1.1979124327655911e-07, "loss": 0.3278, "step": 66430 }, { "epoch": 2.81, "grad_norm": 4.844988407388581, "learning_rate": 1.195235182832638e-07, "loss": 0.2961, "step": 66435 }, { "epoch": 2.81, "grad_norm": 3.7061772966782365, "learning_rate": 1.192560891788813e-07, "loss": 0.2834, "step": 66440 }, { "epoch": 2.81, "grad_norm": 3.8501975516257794, "learning_rate": 1.189889559796259e-07, "loss": 0.3198, "step": 66445 }, { "epoch": 2.81, "grad_norm": 5.223992057883741, "learning_rate": 1.1872211870169236e-07, "loss": 0.3175, "step": 66450 }, { "epoch": 2.81, "grad_norm": 3.9418931560435895, "learning_rate": 1.1845557736125835e-07, "loss": 0.3131, "step": 66455 }, { "epoch": 2.81, "grad_norm": 4.246955262915597, "learning_rate": 1.1818933197448368e-07, "loss": 0.2881, "step": 66460 }, { "epoch": 2.81, "grad_norm": 4.138000130925753, "learning_rate": 1.1792338255751045e-07, "loss": 0.3006, "step": 66465 }, { "epoch": 2.81, "grad_norm": 4.282356569696341, "learning_rate": 1.1765772912646133e-07, "loss": 0.2985, "step": 66470 }, { "epoch": 2.81, "grad_norm": 3.965595060283118, "learning_rate": 1.1739237169744288e-07, "loss": 0.3499, "step": 66475 }, { "epoch": 2.81, "grad_norm": 3.510424965104573, "learning_rate": 1.1712731028654279e-07, "loss": 0.3071, "step": 66480 }, { "epoch": 2.81, "grad_norm": 3.641196288338066, "learning_rate": 1.1686254490983096e-07, "loss": 0.2732, "step": 66485 }, { "epoch": 2.81, "grad_norm": 3.7716152403759553, "learning_rate": 1.16598075583359e-07, "loss": 0.2958, "step": 66490 }, { "epoch": 2.81, "grad_norm": 3.7126417140555814, "learning_rate": 1.1633390232316132e-07, "loss": 0.2912, "step": 66495 }, { "epoch": 2.81, "grad_norm": 3.9047309377096298, "learning_rate": 1.16070025145254e-07, "loss": 0.3091, "step": 66500 }, { "epoch": 2.81, "grad_norm": 3.6744445091962383, "learning_rate": 1.1580644406563479e-07, "loss": 0.3072, "step": 66505 }, { "epoch": 2.81, "grad_norm": 3.773190221162525, "learning_rate": 1.1554315910028424e-07, "loss": 0.2989, "step": 66510 }, { "epoch": 2.82, "grad_norm": 4.322302187142827, "learning_rate": 1.1528017026516458e-07, "loss": 0.2927, "step": 66515 }, { "epoch": 2.82, "grad_norm": 4.227083381794769, "learning_rate": 1.1501747757621918e-07, "loss": 0.3149, "step": 66520 }, { "epoch": 2.82, "grad_norm": 3.8731382124985827, "learning_rate": 1.1475508104937472e-07, "loss": 0.2752, "step": 66525 }, { "epoch": 2.82, "grad_norm": 4.051716361737263, "learning_rate": 1.1449298070054016e-07, "loss": 0.3037, "step": 66530 }, { "epoch": 2.82, "grad_norm": 4.025524018018709, "learning_rate": 1.1423117654560555e-07, "loss": 0.3067, "step": 66535 }, { "epoch": 2.82, "grad_norm": 3.814428004942523, "learning_rate": 1.1396966860044267e-07, "loss": 0.3134, "step": 66540 }, { "epoch": 2.82, "grad_norm": 3.8299371845202486, "learning_rate": 1.1370845688090715e-07, "loss": 0.3122, "step": 66545 }, { "epoch": 2.82, "grad_norm": 4.494629189356971, "learning_rate": 1.1344754140283464e-07, "loss": 0.3023, "step": 66550 }, { "epoch": 2.82, "grad_norm": 4.018656238634794, "learning_rate": 1.1318692218204363e-07, "loss": 0.2883, "step": 66555 }, { "epoch": 2.82, "grad_norm": 4.031157030415214, "learning_rate": 1.129265992343348e-07, "loss": 0.3082, "step": 66560 }, { "epoch": 2.82, "grad_norm": 3.9132225889676575, "learning_rate": 1.1266657257549162e-07, "loss": 0.3046, "step": 66565 }, { "epoch": 2.82, "grad_norm": 3.916739331239777, "learning_rate": 1.1240684222127818e-07, "loss": 0.2943, "step": 66570 }, { "epoch": 2.82, "grad_norm": 3.719415444070998, "learning_rate": 1.1214740818744019e-07, "loss": 0.3074, "step": 66575 }, { "epoch": 2.82, "grad_norm": 4.08373562402601, "learning_rate": 1.118882704897073e-07, "loss": 0.28, "step": 66580 }, { "epoch": 2.82, "grad_norm": 4.24456280357786, "learning_rate": 1.1162942914379083e-07, "loss": 0.2924, "step": 66585 }, { "epoch": 2.82, "grad_norm": 3.660316792103006, "learning_rate": 1.1137088416538267e-07, "loss": 0.3135, "step": 66590 }, { "epoch": 2.82, "grad_norm": 3.698729900036621, "learning_rate": 1.111126355701575e-07, "loss": 0.3012, "step": 66595 }, { "epoch": 2.82, "grad_norm": 4.167581656179021, "learning_rate": 1.1085468337377281e-07, "loss": 0.3075, "step": 66600 }, { "epoch": 2.82, "grad_norm": 3.8171665310468548, "learning_rate": 1.105970275918672e-07, "loss": 0.2958, "step": 66605 }, { "epoch": 2.82, "grad_norm": 3.9681352687333904, "learning_rate": 1.1033966824006092e-07, "loss": 0.3119, "step": 66610 }, { "epoch": 2.82, "grad_norm": 3.9633875908972263, "learning_rate": 1.100826053339582e-07, "loss": 0.3119, "step": 66615 }, { "epoch": 2.82, "grad_norm": 3.730272699542559, "learning_rate": 1.0982583888914377e-07, "loss": 0.3036, "step": 66620 }, { "epoch": 2.82, "grad_norm": 3.6601226956168045, "learning_rate": 1.0956936892118353e-07, "loss": 0.3141, "step": 66625 }, { "epoch": 2.82, "grad_norm": 4.190035551682392, "learning_rate": 1.0931319544562724e-07, "loss": 0.3018, "step": 66630 }, { "epoch": 2.82, "grad_norm": 3.9608967123520027, "learning_rate": 1.0905731847800693e-07, "loss": 0.2923, "step": 66635 }, { "epoch": 2.82, "grad_norm": 3.8473340876702293, "learning_rate": 1.0880173803383354e-07, "loss": 0.3096, "step": 66640 }, { "epoch": 2.82, "grad_norm": 4.433265141912316, "learning_rate": 1.0854645412860353e-07, "loss": 0.3059, "step": 66645 }, { "epoch": 2.82, "grad_norm": 3.8555058767248305, "learning_rate": 1.0829146677779456e-07, "loss": 0.299, "step": 66650 }, { "epoch": 2.82, "grad_norm": 4.1060316409972, "learning_rate": 1.0803677599686369e-07, "loss": 0.3112, "step": 66655 }, { "epoch": 2.82, "grad_norm": 4.318509897899225, "learning_rate": 1.0778238180125411e-07, "loss": 0.3058, "step": 66660 }, { "epoch": 2.82, "grad_norm": 3.7757600730691934, "learning_rate": 1.0752828420638795e-07, "loss": 0.29, "step": 66665 }, { "epoch": 2.82, "grad_norm": 4.289543254221664, "learning_rate": 1.072744832276712e-07, "loss": 0.3093, "step": 66670 }, { "epoch": 2.82, "grad_norm": 3.714514357589515, "learning_rate": 1.070209788804899e-07, "loss": 0.3133, "step": 66675 }, { "epoch": 2.82, "grad_norm": 3.7879337111108913, "learning_rate": 1.0676777118021397e-07, "loss": 0.2798, "step": 66680 }, { "epoch": 2.82, "grad_norm": 3.9285849043468937, "learning_rate": 1.0651486014219503e-07, "loss": 0.2822, "step": 66685 }, { "epoch": 2.82, "grad_norm": 3.968928836829013, "learning_rate": 1.0626224578176525e-07, "loss": 0.2777, "step": 66690 }, { "epoch": 2.82, "grad_norm": 3.948881254646119, "learning_rate": 1.0600992811424071e-07, "loss": 0.3056, "step": 66695 }, { "epoch": 2.82, "grad_norm": 3.89588114156917, "learning_rate": 1.0575790715491918e-07, "loss": 0.2935, "step": 66700 }, { "epoch": 2.82, "grad_norm": 4.277950863043998, "learning_rate": 1.0550618291907844e-07, "loss": 0.2798, "step": 66705 }, { "epoch": 2.82, "grad_norm": 4.991506313733201, "learning_rate": 1.0525475542198072e-07, "loss": 0.3286, "step": 66710 }, { "epoch": 2.82, "grad_norm": 3.9552777567751463, "learning_rate": 1.0500362467886937e-07, "loss": 0.2991, "step": 66715 }, { "epoch": 2.82, "grad_norm": 3.6976846786492086, "learning_rate": 1.0475279070497001e-07, "loss": 0.3166, "step": 66720 }, { "epoch": 2.82, "grad_norm": 4.087263916805855, "learning_rate": 1.0450225351548993e-07, "loss": 0.3147, "step": 66725 }, { "epoch": 2.82, "grad_norm": 3.6776440146372, "learning_rate": 1.0425201312561695e-07, "loss": 0.3208, "step": 66730 }, { "epoch": 2.82, "grad_norm": 3.642004041983497, "learning_rate": 1.0400206955052395e-07, "loss": 0.3248, "step": 66735 }, { "epoch": 2.82, "grad_norm": 3.8480691587041855, "learning_rate": 1.037524228053638e-07, "loss": 0.2957, "step": 66740 }, { "epoch": 2.82, "grad_norm": 3.9243982774402513, "learning_rate": 1.0350307290527162e-07, "loss": 0.3265, "step": 66745 }, { "epoch": 2.82, "grad_norm": 3.7087642966822187, "learning_rate": 1.0325401986536532e-07, "loss": 0.3021, "step": 66750 }, { "epoch": 2.83, "grad_norm": 4.304901180263585, "learning_rate": 1.0300526370074393e-07, "loss": 0.3108, "step": 66755 }, { "epoch": 2.83, "grad_norm": 4.640901913698597, "learning_rate": 1.0275680442648873e-07, "loss": 0.3078, "step": 66760 }, { "epoch": 2.83, "grad_norm": 3.97890554650979, "learning_rate": 1.025086420576632e-07, "loss": 0.3269, "step": 66765 }, { "epoch": 2.83, "grad_norm": 4.475593604317089, "learning_rate": 1.0226077660931311e-07, "loss": 0.2902, "step": 66770 }, { "epoch": 2.83, "grad_norm": 3.923594932682321, "learning_rate": 1.0201320809646476e-07, "loss": 0.3, "step": 66775 }, { "epoch": 2.83, "grad_norm": 3.472157445624218, "learning_rate": 1.0176593653412837e-07, "loss": 0.2782, "step": 66780 }, { "epoch": 2.83, "grad_norm": 3.5476683857251676, "learning_rate": 1.0151896193729471e-07, "loss": 0.3155, "step": 66785 }, { "epoch": 2.83, "grad_norm": 3.793003852099486, "learning_rate": 1.0127228432093794e-07, "loss": 0.2847, "step": 66790 }, { "epoch": 2.83, "grad_norm": 3.987219964957159, "learning_rate": 1.0102590370001275e-07, "loss": 0.2955, "step": 66795 }, { "epoch": 2.83, "grad_norm": 4.233337728231237, "learning_rate": 1.0077982008945608e-07, "loss": 0.2966, "step": 66800 }, { "epoch": 2.83, "grad_norm": 4.154253761603253, "learning_rate": 1.0053403350418877e-07, "loss": 0.2884, "step": 66805 }, { "epoch": 2.83, "grad_norm": 3.7180060022390804, "learning_rate": 1.0028854395911059e-07, "loss": 0.3111, "step": 66810 }, { "epoch": 2.83, "grad_norm": 4.230618273330698, "learning_rate": 1.0004335146910516e-07, "loss": 0.3244, "step": 66815 }, { "epoch": 2.83, "grad_norm": 3.9089877683429024, "learning_rate": 9.979845604903837e-08, "loss": 0.2684, "step": 66820 }, { "epoch": 2.83, "grad_norm": 4.2535574983620545, "learning_rate": 9.955385771375725e-08, "loss": 0.31, "step": 66825 }, { "epoch": 2.83, "grad_norm": 3.804029814543924, "learning_rate": 9.930955647809049e-08, "loss": 0.2864, "step": 66830 }, { "epoch": 2.83, "grad_norm": 4.067805896880169, "learning_rate": 9.906555235685066e-08, "loss": 0.3078, "step": 66835 }, { "epoch": 2.83, "grad_norm": 4.181333226734484, "learning_rate": 9.882184536482987e-08, "loss": 0.3165, "step": 66840 }, { "epoch": 2.83, "grad_norm": 4.0228709019557405, "learning_rate": 9.857843551680346e-08, "loss": 0.3166, "step": 66845 }, { "epoch": 2.83, "grad_norm": 3.9351911751079607, "learning_rate": 9.833532282752912e-08, "loss": 0.3183, "step": 66850 }, { "epoch": 2.83, "grad_norm": 3.8229410262220975, "learning_rate": 9.809250731174558e-08, "loss": 0.3053, "step": 66855 }, { "epoch": 2.83, "grad_norm": 4.169923979475677, "learning_rate": 9.784998898417441e-08, "loss": 0.28, "step": 66860 }, { "epoch": 2.83, "grad_norm": 3.780588783471478, "learning_rate": 9.760776785951886e-08, "loss": 0.3093, "step": 66865 }, { "epoch": 2.83, "grad_norm": 3.7373309534321053, "learning_rate": 9.736584395246384e-08, "loss": 0.325, "step": 66870 }, { "epoch": 2.83, "grad_norm": 3.718656870711661, "learning_rate": 9.712421727767651e-08, "loss": 0.2828, "step": 66875 }, { "epoch": 2.83, "grad_norm": 3.6361435903433663, "learning_rate": 9.68828878498057e-08, "loss": 0.2988, "step": 66880 }, { "epoch": 2.83, "grad_norm": 4.198849493646334, "learning_rate": 9.664185568348305e-08, "loss": 0.3024, "step": 66885 }, { "epoch": 2.83, "grad_norm": 3.833330956417512, "learning_rate": 9.640112079332186e-08, "loss": 0.3067, "step": 66890 }, { "epoch": 2.83, "grad_norm": 3.604323995141905, "learning_rate": 9.616068319391603e-08, "loss": 0.3095, "step": 66895 }, { "epoch": 2.83, "grad_norm": 4.084264266208825, "learning_rate": 9.592054289984332e-08, "loss": 0.3288, "step": 66900 }, { "epoch": 2.83, "grad_norm": 3.8986920321468745, "learning_rate": 9.568069992566265e-08, "loss": 0.3095, "step": 66905 }, { "epoch": 2.83, "grad_norm": 3.471322968063947, "learning_rate": 9.544115428591515e-08, "loss": 0.3116, "step": 66910 }, { "epoch": 2.83, "grad_norm": 3.922905062612245, "learning_rate": 9.520190599512314e-08, "loss": 0.2908, "step": 66915 }, { "epoch": 2.83, "grad_norm": 4.014577940712433, "learning_rate": 9.496295506779219e-08, "loss": 0.3122, "step": 66920 }, { "epoch": 2.83, "grad_norm": 4.290409916522706, "learning_rate": 9.472430151840962e-08, "loss": 0.2965, "step": 66925 }, { "epoch": 2.83, "grad_norm": 4.086715247383836, "learning_rate": 9.44859453614433e-08, "loss": 0.2938, "step": 66930 }, { "epoch": 2.83, "grad_norm": 3.761564399289158, "learning_rate": 9.424788661134444e-08, "loss": 0.2985, "step": 66935 }, { "epoch": 2.83, "grad_norm": 3.987361858351176, "learning_rate": 9.401012528254594e-08, "loss": 0.2968, "step": 66940 }, { "epoch": 2.83, "grad_norm": 4.1126567584095595, "learning_rate": 9.377266138946239e-08, "loss": 0.3273, "step": 66945 }, { "epoch": 2.83, "grad_norm": 4.24645724976815, "learning_rate": 9.353549494649006e-08, "loss": 0.3007, "step": 66950 }, { "epoch": 2.83, "grad_norm": 3.843980853445743, "learning_rate": 9.329862596800909e-08, "loss": 0.3169, "step": 66955 }, { "epoch": 2.83, "grad_norm": 3.9311125650960994, "learning_rate": 9.306205446837912e-08, "loss": 0.291, "step": 66960 }, { "epoch": 2.83, "grad_norm": 4.421728961437795, "learning_rate": 9.282578046194257e-08, "loss": 0.3164, "step": 66965 }, { "epoch": 2.83, "grad_norm": 3.8054452522309687, "learning_rate": 9.258980396302409e-08, "loss": 0.2996, "step": 66970 }, { "epoch": 2.83, "grad_norm": 3.9598817795196797, "learning_rate": 9.235412498593165e-08, "loss": 0.3023, "step": 66975 }, { "epoch": 2.83, "grad_norm": 3.6019334100173337, "learning_rate": 9.211874354495164e-08, "loss": 0.3101, "step": 66980 }, { "epoch": 2.83, "grad_norm": 3.8374474195160357, "learning_rate": 9.188365965435598e-08, "loss": 0.2989, "step": 66985 }, { "epoch": 2.84, "grad_norm": 4.382311891476272, "learning_rate": 9.164887332839656e-08, "loss": 0.306, "step": 66990 }, { "epoch": 2.84, "grad_norm": 4.232989210342415, "learning_rate": 9.141438458130814e-08, "loss": 0.3057, "step": 66995 }, { "epoch": 2.84, "grad_norm": 3.9487858540544245, "learning_rate": 9.118019342730655e-08, "loss": 0.3332, "step": 67000 }, { "epoch": 2.84, "grad_norm": 3.9949049276147126, "learning_rate": 9.0946299880591e-08, "loss": 0.3063, "step": 67005 }, { "epoch": 2.84, "grad_norm": 5.4523534242535945, "learning_rate": 9.07127039553407e-08, "loss": 0.3016, "step": 67010 }, { "epoch": 2.84, "grad_norm": 3.965588192676178, "learning_rate": 9.047940566571823e-08, "loss": 0.2986, "step": 67015 }, { "epoch": 2.84, "grad_norm": 3.5922554495644334, "learning_rate": 9.024640502586835e-08, "loss": 0.2834, "step": 67020 }, { "epoch": 2.84, "grad_norm": 3.5436549591077435, "learning_rate": 9.0013702049917e-08, "loss": 0.2961, "step": 67025 }, { "epoch": 2.84, "grad_norm": 3.97567368738804, "learning_rate": 8.978129675197122e-08, "loss": 0.3012, "step": 67030 }, { "epoch": 2.84, "grad_norm": 3.913069323928701, "learning_rate": 8.954918914612254e-08, "loss": 0.2815, "step": 67035 }, { "epoch": 2.84, "grad_norm": 3.897313176201929, "learning_rate": 8.931737924644247e-08, "loss": 0.3037, "step": 67040 }, { "epoch": 2.84, "grad_norm": 3.8299068606769406, "learning_rate": 8.908586706698419e-08, "loss": 0.3211, "step": 67045 }, { "epoch": 2.84, "grad_norm": 3.591180751762232, "learning_rate": 8.885465262178483e-08, "loss": 0.3101, "step": 67050 }, { "epoch": 2.84, "grad_norm": 4.2310044345013775, "learning_rate": 8.862373592486152e-08, "loss": 0.3116, "step": 67055 }, { "epoch": 2.84, "grad_norm": 4.048123222347952, "learning_rate": 8.839311699021414e-08, "loss": 0.3159, "step": 67060 }, { "epoch": 2.84, "grad_norm": 4.606003615309165, "learning_rate": 8.816279583182375e-08, "loss": 0.3017, "step": 67065 }, { "epoch": 2.84, "grad_norm": 3.7274266699363734, "learning_rate": 8.793277246365528e-08, "loss": 0.3036, "step": 67070 }, { "epoch": 2.84, "grad_norm": 3.9281172227640746, "learning_rate": 8.770304689965426e-08, "loss": 0.3169, "step": 67075 }, { "epoch": 2.84, "grad_norm": 3.7628051387493926, "learning_rate": 8.747361915374731e-08, "loss": 0.3256, "step": 67080 }, { "epoch": 2.84, "grad_norm": 3.5778257401188416, "learning_rate": 8.72444892398444e-08, "loss": 0.308, "step": 67085 }, { "epoch": 2.84, "grad_norm": 4.129571152398946, "learning_rate": 8.701565717183725e-08, "loss": 0.297, "step": 67090 }, { "epoch": 2.84, "grad_norm": 4.49191748976813, "learning_rate": 8.67871229635997e-08, "loss": 0.2798, "step": 67095 }, { "epoch": 2.84, "grad_norm": 4.352438169831849, "learning_rate": 8.65588866289857e-08, "loss": 0.3018, "step": 67100 }, { "epoch": 2.84, "grad_norm": 4.074233368999392, "learning_rate": 8.633094818183418e-08, "loss": 0.2965, "step": 67105 }, { "epoch": 2.84, "grad_norm": 3.9033117807989948, "learning_rate": 8.610330763596298e-08, "loss": 0.2981, "step": 67110 }, { "epoch": 2.84, "grad_norm": 3.883689386157709, "learning_rate": 8.587596500517437e-08, "loss": 0.2747, "step": 67115 }, { "epoch": 2.84, "grad_norm": 4.924283114694407, "learning_rate": 8.564892030325068e-08, "loss": 0.3189, "step": 67120 }, { "epoch": 2.84, "grad_norm": 3.9881209026213793, "learning_rate": 8.542217354395755e-08, "loss": 0.3048, "step": 67125 }, { "epoch": 2.84, "grad_norm": 3.6083239392812256, "learning_rate": 8.519572474104121e-08, "loss": 0.2921, "step": 67130 }, { "epoch": 2.84, "grad_norm": 3.553843084523379, "learning_rate": 8.496957390823068e-08, "loss": 0.2847, "step": 67135 }, { "epoch": 2.84, "grad_norm": 4.49659366333283, "learning_rate": 8.474372105923834e-08, "loss": 0.2958, "step": 67140 }, { "epoch": 2.84, "grad_norm": 3.9245769426870196, "learning_rate": 8.451816620775488e-08, "loss": 0.2984, "step": 67145 }, { "epoch": 2.84, "grad_norm": 3.909139265601154, "learning_rate": 8.42929093674566e-08, "loss": 0.3151, "step": 67150 }, { "epoch": 2.84, "grad_norm": 3.754452870833101, "learning_rate": 8.406795055199868e-08, "loss": 0.2966, "step": 67155 }, { "epoch": 2.84, "grad_norm": 4.896211401292076, "learning_rate": 8.384328977502188e-08, "loss": 0.313, "step": 67160 }, { "epoch": 2.84, "grad_norm": 3.9652700647286125, "learning_rate": 8.361892705014418e-08, "loss": 0.2965, "step": 67165 }, { "epoch": 2.84, "grad_norm": 3.932440722120992, "learning_rate": 8.339486239096972e-08, "loss": 0.3188, "step": 67170 }, { "epoch": 2.84, "grad_norm": 4.026457700814128, "learning_rate": 8.317109581108263e-08, "loss": 0.2954, "step": 67175 }, { "epoch": 2.84, "grad_norm": 4.179536205621041, "learning_rate": 8.294762732404871e-08, "loss": 0.2791, "step": 67180 }, { "epoch": 2.84, "grad_norm": 4.331706014121443, "learning_rate": 8.272445694341658e-08, "loss": 0.3181, "step": 67185 }, { "epoch": 2.84, "grad_norm": 4.110975468232174, "learning_rate": 8.250158468271652e-08, "loss": 0.3083, "step": 67190 }, { "epoch": 2.84, "grad_norm": 3.661368634763695, "learning_rate": 8.227901055545995e-08, "loss": 0.2927, "step": 67195 }, { "epoch": 2.84, "grad_norm": 4.127516834305215, "learning_rate": 8.205673457514162e-08, "loss": 0.3107, "step": 67200 }, { "epoch": 2.84, "grad_norm": 4.036782015352208, "learning_rate": 8.183475675523744e-08, "loss": 0.292, "step": 67205 }, { "epoch": 2.84, "grad_norm": 3.666382375396397, "learning_rate": 8.161307710920552e-08, "loss": 0.3156, "step": 67210 }, { "epoch": 2.84, "grad_norm": 3.8317792185015334, "learning_rate": 8.139169565048399e-08, "loss": 0.2946, "step": 67215 }, { "epoch": 2.84, "grad_norm": 3.9666831557572815, "learning_rate": 8.117061239249602e-08, "loss": 0.2888, "step": 67220 }, { "epoch": 2.85, "grad_norm": 3.7162140116190088, "learning_rate": 8.094982734864476e-08, "loss": 0.2838, "step": 67225 }, { "epoch": 2.85, "grad_norm": 4.4369378363985605, "learning_rate": 8.072934053231674e-08, "loss": 0.2822, "step": 67230 }, { "epoch": 2.85, "grad_norm": 3.8473237105543006, "learning_rate": 8.050915195687792e-08, "loss": 0.3293, "step": 67235 }, { "epoch": 2.85, "grad_norm": 4.063928807524676, "learning_rate": 8.028926163567874e-08, "loss": 0.304, "step": 67240 }, { "epoch": 2.85, "grad_norm": 3.8961441480635766, "learning_rate": 8.00696695820502e-08, "loss": 0.3277, "step": 67245 }, { "epoch": 2.85, "grad_norm": 4.104839097838789, "learning_rate": 7.985037580930499e-08, "loss": 0.3086, "step": 67250 }, { "epoch": 2.85, "grad_norm": 3.7561862366570544, "learning_rate": 7.963138033073858e-08, "loss": 0.2823, "step": 67255 }, { "epoch": 2.85, "grad_norm": 4.032227328781345, "learning_rate": 7.941268315962925e-08, "loss": 0.2955, "step": 67260 }, { "epoch": 2.85, "grad_norm": 3.666582286084504, "learning_rate": 7.919428430923415e-08, "loss": 0.3058, "step": 67265 }, { "epoch": 2.85, "grad_norm": 4.2208130023421315, "learning_rate": 7.897618379279436e-08, "loss": 0.3088, "step": 67270 }, { "epoch": 2.85, "grad_norm": 4.155545487274384, "learning_rate": 7.875838162353378e-08, "loss": 0.2827, "step": 67275 }, { "epoch": 2.85, "grad_norm": 3.581677797445036, "learning_rate": 7.85408778146568e-08, "loss": 0.2984, "step": 67280 }, { "epoch": 2.85, "grad_norm": 4.298966167329686, "learning_rate": 7.832367237934956e-08, "loss": 0.2965, "step": 67285 }, { "epoch": 2.85, "grad_norm": 3.9096560830894274, "learning_rate": 7.810676533078043e-08, "loss": 0.3074, "step": 67290 }, { "epoch": 2.85, "grad_norm": 4.525608092844694, "learning_rate": 7.789015668210053e-08, "loss": 0.3188, "step": 67295 }, { "epoch": 2.85, "grad_norm": 3.968339560893488, "learning_rate": 7.76738464464416e-08, "loss": 0.3048, "step": 67300 }, { "epoch": 2.85, "grad_norm": 4.041956996513647, "learning_rate": 7.745783463691869e-08, "loss": 0.3322, "step": 67305 }, { "epoch": 2.85, "grad_norm": 3.9109044858291866, "learning_rate": 7.724212126662744e-08, "loss": 0.3129, "step": 67310 }, { "epoch": 2.85, "grad_norm": 4.095252809311223, "learning_rate": 7.702670634864518e-08, "loss": 0.2884, "step": 67315 }, { "epoch": 2.85, "grad_norm": 4.018334635389714, "learning_rate": 7.68115898960331e-08, "loss": 0.3268, "step": 67320 }, { "epoch": 2.85, "grad_norm": 3.8834332328254075, "learning_rate": 7.659677192183245e-08, "loss": 0.3217, "step": 67325 }, { "epoch": 2.85, "grad_norm": 3.7482215232131466, "learning_rate": 7.638225243906727e-08, "loss": 0.2986, "step": 67330 }, { "epoch": 2.85, "grad_norm": 4.259989580802478, "learning_rate": 7.616803146074326e-08, "loss": 0.295, "step": 67335 }, { "epoch": 2.85, "grad_norm": 4.040747440323012, "learning_rate": 7.595410899984779e-08, "loss": 0.3212, "step": 67340 }, { "epoch": 2.85, "grad_norm": 3.930933398956746, "learning_rate": 7.57404850693505e-08, "loss": 0.2845, "step": 67345 }, { "epoch": 2.85, "grad_norm": 4.038288472412129, "learning_rate": 7.552715968220214e-08, "loss": 0.3144, "step": 67350 }, { "epoch": 2.85, "grad_norm": 4.276393039838735, "learning_rate": 7.531413285133737e-08, "loss": 0.3137, "step": 67355 }, { "epoch": 2.85, "grad_norm": 3.6448938374224613, "learning_rate": 7.51014045896703e-08, "loss": 0.2923, "step": 67360 }, { "epoch": 2.85, "grad_norm": 3.7237640698104713, "learning_rate": 7.488897491009839e-08, "loss": 0.2926, "step": 67365 }, { "epoch": 2.85, "grad_norm": 5.225816973028743, "learning_rate": 7.467684382550022e-08, "loss": 0.3039, "step": 67370 }, { "epoch": 2.85, "grad_norm": 3.7796898809835207, "learning_rate": 7.446501134873719e-08, "loss": 0.3167, "step": 67375 }, { "epoch": 2.85, "grad_norm": 5.057451796765932, "learning_rate": 7.425347749265233e-08, "loss": 0.3266, "step": 67380 }, { "epoch": 2.85, "grad_norm": 3.8978002623192625, "learning_rate": 7.404224227006928e-08, "loss": 0.3101, "step": 67385 }, { "epoch": 2.85, "grad_norm": 4.234921223837073, "learning_rate": 7.383130569379504e-08, "loss": 0.2961, "step": 67390 }, { "epoch": 2.85, "grad_norm": 4.605006233651287, "learning_rate": 7.36206677766188e-08, "loss": 0.3243, "step": 67395 }, { "epoch": 2.85, "grad_norm": 3.8396152618131727, "learning_rate": 7.341032853131036e-08, "loss": 0.2923, "step": 67400 }, { "epoch": 2.85, "grad_norm": 4.197094253155551, "learning_rate": 7.320028797062117e-08, "loss": 0.3123, "step": 67405 }, { "epoch": 2.85, "grad_norm": 4.325509709024995, "learning_rate": 7.299054610728662e-08, "loss": 0.3071, "step": 67410 }, { "epoch": 2.85, "grad_norm": 4.120089212305107, "learning_rate": 7.278110295402263e-08, "loss": 0.2915, "step": 67415 }, { "epoch": 2.85, "grad_norm": 4.001738714254425, "learning_rate": 7.257195852352628e-08, "loss": 0.2858, "step": 67420 }, { "epoch": 2.85, "grad_norm": 3.8368123057226837, "learning_rate": 7.236311282847797e-08, "loss": 0.3194, "step": 67425 }, { "epoch": 2.85, "grad_norm": 3.7713647258107335, "learning_rate": 7.21545658815398e-08, "loss": 0.2956, "step": 67430 }, { "epoch": 2.85, "grad_norm": 3.831142116085687, "learning_rate": 7.194631769535443e-08, "loss": 0.3012, "step": 67435 }, { "epoch": 2.85, "grad_norm": 3.794940382091522, "learning_rate": 7.173836828254732e-08, "loss": 0.3006, "step": 67440 }, { "epoch": 2.85, "grad_norm": 5.121171423024304, "learning_rate": 7.153071765572727e-08, "loss": 0.3408, "step": 67445 }, { "epoch": 2.85, "grad_norm": 3.970861748522601, "learning_rate": 7.132336582748201e-08, "loss": 0.2998, "step": 67450 }, { "epoch": 2.85, "grad_norm": 4.390808862202137, "learning_rate": 7.111631281038257e-08, "loss": 0.2821, "step": 67455 }, { "epoch": 2.86, "grad_norm": 4.146049709044729, "learning_rate": 7.090955861698334e-08, "loss": 0.2673, "step": 67460 }, { "epoch": 2.86, "grad_norm": 4.623546080779092, "learning_rate": 7.070310325981822e-08, "loss": 0.3049, "step": 67465 }, { "epoch": 2.86, "grad_norm": 3.9327759616211826, "learning_rate": 7.04969467514044e-08, "loss": 0.2955, "step": 67470 }, { "epoch": 2.86, "grad_norm": 4.0420158407377995, "learning_rate": 7.029108910424021e-08, "loss": 0.3057, "step": 67475 }, { "epoch": 2.86, "grad_norm": 4.298644760191139, "learning_rate": 7.008553033080678e-08, "loss": 0.2857, "step": 67480 }, { "epoch": 2.86, "grad_norm": 4.320236665572237, "learning_rate": 6.988027044356527e-08, "loss": 0.2797, "step": 67485 }, { "epoch": 2.86, "grad_norm": 3.993607925555911, "learning_rate": 6.967530945496126e-08, "loss": 0.2974, "step": 67490 }, { "epoch": 2.86, "grad_norm": 3.902493176651077, "learning_rate": 6.947064737742093e-08, "loss": 0.32, "step": 67495 }, { "epoch": 2.86, "grad_norm": 4.1351472616892835, "learning_rate": 6.926628422335103e-08, "loss": 0.3097, "step": 67500 }, { "epoch": 2.86, "grad_norm": 3.7985499954401676, "learning_rate": 6.906222000514274e-08, "loss": 0.3283, "step": 67505 }, { "epoch": 2.86, "grad_norm": 4.000809056287292, "learning_rate": 6.885845473516783e-08, "loss": 0.2762, "step": 67510 }, { "epoch": 2.86, "grad_norm": 4.052511167455435, "learning_rate": 6.865498842577923e-08, "loss": 0.3007, "step": 67515 }, { "epoch": 2.86, "grad_norm": 4.345118827410775, "learning_rate": 6.845182108931315e-08, "loss": 0.2875, "step": 67520 }, { "epoch": 2.86, "grad_norm": 4.943753358572154, "learning_rate": 6.824895273808641e-08, "loss": 0.3167, "step": 67525 }, { "epoch": 2.86, "grad_norm": 4.009374474150269, "learning_rate": 6.804638338439972e-08, "loss": 0.3214, "step": 67530 }, { "epoch": 2.86, "grad_norm": 4.372306066023339, "learning_rate": 6.784411304053273e-08, "loss": 0.3088, "step": 67535 }, { "epoch": 2.86, "grad_norm": 4.103184726385116, "learning_rate": 6.764214171874895e-08, "loss": 0.2969, "step": 67540 }, { "epoch": 2.86, "grad_norm": 4.3183564963653405, "learning_rate": 6.744046943129301e-08, "loss": 0.2961, "step": 67545 }, { "epoch": 2.86, "grad_norm": 4.403772313516793, "learning_rate": 6.723909619039237e-08, "loss": 0.3548, "step": 67550 }, { "epoch": 2.86, "grad_norm": 4.129769649073681, "learning_rate": 6.70380220082556e-08, "loss": 0.3007, "step": 67555 }, { "epoch": 2.86, "grad_norm": 3.4857746091169934, "learning_rate": 6.683724689707238e-08, "loss": 0.3164, "step": 67560 }, { "epoch": 2.86, "grad_norm": 3.5568505654827196, "learning_rate": 6.663677086901687e-08, "loss": 0.3034, "step": 67565 }, { "epoch": 2.86, "grad_norm": 4.059550441839261, "learning_rate": 6.643659393624158e-08, "loss": 0.2834, "step": 67570 }, { "epoch": 2.86, "grad_norm": 3.765808617531961, "learning_rate": 6.623671611088289e-08, "loss": 0.2958, "step": 67575 }, { "epoch": 2.86, "grad_norm": 3.643079583414973, "learning_rate": 6.603713740506001e-08, "loss": 0.2918, "step": 67580 }, { "epoch": 2.86, "grad_norm": 3.956742881121114, "learning_rate": 6.583785783087159e-08, "loss": 0.3094, "step": 67585 }, { "epoch": 2.86, "grad_norm": 3.7142177989822303, "learning_rate": 6.56388774004002e-08, "loss": 0.3, "step": 67590 }, { "epoch": 2.86, "grad_norm": 3.6569637025348833, "learning_rate": 6.54401961257084e-08, "loss": 0.293, "step": 67595 }, { "epoch": 2.86, "grad_norm": 4.113461126702002, "learning_rate": 6.524181401884267e-08, "loss": 0.3142, "step": 67600 }, { "epoch": 2.86, "grad_norm": 4.509535201185449, "learning_rate": 6.504373109183004e-08, "loss": 0.3632, "step": 67605 }, { "epoch": 2.86, "grad_norm": 4.013390228681255, "learning_rate": 6.484594735667926e-08, "loss": 0.294, "step": 67610 }, { "epoch": 2.86, "grad_norm": 4.107401201002257, "learning_rate": 6.46484628253824e-08, "loss": 0.2892, "step": 67615 }, { "epoch": 2.86, "grad_norm": 4.007312675983092, "learning_rate": 6.445127750991099e-08, "loss": 0.3196, "step": 67620 }, { "epoch": 2.86, "grad_norm": 4.0246456107255835, "learning_rate": 6.425439142222045e-08, "loss": 0.3147, "step": 67625 }, { "epoch": 2.86, "grad_norm": 3.7929694953876356, "learning_rate": 6.405780457424793e-08, "loss": 0.3114, "step": 67630 }, { "epoch": 2.86, "grad_norm": 3.827221937244473, "learning_rate": 6.38615169779111e-08, "loss": 0.304, "step": 67635 }, { "epoch": 2.86, "grad_norm": 4.017811275385251, "learning_rate": 6.366552864511099e-08, "loss": 0.2988, "step": 67640 }, { "epoch": 2.86, "grad_norm": 4.254243161932273, "learning_rate": 6.346983958772867e-08, "loss": 0.3092, "step": 67645 }, { "epoch": 2.86, "grad_norm": 4.235618035852096, "learning_rate": 6.327444981762964e-08, "loss": 0.3317, "step": 67650 }, { "epoch": 2.86, "grad_norm": 3.649658289948641, "learning_rate": 6.307935934665887e-08, "loss": 0.3027, "step": 67655 }, { "epoch": 2.86, "grad_norm": 4.212879270549319, "learning_rate": 6.288456818664468e-08, "loss": 0.3035, "step": 67660 }, { "epoch": 2.86, "grad_norm": 4.40303196951868, "learning_rate": 6.269007634939594e-08, "loss": 0.3016, "step": 67665 }, { "epoch": 2.86, "grad_norm": 3.763644277122364, "learning_rate": 6.249588384670491e-08, "loss": 0.3089, "step": 67670 }, { "epoch": 2.86, "grad_norm": 3.604362611174508, "learning_rate": 6.230199069034437e-08, "loss": 0.2903, "step": 67675 }, { "epoch": 2.86, "grad_norm": 3.348688564146852, "learning_rate": 6.210839689206938e-08, "loss": 0.2995, "step": 67680 }, { "epoch": 2.86, "grad_norm": 4.110360691380537, "learning_rate": 6.191510246361776e-08, "loss": 0.2925, "step": 67685 }, { "epoch": 2.86, "grad_norm": 4.13776166576163, "learning_rate": 6.172210741670737e-08, "loss": 0.3171, "step": 67690 }, { "epoch": 2.86, "grad_norm": 3.619616213327034, "learning_rate": 6.152941176303995e-08, "loss": 0.2922, "step": 67695 }, { "epoch": 2.87, "grad_norm": 3.9368302678334337, "learning_rate": 6.133701551429727e-08, "loss": 0.3457, "step": 67700 }, { "epoch": 2.87, "grad_norm": 4.13122736886974, "learning_rate": 6.114491868214445e-08, "loss": 0.2926, "step": 67705 }, { "epoch": 2.87, "grad_norm": 3.939790121370067, "learning_rate": 6.095312127822661e-08, "loss": 0.284, "step": 67710 }, { "epoch": 2.87, "grad_norm": 5.278099854757711, "learning_rate": 6.076162331417279e-08, "loss": 0.3234, "step": 67715 }, { "epoch": 2.87, "grad_norm": 3.9564192143498924, "learning_rate": 6.057042480159315e-08, "loss": 0.2896, "step": 67720 }, { "epoch": 2.87, "grad_norm": 3.7215124084459044, "learning_rate": 6.037952575207895e-08, "loss": 0.2969, "step": 67725 }, { "epoch": 2.87, "grad_norm": 3.6967953692561095, "learning_rate": 6.018892617720373e-08, "loss": 0.3069, "step": 67730 }, { "epoch": 2.87, "grad_norm": 4.309509381670668, "learning_rate": 5.999862608852325e-08, "loss": 0.3088, "step": 67735 }, { "epoch": 2.87, "grad_norm": 4.079767653835693, "learning_rate": 5.980862549757494e-08, "loss": 0.294, "step": 67740 }, { "epoch": 2.87, "grad_norm": 3.7731058588156903, "learning_rate": 5.96189244158779e-08, "loss": 0.3094, "step": 67745 }, { "epoch": 2.87, "grad_norm": 3.520293549013381, "learning_rate": 5.9429522854932975e-08, "loss": 0.281, "step": 67750 }, { "epoch": 2.87, "grad_norm": 3.7976274879106366, "learning_rate": 5.924042082622317e-08, "loss": 0.3162, "step": 67755 }, { "epoch": 2.87, "grad_norm": 4.012785851612927, "learning_rate": 5.90516183412132e-08, "loss": 0.309, "step": 67760 }, { "epoch": 2.87, "grad_norm": 3.7214814162351324, "learning_rate": 5.886311541134948e-08, "loss": 0.3075, "step": 67765 }, { "epoch": 2.87, "grad_norm": 3.7586493796062586, "learning_rate": 5.867491204806064e-08, "loss": 0.2982, "step": 67770 }, { "epoch": 2.87, "grad_norm": 3.592946941566493, "learning_rate": 5.848700826275644e-08, "loss": 0.3012, "step": 67775 }, { "epoch": 2.87, "grad_norm": 3.803601483320265, "learning_rate": 5.829940406682943e-08, "loss": 0.3036, "step": 67780 }, { "epoch": 2.87, "grad_norm": 3.6753659553407987, "learning_rate": 5.811209947165386e-08, "loss": 0.269, "step": 67785 }, { "epoch": 2.87, "grad_norm": 4.249177348010491, "learning_rate": 5.7925094488583964e-08, "loss": 0.3258, "step": 67790 }, { "epoch": 2.87, "grad_norm": 3.7360552671367717, "learning_rate": 5.773838912895846e-08, "loss": 0.3157, "step": 67795 }, { "epoch": 2.87, "grad_norm": 3.6380251834841344, "learning_rate": 5.755198340409718e-08, "loss": 0.3045, "step": 67800 }, { "epoch": 2.87, "grad_norm": 4.072445060656571, "learning_rate": 5.736587732529997e-08, "loss": 0.2807, "step": 67805 }, { "epoch": 2.87, "grad_norm": 4.136005460084404, "learning_rate": 5.718007090385058e-08, "loss": 0.2782, "step": 67810 }, { "epoch": 2.87, "grad_norm": 4.1103986138869315, "learning_rate": 5.69945641510139e-08, "loss": 0.3188, "step": 67815 }, { "epoch": 2.87, "grad_norm": 3.6584240467300493, "learning_rate": 5.6809357078037034e-08, "loss": 0.2844, "step": 67820 }, { "epoch": 2.87, "grad_norm": 4.0610861071155915, "learning_rate": 5.662444969614822e-08, "loss": 0.314, "step": 67825 }, { "epoch": 2.87, "grad_norm": 3.753442711285399, "learning_rate": 5.643984201655794e-08, "loss": 0.291, "step": 67830 }, { "epoch": 2.87, "grad_norm": 4.077994111311488, "learning_rate": 5.625553405045781e-08, "loss": 0.2938, "step": 67835 }, { "epoch": 2.87, "grad_norm": 3.7582268884852432, "learning_rate": 5.607152580902275e-08, "loss": 0.3189, "step": 67840 }, { "epoch": 2.87, "grad_norm": 3.8401190108814585, "learning_rate": 5.588781730340831e-08, "loss": 0.2909, "step": 67845 }, { "epoch": 2.87, "grad_norm": 4.243334684485353, "learning_rate": 5.570440854475223e-08, "loss": 0.3252, "step": 67850 }, { "epoch": 2.87, "grad_norm": 4.1548263105078655, "learning_rate": 5.55212995441734e-08, "loss": 0.328, "step": 67855 }, { "epoch": 2.87, "grad_norm": 3.749605414916934, "learning_rate": 5.5338490312774605e-08, "loss": 0.291, "step": 67860 }, { "epoch": 2.87, "grad_norm": 4.435419386219366, "learning_rate": 5.515598086163754e-08, "loss": 0.2955, "step": 67865 }, { "epoch": 2.87, "grad_norm": 4.397463558775545, "learning_rate": 5.4973771201827784e-08, "loss": 0.3271, "step": 67870 }, { "epoch": 2.87, "grad_norm": 4.523892991725451, "learning_rate": 5.479186134439263e-08, "loss": 0.3337, "step": 67875 }, { "epoch": 2.87, "grad_norm": 3.6840816927081423, "learning_rate": 5.461025130035991e-08, "loss": 0.2909, "step": 67880 }, { "epoch": 2.87, "grad_norm": 3.9481938107116066, "learning_rate": 5.442894108074026e-08, "loss": 0.2847, "step": 67885 }, { "epoch": 2.87, "grad_norm": 4.039275267035997, "learning_rate": 5.424793069652712e-08, "loss": 0.2917, "step": 67890 }, { "epoch": 2.87, "grad_norm": 4.0444081524366595, "learning_rate": 5.406722015869337e-08, "loss": 0.3163, "step": 67895 }, { "epoch": 2.87, "grad_norm": 3.7104128521835618, "learning_rate": 5.388680947819524e-08, "loss": 0.2813, "step": 67900 }, { "epoch": 2.87, "grad_norm": 3.949874376163575, "learning_rate": 5.370669866597067e-08, "loss": 0.2806, "step": 67905 }, { "epoch": 2.87, "grad_norm": 3.9228068709652106, "learning_rate": 5.352688773293924e-08, "loss": 0.3309, "step": 67910 }, { "epoch": 2.87, "grad_norm": 3.8337707097222875, "learning_rate": 5.334737669000167e-08, "loss": 0.3244, "step": 67915 }, { "epoch": 2.87, "grad_norm": 3.7026248893058886, "learning_rate": 5.3168165548042605e-08, "loss": 0.2998, "step": 67920 }, { "epoch": 2.87, "grad_norm": 3.9338322418427825, "learning_rate": 5.298925431792612e-08, "loss": 0.3018, "step": 67925 }, { "epoch": 2.87, "grad_norm": 3.6065154851992363, "learning_rate": 5.28106430104991e-08, "loss": 0.3138, "step": 67930 }, { "epoch": 2.88, "grad_norm": 3.878376198378528, "learning_rate": 5.2632331636590094e-08, "loss": 0.303, "step": 67935 }, { "epoch": 2.88, "grad_norm": 4.135505471397503, "learning_rate": 5.245432020701047e-08, "loss": 0.2979, "step": 67940 }, { "epoch": 2.88, "grad_norm": 4.655834798753016, "learning_rate": 5.227660873255158e-08, "loss": 0.3165, "step": 67945 }, { "epoch": 2.88, "grad_norm": 4.229688844348593, "learning_rate": 5.209919722398815e-08, "loss": 0.318, "step": 67950 }, { "epoch": 2.88, "grad_norm": 3.9280026145734683, "learning_rate": 5.1922085692076016e-08, "loss": 0.3234, "step": 67955 }, { "epoch": 2.88, "grad_norm": 3.694440796313576, "learning_rate": 5.1745274147552146e-08, "loss": 0.3336, "step": 67960 }, { "epoch": 2.88, "grad_norm": 3.967528818820221, "learning_rate": 5.156876260113741e-08, "loss": 0.3003, "step": 67965 }, { "epoch": 2.88, "grad_norm": 4.071088634268772, "learning_rate": 5.139255106353214e-08, "loss": 0.2832, "step": 67970 }, { "epoch": 2.88, "grad_norm": 4.114427385116013, "learning_rate": 5.121663954542e-08, "loss": 0.3065, "step": 67975 }, { "epoch": 2.88, "grad_norm": 4.110623937700965, "learning_rate": 5.10410280574658e-08, "loss": 0.289, "step": 67980 }, { "epoch": 2.88, "grad_norm": 3.8918191297522853, "learning_rate": 5.086571661031658e-08, "loss": 0.3137, "step": 67985 }, { "epoch": 2.88, "grad_norm": 4.03231752129029, "learning_rate": 5.069070521460051e-08, "loss": 0.31, "step": 67990 }, { "epoch": 2.88, "grad_norm": 3.4176898448604147, "learning_rate": 5.051599388092854e-08, "loss": 0.3079, "step": 67995 }, { "epoch": 2.88, "grad_norm": 3.954439025992426, "learning_rate": 5.0341582619892196e-08, "loss": 0.2985, "step": 68000 }, { "epoch": 2.88, "grad_norm": 4.4728957709554305, "learning_rate": 5.016747144206691e-08, "loss": 0.3245, "step": 68005 }, { "epoch": 2.88, "grad_norm": 4.040433767784017, "learning_rate": 4.9993660358007013e-08, "loss": 0.3241, "step": 68010 }, { "epoch": 2.88, "grad_norm": 3.8812577908943213, "learning_rate": 4.982014937825075e-08, "loss": 0.3119, "step": 68015 }, { "epoch": 2.88, "grad_norm": 4.23459266462151, "learning_rate": 4.9646938513318034e-08, "loss": 0.2992, "step": 68020 }, { "epoch": 2.88, "grad_norm": 4.188721997192261, "learning_rate": 4.9474027773709906e-08, "loss": 0.2986, "step": 68025 }, { "epoch": 2.88, "grad_norm": 3.9604939135563164, "learning_rate": 4.930141716990855e-08, "loss": 0.3271, "step": 68030 }, { "epoch": 2.88, "grad_norm": 3.7733903782639002, "learning_rate": 4.912910671237947e-08, "loss": 0.3125, "step": 68035 }, { "epoch": 2.88, "grad_norm": 4.695846733823223, "learning_rate": 4.895709641156987e-08, "loss": 0.3219, "step": 68040 }, { "epoch": 2.88, "grad_norm": 3.953916411472082, "learning_rate": 4.878538627790752e-08, "loss": 0.3027, "step": 68045 }, { "epoch": 2.88, "grad_norm": 4.49525420624861, "learning_rate": 4.861397632180243e-08, "loss": 0.2831, "step": 68050 }, { "epoch": 2.88, "grad_norm": 3.8251415167948046, "learning_rate": 4.844286655364794e-08, "loss": 0.2996, "step": 68055 }, { "epoch": 2.88, "grad_norm": 3.536336657712159, "learning_rate": 4.827205698381687e-08, "loss": 0.3086, "step": 68060 }, { "epoch": 2.88, "grad_norm": 3.662488379414853, "learning_rate": 4.810154762266483e-08, "loss": 0.3164, "step": 68065 }, { "epoch": 2.88, "grad_norm": 4.105437912824385, "learning_rate": 4.793133848053022e-08, "loss": 0.3091, "step": 68070 }, { "epoch": 2.88, "grad_norm": 3.976310772728087, "learning_rate": 4.7761429567731444e-08, "loss": 0.333, "step": 68075 }, { "epoch": 2.88, "grad_norm": 4.655764157848068, "learning_rate": 4.759182089456971e-08, "loss": 0.3132, "step": 68080 }, { "epoch": 2.88, "grad_norm": 4.160088455234844, "learning_rate": 4.742251247132845e-08, "loss": 0.3188, "step": 68085 }, { "epoch": 2.88, "grad_norm": 3.6052639029227476, "learning_rate": 4.725350430827225e-08, "loss": 0.3171, "step": 68090 }, { "epoch": 2.88, "grad_norm": 3.939240023965245, "learning_rate": 4.7084796415646785e-08, "loss": 0.3316, "step": 68095 }, { "epoch": 2.88, "grad_norm": 4.057461533580917, "learning_rate": 4.691638880368054e-08, "loss": 0.286, "step": 68100 }, { "epoch": 2.88, "grad_norm": 3.9405878143644353, "learning_rate": 4.674828148258481e-08, "loss": 0.3055, "step": 68105 }, { "epoch": 2.88, "grad_norm": 3.7936523463053398, "learning_rate": 4.658047446254976e-08, "loss": 0.3141, "step": 68110 }, { "epoch": 2.88, "grad_norm": 3.6367126046531073, "learning_rate": 4.6412967753750035e-08, "loss": 0.2918, "step": 68115 }, { "epoch": 2.88, "grad_norm": 4.43078455807695, "learning_rate": 4.624576136634029e-08, "loss": 0.2894, "step": 68120 }, { "epoch": 2.88, "grad_norm": 3.636939188101196, "learning_rate": 4.6078855310459084e-08, "loss": 0.2827, "step": 68125 }, { "epoch": 2.88, "grad_norm": 3.818867269756552, "learning_rate": 4.591224959622387e-08, "loss": 0.3114, "step": 68130 }, { "epoch": 2.88, "grad_norm": 4.046523286048708, "learning_rate": 4.574594423373657e-08, "loss": 0.2978, "step": 68135 }, { "epoch": 2.88, "grad_norm": 3.707557155336955, "learning_rate": 4.557993923307968e-08, "loss": 0.2904, "step": 68140 }, { "epoch": 2.88, "grad_norm": 3.538963217447223, "learning_rate": 4.541423460431738e-08, "loss": 0.3441, "step": 68145 }, { "epoch": 2.88, "grad_norm": 3.8956082662197864, "learning_rate": 4.52488303574955e-08, "loss": 0.3008, "step": 68150 }, { "epoch": 2.88, "grad_norm": 4.128782649758923, "learning_rate": 4.508372650264214e-08, "loss": 0.3189, "step": 68155 }, { "epoch": 2.88, "grad_norm": 4.099664076186443, "learning_rate": 4.491892304976764e-08, "loss": 0.3163, "step": 68160 }, { "epoch": 2.88, "grad_norm": 3.606471482014266, "learning_rate": 4.4754420008862876e-08, "loss": 0.2862, "step": 68165 }, { "epoch": 2.89, "grad_norm": 4.365835569663179, "learning_rate": 4.4590217389901545e-08, "loss": 0.2981, "step": 68170 }, { "epoch": 2.89, "grad_norm": 3.8880025185480425, "learning_rate": 4.442631520283902e-08, "loss": 0.2907, "step": 68175 }, { "epoch": 2.89, "grad_norm": 3.9339411742608488, "learning_rate": 4.4262713457611795e-08, "loss": 0.3187, "step": 68180 }, { "epoch": 2.89, "grad_norm": 4.637726328749896, "learning_rate": 4.40994121641386e-08, "loss": 0.2993, "step": 68185 }, { "epoch": 2.89, "grad_norm": 4.426330745195855, "learning_rate": 4.393641133231985e-08, "loss": 0.2843, "step": 68190 }, { "epoch": 2.89, "grad_norm": 3.571466892785576, "learning_rate": 4.3773710972038194e-08, "loss": 0.285, "step": 68195 }, { "epoch": 2.89, "grad_norm": 4.124900144647663, "learning_rate": 4.361131109315686e-08, "loss": 0.2893, "step": 68200 }, { "epoch": 2.89, "grad_norm": 4.178267785277212, "learning_rate": 4.344921170552241e-08, "loss": 0.3025, "step": 68205 }, { "epoch": 2.89, "grad_norm": 5.833346353458756, "learning_rate": 4.3287412818962535e-08, "loss": 0.3035, "step": 68210 }, { "epoch": 2.89, "grad_norm": 3.956584964131083, "learning_rate": 4.312591444328607e-08, "loss": 0.2689, "step": 68215 }, { "epoch": 2.89, "grad_norm": 3.6898939086712077, "learning_rate": 4.296471658828461e-08, "loss": 0.2873, "step": 68220 }, { "epoch": 2.89, "grad_norm": 3.8102327435330277, "learning_rate": 4.28038192637309e-08, "loss": 0.2941, "step": 68225 }, { "epoch": 2.89, "grad_norm": 3.832324182113011, "learning_rate": 4.264322247937991e-08, "loss": 0.2961, "step": 68230 }, { "epoch": 2.89, "grad_norm": 4.165332839781083, "learning_rate": 4.2482926244967745e-08, "loss": 0.306, "step": 68235 }, { "epoch": 2.89, "grad_norm": 3.800336400847749, "learning_rate": 4.232293057021275e-08, "loss": 0.295, "step": 68240 }, { "epoch": 2.89, "grad_norm": 4.3388994629932185, "learning_rate": 4.216323546481549e-08, "loss": 0.2739, "step": 68245 }, { "epoch": 2.89, "grad_norm": 4.145580320385166, "learning_rate": 4.200384093845711e-08, "loss": 0.2809, "step": 68250 }, { "epoch": 2.89, "grad_norm": 4.253355485403765, "learning_rate": 4.18447470008021e-08, "loss": 0.3169, "step": 68255 }, { "epoch": 2.89, "grad_norm": 3.99507858763937, "learning_rate": 4.1685953661494974e-08, "loss": 0.2938, "step": 68260 }, { "epoch": 2.89, "grad_norm": 3.525424363631607, "learning_rate": 4.152746093016358e-08, "loss": 0.2797, "step": 68265 }, { "epoch": 2.89, "grad_norm": 3.891564080415869, "learning_rate": 4.136926881641634e-08, "loss": 0.3034, "step": 68270 }, { "epoch": 2.89, "grad_norm": 3.9829505297849526, "learning_rate": 4.1211377329844485e-08, "loss": 0.285, "step": 68275 }, { "epoch": 2.89, "grad_norm": 3.680944982884361, "learning_rate": 4.105378648001979e-08, "loss": 0.2968, "step": 68280 }, { "epoch": 2.89, "grad_norm": 5.936474565118909, "learning_rate": 4.0896496276496836e-08, "loss": 0.2878, "step": 68285 }, { "epoch": 2.89, "grad_norm": 3.8885327228097024, "learning_rate": 4.073950672881244e-08, "loss": 0.3222, "step": 68290 }, { "epoch": 2.89, "grad_norm": 3.863223541092429, "learning_rate": 4.058281784648343e-08, "loss": 0.2749, "step": 68295 }, { "epoch": 2.89, "grad_norm": 4.070152976423319, "learning_rate": 4.0426429639009446e-08, "loss": 0.3059, "step": 68300 }, { "epoch": 2.89, "grad_norm": 4.671708676789781, "learning_rate": 4.027034211587233e-08, "loss": 0.2955, "step": 68305 }, { "epoch": 2.89, "grad_norm": 3.8120203794677217, "learning_rate": 4.011455528653563e-08, "loss": 0.3222, "step": 68310 }, { "epoch": 2.89, "grad_norm": 4.3692484527773665, "learning_rate": 3.9959069160442896e-08, "loss": 0.3078, "step": 68315 }, { "epoch": 2.89, "grad_norm": 3.4630689025834958, "learning_rate": 3.98038837470216e-08, "loss": 0.2977, "step": 68320 }, { "epoch": 2.89, "grad_norm": 3.8004478493882146, "learning_rate": 3.9648999055680316e-08, "loss": 0.3249, "step": 68325 }, { "epoch": 2.89, "grad_norm": 3.955375230998622, "learning_rate": 3.949441509580931e-08, "loss": 0.3289, "step": 68330 }, { "epoch": 2.89, "grad_norm": 3.838179596990951, "learning_rate": 3.934013187677943e-08, "loss": 0.3276, "step": 68335 }, { "epoch": 2.89, "grad_norm": 3.615824165264497, "learning_rate": 3.918614940794596e-08, "loss": 0.3002, "step": 68340 }, { "epoch": 2.89, "grad_norm": 3.6692548039891446, "learning_rate": 3.903246769864366e-08, "loss": 0.3151, "step": 68345 }, { "epoch": 2.89, "grad_norm": 4.017934018585339, "learning_rate": 3.887908675818952e-08, "loss": 0.3241, "step": 68350 }, { "epoch": 2.89, "grad_norm": 4.26999622719143, "learning_rate": 3.8726006595883325e-08, "loss": 0.3229, "step": 68355 }, { "epoch": 2.89, "grad_norm": 3.800516814896526, "learning_rate": 3.857322722100487e-08, "loss": 0.2897, "step": 68360 }, { "epoch": 2.89, "grad_norm": 3.3168952602091473, "learning_rate": 3.842074864281786e-08, "loss": 0.2962, "step": 68365 }, { "epoch": 2.89, "grad_norm": 4.862070172714224, "learning_rate": 3.826857087056546e-08, "loss": 0.305, "step": 68370 }, { "epoch": 2.89, "grad_norm": 4.8556357294495065, "learning_rate": 3.811669391347472e-08, "loss": 0.309, "step": 68375 }, { "epoch": 2.89, "grad_norm": 3.718769022098439, "learning_rate": 3.7965117780752735e-08, "loss": 0.293, "step": 68380 }, { "epoch": 2.89, "grad_norm": 3.6890938660963575, "learning_rate": 3.7813842481589926e-08, "loss": 0.3101, "step": 68385 }, { "epoch": 2.89, "grad_norm": 3.985691098986346, "learning_rate": 3.7662868025157284e-08, "loss": 0.2941, "step": 68390 }, { "epoch": 2.89, "grad_norm": 5.04899522587584, "learning_rate": 3.751219442060749e-08, "loss": 0.3059, "step": 68395 }, { "epoch": 2.89, "grad_norm": 3.9318863193804563, "learning_rate": 3.7361821677076024e-08, "loss": 0.2895, "step": 68400 }, { "epoch": 2.9, "grad_norm": 4.0831677984328945, "learning_rate": 3.721174980367948e-08, "loss": 0.2987, "step": 68405 }, { "epoch": 2.9, "grad_norm": 3.928370033903755, "learning_rate": 3.706197880951612e-08, "loss": 0.2746, "step": 68410 }, { "epoch": 2.9, "grad_norm": 3.636775568518349, "learning_rate": 3.691250870366647e-08, "loss": 0.2998, "step": 68415 }, { "epoch": 2.9, "grad_norm": 3.959377033082534, "learning_rate": 3.6763339495191616e-08, "loss": 0.3164, "step": 68420 }, { "epoch": 2.9, "grad_norm": 4.309048549000451, "learning_rate": 3.661447119313599e-08, "loss": 0.2983, "step": 68425 }, { "epoch": 2.9, "grad_norm": 3.815767290860579, "learning_rate": 3.646590380652515e-08, "loss": 0.3526, "step": 68430 }, { "epoch": 2.9, "grad_norm": 3.8075273010348276, "learning_rate": 3.631763734436577e-08, "loss": 0.2889, "step": 68435 }, { "epoch": 2.9, "grad_norm": 3.4820351964858527, "learning_rate": 3.6169671815646787e-08, "loss": 0.2809, "step": 68440 }, { "epoch": 2.9, "grad_norm": 3.6963708563811273, "learning_rate": 3.60220072293399e-08, "loss": 0.2844, "step": 68445 }, { "epoch": 2.9, "grad_norm": 3.482981463138997, "learning_rate": 3.5874643594396296e-08, "loss": 0.2877, "step": 68450 }, { "epoch": 2.9, "grad_norm": 3.8192170577883817, "learning_rate": 3.572758091975048e-08, "loss": 0.3183, "step": 68455 }, { "epoch": 2.9, "grad_norm": 4.178411123961507, "learning_rate": 3.558081921431922e-08, "loss": 0.2991, "step": 68460 }, { "epoch": 2.9, "grad_norm": 4.159345213807564, "learning_rate": 3.543435848699928e-08, "loss": 0.3404, "step": 68465 }, { "epoch": 2.9, "grad_norm": 3.6826306813228604, "learning_rate": 3.5288198746670774e-08, "loss": 0.3028, "step": 68470 }, { "epoch": 2.9, "grad_norm": 3.9362373809152778, "learning_rate": 3.514234000219496e-08, "loss": 0.3113, "step": 68475 }, { "epoch": 2.9, "grad_norm": 3.550782741689252, "learning_rate": 3.499678226241421e-08, "loss": 0.2776, "step": 68480 }, { "epoch": 2.9, "grad_norm": 3.9203915631799466, "learning_rate": 3.4851525536153675e-08, "loss": 0.3144, "step": 68485 }, { "epoch": 2.9, "grad_norm": 3.8585669134026475, "learning_rate": 3.4706569832220206e-08, "loss": 0.3336, "step": 68490 }, { "epoch": 2.9, "grad_norm": 4.164082168953407, "learning_rate": 3.456191515940177e-08, "loss": 0.3148, "step": 68495 }, { "epoch": 2.9, "grad_norm": 3.8835248821734143, "learning_rate": 3.441756152646747e-08, "loss": 0.2927, "step": 68500 }, { "epoch": 2.9, "grad_norm": 4.372668368564303, "learning_rate": 3.427350894217085e-08, "loss": 0.3269, "step": 68505 }, { "epoch": 2.9, "grad_norm": 3.8110977839958133, "learning_rate": 3.412975741524327e-08, "loss": 0.3032, "step": 68510 }, { "epoch": 2.9, "grad_norm": 4.2603150604024265, "learning_rate": 3.3986306954401635e-08, "loss": 0.3142, "step": 68515 }, { "epoch": 2.9, "grad_norm": 3.9767729319615404, "learning_rate": 3.3843157568342333e-08, "loss": 0.2896, "step": 68520 }, { "epoch": 2.9, "grad_norm": 3.524685702552604, "learning_rate": 3.370030926574397e-08, "loss": 0.2769, "step": 68525 }, { "epoch": 2.9, "grad_norm": 3.561871077630698, "learning_rate": 3.3557762055267396e-08, "loss": 0.2983, "step": 68530 }, { "epoch": 2.9, "grad_norm": 4.144976601722943, "learning_rate": 3.3415515945554037e-08, "loss": 0.32, "step": 68535 }, { "epoch": 2.9, "grad_norm": 3.830215380064149, "learning_rate": 3.327357094522865e-08, "loss": 0.2793, "step": 68540 }, { "epoch": 2.9, "grad_norm": 4.300268188756827, "learning_rate": 3.3131927062896585e-08, "loss": 0.3009, "step": 68545 }, { "epoch": 2.9, "grad_norm": 3.8675226492865487, "learning_rate": 3.2990584307145966e-08, "loss": 0.3336, "step": 68550 }, { "epoch": 2.9, "grad_norm": 4.008794901490101, "learning_rate": 3.2849542686544946e-08, "loss": 0.3048, "step": 68555 }, { "epoch": 2.9, "grad_norm": 3.938411351144604, "learning_rate": 3.270880220964501e-08, "loss": 0.3131, "step": 68560 }, { "epoch": 2.9, "grad_norm": 3.7019420233312235, "learning_rate": 3.256836288497878e-08, "loss": 0.2871, "step": 68565 }, { "epoch": 2.9, "grad_norm": 4.168885781507564, "learning_rate": 3.2428224721060556e-08, "loss": 0.3184, "step": 68570 }, { "epoch": 2.9, "grad_norm": 4.502478347289495, "learning_rate": 3.228838772638632e-08, "loss": 0.3164, "step": 68575 }, { "epoch": 2.9, "grad_norm": 4.311865579528974, "learning_rate": 3.2148851909434845e-08, "loss": 0.3122, "step": 68580 }, { "epoch": 2.9, "grad_norm": 3.672104511879892, "learning_rate": 3.200961727866492e-08, "loss": 0.3064, "step": 68585 }, { "epoch": 2.9, "grad_norm": 4.1320454978654455, "learning_rate": 3.1870683842518126e-08, "loss": 0.3185, "step": 68590 }, { "epoch": 2.9, "grad_norm": 3.537664079395672, "learning_rate": 3.173205160941828e-08, "loss": 0.3054, "step": 68595 }, { "epoch": 2.9, "grad_norm": 4.038821187842253, "learning_rate": 3.1593720587768664e-08, "loss": 0.293, "step": 68600 }, { "epoch": 2.9, "grad_norm": 3.877998533666184, "learning_rate": 3.145569078595756e-08, "loss": 0.3044, "step": 68605 }, { "epoch": 2.9, "grad_norm": 3.643096592138452, "learning_rate": 3.131796221235217e-08, "loss": 0.3083, "step": 68610 }, { "epoch": 2.9, "grad_norm": 3.982477064880714, "learning_rate": 3.118053487530304e-08, "loss": 0.3331, "step": 68615 }, { "epoch": 2.9, "grad_norm": 3.806037529784277, "learning_rate": 3.1043408783142383e-08, "loss": 0.3017, "step": 68620 }, { "epoch": 2.9, "grad_norm": 3.811151265361242, "learning_rate": 3.090658394418245e-08, "loss": 0.3, "step": 68625 }, { "epoch": 2.9, "grad_norm": 3.767268645171214, "learning_rate": 3.0770060366720476e-08, "loss": 0.2962, "step": 68630 }, { "epoch": 2.9, "grad_norm": 3.5548393994982024, "learning_rate": 3.063383805903153e-08, "loss": 0.2927, "step": 68635 }, { "epoch": 2.9, "grad_norm": 3.465699227101694, "learning_rate": 3.0497917029375655e-08, "loss": 0.2955, "step": 68640 }, { "epoch": 2.91, "grad_norm": 3.9169814682200492, "learning_rate": 3.0362297285992935e-08, "loss": 0.282, "step": 68645 }, { "epoch": 2.91, "grad_norm": 4.028086862289432, "learning_rate": 3.022697883710513e-08, "loss": 0.3031, "step": 68650 }, { "epoch": 2.91, "grad_norm": 4.201700573605033, "learning_rate": 3.0091961690917324e-08, "loss": 0.321, "step": 68655 }, { "epoch": 2.91, "grad_norm": 3.5429219029263437, "learning_rate": 2.99572458556141e-08, "loss": 0.2997, "step": 68660 }, { "epoch": 2.91, "grad_norm": 3.8126499059716075, "learning_rate": 2.9822831339363364e-08, "loss": 0.287, "step": 68665 }, { "epoch": 2.91, "grad_norm": 4.048774630790299, "learning_rate": 2.968871815031471e-08, "loss": 0.3126, "step": 68670 }, { "epoch": 2.91, "grad_norm": 3.7712885210146294, "learning_rate": 2.955490629659774e-08, "loss": 0.2922, "step": 68675 }, { "epoch": 2.91, "grad_norm": 3.7139500851331158, "learning_rate": 2.9421395786326522e-08, "loss": 0.2918, "step": 68680 }, { "epoch": 2.91, "grad_norm": 3.8007764773611212, "learning_rate": 2.9288186627594584e-08, "loss": 0.2952, "step": 68685 }, { "epoch": 2.91, "grad_norm": 3.5992752072775716, "learning_rate": 2.9155278828477686e-08, "loss": 0.3125, "step": 68690 }, { "epoch": 2.91, "grad_norm": 3.7670752312504683, "learning_rate": 2.9022672397034378e-08, "loss": 0.2988, "step": 68695 }, { "epoch": 2.91, "grad_norm": 4.017223261357962, "learning_rate": 2.889036734130435e-08, "loss": 0.3077, "step": 68700 }, { "epoch": 2.91, "grad_norm": 4.2384545886906855, "learning_rate": 2.8758363669308397e-08, "loss": 0.2952, "step": 68705 }, { "epoch": 2.91, "grad_norm": 3.676779163365829, "learning_rate": 2.8626661389049016e-08, "loss": 0.3248, "step": 68710 }, { "epoch": 2.91, "grad_norm": 3.5404653850526087, "learning_rate": 2.8495260508512036e-08, "loss": 0.3257, "step": 68715 }, { "epoch": 2.91, "grad_norm": 4.794028023869922, "learning_rate": 2.8364161035663307e-08, "loss": 0.3269, "step": 68720 }, { "epoch": 2.91, "grad_norm": 4.026428704437321, "learning_rate": 2.8233362978450918e-08, "loss": 0.3126, "step": 68725 }, { "epoch": 2.91, "grad_norm": 3.5761846931181487, "learning_rate": 2.810286634480519e-08, "loss": 0.3174, "step": 68730 }, { "epoch": 2.91, "grad_norm": 4.335833305973485, "learning_rate": 2.797267114263702e-08, "loss": 0.3051, "step": 68735 }, { "epoch": 2.91, "grad_norm": 3.833484952058422, "learning_rate": 2.7842777379840647e-08, "loss": 0.2769, "step": 68740 }, { "epoch": 2.91, "grad_norm": 4.317548014082353, "learning_rate": 2.7713185064290328e-08, "loss": 0.3293, "step": 68745 }, { "epoch": 2.91, "grad_norm": 3.579661563467071, "learning_rate": 2.7583894203843664e-08, "loss": 0.3334, "step": 68750 }, { "epoch": 2.91, "grad_norm": 3.9621202681466015, "learning_rate": 2.7454904806338277e-08, "loss": 0.3024, "step": 68755 }, { "epoch": 2.91, "grad_norm": 3.8544583587616192, "learning_rate": 2.7326216879595136e-08, "loss": 0.2946, "step": 68760 }, { "epoch": 2.91, "grad_norm": 4.0146741418993095, "learning_rate": 2.7197830431415772e-08, "loss": 0.2904, "step": 68765 }, { "epoch": 2.91, "grad_norm": 4.241244226482878, "learning_rate": 2.706974546958452e-08, "loss": 0.284, "step": 68770 }, { "epoch": 2.91, "grad_norm": 3.498336471688792, "learning_rate": 2.6941962001865718e-08, "loss": 0.2746, "step": 68775 }, { "epoch": 2.91, "grad_norm": 3.94161705685398, "learning_rate": 2.6814480036007617e-08, "loss": 0.3009, "step": 68780 }, { "epoch": 2.91, "grad_norm": 3.6808520872538018, "learning_rate": 2.6687299579739034e-08, "loss": 0.3153, "step": 68785 }, { "epoch": 2.91, "grad_norm": 4.318316742835231, "learning_rate": 2.6560420640769356e-08, "loss": 0.2821, "step": 68790 }, { "epoch": 2.91, "grad_norm": 3.946723742234188, "learning_rate": 2.6433843226791877e-08, "loss": 0.31, "step": 68795 }, { "epoch": 2.91, "grad_norm": 5.058627236064796, "learning_rate": 2.6307567345480457e-08, "loss": 0.3214, "step": 68800 }, { "epoch": 2.91, "grad_norm": 3.6815746238629887, "learning_rate": 2.6181593004490636e-08, "loss": 0.3222, "step": 68805 }, { "epoch": 2.91, "grad_norm": 4.147851876763575, "learning_rate": 2.605592021145964e-08, "loss": 0.3001, "step": 68810 }, { "epoch": 2.91, "grad_norm": 3.7776485170453227, "learning_rate": 2.5930548974007485e-08, "loss": 0.3028, "step": 68815 }, { "epoch": 2.91, "grad_norm": 3.8478125280383972, "learning_rate": 2.5805479299734204e-08, "loss": 0.2895, "step": 68820 }, { "epoch": 2.91, "grad_norm": 3.5077753251310035, "learning_rate": 2.5680711196222618e-08, "loss": 0.2964, "step": 68825 }, { "epoch": 2.91, "grad_norm": 3.742860252139474, "learning_rate": 2.5556244671037787e-08, "loss": 0.3063, "step": 68830 }, { "epoch": 2.91, "grad_norm": 4.228677553724897, "learning_rate": 2.543207973172479e-08, "loss": 0.3318, "step": 68835 }, { "epoch": 2.91, "grad_norm": 3.8349712403665177, "learning_rate": 2.530821638581149e-08, "loss": 0.2846, "step": 68840 }, { "epoch": 2.91, "grad_norm": 3.776124507539609, "learning_rate": 2.5184654640807992e-08, "loss": 0.2938, "step": 68845 }, { "epoch": 2.91, "grad_norm": 4.311411118780991, "learning_rate": 2.5061394504204973e-08, "loss": 0.3311, "step": 68850 }, { "epoch": 2.91, "grad_norm": 3.5077521269130503, "learning_rate": 2.4938435983475338e-08, "loss": 0.2958, "step": 68855 }, { "epoch": 2.91, "grad_norm": 3.6744346529510157, "learning_rate": 2.481577908607369e-08, "loss": 0.294, "step": 68860 }, { "epoch": 2.91, "grad_norm": 3.8865190295125815, "learning_rate": 2.4693423819436846e-08, "loss": 0.2894, "step": 68865 }, { "epoch": 2.91, "grad_norm": 3.954940760338175, "learning_rate": 2.4571370190982214e-08, "loss": 0.3076, "step": 68870 }, { "epoch": 2.91, "grad_norm": 4.118453112399337, "learning_rate": 2.444961820810998e-08, "loss": 0.3028, "step": 68875 }, { "epoch": 2.92, "grad_norm": 4.064592390970007, "learning_rate": 2.4328167878200916e-08, "loss": 0.3044, "step": 68880 }, { "epoch": 2.92, "grad_norm": 4.180993353802298, "learning_rate": 2.4207019208619674e-08, "loss": 0.3384, "step": 68885 }, { "epoch": 2.92, "grad_norm": 4.392095763137633, "learning_rate": 2.408617220670928e-08, "loss": 0.3039, "step": 68890 }, { "epoch": 2.92, "grad_norm": 3.772506295636982, "learning_rate": 2.3965626879797754e-08, "loss": 0.3036, "step": 68895 }, { "epoch": 2.92, "grad_norm": 3.5832293888868754, "learning_rate": 2.3845383235192587e-08, "loss": 0.3002, "step": 68900 }, { "epoch": 2.92, "grad_norm": 4.562603661030853, "learning_rate": 2.3725441280184057e-08, "loss": 0.2979, "step": 68905 }, { "epoch": 2.92, "grad_norm": 3.5511021122904136, "learning_rate": 2.3605801022044128e-08, "loss": 0.2974, "step": 68910 }, { "epoch": 2.92, "grad_norm": 3.964719018619828, "learning_rate": 2.3486462468025882e-08, "loss": 0.3138, "step": 68915 }, { "epoch": 2.92, "grad_norm": 3.6642424568558845, "learning_rate": 2.3367425625364647e-08, "loss": 0.3086, "step": 68920 }, { "epoch": 2.92, "grad_norm": 3.833275552405031, "learning_rate": 2.3248690501277426e-08, "loss": 0.3238, "step": 68925 }, { "epoch": 2.92, "grad_norm": 3.554982207288336, "learning_rate": 2.313025710296235e-08, "loss": 0.2713, "step": 68930 }, { "epoch": 2.92, "grad_norm": 3.803714723639175, "learning_rate": 2.3012125437599787e-08, "loss": 0.3123, "step": 68935 }, { "epoch": 2.92, "grad_norm": 3.516503240739867, "learning_rate": 2.2894295512352337e-08, "loss": 0.3027, "step": 68940 }, { "epoch": 2.92, "grad_norm": 3.725971668701787, "learning_rate": 2.2776767334362627e-08, "loss": 0.3119, "step": 68945 }, { "epoch": 2.92, "grad_norm": 3.91101511074878, "learning_rate": 2.2659540910757173e-08, "loss": 0.3213, "step": 68950 }, { "epoch": 2.92, "grad_norm": 4.580277042074699, "learning_rate": 2.2542616248642514e-08, "loss": 0.3171, "step": 68955 }, { "epoch": 2.92, "grad_norm": 4.058421775636477, "learning_rate": 2.2425993355106866e-08, "loss": 0.2942, "step": 68960 }, { "epoch": 2.92, "grad_norm": 3.9027736599066487, "learning_rate": 2.23096722372218e-08, "loss": 0.3027, "step": 68965 }, { "epoch": 2.92, "grad_norm": 4.0634263265857955, "learning_rate": 2.2193652902038898e-08, "loss": 0.2908, "step": 68970 }, { "epoch": 2.92, "grad_norm": 4.108222895679821, "learning_rate": 2.207793535659253e-08, "loss": 0.3183, "step": 68975 }, { "epoch": 2.92, "grad_norm": 3.7491728915013462, "learning_rate": 2.1962519607897638e-08, "loss": 0.2822, "step": 68980 }, { "epoch": 2.92, "grad_norm": 3.783634297216293, "learning_rate": 2.184740566295196e-08, "loss": 0.2915, "step": 68985 }, { "epoch": 2.92, "grad_norm": 3.850384182711361, "learning_rate": 2.1732593528734912e-08, "loss": 0.3107, "step": 68990 }, { "epoch": 2.92, "grad_norm": 3.9553156520898023, "learning_rate": 2.1618083212205932e-08, "loss": 0.3376, "step": 68995 }, { "epoch": 2.92, "grad_norm": 3.973669423882026, "learning_rate": 2.150387472030835e-08, "loss": 0.2969, "step": 69000 }, { "epoch": 2.92, "grad_norm": 3.975234145323945, "learning_rate": 2.1389968059966625e-08, "loss": 0.2958, "step": 69005 }, { "epoch": 2.92, "grad_norm": 3.6755181208108856, "learning_rate": 2.127636323808635e-08, "loss": 0.3089, "step": 69010 }, { "epoch": 2.92, "grad_norm": 3.937416320476416, "learning_rate": 2.1163060261554237e-08, "loss": 0.3108, "step": 69015 }, { "epoch": 2.92, "grad_norm": 3.9698618108124495, "learning_rate": 2.1050059137240343e-08, "loss": 0.2994, "step": 69020 }, { "epoch": 2.92, "grad_norm": 4.281887091340919, "learning_rate": 2.0937359871995855e-08, "loss": 0.2963, "step": 69025 }, { "epoch": 2.92, "grad_norm": 3.8023490505716215, "learning_rate": 2.082496247265198e-08, "loss": 0.2874, "step": 69030 }, { "epoch": 2.92, "grad_norm": 4.418863103343946, "learning_rate": 2.0712866946024922e-08, "loss": 0.302, "step": 69035 }, { "epoch": 2.92, "grad_norm": 3.5946670112554844, "learning_rate": 2.060107329890926e-08, "loss": 0.3152, "step": 69040 }, { "epoch": 2.92, "grad_norm": 3.797259351212995, "learning_rate": 2.0489581538083446e-08, "loss": 0.3171, "step": 69045 }, { "epoch": 2.92, "grad_norm": 4.032791676822362, "learning_rate": 2.037839167030653e-08, "loss": 0.3008, "step": 69050 }, { "epoch": 2.92, "grad_norm": 4.069340185753086, "learning_rate": 2.026750370231978e-08, "loss": 0.2943, "step": 69055 }, { "epoch": 2.92, "grad_norm": 4.2778729337520724, "learning_rate": 2.0156917640846152e-08, "loss": 0.2931, "step": 69060 }, { "epoch": 2.92, "grad_norm": 3.938323937829891, "learning_rate": 2.004663349258973e-08, "loss": 0.301, "step": 69065 }, { "epoch": 2.92, "grad_norm": 4.2776538753526, "learning_rate": 1.9936651264237384e-08, "loss": 0.3039, "step": 69070 }, { "epoch": 2.92, "grad_norm": 4.278914080949418, "learning_rate": 1.9826970962456006e-08, "loss": 0.297, "step": 69075 }, { "epoch": 2.92, "grad_norm": 3.9373754191968917, "learning_rate": 1.9717592593896384e-08, "loss": 0.294, "step": 69080 }, { "epoch": 2.92, "grad_norm": 3.7047491550322413, "learning_rate": 1.9608516165188774e-08, "loss": 0.3022, "step": 69085 }, { "epoch": 2.92, "grad_norm": 3.590363145597009, "learning_rate": 1.949974168294677e-08, "loss": 0.308, "step": 69090 }, { "epoch": 2.92, "grad_norm": 4.67948306767533, "learning_rate": 1.93912691537651e-08, "loss": 0.294, "step": 69095 }, { "epoch": 2.92, "grad_norm": 3.756401723490771, "learning_rate": 1.9283098584219617e-08, "loss": 0.3155, "step": 69100 }, { "epoch": 2.92, "grad_norm": 3.670150619972396, "learning_rate": 1.917522998086896e-08, "loss": 0.2968, "step": 69105 }, { "epoch": 2.92, "grad_norm": 5.244452084678132, "learning_rate": 1.906766335025234e-08, "loss": 0.321, "step": 69110 }, { "epoch": 2.93, "grad_norm": 4.236146000764753, "learning_rate": 1.896039869889177e-08, "loss": 0.3371, "step": 69115 }, { "epoch": 2.93, "grad_norm": 3.9737098962402206, "learning_rate": 1.8853436033289818e-08, "loss": 0.3235, "step": 69120 }, { "epoch": 2.93, "grad_norm": 3.68691947709129, "learning_rate": 1.8746775359931858e-08, "loss": 0.3023, "step": 69125 }, { "epoch": 2.93, "grad_norm": 4.572242257399972, "learning_rate": 1.8640416685283824e-08, "loss": 0.2954, "step": 69130 }, { "epoch": 2.93, "grad_norm": 3.8642255241899517, "learning_rate": 1.853436001579445e-08, "loss": 0.3138, "step": 69135 }, { "epoch": 2.93, "grad_norm": 4.573897828752867, "learning_rate": 1.842860535789359e-08, "loss": 0.3125, "step": 69140 }, { "epoch": 2.93, "grad_norm": 3.5674988929690343, "learning_rate": 1.8323152717992233e-08, "loss": 0.2777, "step": 69145 }, { "epoch": 2.93, "grad_norm": 3.7738493782206657, "learning_rate": 1.82180021024847e-08, "loss": 0.2959, "step": 69150 }, { "epoch": 2.93, "grad_norm": 4.115233207672061, "learning_rate": 1.8113153517744787e-08, "loss": 0.3114, "step": 69155 }, { "epoch": 2.93, "grad_norm": 3.8923182051584897, "learning_rate": 1.8008606970130184e-08, "loss": 0.3001, "step": 69160 }, { "epoch": 2.93, "grad_norm": 3.7537788036210427, "learning_rate": 1.7904362465978598e-08, "loss": 0.2928, "step": 69165 }, { "epoch": 2.93, "grad_norm": 5.026913644297368, "learning_rate": 1.780042001161053e-08, "loss": 0.3019, "step": 69170 }, { "epoch": 2.93, "grad_norm": 4.327557905748171, "learning_rate": 1.7696779613327053e-08, "loss": 0.2951, "step": 69175 }, { "epoch": 2.93, "grad_norm": 3.653596385605482, "learning_rate": 1.7593441277412028e-08, "loss": 0.3108, "step": 69180 }, { "epoch": 2.93, "grad_norm": 3.7794504415823154, "learning_rate": 1.7490405010130994e-08, "loss": 0.31, "step": 69185 }, { "epoch": 2.93, "grad_norm": 4.079691325939507, "learning_rate": 1.738767081772952e-08, "loss": 0.2985, "step": 69190 }, { "epoch": 2.93, "grad_norm": 3.4603248499999957, "learning_rate": 1.7285238706437613e-08, "loss": 0.2929, "step": 69195 }, { "epoch": 2.93, "grad_norm": 3.932217135225119, "learning_rate": 1.7183108682463644e-08, "loss": 0.3269, "step": 69200 }, { "epoch": 2.93, "grad_norm": 3.817301561769997, "learning_rate": 1.7081280752000995e-08, "loss": 0.3016, "step": 69205 }, { "epoch": 2.93, "grad_norm": 3.6481721626099834, "learning_rate": 1.6979754921222503e-08, "loss": 0.2808, "step": 69210 }, { "epoch": 2.93, "grad_norm": 4.281323146373273, "learning_rate": 1.68785311962838e-08, "loss": 0.3473, "step": 69215 }, { "epoch": 2.93, "grad_norm": 3.8704246587731785, "learning_rate": 1.6777609583321085e-08, "loss": 0.2759, "step": 69220 }, { "epoch": 2.93, "grad_norm": 3.681892559292812, "learning_rate": 1.6676990088453915e-08, "loss": 0.2876, "step": 69225 }, { "epoch": 2.93, "grad_norm": 3.8056899471265604, "learning_rate": 1.6576672717781296e-08, "loss": 0.2995, "step": 69230 }, { "epoch": 2.93, "grad_norm": 4.0175952184236134, "learning_rate": 1.6476657477386137e-08, "loss": 0.2815, "step": 69235 }, { "epoch": 2.93, "grad_norm": 4.01308490210128, "learning_rate": 1.6376944373331926e-08, "loss": 0.2945, "step": 69240 }, { "epoch": 2.93, "grad_norm": 3.789007634418479, "learning_rate": 1.6277533411663272e-08, "loss": 0.3056, "step": 69245 }, { "epoch": 2.93, "grad_norm": 4.067278453391076, "learning_rate": 1.6178424598408128e-08, "loss": 0.3047, "step": 69250 }, { "epoch": 2.93, "grad_norm": 4.380044302466743, "learning_rate": 1.6079617939575022e-08, "loss": 0.3121, "step": 69255 }, { "epoch": 2.93, "grad_norm": 4.736047962883004, "learning_rate": 1.5981113441153608e-08, "loss": 0.3034, "step": 69260 }, { "epoch": 2.93, "grad_norm": 3.8368476335647292, "learning_rate": 1.5882911109116884e-08, "loss": 0.2705, "step": 69265 }, { "epoch": 2.93, "grad_norm": 4.588978058696701, "learning_rate": 1.5785010949417868e-08, "loss": 0.3193, "step": 69270 }, { "epoch": 2.93, "grad_norm": 4.219559886938127, "learning_rate": 1.5687412967991812e-08, "loss": 0.3223, "step": 69275 }, { "epoch": 2.93, "grad_norm": 3.976212154360366, "learning_rate": 1.5590117170756204e-08, "loss": 0.2881, "step": 69280 }, { "epoch": 2.93, "grad_norm": 3.7494904366071196, "learning_rate": 1.5493123563610212e-08, "loss": 0.3126, "step": 69285 }, { "epoch": 2.93, "grad_norm": 3.9506167151953298, "learning_rate": 1.539643215243358e-08, "loss": 0.3194, "step": 69290 }, { "epoch": 2.93, "grad_norm": 4.248027628814416, "learning_rate": 1.5300042943088288e-08, "loss": 0.3029, "step": 69295 }, { "epoch": 2.93, "grad_norm": 3.3778353984149656, "learning_rate": 1.5203955941418547e-08, "loss": 0.3069, "step": 69300 }, { "epoch": 2.93, "grad_norm": 4.317202795306347, "learning_rate": 1.510817115325025e-08, "loss": 0.3049, "step": 69305 }, { "epoch": 2.93, "grad_norm": 3.8649890827610784, "learning_rate": 1.5012688584389868e-08, "loss": 0.313, "step": 69310 }, { "epoch": 2.93, "grad_norm": 4.123870003595725, "learning_rate": 1.49175082406261e-08, "loss": 0.3259, "step": 69315 }, { "epoch": 2.93, "grad_norm": 4.147584144428297, "learning_rate": 1.4822630127729886e-08, "loss": 0.327, "step": 69320 }, { "epoch": 2.93, "grad_norm": 4.049729660311289, "learning_rate": 1.4728054251453849e-08, "loss": 0.3164, "step": 69325 }, { "epoch": 2.93, "grad_norm": 5.027307755566849, "learning_rate": 1.4633780617530624e-08, "loss": 0.3027, "step": 69330 }, { "epoch": 2.93, "grad_norm": 4.265599407284306, "learning_rate": 1.4539809231676194e-08, "loss": 0.3027, "step": 69335 }, { "epoch": 2.93, "grad_norm": 3.9182774952137804, "learning_rate": 1.4446140099588224e-08, "loss": 0.2944, "step": 69340 }, { "epoch": 2.93, "grad_norm": 4.121998798297339, "learning_rate": 1.435277322694606e-08, "loss": 0.303, "step": 69345 }, { "epoch": 2.94, "grad_norm": 3.861249893732703, "learning_rate": 1.4259708619408507e-08, "loss": 0.3005, "step": 69350 }, { "epoch": 2.94, "grad_norm": 3.874154709892759, "learning_rate": 1.4166946282619387e-08, "loss": 0.2794, "step": 69355 }, { "epoch": 2.94, "grad_norm": 3.787112512513606, "learning_rate": 1.4074486222201978e-08, "loss": 0.2815, "step": 69360 }, { "epoch": 2.94, "grad_norm": 3.931507758482797, "learning_rate": 1.3982328443761795e-08, "loss": 0.3014, "step": 69365 }, { "epoch": 2.94, "grad_norm": 3.973964992026249, "learning_rate": 1.389047295288659e-08, "loss": 0.322, "step": 69370 }, { "epoch": 2.94, "grad_norm": 3.842996014133042, "learning_rate": 1.3798919755144691e-08, "loss": 0.3091, "step": 69375 }, { "epoch": 2.94, "grad_norm": 3.761891792967513, "learning_rate": 1.3707668856087208e-08, "loss": 0.3107, "step": 69380 }, { "epoch": 2.94, "grad_norm": 4.12190106774251, "learning_rate": 1.3616720261245831e-08, "loss": 0.3397, "step": 69385 }, { "epoch": 2.94, "grad_norm": 3.9791680016344486, "learning_rate": 1.3526073976135034e-08, "loss": 0.3128, "step": 69390 }, { "epoch": 2.94, "grad_norm": 3.6165125449599858, "learning_rate": 1.343573000624987e-08, "loss": 0.2972, "step": 69395 }, { "epoch": 2.94, "grad_norm": 3.952145508270869, "learning_rate": 1.3345688357068176e-08, "loss": 0.3297, "step": 69400 }, { "epoch": 2.94, "grad_norm": 4.518136034238327, "learning_rate": 1.3255949034048921e-08, "loss": 0.3155, "step": 69405 }, { "epoch": 2.94, "grad_norm": 3.6404407113721824, "learning_rate": 1.3166512042632751e-08, "loss": 0.294, "step": 69410 }, { "epoch": 2.94, "grad_norm": 3.9755185847325256, "learning_rate": 1.307737738824144e-08, "loss": 0.3104, "step": 69415 }, { "epoch": 2.94, "grad_norm": 4.290243292668716, "learning_rate": 1.2988545076279001e-08, "loss": 0.3134, "step": 69420 }, { "epoch": 2.94, "grad_norm": 4.17867803795504, "learning_rate": 1.290001511213168e-08, "loss": 0.3022, "step": 69425 }, { "epoch": 2.94, "grad_norm": 3.8650072216925864, "learning_rate": 1.2811787501166296e-08, "loss": 0.3128, "step": 69430 }, { "epoch": 2.94, "grad_norm": 4.051197051307589, "learning_rate": 1.27238622487319e-08, "loss": 0.3228, "step": 69435 }, { "epoch": 2.94, "grad_norm": 3.9562608580082426, "learning_rate": 1.2636239360159785e-08, "loss": 0.2825, "step": 69440 }, { "epoch": 2.94, "grad_norm": 4.860547114328133, "learning_rate": 1.254891884076126e-08, "loss": 0.3024, "step": 69445 }, { "epoch": 2.94, "grad_norm": 4.011063822615833, "learning_rate": 1.2461900695830976e-08, "loss": 0.3207, "step": 69450 }, { "epoch": 2.94, "grad_norm": 4.8483114025959955, "learning_rate": 1.2375184930644157e-08, "loss": 0.2928, "step": 69455 }, { "epoch": 2.94, "grad_norm": 3.81927638127644, "learning_rate": 1.2288771550458267e-08, "loss": 0.3094, "step": 69460 }, { "epoch": 2.94, "grad_norm": 3.824218051767776, "learning_rate": 1.2202660560512447e-08, "loss": 0.2972, "step": 69465 }, { "epoch": 2.94, "grad_norm": 3.5645699710634555, "learning_rate": 1.2116851966027521e-08, "loss": 0.2938, "step": 69470 }, { "epoch": 2.94, "grad_norm": 3.828378480856656, "learning_rate": 1.2031345772205439e-08, "loss": 0.3138, "step": 69475 }, { "epoch": 2.94, "grad_norm": 4.119706230422643, "learning_rate": 1.1946141984230386e-08, "loss": 0.3114, "step": 69480 }, { "epoch": 2.94, "grad_norm": 3.9064513654158826, "learning_rate": 1.1861240607268231e-08, "loss": 0.315, "step": 69485 }, { "epoch": 2.94, "grad_norm": 3.614202044562058, "learning_rate": 1.1776641646465413e-08, "loss": 0.3033, "step": 69490 }, { "epoch": 2.94, "grad_norm": 3.6173604711194085, "learning_rate": 1.1692345106951719e-08, "loss": 0.3226, "step": 69495 }, { "epoch": 2.94, "grad_norm": 4.123306587793768, "learning_rate": 1.1608350993837503e-08, "loss": 0.3178, "step": 69500 }, { "epoch": 2.94, "grad_norm": 3.877722454433807, "learning_rate": 1.1524659312215358e-08, "loss": 0.2859, "step": 69505 }, { "epoch": 2.94, "grad_norm": 4.543136355383514, "learning_rate": 1.1441270067159005e-08, "loss": 0.3104, "step": 69510 }, { "epoch": 2.94, "grad_norm": 3.7687363127494504, "learning_rate": 1.1358183263724398e-08, "loss": 0.2909, "step": 69515 }, { "epoch": 2.94, "grad_norm": 3.7110283912520132, "learning_rate": 1.127539890694862e-08, "loss": 0.2988, "step": 69520 }, { "epoch": 2.94, "grad_norm": 3.9257603308165203, "learning_rate": 1.1192917001849878e-08, "loss": 0.306, "step": 69525 }, { "epoch": 2.94, "grad_norm": 4.313500218721749, "learning_rate": 1.1110737553430284e-08, "loss": 0.3075, "step": 69530 }, { "epoch": 2.94, "grad_norm": 3.613242898220054, "learning_rate": 1.1028860566671407e-08, "loss": 0.3083, "step": 69535 }, { "epoch": 2.94, "grad_norm": 4.395869119550324, "learning_rate": 1.0947286046537053e-08, "loss": 0.279, "step": 69540 }, { "epoch": 2.94, "grad_norm": 3.6036246881065566, "learning_rate": 1.0866013997973268e-08, "loss": 0.2897, "step": 69545 }, { "epoch": 2.94, "grad_norm": 4.157828308995961, "learning_rate": 1.0785044425906666e-08, "loss": 0.3215, "step": 69550 }, { "epoch": 2.94, "grad_norm": 3.8275269253015995, "learning_rate": 1.0704377335246651e-08, "loss": 0.3141, "step": 69555 }, { "epoch": 2.94, "grad_norm": 3.8753952234786415, "learning_rate": 1.062401273088376e-08, "loss": 0.2764, "step": 69560 }, { "epoch": 2.94, "grad_norm": 4.574976864770501, "learning_rate": 1.0543950617690757e-08, "loss": 0.3163, "step": 69565 }, { "epoch": 2.94, "grad_norm": 3.927866906070172, "learning_rate": 1.0464191000520429e-08, "loss": 0.2931, "step": 69570 }, { "epoch": 2.94, "grad_norm": 4.902797342203316, "learning_rate": 1.0384733884209464e-08, "loss": 0.2911, "step": 69575 }, { "epoch": 2.94, "grad_norm": 3.6219406944057884, "learning_rate": 1.0305579273574562e-08, "loss": 0.2834, "step": 69580 }, { "epoch": 2.94, "grad_norm": 4.056491882381841, "learning_rate": 1.0226727173415218e-08, "loss": 0.3005, "step": 69585 }, { "epoch": 2.95, "grad_norm": 3.818981345711652, "learning_rate": 1.0148177588510944e-08, "loss": 0.2994, "step": 69590 }, { "epoch": 2.95, "grad_norm": 3.5204218446326014, "learning_rate": 1.0069930523624594e-08, "loss": 0.3004, "step": 69595 }, { "epoch": 2.95, "grad_norm": 4.667151481668382, "learning_rate": 9.991985983500152e-09, "loss": 0.3105, "step": 69600 }, { "epoch": 2.95, "grad_norm": 3.9494083497805264, "learning_rate": 9.914343972863284e-09, "loss": 0.3089, "step": 69605 }, { "epoch": 2.95, "grad_norm": 4.251667582732663, "learning_rate": 9.837004496420777e-09, "loss": 0.2791, "step": 69610 }, { "epoch": 2.95, "grad_norm": 3.8720616213283, "learning_rate": 9.75996755886166e-09, "loss": 0.3342, "step": 69615 }, { "epoch": 2.95, "grad_norm": 4.019648431242003, "learning_rate": 9.683233164856642e-09, "loss": 0.3115, "step": 69620 }, { "epoch": 2.95, "grad_norm": 4.218351982800034, "learning_rate": 9.606801319057557e-09, "loss": 0.2932, "step": 69625 }, { "epoch": 2.95, "grad_norm": 3.7110717126337334, "learning_rate": 9.530672026098476e-09, "loss": 0.3147, "step": 69630 }, { "epoch": 2.95, "grad_norm": 4.232999458684634, "learning_rate": 9.454845290594594e-09, "loss": 0.3157, "step": 69635 }, { "epoch": 2.95, "grad_norm": 3.984150720128455, "learning_rate": 9.379321117143903e-09, "loss": 0.3105, "step": 69640 }, { "epoch": 2.95, "grad_norm": 3.7238860231144164, "learning_rate": 9.304099510324404e-09, "loss": 0.307, "step": 69645 }, { "epoch": 2.95, "grad_norm": 4.515108910699393, "learning_rate": 9.229180474696342e-09, "loss": 0.3245, "step": 69650 }, { "epoch": 2.95, "grad_norm": 4.006913544022434, "learning_rate": 9.154564014802748e-09, "loss": 0.3072, "step": 69655 }, { "epoch": 2.95, "grad_norm": 3.9624418311113576, "learning_rate": 9.080250135166668e-09, "loss": 0.3102, "step": 69660 }, { "epoch": 2.95, "grad_norm": 3.7953230271194762, "learning_rate": 9.006238840293946e-09, "loss": 0.3116, "step": 69665 }, { "epoch": 2.95, "grad_norm": 3.294578477132442, "learning_rate": 8.932530134671547e-09, "loss": 0.3172, "step": 69670 }, { "epoch": 2.95, "grad_norm": 3.5860679718543684, "learning_rate": 8.859124022767562e-09, "loss": 0.3087, "step": 69675 }, { "epoch": 2.95, "grad_norm": 4.417211765666118, "learning_rate": 8.786020509033989e-09, "loss": 0.2921, "step": 69680 }, { "epoch": 2.95, "grad_norm": 3.833154357142647, "learning_rate": 8.713219597901169e-09, "loss": 0.339, "step": 69685 }, { "epoch": 2.95, "grad_norm": 3.7856616273749824, "learning_rate": 8.640721293783349e-09, "loss": 0.2923, "step": 69690 }, { "epoch": 2.95, "grad_norm": 4.2772075547614135, "learning_rate": 8.568525601076461e-09, "loss": 0.2996, "step": 69695 }, { "epoch": 2.95, "grad_norm": 3.8329761801228943, "learning_rate": 8.496632524157555e-09, "loss": 0.3057, "step": 69700 }, { "epoch": 2.95, "grad_norm": 4.444841685895909, "learning_rate": 8.42504206738426e-09, "loss": 0.3006, "step": 69705 }, { "epoch": 2.95, "grad_norm": 6.888675108817265, "learning_rate": 8.353754235097544e-09, "loss": 0.3017, "step": 69710 }, { "epoch": 2.95, "grad_norm": 3.4768943416676015, "learning_rate": 8.282769031619508e-09, "loss": 0.2969, "step": 69715 }, { "epoch": 2.95, "grad_norm": 3.7400112767247617, "learning_rate": 8.212086461253377e-09, "loss": 0.2887, "step": 69720 }, { "epoch": 2.95, "grad_norm": 3.8697640362474366, "learning_rate": 8.141706528285165e-09, "loss": 0.2567, "step": 69725 }, { "epoch": 2.95, "grad_norm": 3.8104736168045092, "learning_rate": 8.071629236980905e-09, "loss": 0.3104, "step": 69730 }, { "epoch": 2.95, "grad_norm": 4.050060493585424, "learning_rate": 8.001854591589419e-09, "loss": 0.2906, "step": 69735 }, { "epoch": 2.95, "grad_norm": 3.4199351512564786, "learning_rate": 7.932382596341215e-09, "loss": 0.3144, "step": 69740 }, { "epoch": 2.95, "grad_norm": 3.792928152768424, "learning_rate": 7.863213255448477e-09, "loss": 0.2983, "step": 69745 }, { "epoch": 2.95, "grad_norm": 4.479626224597136, "learning_rate": 7.794346573103407e-09, "loss": 0.2756, "step": 69750 }, { "epoch": 2.95, "grad_norm": 4.490161382552908, "learning_rate": 7.725782553482664e-09, "loss": 0.3428, "step": 69755 }, { "epoch": 2.95, "grad_norm": 3.6957980013622054, "learning_rate": 7.657521200742368e-09, "loss": 0.3251, "step": 69760 }, { "epoch": 2.95, "grad_norm": 4.010704039418363, "learning_rate": 7.589562519021432e-09, "loss": 0.3172, "step": 69765 }, { "epoch": 2.95, "grad_norm": 3.7259156914324674, "learning_rate": 7.521906512439337e-09, "loss": 0.2986, "step": 69770 }, { "epoch": 2.95, "grad_norm": 3.5170654959355, "learning_rate": 7.454553185098357e-09, "loss": 0.2944, "step": 69775 }, { "epoch": 2.95, "grad_norm": 4.023642729993144, "learning_rate": 7.387502541081892e-09, "loss": 0.3031, "step": 69780 }, { "epoch": 2.95, "grad_norm": 4.279968846870562, "learning_rate": 7.320754584454471e-09, "loss": 0.3121, "step": 69785 }, { "epoch": 2.95, "grad_norm": 3.7660069010111576, "learning_rate": 7.254309319263409e-09, "loss": 0.3027, "step": 69790 }, { "epoch": 2.95, "grad_norm": 4.006814122837297, "learning_rate": 7.188166749537151e-09, "loss": 0.3002, "step": 69795 }, { "epoch": 2.95, "grad_norm": 4.100640144034374, "learning_rate": 7.122326879285823e-09, "loss": 0.3046, "step": 69800 }, { "epoch": 2.95, "grad_norm": 3.9970810059400868, "learning_rate": 7.0567897125006776e-09, "loss": 0.3087, "step": 69805 }, { "epoch": 2.95, "grad_norm": 3.6903928157348114, "learning_rate": 6.991555253154647e-09, "loss": 0.2911, "step": 69810 }, { "epoch": 2.95, "grad_norm": 3.8697044706871493, "learning_rate": 6.926623505204011e-09, "loss": 0.3021, "step": 69815 }, { "epoch": 2.95, "grad_norm": 3.7077036560136936, "learning_rate": 6.861994472583955e-09, "loss": 0.2905, "step": 69820 }, { "epoch": 2.96, "grad_norm": 4.427155678616963, "learning_rate": 6.797668159213566e-09, "loss": 0.3276, "step": 69825 }, { "epoch": 2.96, "grad_norm": 3.656688913084691, "learning_rate": 6.733644568992504e-09, "loss": 0.286, "step": 69830 }, { "epoch": 2.96, "grad_norm": 3.810381098672481, "learning_rate": 6.669923705802661e-09, "loss": 0.3355, "step": 69835 }, { "epoch": 2.96, "grad_norm": 5.73562289250491, "learning_rate": 6.606505573507061e-09, "loss": 0.3352, "step": 69840 }, { "epoch": 2.96, "grad_norm": 4.162131414677388, "learning_rate": 6.543390175950403e-09, "loss": 0.2928, "step": 69845 }, { "epoch": 2.96, "grad_norm": 4.180381988029838, "learning_rate": 6.480577516959075e-09, "loss": 0.3311, "step": 69850 }, { "epoch": 2.96, "grad_norm": 4.390160790673757, "learning_rate": 6.4180676003416934e-09, "loss": 0.3147, "step": 69855 }, { "epoch": 2.96, "grad_norm": 3.94940857299616, "learning_rate": 6.355860429888005e-09, "loss": 0.2972, "step": 69860 }, { "epoch": 2.96, "grad_norm": 4.119850657798943, "learning_rate": 6.293956009368884e-09, "loss": 0.324, "step": 69865 }, { "epoch": 2.96, "grad_norm": 4.404943789149675, "learning_rate": 6.232354342537994e-09, "loss": 0.2992, "step": 69870 }, { "epoch": 2.96, "grad_norm": 3.9180760997053916, "learning_rate": 6.171055433129569e-09, "loss": 0.2868, "step": 69875 }, { "epoch": 2.96, "grad_norm": 4.049237073605573, "learning_rate": 6.1100592848600815e-09, "loss": 0.306, "step": 69880 }, { "epoch": 2.96, "grad_norm": 3.5391050094942385, "learning_rate": 6.049365901428239e-09, "loss": 0.3345, "step": 69885 }, { "epoch": 2.96, "grad_norm": 3.9878637425883547, "learning_rate": 5.988975286512766e-09, "loss": 0.2884, "step": 69890 }, { "epoch": 2.96, "grad_norm": 3.940702798714988, "learning_rate": 5.9288874437757324e-09, "loss": 0.2929, "step": 69895 }, { "epoch": 2.96, "grad_norm": 4.21448847756659, "learning_rate": 5.869102376859781e-09, "loss": 0.2813, "step": 69900 }, { "epoch": 2.96, "grad_norm": 3.7555692821779125, "learning_rate": 5.809620089388679e-09, "loss": 0.3041, "step": 69905 }, { "epoch": 2.96, "grad_norm": 3.7219221498414776, "learning_rate": 5.750440584970096e-09, "loss": 0.2812, "step": 69910 }, { "epoch": 2.96, "grad_norm": 4.650666089125777, "learning_rate": 5.691563867191163e-09, "loss": 0.3111, "step": 69915 }, { "epoch": 2.96, "grad_norm": 3.874403218996133, "learning_rate": 5.6329899396218024e-09, "loss": 0.3038, "step": 69920 }, { "epoch": 2.96, "grad_norm": 4.121525418993446, "learning_rate": 5.574718805812507e-09, "loss": 0.3037, "step": 69925 }, { "epoch": 2.96, "grad_norm": 3.7098169244371006, "learning_rate": 5.516750469296561e-09, "loss": 0.2875, "step": 69930 }, { "epoch": 2.96, "grad_norm": 4.29946152871183, "learning_rate": 5.459084933587822e-09, "loss": 0.3102, "step": 69935 }, { "epoch": 2.96, "grad_norm": 4.307436220250806, "learning_rate": 5.4017222021834906e-09, "loss": 0.3029, "step": 69940 }, { "epoch": 2.96, "grad_norm": 4.8625532152485755, "learning_rate": 5.344662278560231e-09, "loss": 0.3005, "step": 69945 }, { "epoch": 2.96, "grad_norm": 3.93674031209828, "learning_rate": 5.287905166178053e-09, "loss": 0.3102, "step": 69950 }, { "epoch": 2.96, "grad_norm": 4.608890140294212, "learning_rate": 5.231450868478094e-09, "loss": 0.2946, "step": 69955 }, { "epoch": 2.96, "grad_norm": 3.9659831383934794, "learning_rate": 5.17529938888206e-09, "loss": 0.3024, "step": 69960 }, { "epoch": 2.96, "grad_norm": 3.769673161955406, "learning_rate": 5.119450730795561e-09, "loss": 0.3019, "step": 69965 }, { "epoch": 2.96, "grad_norm": 5.009075825119275, "learning_rate": 5.063904897603666e-09, "loss": 0.3052, "step": 69970 }, { "epoch": 2.96, "grad_norm": 4.7758522555049785, "learning_rate": 5.008661892673683e-09, "loss": 0.2945, "step": 69975 }, { "epoch": 2.96, "grad_norm": 3.8244012773921563, "learning_rate": 4.9537217193557085e-09, "loss": 0.2857, "step": 69980 }, { "epoch": 2.96, "grad_norm": 4.232909999497438, "learning_rate": 4.899084380980412e-09, "loss": 0.3078, "step": 69985 }, { "epoch": 2.96, "grad_norm": 4.318898272923827, "learning_rate": 4.844749880859589e-09, "loss": 0.3069, "step": 69990 }, { "epoch": 2.96, "grad_norm": 3.8983974547088533, "learning_rate": 4.790718222287827e-09, "loss": 0.2935, "step": 69995 }, { "epoch": 2.96, "grad_norm": 3.8579075172114634, "learning_rate": 4.736989408541392e-09, "loss": 0.2882, "step": 70000 }, { "epoch": 2.96, "grad_norm": 4.177278943671146, "learning_rate": 4.683563442877126e-09, "loss": 0.3075, "step": 70005 }, { "epoch": 2.96, "grad_norm": 3.345862178897866, "learning_rate": 4.630440328534103e-09, "loss": 0.3034, "step": 70010 }, { "epoch": 2.96, "grad_norm": 3.839523640507528, "learning_rate": 4.577620068733079e-09, "loss": 0.2986, "step": 70015 }, { "epoch": 2.96, "grad_norm": 4.087982010295637, "learning_rate": 4.525102666676495e-09, "loss": 0.3092, "step": 70020 }, { "epoch": 2.96, "grad_norm": 3.8927770936361377, "learning_rate": 4.472888125548469e-09, "loss": 0.2869, "step": 70025 }, { "epoch": 2.96, "grad_norm": 3.9048928132621006, "learning_rate": 4.4209764485142474e-09, "loss": 0.2857, "step": 70030 }, { "epoch": 2.96, "grad_norm": 3.818364739173185, "learning_rate": 4.369367638721311e-09, "loss": 0.3068, "step": 70035 }, { "epoch": 2.96, "grad_norm": 3.7925570106816706, "learning_rate": 4.3180616992988255e-09, "loss": 0.2796, "step": 70040 }, { "epoch": 2.96, "grad_norm": 4.31552668830435, "learning_rate": 4.267058633356525e-09, "loss": 0.2887, "step": 70045 }, { "epoch": 2.96, "grad_norm": 4.524636109612765, "learning_rate": 4.216358443986934e-09, "loss": 0.3232, "step": 70050 }, { "epoch": 2.96, "grad_norm": 4.214091939910933, "learning_rate": 4.165961134264263e-09, "loss": 0.3072, "step": 70055 }, { "epoch": 2.97, "grad_norm": 3.81128564753025, "learning_rate": 4.115866707243843e-09, "loss": 0.2762, "step": 70060 }, { "epoch": 2.97, "grad_norm": 3.8822172031713955, "learning_rate": 4.0660751659621355e-09, "loss": 0.3037, "step": 70065 }, { "epoch": 2.97, "grad_norm": 4.107891610460311, "learning_rate": 4.016586513438392e-09, "loss": 0.3002, "step": 70070 }, { "epoch": 2.97, "grad_norm": 3.6077395429264008, "learning_rate": 3.96740075267299e-09, "loss": 0.3093, "step": 70075 }, { "epoch": 2.97, "grad_norm": 3.8219143428417977, "learning_rate": 3.918517886647988e-09, "loss": 0.3128, "step": 70080 }, { "epoch": 2.97, "grad_norm": 4.14871013347077, "learning_rate": 3.869937918326572e-09, "loss": 0.3155, "step": 70085 }, { "epoch": 2.97, "grad_norm": 4.473790800532902, "learning_rate": 3.821660850654163e-09, "loss": 0.3217, "step": 70090 }, { "epoch": 2.97, "grad_norm": 4.1128048361330425, "learning_rate": 3.773686686557865e-09, "loss": 0.3005, "step": 70095 }, { "epoch": 2.97, "grad_norm": 3.867898629450061, "learning_rate": 3.726015428945906e-09, "loss": 0.2873, "step": 70100 }, { "epoch": 2.97, "grad_norm": 3.6937437124934593, "learning_rate": 3.6786470807087525e-09, "loss": 0.2965, "step": 70105 }, { "epoch": 2.97, "grad_norm": 4.174590403505056, "learning_rate": 3.6315816447179965e-09, "loss": 0.3127, "step": 70110 }, { "epoch": 2.97, "grad_norm": 3.8031941633379276, "learning_rate": 3.5848191238274654e-09, "loss": 0.2737, "step": 70115 }, { "epoch": 2.97, "grad_norm": 3.8379302537153204, "learning_rate": 3.5383595208715594e-09, "loss": 0.2918, "step": 70120 }, { "epoch": 2.97, "grad_norm": 4.29588383567088, "learning_rate": 3.492202838667469e-09, "loss": 0.3313, "step": 70125 }, { "epoch": 2.97, "grad_norm": 4.041925819341384, "learning_rate": 3.4463490800135114e-09, "loss": 0.3034, "step": 70130 }, { "epoch": 2.97, "grad_norm": 4.560977162463394, "learning_rate": 3.400798247689685e-09, "loss": 0.3006, "step": 70135 }, { "epoch": 2.97, "grad_norm": 3.475642592477241, "learning_rate": 3.3555503444571146e-09, "loss": 0.3306, "step": 70140 }, { "epoch": 2.97, "grad_norm": 3.634522028690794, "learning_rate": 3.3106053730597165e-09, "loss": 0.3116, "step": 70145 }, { "epoch": 2.97, "grad_norm": 4.597345494503631, "learning_rate": 3.2659633362219776e-09, "loss": 0.3139, "step": 70150 }, { "epoch": 2.97, "grad_norm": 3.777776404919409, "learning_rate": 3.221624236650622e-09, "loss": 0.3198, "step": 70155 }, { "epoch": 2.97, "grad_norm": 3.8467046863581977, "learning_rate": 3.1775880770335e-09, "loss": 0.2871, "step": 70160 }, { "epoch": 2.97, "grad_norm": 3.566164710985339, "learning_rate": 3.133854860040697e-09, "loss": 0.2811, "step": 70165 }, { "epoch": 2.97, "grad_norm": 3.92608062355234, "learning_rate": 3.0904245883234262e-09, "loss": 0.3403, "step": 70170 }, { "epoch": 2.97, "grad_norm": 4.446020332177124, "learning_rate": 3.0472972645151366e-09, "loss": 0.3349, "step": 70175 }, { "epoch": 2.97, "grad_norm": 4.291256179270392, "learning_rate": 3.0044728912298483e-09, "loss": 0.2996, "step": 70180 }, { "epoch": 2.97, "grad_norm": 3.9132330612977824, "learning_rate": 2.961951471064373e-09, "loss": 0.3061, "step": 70185 }, { "epoch": 2.97, "grad_norm": 3.553729955094563, "learning_rate": 2.919733006596648e-09, "loss": 0.2759, "step": 70190 }, { "epoch": 2.97, "grad_norm": 3.4775475580804556, "learning_rate": 2.877817500385738e-09, "loss": 0.294, "step": 70195 }, { "epoch": 2.97, "grad_norm": 3.9211644211674526, "learning_rate": 2.8362049549734984e-09, "loss": 0.3105, "step": 70200 }, { "epoch": 2.97, "grad_norm": 3.7123482755314425, "learning_rate": 2.7948953728829107e-09, "loss": 0.2922, "step": 70205 }, { "epoch": 2.97, "grad_norm": 4.5129504433481875, "learning_rate": 2.7538887566175288e-09, "loss": 0.3139, "step": 70210 }, { "epoch": 2.97, "grad_norm": 3.472807759825269, "learning_rate": 2.7131851086642515e-09, "loss": 0.3273, "step": 70215 }, { "epoch": 2.97, "grad_norm": 4.115108764837657, "learning_rate": 2.6727844314905493e-09, "loss": 0.3171, "step": 70220 }, { "epoch": 2.97, "grad_norm": 3.9345346837315036, "learning_rate": 2.632686727545575e-09, "loss": 0.2842, "step": 70225 }, { "epoch": 2.97, "grad_norm": 3.4657165417155484, "learning_rate": 2.592891999260716e-09, "loss": 0.304, "step": 70230 }, { "epoch": 2.97, "grad_norm": 3.6797386700992107, "learning_rate": 2.553400249048488e-09, "loss": 0.2895, "step": 70235 }, { "epoch": 2.97, "grad_norm": 3.5680159930287014, "learning_rate": 2.514211479303086e-09, "loss": 0.2937, "step": 70240 }, { "epoch": 2.97, "grad_norm": 4.2110032538823585, "learning_rate": 2.4753256924003876e-09, "loss": 0.3058, "step": 70245 }, { "epoch": 2.97, "grad_norm": 3.7420552814800745, "learning_rate": 2.4367428906985067e-09, "loss": 0.3185, "step": 70250 }, { "epoch": 2.97, "grad_norm": 4.475126468660208, "learning_rate": 2.3984630765355733e-09, "loss": 0.3011, "step": 70255 }, { "epoch": 2.97, "grad_norm": 4.71884856987223, "learning_rate": 2.360486252233063e-09, "loss": 0.3268, "step": 70260 }, { "epoch": 2.97, "grad_norm": 3.834630901361669, "learning_rate": 2.3228124200930237e-09, "loss": 0.3095, "step": 70265 }, { "epoch": 2.97, "grad_norm": 4.308866744044423, "learning_rate": 2.285441582400294e-09, "loss": 0.3086, "step": 70270 }, { "epoch": 2.97, "grad_norm": 4.508513445780656, "learning_rate": 2.2483737414197292e-09, "loss": 0.292, "step": 70275 }, { "epoch": 2.97, "grad_norm": 4.197499560277858, "learning_rate": 2.211608899398421e-09, "loss": 0.3177, "step": 70280 }, { "epoch": 2.97, "grad_norm": 4.033360743806799, "learning_rate": 2.1751470585662516e-09, "loss": 0.3177, "step": 70285 }, { "epoch": 2.97, "grad_norm": 4.046989229747052, "learning_rate": 2.1389882211336752e-09, "loss": 0.319, "step": 70290 }, { "epoch": 2.98, "grad_norm": 3.8764286349236907, "learning_rate": 2.103132389292273e-09, "loss": 0.293, "step": 70295 }, { "epoch": 2.98, "grad_norm": 3.5827714786633127, "learning_rate": 2.0675795652158603e-09, "loss": 0.322, "step": 70300 }, { "epoch": 2.98, "grad_norm": 3.893423812906061, "learning_rate": 2.032329751060491e-09, "loss": 0.3038, "step": 70305 }, { "epoch": 2.98, "grad_norm": 3.8297518258550536, "learning_rate": 1.9973829489627892e-09, "loss": 0.3056, "step": 70310 }, { "epoch": 2.98, "grad_norm": 3.5853639957448995, "learning_rate": 1.9627391610416158e-09, "loss": 0.2799, "step": 70315 }, { "epoch": 2.98, "grad_norm": 3.907066856644201, "learning_rate": 1.9283983893975124e-09, "loss": 0.3252, "step": 70320 }, { "epoch": 2.98, "grad_norm": 3.801238706118709, "learning_rate": 1.894360636112702e-09, "loss": 0.279, "step": 70325 }, { "epoch": 2.98, "grad_norm": 3.9912177166410925, "learning_rate": 1.8606259032499795e-09, "loss": 0.2974, "step": 70330 }, { "epoch": 2.98, "grad_norm": 3.770227814867083, "learning_rate": 1.8271941928549308e-09, "loss": 0.2839, "step": 70335 }, { "epoch": 2.98, "grad_norm": 3.6590503174626754, "learning_rate": 1.794065506954823e-09, "loss": 0.3117, "step": 70340 }, { "epoch": 2.98, "grad_norm": 3.7252534169698523, "learning_rate": 1.7612398475574944e-09, "loss": 0.3315, "step": 70345 }, { "epoch": 2.98, "grad_norm": 4.209130209125072, "learning_rate": 1.7287172166535749e-09, "loss": 0.3135, "step": 70350 }, { "epoch": 2.98, "grad_norm": 3.976178900451546, "learning_rate": 1.6964976162142655e-09, "loss": 0.3134, "step": 70355 }, { "epoch": 2.98, "grad_norm": 3.9863273489235915, "learning_rate": 1.6645810481935587e-09, "loss": 0.2841, "step": 70360 }, { "epoch": 2.98, "grad_norm": 3.869330246996778, "learning_rate": 1.6329675145265733e-09, "loss": 0.3158, "step": 70365 }, { "epoch": 2.98, "grad_norm": 4.35833733446752, "learning_rate": 1.6016570171289992e-09, "loss": 0.3101, "step": 70370 }, { "epoch": 2.98, "grad_norm": 4.073039873960955, "learning_rate": 1.5706495579004277e-09, "loss": 0.3016, "step": 70375 }, { "epoch": 2.98, "grad_norm": 3.881411117558849, "learning_rate": 1.5399451387193564e-09, "loss": 0.3121, "step": 70380 }, { "epoch": 2.98, "grad_norm": 3.572663016631878, "learning_rate": 1.5095437614481845e-09, "loss": 0.3114, "step": 70385 }, { "epoch": 2.98, "grad_norm": 3.8428242356451223, "learning_rate": 1.479445427929882e-09, "loss": 0.2839, "step": 70390 }, { "epoch": 2.98, "grad_norm": 3.7496785901952943, "learning_rate": 1.4496501399891005e-09, "loss": 0.3187, "step": 70395 }, { "epoch": 2.98, "grad_norm": 4.228475694261304, "learning_rate": 1.4201578994327282e-09, "loss": 0.3016, "step": 70400 }, { "epoch": 2.98, "grad_norm": 3.8454342868258684, "learning_rate": 1.3909687080476685e-09, "loss": 0.3038, "step": 70405 }, { "epoch": 2.98, "grad_norm": 4.019206989000221, "learning_rate": 1.3620825676047277e-09, "loss": 0.3154, "step": 70410 }, { "epoch": 2.98, "grad_norm": 3.6259399780735695, "learning_rate": 1.3334994798547274e-09, "loss": 0.31, "step": 70415 }, { "epoch": 2.98, "grad_norm": 3.9160310875698108, "learning_rate": 1.3052194465307255e-09, "loss": 0.3067, "step": 70420 }, { "epoch": 2.98, "grad_norm": 3.5944950592481257, "learning_rate": 1.2772424693469064e-09, "loss": 0.3032, "step": 70425 }, { "epoch": 2.98, "grad_norm": 4.061033428497966, "learning_rate": 1.2495685499996913e-09, "loss": 0.2841, "step": 70430 }, { "epoch": 2.98, "grad_norm": 4.416399802931741, "learning_rate": 1.2221976901666267e-09, "loss": 0.3004, "step": 70435 }, { "epoch": 2.98, "grad_norm": 4.348335494132366, "learning_rate": 1.195129891508051e-09, "loss": 0.3018, "step": 70440 }, { "epoch": 2.98, "grad_norm": 4.033180939001871, "learning_rate": 1.1683651556637643e-09, "loss": 0.2999, "step": 70445 }, { "epoch": 2.98, "grad_norm": 3.6444732632307626, "learning_rate": 1.141903484256912e-09, "loss": 0.2747, "step": 70450 }, { "epoch": 2.98, "grad_norm": 3.6395846932311007, "learning_rate": 1.115744878891767e-09, "loss": 0.2529, "step": 70455 }, { "epoch": 2.98, "grad_norm": 4.072445173695659, "learning_rate": 1.0898893411542822e-09, "loss": 0.3001, "step": 70460 }, { "epoch": 2.98, "grad_norm": 4.1117817542443404, "learning_rate": 1.0643368726126479e-09, "loss": 0.3257, "step": 70465 }, { "epoch": 2.98, "grad_norm": 3.964724979286409, "learning_rate": 1.0390874748145153e-09, "loss": 0.3136, "step": 70470 }, { "epoch": 2.98, "grad_norm": 4.242072867810214, "learning_rate": 1.0141411492919916e-09, "loss": 0.3266, "step": 70475 }, { "epoch": 2.98, "grad_norm": 3.910976360279106, "learning_rate": 9.894978975572011e-10, "loss": 0.3071, "step": 70480 }, { "epoch": 2.98, "grad_norm": 3.9953559913131897, "learning_rate": 9.651577211039486e-10, "loss": 0.3016, "step": 70485 }, { "epoch": 2.98, "grad_norm": 3.8049983620172707, "learning_rate": 9.411206214082758e-10, "loss": 0.3099, "step": 70490 }, { "epoch": 2.98, "grad_norm": 3.9207910434479607, "learning_rate": 9.173865999273501e-10, "loss": 0.2862, "step": 70495 }, { "epoch": 2.98, "grad_norm": 4.29459758305129, "learning_rate": 8.939556580994657e-10, "loss": 0.3042, "step": 70500 }, { "epoch": 2.98, "grad_norm": 3.752689334246048, "learning_rate": 8.708277973462631e-10, "loss": 0.2877, "step": 70505 }, { "epoch": 2.98, "grad_norm": 3.4257914947500425, "learning_rate": 8.480030190688438e-10, "loss": 0.3088, "step": 70510 }, { "epoch": 2.98, "grad_norm": 3.9799008207804905, "learning_rate": 8.254813246516558e-10, "loss": 0.293, "step": 70515 }, { "epoch": 2.98, "grad_norm": 3.8469021372194026, "learning_rate": 8.032627154602735e-10, "loss": 0.3099, "step": 70520 }, { "epoch": 2.98, "grad_norm": 4.470557347177352, "learning_rate": 7.813471928413973e-10, "loss": 0.2897, "step": 70525 }, { "epoch": 2.98, "grad_norm": 3.6365721921530403, "learning_rate": 7.597347581234094e-10, "loss": 0.2916, "step": 70530 }, { "epoch": 2.99, "grad_norm": 3.8579595490789367, "learning_rate": 7.384254126169277e-10, "loss": 0.3067, "step": 70535 }, { "epoch": 2.99, "grad_norm": 4.04075518314832, "learning_rate": 7.17419157614252e-10, "loss": 0.2903, "step": 70540 }, { "epoch": 2.99, "grad_norm": 3.753720291366955, "learning_rate": 6.96715994388808e-10, "loss": 0.2924, "step": 70545 }, { "epoch": 2.99, "grad_norm": 4.006901122571129, "learning_rate": 6.763159241951478e-10, "loss": 0.3253, "step": 70550 }, { "epoch": 2.99, "grad_norm": 3.757779331675829, "learning_rate": 6.56218948270615e-10, "loss": 0.2922, "step": 70555 }, { "epoch": 2.99, "grad_norm": 3.732034919181486, "learning_rate": 6.364250678331241e-10, "loss": 0.298, "step": 70560 }, { "epoch": 2.99, "grad_norm": 4.323401385440728, "learning_rate": 6.169342840833814e-10, "loss": 0.2831, "step": 70565 }, { "epoch": 2.99, "grad_norm": 3.7095818812359633, "learning_rate": 5.977465982026642e-10, "loss": 0.3079, "step": 70570 }, { "epoch": 2.99, "grad_norm": 4.4984531606362586, "learning_rate": 5.788620113544863e-10, "loss": 0.3125, "step": 70575 }, { "epoch": 2.99, "grad_norm": 3.9580511462286676, "learning_rate": 5.602805246834875e-10, "loss": 0.3245, "step": 70580 }, { "epoch": 2.99, "grad_norm": 3.59066109035715, "learning_rate": 5.420021393165442e-10, "loss": 0.2978, "step": 70585 }, { "epoch": 2.99, "grad_norm": 3.665580764167094, "learning_rate": 5.240268563611039e-10, "loss": 0.2773, "step": 70590 }, { "epoch": 2.99, "grad_norm": 3.7402880381343766, "learning_rate": 5.063546769079608e-10, "loss": 0.3026, "step": 70595 }, { "epoch": 2.99, "grad_norm": 4.038462855292636, "learning_rate": 4.889856020279249e-10, "loss": 0.299, "step": 70600 }, { "epoch": 2.99, "grad_norm": 3.8905543612946873, "learning_rate": 4.719196327740427e-10, "loss": 0.2878, "step": 70605 }, { "epoch": 2.99, "grad_norm": 4.078318973530521, "learning_rate": 4.551567701810422e-10, "loss": 0.3115, "step": 70610 }, { "epoch": 2.99, "grad_norm": 4.047447722816276, "learning_rate": 4.386970152653325e-10, "loss": 0.3015, "step": 70615 }, { "epoch": 2.99, "grad_norm": 3.6460754802776254, "learning_rate": 4.2254036902500404e-10, "loss": 0.3162, "step": 70620 }, { "epoch": 2.99, "grad_norm": 3.8134927567707453, "learning_rate": 4.0668683243927364e-10, "loss": 0.3091, "step": 70625 }, { "epoch": 2.99, "grad_norm": 3.9807379984830185, "learning_rate": 3.911364064690393e-10, "loss": 0.3061, "step": 70630 }, { "epoch": 2.99, "grad_norm": 4.252773462840397, "learning_rate": 3.758890920574354e-10, "loss": 0.2951, "step": 70635 }, { "epoch": 2.99, "grad_norm": 3.5370726243380965, "learning_rate": 3.609448901287227e-10, "loss": 0.2892, "step": 70640 }, { "epoch": 2.99, "grad_norm": 4.080216421472142, "learning_rate": 3.4630380158939824e-10, "loss": 0.3099, "step": 70645 }, { "epoch": 2.99, "grad_norm": 3.9873977651378483, "learning_rate": 3.3196582732653026e-10, "loss": 0.2763, "step": 70650 }, { "epoch": 2.99, "grad_norm": 4.05471264951897, "learning_rate": 3.1793096820942336e-10, "loss": 0.3037, "step": 70655 }, { "epoch": 2.99, "grad_norm": 3.888577614060574, "learning_rate": 3.041992250890635e-10, "loss": 0.2928, "step": 70660 }, { "epoch": 2.99, "grad_norm": 4.247596436763597, "learning_rate": 2.90770598798118e-10, "loss": 0.3139, "step": 70665 }, { "epoch": 2.99, "grad_norm": 4.316810081793586, "learning_rate": 2.776450901503802e-10, "loss": 0.3065, "step": 70670 }, { "epoch": 2.99, "grad_norm": 3.943195477001331, "learning_rate": 2.6482269994243526e-10, "loss": 0.2867, "step": 70675 }, { "epoch": 2.99, "grad_norm": 3.7491797138481315, "learning_rate": 2.52303428950329e-10, "loss": 0.2909, "step": 70680 }, { "epoch": 2.99, "grad_norm": 4.1464391544718096, "learning_rate": 2.400872779345642e-10, "loss": 0.305, "step": 70685 }, { "epoch": 2.99, "grad_norm": 3.851233546529492, "learning_rate": 2.2817424763454943e-10, "loss": 0.2901, "step": 70690 }, { "epoch": 2.99, "grad_norm": 4.076400367655774, "learning_rate": 2.1656433877303984e-10, "loss": 0.3076, "step": 70695 }, { "epoch": 2.99, "grad_norm": 3.7909089798085005, "learning_rate": 2.0525755205447194e-10, "loss": 0.3248, "step": 70700 }, { "epoch": 2.99, "grad_norm": 4.037527495697327, "learning_rate": 1.9425388816329827e-10, "loss": 0.3162, "step": 70705 }, { "epoch": 2.99, "grad_norm": 3.7089612532748295, "learning_rate": 1.8355334776676283e-10, "loss": 0.2941, "step": 70710 }, { "epoch": 2.99, "grad_norm": 4.100527845485909, "learning_rate": 1.7315593151434606e-10, "loss": 0.2922, "step": 70715 }, { "epoch": 2.99, "grad_norm": 3.605224103909242, "learning_rate": 1.6306164003609958e-10, "loss": 0.2957, "step": 70720 }, { "epoch": 2.99, "grad_norm": 4.453543295072825, "learning_rate": 1.5327047394375627e-10, "loss": 0.2985, "step": 70725 }, { "epoch": 2.99, "grad_norm": 3.8118576540502134, "learning_rate": 1.4378243383128544e-10, "loss": 0.3098, "step": 70730 }, { "epoch": 2.99, "grad_norm": 4.147331427768618, "learning_rate": 1.345975202737826e-10, "loss": 0.3117, "step": 70735 }, { "epoch": 2.99, "grad_norm": 4.602903411192248, "learning_rate": 1.257157338274695e-10, "loss": 0.3134, "step": 70740 }, { "epoch": 2.99, "grad_norm": 3.6822477208693494, "learning_rate": 1.171370750319145e-10, "loss": 0.2922, "step": 70745 }, { "epoch": 2.99, "grad_norm": 3.380180310217267, "learning_rate": 1.0886154440670205e-10, "loss": 0.2834, "step": 70750 }, { "epoch": 2.99, "grad_norm": 3.403085142580083, "learning_rate": 1.0088914245309778e-10, "loss": 0.3142, "step": 70755 }, { "epoch": 2.99, "grad_norm": 3.713778823815118, "learning_rate": 9.321986965515894e-11, "loss": 0.2854, "step": 70760 }, { "epoch": 2.99, "grad_norm": 3.9908660392139175, "learning_rate": 8.5853726478069e-11, "loss": 0.2888, "step": 70765 }, { "epoch": 3.0, "grad_norm": 3.7075799983838698, "learning_rate": 7.879071336702738e-11, "loss": 0.2976, "step": 70770 }, { "epoch": 3.0, "grad_norm": 3.982746880921896, "learning_rate": 7.203083075169037e-11, "loss": 0.2981, "step": 70775 }, { "epoch": 3.0, "grad_norm": 4.82154141933062, "learning_rate": 6.55740790417303e-11, "loss": 0.3133, "step": 70780 }, { "epoch": 3.0, "grad_norm": 3.869583535042438, "learning_rate": 5.942045862739055e-11, "loss": 0.2852, "step": 70785 }, { "epoch": 3.0, "grad_norm": 3.918289435262239, "learning_rate": 5.3569969883371376e-11, "loss": 0.287, "step": 70790 }, { "epoch": 3.0, "grad_norm": 3.846698890372857, "learning_rate": 4.802261316272372e-11, "loss": 0.3282, "step": 70795 }, { "epoch": 3.0, "grad_norm": 3.969683298009337, "learning_rate": 4.2778388803510484e-11, "loss": 0.2916, "step": 70800 }, { "epoch": 3.0, "grad_norm": 3.767344262973072, "learning_rate": 3.7837297122145236e-11, "loss": 0.3059, "step": 70805 }, { "epoch": 3.0, "grad_norm": 3.612186813738805, "learning_rate": 3.3199338418943294e-11, "loss": 0.2826, "step": 70810 }, { "epoch": 3.0, "grad_norm": 3.513144080364581, "learning_rate": 2.8864512975346204e-11, "loss": 0.3038, "step": 70815 }, { "epoch": 3.0, "grad_norm": 3.4086362770214045, "learning_rate": 2.4832821053366597e-11, "loss": 0.3084, "step": 70820 }, { "epoch": 3.0, "grad_norm": 4.614494861743323, "learning_rate": 2.1104262898363758e-11, "loss": 0.3175, "step": 70825 }, { "epoch": 3.0, "grad_norm": 3.819530642605835, "learning_rate": 1.7678838735157854e-11, "loss": 0.2838, "step": 70830 }, { "epoch": 3.0, "grad_norm": 3.7057447575294216, "learning_rate": 1.4556548772470813e-11, "loss": 0.3005, "step": 70835 }, { "epoch": 3.0, "grad_norm": 3.777074680632009, "learning_rate": 1.173739319959566e-11, "loss": 0.2786, "step": 70840 }, { "epoch": 3.0, "grad_norm": 3.9879301558525824, "learning_rate": 9.221372186951627e-12, "loss": 0.2998, "step": 70845 }, { "epoch": 3.0, "grad_norm": 3.61498691886502, "learning_rate": 7.008485887194383e-12, "loss": 0.3116, "step": 70850 }, { "epoch": 3.0, "grad_norm": 4.217555785581664, "learning_rate": 5.098734434660912e-12, "loss": 0.3335, "step": 70855 }, { "epoch": 3.0, "grad_norm": 4.419913126093161, "learning_rate": 3.4921179448144103e-12, "loss": 0.3083, "step": 70860 }, { "epoch": 3.0, "grad_norm": 3.7644958113238682, "learning_rate": 2.1886365153545032e-12, "loss": 0.3034, "step": 70865 }, { "epoch": 3.0, "grad_norm": 5.072162291473488, "learning_rate": 1.1882902251070249e-12, "loss": 0.278, "step": 70870 }, { "epoch": 3.0, "grad_norm": 3.9881709860328476, "learning_rate": 4.910791351342425e-13, "loss": 0.2982, "step": 70875 }, { "epoch": 3.0, "grad_norm": 3.951610246893262, "learning_rate": 9.700328706951923e-14, "loss": 0.2976, "step": 70880 }, { "epoch": 3.0, "eval_loss": 0.8875364065170288, "eval_runtime": 37.0526, "eval_samples_per_second": 31.55, "eval_steps_per_second": 3.967, "step": 70884 }, { "epoch": 3.0, "step": 70884, "total_flos": 2440295425769472.0, "train_loss": 0.81574390725434, "train_runtime": 170799.3559, "train_samples_per_second": 6.64, "train_steps_per_second": 0.415 } ], "logging_steps": 5, "max_steps": 70884, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 2440295425769472.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }