{ "best_metric": 1.1968414783477783, "best_model_checkpoint": "./results/models/checkpoint-307584", "epoch": 16.0, "eval_steps": 500, "global_step": 307584, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0019989596337910945, "loss": 2.0581, "step": 500 }, { "epoch": 0.05, "learning_rate": 0.001997919267582189, "loss": 1.5829, "step": 1000 }, { "epoch": 0.08, "learning_rate": 0.0019968789013732834, "loss": 1.5037, "step": 1500 }, { "epoch": 0.1, "learning_rate": 0.001995838535164378, "loss": 1.4638, "step": 2000 }, { "epoch": 0.13, "learning_rate": 0.0019947981689554723, "loss": 1.4347, "step": 2500 }, { "epoch": 0.16, "learning_rate": 0.001993757802746567, "loss": 1.4135, "step": 3000 }, { "epoch": 0.18, "learning_rate": 0.0019927174365376612, "loss": 1.3959, "step": 3500 }, { "epoch": 0.21, "learning_rate": 0.0019916770703287557, "loss": 1.3816, "step": 4000 }, { "epoch": 0.23, "learning_rate": 0.00199063670411985, "loss": 1.3715, "step": 4500 }, { "epoch": 0.26, "learning_rate": 0.0019895963379109446, "loss": 1.3634, "step": 5000 }, { "epoch": 0.29, "learning_rate": 0.001988555971702039, "loss": 1.354, "step": 5500 }, { "epoch": 0.31, "learning_rate": 0.0019875156054931335, "loss": 1.3475, "step": 6000 }, { "epoch": 0.34, "learning_rate": 0.001986475239284228, "loss": 1.3392, "step": 6500 }, { "epoch": 0.36, "learning_rate": 0.0019854348730753224, "loss": 1.3336, "step": 7000 }, { "epoch": 0.39, "learning_rate": 0.001984394506866417, "loss": 1.3283, "step": 7500 }, { "epoch": 0.42, "learning_rate": 0.0019833541406575114, "loss": 1.3234, "step": 8000 }, { "epoch": 0.44, "learning_rate": 0.001982313774448606, "loss": 1.321, "step": 8500 }, { "epoch": 0.47, "learning_rate": 0.0019812734082397003, "loss": 1.3154, "step": 9000 }, { "epoch": 0.49, "learning_rate": 0.0019802330420307947, "loss": 1.3126, "step": 9500 }, { "epoch": 0.52, "learning_rate": 0.0019791926758218896, "loss": 1.3087, "step": 10000 }, { "epoch": 0.55, "learning_rate": 0.001978152309612984, "loss": 1.3058, "step": 10500 }, { "epoch": 0.57, "learning_rate": 0.001977111943404078, "loss": 1.3022, "step": 11000 }, { "epoch": 0.6, "learning_rate": 0.0019760715771951726, "loss": 1.3002, "step": 11500 }, { "epoch": 0.62, "learning_rate": 0.001975031210986267, "loss": 1.2965, "step": 12000 }, { "epoch": 0.65, "learning_rate": 0.0019739908447773615, "loss": 1.2948, "step": 12500 }, { "epoch": 0.68, "learning_rate": 0.0019729504785684564, "loss": 1.2932, "step": 13000 }, { "epoch": 0.7, "learning_rate": 0.001971910112359551, "loss": 1.289, "step": 13500 }, { "epoch": 0.73, "learning_rate": 0.0019708697461506453, "loss": 1.2894, "step": 14000 }, { "epoch": 0.75, "learning_rate": 0.0019698293799417393, "loss": 1.2862, "step": 14500 }, { "epoch": 0.78, "learning_rate": 0.0019687890137328337, "loss": 1.2827, "step": 15000 }, { "epoch": 0.81, "learning_rate": 0.0019677486475239286, "loss": 1.2818, "step": 15500 }, { "epoch": 0.83, "learning_rate": 0.001966708281315023, "loss": 1.2813, "step": 16000 }, { "epoch": 0.86, "learning_rate": 0.0019656679151061176, "loss": 1.279, "step": 16500 }, { "epoch": 0.88, "learning_rate": 0.001964627548897212, "loss": 1.276, "step": 17000 }, { "epoch": 0.91, "learning_rate": 0.0019635871826883065, "loss": 1.2761, "step": 17500 }, { "epoch": 0.94, "learning_rate": 0.0019625468164794005, "loss": 1.2737, "step": 18000 }, { "epoch": 0.96, "learning_rate": 0.0019615064502704954, "loss": 1.2729, "step": 18500 }, { "epoch": 0.99, "learning_rate": 0.00196046608406159, "loss": 1.2708, "step": 19000 }, { "epoch": 1.0, "eval_loss": 1.28058660030365, "eval_runtime": 0.6137, "eval_samples_per_second": 1629.563, "eval_steps_per_second": 3.259, "step": 19224 }, { "epoch": 1.01, "learning_rate": 0.0019594257178526843, "loss": 1.2682, "step": 19500 }, { "epoch": 1.04, "learning_rate": 0.0019583853516437788, "loss": 1.2641, "step": 20000 }, { "epoch": 1.07, "learning_rate": 0.001957344985434873, "loss": 1.2635, "step": 20500 }, { "epoch": 1.09, "learning_rate": 0.0019563046192259677, "loss": 1.265, "step": 21000 }, { "epoch": 1.12, "learning_rate": 0.001955264253017062, "loss": 1.2636, "step": 21500 }, { "epoch": 1.14, "learning_rate": 0.0019542238868081566, "loss": 1.2617, "step": 22000 }, { "epoch": 1.17, "learning_rate": 0.001953183520599251, "loss": 1.2605, "step": 22500 }, { "epoch": 1.2, "learning_rate": 0.0019521431543903455, "loss": 1.2585, "step": 23000 }, { "epoch": 1.22, "learning_rate": 0.0019511027881814397, "loss": 1.2583, "step": 23500 }, { "epoch": 1.25, "learning_rate": 0.0019500624219725344, "loss": 1.2565, "step": 24000 }, { "epoch": 1.27, "learning_rate": 0.0019490220557636289, "loss": 1.2558, "step": 24500 }, { "epoch": 1.3, "learning_rate": 0.0019479816895547233, "loss": 1.2523, "step": 25000 }, { "epoch": 1.33, "learning_rate": 0.0019469413233458178, "loss": 1.2553, "step": 25500 }, { "epoch": 1.35, "learning_rate": 0.0019459009571369122, "loss": 1.2539, "step": 26000 }, { "epoch": 1.38, "learning_rate": 0.0019448605909280067, "loss": 1.2521, "step": 26500 }, { "epoch": 1.4, "learning_rate": 0.0019438202247191011, "loss": 1.2511, "step": 27000 }, { "epoch": 1.43, "learning_rate": 0.0019427798585101956, "loss": 1.2517, "step": 27500 }, { "epoch": 1.46, "learning_rate": 0.00194173949230129, "loss": 1.2497, "step": 28000 }, { "epoch": 1.48, "learning_rate": 0.0019406991260923845, "loss": 1.2495, "step": 28500 }, { "epoch": 1.51, "learning_rate": 0.001939658759883479, "loss": 1.2488, "step": 29000 }, { "epoch": 1.53, "learning_rate": 0.0019386183936745737, "loss": 1.248, "step": 29500 }, { "epoch": 1.56, "learning_rate": 0.001937578027465668, "loss": 1.2471, "step": 30000 }, { "epoch": 1.59, "learning_rate": 0.0019365376612567623, "loss": 1.248, "step": 30500 }, { "epoch": 1.61, "learning_rate": 0.0019354972950478568, "loss": 1.2446, "step": 31000 }, { "epoch": 1.64, "learning_rate": 0.0019344569288389513, "loss": 1.2447, "step": 31500 }, { "epoch": 1.66, "learning_rate": 0.0019334165626300457, "loss": 1.2439, "step": 32000 }, { "epoch": 1.69, "learning_rate": 0.0019323761964211404, "loss": 1.2442, "step": 32500 }, { "epoch": 1.72, "learning_rate": 0.0019313358302122348, "loss": 1.2444, "step": 33000 }, { "epoch": 1.74, "learning_rate": 0.0019302954640033293, "loss": 1.2417, "step": 33500 }, { "epoch": 1.77, "learning_rate": 0.0019292550977944235, "loss": 1.2429, "step": 34000 }, { "epoch": 1.79, "learning_rate": 0.001928214731585518, "loss": 1.2398, "step": 34500 }, { "epoch": 1.82, "learning_rate": 0.0019271743653766125, "loss": 1.2419, "step": 35000 }, { "epoch": 1.85, "learning_rate": 0.0019261339991677071, "loss": 1.2393, "step": 35500 }, { "epoch": 1.87, "learning_rate": 0.0019250936329588016, "loss": 1.2396, "step": 36000 }, { "epoch": 1.9, "learning_rate": 0.001924053266749896, "loss": 1.2374, "step": 36500 }, { "epoch": 1.92, "learning_rate": 0.0019230129005409905, "loss": 1.24, "step": 37000 }, { "epoch": 1.95, "learning_rate": 0.001921972534332085, "loss": 1.2377, "step": 37500 }, { "epoch": 1.98, "learning_rate": 0.0019209321681231794, "loss": 1.2387, "step": 38000 }, { "epoch": 2.0, "eval_loss": 1.2544126510620117, "eval_runtime": 0.5912, "eval_samples_per_second": 1691.485, "eval_steps_per_second": 3.383, "step": 38448 }, { "epoch": 2.0, "learning_rate": 0.0019198918019142739, "loss": 1.2349, "step": 38500 }, { "epoch": 2.03, "learning_rate": 0.0019188514357053683, "loss": 1.2314, "step": 39000 }, { "epoch": 2.05, "learning_rate": 0.0019178110694964628, "loss": 1.2342, "step": 39500 }, { "epoch": 2.08, "learning_rate": 0.0019167707032875572, "loss": 1.2318, "step": 40000 }, { "epoch": 2.11, "learning_rate": 0.0019157303370786517, "loss": 1.231, "step": 40500 }, { "epoch": 2.13, "learning_rate": 0.0019146899708697464, "loss": 1.2321, "step": 41000 }, { "epoch": 2.16, "learning_rate": 0.0019136496046608406, "loss": 1.2315, "step": 41500 }, { "epoch": 2.18, "learning_rate": 0.001912609238451935, "loss": 1.2308, "step": 42000 }, { "epoch": 2.21, "learning_rate": 0.0019115688722430295, "loss": 1.2303, "step": 42500 }, { "epoch": 2.24, "learning_rate": 0.001910528506034124, "loss": 1.2297, "step": 43000 }, { "epoch": 2.26, "learning_rate": 0.0019094881398252184, "loss": 1.2295, "step": 43500 }, { "epoch": 2.29, "learning_rate": 0.0019084477736163131, "loss": 1.2271, "step": 44000 }, { "epoch": 2.31, "learning_rate": 0.0019074074074074076, "loss": 1.2275, "step": 44500 }, { "epoch": 2.34, "learning_rate": 0.0019063670411985018, "loss": 1.2284, "step": 45000 }, { "epoch": 2.37, "learning_rate": 0.0019053266749895963, "loss": 1.2327, "step": 45500 }, { "epoch": 2.39, "learning_rate": 0.0019042863087806907, "loss": 1.2286, "step": 46000 }, { "epoch": 2.42, "learning_rate": 0.0019032459425717854, "loss": 1.2293, "step": 46500 }, { "epoch": 2.44, "learning_rate": 0.0019022055763628799, "loss": 1.2263, "step": 47000 }, { "epoch": 2.47, "learning_rate": 0.0019011652101539743, "loss": 1.2297, "step": 47500 }, { "epoch": 2.5, "learning_rate": 0.0019001248439450688, "loss": 1.227, "step": 48000 }, { "epoch": 2.52, "learning_rate": 0.001899084477736163, "loss": 1.2263, "step": 48500 }, { "epoch": 2.55, "learning_rate": 0.0018980441115272575, "loss": 1.2274, "step": 49000 }, { "epoch": 2.57, "learning_rate": 0.0018970037453183521, "loss": 1.2271, "step": 49500 }, { "epoch": 2.6, "learning_rate": 0.0018959633791094466, "loss": 1.2262, "step": 50000 }, { "epoch": 2.63, "learning_rate": 0.001894923012900541, "loss": 1.2256, "step": 50500 }, { "epoch": 2.65, "learning_rate": 0.0018938826466916355, "loss": 1.2256, "step": 51000 }, { "epoch": 2.68, "learning_rate": 0.00189284228048273, "loss": 1.225, "step": 51500 }, { "epoch": 2.7, "learning_rate": 0.0018918019142738244, "loss": 1.2254, "step": 52000 }, { "epoch": 2.73, "learning_rate": 0.0018907615480649189, "loss": 1.2249, "step": 52500 }, { "epoch": 2.76, "learning_rate": 0.0018897211818560133, "loss": 1.2247, "step": 53000 }, { "epoch": 2.78, "learning_rate": 0.0018886808156471078, "loss": 1.2231, "step": 53500 }, { "epoch": 2.81, "learning_rate": 0.0018876404494382023, "loss": 1.2226, "step": 54000 }, { "epoch": 2.83, "learning_rate": 0.0018866000832292967, "loss": 1.2239, "step": 54500 }, { "epoch": 2.86, "learning_rate": 0.0018855597170203914, "loss": 1.2226, "step": 55000 }, { "epoch": 2.89, "learning_rate": 0.0018845193508114856, "loss": 1.2234, "step": 55500 }, { "epoch": 2.91, "learning_rate": 0.00188347898460258, "loss": 1.221, "step": 56000 }, { "epoch": 2.94, "learning_rate": 0.0018824386183936745, "loss": 1.2227, "step": 56500 }, { "epoch": 2.97, "learning_rate": 0.001881398252184769, "loss": 1.2199, "step": 57000 }, { "epoch": 2.99, "learning_rate": 0.0018803578859758635, "loss": 1.2195, "step": 57500 }, { "epoch": 3.0, "eval_loss": 1.233520746231079, "eval_runtime": 0.6051, "eval_samples_per_second": 1652.629, "eval_steps_per_second": 3.305, "step": 57672 }, { "epoch": 3.02, "learning_rate": 0.0018793175197669581, "loss": 1.2162, "step": 58000 }, { "epoch": 3.04, "learning_rate": 0.0018782771535580526, "loss": 1.2161, "step": 58500 }, { "epoch": 3.07, "learning_rate": 0.001877236787349147, "loss": 1.2146, "step": 59000 }, { "epoch": 3.1, "learning_rate": 0.0018761964211402413, "loss": 1.2163, "step": 59500 }, { "epoch": 3.12, "learning_rate": 0.0018751560549313357, "loss": 1.2163, "step": 60000 }, { "epoch": 3.15, "learning_rate": 0.0018741156887224304, "loss": 1.2152, "step": 60500 }, { "epoch": 3.17, "learning_rate": 0.0018730753225135249, "loss": 1.2164, "step": 61000 }, { "epoch": 3.2, "learning_rate": 0.0018720349563046193, "loss": 1.2157, "step": 61500 }, { "epoch": 3.23, "learning_rate": 0.0018709945900957138, "loss": 1.2176, "step": 62000 }, { "epoch": 3.25, "learning_rate": 0.0018699542238868082, "loss": 1.2168, "step": 62500 }, { "epoch": 3.28, "learning_rate": 0.0018689138576779025, "loss": 1.2159, "step": 63000 }, { "epoch": 3.3, "learning_rate": 0.0018678734914689972, "loss": 1.2145, "step": 63500 }, { "epoch": 3.33, "learning_rate": 0.0018668331252600916, "loss": 1.2148, "step": 64000 }, { "epoch": 3.36, "learning_rate": 0.001865792759051186, "loss": 1.2151, "step": 64500 }, { "epoch": 3.38, "learning_rate": 0.0018647523928422805, "loss": 1.2145, "step": 65000 }, { "epoch": 3.41, "learning_rate": 0.001863712026633375, "loss": 1.216, "step": 65500 }, { "epoch": 3.43, "learning_rate": 0.0018626716604244697, "loss": 1.2152, "step": 66000 }, { "epoch": 3.46, "learning_rate": 0.001861631294215564, "loss": 1.2132, "step": 66500 }, { "epoch": 3.49, "learning_rate": 0.0018605909280066584, "loss": 1.2137, "step": 67000 }, { "epoch": 3.51, "learning_rate": 0.0018595505617977528, "loss": 1.2145, "step": 67500 }, { "epoch": 3.54, "learning_rate": 0.0018585101955888473, "loss": 1.2141, "step": 68000 }, { "epoch": 3.56, "learning_rate": 0.0018574698293799417, "loss": 1.2148, "step": 68500 }, { "epoch": 3.59, "learning_rate": 0.0018564294631710364, "loss": 1.2125, "step": 69000 }, { "epoch": 3.62, "learning_rate": 0.0018553890969621309, "loss": 1.2132, "step": 69500 }, { "epoch": 3.64, "learning_rate": 0.001854348730753225, "loss": 1.2124, "step": 70000 }, { "epoch": 3.67, "learning_rate": 0.0018533083645443196, "loss": 1.2148, "step": 70500 }, { "epoch": 3.69, "learning_rate": 0.001852267998335414, "loss": 1.2135, "step": 71000 }, { "epoch": 3.72, "learning_rate": 0.0018512276321265085, "loss": 1.2132, "step": 71500 }, { "epoch": 3.75, "learning_rate": 0.0018501872659176031, "loss": 1.2133, "step": 72000 }, { "epoch": 3.77, "learning_rate": 0.0018491468997086976, "loss": 1.2144, "step": 72500 }, { "epoch": 3.8, "learning_rate": 0.001848106533499792, "loss": 1.2101, "step": 73000 }, { "epoch": 3.82, "learning_rate": 0.0018470661672908863, "loss": 1.212, "step": 73500 }, { "epoch": 3.85, "learning_rate": 0.0018460258010819808, "loss": 1.2119, "step": 74000 }, { "epoch": 3.88, "learning_rate": 0.0018449854348730754, "loss": 1.2117, "step": 74500 }, { "epoch": 3.9, "learning_rate": 0.0018439450686641699, "loss": 1.2106, "step": 75000 }, { "epoch": 3.93, "learning_rate": 0.0018429047024552643, "loss": 1.2125, "step": 75500 }, { "epoch": 3.95, "learning_rate": 0.0018418643362463588, "loss": 1.2106, "step": 76000 }, { "epoch": 3.98, "learning_rate": 0.0018408239700374533, "loss": 1.2108, "step": 76500 }, { "epoch": 4.0, "eval_loss": 1.2261288166046143, "eval_runtime": 0.8323, "eval_samples_per_second": 1201.497, "eval_steps_per_second": 2.403, "step": 76896 }, { "epoch": 4.01, "learning_rate": 0.0018397836038285475, "loss": 1.2091, "step": 77000 }, { "epoch": 4.03, "learning_rate": 0.0018387432376196422, "loss": 1.2055, "step": 77500 }, { "epoch": 4.06, "learning_rate": 0.0018377028714107366, "loss": 1.2061, "step": 78000 }, { "epoch": 4.08, "learning_rate": 0.001836662505201831, "loss": 1.2047, "step": 78500 }, { "epoch": 4.11, "learning_rate": 0.0018356221389929255, "loss": 1.2077, "step": 79000 }, { "epoch": 4.14, "learning_rate": 0.00183458177278402, "loss": 1.2069, "step": 79500 }, { "epoch": 4.16, "learning_rate": 0.0018335414065751145, "loss": 1.2078, "step": 80000 }, { "epoch": 4.19, "learning_rate": 0.001832501040366209, "loss": 1.2058, "step": 80500 }, { "epoch": 4.21, "learning_rate": 0.0018314606741573034, "loss": 1.2075, "step": 81000 }, { "epoch": 4.24, "learning_rate": 0.0018304203079483978, "loss": 1.2064, "step": 81500 }, { "epoch": 4.27, "learning_rate": 0.0018293799417394923, "loss": 1.2059, "step": 82000 }, { "epoch": 4.29, "learning_rate": 0.0018283395755305867, "loss": 1.2075, "step": 82500 }, { "epoch": 4.32, "learning_rate": 0.0018272992093216814, "loss": 1.2042, "step": 83000 }, { "epoch": 4.34, "learning_rate": 0.0018262588431127759, "loss": 1.2051, "step": 83500 }, { "epoch": 4.37, "learning_rate": 0.0018252184769038703, "loss": 1.2066, "step": 84000 }, { "epoch": 4.4, "learning_rate": 0.0018241781106949646, "loss": 1.2081, "step": 84500 }, { "epoch": 4.42, "learning_rate": 0.001823137744486059, "loss": 1.2072, "step": 85000 }, { "epoch": 4.45, "learning_rate": 0.0018220973782771535, "loss": 1.2052, "step": 85500 }, { "epoch": 4.47, "learning_rate": 0.0018210570120682482, "loss": 1.2042, "step": 86000 }, { "epoch": 4.5, "learning_rate": 0.0018200166458593426, "loss": 1.2058, "step": 86500 }, { "epoch": 4.53, "learning_rate": 0.001818976279650437, "loss": 1.2064, "step": 87000 }, { "epoch": 4.55, "learning_rate": 0.0018179359134415315, "loss": 1.2032, "step": 87500 }, { "epoch": 4.58, "learning_rate": 0.0018168955472326258, "loss": 1.2059, "step": 88000 }, { "epoch": 4.6, "learning_rate": 0.0018158551810237204, "loss": 1.2058, "step": 88500 }, { "epoch": 4.63, "learning_rate": 0.001814814814814815, "loss": 1.2041, "step": 89000 }, { "epoch": 4.66, "learning_rate": 0.0018137744486059093, "loss": 1.2057, "step": 89500 }, { "epoch": 4.68, "learning_rate": 0.0018127340823970038, "loss": 1.2031, "step": 90000 }, { "epoch": 4.71, "learning_rate": 0.0018116937161880983, "loss": 1.2062, "step": 90500 }, { "epoch": 4.73, "learning_rate": 0.0018106533499791927, "loss": 1.2051, "step": 91000 }, { "epoch": 4.76, "learning_rate": 0.0018096129837702872, "loss": 1.2037, "step": 91500 }, { "epoch": 4.79, "learning_rate": 0.0018085726175613816, "loss": 1.2053, "step": 92000 }, { "epoch": 4.81, "learning_rate": 0.001807532251352476, "loss": 1.2046, "step": 92500 }, { "epoch": 4.84, "learning_rate": 0.0018064918851435705, "loss": 1.2023, "step": 93000 }, { "epoch": 4.86, "learning_rate": 0.001805451518934665, "loss": 1.2045, "step": 93500 }, { "epoch": 4.89, "learning_rate": 0.0018044111527257595, "loss": 1.204, "step": 94000 }, { "epoch": 4.92, "learning_rate": 0.0018033707865168541, "loss": 1.2037, "step": 94500 }, { "epoch": 4.94, "learning_rate": 0.0018023304203079484, "loss": 1.204, "step": 95000 }, { "epoch": 4.97, "learning_rate": 0.0018012900540990428, "loss": 1.2044, "step": 95500 }, { "epoch": 4.99, "learning_rate": 0.0018002496878901373, "loss": 1.2022, "step": 96000 }, { "epoch": 5.0, "eval_loss": 1.2207547426223755, "eval_runtime": 0.6112, "eval_samples_per_second": 1636.066, "eval_steps_per_second": 3.272, "step": 96120 }, { "epoch": 5.02, "learning_rate": 0.0017992093216812317, "loss": 1.2001, "step": 96500 }, { "epoch": 5.05, "learning_rate": 0.0017981689554723264, "loss": 1.1996, "step": 97000 }, { "epoch": 5.07, "learning_rate": 0.0017971285892634209, "loss": 1.1989, "step": 97500 }, { "epoch": 5.1, "learning_rate": 0.0017960882230545153, "loss": 1.1998, "step": 98000 }, { "epoch": 5.12, "learning_rate": 0.0017950478568456096, "loss": 1.1989, "step": 98500 }, { "epoch": 5.15, "learning_rate": 0.001794007490636704, "loss": 1.1984, "step": 99000 }, { "epoch": 5.18, "learning_rate": 0.0017929671244277985, "loss": 1.1991, "step": 99500 }, { "epoch": 5.2, "learning_rate": 0.0017919267582188932, "loss": 1.1993, "step": 100000 }, { "epoch": 5.23, "learning_rate": 0.0017908863920099876, "loss": 1.1996, "step": 100500 }, { "epoch": 5.25, "learning_rate": 0.001789846025801082, "loss": 1.1995, "step": 101000 }, { "epoch": 5.28, "learning_rate": 0.0017888056595921765, "loss": 1.1987, "step": 101500 }, { "epoch": 5.31, "learning_rate": 0.0017877652933832708, "loss": 1.1971, "step": 102000 }, { "epoch": 5.33, "learning_rate": 0.0017867249271743654, "loss": 1.1984, "step": 102500 }, { "epoch": 5.36, "learning_rate": 0.00178568456096546, "loss": 1.2003, "step": 103000 }, { "epoch": 5.38, "learning_rate": 0.0017846441947565544, "loss": 1.2011, "step": 103500 }, { "epoch": 5.41, "learning_rate": 0.0017836038285476488, "loss": 1.1994, "step": 104000 }, { "epoch": 5.44, "learning_rate": 0.0017825634623387433, "loss": 1.1989, "step": 104500 }, { "epoch": 5.46, "learning_rate": 0.0017815230961298377, "loss": 1.1996, "step": 105000 }, { "epoch": 5.49, "learning_rate": 0.0017804827299209324, "loss": 1.1982, "step": 105500 }, { "epoch": 5.51, "learning_rate": 0.0017794423637120266, "loss": 1.1971, "step": 106000 }, { "epoch": 5.54, "learning_rate": 0.001778401997503121, "loss": 1.1988, "step": 106500 }, { "epoch": 5.57, "learning_rate": 0.0017773616312942156, "loss": 1.1996, "step": 107000 }, { "epoch": 5.59, "learning_rate": 0.00177632126508531, "loss": 1.1972, "step": 107500 }, { "epoch": 5.62, "learning_rate": 0.0017752808988764045, "loss": 1.1991, "step": 108000 }, { "epoch": 5.64, "learning_rate": 0.0017742405326674991, "loss": 1.1987, "step": 108500 }, { "epoch": 5.67, "learning_rate": 0.0017732001664585936, "loss": 1.1983, "step": 109000 }, { "epoch": 5.7, "learning_rate": 0.0017721598002496878, "loss": 1.1993, "step": 109500 }, { "epoch": 5.72, "learning_rate": 0.0017711194340407823, "loss": 1.1979, "step": 110000 }, { "epoch": 5.75, "learning_rate": 0.0017700790678318768, "loss": 1.1989, "step": 110500 }, { "epoch": 5.77, "learning_rate": 0.0017690387016229714, "loss": 1.2003, "step": 111000 }, { "epoch": 5.8, "learning_rate": 0.0017679983354140659, "loss": 1.1987, "step": 111500 }, { "epoch": 5.83, "learning_rate": 0.0017669579692051603, "loss": 1.1987, "step": 112000 }, { "epoch": 5.85, "learning_rate": 0.0017659176029962548, "loss": 1.1976, "step": 112500 }, { "epoch": 5.88, "learning_rate": 0.001764877236787349, "loss": 1.1983, "step": 113000 }, { "epoch": 5.9, "learning_rate": 0.0017638368705784435, "loss": 1.1981, "step": 113500 }, { "epoch": 5.93, "learning_rate": 0.0017627965043695382, "loss": 1.1989, "step": 114000 }, { "epoch": 5.96, "learning_rate": 0.0017617561381606326, "loss": 1.1963, "step": 114500 }, { "epoch": 5.98, "learning_rate": 0.001760715771951727, "loss": 1.1983, "step": 115000 }, { "epoch": 6.0, "eval_loss": 1.216284155845642, "eval_runtime": 0.6159, "eval_samples_per_second": 1623.524, "eval_steps_per_second": 3.247, "step": 115344 }, { "epoch": 6.01, "learning_rate": 0.0017596754057428215, "loss": 1.1979, "step": 115500 }, { "epoch": 6.03, "learning_rate": 0.001758635039533916, "loss": 1.1933, "step": 116000 }, { "epoch": 6.06, "learning_rate": 0.0017575946733250102, "loss": 1.1942, "step": 116500 }, { "epoch": 6.09, "learning_rate": 0.001756554307116105, "loss": 1.1937, "step": 117000 }, { "epoch": 6.11, "learning_rate": 0.0017555139409071994, "loss": 1.1937, "step": 117500 }, { "epoch": 6.14, "learning_rate": 0.0017544735746982938, "loss": 1.1958, "step": 118000 }, { "epoch": 6.16, "learning_rate": 0.0017534332084893883, "loss": 1.1936, "step": 118500 }, { "epoch": 6.19, "learning_rate": 0.0017523928422804827, "loss": 1.1952, "step": 119000 }, { "epoch": 6.22, "learning_rate": 0.0017513524760715774, "loss": 1.1959, "step": 119500 }, { "epoch": 6.24, "learning_rate": 0.0017503121098626717, "loss": 1.1943, "step": 120000 }, { "epoch": 6.27, "learning_rate": 0.0017492717436537661, "loss": 1.1945, "step": 120500 }, { "epoch": 6.29, "learning_rate": 0.0017482313774448606, "loss": 1.1952, "step": 121000 }, { "epoch": 6.32, "learning_rate": 0.001747191011235955, "loss": 1.1972, "step": 121500 }, { "epoch": 6.35, "learning_rate": 0.0017461506450270495, "loss": 1.1933, "step": 122000 }, { "epoch": 6.37, "learning_rate": 0.0017451102788181442, "loss": 1.1949, "step": 122500 }, { "epoch": 6.4, "learning_rate": 0.0017440699126092386, "loss": 1.1947, "step": 123000 }, { "epoch": 6.42, "learning_rate": 0.0017430295464003329, "loss": 1.1935, "step": 123500 }, { "epoch": 6.45, "learning_rate": 0.0017419891801914273, "loss": 1.1954, "step": 124000 }, { "epoch": 6.48, "learning_rate": 0.0017409488139825218, "loss": 1.193, "step": 124500 }, { "epoch": 6.5, "learning_rate": 0.0017399084477736164, "loss": 1.1938, "step": 125000 }, { "epoch": 6.53, "learning_rate": 0.001738868081564711, "loss": 1.1939, "step": 125500 }, { "epoch": 6.55, "learning_rate": 0.0017378277153558054, "loss": 1.1948, "step": 126000 }, { "epoch": 6.58, "learning_rate": 0.0017367873491468998, "loss": 1.1926, "step": 126500 }, { "epoch": 6.61, "learning_rate": 0.001735746982937994, "loss": 1.1936, "step": 127000 }, { "epoch": 6.63, "learning_rate": 0.0017347066167290885, "loss": 1.1933, "step": 127500 }, { "epoch": 6.66, "learning_rate": 0.0017336662505201832, "loss": 1.1947, "step": 128000 }, { "epoch": 6.68, "learning_rate": 0.0017326258843112776, "loss": 1.1931, "step": 128500 }, { "epoch": 6.71, "learning_rate": 0.001731585518102372, "loss": 1.1931, "step": 129000 }, { "epoch": 6.74, "learning_rate": 0.0017305451518934666, "loss": 1.1938, "step": 129500 }, { "epoch": 6.76, "learning_rate": 0.001729504785684561, "loss": 1.1939, "step": 130000 }, { "epoch": 6.79, "learning_rate": 0.0017284644194756553, "loss": 1.1923, "step": 130500 }, { "epoch": 6.81, "learning_rate": 0.00172742405326675, "loss": 1.1932, "step": 131000 }, { "epoch": 6.84, "learning_rate": 0.0017263836870578444, "loss": 1.1926, "step": 131500 }, { "epoch": 6.87, "learning_rate": 0.0017253433208489388, "loss": 1.1929, "step": 132000 }, { "epoch": 6.89, "learning_rate": 0.0017243029546400333, "loss": 1.1932, "step": 132500 }, { "epoch": 6.92, "learning_rate": 0.0017232625884311278, "loss": 1.1932, "step": 133000 }, { "epoch": 6.94, "learning_rate": 0.0017222222222222224, "loss": 1.1952, "step": 133500 }, { "epoch": 6.97, "learning_rate": 0.0017211818560133169, "loss": 1.1924, "step": 134000 }, { "epoch": 7.0, "learning_rate": 0.0017201414898044111, "loss": 1.1927, "step": 134500 }, { "epoch": 7.0, "eval_loss": 1.2103557586669922, "eval_runtime": 0.6147, "eval_samples_per_second": 1626.936, "eval_steps_per_second": 3.254, "step": 134568 }, { "epoch": 7.02, "learning_rate": 0.0017191011235955056, "loss": 1.1869, "step": 135000 }, { "epoch": 7.05, "learning_rate": 0.0017180607573866, "loss": 1.1872, "step": 135500 }, { "epoch": 7.07, "learning_rate": 0.0017170203911776945, "loss": 1.1898, "step": 136000 }, { "epoch": 7.1, "learning_rate": 0.0017159800249687892, "loss": 1.1895, "step": 136500 }, { "epoch": 7.13, "learning_rate": 0.0017149396587598836, "loss": 1.1902, "step": 137000 }, { "epoch": 7.15, "learning_rate": 0.001713899292550978, "loss": 1.1901, "step": 137500 }, { "epoch": 7.18, "learning_rate": 0.0017128589263420723, "loss": 1.1892, "step": 138000 }, { "epoch": 7.2, "learning_rate": 0.0017118185601331668, "loss": 1.1902, "step": 138500 }, { "epoch": 7.23, "learning_rate": 0.0017107781939242615, "loss": 1.1906, "step": 139000 }, { "epoch": 7.26, "learning_rate": 0.001709737827715356, "loss": 1.1904, "step": 139500 }, { "epoch": 7.28, "learning_rate": 0.0017086974615064504, "loss": 1.1898, "step": 140000 }, { "epoch": 7.31, "learning_rate": 0.0017076570952975448, "loss": 1.1917, "step": 140500 }, { "epoch": 7.33, "learning_rate": 0.0017066167290886393, "loss": 1.1914, "step": 141000 }, { "epoch": 7.36, "learning_rate": 0.0017055763628797335, "loss": 1.1905, "step": 141500 }, { "epoch": 7.39, "learning_rate": 0.0017045359966708282, "loss": 1.1921, "step": 142000 }, { "epoch": 7.41, "learning_rate": 0.0017034956304619227, "loss": 1.1899, "step": 142500 }, { "epoch": 7.44, "learning_rate": 0.001702455264253017, "loss": 1.19, "step": 143000 }, { "epoch": 7.46, "learning_rate": 0.0017014148980441116, "loss": 1.1883, "step": 143500 }, { "epoch": 7.49, "learning_rate": 0.001700374531835206, "loss": 1.191, "step": 144000 }, { "epoch": 7.52, "learning_rate": 0.0016993341656263005, "loss": 1.1896, "step": 144500 }, { "epoch": 7.54, "learning_rate": 0.001698293799417395, "loss": 1.1893, "step": 145000 }, { "epoch": 7.57, "learning_rate": 0.0016972534332084894, "loss": 1.1892, "step": 145500 }, { "epoch": 7.59, "learning_rate": 0.0016962130669995838, "loss": 1.1887, "step": 146000 }, { "epoch": 7.62, "learning_rate": 0.0016951727007906783, "loss": 1.1913, "step": 146500 }, { "epoch": 7.65, "learning_rate": 0.0016941323345817728, "loss": 1.1895, "step": 147000 }, { "epoch": 7.67, "learning_rate": 0.0016930919683728674, "loss": 1.1891, "step": 147500 }, { "epoch": 7.7, "learning_rate": 0.001692051602163962, "loss": 1.189, "step": 148000 }, { "epoch": 7.72, "learning_rate": 0.0016910112359550561, "loss": 1.1907, "step": 148500 }, { "epoch": 7.75, "learning_rate": 0.0016899708697461506, "loss": 1.1911, "step": 149000 }, { "epoch": 7.78, "learning_rate": 0.001688930503537245, "loss": 1.1881, "step": 149500 }, { "epoch": 7.8, "learning_rate": 0.0016878901373283395, "loss": 1.1893, "step": 150000 }, { "epoch": 7.83, "learning_rate": 0.0016868497711194342, "loss": 1.1902, "step": 150500 }, { "epoch": 7.85, "learning_rate": 0.0016858094049105286, "loss": 1.1912, "step": 151000 }, { "epoch": 7.88, "learning_rate": 0.001684769038701623, "loss": 1.1907, "step": 151500 }, { "epoch": 7.91, "learning_rate": 0.0016837286724927173, "loss": 1.1909, "step": 152000 }, { "epoch": 7.93, "learning_rate": 0.0016826883062838118, "loss": 1.1875, "step": 152500 }, { "epoch": 7.96, "learning_rate": 0.0016816479400749065, "loss": 1.19, "step": 153000 }, { "epoch": 7.98, "learning_rate": 0.001680607573866001, "loss": 1.1881, "step": 153500 }, { "epoch": 8.0, "eval_loss": 1.2096730470657349, "eval_runtime": 0.6211, "eval_samples_per_second": 1609.947, "eval_steps_per_second": 3.22, "step": 153792 }, { "epoch": 8.01, "learning_rate": 0.0016795672076570954, "loss": 1.1874, "step": 154000 }, { "epoch": 8.04, "learning_rate": 0.0016785268414481898, "loss": 1.1844, "step": 154500 }, { "epoch": 8.06, "learning_rate": 0.0016774864752392843, "loss": 1.1845, "step": 155000 }, { "epoch": 8.09, "learning_rate": 0.0016764461090303787, "loss": 1.186, "step": 155500 }, { "epoch": 8.11, "learning_rate": 0.0016754057428214732, "loss": 1.1851, "step": 156000 }, { "epoch": 8.14, "learning_rate": 0.0016743653766125677, "loss": 1.1872, "step": 156500 }, { "epoch": 8.17, "learning_rate": 0.0016733250104036621, "loss": 1.1872, "step": 157000 }, { "epoch": 8.19, "learning_rate": 0.0016722846441947566, "loss": 1.1862, "step": 157500 }, { "epoch": 8.22, "learning_rate": 0.001671244277985851, "loss": 1.1867, "step": 158000 }, { "epoch": 8.24, "learning_rate": 0.0016702039117769455, "loss": 1.186, "step": 158500 }, { "epoch": 8.27, "learning_rate": 0.0016691635455680402, "loss": 1.1858, "step": 159000 }, { "epoch": 8.3, "learning_rate": 0.0016681231793591344, "loss": 1.1843, "step": 159500 }, { "epoch": 8.32, "learning_rate": 0.0016670828131502289, "loss": 1.1857, "step": 160000 }, { "epoch": 8.35, "learning_rate": 0.0016660424469413233, "loss": 1.1872, "step": 160500 }, { "epoch": 8.37, "learning_rate": 0.0016650020807324178, "loss": 1.1859, "step": 161000 }, { "epoch": 8.4, "learning_rate": 0.0016639617145235124, "loss": 1.1865, "step": 161500 }, { "epoch": 8.43, "learning_rate": 0.001662921348314607, "loss": 1.1871, "step": 162000 }, { "epoch": 8.45, "learning_rate": 0.0016618809821057014, "loss": 1.1871, "step": 162500 }, { "epoch": 8.48, "learning_rate": 0.0016608406158967956, "loss": 1.1876, "step": 163000 }, { "epoch": 8.5, "learning_rate": 0.00165980024968789, "loss": 1.1876, "step": 163500 }, { "epoch": 8.53, "learning_rate": 0.0016587598834789845, "loss": 1.1874, "step": 164000 }, { "epoch": 8.56, "learning_rate": 0.0016577195172700792, "loss": 1.186, "step": 164500 }, { "epoch": 8.58, "learning_rate": 0.0016566791510611736, "loss": 1.1869, "step": 165000 }, { "epoch": 8.61, "learning_rate": 0.001655638784852268, "loss": 1.1865, "step": 165500 }, { "epoch": 8.64, "learning_rate": 0.0016545984186433626, "loss": 1.1862, "step": 166000 }, { "epoch": 8.66, "learning_rate": 0.0016535580524344568, "loss": 1.1881, "step": 166500 }, { "epoch": 8.69, "learning_rate": 0.0016525176862255513, "loss": 1.1875, "step": 167000 }, { "epoch": 8.71, "learning_rate": 0.001651477320016646, "loss": 1.1866, "step": 167500 }, { "epoch": 8.74, "learning_rate": 0.0016504369538077404, "loss": 1.1868, "step": 168000 }, { "epoch": 8.77, "learning_rate": 0.0016493965875988348, "loss": 1.1867, "step": 168500 }, { "epoch": 8.79, "learning_rate": 0.0016483562213899293, "loss": 1.1858, "step": 169000 }, { "epoch": 8.82, "learning_rate": 0.0016473158551810238, "loss": 1.1869, "step": 169500 }, { "epoch": 8.84, "learning_rate": 0.0016462754889721182, "loss": 1.1861, "step": 170000 }, { "epoch": 8.87, "learning_rate": 0.0016452351227632127, "loss": 1.1849, "step": 170500 }, { "epoch": 8.9, "learning_rate": 0.0016441947565543071, "loss": 1.1857, "step": 171000 }, { "epoch": 8.92, "learning_rate": 0.0016431543903454016, "loss": 1.1883, "step": 171500 }, { "epoch": 8.95, "learning_rate": 0.001642114024136496, "loss": 1.1862, "step": 172000 }, { "epoch": 8.97, "learning_rate": 0.0016410736579275905, "loss": 1.1859, "step": 172500 }, { "epoch": 9.0, "learning_rate": 0.0016400332917186852, "loss": 1.1865, "step": 173000 }, { "epoch": 9.0, "eval_loss": 1.2050005197525024, "eval_runtime": 0.6154, "eval_samples_per_second": 1624.869, "eval_steps_per_second": 3.25, "step": 173016 }, { "epoch": 9.03, "learning_rate": 0.0016389929255097794, "loss": 1.181, "step": 173500 }, { "epoch": 9.05, "learning_rate": 0.0016379525593008739, "loss": 1.1819, "step": 174000 }, { "epoch": 9.08, "learning_rate": 0.0016369121930919683, "loss": 1.1832, "step": 174500 }, { "epoch": 9.1, "learning_rate": 0.0016358718268830628, "loss": 1.1833, "step": 175000 }, { "epoch": 9.13, "learning_rate": 0.0016348314606741575, "loss": 1.1814, "step": 175500 }, { "epoch": 9.16, "learning_rate": 0.001633791094465252, "loss": 1.1824, "step": 176000 }, { "epoch": 9.18, "learning_rate": 0.0016327507282563464, "loss": 1.1836, "step": 176500 }, { "epoch": 9.21, "learning_rate": 0.0016317103620474408, "loss": 1.1824, "step": 177000 }, { "epoch": 9.23, "learning_rate": 0.001630669995838535, "loss": 1.1837, "step": 177500 }, { "epoch": 9.26, "learning_rate": 0.0016296296296296295, "loss": 1.1859, "step": 178000 }, { "epoch": 9.29, "learning_rate": 0.0016285892634207242, "loss": 1.1834, "step": 178500 }, { "epoch": 9.31, "learning_rate": 0.0016275488972118187, "loss": 1.1829, "step": 179000 }, { "epoch": 9.34, "learning_rate": 0.0016265085310029131, "loss": 1.1827, "step": 179500 }, { "epoch": 9.36, "learning_rate": 0.0016254681647940076, "loss": 1.1845, "step": 180000 }, { "epoch": 9.39, "learning_rate": 0.001624427798585102, "loss": 1.1836, "step": 180500 }, { "epoch": 9.42, "learning_rate": 0.0016233874323761963, "loss": 1.1821, "step": 181000 }, { "epoch": 9.44, "learning_rate": 0.001622347066167291, "loss": 1.1833, "step": 181500 }, { "epoch": 9.47, "learning_rate": 0.0016213066999583854, "loss": 1.1843, "step": 182000 }, { "epoch": 9.49, "learning_rate": 0.0016202663337494799, "loss": 1.1845, "step": 182500 }, { "epoch": 9.52, "learning_rate": 0.0016192259675405743, "loss": 1.1837, "step": 183000 }, { "epoch": 9.55, "learning_rate": 0.0016181856013316688, "loss": 1.1837, "step": 183500 }, { "epoch": 9.57, "learning_rate": 0.0016171452351227634, "loss": 1.184, "step": 184000 }, { "epoch": 9.6, "learning_rate": 0.0016161048689138577, "loss": 1.1835, "step": 184500 }, { "epoch": 9.62, "learning_rate": 0.0016150645027049521, "loss": 1.1826, "step": 185000 }, { "epoch": 9.65, "learning_rate": 0.0016140241364960466, "loss": 1.1846, "step": 185500 }, { "epoch": 9.68, "learning_rate": 0.001612983770287141, "loss": 1.1833, "step": 186000 }, { "epoch": 9.7, "learning_rate": 0.0016119434040782355, "loss": 1.1829, "step": 186500 }, { "epoch": 9.73, "learning_rate": 0.0016109030378693302, "loss": 1.183, "step": 187000 }, { "epoch": 9.75, "learning_rate": 0.0016098626716604246, "loss": 1.184, "step": 187500 }, { "epoch": 9.78, "learning_rate": 0.0016088223054515189, "loss": 1.1831, "step": 188000 }, { "epoch": 9.81, "learning_rate": 0.0016077819392426133, "loss": 1.185, "step": 188500 }, { "epoch": 9.83, "learning_rate": 0.0016067415730337078, "loss": 1.183, "step": 189000 }, { "epoch": 9.86, "learning_rate": 0.0016057012068248025, "loss": 1.183, "step": 189500 }, { "epoch": 9.88, "learning_rate": 0.001604660840615897, "loss": 1.1837, "step": 190000 }, { "epoch": 9.91, "learning_rate": 0.0016036204744069914, "loss": 1.1809, "step": 190500 }, { "epoch": 9.94, "learning_rate": 0.0016025801081980858, "loss": 1.1838, "step": 191000 }, { "epoch": 9.96, "learning_rate": 0.00160153974198918, "loss": 1.1831, "step": 191500 }, { "epoch": 9.99, "learning_rate": 0.0016004993757802745, "loss": 1.1846, "step": 192000 }, { "epoch": 10.0, "eval_loss": 1.2037365436553955, "eval_runtime": 0.6098, "eval_samples_per_second": 1639.906, "eval_steps_per_second": 3.28, "step": 192240 }, { "epoch": 10.01, "learning_rate": 0.0015994590095713692, "loss": 1.181, "step": 192500 }, { "epoch": 10.04, "learning_rate": 0.0015984186433624637, "loss": 1.1792, "step": 193000 }, { "epoch": 10.07, "learning_rate": 0.0015973782771535581, "loss": 1.1796, "step": 193500 }, { "epoch": 10.09, "learning_rate": 0.0015963379109446526, "loss": 1.1806, "step": 194000 }, { "epoch": 10.12, "learning_rate": 0.001595297544735747, "loss": 1.1803, "step": 194500 }, { "epoch": 10.14, "learning_rate": 0.0015942571785268413, "loss": 1.1801, "step": 195000 }, { "epoch": 10.17, "learning_rate": 0.001593216812317936, "loss": 1.1799, "step": 195500 }, { "epoch": 10.2, "learning_rate": 0.0015921764461090304, "loss": 1.1812, "step": 196000 }, { "epoch": 10.22, "learning_rate": 0.0015911360799001249, "loss": 1.1795, "step": 196500 }, { "epoch": 10.25, "learning_rate": 0.0015900957136912193, "loss": 1.1812, "step": 197000 }, { "epoch": 10.27, "learning_rate": 0.0015890553474823138, "loss": 1.1803, "step": 197500 }, { "epoch": 10.3, "learning_rate": 0.0015880149812734085, "loss": 1.1818, "step": 198000 }, { "epoch": 10.33, "learning_rate": 0.0015869746150645027, "loss": 1.1802, "step": 198500 }, { "epoch": 10.35, "learning_rate": 0.0015859342488555972, "loss": 1.1805, "step": 199000 }, { "epoch": 10.38, "learning_rate": 0.0015848938826466916, "loss": 1.1802, "step": 199500 }, { "epoch": 10.4, "learning_rate": 0.001583853516437786, "loss": 1.1812, "step": 200000 }, { "epoch": 10.43, "learning_rate": 0.0015828131502288805, "loss": 1.1817, "step": 200500 }, { "epoch": 10.46, "learning_rate": 0.0015817727840199752, "loss": 1.1828, "step": 201000 }, { "epoch": 10.48, "learning_rate": 0.0015807324178110697, "loss": 1.1798, "step": 201500 }, { "epoch": 10.51, "learning_rate": 0.0015796920516021641, "loss": 1.1817, "step": 202000 }, { "epoch": 10.53, "learning_rate": 0.0015786516853932584, "loss": 1.181, "step": 202500 }, { "epoch": 10.56, "learning_rate": 0.0015776113191843528, "loss": 1.1814, "step": 203000 }, { "epoch": 10.59, "learning_rate": 0.0015765709529754473, "loss": 1.1798, "step": 203500 }, { "epoch": 10.61, "learning_rate": 0.001575530586766542, "loss": 1.1819, "step": 204000 }, { "epoch": 10.64, "learning_rate": 0.0015744902205576364, "loss": 1.1818, "step": 204500 }, { "epoch": 10.66, "learning_rate": 0.0015734498543487309, "loss": 1.182, "step": 205000 }, { "epoch": 10.69, "learning_rate": 0.0015724094881398253, "loss": 1.1821, "step": 205500 }, { "epoch": 10.72, "learning_rate": 0.0015713691219309195, "loss": 1.1819, "step": 206000 }, { "epoch": 10.74, "learning_rate": 0.0015703287557220142, "loss": 1.1809, "step": 206500 }, { "epoch": 10.77, "learning_rate": 0.0015692883895131087, "loss": 1.1806, "step": 207000 }, { "epoch": 10.79, "learning_rate": 0.0015682480233042031, "loss": 1.1814, "step": 207500 }, { "epoch": 10.82, "learning_rate": 0.0015672076570952976, "loss": 1.181, "step": 208000 }, { "epoch": 10.85, "learning_rate": 0.001566167290886392, "loss": 1.183, "step": 208500 }, { "epoch": 10.87, "learning_rate": 0.0015651269246774865, "loss": 1.1812, "step": 209000 }, { "epoch": 10.9, "learning_rate": 0.001564086558468581, "loss": 1.1803, "step": 209500 }, { "epoch": 10.92, "learning_rate": 0.0015630461922596754, "loss": 1.1817, "step": 210000 }, { "epoch": 10.95, "learning_rate": 0.0015620058260507699, "loss": 1.1781, "step": 210500 }, { "epoch": 10.98, "learning_rate": 0.0015609654598418643, "loss": 1.1806, "step": 211000 }, { "epoch": 11.0, "eval_loss": 1.2047163248062134, "eval_runtime": 0.6153, "eval_samples_per_second": 1625.195, "eval_steps_per_second": 3.25, "step": 211464 }, { "epoch": 11.0, "learning_rate": 0.0015599250936329588, "loss": 1.1819, "step": 211500 }, { "epoch": 11.03, "learning_rate": 0.0015588847274240535, "loss": 1.1753, "step": 212000 }, { "epoch": 11.05, "learning_rate": 0.001557844361215148, "loss": 1.1781, "step": 212500 }, { "epoch": 11.08, "learning_rate": 0.0015568039950062422, "loss": 1.1788, "step": 213000 }, { "epoch": 11.11, "learning_rate": 0.0015557636287973366, "loss": 1.1768, "step": 213500 }, { "epoch": 11.13, "learning_rate": 0.001554723262588431, "loss": 1.1775, "step": 214000 }, { "epoch": 11.16, "learning_rate": 0.0015536828963795255, "loss": 1.1782, "step": 214500 }, { "epoch": 11.18, "learning_rate": 0.0015526425301706202, "loss": 1.1771, "step": 215000 }, { "epoch": 11.21, "learning_rate": 0.0015516021639617147, "loss": 1.1778, "step": 215500 }, { "epoch": 11.24, "learning_rate": 0.0015505617977528091, "loss": 1.1767, "step": 216000 }, { "epoch": 11.26, "learning_rate": 0.0015495214315439034, "loss": 1.1781, "step": 216500 }, { "epoch": 11.29, "learning_rate": 0.0015484810653349978, "loss": 1.1781, "step": 217000 }, { "epoch": 11.31, "learning_rate": 0.0015474406991260923, "loss": 1.179, "step": 217500 }, { "epoch": 11.34, "learning_rate": 0.001546400332917187, "loss": 1.1775, "step": 218000 }, { "epoch": 11.37, "learning_rate": 0.0015453599667082814, "loss": 1.1799, "step": 218500 }, { "epoch": 11.39, "learning_rate": 0.0015443196004993759, "loss": 1.1773, "step": 219000 }, { "epoch": 11.42, "learning_rate": 0.0015432792342904703, "loss": 1.1786, "step": 219500 }, { "epoch": 11.44, "learning_rate": 0.0015422388680815646, "loss": 1.1766, "step": 220000 }, { "epoch": 11.47, "learning_rate": 0.0015411985018726592, "loss": 1.1793, "step": 220500 }, { "epoch": 11.5, "learning_rate": 0.0015401581356637537, "loss": 1.1785, "step": 221000 }, { "epoch": 11.52, "learning_rate": 0.0015391177694548481, "loss": 1.1787, "step": 221500 }, { "epoch": 11.55, "learning_rate": 0.0015380774032459426, "loss": 1.1786, "step": 222000 }, { "epoch": 11.57, "learning_rate": 0.001537037037037037, "loss": 1.1796, "step": 222500 }, { "epoch": 11.6, "learning_rate": 0.0015359966708281315, "loss": 1.1785, "step": 223000 }, { "epoch": 11.63, "learning_rate": 0.0015349563046192262, "loss": 1.1789, "step": 223500 }, { "epoch": 11.65, "learning_rate": 0.0015339159384103204, "loss": 1.1775, "step": 224000 }, { "epoch": 11.68, "learning_rate": 0.0015328755722014149, "loss": 1.181, "step": 224500 }, { "epoch": 11.7, "learning_rate": 0.0015318352059925093, "loss": 1.1781, "step": 225000 }, { "epoch": 11.73, "learning_rate": 0.0015307948397836038, "loss": 1.1791, "step": 225500 }, { "epoch": 11.76, "learning_rate": 0.0015297544735746985, "loss": 1.1797, "step": 226000 }, { "epoch": 11.78, "learning_rate": 0.001528714107365793, "loss": 1.1779, "step": 226500 }, { "epoch": 11.81, "learning_rate": 0.0015276737411568874, "loss": 1.1773, "step": 227000 }, { "epoch": 11.83, "learning_rate": 0.0015266333749479816, "loss": 1.178, "step": 227500 }, { "epoch": 11.86, "learning_rate": 0.001525593008739076, "loss": 1.179, "step": 228000 }, { "epoch": 11.89, "learning_rate": 0.0015245526425301705, "loss": 1.1782, "step": 228500 }, { "epoch": 11.91, "learning_rate": 0.0015235122763212652, "loss": 1.1796, "step": 229000 }, { "epoch": 11.94, "learning_rate": 0.0015224719101123597, "loss": 1.1771, "step": 229500 }, { "epoch": 11.96, "learning_rate": 0.0015214315439034541, "loss": 1.179, "step": 230000 }, { "epoch": 11.99, "learning_rate": 0.0015203911776945486, "loss": 1.1791, "step": 230500 }, { "epoch": 12.0, "eval_loss": 1.1990782022476196, "eval_runtime": 0.5976, "eval_samples_per_second": 1673.372, "eval_steps_per_second": 3.347, "step": 230688 }, { "epoch": 12.02, "learning_rate": 0.0015193508114856428, "loss": 1.1745, "step": 231000 }, { "epoch": 12.04, "learning_rate": 0.0015183104452767373, "loss": 1.1744, "step": 231500 }, { "epoch": 12.07, "learning_rate": 0.001517270079067832, "loss": 1.1743, "step": 232000 }, { "epoch": 12.09, "learning_rate": 0.0015162297128589264, "loss": 1.1758, "step": 232500 }, { "epoch": 12.12, "learning_rate": 0.0015151893466500209, "loss": 1.1738, "step": 233000 }, { "epoch": 12.15, "learning_rate": 0.0015141489804411153, "loss": 1.1752, "step": 233500 }, { "epoch": 12.17, "learning_rate": 0.0015131086142322098, "loss": 1.1753, "step": 234000 }, { "epoch": 12.2, "learning_rate": 0.0015120682480233042, "loss": 1.1764, "step": 234500 }, { "epoch": 12.22, "learning_rate": 0.0015110278818143987, "loss": 1.174, "step": 235000 }, { "epoch": 12.25, "learning_rate": 0.0015099875156054932, "loss": 1.176, "step": 235500 }, { "epoch": 12.28, "learning_rate": 0.0015089471493965876, "loss": 1.176, "step": 236000 }, { "epoch": 12.3, "learning_rate": 0.001507906783187682, "loss": 1.1758, "step": 236500 }, { "epoch": 12.33, "learning_rate": 0.0015068664169787765, "loss": 1.1747, "step": 237000 }, { "epoch": 12.35, "learning_rate": 0.0015058260507698712, "loss": 1.1752, "step": 237500 }, { "epoch": 12.38, "learning_rate": 0.0015047856845609654, "loss": 1.1761, "step": 238000 }, { "epoch": 12.41, "learning_rate": 0.00150374531835206, "loss": 1.1771, "step": 238500 }, { "epoch": 12.43, "learning_rate": 0.0015027049521431544, "loss": 1.1764, "step": 239000 }, { "epoch": 12.46, "learning_rate": 0.0015016645859342488, "loss": 1.1754, "step": 239500 }, { "epoch": 12.48, "learning_rate": 0.0015006242197253433, "loss": 1.1763, "step": 240000 }, { "epoch": 12.51, "learning_rate": 0.001499583853516438, "loss": 1.1751, "step": 240500 }, { "epoch": 12.54, "learning_rate": 0.0014985434873075324, "loss": 1.177, "step": 241000 }, { "epoch": 12.56, "learning_rate": 0.0014975031210986266, "loss": 1.178, "step": 241500 }, { "epoch": 12.59, "learning_rate": 0.001496462754889721, "loss": 1.178, "step": 242000 }, { "epoch": 12.61, "learning_rate": 0.0014954223886808156, "loss": 1.1761, "step": 242500 }, { "epoch": 12.64, "learning_rate": 0.0014943820224719102, "loss": 1.1763, "step": 243000 }, { "epoch": 12.67, "learning_rate": 0.0014933416562630047, "loss": 1.1781, "step": 243500 }, { "epoch": 12.69, "learning_rate": 0.0014923012900540991, "loss": 1.1773, "step": 244000 }, { "epoch": 12.72, "learning_rate": 0.0014912609238451936, "loss": 1.176, "step": 244500 }, { "epoch": 12.74, "learning_rate": 0.0014902205576362878, "loss": 1.1762, "step": 245000 }, { "epoch": 12.77, "learning_rate": 0.0014891801914273823, "loss": 1.1776, "step": 245500 }, { "epoch": 12.8, "learning_rate": 0.001488139825218477, "loss": 1.1774, "step": 246000 }, { "epoch": 12.82, "learning_rate": 0.0014870994590095714, "loss": 1.1759, "step": 246500 }, { "epoch": 12.85, "learning_rate": 0.0014860590928006659, "loss": 1.1783, "step": 247000 }, { "epoch": 12.87, "learning_rate": 0.0014850187265917603, "loss": 1.1757, "step": 247500 }, { "epoch": 12.9, "learning_rate": 0.0014839783603828548, "loss": 1.1769, "step": 248000 }, { "epoch": 12.93, "learning_rate": 0.0014829379941739495, "loss": 1.1749, "step": 248500 }, { "epoch": 12.95, "learning_rate": 0.0014818976279650437, "loss": 1.1751, "step": 249000 }, { "epoch": 12.98, "learning_rate": 0.0014808572617561382, "loss": 1.1757, "step": 249500 }, { "epoch": 13.0, "eval_loss": 1.198763132095337, "eval_runtime": 0.6114, "eval_samples_per_second": 1635.549, "eval_steps_per_second": 3.271, "step": 249912 }, { "epoch": 13.0, "learning_rate": 0.0014798168955472326, "loss": 1.1756, "step": 250000 }, { "epoch": 13.03, "learning_rate": 0.001478776529338327, "loss": 1.1712, "step": 250500 }, { "epoch": 13.06, "learning_rate": 0.0014777361631294215, "loss": 1.1714, "step": 251000 }, { "epoch": 13.08, "learning_rate": 0.0014766957969205162, "loss": 1.1724, "step": 251500 }, { "epoch": 13.11, "learning_rate": 0.0014756554307116107, "loss": 1.1736, "step": 252000 }, { "epoch": 13.13, "learning_rate": 0.001474615064502705, "loss": 1.1739, "step": 252500 }, { "epoch": 13.16, "learning_rate": 0.0014735746982937994, "loss": 1.1737, "step": 253000 }, { "epoch": 13.19, "learning_rate": 0.0014725343320848938, "loss": 1.1732, "step": 253500 }, { "epoch": 13.21, "learning_rate": 0.0014714939658759883, "loss": 1.1725, "step": 254000 }, { "epoch": 13.24, "learning_rate": 0.001470453599667083, "loss": 1.1737, "step": 254500 }, { "epoch": 13.26, "learning_rate": 0.0014694132334581774, "loss": 1.1737, "step": 255000 }, { "epoch": 13.29, "learning_rate": 0.0014683728672492719, "loss": 1.1733, "step": 255500 }, { "epoch": 13.32, "learning_rate": 0.0014673325010403661, "loss": 1.1729, "step": 256000 }, { "epoch": 13.34, "learning_rate": 0.0014662921348314606, "loss": 1.1742, "step": 256500 }, { "epoch": 13.37, "learning_rate": 0.0014652517686225552, "loss": 1.174, "step": 257000 }, { "epoch": 13.39, "learning_rate": 0.0014642114024136497, "loss": 1.1749, "step": 257500 }, { "epoch": 13.42, "learning_rate": 0.0014631710362047442, "loss": 1.1735, "step": 258000 }, { "epoch": 13.45, "learning_rate": 0.0014621306699958386, "loss": 1.1724, "step": 258500 }, { "epoch": 13.47, "learning_rate": 0.001461090303786933, "loss": 1.1743, "step": 259000 }, { "epoch": 13.5, "learning_rate": 0.0014600499375780273, "loss": 1.1755, "step": 259500 }, { "epoch": 13.52, "learning_rate": 0.001459009571369122, "loss": 1.1759, "step": 260000 }, { "epoch": 13.55, "learning_rate": 0.0014579692051602164, "loss": 1.1749, "step": 260500 }, { "epoch": 13.58, "learning_rate": 0.001456928838951311, "loss": 1.1756, "step": 261000 }, { "epoch": 13.6, "learning_rate": 0.0014558884727424054, "loss": 1.1747, "step": 261500 }, { "epoch": 13.63, "learning_rate": 0.0014548481065334998, "loss": 1.1745, "step": 262000 }, { "epoch": 13.65, "learning_rate": 0.0014538077403245945, "loss": 1.1736, "step": 262500 }, { "epoch": 13.68, "learning_rate": 0.0014527673741156887, "loss": 1.1749, "step": 263000 }, { "epoch": 13.71, "learning_rate": 0.0014517270079067832, "loss": 1.1747, "step": 263500 }, { "epoch": 13.73, "learning_rate": 0.0014506866416978776, "loss": 1.1735, "step": 264000 }, { "epoch": 13.76, "learning_rate": 0.001449646275488972, "loss": 1.1736, "step": 264500 }, { "epoch": 13.78, "learning_rate": 0.0014486059092800666, "loss": 1.1741, "step": 265000 }, { "epoch": 13.81, "learning_rate": 0.0014475655430711612, "loss": 1.1756, "step": 265500 }, { "epoch": 13.84, "learning_rate": 0.0014465251768622557, "loss": 1.1746, "step": 266000 }, { "epoch": 13.86, "learning_rate": 0.00144548481065335, "loss": 1.176, "step": 266500 }, { "epoch": 13.89, "learning_rate": 0.0014444444444444444, "loss": 1.1746, "step": 267000 }, { "epoch": 13.91, "learning_rate": 0.0014434040782355388, "loss": 1.1746, "step": 267500 }, { "epoch": 13.94, "learning_rate": 0.0014423637120266333, "loss": 1.1741, "step": 268000 }, { "epoch": 13.97, "learning_rate": 0.001441323345817728, "loss": 1.173, "step": 268500 }, { "epoch": 13.99, "learning_rate": 0.0014402829796088224, "loss": 1.1741, "step": 269000 }, { "epoch": 14.0, "eval_loss": 1.199006199836731, "eval_runtime": 0.8396, "eval_samples_per_second": 1191.026, "eval_steps_per_second": 2.382, "step": 269136 }, { "epoch": 14.02, "learning_rate": 0.0014392426133999169, "loss": 1.1715, "step": 269500 }, { "epoch": 14.04, "learning_rate": 0.0014382022471910111, "loss": 1.1707, "step": 270000 }, { "epoch": 14.07, "learning_rate": 0.0014371618809821056, "loss": 1.171, "step": 270500 }, { "epoch": 14.1, "learning_rate": 0.0014361215147732003, "loss": 1.1708, "step": 271000 }, { "epoch": 14.12, "learning_rate": 0.0014350811485642947, "loss": 1.1715, "step": 271500 }, { "epoch": 14.15, "learning_rate": 0.0014340407823553892, "loss": 1.171, "step": 272000 }, { "epoch": 14.17, "learning_rate": 0.0014330004161464836, "loss": 1.1721, "step": 272500 }, { "epoch": 14.2, "learning_rate": 0.001431960049937578, "loss": 1.1717, "step": 273000 }, { "epoch": 14.23, "learning_rate": 0.0014309196837286725, "loss": 1.1715, "step": 273500 }, { "epoch": 14.25, "learning_rate": 0.001429879317519767, "loss": 1.171, "step": 274000 }, { "epoch": 14.28, "learning_rate": 0.0014288389513108614, "loss": 1.1712, "step": 274500 }, { "epoch": 14.31, "learning_rate": 0.001427798585101956, "loss": 1.1726, "step": 275000 }, { "epoch": 14.33, "learning_rate": 0.0014267582188930504, "loss": 1.173, "step": 275500 }, { "epoch": 14.36, "learning_rate": 0.0014257178526841448, "loss": 1.1709, "step": 276000 }, { "epoch": 14.38, "learning_rate": 0.0014246774864752393, "loss": 1.1728, "step": 276500 }, { "epoch": 14.41, "learning_rate": 0.001423637120266334, "loss": 1.1721, "step": 277000 }, { "epoch": 14.44, "learning_rate": 0.0014225967540574282, "loss": 1.1732, "step": 277500 }, { "epoch": 14.46, "learning_rate": 0.0014215563878485226, "loss": 1.1718, "step": 278000 }, { "epoch": 14.49, "learning_rate": 0.001420516021639617, "loss": 1.1716, "step": 278500 }, { "epoch": 14.51, "learning_rate": 0.0014194756554307116, "loss": 1.1735, "step": 279000 }, { "epoch": 14.54, "learning_rate": 0.0014184352892218062, "loss": 1.1731, "step": 279500 }, { "epoch": 14.57, "learning_rate": 0.0014173949230129007, "loss": 1.1724, "step": 280000 }, { "epoch": 14.59, "learning_rate": 0.0014163545568039951, "loss": 1.1717, "step": 280500 }, { "epoch": 14.62, "learning_rate": 0.0014153141905950894, "loss": 1.1714, "step": 281000 }, { "epoch": 14.64, "learning_rate": 0.0014142738243861838, "loss": 1.1725, "step": 281500 }, { "epoch": 14.67, "learning_rate": 0.0014132334581772783, "loss": 1.1726, "step": 282000 }, { "epoch": 14.7, "learning_rate": 0.001412193091968373, "loss": 1.1731, "step": 282500 }, { "epoch": 14.72, "learning_rate": 0.0014111527257594674, "loss": 1.1727, "step": 283000 }, { "epoch": 14.75, "learning_rate": 0.0014101123595505619, "loss": 1.1724, "step": 283500 }, { "epoch": 14.77, "learning_rate": 0.0014090719933416563, "loss": 1.1715, "step": 284000 }, { "epoch": 14.8, "learning_rate": 0.0014080316271327506, "loss": 1.173, "step": 284500 }, { "epoch": 14.83, "learning_rate": 0.0014069912609238453, "loss": 1.1727, "step": 285000 }, { "epoch": 14.85, "learning_rate": 0.0014059508947149397, "loss": 1.1744, "step": 285500 }, { "epoch": 14.88, "learning_rate": 0.0014049105285060342, "loss": 1.1709, "step": 286000 }, { "epoch": 14.9, "learning_rate": 0.0014038701622971286, "loss": 1.1727, "step": 286500 }, { "epoch": 14.93, "learning_rate": 0.001402829796088223, "loss": 1.1733, "step": 287000 }, { "epoch": 14.96, "learning_rate": 0.0014017894298793175, "loss": 1.1736, "step": 287500 }, { "epoch": 14.98, "learning_rate": 0.001400749063670412, "loss": 1.1734, "step": 288000 }, { "epoch": 15.0, "eval_loss": 1.1976137161254883, "eval_runtime": 0.6131, "eval_samples_per_second": 1631.003, "eval_steps_per_second": 3.262, "step": 288360 }, { "epoch": 15.01, "learning_rate": 0.0013997086974615065, "loss": 1.1707, "step": 288500 }, { "epoch": 15.03, "learning_rate": 0.001398668331252601, "loss": 1.1673, "step": 289000 }, { "epoch": 15.06, "learning_rate": 0.0013976279650436954, "loss": 1.1696, "step": 289500 }, { "epoch": 15.09, "learning_rate": 0.0013965875988347898, "loss": 1.169, "step": 290000 }, { "epoch": 15.11, "learning_rate": 0.0013955472326258843, "loss": 1.1689, "step": 290500 }, { "epoch": 15.14, "learning_rate": 0.001394506866416979, "loss": 1.1702, "step": 291000 }, { "epoch": 15.16, "learning_rate": 0.0013934665002080732, "loss": 1.1687, "step": 291500 }, { "epoch": 15.19, "learning_rate": 0.0013924261339991677, "loss": 1.1688, "step": 292000 }, { "epoch": 15.22, "learning_rate": 0.0013913857677902621, "loss": 1.1693, "step": 292500 }, { "epoch": 15.24, "learning_rate": 0.0013903454015813566, "loss": 1.1703, "step": 293000 }, { "epoch": 15.27, "learning_rate": 0.0013893050353724512, "loss": 1.1719, "step": 293500 }, { "epoch": 15.29, "learning_rate": 0.0013882646691635457, "loss": 1.1701, "step": 294000 }, { "epoch": 15.32, "learning_rate": 0.0013872243029546402, "loss": 1.1707, "step": 294500 }, { "epoch": 15.35, "learning_rate": 0.0013861839367457346, "loss": 1.1708, "step": 295000 }, { "epoch": 15.37, "learning_rate": 0.0013851435705368289, "loss": 1.1716, "step": 295500 }, { "epoch": 15.4, "learning_rate": 0.0013841032043279233, "loss": 1.1716, "step": 296000 }, { "epoch": 15.42, "learning_rate": 0.001383062838119018, "loss": 1.1707, "step": 296500 }, { "epoch": 15.45, "learning_rate": 0.0013820224719101124, "loss": 1.1708, "step": 297000 }, { "epoch": 15.48, "learning_rate": 0.001380982105701207, "loss": 1.1691, "step": 297500 }, { "epoch": 15.5, "learning_rate": 0.0013799417394923014, "loss": 1.1725, "step": 298000 }, { "epoch": 15.53, "learning_rate": 0.0013789013732833958, "loss": 1.1697, "step": 298500 }, { "epoch": 15.55, "learning_rate": 0.0013778610070744903, "loss": 1.1715, "step": 299000 }, { "epoch": 15.58, "learning_rate": 0.0013768206408655847, "loss": 1.1713, "step": 299500 }, { "epoch": 15.61, "learning_rate": 0.0013757802746566792, "loss": 1.1708, "step": 300000 }, { "epoch": 15.63, "learning_rate": 0.0013747399084477736, "loss": 1.1705, "step": 300500 }, { "epoch": 15.66, "learning_rate": 0.001373699542238868, "loss": 1.1712, "step": 301000 }, { "epoch": 15.68, "learning_rate": 0.0013726591760299626, "loss": 1.1712, "step": 301500 }, { "epoch": 15.71, "learning_rate": 0.0013716188098210572, "loss": 1.1715, "step": 302000 }, { "epoch": 15.74, "learning_rate": 0.0013705784436121515, "loss": 1.1709, "step": 302500 }, { "epoch": 15.76, "learning_rate": 0.001369538077403246, "loss": 1.1722, "step": 303000 }, { "epoch": 15.79, "learning_rate": 0.0013684977111943404, "loss": 1.1718, "step": 303500 }, { "epoch": 15.81, "learning_rate": 0.0013674573449854348, "loss": 1.1708, "step": 304000 }, { "epoch": 15.84, "learning_rate": 0.0013664169787765293, "loss": 1.1723, "step": 304500 }, { "epoch": 15.87, "learning_rate": 0.001365376612567624, "loss": 1.1702, "step": 305000 }, { "epoch": 15.89, "learning_rate": 0.0013643362463587184, "loss": 1.1714, "step": 305500 }, { "epoch": 15.92, "learning_rate": 0.0013632958801498127, "loss": 1.1701, "step": 306000 }, { "epoch": 15.94, "learning_rate": 0.0013622555139409071, "loss": 1.1709, "step": 306500 }, { "epoch": 15.97, "learning_rate": 0.0013612151477320016, "loss": 1.1697, "step": 307000 }, { "epoch": 16.0, "learning_rate": 0.0013601747815230963, "loss": 1.1709, "step": 307500 }, { "epoch": 16.0, "eval_loss": 1.1968414783477783, "eval_runtime": 0.6109, "eval_samples_per_second": 1636.861, "eval_steps_per_second": 3.274, "step": 307584 } ], "logging_steps": 500, "max_steps": 961200, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.2378919133169423e+18, "train_batch_size": 512, "trial_name": null, "trial_params": null }