{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.0, "eval_steps": 500, "global_step": 1231340, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "learning_rate": 0.0003, "loss": 3.0982, "step": 2000 }, { "epoch": 0.09, "learning_rate": 0.0002995282277087592, "loss": 1.1999, "step": 4000 }, { "epoch": 0.14, "learning_rate": 0.00029905645541751844, "loss": 1.0995, "step": 6000 }, { "epoch": 0.19, "learning_rate": 0.0002985846831262777, "loss": 1.0716, "step": 8000 }, { "epoch": 0.24, "learning_rate": 0.0002981129108350369, "loss": 1.0664, "step": 10000 }, { "epoch": 0.28, "learning_rate": 0.0002976411385437962, "loss": 1.0223, "step": 12000 }, { "epoch": 0.33, "learning_rate": 0.00029716936625255537, "loss": 0.9955, "step": 14000 }, { "epoch": 0.38, "learning_rate": 0.00029669759396131466, "loss": 0.9925, "step": 16000 }, { "epoch": 0.42, "learning_rate": 0.0002962258216700739, "loss": 0.9951, "step": 18000 }, { "epoch": 0.47, "learning_rate": 0.0002957540493788331, "loss": 0.9811, "step": 20000 }, { "epoch": 0.52, "learning_rate": 0.00029528227708759236, "loss": 0.9727, "step": 22000 }, { "epoch": 0.57, "learning_rate": 0.0002948105047963516, "loss": 0.9655, "step": 24000 }, { "epoch": 0.61, "learning_rate": 0.0002943387325051109, "loss": 0.959, "step": 26000 }, { "epoch": 0.66, "learning_rate": 0.00029386696021387005, "loss": 0.9591, "step": 28000 }, { "epoch": 0.71, "learning_rate": 0.00029339518792262934, "loss": 0.9455, "step": 30000 }, { "epoch": 0.75, "learning_rate": 0.0002929234156313886, "loss": 0.9425, "step": 32000 }, { "epoch": 0.8, "learning_rate": 0.0002924516433401478, "loss": 0.9373, "step": 34000 }, { "epoch": 0.85, "learning_rate": 0.00029197987104890704, "loss": 0.9341, "step": 36000 }, { "epoch": 0.89, "learning_rate": 0.00029150809875766627, "loss": 0.93, "step": 38000 }, { "epoch": 0.94, "learning_rate": 0.0002910363264664255, "loss": 0.9184, "step": 40000 }, { "epoch": 0.99, "learning_rate": 0.00029056455417518474, "loss": 0.9222, "step": 42000 }, { "epoch": 1.04, "learning_rate": 0.000290092781883944, "loss": 0.8941, "step": 44000 }, { "epoch": 1.08, "learning_rate": 0.0002896210095927032, "loss": 0.8868, "step": 46000 }, { "epoch": 1.13, "learning_rate": 0.0002891492373014625, "loss": 0.8894, "step": 48000 }, { "epoch": 1.18, "learning_rate": 0.0002886774650102217, "loss": 0.8799, "step": 50000 }, { "epoch": 1.22, "learning_rate": 0.00028820569271898095, "loss": 0.8781, "step": 52000 }, { "epoch": 1.27, "learning_rate": 0.0002877339204277402, "loss": 0.8853, "step": 54000 }, { "epoch": 1.32, "learning_rate": 0.0002872621481364994, "loss": 0.8829, "step": 56000 }, { "epoch": 1.37, "learning_rate": 0.00028679037584525865, "loss": 0.8747, "step": 58000 }, { "epoch": 1.41, "learning_rate": 0.0002863186035540179, "loss": 0.8806, "step": 60000 }, { "epoch": 1.46, "learning_rate": 0.00028584683126277717, "loss": 0.8868, "step": 62000 }, { "epoch": 1.51, "learning_rate": 0.00028537505897153635, "loss": 0.8806, "step": 64000 }, { "epoch": 1.55, "learning_rate": 0.00028490328668029564, "loss": 0.8771, "step": 66000 }, { "epoch": 1.6, "learning_rate": 0.00028443151438905487, "loss": 0.8648, "step": 68000 }, { "epoch": 1.65, "learning_rate": 0.0002839597420978141, "loss": 0.8747, "step": 70000 }, { "epoch": 1.7, "learning_rate": 0.00028348796980657334, "loss": 0.8719, "step": 72000 }, { "epoch": 1.74, "learning_rate": 0.00028301619751533257, "loss": 0.8641, "step": 74000 }, { "epoch": 1.79, "learning_rate": 0.0002825444252240918, "loss": 0.8555, "step": 76000 }, { "epoch": 1.84, "learning_rate": 0.00028207265293285103, "loss": 0.8583, "step": 78000 }, { "epoch": 1.88, "learning_rate": 0.0002816008806416103, "loss": 0.8624, "step": 80000 }, { "epoch": 1.93, "learning_rate": 0.0002811291083503695, "loss": 0.8578, "step": 82000 }, { "epoch": 1.98, "learning_rate": 0.0002806573360591288, "loss": 0.8474, "step": 84000 }, { "epoch": 2.03, "learning_rate": 0.000280185563767888, "loss": 0.8366, "step": 86000 }, { "epoch": 2.07, "learning_rate": 0.00027971379147664725, "loss": 0.8226, "step": 88000 }, { "epoch": 2.12, "learning_rate": 0.0002792420191854065, "loss": 0.8267, "step": 90000 }, { "epoch": 2.17, "learning_rate": 0.0002787702468941657, "loss": 0.8231, "step": 92000 }, { "epoch": 2.21, "learning_rate": 0.00027829847460292495, "loss": 0.8284, "step": 94000 }, { "epoch": 2.26, "learning_rate": 0.0002778267023116842, "loss": 0.8153, "step": 96000 }, { "epoch": 2.31, "learning_rate": 0.00027735493002044347, "loss": 0.8241, "step": 98000 }, { "epoch": 2.36, "learning_rate": 0.00027688315772920265, "loss": 0.8246, "step": 100000 }, { "epoch": 2.4, "learning_rate": 0.00027641138543796194, "loss": 0.8219, "step": 102000 }, { "epoch": 2.45, "learning_rate": 0.00027593961314672117, "loss": 0.8244, "step": 104000 }, { "epoch": 2.5, "learning_rate": 0.0002754678408554804, "loss": 0.8201, "step": 106000 }, { "epoch": 2.54, "learning_rate": 0.00027499606856423963, "loss": 0.8193, "step": 108000 }, { "epoch": 2.59, "learning_rate": 0.00027452429627299887, "loss": 0.8225, "step": 110000 }, { "epoch": 2.64, "learning_rate": 0.0002740525239817581, "loss": 0.8105, "step": 112000 }, { "epoch": 2.68, "learning_rate": 0.00027358075169051733, "loss": 0.82, "step": 114000 }, { "epoch": 2.73, "learning_rate": 0.0002731089793992766, "loss": 0.8191, "step": 116000 }, { "epoch": 2.78, "learning_rate": 0.00027263720710803585, "loss": 0.8102, "step": 118000 }, { "epoch": 2.83, "learning_rate": 0.0002721654348167951, "loss": 0.8189, "step": 120000 }, { "epoch": 2.87, "learning_rate": 0.0002716936625255543, "loss": 0.8128, "step": 122000 }, { "epoch": 2.92, "learning_rate": 0.00027122189023431355, "loss": 0.815, "step": 124000 }, { "epoch": 2.97, "learning_rate": 0.0002707501179430728, "loss": 0.8123, "step": 126000 }, { "epoch": 3.01, "learning_rate": 0.000270278345651832, "loss": 0.7986, "step": 128000 }, { "epoch": 3.06, "learning_rate": 0.00026980657336059125, "loss": 0.7701, "step": 130000 }, { "epoch": 3.11, "learning_rate": 0.0002693348010693505, "loss": 0.7819, "step": 132000 }, { "epoch": 3.16, "learning_rate": 0.00026886302877810977, "loss": 0.7826, "step": 134000 }, { "epoch": 3.2, "learning_rate": 0.000268391256486869, "loss": 0.7868, "step": 136000 }, { "epoch": 3.25, "learning_rate": 0.00026791948419562823, "loss": 0.7843, "step": 138000 }, { "epoch": 3.3, "learning_rate": 0.00026744771190438747, "loss": 0.7841, "step": 140000 }, { "epoch": 3.34, "learning_rate": 0.0002669759396131467, "loss": 0.7845, "step": 142000 }, { "epoch": 3.39, "learning_rate": 0.00026650416732190593, "loss": 0.7859, "step": 144000 }, { "epoch": 3.44, "learning_rate": 0.00026603239503066516, "loss": 0.7746, "step": 146000 }, { "epoch": 3.49, "learning_rate": 0.0002655606227394244, "loss": 0.7749, "step": 148000 }, { "epoch": 3.53, "learning_rate": 0.00026508885044818363, "loss": 0.7747, "step": 150000 }, { "epoch": 3.58, "learning_rate": 0.0002646170781569429, "loss": 0.7816, "step": 152000 }, { "epoch": 3.63, "learning_rate": 0.00026414530586570215, "loss": 0.7807, "step": 154000 }, { "epoch": 3.67, "learning_rate": 0.0002636735335744614, "loss": 0.7817, "step": 156000 }, { "epoch": 3.72, "learning_rate": 0.0002632017612832206, "loss": 0.787, "step": 158000 }, { "epoch": 3.77, "learning_rate": 0.00026272998899197985, "loss": 0.7796, "step": 160000 }, { "epoch": 3.82, "learning_rate": 0.0002622582167007391, "loss": 0.7662, "step": 162000 }, { "epoch": 3.86, "learning_rate": 0.0002617864444094983, "loss": 0.7898, "step": 164000 }, { "epoch": 3.91, "learning_rate": 0.00026131467211825755, "loss": 0.7705, "step": 166000 }, { "epoch": 3.96, "learning_rate": 0.0002608428998270168, "loss": 0.7876, "step": 168000 }, { "epoch": 4.0, "learning_rate": 0.00026037112753577607, "loss": 0.7747, "step": 170000 }, { "epoch": 4.05, "learning_rate": 0.0002598993552445353, "loss": 0.7517, "step": 172000 }, { "epoch": 4.1, "learning_rate": 0.00025942758295329453, "loss": 0.7465, "step": 174000 }, { "epoch": 4.15, "learning_rate": 0.00025895581066205376, "loss": 0.7485, "step": 176000 }, { "epoch": 4.19, "learning_rate": 0.000258484038370813, "loss": 0.7531, "step": 178000 }, { "epoch": 4.24, "learning_rate": 0.00025801226607957223, "loss": 0.7521, "step": 180000 }, { "epoch": 4.29, "learning_rate": 0.00025754049378833146, "loss": 0.7477, "step": 182000 }, { "epoch": 4.33, "learning_rate": 0.0002570687214970907, "loss": 0.7621, "step": 184000 }, { "epoch": 4.38, "learning_rate": 0.00025659694920585, "loss": 0.7552, "step": 186000 }, { "epoch": 4.43, "learning_rate": 0.0002561251769146092, "loss": 0.7592, "step": 188000 }, { "epoch": 4.47, "learning_rate": 0.00025565340462336845, "loss": 0.7508, "step": 190000 }, { "epoch": 4.52, "learning_rate": 0.0002551816323321277, "loss": 0.7547, "step": 192000 }, { "epoch": 4.57, "learning_rate": 0.0002547098600408869, "loss": 0.7439, "step": 194000 }, { "epoch": 4.62, "learning_rate": 0.00025423808774964615, "loss": 0.762, "step": 196000 }, { "epoch": 4.66, "learning_rate": 0.0002537663154584054, "loss": 0.7507, "step": 198000 }, { "epoch": 4.71, "learning_rate": 0.0002532945431671646, "loss": 0.7553, "step": 200000 }, { "epoch": 4.76, "learning_rate": 0.00025282277087592384, "loss": 0.7498, "step": 202000 }, { "epoch": 4.8, "learning_rate": 0.00025235099858468313, "loss": 0.7466, "step": 204000 }, { "epoch": 4.85, "learning_rate": 0.0002518792262934423, "loss": 0.7496, "step": 206000 }, { "epoch": 4.9, "learning_rate": 0.0002514074540022016, "loss": 0.7406, "step": 208000 }, { "epoch": 4.95, "learning_rate": 0.00025093568171096083, "loss": 0.7447, "step": 210000 }, { "epoch": 4.99, "learning_rate": 0.00025046390941972006, "loss": 0.7532, "step": 212000 }, { "epoch": 5.04, "learning_rate": 0.0002499921371284793, "loss": 0.7339, "step": 214000 }, { "epoch": 5.09, "learning_rate": 0.00024952036483723853, "loss": 0.7214, "step": 216000 }, { "epoch": 5.13, "learning_rate": 0.00024904859254599776, "loss": 0.7227, "step": 218000 }, { "epoch": 5.18, "learning_rate": 0.000248576820254757, "loss": 0.7344, "step": 220000 }, { "epoch": 5.23, "learning_rate": 0.0002481050479635163, "loss": 0.7272, "step": 222000 }, { "epoch": 5.28, "learning_rate": 0.00024763327567227546, "loss": 0.7271, "step": 224000 }, { "epoch": 5.32, "learning_rate": 0.00024716150338103475, "loss": 0.7302, "step": 226000 }, { "epoch": 5.37, "learning_rate": 0.000246689731089794, "loss": 0.7218, "step": 228000 }, { "epoch": 5.42, "learning_rate": 0.0002462179587985532, "loss": 0.7242, "step": 230000 }, { "epoch": 5.46, "learning_rate": 0.00024574618650731244, "loss": 0.7306, "step": 232000 }, { "epoch": 5.51, "learning_rate": 0.0002452744142160717, "loss": 0.7351, "step": 234000 }, { "epoch": 5.56, "learning_rate": 0.0002448026419248309, "loss": 0.7225, "step": 236000 }, { "epoch": 5.61, "learning_rate": 0.00024433086963359014, "loss": 0.7256, "step": 238000 }, { "epoch": 5.65, "learning_rate": 0.0002438590973423494, "loss": 0.7256, "step": 240000 }, { "epoch": 5.7, "learning_rate": 0.00024338732505110863, "loss": 0.7293, "step": 242000 }, { "epoch": 5.75, "learning_rate": 0.0002429155527598679, "loss": 0.7272, "step": 244000 }, { "epoch": 5.79, "learning_rate": 0.00024244378046862713, "loss": 0.7154, "step": 246000 }, { "epoch": 5.84, "learning_rate": 0.00024197200817738636, "loss": 0.7274, "step": 248000 }, { "epoch": 5.89, "learning_rate": 0.00024150023588614562, "loss": 0.7184, "step": 250000 }, { "epoch": 5.93, "learning_rate": 0.00024102846359490483, "loss": 0.7218, "step": 252000 }, { "epoch": 5.98, "learning_rate": 0.00024055669130366409, "loss": 0.7246, "step": 254000 }, { "epoch": 6.03, "learning_rate": 0.00024008491901242332, "loss": 0.7036, "step": 256000 }, { "epoch": 6.08, "learning_rate": 0.00023961314672118255, "loss": 0.6992, "step": 258000 }, { "epoch": 6.12, "learning_rate": 0.00023914137442994178, "loss": 0.7029, "step": 260000 }, { "epoch": 6.17, "learning_rate": 0.00023866960213870104, "loss": 0.7097, "step": 262000 }, { "epoch": 6.22, "learning_rate": 0.00023819782984746028, "loss": 0.7042, "step": 264000 }, { "epoch": 6.26, "learning_rate": 0.0002377260575562195, "loss": 0.7076, "step": 266000 }, { "epoch": 6.31, "learning_rate": 0.00023725428526497877, "loss": 0.6972, "step": 268000 }, { "epoch": 6.36, "learning_rate": 0.00023678251297373797, "loss": 0.7097, "step": 270000 }, { "epoch": 6.41, "learning_rate": 0.00023631074068249723, "loss": 0.7025, "step": 272000 }, { "epoch": 6.45, "learning_rate": 0.00023583896839125647, "loss": 0.7041, "step": 274000 }, { "epoch": 6.5, "learning_rate": 0.0002353671961000157, "loss": 0.7098, "step": 276000 }, { "epoch": 6.55, "learning_rate": 0.00023489542380877493, "loss": 0.704, "step": 278000 }, { "epoch": 6.59, "learning_rate": 0.0002344236515175342, "loss": 0.7046, "step": 280000 }, { "epoch": 6.64, "learning_rate": 0.00023395187922629343, "loss": 0.701, "step": 282000 }, { "epoch": 6.69, "learning_rate": 0.00023348010693505266, "loss": 0.7107, "step": 284000 }, { "epoch": 6.74, "learning_rate": 0.00023300833464381192, "loss": 0.7107, "step": 286000 }, { "epoch": 6.78, "learning_rate": 0.00023253656235257112, "loss": 0.7051, "step": 288000 }, { "epoch": 6.83, "learning_rate": 0.00023206479006133038, "loss": 0.7055, "step": 290000 }, { "epoch": 6.88, "learning_rate": 0.00023159301777008962, "loss": 0.7004, "step": 292000 }, { "epoch": 6.92, "learning_rate": 0.00023112124547884885, "loss": 0.7051, "step": 294000 }, { "epoch": 6.97, "learning_rate": 0.00023064947318760808, "loss": 0.7025, "step": 296000 }, { "epoch": 7.02, "learning_rate": 0.00023017770089636734, "loss": 0.6976, "step": 298000 }, { "epoch": 7.07, "learning_rate": 0.0002297059286051266, "loss": 0.6785, "step": 300000 }, { "epoch": 7.11, "learning_rate": 0.0002292341563138858, "loss": 0.6773, "step": 302000 }, { "epoch": 7.16, "learning_rate": 0.00022876238402264507, "loss": 0.6763, "step": 304000 }, { "epoch": 7.21, "learning_rate": 0.00022829061173140427, "loss": 0.6774, "step": 306000 }, { "epoch": 7.25, "learning_rate": 0.00022781883944016353, "loss": 0.6786, "step": 308000 }, { "epoch": 7.3, "learning_rate": 0.00022734706714892276, "loss": 0.6885, "step": 310000 }, { "epoch": 7.35, "learning_rate": 0.000226875294857682, "loss": 0.6852, "step": 312000 }, { "epoch": 7.4, "learning_rate": 0.00022640352256644123, "loss": 0.6842, "step": 314000 }, { "epoch": 7.44, "learning_rate": 0.0002259317502752005, "loss": 0.6868, "step": 316000 }, { "epoch": 7.49, "learning_rate": 0.00022545997798395975, "loss": 0.6918, "step": 318000 }, { "epoch": 7.54, "learning_rate": 0.00022498820569271896, "loss": 0.6846, "step": 320000 }, { "epoch": 7.58, "learning_rate": 0.00022451643340147822, "loss": 0.6881, "step": 322000 }, { "epoch": 7.63, "learning_rate": 0.00022404466111023742, "loss": 0.69, "step": 324000 }, { "epoch": 7.68, "learning_rate": 0.00022357288881899668, "loss": 0.6823, "step": 326000 }, { "epoch": 7.72, "learning_rate": 0.00022310111652775591, "loss": 0.6896, "step": 328000 }, { "epoch": 7.77, "learning_rate": 0.00022262934423651517, "loss": 0.6862, "step": 330000 }, { "epoch": 7.82, "learning_rate": 0.00022215757194527438, "loss": 0.6858, "step": 332000 }, { "epoch": 7.87, "learning_rate": 0.00022168579965403364, "loss": 0.691, "step": 334000 }, { "epoch": 7.91, "learning_rate": 0.00022121402736279284, "loss": 0.6895, "step": 336000 }, { "epoch": 7.96, "learning_rate": 0.0002207422550715521, "loss": 0.6924, "step": 338000 }, { "epoch": 8.01, "learning_rate": 0.00022027048278031136, "loss": 0.6903, "step": 340000 }, { "epoch": 8.05, "learning_rate": 0.0002197987104890706, "loss": 0.6637, "step": 342000 }, { "epoch": 8.1, "learning_rate": 0.00021932693819782983, "loss": 0.6612, "step": 344000 }, { "epoch": 8.15, "learning_rate": 0.00021885516590658906, "loss": 0.6665, "step": 346000 }, { "epoch": 8.2, "learning_rate": 0.00021838339361534832, "loss": 0.6702, "step": 348000 }, { "epoch": 8.24, "learning_rate": 0.00021791162132410753, "loss": 0.6667, "step": 350000 }, { "epoch": 8.29, "learning_rate": 0.0002174398490328668, "loss": 0.6674, "step": 352000 }, { "epoch": 8.34, "learning_rate": 0.000216968076741626, "loss": 0.6719, "step": 354000 }, { "epoch": 8.38, "learning_rate": 0.00021649630445038525, "loss": 0.6671, "step": 356000 }, { "epoch": 8.43, "learning_rate": 0.0002160245321591445, "loss": 0.6647, "step": 358000 }, { "epoch": 8.48, "learning_rate": 0.00021555275986790375, "loss": 0.6671, "step": 360000 }, { "epoch": 8.53, "learning_rate": 0.00021508098757666298, "loss": 0.6681, "step": 362000 }, { "epoch": 8.57, "learning_rate": 0.0002146092152854222, "loss": 0.6727, "step": 364000 }, { "epoch": 8.62, "learning_rate": 0.00021413744299418147, "loss": 0.6767, "step": 366000 }, { "epoch": 8.67, "learning_rate": 0.00021366567070294068, "loss": 0.6749, "step": 368000 }, { "epoch": 8.71, "learning_rate": 0.00021319389841169994, "loss": 0.6704, "step": 370000 }, { "epoch": 8.76, "learning_rate": 0.00021272212612045917, "loss": 0.6729, "step": 372000 }, { "epoch": 8.81, "learning_rate": 0.0002122503538292184, "loss": 0.6641, "step": 374000 }, { "epoch": 8.86, "learning_rate": 0.00021177858153797766, "loss": 0.6678, "step": 376000 }, { "epoch": 8.9, "learning_rate": 0.0002113068092467369, "loss": 0.6677, "step": 378000 }, { "epoch": 8.95, "learning_rate": 0.00021083503695549613, "loss": 0.6683, "step": 380000 }, { "epoch": 9.0, "learning_rate": 0.00021036326466425536, "loss": 0.6746, "step": 382000 }, { "epoch": 9.04, "learning_rate": 0.00020989149237301462, "loss": 0.6469, "step": 384000 }, { "epoch": 9.09, "learning_rate": 0.00020941972008177383, "loss": 0.6496, "step": 386000 }, { "epoch": 9.14, "learning_rate": 0.00020894794779053309, "loss": 0.6513, "step": 388000 }, { "epoch": 9.19, "learning_rate": 0.00020847617549929232, "loss": 0.6516, "step": 390000 }, { "epoch": 9.23, "learning_rate": 0.00020800440320805155, "loss": 0.6556, "step": 392000 }, { "epoch": 9.28, "learning_rate": 0.0002075326309168108, "loss": 0.6477, "step": 394000 }, { "epoch": 9.33, "learning_rate": 0.00020706085862557004, "loss": 0.6571, "step": 396000 }, { "epoch": 9.37, "learning_rate": 0.00020658908633432928, "loss": 0.6479, "step": 398000 }, { "epoch": 9.42, "learning_rate": 0.0002061173140430885, "loss": 0.65, "step": 400000 }, { "epoch": 9.47, "learning_rate": 0.00020564554175184777, "loss": 0.6557, "step": 402000 }, { "epoch": 9.51, "learning_rate": 0.00020517376946060697, "loss": 0.6644, "step": 404000 }, { "epoch": 9.56, "learning_rate": 0.00020470199716936623, "loss": 0.6558, "step": 406000 }, { "epoch": 9.61, "learning_rate": 0.00020423022487812547, "loss": 0.6503, "step": 408000 }, { "epoch": 9.66, "learning_rate": 0.0002037584525868847, "loss": 0.6569, "step": 410000 }, { "epoch": 9.7, "learning_rate": 0.00020328668029564396, "loss": 0.66, "step": 412000 }, { "epoch": 9.75, "learning_rate": 0.0002028149080044032, "loss": 0.6528, "step": 414000 }, { "epoch": 9.8, "learning_rate": 0.00020234313571316245, "loss": 0.6493, "step": 416000 }, { "epoch": 9.84, "learning_rate": 0.00020187136342192166, "loss": 0.6599, "step": 418000 }, { "epoch": 9.89, "learning_rate": 0.00020139959113068092, "loss": 0.6534, "step": 420000 }, { "epoch": 9.94, "learning_rate": 0.00020092781883944012, "loss": 0.6546, "step": 422000 }, { "epoch": 9.99, "learning_rate": 0.00020045604654819938, "loss": 0.6488, "step": 424000 }, { "epoch": 10.03, "learning_rate": 0.00019998427425695862, "loss": 0.643, "step": 426000 }, { "epoch": 10.08, "learning_rate": 0.00019951250196571788, "loss": 0.6465, "step": 428000 }, { "epoch": 10.13, "learning_rate": 0.0001990407296744771, "loss": 0.6324, "step": 430000 }, { "epoch": 10.17, "learning_rate": 0.00019856895738323634, "loss": 0.6446, "step": 432000 }, { "epoch": 10.22, "learning_rate": 0.0001980971850919956, "loss": 0.6283, "step": 434000 }, { "epoch": 10.27, "learning_rate": 0.0001976254128007548, "loss": 0.635, "step": 436000 }, { "epoch": 10.32, "learning_rate": 0.00019715364050951407, "loss": 0.6333, "step": 438000 }, { "epoch": 10.36, "learning_rate": 0.0001966818682182733, "loss": 0.6378, "step": 440000 }, { "epoch": 10.41, "learning_rate": 0.00019621009592703253, "loss": 0.6369, "step": 442000 }, { "epoch": 10.46, "learning_rate": 0.00019573832363579177, "loss": 0.6348, "step": 444000 }, { "epoch": 10.5, "learning_rate": 0.00019526655134455103, "loss": 0.6439, "step": 446000 }, { "epoch": 10.55, "learning_rate": 0.00019479477905331026, "loss": 0.6342, "step": 448000 }, { "epoch": 10.6, "learning_rate": 0.0001943230067620695, "loss": 0.6424, "step": 450000 }, { "epoch": 10.65, "learning_rate": 0.00019385123447082875, "loss": 0.6408, "step": 452000 }, { "epoch": 10.69, "learning_rate": 0.00019337946217958796, "loss": 0.6413, "step": 454000 }, { "epoch": 10.74, "learning_rate": 0.00019290768988834722, "loss": 0.643, "step": 456000 }, { "epoch": 10.79, "learning_rate": 0.00019243591759710645, "loss": 0.6421, "step": 458000 }, { "epoch": 10.83, "learning_rate": 0.00019196414530586568, "loss": 0.6393, "step": 460000 }, { "epoch": 10.88, "learning_rate": 0.00019149237301462491, "loss": 0.6385, "step": 462000 }, { "epoch": 10.93, "learning_rate": 0.00019102060072338417, "loss": 0.6387, "step": 464000 }, { "epoch": 10.98, "learning_rate": 0.00019054882843214338, "loss": 0.6442, "step": 466000 }, { "epoch": 11.02, "learning_rate": 0.00019007705614090264, "loss": 0.6294, "step": 468000 }, { "epoch": 11.07, "learning_rate": 0.0001896052838496619, "loss": 0.6191, "step": 470000 }, { "epoch": 11.12, "learning_rate": 0.0001891335115584211, "loss": 0.6226, "step": 472000 }, { "epoch": 11.16, "learning_rate": 0.00018866173926718036, "loss": 0.6207, "step": 474000 }, { "epoch": 11.21, "learning_rate": 0.0001881899669759396, "loss": 0.6282, "step": 476000 }, { "epoch": 11.26, "learning_rate": 0.00018771819468469883, "loss": 0.615, "step": 478000 }, { "epoch": 11.3, "learning_rate": 0.00018724642239345806, "loss": 0.6201, "step": 480000 }, { "epoch": 11.35, "learning_rate": 0.00018677465010221732, "loss": 0.6199, "step": 482000 }, { "epoch": 11.4, "learning_rate": 0.00018630287781097653, "loss": 0.6211, "step": 484000 }, { "epoch": 11.45, "learning_rate": 0.0001858311055197358, "loss": 0.6372, "step": 486000 }, { "epoch": 11.49, "learning_rate": 0.00018535933322849505, "loss": 0.6268, "step": 488000 }, { "epoch": 11.54, "learning_rate": 0.00018488756093725425, "loss": 0.6283, "step": 490000 }, { "epoch": 11.59, "learning_rate": 0.00018441578864601351, "loss": 0.6398, "step": 492000 }, { "epoch": 11.63, "learning_rate": 0.00018394401635477275, "loss": 0.6282, "step": 494000 }, { "epoch": 11.68, "learning_rate": 0.00018347224406353198, "loss": 0.6327, "step": 496000 }, { "epoch": 11.73, "learning_rate": 0.0001830004717722912, "loss": 0.6226, "step": 498000 }, { "epoch": 11.78, "learning_rate": 0.00018252869948105047, "loss": 0.6321, "step": 500000 }, { "epoch": 11.82, "learning_rate": 0.00018205692718980968, "loss": 0.6244, "step": 502000 }, { "epoch": 11.87, "learning_rate": 0.00018158515489856894, "loss": 0.6257, "step": 504000 }, { "epoch": 11.92, "learning_rate": 0.0001811133826073282, "loss": 0.6291, "step": 506000 }, { "epoch": 11.96, "learning_rate": 0.0001806416103160874, "loss": 0.6283, "step": 508000 }, { "epoch": 12.01, "learning_rate": 0.00018016983802484666, "loss": 0.6246, "step": 510000 }, { "epoch": 12.06, "learning_rate": 0.0001796980657336059, "loss": 0.6133, "step": 512000 }, { "epoch": 12.11, "learning_rate": 0.00017922629344236516, "loss": 0.6069, "step": 514000 }, { "epoch": 12.15, "learning_rate": 0.00017875452115112436, "loss": 0.6163, "step": 516000 }, { "epoch": 12.2, "learning_rate": 0.00017828274885988362, "loss": 0.6131, "step": 518000 }, { "epoch": 12.25, "learning_rate": 0.00017781097656864283, "loss": 0.6096, "step": 520000 }, { "epoch": 12.29, "learning_rate": 0.00017733920427740209, "loss": 0.6073, "step": 522000 }, { "epoch": 12.34, "learning_rate": 0.00017686743198616135, "loss": 0.6068, "step": 524000 }, { "epoch": 12.39, "learning_rate": 0.00017639565969492058, "loss": 0.6114, "step": 526000 }, { "epoch": 12.44, "learning_rate": 0.0001759238874036798, "loss": 0.6189, "step": 528000 }, { "epoch": 12.48, "learning_rate": 0.00017545211511243904, "loss": 0.6111, "step": 530000 }, { "epoch": 12.53, "learning_rate": 0.0001749803428211983, "loss": 0.6085, "step": 532000 }, { "epoch": 12.58, "learning_rate": 0.0001745085705299575, "loss": 0.6164, "step": 534000 }, { "epoch": 12.62, "learning_rate": 0.00017403679823871677, "loss": 0.6169, "step": 536000 }, { "epoch": 12.67, "learning_rate": 0.00017356502594747598, "loss": 0.615, "step": 538000 }, { "epoch": 12.72, "learning_rate": 0.00017309325365623524, "loss": 0.6145, "step": 540000 }, { "epoch": 12.76, "learning_rate": 0.0001726214813649945, "loss": 0.6149, "step": 542000 }, { "epoch": 12.81, "learning_rate": 0.00017214970907375373, "loss": 0.6128, "step": 544000 }, { "epoch": 12.86, "learning_rate": 0.00017167793678251296, "loss": 0.6123, "step": 546000 }, { "epoch": 12.91, "learning_rate": 0.0001712061644912722, "loss": 0.6132, "step": 548000 }, { "epoch": 12.95, "learning_rate": 0.00017073439220003145, "loss": 0.6154, "step": 550000 }, { "epoch": 13.0, "learning_rate": 0.00017026261990879066, "loss": 0.6168, "step": 552000 }, { "epoch": 13.05, "learning_rate": 0.00016979084761754992, "loss": 0.5926, "step": 554000 }, { "epoch": 13.09, "learning_rate": 0.00016931907532630915, "loss": 0.5924, "step": 556000 }, { "epoch": 13.14, "learning_rate": 0.00016884730303506838, "loss": 0.5949, "step": 558000 }, { "epoch": 13.19, "learning_rate": 0.00016837553074382764, "loss": 0.5947, "step": 560000 }, { "epoch": 13.24, "learning_rate": 0.00016790375845258688, "loss": 0.5971, "step": 562000 }, { "epoch": 13.28, "learning_rate": 0.0001674319861613461, "loss": 0.5976, "step": 564000 }, { "epoch": 13.33, "learning_rate": 0.00016696021387010534, "loss": 0.5984, "step": 566000 }, { "epoch": 13.38, "learning_rate": 0.0001664884415788646, "loss": 0.606, "step": 568000 }, { "epoch": 13.42, "learning_rate": 0.0001660166692876238, "loss": 0.6091, "step": 570000 }, { "epoch": 13.47, "learning_rate": 0.00016554489699638307, "loss": 0.5927, "step": 572000 }, { "epoch": 13.52, "learning_rate": 0.0001650731247051423, "loss": 0.6067, "step": 574000 }, { "epoch": 13.57, "learning_rate": 0.00016460135241390153, "loss": 0.6024, "step": 576000 }, { "epoch": 13.61, "learning_rate": 0.0001641295801226608, "loss": 0.6055, "step": 578000 }, { "epoch": 13.66, "learning_rate": 0.00016365780783142003, "loss": 0.6003, "step": 580000 }, { "epoch": 13.71, "learning_rate": 0.00016318603554017929, "loss": 0.6076, "step": 582000 }, { "epoch": 13.75, "learning_rate": 0.0001627142632489385, "loss": 0.606, "step": 584000 }, { "epoch": 13.8, "learning_rate": 0.00016224249095769775, "loss": 0.5901, "step": 586000 }, { "epoch": 13.85, "learning_rate": 0.00016177071866645696, "loss": 0.6084, "step": 588000 }, { "epoch": 13.9, "learning_rate": 0.00016129894637521622, "loss": 0.6075, "step": 590000 }, { "epoch": 13.94, "learning_rate": 0.00016082717408397545, "loss": 0.6085, "step": 592000 }, { "epoch": 13.99, "learning_rate": 0.00016035540179273468, "loss": 0.6009, "step": 594000 }, { "epoch": 14.04, "learning_rate": 0.00015988362950149391, "loss": 0.5894, "step": 596000 }, { "epoch": 14.08, "learning_rate": 0.00015941185721025317, "loss": 0.5835, "step": 598000 }, { "epoch": 14.13, "learning_rate": 0.00015894008491901243, "loss": 0.5859, "step": 600000 }, { "epoch": 14.18, "learning_rate": 0.00015846831262777164, "loss": 0.5929, "step": 602000 }, { "epoch": 14.23, "learning_rate": 0.0001579965403365309, "loss": 0.59, "step": 604000 }, { "epoch": 14.27, "learning_rate": 0.0001575247680452901, "loss": 0.582, "step": 606000 }, { "epoch": 14.32, "learning_rate": 0.00015705299575404937, "loss": 0.588, "step": 608000 }, { "epoch": 14.37, "learning_rate": 0.0001565812234628086, "loss": 0.5923, "step": 610000 }, { "epoch": 14.41, "learning_rate": 0.00015610945117156786, "loss": 0.5837, "step": 612000 }, { "epoch": 14.46, "learning_rate": 0.00015563767888032706, "loss": 0.5898, "step": 614000 }, { "epoch": 14.51, "learning_rate": 0.00015516590658908632, "loss": 0.5904, "step": 616000 }, { "epoch": 14.55, "learning_rate": 0.00015469413429784558, "loss": 0.5892, "step": 618000 }, { "epoch": 14.6, "learning_rate": 0.0001542223620066048, "loss": 0.5887, "step": 620000 }, { "epoch": 14.65, "learning_rate": 0.00015375058971536405, "loss": 0.5843, "step": 622000 }, { "epoch": 14.7, "learning_rate": 0.00015327881742412328, "loss": 0.5853, "step": 624000 }, { "epoch": 14.74, "learning_rate": 0.00015280704513288251, "loss": 0.5919, "step": 626000 }, { "epoch": 14.79, "learning_rate": 0.00015233527284164175, "loss": 0.5915, "step": 628000 }, { "epoch": 14.84, "learning_rate": 0.000151863500550401, "loss": 0.5918, "step": 630000 }, { "epoch": 14.88, "learning_rate": 0.0001513917282591602, "loss": 0.5932, "step": 632000 }, { "epoch": 14.93, "learning_rate": 0.00015091995596791947, "loss": 0.5891, "step": 634000 }, { "epoch": 14.98, "learning_rate": 0.00015044818367667873, "loss": 0.5888, "step": 636000 }, { "epoch": 15.03, "learning_rate": 0.00014997641138543794, "loss": 0.5817, "step": 638000 }, { "epoch": 15.07, "learning_rate": 0.00014950463909419717, "loss": 0.574, "step": 640000 }, { "epoch": 15.12, "learning_rate": 0.00014903286680295643, "loss": 0.5723, "step": 642000 }, { "epoch": 15.17, "learning_rate": 0.00014856109451171566, "loss": 0.573, "step": 644000 }, { "epoch": 15.21, "learning_rate": 0.00014808932222047492, "loss": 0.5758, "step": 646000 }, { "epoch": 15.26, "learning_rate": 0.00014761754992923416, "loss": 0.5796, "step": 648000 }, { "epoch": 15.31, "learning_rate": 0.0001471457776379934, "loss": 0.5722, "step": 650000 }, { "epoch": 15.36, "learning_rate": 0.00014667400534675262, "loss": 0.5825, "step": 652000 }, { "epoch": 15.4, "learning_rate": 0.00014620223305551185, "loss": 0.5744, "step": 654000 }, { "epoch": 15.45, "learning_rate": 0.0001457304607642711, "loss": 0.5786, "step": 656000 }, { "epoch": 15.5, "learning_rate": 0.00014525868847303032, "loss": 0.5808, "step": 658000 }, { "epoch": 15.54, "learning_rate": 0.00014478691618178958, "loss": 0.5855, "step": 660000 }, { "epoch": 15.59, "learning_rate": 0.0001443151438905488, "loss": 0.585, "step": 662000 }, { "epoch": 15.64, "learning_rate": 0.00014384337159930807, "loss": 0.5692, "step": 664000 }, { "epoch": 15.69, "learning_rate": 0.0001433715993080673, "loss": 0.5789, "step": 666000 }, { "epoch": 15.73, "learning_rate": 0.00014289982701682654, "loss": 0.5836, "step": 668000 }, { "epoch": 15.78, "learning_rate": 0.00014242805472558577, "loss": 0.5796, "step": 670000 }, { "epoch": 15.83, "learning_rate": 0.000141956282434345, "loss": 0.5756, "step": 672000 }, { "epoch": 15.87, "learning_rate": 0.00014148451014310424, "loss": 0.5667, "step": 674000 }, { "epoch": 15.92, "learning_rate": 0.0001410127378518635, "loss": 0.5845, "step": 676000 }, { "epoch": 15.97, "learning_rate": 0.00014054096556062273, "loss": 0.5783, "step": 678000 }, { "epoch": 16.02, "learning_rate": 0.00014006919326938196, "loss": 0.5749, "step": 680000 }, { "epoch": 16.06, "learning_rate": 0.00013959742097814122, "loss": 0.5602, "step": 682000 }, { "epoch": 16.11, "learning_rate": 0.00013912564868690045, "loss": 0.564, "step": 684000 }, { "epoch": 16.16, "learning_rate": 0.00013865387639565969, "loss": 0.5659, "step": 686000 }, { "epoch": 16.2, "learning_rate": 0.00013818210410441892, "loss": 0.5608, "step": 688000 }, { "epoch": 16.25, "learning_rate": 0.00013771033181317815, "loss": 0.5668, "step": 690000 }, { "epoch": 16.3, "learning_rate": 0.00013723855952193738, "loss": 0.5646, "step": 692000 }, { "epoch": 16.34, "learning_rate": 0.00013676678723069664, "loss": 0.5663, "step": 694000 }, { "epoch": 16.39, "learning_rate": 0.00013629501493945588, "loss": 0.5716, "step": 696000 }, { "epoch": 16.44, "learning_rate": 0.0001358232426482151, "loss": 0.5716, "step": 698000 }, { "epoch": 16.49, "learning_rate": 0.00013535147035697437, "loss": 0.568, "step": 700000 }, { "epoch": 16.53, "learning_rate": 0.0001348796980657336, "loss": 0.566, "step": 702000 }, { "epoch": 16.58, "learning_rate": 0.00013440792577449284, "loss": 0.5657, "step": 704000 }, { "epoch": 16.63, "learning_rate": 0.00013393615348325207, "loss": 0.5641, "step": 706000 }, { "epoch": 16.67, "learning_rate": 0.0001334643811920113, "loss": 0.5679, "step": 708000 }, { "epoch": 16.72, "learning_rate": 0.00013299260890077056, "loss": 0.5652, "step": 710000 }, { "epoch": 16.77, "learning_rate": 0.0001325208366095298, "loss": 0.5672, "step": 712000 }, { "epoch": 16.82, "learning_rate": 0.00013204906431828903, "loss": 0.5695, "step": 714000 }, { "epoch": 16.86, "learning_rate": 0.00013157729202704826, "loss": 0.5719, "step": 716000 }, { "epoch": 16.91, "learning_rate": 0.00013110551973580752, "loss": 0.571, "step": 718000 }, { "epoch": 16.96, "learning_rate": 0.00013063374744456675, "loss": 0.5684, "step": 720000 }, { "epoch": 17.0, "learning_rate": 0.00013016197515332598, "loss": 0.5672, "step": 722000 }, { "epoch": 17.05, "learning_rate": 0.00012969020286208522, "loss": 0.5566, "step": 724000 }, { "epoch": 17.1, "learning_rate": 0.00012921843057084445, "loss": 0.5459, "step": 726000 }, { "epoch": 17.15, "learning_rate": 0.0001287466582796037, "loss": 0.5522, "step": 728000 }, { "epoch": 17.19, "learning_rate": 0.00012827488598836294, "loss": 0.5586, "step": 730000 }, { "epoch": 17.24, "learning_rate": 0.00012780311369712217, "loss": 0.5551, "step": 732000 }, { "epoch": 17.29, "learning_rate": 0.0001273313414058814, "loss": 0.558, "step": 734000 }, { "epoch": 17.33, "learning_rate": 0.00012685956911464067, "loss": 0.5501, "step": 736000 }, { "epoch": 17.38, "learning_rate": 0.0001263877968233999, "loss": 0.5496, "step": 738000 }, { "epoch": 17.43, "learning_rate": 0.00012591602453215913, "loss": 0.5617, "step": 740000 }, { "epoch": 17.48, "learning_rate": 0.00012544425224091837, "loss": 0.5481, "step": 742000 }, { "epoch": 17.52, "learning_rate": 0.00012497247994967763, "loss": 0.5627, "step": 744000 }, { "epoch": 17.57, "learning_rate": 0.00012450070765843686, "loss": 0.5556, "step": 746000 }, { "epoch": 17.62, "learning_rate": 0.0001240289353671961, "loss": 0.5565, "step": 748000 }, { "epoch": 17.66, "learning_rate": 0.00012355716307595532, "loss": 0.5591, "step": 750000 }, { "epoch": 17.71, "learning_rate": 0.00012308539078471456, "loss": 0.5526, "step": 752000 }, { "epoch": 17.76, "learning_rate": 0.00012261361849347382, "loss": 0.5564, "step": 754000 }, { "epoch": 17.8, "learning_rate": 0.00012214184620223305, "loss": 0.5541, "step": 756000 }, { "epoch": 17.85, "learning_rate": 0.00012167007391099228, "loss": 0.5601, "step": 758000 }, { "epoch": 17.9, "learning_rate": 0.00012119830161975153, "loss": 0.5555, "step": 760000 }, { "epoch": 17.95, "learning_rate": 0.00012072652932851076, "loss": 0.5525, "step": 762000 }, { "epoch": 17.99, "learning_rate": 0.00012025475703727, "loss": 0.5577, "step": 764000 }, { "epoch": 18.04, "learning_rate": 0.00011978298474602924, "loss": 0.5403, "step": 766000 }, { "epoch": 18.09, "learning_rate": 0.00011931121245478847, "loss": 0.5466, "step": 768000 }, { "epoch": 18.13, "learning_rate": 0.0001188394401635477, "loss": 0.546, "step": 770000 }, { "epoch": 18.18, "learning_rate": 0.00011836766787230695, "loss": 0.5435, "step": 772000 }, { "epoch": 18.23, "learning_rate": 0.0001178958955810662, "loss": 0.5442, "step": 774000 }, { "epoch": 18.28, "learning_rate": 0.00011742412328982544, "loss": 0.5381, "step": 776000 }, { "epoch": 18.32, "learning_rate": 0.00011695235099858468, "loss": 0.542, "step": 778000 }, { "epoch": 18.37, "learning_rate": 0.00011648057870734391, "loss": 0.5458, "step": 780000 }, { "epoch": 18.42, "learning_rate": 0.00011600880641610316, "loss": 0.5473, "step": 782000 }, { "epoch": 18.46, "learning_rate": 0.00011553703412486239, "loss": 0.5511, "step": 784000 }, { "epoch": 18.51, "learning_rate": 0.00011506526183362162, "loss": 0.5415, "step": 786000 }, { "epoch": 18.56, "learning_rate": 0.00011459348954238087, "loss": 0.5431, "step": 788000 }, { "epoch": 18.61, "learning_rate": 0.0001141217172511401, "loss": 0.5403, "step": 790000 }, { "epoch": 18.65, "learning_rate": 0.00011364994495989935, "loss": 0.5402, "step": 792000 }, { "epoch": 18.7, "learning_rate": 0.00011317817266865859, "loss": 0.5447, "step": 794000 }, { "epoch": 18.75, "learning_rate": 0.00011270640037741783, "loss": 0.5485, "step": 796000 }, { "epoch": 18.79, "learning_rate": 0.00011223462808617706, "loss": 0.5485, "step": 798000 }, { "epoch": 18.84, "learning_rate": 0.0001117628557949363, "loss": 0.5441, "step": 800000 }, { "epoch": 18.89, "learning_rate": 0.00011129108350369554, "loss": 0.5457, "step": 802000 }, { "epoch": 18.94, "learning_rate": 0.00011081931121245477, "loss": 0.5416, "step": 804000 }, { "epoch": 18.98, "learning_rate": 0.00011034753892121402, "loss": 0.5494, "step": 806000 }, { "epoch": 19.03, "learning_rate": 0.00010987576662997325, "loss": 0.5347, "step": 808000 }, { "epoch": 19.08, "learning_rate": 0.00010940399433873251, "loss": 0.5257, "step": 810000 }, { "epoch": 19.12, "learning_rate": 0.00010893222204749174, "loss": 0.5278, "step": 812000 }, { "epoch": 19.17, "learning_rate": 0.00010846044975625097, "loss": 0.5355, "step": 814000 }, { "epoch": 19.22, "learning_rate": 0.00010798867746501022, "loss": 0.5378, "step": 816000 }, { "epoch": 19.27, "learning_rate": 0.00010751690517376945, "loss": 0.537, "step": 818000 }, { "epoch": 19.31, "learning_rate": 0.00010704513288252869, "loss": 0.5385, "step": 820000 }, { "epoch": 19.36, "learning_rate": 0.00010657336059128793, "loss": 0.5302, "step": 822000 }, { "epoch": 19.41, "learning_rate": 0.00010610158830004717, "loss": 0.5409, "step": 824000 }, { "epoch": 19.45, "learning_rate": 0.0001056298160088064, "loss": 0.5347, "step": 826000 }, { "epoch": 19.5, "learning_rate": 0.00010515804371756563, "loss": 0.5288, "step": 828000 }, { "epoch": 19.55, "learning_rate": 0.00010468627142632489, "loss": 0.5276, "step": 830000 }, { "epoch": 19.59, "learning_rate": 0.00010421449913508412, "loss": 0.5334, "step": 832000 }, { "epoch": 19.64, "learning_rate": 0.00010374272684384337, "loss": 0.5407, "step": 834000 }, { "epoch": 19.69, "learning_rate": 0.0001032709545526026, "loss": 0.5354, "step": 836000 }, { "epoch": 19.74, "learning_rate": 0.00010279918226136184, "loss": 0.5274, "step": 838000 }, { "epoch": 19.78, "learning_rate": 0.00010232740997012108, "loss": 0.529, "step": 840000 }, { "epoch": 19.83, "learning_rate": 0.00010185563767888031, "loss": 0.5327, "step": 842000 }, { "epoch": 19.88, "learning_rate": 0.00010138386538763955, "loss": 0.5383, "step": 844000 }, { "epoch": 19.92, "learning_rate": 0.0001009120930963988, "loss": 0.5352, "step": 846000 }, { "epoch": 19.97, "learning_rate": 0.00010044032080515804, "loss": 0.5355, "step": 848000 }, { "epoch": 20.02, "learning_rate": 9.996854851391727e-05, "loss": 0.5233, "step": 850000 }, { "epoch": 20.07, "learning_rate": 9.949677622267652e-05, "loss": 0.5255, "step": 852000 }, { "epoch": 20.11, "learning_rate": 9.902500393143575e-05, "loss": 0.5168, "step": 854000 }, { "epoch": 20.16, "learning_rate": 9.855323164019498e-05, "loss": 0.5181, "step": 856000 }, { "epoch": 20.21, "learning_rate": 9.808145934895423e-05, "loss": 0.5245, "step": 858000 }, { "epoch": 20.25, "learning_rate": 9.760968705771346e-05, "loss": 0.5191, "step": 860000 }, { "epoch": 20.3, "learning_rate": 9.71379147664727e-05, "loss": 0.5207, "step": 862000 }, { "epoch": 20.35, "learning_rate": 9.666614247523194e-05, "loss": 0.5251, "step": 864000 }, { "epoch": 20.4, "learning_rate": 9.619437018399119e-05, "loss": 0.5158, "step": 866000 }, { "epoch": 20.44, "learning_rate": 9.572259789275043e-05, "loss": 0.5178, "step": 868000 }, { "epoch": 20.49, "learning_rate": 9.525082560150967e-05, "loss": 0.5213, "step": 870000 }, { "epoch": 20.54, "learning_rate": 9.47790533102689e-05, "loss": 0.5271, "step": 872000 }, { "epoch": 20.58, "learning_rate": 9.430728101902815e-05, "loss": 0.5242, "step": 874000 }, { "epoch": 20.63, "learning_rate": 9.383550872778738e-05, "loss": 0.5278, "step": 876000 }, { "epoch": 20.68, "learning_rate": 9.336373643654661e-05, "loss": 0.5222, "step": 878000 }, { "epoch": 20.73, "learning_rate": 9.289196414530586e-05, "loss": 0.5242, "step": 880000 }, { "epoch": 20.77, "learning_rate": 9.242019185406509e-05, "loss": 0.5271, "step": 882000 }, { "epoch": 20.82, "learning_rate": 9.194841956282434e-05, "loss": 0.5267, "step": 884000 }, { "epoch": 20.87, "learning_rate": 9.147664727158358e-05, "loss": 0.5195, "step": 886000 }, { "epoch": 20.91, "learning_rate": 9.100487498034282e-05, "loss": 0.5162, "step": 888000 }, { "epoch": 20.96, "learning_rate": 9.053310268910205e-05, "loss": 0.5166, "step": 890000 }, { "epoch": 21.01, "learning_rate": 9.00613303978613e-05, "loss": 0.5266, "step": 892000 }, { "epoch": 21.06, "learning_rate": 8.958955810662053e-05, "loss": 0.5149, "step": 894000 }, { "epoch": 21.1, "learning_rate": 8.911778581537976e-05, "loss": 0.5023, "step": 896000 }, { "epoch": 21.15, "learning_rate": 8.864601352413901e-05, "loss": 0.5112, "step": 898000 }, { "epoch": 21.2, "learning_rate": 8.817424123289824e-05, "loss": 0.5084, "step": 900000 }, { "epoch": 21.24, "learning_rate": 8.770246894165747e-05, "loss": 0.509, "step": 902000 }, { "epoch": 21.29, "learning_rate": 8.723069665041673e-05, "loss": 0.5043, "step": 904000 }, { "epoch": 21.34, "learning_rate": 8.675892435917597e-05, "loss": 0.5188, "step": 906000 }, { "epoch": 21.38, "learning_rate": 8.628715206793521e-05, "loss": 0.5094, "step": 908000 }, { "epoch": 21.43, "learning_rate": 8.581537977669444e-05, "loss": 0.519, "step": 910000 }, { "epoch": 21.48, "learning_rate": 8.534360748545368e-05, "loss": 0.5108, "step": 912000 }, { "epoch": 21.53, "learning_rate": 8.487183519421292e-05, "loss": 0.5054, "step": 914000 }, { "epoch": 21.57, "learning_rate": 8.440006290297216e-05, "loss": 0.5092, "step": 916000 }, { "epoch": 21.62, "learning_rate": 8.392829061173139e-05, "loss": 0.5075, "step": 918000 }, { "epoch": 21.67, "learning_rate": 8.345651832049062e-05, "loss": 0.5024, "step": 920000 }, { "epoch": 21.71, "learning_rate": 8.298474602924988e-05, "loss": 0.5161, "step": 922000 }, { "epoch": 21.76, "learning_rate": 8.251297373800911e-05, "loss": 0.5117, "step": 924000 }, { "epoch": 21.81, "learning_rate": 8.204120144676836e-05, "loss": 0.5136, "step": 926000 }, { "epoch": 21.86, "learning_rate": 8.15694291555276e-05, "loss": 0.5062, "step": 928000 }, { "epoch": 21.9, "learning_rate": 8.109765686428683e-05, "loss": 0.5122, "step": 930000 }, { "epoch": 21.95, "learning_rate": 8.062588457304607e-05, "loss": 0.5113, "step": 932000 }, { "epoch": 22.0, "learning_rate": 8.01541122818053e-05, "loss": 0.5073, "step": 934000 }, { "epoch": 22.04, "learning_rate": 7.968233999056454e-05, "loss": 0.4986, "step": 936000 }, { "epoch": 22.09, "learning_rate": 7.921056769932378e-05, "loss": 0.5061, "step": 938000 }, { "epoch": 22.14, "learning_rate": 7.873879540808303e-05, "loss": 0.4963, "step": 940000 }, { "epoch": 22.19, "learning_rate": 7.826702311684228e-05, "loss": 0.4981, "step": 942000 }, { "epoch": 22.23, "learning_rate": 7.779525082560151e-05, "loss": 0.498, "step": 944000 }, { "epoch": 22.28, "learning_rate": 7.732347853436074e-05, "loss": 0.4963, "step": 946000 }, { "epoch": 22.33, "learning_rate": 7.685170624311998e-05, "loss": 0.4987, "step": 948000 }, { "epoch": 22.37, "learning_rate": 7.637993395187922e-05, "loss": 0.4999, "step": 950000 }, { "epoch": 22.42, "learning_rate": 7.590816166063845e-05, "loss": 0.4945, "step": 952000 }, { "epoch": 22.47, "learning_rate": 7.543638936939769e-05, "loss": 0.4986, "step": 954000 }, { "epoch": 22.52, "learning_rate": 7.496461707815693e-05, "loss": 0.4956, "step": 956000 }, { "epoch": 22.56, "learning_rate": 7.449284478691618e-05, "loss": 0.4958, "step": 958000 }, { "epoch": 22.61, "learning_rate": 7.402107249567541e-05, "loss": 0.5033, "step": 960000 }, { "epoch": 22.66, "learning_rate": 7.354930020443464e-05, "loss": 0.4976, "step": 962000 }, { "epoch": 22.7, "learning_rate": 7.307752791319389e-05, "loss": 0.5036, "step": 964000 }, { "epoch": 22.75, "learning_rate": 7.260575562195314e-05, "loss": 0.4973, "step": 966000 }, { "epoch": 22.8, "learning_rate": 7.213398333071237e-05, "loss": 0.501, "step": 968000 }, { "epoch": 22.85, "learning_rate": 7.16622110394716e-05, "loss": 0.5005, "step": 970000 }, { "epoch": 22.89, "learning_rate": 7.119043874823085e-05, "loss": 0.4991, "step": 972000 }, { "epoch": 22.94, "learning_rate": 7.07186664569901e-05, "loss": 0.495, "step": 974000 }, { "epoch": 22.99, "learning_rate": 7.024689416574933e-05, "loss": 0.5016, "step": 976000 }, { "epoch": 23.03, "learning_rate": 6.977512187450856e-05, "loss": 0.4966, "step": 978000 }, { "epoch": 23.08, "learning_rate": 6.93033495832678e-05, "loss": 0.4909, "step": 980000 }, { "epoch": 23.13, "learning_rate": 6.883157729202704e-05, "loss": 0.4846, "step": 982000 }, { "epoch": 23.17, "learning_rate": 6.835980500078629e-05, "loss": 0.4938, "step": 984000 }, { "epoch": 23.22, "learning_rate": 6.788803270954552e-05, "loss": 0.4967, "step": 986000 }, { "epoch": 23.27, "learning_rate": 6.741626041830475e-05, "loss": 0.4894, "step": 988000 }, { "epoch": 23.32, "learning_rate": 6.6944488127064e-05, "loss": 0.4912, "step": 990000 }, { "epoch": 23.36, "learning_rate": 6.647271583582324e-05, "loss": 0.4898, "step": 992000 }, { "epoch": 23.41, "learning_rate": 6.600094354458248e-05, "loss": 0.4854, "step": 994000 }, { "epoch": 23.46, "learning_rate": 6.552917125334171e-05, "loss": 0.4838, "step": 996000 }, { "epoch": 23.5, "learning_rate": 6.505739896210096e-05, "loss": 0.4846, "step": 998000 }, { "epoch": 23.55, "learning_rate": 6.45856266708602e-05, "loss": 0.4829, "step": 1000000 }, { "epoch": 23.6, "learning_rate": 6.411385437961944e-05, "loss": 0.4792, "step": 1002000 }, { "epoch": 23.65, "learning_rate": 6.364208208837867e-05, "loss": 0.486, "step": 1004000 }, { "epoch": 23.69, "learning_rate": 6.317030979713791e-05, "loss": 0.4946, "step": 1006000 }, { "epoch": 23.74, "learning_rate": 6.269853750589715e-05, "loss": 0.4825, "step": 1008000 }, { "epoch": 23.79, "learning_rate": 6.22267652146564e-05, "loss": 0.4845, "step": 1010000 }, { "epoch": 23.83, "learning_rate": 6.175499292341563e-05, "loss": 0.4875, "step": 1012000 }, { "epoch": 23.88, "learning_rate": 6.128322063217486e-05, "loss": 0.4935, "step": 1014000 }, { "epoch": 23.93, "learning_rate": 6.0811448340934105e-05, "loss": 0.4834, "step": 1016000 }, { "epoch": 23.98, "learning_rate": 6.033967604969334e-05, "loss": 0.4853, "step": 1018000 }, { "epoch": 24.02, "learning_rate": 5.9867903758452584e-05, "loss": 0.485, "step": 1020000 }, { "epoch": 24.07, "learning_rate": 5.9396131467211824e-05, "loss": 0.4821, "step": 1022000 }, { "epoch": 24.12, "learning_rate": 5.8924359175971057e-05, "loss": 0.4739, "step": 1024000 }, { "epoch": 24.16, "learning_rate": 5.8452586884730296e-05, "loss": 0.4824, "step": 1026000 }, { "epoch": 24.21, "learning_rate": 5.798081459348954e-05, "loss": 0.4823, "step": 1028000 }, { "epoch": 24.26, "learning_rate": 5.7509042302248775e-05, "loss": 0.4805, "step": 1030000 }, { "epoch": 24.31, "learning_rate": 5.7037270011008015e-05, "loss": 0.4727, "step": 1032000 }, { "epoch": 24.35, "learning_rate": 5.6565497719767254e-05, "loss": 0.4726, "step": 1034000 }, { "epoch": 24.4, "learning_rate": 5.609372542852649e-05, "loss": 0.4734, "step": 1036000 }, { "epoch": 24.45, "learning_rate": 5.562195313728573e-05, "loss": 0.4775, "step": 1038000 }, { "epoch": 24.49, "learning_rate": 5.515018084604497e-05, "loss": 0.4761, "step": 1040000 }, { "epoch": 24.54, "learning_rate": 5.467840855480421e-05, "loss": 0.4692, "step": 1042000 }, { "epoch": 24.59, "learning_rate": 5.4206636263563445e-05, "loss": 0.4826, "step": 1044000 }, { "epoch": 24.63, "learning_rate": 5.3734863972322684e-05, "loss": 0.4767, "step": 1046000 }, { "epoch": 24.68, "learning_rate": 5.326309168108193e-05, "loss": 0.4733, "step": 1048000 }, { "epoch": 24.73, "learning_rate": 5.2791319389841163e-05, "loss": 0.4772, "step": 1050000 }, { "epoch": 24.78, "learning_rate": 5.23195470986004e-05, "loss": 0.4758, "step": 1052000 }, { "epoch": 24.82, "learning_rate": 5.184777480735964e-05, "loss": 0.48, "step": 1054000 }, { "epoch": 24.87, "learning_rate": 5.137600251611889e-05, "loss": 0.477, "step": 1056000 }, { "epoch": 24.92, "learning_rate": 5.090423022487812e-05, "loss": 0.4715, "step": 1058000 }, { "epoch": 24.96, "learning_rate": 5.043245793363736e-05, "loss": 0.4696, "step": 1060000 }, { "epoch": 25.01, "learning_rate": 4.99606856423966e-05, "loss": 0.4682, "step": 1062000 }, { "epoch": 25.06, "learning_rate": 4.948891335115583e-05, "loss": 0.4725, "step": 1064000 }, { "epoch": 25.11, "learning_rate": 4.901714105991508e-05, "loss": 0.4655, "step": 1066000 }, { "epoch": 25.15, "learning_rate": 4.854536876867432e-05, "loss": 0.4629, "step": 1068000 }, { "epoch": 25.2, "learning_rate": 4.807359647743355e-05, "loss": 0.4654, "step": 1070000 }, { "epoch": 25.25, "learning_rate": 4.760182418619279e-05, "loss": 0.4663, "step": 1072000 }, { "epoch": 25.29, "learning_rate": 4.713005189495203e-05, "loss": 0.4682, "step": 1074000 }, { "epoch": 25.34, "learning_rate": 4.665827960371127e-05, "loss": 0.4647, "step": 1076000 }, { "epoch": 25.39, "learning_rate": 4.618650731247051e-05, "loss": 0.4722, "step": 1078000 }, { "epoch": 25.44, "learning_rate": 4.571473502122975e-05, "loss": 0.4604, "step": 1080000 }, { "epoch": 25.48, "learning_rate": 4.524296272998898e-05, "loss": 0.4585, "step": 1082000 }, { "epoch": 25.53, "learning_rate": 4.477119043874823e-05, "loss": 0.4619, "step": 1084000 }, { "epoch": 25.58, "learning_rate": 4.429941814750747e-05, "loss": 0.4661, "step": 1086000 }, { "epoch": 25.62, "learning_rate": 4.382764585626671e-05, "loss": 0.4724, "step": 1088000 }, { "epoch": 25.67, "learning_rate": 4.335587356502594e-05, "loss": 0.4667, "step": 1090000 }, { "epoch": 25.72, "learning_rate": 4.288410127378518e-05, "loss": 0.4692, "step": 1092000 }, { "epoch": 25.77, "learning_rate": 4.2412328982544426e-05, "loss": 0.4624, "step": 1094000 }, { "epoch": 25.81, "learning_rate": 4.194055669130366e-05, "loss": 0.4587, "step": 1096000 }, { "epoch": 25.86, "learning_rate": 4.14687844000629e-05, "loss": 0.4604, "step": 1098000 }, { "epoch": 25.91, "learning_rate": 4.099701210882214e-05, "loss": 0.4718, "step": 1100000 }, { "epoch": 25.95, "learning_rate": 4.052523981758137e-05, "loss": 0.4666, "step": 1102000 }, { "epoch": 26.0, "learning_rate": 4.005346752634062e-05, "loss": 0.4616, "step": 1104000 }, { "epoch": 26.05, "learning_rate": 3.9581695235099856e-05, "loss": 0.456, "step": 1106000 }, { "epoch": 26.1, "learning_rate": 3.9109922943859096e-05, "loss": 0.4573, "step": 1108000 }, { "epoch": 26.14, "learning_rate": 3.863815065261833e-05, "loss": 0.4522, "step": 1110000 }, { "epoch": 26.19, "learning_rate": 3.8166378361377575e-05, "loss": 0.4604, "step": 1112000 }, { "epoch": 26.24, "learning_rate": 3.7694606070136815e-05, "loss": 0.4539, "step": 1114000 }, { "epoch": 26.28, "learning_rate": 3.722283377889605e-05, "loss": 0.4485, "step": 1116000 }, { "epoch": 26.33, "learning_rate": 3.675106148765529e-05, "loss": 0.4596, "step": 1118000 }, { "epoch": 26.38, "learning_rate": 3.6279289196414526e-05, "loss": 0.4487, "step": 1120000 }, { "epoch": 26.42, "learning_rate": 3.5807516905173766e-05, "loss": 0.459, "step": 1122000 }, { "epoch": 26.47, "learning_rate": 3.5335744613933005e-05, "loss": 0.4609, "step": 1124000 }, { "epoch": 26.52, "learning_rate": 3.4863972322692245e-05, "loss": 0.4549, "step": 1126000 }, { "epoch": 26.57, "learning_rate": 3.4392200031451484e-05, "loss": 0.4553, "step": 1128000 }, { "epoch": 26.61, "learning_rate": 3.3920427740210724e-05, "loss": 0.4552, "step": 1130000 }, { "epoch": 26.66, "learning_rate": 3.344865544896996e-05, "loss": 0.4534, "step": 1132000 }, { "epoch": 26.71, "learning_rate": 3.29768831577292e-05, "loss": 0.4522, "step": 1134000 }, { "epoch": 26.75, "learning_rate": 3.2505110866488436e-05, "loss": 0.4544, "step": 1136000 }, { "epoch": 26.8, "learning_rate": 3.203333857524768e-05, "loss": 0.458, "step": 1138000 }, { "epoch": 26.85, "learning_rate": 3.1561566284006915e-05, "loss": 0.4537, "step": 1140000 }, { "epoch": 26.9, "learning_rate": 3.1089793992766154e-05, "loss": 0.4581, "step": 1142000 }, { "epoch": 26.94, "learning_rate": 3.0618021701525394e-05, "loss": 0.4525, "step": 1144000 }, { "epoch": 26.99, "learning_rate": 3.014624941028463e-05, "loss": 0.4562, "step": 1146000 }, { "epoch": 27.04, "learning_rate": 2.9674477119043873e-05, "loss": 0.4516, "step": 1148000 }, { "epoch": 27.08, "learning_rate": 2.920270482780311e-05, "loss": 0.4472, "step": 1150000 }, { "epoch": 27.13, "learning_rate": 2.8730932536562352e-05, "loss": 0.4507, "step": 1152000 }, { "epoch": 27.18, "learning_rate": 2.8259160245321588e-05, "loss": 0.4421, "step": 1154000 }, { "epoch": 27.23, "learning_rate": 2.778738795408083e-05, "loss": 0.4487, "step": 1156000 }, { "epoch": 27.27, "learning_rate": 2.7315615662840067e-05, "loss": 0.4528, "step": 1158000 }, { "epoch": 27.32, "learning_rate": 2.6843843371599303e-05, "loss": 0.4483, "step": 1160000 }, { "epoch": 27.37, "learning_rate": 2.6372071080358546e-05, "loss": 0.45, "step": 1162000 }, { "epoch": 27.41, "learning_rate": 2.5900298789117782e-05, "loss": 0.4517, "step": 1164000 }, { "epoch": 27.46, "learning_rate": 2.5428526497877022e-05, "loss": 0.4444, "step": 1166000 }, { "epoch": 27.51, "learning_rate": 2.495675420663626e-05, "loss": 0.4457, "step": 1168000 }, { "epoch": 27.56, "learning_rate": 2.44849819153955e-05, "loss": 0.4471, "step": 1170000 }, { "epoch": 27.6, "learning_rate": 2.401320962415474e-05, "loss": 0.4455, "step": 1172000 }, { "epoch": 27.65, "learning_rate": 2.3541437332913976e-05, "loss": 0.4466, "step": 1174000 }, { "epoch": 27.7, "learning_rate": 2.3069665041673216e-05, "loss": 0.4421, "step": 1176000 }, { "epoch": 27.74, "learning_rate": 2.2597892750432455e-05, "loss": 0.446, "step": 1178000 }, { "epoch": 27.79, "learning_rate": 2.2126120459191695e-05, "loss": 0.4441, "step": 1180000 }, { "epoch": 27.84, "learning_rate": 2.1654348167950935e-05, "loss": 0.4452, "step": 1182000 }, { "epoch": 27.89, "learning_rate": 2.1182575876710174e-05, "loss": 0.4418, "step": 1184000 }, { "epoch": 27.93, "learning_rate": 2.071080358546941e-05, "loss": 0.4396, "step": 1186000 }, { "epoch": 27.98, "learning_rate": 2.0239031294228653e-05, "loss": 0.4477, "step": 1188000 }, { "epoch": 28.03, "learning_rate": 1.976725900298789e-05, "loss": 0.4435, "step": 1190000 }, { "epoch": 28.07, "learning_rate": 1.9295486711747125e-05, "loss": 0.4379, "step": 1192000 }, { "epoch": 28.12, "learning_rate": 1.8823714420506368e-05, "loss": 0.4442, "step": 1194000 }, { "epoch": 28.17, "learning_rate": 1.8351942129265604e-05, "loss": 0.4329, "step": 1196000 }, { "epoch": 28.21, "learning_rate": 1.7880169838024844e-05, "loss": 0.4368, "step": 1198000 }, { "epoch": 28.26, "learning_rate": 1.7408397546784083e-05, "loss": 0.4427, "step": 1200000 }, { "epoch": 28.31, "learning_rate": 1.6936625255543323e-05, "loss": 0.4426, "step": 1202000 }, { "epoch": 28.36, "learning_rate": 1.6464852964302562e-05, "loss": 0.4374, "step": 1204000 }, { "epoch": 28.4, "learning_rate": 1.5993080673061802e-05, "loss": 0.4372, "step": 1206000 }, { "epoch": 28.45, "learning_rate": 1.552130838182104e-05, "loss": 0.4365, "step": 1208000 }, { "epoch": 28.5, "learning_rate": 1.504953609058028e-05, "loss": 0.4329, "step": 1210000 }, { "epoch": 28.54, "learning_rate": 1.4577763799339517e-05, "loss": 0.4397, "step": 1212000 }, { "epoch": 28.59, "learning_rate": 1.4105991508098757e-05, "loss": 0.4355, "step": 1214000 }, { "epoch": 28.64, "learning_rate": 1.3634219216857994e-05, "loss": 0.4386, "step": 1216000 }, { "epoch": 28.69, "learning_rate": 1.3162446925617234e-05, "loss": 0.4369, "step": 1218000 }, { "epoch": 28.73, "learning_rate": 1.2690674634376474e-05, "loss": 0.441, "step": 1220000 }, { "epoch": 28.78, "learning_rate": 1.2218902343135713e-05, "loss": 0.4383, "step": 1222000 }, { "epoch": 28.83, "learning_rate": 1.1747130051894953e-05, "loss": 0.4338, "step": 1224000 }, { "epoch": 28.87, "learning_rate": 1.1275357760654189e-05, "loss": 0.4373, "step": 1226000 }, { "epoch": 28.92, "learning_rate": 1.0803585469413428e-05, "loss": 0.4392, "step": 1228000 }, { "epoch": 28.97, "learning_rate": 1.0331813178172668e-05, "loss": 0.4401, "step": 1230000 } ], "logging_steps": 2000, "max_steps": 1273800, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 500, "total_flos": 2.4919775353735186e+21, "train_batch_size": 2, "trial_name": null, "trial_params": null }