|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 1273800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0003, |
|
"loss": 3.0982, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002995282277087592, |
|
"loss": 1.1999, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029905645541751844, |
|
"loss": 1.0995, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002985846831262777, |
|
"loss": 1.0716, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002981129108350369, |
|
"loss": 1.0664, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002976411385437962, |
|
"loss": 1.0223, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00029716936625255537, |
|
"loss": 0.9955, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00029669759396131466, |
|
"loss": 0.9925, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002962258216700739, |
|
"loss": 0.9951, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002957540493788331, |
|
"loss": 0.9811, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00029528227708759236, |
|
"loss": 0.9727, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0002948105047963516, |
|
"loss": 0.9655, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002943387325051109, |
|
"loss": 0.959, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00029386696021387005, |
|
"loss": 0.9591, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00029339518792262934, |
|
"loss": 0.9455, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002929234156313886, |
|
"loss": 0.9425, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002924516433401478, |
|
"loss": 0.9373, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00029197987104890704, |
|
"loss": 0.9341, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00029150809875766627, |
|
"loss": 0.93, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002910363264664255, |
|
"loss": 0.9184, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00029056455417518474, |
|
"loss": 0.9222, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.000290092781883944, |
|
"loss": 0.8941, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0002896210095927032, |
|
"loss": 0.8868, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0002891492373014625, |
|
"loss": 0.8894, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0002886774650102217, |
|
"loss": 0.8799, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00028820569271898095, |
|
"loss": 0.8781, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0002877339204277402, |
|
"loss": 0.8853, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0002872621481364994, |
|
"loss": 0.8829, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00028679037584525865, |
|
"loss": 0.8747, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0002863186035540179, |
|
"loss": 0.8806, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00028584683126277717, |
|
"loss": 0.8868, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00028537505897153635, |
|
"loss": 0.8806, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00028490328668029564, |
|
"loss": 0.8771, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00028443151438905487, |
|
"loss": 0.8648, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0002839597420978141, |
|
"loss": 0.8747, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00028348796980657334, |
|
"loss": 0.8719, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00028301619751533257, |
|
"loss": 0.8641, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0002825444252240918, |
|
"loss": 0.8555, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00028207265293285103, |
|
"loss": 0.8583, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0002816008806416103, |
|
"loss": 0.8624, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0002811291083503695, |
|
"loss": 0.8578, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0002806573360591288, |
|
"loss": 0.8474, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.000280185563767888, |
|
"loss": 0.8366, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00027971379147664725, |
|
"loss": 0.8226, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.0002792420191854065, |
|
"loss": 0.8267, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0002787702468941657, |
|
"loss": 0.8231, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00027829847460292495, |
|
"loss": 0.8284, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002778267023116842, |
|
"loss": 0.8153, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00027735493002044347, |
|
"loss": 0.8241, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00027688315772920265, |
|
"loss": 0.8246, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00027641138543796194, |
|
"loss": 0.8219, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00027593961314672117, |
|
"loss": 0.8244, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0002754678408554804, |
|
"loss": 0.8201, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00027499606856423963, |
|
"loss": 0.8193, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00027452429627299887, |
|
"loss": 0.8225, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0002740525239817581, |
|
"loss": 0.8105, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00027358075169051733, |
|
"loss": 0.82, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.0002731089793992766, |
|
"loss": 0.8191, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00027263720710803585, |
|
"loss": 0.8102, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0002721654348167951, |
|
"loss": 0.8189, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.0002716936625255543, |
|
"loss": 0.8128, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.00027122189023431355, |
|
"loss": 0.815, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0002707501179430728, |
|
"loss": 0.8123, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 0.000270278345651832, |
|
"loss": 0.7986, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.00026980657336059125, |
|
"loss": 0.7701, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 0.0002693348010693505, |
|
"loss": 0.7819, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 0.00026886302877810977, |
|
"loss": 0.7826, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.000268391256486869, |
|
"loss": 0.7868, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 0.00026791948419562823, |
|
"loss": 0.7843, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.00026744771190438747, |
|
"loss": 0.7841, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 0.0002669759396131467, |
|
"loss": 0.7845, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.00026650416732190593, |
|
"loss": 0.7859, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 0.00026603239503066516, |
|
"loss": 0.7746, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0002655606227394244, |
|
"loss": 0.7749, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 0.00026508885044818363, |
|
"loss": 0.7747, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 0.0002646170781569429, |
|
"loss": 0.7816, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.00026414530586570215, |
|
"loss": 0.7807, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 0.0002636735335744614, |
|
"loss": 0.7817, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.0002632017612832206, |
|
"loss": 0.787, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 0.00026272998899197985, |
|
"loss": 0.7796, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 0.0002622582167007391, |
|
"loss": 0.7662, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 0.0002617864444094983, |
|
"loss": 0.7898, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 0.00026131467211825755, |
|
"loss": 0.7705, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.0002608428998270168, |
|
"loss": 0.7876, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00026037112753577607, |
|
"loss": 0.7747, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.0002598993552445353, |
|
"loss": 0.7517, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00025942758295329453, |
|
"loss": 0.7465, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00025895581066205376, |
|
"loss": 0.7485, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.000258484038370813, |
|
"loss": 0.7531, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 0.00025801226607957223, |
|
"loss": 0.7521, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 0.00025754049378833146, |
|
"loss": 0.7477, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 0.0002570687214970907, |
|
"loss": 0.7621, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 0.00025659694920585, |
|
"loss": 0.7552, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.0002561251769146092, |
|
"loss": 0.7592, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 0.00025565340462336845, |
|
"loss": 0.7508, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.0002551816323321277, |
|
"loss": 0.7547, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 0.0002547098600408869, |
|
"loss": 0.7439, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00025423808774964615, |
|
"loss": 0.762, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.0002537663154584054, |
|
"loss": 0.7507, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 0.0002532945431671646, |
|
"loss": 0.7553, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.00025282277087592384, |
|
"loss": 0.7498, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.00025235099858468313, |
|
"loss": 0.7466, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0002518792262934423, |
|
"loss": 0.7496, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0002514074540022016, |
|
"loss": 0.7406, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00025093568171096083, |
|
"loss": 0.7447, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.00025046390941972006, |
|
"loss": 0.7532, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 5.04, |
|
"learning_rate": 0.0002499921371284793, |
|
"loss": 0.7339, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.00024952036483723853, |
|
"loss": 0.7214, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 0.00024904859254599776, |
|
"loss": 0.7227, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 0.000248576820254757, |
|
"loss": 0.7344, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.0002481050479635163, |
|
"loss": 0.7272, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 0.00024763327567227546, |
|
"loss": 0.7271, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00024716150338103475, |
|
"loss": 0.7302, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 0.000246689731089794, |
|
"loss": 0.7218, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0002462179587985532, |
|
"loss": 0.7242, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 0.00024574618650731244, |
|
"loss": 0.7306, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 0.0002452744142160717, |
|
"loss": 0.7351, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0002448026419248309, |
|
"loss": 0.7225, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 0.00024433086963359014, |
|
"loss": 0.7256, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0002438590973423494, |
|
"loss": 0.7256, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.00024338732505110863, |
|
"loss": 0.7293, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0002429155527598679, |
|
"loss": 0.7272, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 0.00024244378046862713, |
|
"loss": 0.7154, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 5.84, |
|
"learning_rate": 0.00024197200817738636, |
|
"loss": 0.7274, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00024150023588614562, |
|
"loss": 0.7184, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 0.00024102846359490483, |
|
"loss": 0.7218, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00024055669130366409, |
|
"loss": 0.7246, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"learning_rate": 0.00024008491901242332, |
|
"loss": 0.7036, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 0.00023961314672118255, |
|
"loss": 0.6992, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 0.00023914137442994178, |
|
"loss": 0.7029, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00023866960213870104, |
|
"loss": 0.7097, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 6.22, |
|
"learning_rate": 0.00023819782984746028, |
|
"loss": 0.7042, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.0002377260575562195, |
|
"loss": 0.7076, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 0.00023725428526497877, |
|
"loss": 0.6972, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00023678251297373797, |
|
"loss": 0.7097, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 0.00023631074068249723, |
|
"loss": 0.7025, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00023583896839125647, |
|
"loss": 0.7041, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0002353671961000157, |
|
"loss": 0.7098, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.00023489542380877493, |
|
"loss": 0.704, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 0.0002344236515175342, |
|
"loss": 0.7046, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"learning_rate": 0.00023395187922629343, |
|
"loss": 0.701, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.00023348010693505266, |
|
"loss": 0.7107, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 0.00023300833464381192, |
|
"loss": 0.7107, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.00023253656235257112, |
|
"loss": 0.7051, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 0.00023206479006133038, |
|
"loss": 0.7055, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"learning_rate": 0.00023159301777008962, |
|
"loss": 0.7004, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 0.00023112124547884885, |
|
"loss": 0.7051, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 0.00023064947318760808, |
|
"loss": 0.7025, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00023017770089636734, |
|
"loss": 0.6976, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 0.0002297059286051266, |
|
"loss": 0.6785, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0002292341563138858, |
|
"loss": 0.6773, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 0.00022876238402264507, |
|
"loss": 0.6763, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.00022829061173140427, |
|
"loss": 0.6774, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.00022781883944016353, |
|
"loss": 0.6786, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.00022734706714892276, |
|
"loss": 0.6885, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.000226875294857682, |
|
"loss": 0.6852, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00022640352256644123, |
|
"loss": 0.6842, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 0.0002259317502752005, |
|
"loss": 0.6868, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.00022545997798395975, |
|
"loss": 0.6918, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 0.00022498820569271896, |
|
"loss": 0.6846, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00022451643340147822, |
|
"loss": 0.6881, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.00022404466111023742, |
|
"loss": 0.69, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 0.00022357288881899668, |
|
"loss": 0.6823, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 0.00022310111652775591, |
|
"loss": 0.6896, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.00022262934423651517, |
|
"loss": 0.6862, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.00022215757194527438, |
|
"loss": 0.6858, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00022168579965403364, |
|
"loss": 0.691, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00022121402736279284, |
|
"loss": 0.6895, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 0.0002207422550715521, |
|
"loss": 0.6924, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 0.00022027048278031136, |
|
"loss": 0.6903, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0002197987104890706, |
|
"loss": 0.6637, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.00021932693819782983, |
|
"loss": 0.6612, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.00021885516590658906, |
|
"loss": 0.6665, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00021838339361534832, |
|
"loss": 0.6702, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 0.00021791162132410753, |
|
"loss": 0.6667, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 0.0002174398490328668, |
|
"loss": 0.6674, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 0.000216968076741626, |
|
"loss": 0.6719, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 0.00021649630445038525, |
|
"loss": 0.6671, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 0.0002160245321591445, |
|
"loss": 0.6647, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 0.00021555275986790375, |
|
"loss": 0.6671, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 0.00021508098757666298, |
|
"loss": 0.6681, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 0.0002146092152854222, |
|
"loss": 0.6727, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 0.00021413744299418147, |
|
"loss": 0.6767, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 0.00021366567070294068, |
|
"loss": 0.6749, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 0.00021319389841169994, |
|
"loss": 0.6704, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 0.00021272212612045917, |
|
"loss": 0.6729, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 0.0002122503538292184, |
|
"loss": 0.6641, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 0.00021177858153797766, |
|
"loss": 0.6678, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0002113068092467369, |
|
"loss": 0.6677, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.00021083503695549613, |
|
"loss": 0.6683, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.00021036326466425536, |
|
"loss": 0.6746, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 0.00020989149237301462, |
|
"loss": 0.6469, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 0.00020941972008177383, |
|
"loss": 0.6496, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 0.00020894794779053309, |
|
"loss": 0.6513, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 0.00020847617549929232, |
|
"loss": 0.6516, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 0.00020800440320805155, |
|
"loss": 0.6556, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 0.0002075326309168108, |
|
"loss": 0.6477, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 0.00020706085862557004, |
|
"loss": 0.6571, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 0.00020658908633432928, |
|
"loss": 0.6479, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 0.0002061173140430885, |
|
"loss": 0.65, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 0.00020564554175184777, |
|
"loss": 0.6557, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"learning_rate": 0.00020517376946060697, |
|
"loss": 0.6644, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"learning_rate": 0.00020470199716936623, |
|
"loss": 0.6558, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"learning_rate": 0.00020423022487812547, |
|
"loss": 0.6503, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0002037584525868847, |
|
"loss": 0.6569, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.00020328668029564396, |
|
"loss": 0.66, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.0002028149080044032, |
|
"loss": 0.6528, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00020234313571316245, |
|
"loss": 0.6493, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 0.00020187136342192166, |
|
"loss": 0.6599, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 0.00020139959113068092, |
|
"loss": 0.6534, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 0.00020092781883944012, |
|
"loss": 0.6546, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 0.00020045604654819938, |
|
"loss": 0.6488, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 0.00019998427425695862, |
|
"loss": 0.643, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 10.08, |
|
"learning_rate": 0.00019951250196571788, |
|
"loss": 0.6465, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 0.0001990407296744771, |
|
"loss": 0.6324, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.00019856895738323634, |
|
"loss": 0.6446, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 10.22, |
|
"learning_rate": 0.0001980971850919956, |
|
"loss": 0.6283, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 0.0001976254128007548, |
|
"loss": 0.635, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.00019715364050951407, |
|
"loss": 0.6333, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 0.0001966818682182733, |
|
"loss": 0.6378, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 0.00019621009592703253, |
|
"loss": 0.6369, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 10.46, |
|
"learning_rate": 0.00019573832363579177, |
|
"loss": 0.6348, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 0.00019526655134455103, |
|
"loss": 0.6439, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 0.00019479477905331026, |
|
"loss": 0.6342, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 0.0001943230067620695, |
|
"loss": 0.6424, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 10.65, |
|
"learning_rate": 0.00019385123447082875, |
|
"loss": 0.6408, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 10.69, |
|
"learning_rate": 0.00019337946217958796, |
|
"loss": 0.6413, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 10.74, |
|
"learning_rate": 0.00019290768988834722, |
|
"loss": 0.643, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 10.79, |
|
"learning_rate": 0.00019243591759710645, |
|
"loss": 0.6421, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 0.00019196414530586568, |
|
"loss": 0.6393, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 0.00019149237301462491, |
|
"loss": 0.6385, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 0.00019102060072338417, |
|
"loss": 0.6387, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 10.98, |
|
"learning_rate": 0.00019054882843214338, |
|
"loss": 0.6442, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 0.00019007705614090264, |
|
"loss": 0.6294, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 0.0001896052838496619, |
|
"loss": 0.6191, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 11.12, |
|
"learning_rate": 0.0001891335115584211, |
|
"loss": 0.6226, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 0.00018866173926718036, |
|
"loss": 0.6207, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 11.21, |
|
"learning_rate": 0.0001881899669759396, |
|
"loss": 0.6282, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 11.26, |
|
"learning_rate": 0.00018771819468469883, |
|
"loss": 0.615, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 0.00018724642239345806, |
|
"loss": 0.6201, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 11.35, |
|
"learning_rate": 0.00018677465010221732, |
|
"loss": 0.6199, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 0.00018630287781097653, |
|
"loss": 0.6211, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 11.45, |
|
"learning_rate": 0.0001858311055197358, |
|
"loss": 0.6372, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 0.00018535933322849505, |
|
"loss": 0.6268, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 0.00018488756093725425, |
|
"loss": 0.6283, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 0.00018441578864601351, |
|
"loss": 0.6398, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 0.00018394401635477275, |
|
"loss": 0.6282, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 11.68, |
|
"learning_rate": 0.00018347224406353198, |
|
"loss": 0.6327, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 11.73, |
|
"learning_rate": 0.0001830004717722912, |
|
"loss": 0.6226, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 0.00018252869948105047, |
|
"loss": 0.6321, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"learning_rate": 0.00018205692718980968, |
|
"loss": 0.6244, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 11.87, |
|
"learning_rate": 0.00018158515489856894, |
|
"loss": 0.6257, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 11.92, |
|
"learning_rate": 0.0001811133826073282, |
|
"loss": 0.6291, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 11.96, |
|
"learning_rate": 0.0001806416103160874, |
|
"loss": 0.6283, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 0.00018016983802484666, |
|
"loss": 0.6246, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 0.0001796980657336059, |
|
"loss": 0.6133, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 0.00017922629344236516, |
|
"loss": 0.6069, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 0.00017875452115112436, |
|
"loss": 0.6163, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 0.00017828274885988362, |
|
"loss": 0.6131, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 0.00017781097656864283, |
|
"loss": 0.6096, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 0.00017733920427740209, |
|
"loss": 0.6073, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 12.34, |
|
"learning_rate": 0.00017686743198616135, |
|
"loss": 0.6068, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 12.39, |
|
"learning_rate": 0.00017639565969492058, |
|
"loss": 0.6114, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 12.44, |
|
"learning_rate": 0.0001759238874036798, |
|
"loss": 0.6189, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 12.48, |
|
"learning_rate": 0.00017545211511243904, |
|
"loss": 0.6111, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 12.53, |
|
"learning_rate": 0.0001749803428211983, |
|
"loss": 0.6085, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 12.58, |
|
"learning_rate": 0.0001745085705299575, |
|
"loss": 0.6164, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 12.62, |
|
"learning_rate": 0.00017403679823871677, |
|
"loss": 0.6169, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 0.00017356502594747598, |
|
"loss": 0.615, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 0.00017309325365623524, |
|
"loss": 0.6145, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 0.0001726214813649945, |
|
"loss": 0.6149, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 0.00017214970907375373, |
|
"loss": 0.6128, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 0.00017167793678251296, |
|
"loss": 0.6123, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 0.0001712061644912722, |
|
"loss": 0.6132, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 12.95, |
|
"learning_rate": 0.00017073439220003145, |
|
"loss": 0.6154, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.00017026261990879066, |
|
"loss": 0.6168, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 0.00016979084761754992, |
|
"loss": 0.5926, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 13.09, |
|
"learning_rate": 0.00016931907532630915, |
|
"loss": 0.5924, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 0.00016884730303506838, |
|
"loss": 0.5949, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.00016837553074382764, |
|
"loss": 0.5947, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 13.24, |
|
"learning_rate": 0.00016790375845258688, |
|
"loss": 0.5971, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 13.28, |
|
"learning_rate": 0.0001674319861613461, |
|
"loss": 0.5976, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 0.00016696021387010534, |
|
"loss": 0.5984, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 0.0001664884415788646, |
|
"loss": 0.606, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 13.42, |
|
"learning_rate": 0.0001660166692876238, |
|
"loss": 0.6091, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 0.00016554489699638307, |
|
"loss": 0.5927, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 13.52, |
|
"learning_rate": 0.0001650731247051423, |
|
"loss": 0.6067, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 0.00016460135241390153, |
|
"loss": 0.6024, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 0.0001641295801226608, |
|
"loss": 0.6055, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 13.66, |
|
"learning_rate": 0.00016365780783142003, |
|
"loss": 0.6003, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 0.00016318603554017929, |
|
"loss": 0.6076, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 13.75, |
|
"learning_rate": 0.0001627142632489385, |
|
"loss": 0.606, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 13.8, |
|
"learning_rate": 0.00016224249095769775, |
|
"loss": 0.5901, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 0.00016177071866645696, |
|
"loss": 0.6084, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 13.9, |
|
"learning_rate": 0.00016129894637521622, |
|
"loss": 0.6075, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 13.94, |
|
"learning_rate": 0.00016082717408397545, |
|
"loss": 0.6085, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 0.00016035540179273468, |
|
"loss": 0.6009, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 0.00015988362950149391, |
|
"loss": 0.5894, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 0.00015941185721025317, |
|
"loss": 0.5835, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 0.00015894008491901243, |
|
"loss": 0.5859, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 0.00015846831262777164, |
|
"loss": 0.5929, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.0001579965403365309, |
|
"loss": 0.59, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 14.27, |
|
"learning_rate": 0.0001575247680452901, |
|
"loss": 0.582, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 14.32, |
|
"learning_rate": 0.00015705299575404937, |
|
"loss": 0.588, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 14.37, |
|
"learning_rate": 0.0001565812234628086, |
|
"loss": 0.5923, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 0.00015610945117156786, |
|
"loss": 0.5837, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 14.46, |
|
"learning_rate": 0.00015563767888032706, |
|
"loss": 0.5898, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 0.00015516590658908632, |
|
"loss": 0.5904, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"learning_rate": 0.00015469413429784558, |
|
"loss": 0.5892, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 14.6, |
|
"learning_rate": 0.0001542223620066048, |
|
"loss": 0.5887, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 14.65, |
|
"learning_rate": 0.00015375058971536405, |
|
"loss": 0.5843, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 14.7, |
|
"learning_rate": 0.00015327881742412328, |
|
"loss": 0.5853, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 0.00015280704513288251, |
|
"loss": 0.5919, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 14.79, |
|
"learning_rate": 0.00015233527284164175, |
|
"loss": 0.5915, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 14.84, |
|
"learning_rate": 0.000151863500550401, |
|
"loss": 0.5918, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 14.88, |
|
"learning_rate": 0.0001513917282591602, |
|
"loss": 0.5932, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 0.00015091995596791947, |
|
"loss": 0.5891, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 14.98, |
|
"learning_rate": 0.00015044818367667873, |
|
"loss": 0.5888, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 0.00014997641138543794, |
|
"loss": 0.5817, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 15.07, |
|
"learning_rate": 0.00014950463909419717, |
|
"loss": 0.574, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 15.12, |
|
"learning_rate": 0.00014903286680295643, |
|
"loss": 0.5723, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 0.00014856109451171566, |
|
"loss": 0.573, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 0.00014808932222047492, |
|
"loss": 0.5758, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 0.00014761754992923416, |
|
"loss": 0.5796, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 0.0001471457776379934, |
|
"loss": 0.5722, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 15.36, |
|
"learning_rate": 0.00014667400534675262, |
|
"loss": 0.5825, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 15.4, |
|
"learning_rate": 0.00014620223305551185, |
|
"loss": 0.5744, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"learning_rate": 0.0001457304607642711, |
|
"loss": 0.5786, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 15.5, |
|
"learning_rate": 0.00014525868847303032, |
|
"loss": 0.5808, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 0.00014478691618178958, |
|
"loss": 0.5855, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 0.0001443151438905488, |
|
"loss": 0.585, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 0.00014384337159930807, |
|
"loss": 0.5692, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 0.0001433715993080673, |
|
"loss": 0.5789, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 15.73, |
|
"learning_rate": 0.00014289982701682654, |
|
"loss": 0.5836, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 15.78, |
|
"learning_rate": 0.00014242805472558577, |
|
"loss": 0.5796, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 0.000141956282434345, |
|
"loss": 0.5756, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 15.87, |
|
"learning_rate": 0.00014148451014310424, |
|
"loss": 0.5667, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 15.92, |
|
"learning_rate": 0.0001410127378518635, |
|
"loss": 0.5845, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 15.97, |
|
"learning_rate": 0.00014054096556062273, |
|
"loss": 0.5783, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 16.02, |
|
"learning_rate": 0.00014006919326938196, |
|
"loss": 0.5749, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 16.06, |
|
"learning_rate": 0.00013959742097814122, |
|
"loss": 0.5602, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 0.00013912564868690045, |
|
"loss": 0.564, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 16.16, |
|
"learning_rate": 0.00013865387639565969, |
|
"loss": 0.5659, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 0.00013818210410441892, |
|
"loss": 0.5608, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 16.25, |
|
"learning_rate": 0.00013771033181317815, |
|
"loss": 0.5668, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 0.00013723855952193738, |
|
"loss": 0.5646, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 0.00013676678723069664, |
|
"loss": 0.5663, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 0.00013629501493945588, |
|
"loss": 0.5716, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 16.44, |
|
"learning_rate": 0.0001358232426482151, |
|
"loss": 0.5716, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 0.00013535147035697437, |
|
"loss": 0.568, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 0.0001348796980657336, |
|
"loss": 0.566, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 16.58, |
|
"learning_rate": 0.00013440792577449284, |
|
"loss": 0.5657, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 16.63, |
|
"learning_rate": 0.00013393615348325207, |
|
"loss": 0.5641, |
|
"step": 706000 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 0.0001334643811920113, |
|
"loss": 0.5679, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 0.00013299260890077056, |
|
"loss": 0.5652, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 0.0001325208366095298, |
|
"loss": 0.5672, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 0.00013204906431828903, |
|
"loss": 0.5695, |
|
"step": 714000 |
|
}, |
|
{ |
|
"epoch": 16.86, |
|
"learning_rate": 0.00013157729202704826, |
|
"loss": 0.5719, |
|
"step": 716000 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 0.00013110551973580752, |
|
"loss": 0.571, |
|
"step": 718000 |
|
}, |
|
{ |
|
"epoch": 16.96, |
|
"learning_rate": 0.00013063374744456675, |
|
"loss": 0.5684, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 0.00013016197515332598, |
|
"loss": 0.5672, |
|
"step": 722000 |
|
}, |
|
{ |
|
"epoch": 17.05, |
|
"learning_rate": 0.00012969020286208522, |
|
"loss": 0.5566, |
|
"step": 724000 |
|
}, |
|
{ |
|
"epoch": 17.1, |
|
"learning_rate": 0.00012921843057084445, |
|
"loss": 0.5459, |
|
"step": 726000 |
|
}, |
|
{ |
|
"epoch": 17.15, |
|
"learning_rate": 0.0001287466582796037, |
|
"loss": 0.5522, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"learning_rate": 0.00012827488598836294, |
|
"loss": 0.5586, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 17.24, |
|
"learning_rate": 0.00012780311369712217, |
|
"loss": 0.5551, |
|
"step": 732000 |
|
}, |
|
{ |
|
"epoch": 17.29, |
|
"learning_rate": 0.0001273313414058814, |
|
"loss": 0.558, |
|
"step": 734000 |
|
}, |
|
{ |
|
"epoch": 17.33, |
|
"learning_rate": 0.00012685956911464067, |
|
"loss": 0.5501, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 17.38, |
|
"learning_rate": 0.0001263877968233999, |
|
"loss": 0.5496, |
|
"step": 738000 |
|
}, |
|
{ |
|
"epoch": 17.43, |
|
"learning_rate": 0.00012591602453215913, |
|
"loss": 0.5617, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 0.00012544425224091837, |
|
"loss": 0.5481, |
|
"step": 742000 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 0.00012497247994967763, |
|
"loss": 0.5627, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 0.00012450070765843686, |
|
"loss": 0.5556, |
|
"step": 746000 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 0.0001240289353671961, |
|
"loss": 0.5565, |
|
"step": 748000 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 0.00012355716307595532, |
|
"loss": 0.5591, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 0.00012308539078471456, |
|
"loss": 0.5526, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 17.76, |
|
"learning_rate": 0.00012261361849347382, |
|
"loss": 0.5564, |
|
"step": 754000 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 0.00012214184620223305, |
|
"loss": 0.5541, |
|
"step": 756000 |
|
}, |
|
{ |
|
"epoch": 17.85, |
|
"learning_rate": 0.00012167007391099228, |
|
"loss": 0.5601, |
|
"step": 758000 |
|
}, |
|
{ |
|
"epoch": 17.9, |
|
"learning_rate": 0.00012119830161975153, |
|
"loss": 0.5555, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 17.95, |
|
"learning_rate": 0.00012072652932851076, |
|
"loss": 0.5525, |
|
"step": 762000 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 0.00012025475703727, |
|
"loss": 0.5577, |
|
"step": 764000 |
|
}, |
|
{ |
|
"epoch": 18.04, |
|
"learning_rate": 0.00011978298474602924, |
|
"loss": 0.5403, |
|
"step": 766000 |
|
}, |
|
{ |
|
"epoch": 18.09, |
|
"learning_rate": 0.00011931121245478847, |
|
"loss": 0.5466, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 0.0001188394401635477, |
|
"loss": 0.546, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"learning_rate": 0.00011836766787230695, |
|
"loss": 0.5435, |
|
"step": 772000 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 0.0001178958955810662, |
|
"loss": 0.5442, |
|
"step": 774000 |
|
}, |
|
{ |
|
"epoch": 18.28, |
|
"learning_rate": 0.00011742412328982544, |
|
"loss": 0.5381, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 18.32, |
|
"learning_rate": 0.00011695235099858468, |
|
"loss": 0.542, |
|
"step": 778000 |
|
}, |
|
{ |
|
"epoch": 18.37, |
|
"learning_rate": 0.00011648057870734391, |
|
"loss": 0.5458, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 0.00011600880641610316, |
|
"loss": 0.5473, |
|
"step": 782000 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 0.00011553703412486239, |
|
"loss": 0.5511, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 0.00011506526183362162, |
|
"loss": 0.5415, |
|
"step": 786000 |
|
}, |
|
{ |
|
"epoch": 18.56, |
|
"learning_rate": 0.00011459348954238087, |
|
"loss": 0.5431, |
|
"step": 788000 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 0.0001141217172511401, |
|
"loss": 0.5403, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 18.65, |
|
"learning_rate": 0.00011364994495989935, |
|
"loss": 0.5402, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 0.00011317817266865859, |
|
"loss": 0.5447, |
|
"step": 794000 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 0.00011270640037741783, |
|
"loss": 0.5485, |
|
"step": 796000 |
|
}, |
|
{ |
|
"epoch": 18.79, |
|
"learning_rate": 0.00011223462808617706, |
|
"loss": 0.5485, |
|
"step": 798000 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 0.0001117628557949363, |
|
"loss": 0.5441, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 18.89, |
|
"learning_rate": 0.00011129108350369554, |
|
"loss": 0.5457, |
|
"step": 802000 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 0.00011081931121245477, |
|
"loss": 0.5416, |
|
"step": 804000 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 0.00011034753892121402, |
|
"loss": 0.5494, |
|
"step": 806000 |
|
}, |
|
{ |
|
"epoch": 19.03, |
|
"learning_rate": 0.00010987576662997325, |
|
"loss": 0.5347, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 19.08, |
|
"learning_rate": 0.00010940399433873251, |
|
"loss": 0.5257, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 0.00010893222204749174, |
|
"loss": 0.5278, |
|
"step": 812000 |
|
}, |
|
{ |
|
"epoch": 19.17, |
|
"learning_rate": 0.00010846044975625097, |
|
"loss": 0.5355, |
|
"step": 814000 |
|
}, |
|
{ |
|
"epoch": 19.22, |
|
"learning_rate": 0.00010798867746501022, |
|
"loss": 0.5378, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 19.27, |
|
"learning_rate": 0.00010751690517376945, |
|
"loss": 0.537, |
|
"step": 818000 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 0.00010704513288252869, |
|
"loss": 0.5385, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 19.36, |
|
"learning_rate": 0.00010657336059128793, |
|
"loss": 0.5302, |
|
"step": 822000 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 0.00010610158830004717, |
|
"loss": 0.5409, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 19.45, |
|
"learning_rate": 0.0001056298160088064, |
|
"loss": 0.5347, |
|
"step": 826000 |
|
}, |
|
{ |
|
"epoch": 19.5, |
|
"learning_rate": 0.00010515804371756563, |
|
"loss": 0.5288, |
|
"step": 828000 |
|
}, |
|
{ |
|
"epoch": 19.55, |
|
"learning_rate": 0.00010468627142632489, |
|
"loss": 0.5276, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 0.00010421449913508412, |
|
"loss": 0.5334, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 19.64, |
|
"learning_rate": 0.00010374272684384337, |
|
"loss": 0.5407, |
|
"step": 834000 |
|
}, |
|
{ |
|
"epoch": 19.69, |
|
"learning_rate": 0.0001032709545526026, |
|
"loss": 0.5354, |
|
"step": 836000 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"learning_rate": 0.00010279918226136184, |
|
"loss": 0.5274, |
|
"step": 838000 |
|
}, |
|
{ |
|
"epoch": 19.78, |
|
"learning_rate": 0.00010232740997012108, |
|
"loss": 0.529, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 19.83, |
|
"learning_rate": 0.00010185563767888031, |
|
"loss": 0.5327, |
|
"step": 842000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 0.00010138386538763955, |
|
"loss": 0.5383, |
|
"step": 844000 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 0.0001009120930963988, |
|
"loss": 0.5352, |
|
"step": 846000 |
|
}, |
|
{ |
|
"epoch": 19.97, |
|
"learning_rate": 0.00010044032080515804, |
|
"loss": 0.5355, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 20.02, |
|
"learning_rate": 9.996854851391727e-05, |
|
"loss": 0.5233, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 20.07, |
|
"learning_rate": 9.949677622267652e-05, |
|
"loss": 0.5255, |
|
"step": 852000 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 9.902500393143575e-05, |
|
"loss": 0.5168, |
|
"step": 854000 |
|
}, |
|
{ |
|
"epoch": 20.16, |
|
"learning_rate": 9.855323164019498e-05, |
|
"loss": 0.5181, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 20.21, |
|
"learning_rate": 9.808145934895423e-05, |
|
"loss": 0.5245, |
|
"step": 858000 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"learning_rate": 9.760968705771346e-05, |
|
"loss": 0.5191, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"learning_rate": 9.71379147664727e-05, |
|
"loss": 0.5207, |
|
"step": 862000 |
|
}, |
|
{ |
|
"epoch": 20.35, |
|
"learning_rate": 9.666614247523194e-05, |
|
"loss": 0.5251, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 9.619437018399119e-05, |
|
"loss": 0.5158, |
|
"step": 866000 |
|
}, |
|
{ |
|
"epoch": 20.44, |
|
"learning_rate": 9.572259789275043e-05, |
|
"loss": 0.5178, |
|
"step": 868000 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"learning_rate": 9.525082560150967e-05, |
|
"loss": 0.5213, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 20.54, |
|
"learning_rate": 9.47790533102689e-05, |
|
"loss": 0.5271, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 20.58, |
|
"learning_rate": 9.430728101902815e-05, |
|
"loss": 0.5242, |
|
"step": 874000 |
|
}, |
|
{ |
|
"epoch": 20.63, |
|
"learning_rate": 9.383550872778738e-05, |
|
"loss": 0.5278, |
|
"step": 876000 |
|
}, |
|
{ |
|
"epoch": 20.68, |
|
"learning_rate": 9.336373643654661e-05, |
|
"loss": 0.5222, |
|
"step": 878000 |
|
}, |
|
{ |
|
"epoch": 20.73, |
|
"learning_rate": 9.289196414530586e-05, |
|
"loss": 0.5242, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 20.77, |
|
"learning_rate": 9.242019185406509e-05, |
|
"loss": 0.5271, |
|
"step": 882000 |
|
}, |
|
{ |
|
"epoch": 20.82, |
|
"learning_rate": 9.194841956282434e-05, |
|
"loss": 0.5267, |
|
"step": 884000 |
|
}, |
|
{ |
|
"epoch": 20.87, |
|
"learning_rate": 9.147664727158358e-05, |
|
"loss": 0.5195, |
|
"step": 886000 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"learning_rate": 9.100487498034282e-05, |
|
"loss": 0.5162, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 9.053310268910205e-05, |
|
"loss": 0.5166, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 9.00613303978613e-05, |
|
"loss": 0.5266, |
|
"step": 892000 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 8.958955810662053e-05, |
|
"loss": 0.5149, |
|
"step": 894000 |
|
}, |
|
{ |
|
"epoch": 21.1, |
|
"learning_rate": 8.911778581537976e-05, |
|
"loss": 0.5023, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 21.15, |
|
"learning_rate": 8.864601352413901e-05, |
|
"loss": 0.5112, |
|
"step": 898000 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 8.817424123289824e-05, |
|
"loss": 0.5084, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 21.24, |
|
"learning_rate": 8.770246894165747e-05, |
|
"loss": 0.509, |
|
"step": 902000 |
|
}, |
|
{ |
|
"epoch": 21.29, |
|
"learning_rate": 8.723069665041673e-05, |
|
"loss": 0.5043, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 21.34, |
|
"learning_rate": 8.675892435917597e-05, |
|
"loss": 0.5188, |
|
"step": 906000 |
|
}, |
|
{ |
|
"epoch": 21.38, |
|
"learning_rate": 8.628715206793521e-05, |
|
"loss": 0.5094, |
|
"step": 908000 |
|
}, |
|
{ |
|
"epoch": 21.43, |
|
"learning_rate": 8.581537977669444e-05, |
|
"loss": 0.519, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 21.48, |
|
"learning_rate": 8.534360748545368e-05, |
|
"loss": 0.5108, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 21.53, |
|
"learning_rate": 8.487183519421292e-05, |
|
"loss": 0.5054, |
|
"step": 914000 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"learning_rate": 8.440006290297216e-05, |
|
"loss": 0.5092, |
|
"step": 916000 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"learning_rate": 8.392829061173139e-05, |
|
"loss": 0.5075, |
|
"step": 918000 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 8.345651832049062e-05, |
|
"loss": 0.5024, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 21.71, |
|
"learning_rate": 8.298474602924988e-05, |
|
"loss": 0.5161, |
|
"step": 922000 |
|
}, |
|
{ |
|
"epoch": 21.76, |
|
"learning_rate": 8.251297373800911e-05, |
|
"loss": 0.5117, |
|
"step": 924000 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 8.204120144676836e-05, |
|
"loss": 0.5136, |
|
"step": 926000 |
|
}, |
|
{ |
|
"epoch": 21.86, |
|
"learning_rate": 8.15694291555276e-05, |
|
"loss": 0.5062, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 21.9, |
|
"learning_rate": 8.109765686428683e-05, |
|
"loss": 0.5122, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 21.95, |
|
"learning_rate": 8.062588457304607e-05, |
|
"loss": 0.5113, |
|
"step": 932000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 8.01541122818053e-05, |
|
"loss": 0.5073, |
|
"step": 934000 |
|
}, |
|
{ |
|
"epoch": 22.04, |
|
"learning_rate": 7.968233999056454e-05, |
|
"loss": 0.4986, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 7.921056769932378e-05, |
|
"loss": 0.5061, |
|
"step": 938000 |
|
}, |
|
{ |
|
"epoch": 22.14, |
|
"learning_rate": 7.873879540808303e-05, |
|
"loss": 0.4963, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 22.19, |
|
"learning_rate": 7.826702311684228e-05, |
|
"loss": 0.4981, |
|
"step": 942000 |
|
}, |
|
{ |
|
"epoch": 22.23, |
|
"learning_rate": 7.779525082560151e-05, |
|
"loss": 0.498, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 22.28, |
|
"learning_rate": 7.732347853436074e-05, |
|
"loss": 0.4963, |
|
"step": 946000 |
|
}, |
|
{ |
|
"epoch": 22.33, |
|
"learning_rate": 7.685170624311998e-05, |
|
"loss": 0.4987, |
|
"step": 948000 |
|
}, |
|
{ |
|
"epoch": 22.37, |
|
"learning_rate": 7.637993395187922e-05, |
|
"loss": 0.4999, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 22.42, |
|
"learning_rate": 7.590816166063845e-05, |
|
"loss": 0.4945, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 22.47, |
|
"learning_rate": 7.543638936939769e-05, |
|
"loss": 0.4986, |
|
"step": 954000 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 7.496461707815693e-05, |
|
"loss": 0.4956, |
|
"step": 956000 |
|
}, |
|
{ |
|
"epoch": 22.56, |
|
"learning_rate": 7.449284478691618e-05, |
|
"loss": 0.4958, |
|
"step": 958000 |
|
}, |
|
{ |
|
"epoch": 22.61, |
|
"learning_rate": 7.402107249567541e-05, |
|
"loss": 0.5033, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 22.66, |
|
"learning_rate": 7.354930020443464e-05, |
|
"loss": 0.4976, |
|
"step": 962000 |
|
}, |
|
{ |
|
"epoch": 22.7, |
|
"learning_rate": 7.307752791319389e-05, |
|
"loss": 0.5036, |
|
"step": 964000 |
|
}, |
|
{ |
|
"epoch": 22.75, |
|
"learning_rate": 7.260575562195314e-05, |
|
"loss": 0.4973, |
|
"step": 966000 |
|
}, |
|
{ |
|
"epoch": 22.8, |
|
"learning_rate": 7.213398333071237e-05, |
|
"loss": 0.501, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 22.85, |
|
"learning_rate": 7.16622110394716e-05, |
|
"loss": 0.5005, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 7.119043874823085e-05, |
|
"loss": 0.4991, |
|
"step": 972000 |
|
}, |
|
{ |
|
"epoch": 22.94, |
|
"learning_rate": 7.07186664569901e-05, |
|
"loss": 0.495, |
|
"step": 974000 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 7.024689416574933e-05, |
|
"loss": 0.5016, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 23.03, |
|
"learning_rate": 6.977512187450856e-05, |
|
"loss": 0.4966, |
|
"step": 978000 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 6.93033495832678e-05, |
|
"loss": 0.4909, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 23.13, |
|
"learning_rate": 6.883157729202704e-05, |
|
"loss": 0.4846, |
|
"step": 982000 |
|
}, |
|
{ |
|
"epoch": 23.17, |
|
"learning_rate": 6.835980500078629e-05, |
|
"loss": 0.4938, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 23.22, |
|
"learning_rate": 6.788803270954552e-05, |
|
"loss": 0.4967, |
|
"step": 986000 |
|
}, |
|
{ |
|
"epoch": 23.27, |
|
"learning_rate": 6.741626041830475e-05, |
|
"loss": 0.4894, |
|
"step": 988000 |
|
}, |
|
{ |
|
"epoch": 23.32, |
|
"learning_rate": 6.6944488127064e-05, |
|
"loss": 0.4912, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 23.36, |
|
"learning_rate": 6.647271583582324e-05, |
|
"loss": 0.4898, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 6.600094354458248e-05, |
|
"loss": 0.4854, |
|
"step": 994000 |
|
}, |
|
{ |
|
"epoch": 23.46, |
|
"learning_rate": 6.552917125334171e-05, |
|
"loss": 0.4838, |
|
"step": 996000 |
|
}, |
|
{ |
|
"epoch": 23.5, |
|
"learning_rate": 6.505739896210096e-05, |
|
"loss": 0.4846, |
|
"step": 998000 |
|
}, |
|
{ |
|
"epoch": 23.55, |
|
"learning_rate": 6.45856266708602e-05, |
|
"loss": 0.4829, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 23.6, |
|
"learning_rate": 6.411385437961944e-05, |
|
"loss": 0.4792, |
|
"step": 1002000 |
|
}, |
|
{ |
|
"epoch": 23.65, |
|
"learning_rate": 6.364208208837867e-05, |
|
"loss": 0.486, |
|
"step": 1004000 |
|
}, |
|
{ |
|
"epoch": 23.69, |
|
"learning_rate": 6.317030979713791e-05, |
|
"loss": 0.4946, |
|
"step": 1006000 |
|
}, |
|
{ |
|
"epoch": 23.74, |
|
"learning_rate": 6.269853750589715e-05, |
|
"loss": 0.4825, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 23.79, |
|
"learning_rate": 6.22267652146564e-05, |
|
"loss": 0.4845, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 23.83, |
|
"learning_rate": 6.175499292341563e-05, |
|
"loss": 0.4875, |
|
"step": 1012000 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"learning_rate": 6.128322063217486e-05, |
|
"loss": 0.4935, |
|
"step": 1014000 |
|
}, |
|
{ |
|
"epoch": 23.93, |
|
"learning_rate": 6.0811448340934105e-05, |
|
"loss": 0.4834, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 23.98, |
|
"learning_rate": 6.033967604969334e-05, |
|
"loss": 0.4853, |
|
"step": 1018000 |
|
}, |
|
{ |
|
"epoch": 24.02, |
|
"learning_rate": 5.9867903758452584e-05, |
|
"loss": 0.485, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 24.07, |
|
"learning_rate": 5.9396131467211824e-05, |
|
"loss": 0.4821, |
|
"step": 1022000 |
|
}, |
|
{ |
|
"epoch": 24.12, |
|
"learning_rate": 5.8924359175971057e-05, |
|
"loss": 0.4739, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 24.16, |
|
"learning_rate": 5.8452586884730296e-05, |
|
"loss": 0.4824, |
|
"step": 1026000 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 5.798081459348954e-05, |
|
"loss": 0.4823, |
|
"step": 1028000 |
|
}, |
|
{ |
|
"epoch": 24.26, |
|
"learning_rate": 5.7509042302248775e-05, |
|
"loss": 0.4805, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 24.31, |
|
"learning_rate": 5.7037270011008015e-05, |
|
"loss": 0.4727, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 24.35, |
|
"learning_rate": 5.6565497719767254e-05, |
|
"loss": 0.4726, |
|
"step": 1034000 |
|
}, |
|
{ |
|
"epoch": 24.4, |
|
"learning_rate": 5.609372542852649e-05, |
|
"loss": 0.4734, |
|
"step": 1036000 |
|
}, |
|
{ |
|
"epoch": 24.45, |
|
"learning_rate": 5.562195313728573e-05, |
|
"loss": 0.4775, |
|
"step": 1038000 |
|
}, |
|
{ |
|
"epoch": 24.49, |
|
"learning_rate": 5.515018084604497e-05, |
|
"loss": 0.4761, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 24.54, |
|
"learning_rate": 5.467840855480421e-05, |
|
"loss": 0.4692, |
|
"step": 1042000 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 5.4206636263563445e-05, |
|
"loss": 0.4826, |
|
"step": 1044000 |
|
}, |
|
{ |
|
"epoch": 24.63, |
|
"learning_rate": 5.3734863972322684e-05, |
|
"loss": 0.4767, |
|
"step": 1046000 |
|
}, |
|
{ |
|
"epoch": 24.68, |
|
"learning_rate": 5.326309168108193e-05, |
|
"loss": 0.4733, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"learning_rate": 5.2791319389841163e-05, |
|
"loss": 0.4772, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 5.23195470986004e-05, |
|
"loss": 0.4758, |
|
"step": 1052000 |
|
}, |
|
{ |
|
"epoch": 24.82, |
|
"learning_rate": 5.184777480735964e-05, |
|
"loss": 0.48, |
|
"step": 1054000 |
|
}, |
|
{ |
|
"epoch": 24.87, |
|
"learning_rate": 5.137600251611889e-05, |
|
"loss": 0.477, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 24.92, |
|
"learning_rate": 5.090423022487812e-05, |
|
"loss": 0.4715, |
|
"step": 1058000 |
|
}, |
|
{ |
|
"epoch": 24.96, |
|
"learning_rate": 5.043245793363736e-05, |
|
"loss": 0.4696, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 4.99606856423966e-05, |
|
"loss": 0.4682, |
|
"step": 1062000 |
|
}, |
|
{ |
|
"epoch": 25.06, |
|
"learning_rate": 4.948891335115583e-05, |
|
"loss": 0.4725, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 4.901714105991508e-05, |
|
"loss": 0.4655, |
|
"step": 1066000 |
|
}, |
|
{ |
|
"epoch": 25.15, |
|
"learning_rate": 4.854536876867432e-05, |
|
"loss": 0.4629, |
|
"step": 1068000 |
|
}, |
|
{ |
|
"epoch": 25.2, |
|
"learning_rate": 4.807359647743355e-05, |
|
"loss": 0.4654, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 25.25, |
|
"learning_rate": 4.760182418619279e-05, |
|
"loss": 0.4663, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 25.29, |
|
"learning_rate": 4.713005189495203e-05, |
|
"loss": 0.4682, |
|
"step": 1074000 |
|
}, |
|
{ |
|
"epoch": 25.34, |
|
"learning_rate": 4.665827960371127e-05, |
|
"loss": 0.4647, |
|
"step": 1076000 |
|
}, |
|
{ |
|
"epoch": 25.39, |
|
"learning_rate": 4.618650731247051e-05, |
|
"loss": 0.4722, |
|
"step": 1078000 |
|
}, |
|
{ |
|
"epoch": 25.44, |
|
"learning_rate": 4.571473502122975e-05, |
|
"loss": 0.4604, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 25.48, |
|
"learning_rate": 4.524296272998898e-05, |
|
"loss": 0.4585, |
|
"step": 1082000 |
|
}, |
|
{ |
|
"epoch": 25.53, |
|
"learning_rate": 4.477119043874823e-05, |
|
"loss": 0.4619, |
|
"step": 1084000 |
|
}, |
|
{ |
|
"epoch": 25.58, |
|
"learning_rate": 4.429941814750747e-05, |
|
"loss": 0.4661, |
|
"step": 1086000 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 4.382764585626671e-05, |
|
"loss": 0.4724, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 4.335587356502594e-05, |
|
"loss": 0.4667, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 4.288410127378518e-05, |
|
"loss": 0.4692, |
|
"step": 1092000 |
|
}, |
|
{ |
|
"epoch": 25.77, |
|
"learning_rate": 4.2412328982544426e-05, |
|
"loss": 0.4624, |
|
"step": 1094000 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 4.194055669130366e-05, |
|
"loss": 0.4587, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 25.86, |
|
"learning_rate": 4.14687844000629e-05, |
|
"loss": 0.4604, |
|
"step": 1098000 |
|
}, |
|
{ |
|
"epoch": 25.91, |
|
"learning_rate": 4.099701210882214e-05, |
|
"loss": 0.4718, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 25.95, |
|
"learning_rate": 4.052523981758137e-05, |
|
"loss": 0.4666, |
|
"step": 1102000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 4.005346752634062e-05, |
|
"loss": 0.4616, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 26.05, |
|
"learning_rate": 3.9581695235099856e-05, |
|
"loss": 0.456, |
|
"step": 1106000 |
|
}, |
|
{ |
|
"epoch": 26.1, |
|
"learning_rate": 3.9109922943859096e-05, |
|
"loss": 0.4573, |
|
"step": 1108000 |
|
}, |
|
{ |
|
"epoch": 26.14, |
|
"learning_rate": 3.863815065261833e-05, |
|
"loss": 0.4522, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 26.19, |
|
"learning_rate": 3.8166378361377575e-05, |
|
"loss": 0.4604, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 26.24, |
|
"learning_rate": 3.7694606070136815e-05, |
|
"loss": 0.4539, |
|
"step": 1114000 |
|
}, |
|
{ |
|
"epoch": 26.28, |
|
"learning_rate": 3.722283377889605e-05, |
|
"loss": 0.4485, |
|
"step": 1116000 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 3.675106148765529e-05, |
|
"loss": 0.4596, |
|
"step": 1118000 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 3.6279289196414526e-05, |
|
"loss": 0.4487, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 3.5807516905173766e-05, |
|
"loss": 0.459, |
|
"step": 1122000 |
|
}, |
|
{ |
|
"epoch": 26.47, |
|
"learning_rate": 3.5335744613933005e-05, |
|
"loss": 0.4609, |
|
"step": 1124000 |
|
}, |
|
{ |
|
"epoch": 26.52, |
|
"learning_rate": 3.4863972322692245e-05, |
|
"loss": 0.4549, |
|
"step": 1126000 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 3.4392200031451484e-05, |
|
"loss": 0.4553, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 26.61, |
|
"learning_rate": 3.3920427740210724e-05, |
|
"loss": 0.4552, |
|
"step": 1130000 |
|
}, |
|
{ |
|
"epoch": 26.66, |
|
"learning_rate": 3.344865544896996e-05, |
|
"loss": 0.4534, |
|
"step": 1132000 |
|
}, |
|
{ |
|
"epoch": 26.71, |
|
"learning_rate": 3.29768831577292e-05, |
|
"loss": 0.4522, |
|
"step": 1134000 |
|
}, |
|
{ |
|
"epoch": 26.75, |
|
"learning_rate": 3.2505110866488436e-05, |
|
"loss": 0.4544, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 26.8, |
|
"learning_rate": 3.203333857524768e-05, |
|
"loss": 0.458, |
|
"step": 1138000 |
|
}, |
|
{ |
|
"epoch": 26.85, |
|
"learning_rate": 3.1561566284006915e-05, |
|
"loss": 0.4537, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 26.9, |
|
"learning_rate": 3.1089793992766154e-05, |
|
"loss": 0.4581, |
|
"step": 1142000 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 3.0618021701525394e-05, |
|
"loss": 0.4525, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"learning_rate": 3.014624941028463e-05, |
|
"loss": 0.4562, |
|
"step": 1146000 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 2.9674477119043873e-05, |
|
"loss": 0.4516, |
|
"step": 1148000 |
|
}, |
|
{ |
|
"epoch": 27.08, |
|
"learning_rate": 2.920270482780311e-05, |
|
"loss": 0.4472, |
|
"step": 1150000 |
|
}, |
|
{ |
|
"epoch": 27.13, |
|
"learning_rate": 2.8730932536562352e-05, |
|
"loss": 0.4507, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 27.18, |
|
"learning_rate": 2.8259160245321588e-05, |
|
"loss": 0.4421, |
|
"step": 1154000 |
|
}, |
|
{ |
|
"epoch": 27.23, |
|
"learning_rate": 2.778738795408083e-05, |
|
"loss": 0.4487, |
|
"step": 1156000 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"learning_rate": 2.7315615662840067e-05, |
|
"loss": 0.4528, |
|
"step": 1158000 |
|
}, |
|
{ |
|
"epoch": 27.32, |
|
"learning_rate": 2.6843843371599303e-05, |
|
"loss": 0.4483, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"learning_rate": 2.6372071080358546e-05, |
|
"loss": 0.45, |
|
"step": 1162000 |
|
}, |
|
{ |
|
"epoch": 27.41, |
|
"learning_rate": 2.5900298789117782e-05, |
|
"loss": 0.4517, |
|
"step": 1164000 |
|
}, |
|
{ |
|
"epoch": 27.46, |
|
"learning_rate": 2.5428526497877022e-05, |
|
"loss": 0.4444, |
|
"step": 1166000 |
|
}, |
|
{ |
|
"epoch": 27.51, |
|
"learning_rate": 2.495675420663626e-05, |
|
"loss": 0.4457, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 27.56, |
|
"learning_rate": 2.44849819153955e-05, |
|
"loss": 0.4471, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 27.6, |
|
"learning_rate": 2.401320962415474e-05, |
|
"loss": 0.4455, |
|
"step": 1172000 |
|
}, |
|
{ |
|
"epoch": 27.65, |
|
"learning_rate": 2.3541437332913976e-05, |
|
"loss": 0.4466, |
|
"step": 1174000 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"learning_rate": 2.3069665041673216e-05, |
|
"loss": 0.4421, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 27.74, |
|
"learning_rate": 2.2597892750432455e-05, |
|
"loss": 0.446, |
|
"step": 1178000 |
|
}, |
|
{ |
|
"epoch": 27.79, |
|
"learning_rate": 2.2126120459191695e-05, |
|
"loss": 0.4441, |
|
"step": 1180000 |
|
}, |
|
{ |
|
"epoch": 27.84, |
|
"learning_rate": 2.1654348167950935e-05, |
|
"loss": 0.4452, |
|
"step": 1182000 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 2.1182575876710174e-05, |
|
"loss": 0.4418, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 27.93, |
|
"learning_rate": 2.071080358546941e-05, |
|
"loss": 0.4396, |
|
"step": 1186000 |
|
}, |
|
{ |
|
"epoch": 27.98, |
|
"learning_rate": 2.0239031294228653e-05, |
|
"loss": 0.4477, |
|
"step": 1188000 |
|
}, |
|
{ |
|
"epoch": 28.03, |
|
"learning_rate": 1.976725900298789e-05, |
|
"loss": 0.4435, |
|
"step": 1190000 |
|
}, |
|
{ |
|
"epoch": 28.07, |
|
"learning_rate": 1.9295486711747125e-05, |
|
"loss": 0.4379, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 28.12, |
|
"learning_rate": 1.8823714420506368e-05, |
|
"loss": 0.4442, |
|
"step": 1194000 |
|
}, |
|
{ |
|
"epoch": 28.17, |
|
"learning_rate": 1.8351942129265604e-05, |
|
"loss": 0.4329, |
|
"step": 1196000 |
|
}, |
|
{ |
|
"epoch": 28.21, |
|
"learning_rate": 1.7880169838024844e-05, |
|
"loss": 0.4368, |
|
"step": 1198000 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"learning_rate": 1.7408397546784083e-05, |
|
"loss": 0.4427, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 28.31, |
|
"learning_rate": 1.6936625255543323e-05, |
|
"loss": 0.4426, |
|
"step": 1202000 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"learning_rate": 1.6464852964302562e-05, |
|
"loss": 0.4374, |
|
"step": 1204000 |
|
}, |
|
{ |
|
"epoch": 28.4, |
|
"learning_rate": 1.5993080673061802e-05, |
|
"loss": 0.4372, |
|
"step": 1206000 |
|
}, |
|
{ |
|
"epoch": 28.45, |
|
"learning_rate": 1.552130838182104e-05, |
|
"loss": 0.4365, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 28.5, |
|
"learning_rate": 1.504953609058028e-05, |
|
"loss": 0.4329, |
|
"step": 1210000 |
|
}, |
|
{ |
|
"epoch": 28.54, |
|
"learning_rate": 1.4577763799339517e-05, |
|
"loss": 0.4397, |
|
"step": 1212000 |
|
}, |
|
{ |
|
"epoch": 28.59, |
|
"learning_rate": 1.4105991508098757e-05, |
|
"loss": 0.4355, |
|
"step": 1214000 |
|
}, |
|
{ |
|
"epoch": 28.64, |
|
"learning_rate": 1.3634219216857994e-05, |
|
"loss": 0.4386, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 28.69, |
|
"learning_rate": 1.3162446925617234e-05, |
|
"loss": 0.4369, |
|
"step": 1218000 |
|
}, |
|
{ |
|
"epoch": 28.73, |
|
"learning_rate": 1.2690674634376474e-05, |
|
"loss": 0.441, |
|
"step": 1220000 |
|
}, |
|
{ |
|
"epoch": 28.78, |
|
"learning_rate": 1.2218902343135713e-05, |
|
"loss": 0.4383, |
|
"step": 1222000 |
|
}, |
|
{ |
|
"epoch": 28.83, |
|
"learning_rate": 1.1747130051894953e-05, |
|
"loss": 0.4338, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 28.87, |
|
"learning_rate": 1.1275357760654189e-05, |
|
"loss": 0.4373, |
|
"step": 1226000 |
|
}, |
|
{ |
|
"epoch": 28.92, |
|
"learning_rate": 1.0803585469413428e-05, |
|
"loss": 0.4392, |
|
"step": 1228000 |
|
}, |
|
{ |
|
"epoch": 28.97, |
|
"learning_rate": 1.0331813178172668e-05, |
|
"loss": 0.4401, |
|
"step": 1230000 |
|
}, |
|
{ |
|
"epoch": 29.02, |
|
"learning_rate": 9.860040886931907e-06, |
|
"loss": 0.4394, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 29.06, |
|
"learning_rate": 9.388268595691145e-06, |
|
"loss": 0.4316, |
|
"step": 1234000 |
|
}, |
|
{ |
|
"epoch": 29.11, |
|
"learning_rate": 8.916496304450385e-06, |
|
"loss": 0.4372, |
|
"step": 1236000 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 8.444724013209622e-06, |
|
"loss": 0.4363, |
|
"step": 1238000 |
|
}, |
|
{ |
|
"epoch": 29.2, |
|
"learning_rate": 7.972951721968862e-06, |
|
"loss": 0.4348, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 7.5011794307281015e-06, |
|
"loss": 0.4342, |
|
"step": 1242000 |
|
}, |
|
{ |
|
"epoch": 29.3, |
|
"learning_rate": 7.02940713948734e-06, |
|
"loss": 0.4295, |
|
"step": 1244000 |
|
}, |
|
{ |
|
"epoch": 29.35, |
|
"learning_rate": 6.557634848246579e-06, |
|
"loss": 0.4358, |
|
"step": 1246000 |
|
}, |
|
{ |
|
"epoch": 29.39, |
|
"learning_rate": 6.085862557005818e-06, |
|
"loss": 0.4317, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 29.44, |
|
"learning_rate": 5.614090265765056e-06, |
|
"loss": 0.431, |
|
"step": 1250000 |
|
}, |
|
{ |
|
"epoch": 29.49, |
|
"learning_rate": 5.142317974524296e-06, |
|
"loss": 0.4355, |
|
"step": 1252000 |
|
}, |
|
{ |
|
"epoch": 29.53, |
|
"learning_rate": 4.670545683283535e-06, |
|
"loss": 0.4301, |
|
"step": 1254000 |
|
}, |
|
{ |
|
"epoch": 29.58, |
|
"learning_rate": 4.198773392042774e-06, |
|
"loss": 0.4272, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 29.63, |
|
"learning_rate": 3.7270011008020125e-06, |
|
"loss": 0.4325, |
|
"step": 1258000 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 3.255228809561251e-06, |
|
"loss": 0.4288, |
|
"step": 1260000 |
|
}, |
|
{ |
|
"epoch": 29.72, |
|
"learning_rate": 2.7834565183204907e-06, |
|
"loss": 0.4246, |
|
"step": 1262000 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 2.3116842270797294e-06, |
|
"loss": 0.437, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 29.82, |
|
"learning_rate": 1.8399119358389683e-06, |
|
"loss": 0.4337, |
|
"step": 1266000 |
|
}, |
|
{ |
|
"epoch": 29.86, |
|
"learning_rate": 1.3681396445982072e-06, |
|
"loss": 0.4281, |
|
"step": 1268000 |
|
}, |
|
{ |
|
"epoch": 29.91, |
|
"learning_rate": 8.963673533574461e-07, |
|
"loss": 0.4334, |
|
"step": 1270000 |
|
}, |
|
{ |
|
"epoch": 29.96, |
|
"learning_rate": 4.2459506211668494e-07, |
|
"loss": 0.4294, |
|
"step": 1272000 |
|
} |
|
], |
|
"logging_steps": 2000, |
|
"max_steps": 1273800, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 2.5779090018764566e+21, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|