{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 4325,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00023121387283236994,
      "grad_norm": 0.6965160472814916,
      "learning_rate": 4.6189376443418015e-07,
      "loss": 1.086,
      "step": 1
    },
    {
      "epoch": 0.0011560693641618498,
      "grad_norm": 0.6835787858040867,
      "learning_rate": 2.309468822170901e-06,
      "loss": 1.128,
      "step": 5
    },
    {
      "epoch": 0.0023121387283236996,
      "grad_norm": 0.6184758767742636,
      "learning_rate": 4.618937644341802e-06,
      "loss": 1.1188,
      "step": 10
    },
    {
      "epoch": 0.003468208092485549,
      "grad_norm": 0.6078620387437725,
      "learning_rate": 6.928406466512702e-06,
      "loss": 1.1243,
      "step": 15
    },
    {
      "epoch": 0.004624277456647399,
      "grad_norm": 0.5222130979899404,
      "learning_rate": 9.237875288683604e-06,
      "loss": 1.1013,
      "step": 20
    },
    {
      "epoch": 0.005780346820809248,
      "grad_norm": 0.4982345960164489,
      "learning_rate": 1.1547344110854504e-05,
      "loss": 1.0928,
      "step": 25
    },
    {
      "epoch": 0.006936416184971098,
      "grad_norm": 0.40171891380185876,
      "learning_rate": 1.3856812933025404e-05,
      "loss": 1.0736,
      "step": 30
    },
    {
      "epoch": 0.008092485549132947,
      "grad_norm": 0.41106693801534305,
      "learning_rate": 1.6166281755196306e-05,
      "loss": 1.0672,
      "step": 35
    },
    {
      "epoch": 0.009248554913294798,
      "grad_norm": 0.4109497681037648,
      "learning_rate": 1.8475750577367208e-05,
      "loss": 1.0258,
      "step": 40
    },
    {
      "epoch": 0.010404624277456647,
      "grad_norm": 0.379459396453402,
      "learning_rate": 2.0785219399538107e-05,
      "loss": 1.0102,
      "step": 45
    },
    {
      "epoch": 0.011560693641618497,
      "grad_norm": 0.3721675211937845,
      "learning_rate": 2.309468822170901e-05,
      "loss": 1.0234,
      "step": 50
    },
    {
      "epoch": 0.012716763005780347,
      "grad_norm": 0.373222147309766,
      "learning_rate": 2.540415704387991e-05,
      "loss": 0.992,
      "step": 55
    },
    {
      "epoch": 0.013872832369942197,
      "grad_norm": 0.3702598509393352,
      "learning_rate": 2.771362586605081e-05,
      "loss": 1.0326,
      "step": 60
    },
    {
      "epoch": 0.015028901734104046,
      "grad_norm": 0.35490433661120324,
      "learning_rate": 3.0023094688221707e-05,
      "loss": 1.0246,
      "step": 65
    },
    {
      "epoch": 0.016184971098265895,
      "grad_norm": 0.34072939280119285,
      "learning_rate": 3.233256351039261e-05,
      "loss": 1.0115,
      "step": 70
    },
    {
      "epoch": 0.017341040462427744,
      "grad_norm": 0.3831809542290894,
      "learning_rate": 3.464203233256351e-05,
      "loss": 1.0331,
      "step": 75
    },
    {
      "epoch": 0.018497109826589597,
      "grad_norm": 0.3623596879709559,
      "learning_rate": 3.6951501154734416e-05,
      "loss": 1.0022,
      "step": 80
    },
    {
      "epoch": 0.019653179190751446,
      "grad_norm": 0.3931577119185935,
      "learning_rate": 3.9260969976905315e-05,
      "loss": 0.9997,
      "step": 85
    },
    {
      "epoch": 0.020809248554913295,
      "grad_norm": 0.4109841798218973,
      "learning_rate": 4.1570438799076213e-05,
      "loss": 0.9914,
      "step": 90
    },
    {
      "epoch": 0.021965317919075144,
      "grad_norm": 0.40213543229926174,
      "learning_rate": 4.387990762124711e-05,
      "loss": 0.9643,
      "step": 95
    },
    {
      "epoch": 0.023121387283236993,
      "grad_norm": 0.3657003831875091,
      "learning_rate": 4.618937644341802e-05,
      "loss": 0.9957,
      "step": 100
    },
    {
      "epoch": 0.024277456647398842,
      "grad_norm": 0.38609844859658143,
      "learning_rate": 4.8498845265588916e-05,
      "loss": 1.0111,
      "step": 105
    },
    {
      "epoch": 0.025433526011560695,
      "grad_norm": 0.3583592530014769,
      "learning_rate": 5.080831408775982e-05,
      "loss": 0.9963,
      "step": 110
    },
    {
      "epoch": 0.026589595375722544,
      "grad_norm": 0.3554977804638662,
      "learning_rate": 5.311778290993071e-05,
      "loss": 0.9817,
      "step": 115
    },
    {
      "epoch": 0.027745664739884393,
      "grad_norm": 0.3840657570276599,
      "learning_rate": 5.542725173210162e-05,
      "loss": 1.0232,
      "step": 120
    },
    {
      "epoch": 0.028901734104046242,
      "grad_norm": 0.3774759292695808,
      "learning_rate": 5.7736720554272516e-05,
      "loss": 1.0139,
      "step": 125
    },
    {
      "epoch": 0.03005780346820809,
      "grad_norm": 0.360754142577289,
      "learning_rate": 6.0046189376443415e-05,
      "loss": 0.9982,
      "step": 130
    },
    {
      "epoch": 0.03121387283236994,
      "grad_norm": 0.3811963374124325,
      "learning_rate": 6.235565819861431e-05,
      "loss": 1.0088,
      "step": 135
    },
    {
      "epoch": 0.03236994219653179,
      "grad_norm": 0.3667939507833288,
      "learning_rate": 6.466512702078523e-05,
      "loss": 0.9887,
      "step": 140
    },
    {
      "epoch": 0.03352601156069364,
      "grad_norm": 0.3988058302280759,
      "learning_rate": 6.697459584295612e-05,
      "loss": 0.9513,
      "step": 145
    },
    {
      "epoch": 0.03468208092485549,
      "grad_norm": 0.3603023427371918,
      "learning_rate": 6.928406466512702e-05,
      "loss": 1.0239,
      "step": 150
    },
    {
      "epoch": 0.035838150289017344,
      "grad_norm": 0.321539455275297,
      "learning_rate": 7.159353348729792e-05,
      "loss": 0.9821,
      "step": 155
    },
    {
      "epoch": 0.03699421965317919,
      "grad_norm": 0.3548185437448054,
      "learning_rate": 7.390300230946883e-05,
      "loss": 1.0028,
      "step": 160
    },
    {
      "epoch": 0.03815028901734104,
      "grad_norm": 0.33174112797447736,
      "learning_rate": 7.621247113163973e-05,
      "loss": 0.9852,
      "step": 165
    },
    {
      "epoch": 0.03930635838150289,
      "grad_norm": 0.3428952862407286,
      "learning_rate": 7.852193995381063e-05,
      "loss": 1.0035,
      "step": 170
    },
    {
      "epoch": 0.04046242774566474,
      "grad_norm": 0.3164096194909282,
      "learning_rate": 8.083140877598153e-05,
      "loss": 0.9594,
      "step": 175
    },
    {
      "epoch": 0.04161849710982659,
      "grad_norm": 0.3210772711947753,
      "learning_rate": 8.314087759815243e-05,
      "loss": 0.9751,
      "step": 180
    },
    {
      "epoch": 0.04277456647398844,
      "grad_norm": 0.31913209612903376,
      "learning_rate": 8.545034642032334e-05,
      "loss": 0.9816,
      "step": 185
    },
    {
      "epoch": 0.04393063583815029,
      "grad_norm": 0.32476583892299626,
      "learning_rate": 8.775981524249422e-05,
      "loss": 1.0382,
      "step": 190
    },
    {
      "epoch": 0.04508670520231214,
      "grad_norm": 0.33804033601960015,
      "learning_rate": 9.006928406466512e-05,
      "loss": 1.0278,
      "step": 195
    },
    {
      "epoch": 0.046242774566473986,
      "grad_norm": 0.3034185595284857,
      "learning_rate": 9.237875288683603e-05,
      "loss": 0.9209,
      "step": 200
    },
    {
      "epoch": 0.047398843930635835,
      "grad_norm": 0.30966667195251285,
      "learning_rate": 9.468822170900693e-05,
      "loss": 0.9929,
      "step": 205
    },
    {
      "epoch": 0.048554913294797684,
      "grad_norm": 0.3110271670167101,
      "learning_rate": 9.699769053117783e-05,
      "loss": 0.999,
      "step": 210
    },
    {
      "epoch": 0.04971098265895954,
      "grad_norm": 0.3168339881753396,
      "learning_rate": 9.930715935334873e-05,
      "loss": 0.9801,
      "step": 215
    },
    {
      "epoch": 0.05086705202312139,
      "grad_norm": 0.2980534767723837,
      "learning_rate": 0.00010161662817551964,
      "loss": 0.964,
      "step": 220
    },
    {
      "epoch": 0.05202312138728324,
      "grad_norm": 0.29439975733255125,
      "learning_rate": 0.00010392609699769054,
      "loss": 1.0141,
      "step": 225
    },
    {
      "epoch": 0.05317919075144509,
      "grad_norm": 0.2939300503984728,
      "learning_rate": 0.00010623556581986143,
      "loss": 1.029,
      "step": 230
    },
    {
      "epoch": 0.05433526011560694,
      "grad_norm": 0.2921065506824694,
      "learning_rate": 0.00010854503464203234,
      "loss": 1.0008,
      "step": 235
    },
    {
      "epoch": 0.055491329479768786,
      "grad_norm": 0.29281624873588324,
      "learning_rate": 0.00011085450346420324,
      "loss": 1.0286,
      "step": 240
    },
    {
      "epoch": 0.056647398843930635,
      "grad_norm": 0.30008365055838865,
      "learning_rate": 0.00011316397228637415,
      "loss": 1.0009,
      "step": 245
    },
    {
      "epoch": 0.057803468208092484,
      "grad_norm": 0.2821304520615669,
      "learning_rate": 0.00011547344110854503,
      "loss": 1.0172,
      "step": 250
    },
    {
      "epoch": 0.058959537572254334,
      "grad_norm": 0.28925572801814625,
      "learning_rate": 0.00011778290993071594,
      "loss": 1.0106,
      "step": 255
    },
    {
      "epoch": 0.06011560693641618,
      "grad_norm": 0.2979979142081887,
      "learning_rate": 0.00012009237875288683,
      "loss": 0.9851,
      "step": 260
    },
    {
      "epoch": 0.06127167630057803,
      "grad_norm": 0.27608874909832487,
      "learning_rate": 0.00012240184757505776,
      "loss": 0.9386,
      "step": 265
    },
    {
      "epoch": 0.06242774566473988,
      "grad_norm": 0.270653606190121,
      "learning_rate": 0.00012471131639722863,
      "loss": 0.9823,
      "step": 270
    },
    {
      "epoch": 0.06358381502890173,
      "grad_norm": 0.2889956899438749,
      "learning_rate": 0.00012702078521939955,
      "loss": 1.0096,
      "step": 275
    },
    {
      "epoch": 0.06473988439306358,
      "grad_norm": 0.30488051626437424,
      "learning_rate": 0.00012933025404157045,
      "loss": 0.9819,
      "step": 280
    },
    {
      "epoch": 0.06589595375722543,
      "grad_norm": 0.2884447508894878,
      "learning_rate": 0.00013163972286374135,
      "loss": 0.9468,
      "step": 285
    },
    {
      "epoch": 0.06705202312138728,
      "grad_norm": 0.2755861252646099,
      "learning_rate": 0.00013394919168591225,
      "loss": 0.9828,
      "step": 290
    },
    {
      "epoch": 0.06820809248554913,
      "grad_norm": 0.3025742735794824,
      "learning_rate": 0.00013625866050808315,
      "loss": 1.0178,
      "step": 295
    },
    {
      "epoch": 0.06936416184971098,
      "grad_norm": 0.28423295151616335,
      "learning_rate": 0.00013856812933025404,
      "loss": 0.9945,
      "step": 300
    },
    {
      "epoch": 0.07052023121387284,
      "grad_norm": 0.27585086154702476,
      "learning_rate": 0.00014087759815242494,
      "loss": 0.9695,
      "step": 305
    },
    {
      "epoch": 0.07167630057803469,
      "grad_norm": 0.3038442212974576,
      "learning_rate": 0.00014318706697459584,
      "loss": 0.9808,
      "step": 310
    },
    {
      "epoch": 0.07283236994219654,
      "grad_norm": 0.27277935404650916,
      "learning_rate": 0.00014549653579676674,
      "loss": 0.9846,
      "step": 315
    },
    {
      "epoch": 0.07398843930635839,
      "grad_norm": 0.29591421870168255,
      "learning_rate": 0.00014780600461893767,
      "loss": 0.9954,
      "step": 320
    },
    {
      "epoch": 0.07514450867052024,
      "grad_norm": 0.2823015918956852,
      "learning_rate": 0.00015011547344110854,
      "loss": 0.9674,
      "step": 325
    },
    {
      "epoch": 0.07630057803468208,
      "grad_norm": 0.27344790533462154,
      "learning_rate": 0.00015242494226327946,
      "loss": 1.0236,
      "step": 330
    },
    {
      "epoch": 0.07745664739884393,
      "grad_norm": 0.2949347092864385,
      "learning_rate": 0.00015473441108545036,
      "loss": 0.9813,
      "step": 335
    },
    {
      "epoch": 0.07861271676300578,
      "grad_norm": 0.272235850063355,
      "learning_rate": 0.00015704387990762126,
      "loss": 0.987,
      "step": 340
    },
    {
      "epoch": 0.07976878612716763,
      "grad_norm": 0.28565982534338485,
      "learning_rate": 0.00015935334872979216,
      "loss": 0.9716,
      "step": 345
    },
    {
      "epoch": 0.08092485549132948,
      "grad_norm": 0.28274111136822716,
      "learning_rate": 0.00016166281755196306,
      "loss": 0.9779,
      "step": 350
    },
    {
      "epoch": 0.08208092485549133,
      "grad_norm": 0.2878620812403313,
      "learning_rate": 0.00016397228637413396,
      "loss": 1.0209,
      "step": 355
    },
    {
      "epoch": 0.08323699421965318,
      "grad_norm": 0.2865629307216328,
      "learning_rate": 0.00016628175519630485,
      "loss": 0.9921,
      "step": 360
    },
    {
      "epoch": 0.08439306358381503,
      "grad_norm": 0.2826816337246457,
      "learning_rate": 0.00016859122401847575,
      "loss": 0.9348,
      "step": 365
    },
    {
      "epoch": 0.08554913294797688,
      "grad_norm": 0.2881907360895622,
      "learning_rate": 0.00017090069284064668,
      "loss": 1.0231,
      "step": 370
    },
    {
      "epoch": 0.08670520231213873,
      "grad_norm": 0.27638308021239116,
      "learning_rate": 0.00017321016166281755,
      "loss": 1.0079,
      "step": 375
    },
    {
      "epoch": 0.08786127167630058,
      "grad_norm": 0.28798387568151884,
      "learning_rate": 0.00017551963048498845,
      "loss": 0.9165,
      "step": 380
    },
    {
      "epoch": 0.08901734104046242,
      "grad_norm": 0.2767437621962458,
      "learning_rate": 0.00017782909930715937,
      "loss": 0.9489,
      "step": 385
    },
    {
      "epoch": 0.09017341040462427,
      "grad_norm": 0.2877748564441354,
      "learning_rate": 0.00018013856812933024,
      "loss": 1.0022,
      "step": 390
    },
    {
      "epoch": 0.09132947976878612,
      "grad_norm": 0.28925322685576144,
      "learning_rate": 0.00018244803695150117,
      "loss": 1.004,
      "step": 395
    },
    {
      "epoch": 0.09248554913294797,
      "grad_norm": 0.31070512225405156,
      "learning_rate": 0.00018475750577367207,
      "loss": 0.9753,
      "step": 400
    },
    {
      "epoch": 0.09364161849710982,
      "grad_norm": 0.31735644814371816,
      "learning_rate": 0.00018706697459584297,
      "loss": 0.9781,
      "step": 405
    },
    {
      "epoch": 0.09479768786127167,
      "grad_norm": 0.2974104968051762,
      "learning_rate": 0.00018937644341801387,
      "loss": 0.9659,
      "step": 410
    },
    {
      "epoch": 0.09595375722543352,
      "grad_norm": 0.2701025540904289,
      "learning_rate": 0.00019168591224018476,
      "loss": 0.9294,
      "step": 415
    },
    {
      "epoch": 0.09710982658959537,
      "grad_norm": 0.27428411358071536,
      "learning_rate": 0.00019399538106235566,
      "loss": 0.9513,
      "step": 420
    },
    {
      "epoch": 0.09826589595375723,
      "grad_norm": 0.2745240121214777,
      "learning_rate": 0.0001963048498845266,
      "loss": 1.0152,
      "step": 425
    },
    {
      "epoch": 0.09942196531791908,
      "grad_norm": 0.274405426645142,
      "learning_rate": 0.00019861431870669746,
      "loss": 0.9863,
      "step": 430
    },
    {
      "epoch": 0.10057803468208093,
      "grad_norm": 0.9048729498402529,
      "learning_rate": 0.00019999986968812804,
      "loss": 0.9992,
      "step": 435
    },
    {
      "epoch": 0.10173410404624278,
      "grad_norm": 0.29062903824052616,
      "learning_rate": 0.00019999840368346898,
      "loss": 1.0509,
      "step": 440
    },
    {
      "epoch": 0.10289017341040463,
      "grad_norm": 0.28317998532870003,
      "learning_rate": 0.0001999953088082702,
      "loss": 0.9774,
      "step": 445
    },
    {
      "epoch": 0.10404624277456648,
      "grad_norm": 0.3000365098515979,
      "learning_rate": 0.000199990585112944,
      "loss": 0.9822,
      "step": 450
    },
    {
      "epoch": 0.10520231213872833,
      "grad_norm": 0.28508661433222776,
      "learning_rate": 0.00019998423267443454,
      "loss": 0.9991,
      "step": 455
    },
    {
      "epoch": 0.10635838150289018,
      "grad_norm": 0.300390972643917,
      "learning_rate": 0.00019997625159621642,
      "loss": 0.9411,
      "step": 460
    },
    {
      "epoch": 0.10751445086705202,
      "grad_norm": 0.3065580188577536,
      "learning_rate": 0.0001999666420082932,
      "loss": 1.0153,
      "step": 465
    },
    {
      "epoch": 0.10867052023121387,
      "grad_norm": 0.30176770344761106,
      "learning_rate": 0.00019995540406719507,
      "loss": 0.9451,
      "step": 470
    },
    {
      "epoch": 0.10982658959537572,
      "grad_norm": 0.28363462288539226,
      "learning_rate": 0.0001999425379559765,
      "loss": 1.0229,
      "step": 475
    },
    {
      "epoch": 0.11098265895953757,
      "grad_norm": 0.2979648349669768,
      "learning_rate": 0.00019992804388421312,
      "loss": 0.9615,
      "step": 480
    },
    {
      "epoch": 0.11213872832369942,
      "grad_norm": 0.3533699255705701,
      "learning_rate": 0.00019991192208799837,
      "loss": 0.9945,
      "step": 485
    },
    {
      "epoch": 0.11329479768786127,
      "grad_norm": 0.29374690183301444,
      "learning_rate": 0.0001998941728299396,
      "loss": 0.9481,
      "step": 490
    },
    {
      "epoch": 0.11445086705202312,
      "grad_norm": 0.27638233259638606,
      "learning_rate": 0.0001998747963991539,
      "loss": 0.976,
      "step": 495
    },
    {
      "epoch": 0.11560693641618497,
      "grad_norm": 0.29591583372167063,
      "learning_rate": 0.00019985379311126327,
      "loss": 0.9776,
      "step": 500
    },
    {
      "epoch": 0.11676300578034682,
      "grad_norm": 0.28596466519406494,
      "learning_rate": 0.00019983116330838955,
      "loss": 1.0003,
      "step": 505
    },
    {
      "epoch": 0.11791907514450867,
      "grad_norm": 0.27570121782043344,
      "learning_rate": 0.00019980690735914877,
      "loss": 0.9797,
      "step": 510
    },
    {
      "epoch": 0.11907514450867052,
      "grad_norm": 0.30038501431153675,
      "learning_rate": 0.0001997810256586453,
      "loss": 0.9865,
      "step": 515
    },
    {
      "epoch": 0.12023121387283237,
      "grad_norm": 0.31132305182282943,
      "learning_rate": 0.00019975351862846523,
      "loss": 1.0071,
      "step": 520
    },
    {
      "epoch": 0.12138728323699421,
      "grad_norm": 0.28621069496048757,
      "learning_rate": 0.00019972438671666967,
      "loss": 0.9877,
      "step": 525
    },
    {
      "epoch": 0.12254335260115606,
      "grad_norm": 0.3030051142967184,
      "learning_rate": 0.00019969363039778728,
      "loss": 0.9894,
      "step": 530
    },
    {
      "epoch": 0.12369942196531791,
      "grad_norm": 0.2866405334142299,
      "learning_rate": 0.0001996612501728067,
      "loss": 0.9893,
      "step": 535
    },
    {
      "epoch": 0.12485549132947976,
      "grad_norm": 0.2867249309375898,
      "learning_rate": 0.00019962724656916826,
      "loss": 0.9765,
      "step": 540
    },
    {
      "epoch": 0.1260115606936416,
      "grad_norm": 0.31798324367678865,
      "learning_rate": 0.00019959162014075553,
      "loss": 0.9465,
      "step": 545
    },
    {
      "epoch": 0.12716763005780346,
      "grad_norm": 0.30253196958089823,
      "learning_rate": 0.0001995543714678861,
      "loss": 0.9635,
      "step": 550
    },
    {
      "epoch": 0.1283236994219653,
      "grad_norm": 0.28954658171708875,
      "learning_rate": 0.00019951550115730244,
      "loss": 1.0041,
      "step": 555
    },
    {
      "epoch": 0.12947976878612716,
      "grad_norm": 0.28359238422516453,
      "learning_rate": 0.00019947500984216157,
      "loss": 0.9837,
      "step": 560
    },
    {
      "epoch": 0.130635838150289,
      "grad_norm": 0.29624734780777734,
      "learning_rate": 0.00019943289818202519,
      "loss": 0.9375,
      "step": 565
    },
    {
      "epoch": 0.13179190751445086,
      "grad_norm": 0.291198302886137,
      "learning_rate": 0.0001993891668628486,
      "loss": 0.9665,
      "step": 570
    },
    {
      "epoch": 0.1329479768786127,
      "grad_norm": 0.31056892991094237,
      "learning_rate": 0.00019934381659696989,
      "loss": 0.9414,
      "step": 575
    },
    {
      "epoch": 0.13410404624277455,
      "grad_norm": 0.3136680287460065,
      "learning_rate": 0.0001992968481230978,
      "loss": 1.0442,
      "step": 580
    },
    {
      "epoch": 0.1352601156069364,
      "grad_norm": 0.30128304774644027,
      "learning_rate": 0.0001992482622063003,
      "loss": 0.9916,
      "step": 585
    },
    {
      "epoch": 0.13641618497109825,
      "grad_norm": 0.29097714851626455,
      "learning_rate": 0.00019919805963799166,
      "loss": 0.9947,
      "step": 590
    },
    {
      "epoch": 0.1375722543352601,
      "grad_norm": 0.280417582339227,
      "learning_rate": 0.0001991462412359198,
      "loss": 0.9825,
      "step": 595
    },
    {
      "epoch": 0.13872832369942195,
      "grad_norm": 0.28842456996684646,
      "learning_rate": 0.00019909280784415287,
      "loss": 1.0237,
      "step": 600
    },
    {
      "epoch": 0.13988439306358383,
      "grad_norm": 0.2874190385868597,
      "learning_rate": 0.00019903776033306555,
      "loss": 0.9611,
      "step": 605
    },
    {
      "epoch": 0.14104046242774568,
      "grad_norm": 0.2854599682323198,
      "learning_rate": 0.00019898109959932478,
      "loss": 0.9879,
      "step": 610
    },
    {
      "epoch": 0.14219653179190753,
      "grad_norm": 0.2986976522432421,
      "learning_rate": 0.0001989228265658754,
      "loss": 0.9911,
      "step": 615
    },
    {
      "epoch": 0.14335260115606938,
      "grad_norm": 0.329057703574734,
      "learning_rate": 0.00019886294218192477,
      "loss": 0.9714,
      "step": 620
    },
    {
      "epoch": 0.14450867052023122,
      "grad_norm": 0.29538740567126964,
      "learning_rate": 0.00019880144742292753,
      "loss": 0.955,
      "step": 625
    },
    {
      "epoch": 0.14566473988439307,
      "grad_norm": 0.32615973261215037,
      "learning_rate": 0.00019873834329056975,
      "loss": 0.9789,
      "step": 630
    },
    {
      "epoch": 0.14682080924855492,
      "grad_norm": 0.26819671057976713,
      "learning_rate": 0.00019867363081275242,
      "loss": 0.9471,
      "step": 635
    },
    {
      "epoch": 0.14797687861271677,
      "grad_norm": 0.28488339516835476,
      "learning_rate": 0.00019860731104357485,
      "loss": 0.9779,
      "step": 640
    },
    {
      "epoch": 0.14913294797687862,
      "grad_norm": 0.27998012828783303,
      "learning_rate": 0.00019853938506331749,
      "loss": 0.951,
      "step": 645
    },
    {
      "epoch": 0.15028901734104047,
      "grad_norm": 0.29388395642325527,
      "learning_rate": 0.00019846985397842427,
      "loss": 0.9554,
      "step": 650
    },
    {
      "epoch": 0.15144508670520232,
      "grad_norm": 0.2896808457234832,
      "learning_rate": 0.0001983987189214846,
      "loss": 0.9651,
      "step": 655
    },
    {
      "epoch": 0.15260115606936417,
      "grad_norm": 0.30032384976542736,
      "learning_rate": 0.000198325981051215,
      "loss": 0.943,
      "step": 660
    },
    {
      "epoch": 0.15375722543352602,
      "grad_norm": 0.2977312487124492,
      "learning_rate": 0.00019825164155244012,
      "loss": 0.9887,
      "step": 665
    },
    {
      "epoch": 0.15491329479768787,
      "grad_norm": 0.341662364901083,
      "learning_rate": 0.00019817570163607347,
      "loss": 1.0059,
      "step": 670
    },
    {
      "epoch": 0.15606936416184972,
      "grad_norm": 0.2996891760902693,
      "learning_rate": 0.00019809816253909773,
      "loss": 0.9673,
      "step": 675
    },
    {
      "epoch": 0.15722543352601157,
      "grad_norm": 0.2882952073089595,
      "learning_rate": 0.00019801902552454454,
      "loss": 0.9558,
      "step": 680
    },
    {
      "epoch": 0.15838150289017341,
      "grad_norm": 0.3203389676438109,
      "learning_rate": 0.00019793829188147406,
      "loss": 1.0122,
      "step": 685
    },
    {
      "epoch": 0.15953757225433526,
      "grad_norm": 0.30270844487268483,
      "learning_rate": 0.00019785596292495376,
      "loss": 0.9822,
      "step": 690
    },
    {
      "epoch": 0.1606936416184971,
      "grad_norm": 0.27999850496563145,
      "learning_rate": 0.00019777203999603717,
      "loss": 0.9841,
      "step": 695
    },
    {
      "epoch": 0.16184971098265896,
      "grad_norm": 0.29383524324706056,
      "learning_rate": 0.000197686524461742,
      "loss": 1.0269,
      "step": 700
    },
    {
      "epoch": 0.1630057803468208,
      "grad_norm": 0.28066838492867907,
      "learning_rate": 0.0001975994177150278,
      "loss": 0.9927,
      "step": 705
    },
    {
      "epoch": 0.16416184971098266,
      "grad_norm": 0.31122106212206363,
      "learning_rate": 0.0001975107211747734,
      "loss": 0.9632,
      "step": 710
    },
    {
      "epoch": 0.1653179190751445,
      "grad_norm": 0.30120413493928255,
      "learning_rate": 0.00019742043628575364,
      "loss": 0.9739,
      "step": 715
    },
    {
      "epoch": 0.16647398843930636,
      "grad_norm": 0.30660646504263267,
      "learning_rate": 0.00019732856451861594,
      "loss": 0.976,
      "step": 720
    },
    {
      "epoch": 0.1676300578034682,
      "grad_norm": 0.2856850255835453,
      "learning_rate": 0.0001972351073698564,
      "loss": 0.9848,
      "step": 725
    },
    {
      "epoch": 0.16878612716763006,
      "grad_norm": 0.3147493299962656,
      "learning_rate": 0.0001971400663617952,
      "loss": 0.9921,
      "step": 730
    },
    {
      "epoch": 0.1699421965317919,
      "grad_norm": 0.29251745238857035,
      "learning_rate": 0.0001970434430425521,
      "loss": 0.9398,
      "step": 735
    },
    {
      "epoch": 0.17109826589595376,
      "grad_norm": 0.2938440274420457,
      "learning_rate": 0.000196945238986021,
      "loss": 1.0153,
      "step": 740
    },
    {
      "epoch": 0.1722543352601156,
      "grad_norm": 0.2898363733115959,
      "learning_rate": 0.00019684545579184433,
      "loss": 0.9576,
      "step": 745
    },
    {
      "epoch": 0.17341040462427745,
      "grad_norm": 0.3088888037796155,
      "learning_rate": 0.00019674409508538718,
      "loss": 0.9696,
      "step": 750
    },
    {
      "epoch": 0.1745664739884393,
      "grad_norm": 0.2942346284280998,
      "learning_rate": 0.0001966411585177105,
      "loss": 1.0203,
      "step": 755
    },
    {
      "epoch": 0.17572254335260115,
      "grad_norm": 0.2852125727236794,
      "learning_rate": 0.00019653664776554455,
      "loss": 0.9556,
      "step": 760
    },
    {
      "epoch": 0.176878612716763,
      "grad_norm": 0.3016262447783914,
      "learning_rate": 0.0001964305645312613,
      "loss": 0.9896,
      "step": 765
    },
    {
      "epoch": 0.17803468208092485,
      "grad_norm": 0.30255168675186533,
      "learning_rate": 0.00019632291054284693,
      "loss": 0.9839,
      "step": 770
    },
    {
      "epoch": 0.1791907514450867,
      "grad_norm": 0.2806238558760804,
      "learning_rate": 0.0001962136875538735,
      "loss": 0.9748,
      "step": 775
    },
    {
      "epoch": 0.18034682080924855,
      "grad_norm": 0.2945672906624874,
      "learning_rate": 0.00019610289734347053,
      "loss": 0.9479,
      "step": 780
    },
    {
      "epoch": 0.1815028901734104,
      "grad_norm": 0.3012247410303452,
      "learning_rate": 0.00019599054171629595,
      "loss": 1.0132,
      "step": 785
    },
    {
      "epoch": 0.18265895953757225,
      "grad_norm": 0.2944499117709193,
      "learning_rate": 0.0001958766225025066,
      "loss": 0.9336,
      "step": 790
    },
    {
      "epoch": 0.1838150289017341,
      "grad_norm": 0.2989322109974369,
      "learning_rate": 0.0001957611415577287,
      "loss": 0.9857,
      "step": 795
    },
    {
      "epoch": 0.18497109826589594,
      "grad_norm": 0.2859334477998094,
      "learning_rate": 0.0001956441007630273,
      "loss": 0.9831,
      "step": 800
    },
    {
      "epoch": 0.1861271676300578,
      "grad_norm": 0.27928427673168016,
      "learning_rate": 0.0001955255020248759,
      "loss": 0.9582,
      "step": 805
    },
    {
      "epoch": 0.18728323699421964,
      "grad_norm": 0.3044716011707441,
      "learning_rate": 0.00019540534727512522,
      "loss": 1.0061,
      "step": 810
    },
    {
      "epoch": 0.1884393063583815,
      "grad_norm": 0.30123009109430604,
      "learning_rate": 0.00019528363847097185,
      "loss": 1.0015,
      "step": 815
    },
    {
      "epoch": 0.18959537572254334,
      "grad_norm": 0.2983431588431708,
      "learning_rate": 0.00019516037759492627,
      "loss": 0.9917,
      "step": 820
    },
    {
      "epoch": 0.1907514450867052,
      "grad_norm": 0.3045474200889231,
      "learning_rate": 0.00019503556665478067,
      "loss": 0.9924,
      "step": 825
    },
    {
      "epoch": 0.19190751445086704,
      "grad_norm": 0.3023598788495744,
      "learning_rate": 0.00019490920768357607,
      "loss": 0.9824,
      "step": 830
    },
    {
      "epoch": 0.1930635838150289,
      "grad_norm": 0.288004788186244,
      "learning_rate": 0.00019478130273956943,
      "loss": 0.9756,
      "step": 835
    },
    {
      "epoch": 0.19421965317919074,
      "grad_norm": 0.28774556008482255,
      "learning_rate": 0.00019465185390619996,
      "loss": 0.9292,
      "step": 840
    },
    {
      "epoch": 0.19537572254335261,
      "grad_norm": 0.29766392431820693,
      "learning_rate": 0.00019452086329205522,
      "loss": 1.008,
      "step": 845
    },
    {
      "epoch": 0.19653179190751446,
      "grad_norm": 0.2968523784854959,
      "learning_rate": 0.00019438833303083678,
      "loss": 0.9469,
      "step": 850
    },
    {
      "epoch": 0.1976878612716763,
      "grad_norm": 0.32050710908212127,
      "learning_rate": 0.00019425426528132546,
      "loss": 0.9584,
      "step": 855
    },
    {
      "epoch": 0.19884393063583816,
      "grad_norm": 0.30039833468190924,
      "learning_rate": 0.00019411866222734627,
      "loss": 0.9345,
      "step": 860
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.3003418210003542,
      "learning_rate": 0.00019398152607773264,
      "loss": 1.0149,
      "step": 865
    },
    {
      "epoch": 0.20115606936416186,
      "grad_norm": 0.28740303119481114,
      "learning_rate": 0.00019384285906629055,
      "loss": 0.9453,
      "step": 870
    },
    {
      "epoch": 0.2023121387283237,
      "grad_norm": 0.3022020472256075,
      "learning_rate": 0.00019370266345176214,
      "loss": 0.9964,
      "step": 875
    },
    {
      "epoch": 0.20346820809248556,
      "grad_norm": 0.2887054624910223,
      "learning_rate": 0.00019356094151778895,
      "loss": 0.9857,
      "step": 880
    },
    {
      "epoch": 0.2046242774566474,
      "grad_norm": 0.38190567119224494,
      "learning_rate": 0.00019341769557287467,
      "loss": 0.9554,
      "step": 885
    },
    {
      "epoch": 0.20578034682080926,
      "grad_norm": 0.29810678010523667,
      "learning_rate": 0.00019327292795034753,
      "loss": 0.9138,
      "step": 890
    },
    {
      "epoch": 0.2069364161849711,
      "grad_norm": 0.30694177853911697,
      "learning_rate": 0.00019312664100832233,
      "loss": 0.9504,
      "step": 895
    },
    {
      "epoch": 0.20809248554913296,
      "grad_norm": 0.31819590085942273,
      "learning_rate": 0.00019297883712966204,
      "loss": 0.9969,
      "step": 900
    },
    {
      "epoch": 0.2092485549132948,
      "grad_norm": 0.2915158769948663,
      "learning_rate": 0.00019282951872193885,
      "loss": 1.0207,
      "step": 905
    },
    {
      "epoch": 0.21040462427745665,
      "grad_norm": 0.3020525391391881,
      "learning_rate": 0.0001926786882173952,
      "loss": 0.9424,
      "step": 910
    },
    {
      "epoch": 0.2115606936416185,
      "grad_norm": 0.3095938351879466,
      "learning_rate": 0.000192526348072904,
      "loss": 0.9762,
      "step": 915
    },
    {
      "epoch": 0.21271676300578035,
      "grad_norm": 0.32067080864793646,
      "learning_rate": 0.0001923725007699285,
      "loss": 0.9423,
      "step": 920
    },
    {
      "epoch": 0.2138728323699422,
      "grad_norm": 0.3144763229931657,
      "learning_rate": 0.00019221714881448217,
      "loss": 0.9824,
      "step": 925
    },
    {
      "epoch": 0.21502890173410405,
      "grad_norm": 0.3179959968229521,
      "learning_rate": 0.0001920602947370876,
      "loss": 0.9829,
      "step": 930
    },
    {
      "epoch": 0.2161849710982659,
      "grad_norm": 0.30113560950220297,
      "learning_rate": 0.00019190194109273544,
      "loss": 0.9463,
      "step": 935
    },
    {
      "epoch": 0.21734104046242775,
      "grad_norm": 0.2896899865821755,
      "learning_rate": 0.00019174209046084276,
      "loss": 0.9786,
      "step": 940
    },
    {
      "epoch": 0.2184971098265896,
      "grad_norm": 0.2969332481357098,
      "learning_rate": 0.00019158074544521094,
      "loss": 1.0183,
      "step": 945
    },
    {
      "epoch": 0.21965317919075145,
      "grad_norm": 0.29984068010125464,
      "learning_rate": 0.0001914179086739834,
      "loss": 0.9688,
      "step": 950
    },
    {
      "epoch": 0.2208092485549133,
      "grad_norm": 0.3179298483108417,
      "learning_rate": 0.0001912535827996026,
      "loss": 1.0213,
      "step": 955
    },
    {
      "epoch": 0.22196531791907514,
      "grad_norm": 0.30071667875773894,
      "learning_rate": 0.0001910877704987671,
      "loss": 0.9947,
      "step": 960
    },
    {
      "epoch": 0.223121387283237,
      "grad_norm": 0.3162017726279279,
      "learning_rate": 0.00019092047447238773,
      "loss": 0.9765,
      "step": 965
    },
    {
      "epoch": 0.22427745664739884,
      "grad_norm": 0.31999805840625895,
      "learning_rate": 0.0001907516974455436,
      "loss": 0.9956,
      "step": 970
    },
    {
      "epoch": 0.2254335260115607,
      "grad_norm": 0.2931503664377131,
      "learning_rate": 0.00019058144216743797,
      "loss": 1.002,
      "step": 975
    },
    {
      "epoch": 0.22658959537572254,
      "grad_norm": 0.30298932952701096,
      "learning_rate": 0.0001904097114113531,
      "loss": 1.0159,
      "step": 980
    },
    {
      "epoch": 0.2277456647398844,
      "grad_norm": 0.29804869616385626,
      "learning_rate": 0.0001902365079746054,
      "loss": 0.9771,
      "step": 985
    },
    {
      "epoch": 0.22890173410404624,
      "grad_norm": 0.30761841093550274,
      "learning_rate": 0.00019006183467849957,
      "loss": 1.0157,
      "step": 990
    },
    {
      "epoch": 0.2300578034682081,
      "grad_norm": 0.3080044180069082,
      "learning_rate": 0.000189885694368283,
      "loss": 0.9463,
      "step": 995
    },
    {
      "epoch": 0.23121387283236994,
      "grad_norm": 0.29852967122969754,
      "learning_rate": 0.00018970808991309904,
      "loss": 1.0021,
      "step": 1000
    },
    {
      "epoch": 0.2323699421965318,
      "grad_norm": 0.2987990831868652,
      "learning_rate": 0.00018952902420594058,
      "loss": 0.9492,
      "step": 1005
    },
    {
      "epoch": 0.23352601156069364,
      "grad_norm": 0.30446826555699585,
      "learning_rate": 0.0001893485001636026,
      "loss": 1.0019,
      "step": 1010
    },
    {
      "epoch": 0.23468208092485549,
      "grad_norm": 0.30998357590060016,
      "learning_rate": 0.00018916652072663515,
      "loss": 0.9478,
      "step": 1015
    },
    {
      "epoch": 0.23583815028901733,
      "grad_norm": 0.3109448921090665,
      "learning_rate": 0.0001889830888592949,
      "loss": 0.9797,
      "step": 1020
    },
    {
      "epoch": 0.23699421965317918,
      "grad_norm": 0.30658444282489306,
      "learning_rate": 0.00018879820754949718,
      "loss": 0.9976,
      "step": 1025
    },
    {
      "epoch": 0.23815028901734103,
      "grad_norm": 0.32337481964353393,
      "learning_rate": 0.0001886118798087673,
      "loss": 0.9622,
      "step": 1030
    },
    {
      "epoch": 0.23930635838150288,
      "grad_norm": 0.30934913326885294,
      "learning_rate": 0.00018842410867219136,
      "loss": 1.0095,
      "step": 1035
    },
    {
      "epoch": 0.24046242774566473,
      "grad_norm": 0.32554022935815935,
      "learning_rate": 0.0001882348971983669,
      "loss": 1.0082,
      "step": 1040
    },
    {
      "epoch": 0.24161849710982658,
      "grad_norm": 0.28387403686918444,
      "learning_rate": 0.0001880442484693531,
      "loss": 0.9433,
      "step": 1045
    },
    {
      "epoch": 0.24277456647398843,
      "grad_norm": 0.2889713456813008,
      "learning_rate": 0.0001878521655906205,
      "loss": 0.994,
      "step": 1050
    },
    {
      "epoch": 0.24393063583815028,
      "grad_norm": 0.3038933468668014,
      "learning_rate": 0.00018765865169100048,
      "loss": 0.966,
      "step": 1055
    },
    {
      "epoch": 0.24508670520231213,
      "grad_norm": 0.294597892145829,
      "learning_rate": 0.00018746370992263423,
      "loss": 0.9501,
      "step": 1060
    },
    {
      "epoch": 0.24624277456647398,
      "grad_norm": 0.30895343503048994,
      "learning_rate": 0.00018726734346092148,
      "loss": 0.9663,
      "step": 1065
    },
    {
      "epoch": 0.24739884393063583,
      "grad_norm": 0.32867090123543136,
      "learning_rate": 0.00018706955550446878,
      "loss": 1.0,
      "step": 1070
    },
    {
      "epoch": 0.24855491329479767,
      "grad_norm": 0.2860732876796574,
      "learning_rate": 0.00018687034927503728,
      "loss": 0.9282,
      "step": 1075
    },
    {
      "epoch": 0.24971098265895952,
      "grad_norm": 0.3622792683930793,
      "learning_rate": 0.00018666972801749035,
      "loss": 0.9534,
      "step": 1080
    },
    {
      "epoch": 0.2508670520231214,
      "grad_norm": 0.3215073797028641,
      "learning_rate": 0.00018646769499974076,
      "loss": 1.0177,
      "step": 1085
    },
    {
      "epoch": 0.2520231213872832,
      "grad_norm": 0.30391881014243827,
      "learning_rate": 0.00018626425351269733,
      "loss": 1.0213,
      "step": 1090
    },
    {
      "epoch": 0.25317919075144507,
      "grad_norm": 0.2959182253635083,
      "learning_rate": 0.00018605940687021133,
      "loss": 0.9265,
      "step": 1095
    },
    {
      "epoch": 0.2543352601156069,
      "grad_norm": 0.3054358776400607,
      "learning_rate": 0.00018585315840902275,
      "loss": 0.9566,
      "step": 1100
    },
    {
      "epoch": 0.25549132947976877,
      "grad_norm": 0.3038159760611022,
      "learning_rate": 0.00018564551148870563,
      "loss": 0.9728,
      "step": 1105
    },
    {
      "epoch": 0.2566473988439306,
      "grad_norm": 0.295408221908172,
      "learning_rate": 0.0001854364694916134,
      "loss": 0.9769,
      "step": 1110
    },
    {
      "epoch": 0.25780346820809247,
      "grad_norm": 0.3040191717007843,
      "learning_rate": 0.00018522603582282396,
      "loss": 0.9745,
      "step": 1115
    },
    {
      "epoch": 0.2589595375722543,
      "grad_norm": 0.3276795968564327,
      "learning_rate": 0.0001850142139100841,
      "loss": 0.9843,
      "step": 1120
    },
    {
      "epoch": 0.26011560693641617,
      "grad_norm": 0.30521432519770036,
      "learning_rate": 0.0001848010072037536,
      "loss": 0.9958,
      "step": 1125
    },
    {
      "epoch": 0.261271676300578,
      "grad_norm": 0.3074723851239691,
      "learning_rate": 0.0001845864191767491,
      "loss": 1.0061,
      "step": 1130
    },
    {
      "epoch": 0.26242774566473986,
      "grad_norm": 0.38542353780888683,
      "learning_rate": 0.0001843704533244876,
      "loss": 0.9527,
      "step": 1135
    },
    {
      "epoch": 0.2635838150289017,
      "grad_norm": 0.31655887417225065,
      "learning_rate": 0.00018415311316482934,
      "loss": 0.9677,
      "step": 1140
    },
    {
      "epoch": 0.26473988439306356,
      "grad_norm": 0.296622948449929,
      "learning_rate": 0.00018393440223802077,
      "loss": 1.0277,
      "step": 1145
    },
    {
      "epoch": 0.2658959537572254,
      "grad_norm": 0.30339302168936083,
      "learning_rate": 0.0001837143241066365,
      "loss": 0.9996,
      "step": 1150
    },
    {
      "epoch": 0.26705202312138726,
      "grad_norm": 0.321536892508796,
      "learning_rate": 0.00018349288235552168,
      "loss": 0.9599,
      "step": 1155
    },
    {
      "epoch": 0.2682080924855491,
      "grad_norm": 0.3220388229874098,
      "learning_rate": 0.0001832700805917333,
      "loss": 0.9957,
      "step": 1160
    },
    {
      "epoch": 0.26936416184971096,
      "grad_norm": 0.2951235948754925,
      "learning_rate": 0.00018304592244448156,
      "loss": 1.0261,
      "step": 1165
    },
    {
      "epoch": 0.2705202312138728,
      "grad_norm": 0.3044130581874429,
      "learning_rate": 0.0001828204115650708,
      "loss": 0.9871,
      "step": 1170
    },
    {
      "epoch": 0.27167630057803466,
      "grad_norm": 0.3009718363326464,
      "learning_rate": 0.00018259355162684,
      "loss": 0.9963,
      "step": 1175
    },
    {
      "epoch": 0.2728323699421965,
      "grad_norm": 0.28905525049692726,
      "learning_rate": 0.00018236534632510277,
      "loss": 0.9848,
      "step": 1180
    },
    {
      "epoch": 0.27398843930635836,
      "grad_norm": 0.35621478263055945,
      "learning_rate": 0.00018213579937708735,
      "loss": 0.9529,
      "step": 1185
    },
    {
      "epoch": 0.2751445086705202,
      "grad_norm": 0.3185821009153887,
      "learning_rate": 0.00018190491452187613,
      "loss": 1.0045,
      "step": 1190
    },
    {
      "epoch": 0.27630057803468205,
      "grad_norm": 0.3174337839488537,
      "learning_rate": 0.00018167269552034446,
      "loss": 0.9549,
      "step": 1195
    },
    {
      "epoch": 0.2774566473988439,
      "grad_norm": 0.2929697771476232,
      "learning_rate": 0.00018143914615509967,
      "loss": 0.9463,
      "step": 1200
    },
    {
      "epoch": 0.2786127167630058,
      "grad_norm": 0.3218921944038643,
      "learning_rate": 0.00018120427023041925,
      "loss": 1.0432,
      "step": 1205
    },
    {
      "epoch": 0.27976878612716766,
      "grad_norm": 0.29522034854514784,
      "learning_rate": 0.00018096807157218909,
      "loss": 0.9354,
      "step": 1210
    },
    {
      "epoch": 0.2809248554913295,
      "grad_norm": 0.31103148068930697,
      "learning_rate": 0.0001807305540278409,
      "loss": 0.9275,
      "step": 1215
    },
    {
      "epoch": 0.28208092485549136,
      "grad_norm": 0.3268759634134707,
      "learning_rate": 0.00018049172146628975,
      "loss": 1.0122,
      "step": 1220
    },
    {
      "epoch": 0.2832369942196532,
      "grad_norm": 0.2992667041917287,
      "learning_rate": 0.00018025157777787102,
      "loss": 0.9422,
      "step": 1225
    },
    {
      "epoch": 0.28439306358381505,
      "grad_norm": 0.31617502406730474,
      "learning_rate": 0.00018001012687427688,
      "loss": 0.9909,
      "step": 1230
    },
    {
      "epoch": 0.2855491329479769,
      "grad_norm": 0.31251457847228237,
      "learning_rate": 0.0001797673726884928,
      "loss": 0.9885,
      "step": 1235
    },
    {
      "epoch": 0.28670520231213875,
      "grad_norm": 0.32021979333618866,
      "learning_rate": 0.00017952331917473336,
      "loss": 0.9396,
      "step": 1240
    },
    {
      "epoch": 0.2878612716763006,
      "grad_norm": 0.34841403249467495,
      "learning_rate": 0.00017927797030837768,
      "loss": 0.9188,
      "step": 1245
    },
    {
      "epoch": 0.28901734104046245,
      "grad_norm": 0.40965133837398776,
      "learning_rate": 0.0001790313300859051,
      "loss": 0.9582,
      "step": 1250
    },
    {
      "epoch": 0.2901734104046243,
      "grad_norm": 0.3128265434514317,
      "learning_rate": 0.00017878340252482956,
      "loss": 0.9891,
      "step": 1255
    },
    {
      "epoch": 0.29132947976878615,
      "grad_norm": 0.3762731976913158,
      "learning_rate": 0.00017853419166363458,
      "loss": 0.973,
      "step": 1260
    },
    {
      "epoch": 0.292485549132948,
      "grad_norm": 0.3150565577316399,
      "learning_rate": 0.00017828370156170727,
      "loss": 0.9777,
      "step": 1265
    },
    {
      "epoch": 0.29364161849710985,
      "grad_norm": 0.3124944800404496,
      "learning_rate": 0.00017803193629927223,
      "loss": 0.984,
      "step": 1270
    },
    {
      "epoch": 0.2947976878612717,
      "grad_norm": 0.3456192268358289,
      "learning_rate": 0.0001777788999773251,
      "loss": 0.9881,
      "step": 1275
    },
    {
      "epoch": 0.29595375722543354,
      "grad_norm": 0.309939339205246,
      "learning_rate": 0.0001775245967175658,
      "loss": 0.9483,
      "step": 1280
    },
    {
      "epoch": 0.2971098265895954,
      "grad_norm": 0.29097211271370754,
      "learning_rate": 0.00017726903066233134,
      "loss": 0.9438,
      "step": 1285
    },
    {
      "epoch": 0.29826589595375724,
      "grad_norm": 0.29586840025056343,
      "learning_rate": 0.00017701220597452833,
      "loss": 0.9754,
      "step": 1290
    },
    {
      "epoch": 0.2994219653179191,
      "grad_norm": 0.3017342795593241,
      "learning_rate": 0.0001767541268375652,
      "loss": 0.9805,
      "step": 1295
    },
    {
      "epoch": 0.30057803468208094,
      "grad_norm": 0.32061970031794484,
      "learning_rate": 0.00017649479745528417,
      "loss": 0.9818,
      "step": 1300
    },
    {
      "epoch": 0.3017341040462428,
      "grad_norm": 0.3011478634925178,
      "learning_rate": 0.00017623422205189252,
      "loss": 0.9815,
      "step": 1305
    },
    {
      "epoch": 0.30289017341040464,
      "grad_norm": 0.3086640164396148,
      "learning_rate": 0.000175972404871894,
      "loss": 0.9756,
      "step": 1310
    },
    {
      "epoch": 0.3040462427745665,
      "grad_norm": 0.31272520758866784,
      "learning_rate": 0.0001757093501800196,
      "loss": 0.9993,
      "step": 1315
    },
    {
      "epoch": 0.30520231213872834,
      "grad_norm": 0.3574223169536535,
      "learning_rate": 0.0001754450622611581,
      "loss": 0.9468,
      "step": 1320
    },
    {
      "epoch": 0.3063583815028902,
      "grad_norm": 0.3147796469526059,
      "learning_rate": 0.0001751795454202863,
      "loss": 0.9848,
      "step": 1325
    },
    {
      "epoch": 0.30751445086705204,
      "grad_norm": 0.3118480322453381,
      "learning_rate": 0.0001749128039823988,
      "loss": 1.0249,
      "step": 1330
    },
    {
      "epoch": 0.3086705202312139,
      "grad_norm": 0.3023964221368843,
      "learning_rate": 0.00017464484229243768,
      "loss": 0.9232,
      "step": 1335
    },
    {
      "epoch": 0.30982658959537573,
      "grad_norm": 0.3310385612654826,
      "learning_rate": 0.0001743756647152216,
      "loss": 0.9975,
      "step": 1340
    },
    {
      "epoch": 0.3109826589595376,
      "grad_norm": 0.3318179078521207,
      "learning_rate": 0.00017410527563537488,
      "loss": 0.9776,
      "step": 1345
    },
    {
      "epoch": 0.31213872832369943,
      "grad_norm": 0.31582505608794464,
      "learning_rate": 0.00017383367945725584,
      "loss": 0.9191,
      "step": 1350
    },
    {
      "epoch": 0.3132947976878613,
      "grad_norm": 0.30919652895574773,
      "learning_rate": 0.00017356088060488525,
      "loss": 0.9813,
      "step": 1355
    },
    {
      "epoch": 0.31445086705202313,
      "grad_norm": 0.30484218478884034,
      "learning_rate": 0.00017328688352187416,
      "loss": 0.9791,
      "step": 1360
    },
    {
      "epoch": 0.315606936416185,
      "grad_norm": 0.3016535900655947,
      "learning_rate": 0.00017301169267135163,
      "loss": 0.9918,
      "step": 1365
    },
    {
      "epoch": 0.31676300578034683,
      "grad_norm": 0.2828981498735541,
      "learning_rate": 0.00017273531253589187,
      "loss": 0.9266,
      "step": 1370
    },
    {
      "epoch": 0.3179190751445087,
      "grad_norm": 0.3077602888091621,
      "learning_rate": 0.00017245774761744134,
      "loss": 0.9674,
      "step": 1375
    },
    {
      "epoch": 0.3190751445086705,
      "grad_norm": 0.3099506136784924,
      "learning_rate": 0.00017217900243724543,
      "loss": 0.9836,
      "step": 1380
    },
    {
      "epoch": 0.3202312138728324,
      "grad_norm": 0.31771107668630955,
      "learning_rate": 0.00017189908153577473,
      "loss": 0.9387,
      "step": 1385
    },
    {
      "epoch": 0.3213872832369942,
      "grad_norm": 0.3121605825107148,
      "learning_rate": 0.0001716179894726511,
      "loss": 1.0108,
      "step": 1390
    },
    {
      "epoch": 0.3225433526011561,
      "grad_norm": 0.32262006217730343,
      "learning_rate": 0.0001713357308265735,
      "loss": 1.0374,
      "step": 1395
    },
    {
      "epoch": 0.3236994219653179,
      "grad_norm": 0.3209655049858561,
      "learning_rate": 0.0001710523101952432,
      "loss": 0.9936,
      "step": 1400
    },
    {
      "epoch": 0.3248554913294798,
      "grad_norm": 0.32077514063057966,
      "learning_rate": 0.00017076773219528905,
      "loss": 0.9704,
      "step": 1405
    },
    {
      "epoch": 0.3260115606936416,
      "grad_norm": 0.30932663652684755,
      "learning_rate": 0.0001704820014621923,
      "loss": 1.003,
      "step": 1410
    },
    {
      "epoch": 0.32716763005780347,
      "grad_norm": 0.32273320904553404,
      "learning_rate": 0.00017019512265021097,
      "loss": 1.0388,
      "step": 1415
    },
    {
      "epoch": 0.3283236994219653,
      "grad_norm": 0.31422034130330717,
      "learning_rate": 0.00016990710043230406,
      "loss": 0.9556,
      "step": 1420
    },
    {
      "epoch": 0.32947976878612717,
      "grad_norm": 0.31168769800585655,
      "learning_rate": 0.00016961793950005558,
      "loss": 0.9746,
      "step": 1425
    },
    {
      "epoch": 0.330635838150289,
      "grad_norm": 0.3052533915264079,
      "learning_rate": 0.00016932764456359793,
      "loss": 0.9542,
      "step": 1430
    },
    {
      "epoch": 0.33179190751445087,
      "grad_norm": 0.2905803213236271,
      "learning_rate": 0.0001690362203515353,
      "loss": 0.97,
      "step": 1435
    },
    {
      "epoch": 0.3329479768786127,
      "grad_norm": 0.3350090558904066,
      "learning_rate": 0.00016874367161086662,
      "loss": 1.0443,
      "step": 1440
    },
    {
      "epoch": 0.33410404624277457,
      "grad_norm": 0.3060737171676248,
      "learning_rate": 0.00016845000310690815,
      "loss": 1.043,
      "step": 1445
    },
    {
      "epoch": 0.3352601156069364,
      "grad_norm": 0.3129317388012455,
      "learning_rate": 0.00016815521962321604,
      "loss": 0.9288,
      "step": 1450
    },
    {
      "epoch": 0.33641618497109826,
      "grad_norm": 0.3197265364198292,
      "learning_rate": 0.00016785932596150827,
      "loss": 0.984,
      "step": 1455
    },
    {
      "epoch": 0.3375722543352601,
      "grad_norm": 0.29604487460555096,
      "learning_rate": 0.0001675623269415864,
      "loss": 1.0016,
      "step": 1460
    },
    {
      "epoch": 0.33872832369942196,
      "grad_norm": 0.30262162872771553,
      "learning_rate": 0.00016726422740125728,
      "loss": 0.9856,
      "step": 1465
    },
    {
      "epoch": 0.3398843930635838,
      "grad_norm": 0.3087826779620012,
      "learning_rate": 0.000166965032196254,
      "loss": 0.9957,
      "step": 1470
    },
    {
      "epoch": 0.34104046242774566,
      "grad_norm": 0.3024417730810271,
      "learning_rate": 0.00016666474620015686,
      "loss": 0.939,
      "step": 1475
    },
    {
      "epoch": 0.3421965317919075,
      "grad_norm": 0.3138788978371571,
      "learning_rate": 0.0001663633743043141,
      "loss": 0.9363,
      "step": 1480
    },
    {
      "epoch": 0.34335260115606936,
      "grad_norm": 0.30022727252981385,
      "learning_rate": 0.0001660609214177621,
      "loss": 1.0356,
      "step": 1485
    },
    {
      "epoch": 0.3445086705202312,
      "grad_norm": 0.3554930404416277,
      "learning_rate": 0.00016575739246714547,
      "loss": 0.9741,
      "step": 1490
    },
    {
      "epoch": 0.34566473988439306,
      "grad_norm": 0.29207690202141207,
      "learning_rate": 0.00016545279239663682,
      "loss": 0.9914,
      "step": 1495
    },
    {
      "epoch": 0.3468208092485549,
      "grad_norm": 0.3029574756031942,
      "learning_rate": 0.00016514712616785612,
      "loss": 0.9421,
      "step": 1500
    },
    {
      "epoch": 0.34797687861271676,
      "grad_norm": 0.3003131478682294,
      "learning_rate": 0.00016484039875979005,
      "loss": 0.9536,
      "step": 1505
    },
    {
      "epoch": 0.3491329479768786,
      "grad_norm": 0.31513567646148855,
      "learning_rate": 0.00016453261516871068,
      "loss": 0.9426,
      "step": 1510
    },
    {
      "epoch": 0.35028901734104045,
      "grad_norm": 0.2996325149723125,
      "learning_rate": 0.00016422378040809437,
      "loss": 1.0104,
      "step": 1515
    },
    {
      "epoch": 0.3514450867052023,
      "grad_norm": 0.31000327701469227,
      "learning_rate": 0.00016391389950853977,
      "loss": 0.9899,
      "step": 1520
    },
    {
      "epoch": 0.35260115606936415,
      "grad_norm": 0.3077861834938761,
      "learning_rate": 0.0001636029775176862,
      "loss": 0.9865,
      "step": 1525
    },
    {
      "epoch": 0.353757225433526,
      "grad_norm": 0.3065445301834393,
      "learning_rate": 0.00016329101950013122,
      "loss": 0.9833,
      "step": 1530
    },
    {
      "epoch": 0.35491329479768785,
      "grad_norm": 0.3317987511497053,
      "learning_rate": 0.00016297803053734816,
      "loss": 0.9549,
      "step": 1535
    },
    {
      "epoch": 0.3560693641618497,
      "grad_norm": 0.3196645968421778,
      "learning_rate": 0.0001626640157276034,
      "loss": 0.9675,
      "step": 1540
    },
    {
      "epoch": 0.35722543352601155,
      "grad_norm": 0.3134584834564055,
      "learning_rate": 0.00016234898018587337,
      "loss": 0.9516,
      "step": 1545
    },
    {
      "epoch": 0.3583815028901734,
      "grad_norm": 0.3117834490142485,
      "learning_rate": 0.00016203292904376105,
      "loss": 0.9846,
      "step": 1550
    },
    {
      "epoch": 0.35953757225433525,
      "grad_norm": 0.3217830266213364,
      "learning_rate": 0.00016171586744941264,
      "loss": 0.967,
      "step": 1555
    },
    {
      "epoch": 0.3606936416184971,
      "grad_norm": 0.30934961569422764,
      "learning_rate": 0.00016139780056743342,
      "loss": 0.9649,
      "step": 1560
    },
    {
      "epoch": 0.36184971098265895,
      "grad_norm": 0.3292849028870402,
      "learning_rate": 0.00016107873357880384,
      "loss": 1.0175,
      "step": 1565
    },
    {
      "epoch": 0.3630057803468208,
      "grad_norm": 0.31062236776235347,
      "learning_rate": 0.00016075867168079507,
      "loss": 0.9696,
      "step": 1570
    },
    {
      "epoch": 0.36416184971098264,
      "grad_norm": 0.318894494073892,
      "learning_rate": 0.00016043762008688433,
      "loss": 0.9286,
      "step": 1575
    },
    {
      "epoch": 0.3653179190751445,
      "grad_norm": 0.3029890310755476,
      "learning_rate": 0.00016011558402666983,
      "loss": 0.9594,
      "step": 1580
    },
    {
      "epoch": 0.36647398843930634,
      "grad_norm": 0.3081821304700694,
      "learning_rate": 0.00015979256874578594,
      "loss": 0.987,
      "step": 1585
    },
    {
      "epoch": 0.3676300578034682,
      "grad_norm": 0.29222596605397133,
      "learning_rate": 0.00015946857950581734,
      "loss": 0.9919,
      "step": 1590
    },
    {
      "epoch": 0.36878612716763004,
      "grad_norm": 0.2955787813149893,
      "learning_rate": 0.0001591436215842135,
      "loss": 0.9653,
      "step": 1595
    },
    {
      "epoch": 0.3699421965317919,
      "grad_norm": 0.2991599664341822,
      "learning_rate": 0.0001588177002742029,
      "loss": 0.9874,
      "step": 1600
    },
    {
      "epoch": 0.37109826589595374,
      "grad_norm": 0.44600654437638615,
      "learning_rate": 0.00015849082088470638,
      "loss": 0.9504,
      "step": 1605
    },
    {
      "epoch": 0.3722543352601156,
      "grad_norm": 0.33315550683583905,
      "learning_rate": 0.00015816298874025102,
      "loss": 1.0328,
      "step": 1610
    },
    {
      "epoch": 0.37341040462427744,
      "grad_norm": 0.292061245143086,
      "learning_rate": 0.00015783420918088337,
      "loss": 0.9762,
      "step": 1615
    },
    {
      "epoch": 0.3745664739884393,
      "grad_norm": 0.31976882089395187,
      "learning_rate": 0.0001575044875620822,
      "loss": 1.026,
      "step": 1620
    },
    {
      "epoch": 0.37572254335260113,
      "grad_norm": 0.3238783922087859,
      "learning_rate": 0.0001571738292546716,
      "loss": 0.9496,
      "step": 1625
    },
    {
      "epoch": 0.376878612716763,
      "grad_norm": 0.2959033885199569,
      "learning_rate": 0.00015684223964473337,
      "loss": 0.9656,
      "step": 1630
    },
    {
      "epoch": 0.37803468208092483,
      "grad_norm": 0.3043686833702477,
      "learning_rate": 0.0001565097241335191,
      "loss": 0.954,
      "step": 1635
    },
    {
      "epoch": 0.3791907514450867,
      "grad_norm": 0.3015192978319062,
      "learning_rate": 0.00015617628813736247,
      "loss": 0.9908,
      "step": 1640
    },
    {
      "epoch": 0.38034682080924853,
      "grad_norm": 0.3082482576635595,
      "learning_rate": 0.00015584193708759094,
      "loss": 0.9477,
      "step": 1645
    },
    {
      "epoch": 0.3815028901734104,
      "grad_norm": 0.30690197747994147,
      "learning_rate": 0.00015550667643043716,
      "loss": 0.9547,
      "step": 1650
    },
    {
      "epoch": 0.38265895953757223,
      "grad_norm": 0.32095813086552044,
      "learning_rate": 0.0001551705116269504,
      "loss": 0.9946,
      "step": 1655
    },
    {
      "epoch": 0.3838150289017341,
      "grad_norm": 0.3102088056503803,
      "learning_rate": 0.0001548334481529075,
      "loss": 0.9755,
      "step": 1660
    },
    {
      "epoch": 0.38497109826589593,
      "grad_norm": 0.31740424344750273,
      "learning_rate": 0.00015449549149872376,
      "loss": 0.986,
      "step": 1665
    },
    {
      "epoch": 0.3861271676300578,
      "grad_norm": 0.29760230294640583,
      "learning_rate": 0.00015415664716936345,
      "loss": 0.9736,
      "step": 1670
    },
    {
      "epoch": 0.3872832369942196,
      "grad_norm": 0.33696332230509884,
      "learning_rate": 0.00015381692068425004,
      "loss": 0.9833,
      "step": 1675
    },
    {
      "epoch": 0.3884393063583815,
      "grad_norm": 0.2971442840267895,
      "learning_rate": 0.0001534763175771766,
      "loss": 0.9787,
      "step": 1680
    },
    {
      "epoch": 0.3895953757225434,
      "grad_norm": 0.2985513113352265,
      "learning_rate": 0.00015313484339621534,
      "loss": 0.9586,
      "step": 1685
    },
    {
      "epoch": 0.39075144508670523,
      "grad_norm": 0.3070566835370781,
      "learning_rate": 0.00015279250370362735,
      "loss": 0.9878,
      "step": 1690
    },
    {
      "epoch": 0.3919075144508671,
      "grad_norm": 0.3277524257534511,
      "learning_rate": 0.00015244930407577205,
      "loss": 1.0016,
      "step": 1695
    },
    {
      "epoch": 0.3930635838150289,
      "grad_norm": 0.30050526813256595,
      "learning_rate": 0.00015210525010301638,
      "loss": 0.9553,
      "step": 1700
    },
    {
      "epoch": 0.3942196531791908,
      "grad_norm": 0.30810810818757023,
      "learning_rate": 0.0001517603473896435,
      "loss": 0.9559,
      "step": 1705
    },
    {
      "epoch": 0.3953757225433526,
      "grad_norm": 0.299950087627466,
      "learning_rate": 0.00015141460155376182,
      "loss": 0.9609,
      "step": 1710
    },
    {
      "epoch": 0.3965317919075145,
      "grad_norm": 0.2925418247045739,
      "learning_rate": 0.00015106801822721338,
      "loss": 0.9763,
      "step": 1715
    },
    {
      "epoch": 0.3976878612716763,
      "grad_norm": 0.30196182567060115,
      "learning_rate": 0.00015072060305548187,
      "loss": 0.9959,
      "step": 1720
    },
    {
      "epoch": 0.3988439306358382,
      "grad_norm": 0.30989108180452857,
      "learning_rate": 0.0001503723616976011,
      "loss": 1.0003,
      "step": 1725
    },
    {
      "epoch": 0.4,
      "grad_norm": 0.3039991146331331,
      "learning_rate": 0.00015002329982606255,
      "loss": 1.0345,
      "step": 1730
    },
    {
      "epoch": 0.40115606936416187,
      "grad_norm": 0.3211973783178471,
      "learning_rate": 0.00014967342312672283,
      "loss": 0.9384,
      "step": 1735
    },
    {
      "epoch": 0.4023121387283237,
      "grad_norm": 0.29388068969488124,
      "learning_rate": 0.00014932273729871152,
      "loss": 0.9051,
      "step": 1740
    },
    {
      "epoch": 0.40346820809248557,
      "grad_norm": 0.3265810444042218,
      "learning_rate": 0.0001489712480543379,
      "loss": 0.9835,
      "step": 1745
    },
    {
      "epoch": 0.4046242774566474,
      "grad_norm": 0.303317950576793,
      "learning_rate": 0.0001486189611189981,
      "loss": 0.9446,
      "step": 1750
    },
    {
      "epoch": 0.40578034682080927,
      "grad_norm": 0.2967360524243329,
      "learning_rate": 0.00014826588223108185,
      "loss": 0.9908,
      "step": 1755
    },
    {
      "epoch": 0.4069364161849711,
      "grad_norm": 0.30908569033672595,
      "learning_rate": 0.00014791201714187897,
      "loss": 0.9118,
      "step": 1760
    },
    {
      "epoch": 0.40809248554913297,
      "grad_norm": 0.2921540136185523,
      "learning_rate": 0.0001475573716154856,
      "loss": 1.0177,
      "step": 1765
    },
    {
      "epoch": 0.4092485549132948,
      "grad_norm": 0.30891513255558445,
      "learning_rate": 0.00014720195142871054,
      "loss": 0.9528,
      "step": 1770
    },
    {
      "epoch": 0.41040462427745666,
      "grad_norm": 0.3116110416347837,
      "learning_rate": 0.00014684576237098082,
      "loss": 1.0153,
      "step": 1775
    },
    {
      "epoch": 0.4115606936416185,
      "grad_norm": 0.29853304778547163,
      "learning_rate": 0.00014648881024424774,
      "loss": 0.9607,
      "step": 1780
    },
    {
      "epoch": 0.41271676300578036,
      "grad_norm": 0.29621019258375375,
      "learning_rate": 0.00014613110086289218,
      "loss": 1.0178,
      "step": 1785
    },
    {
      "epoch": 0.4138728323699422,
      "grad_norm": 0.32202847876721696,
      "learning_rate": 0.00014577264005362985,
      "loss": 0.9274,
      "step": 1790
    },
    {
      "epoch": 0.41502890173410406,
      "grad_norm": 0.30619120168198916,
      "learning_rate": 0.00014541343365541645,
      "loss": 0.9435,
      "step": 1795
    },
    {
      "epoch": 0.4161849710982659,
      "grad_norm": 0.3001554889949122,
      "learning_rate": 0.00014505348751935263,
      "loss": 0.9738,
      "step": 1800
    },
    {
      "epoch": 0.41734104046242776,
      "grad_norm": 0.3065569332725715,
      "learning_rate": 0.00014469280750858854,
      "loss": 0.9627,
      "step": 1805
    },
    {
      "epoch": 0.4184971098265896,
      "grad_norm": 0.3346803326675102,
      "learning_rate": 0.00014433139949822837,
      "loss": 1.0008,
      "step": 1810
    },
    {
      "epoch": 0.41965317919075146,
      "grad_norm": 0.30285946590074797,
      "learning_rate": 0.00014396926937523477,
      "loss": 0.9681,
      "step": 1815
    },
    {
      "epoch": 0.4208092485549133,
      "grad_norm": 0.323926429665197,
      "learning_rate": 0.0001436064230383327,
      "loss": 0.9883,
      "step": 1820
    },
    {
|
"epoch": 0.42196531791907516, |
|
"grad_norm": 0.30822208654391275, |
|
"learning_rate": 0.00014324286639791367, |
|
"loss": 0.9471, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.423121387283237, |
|
"grad_norm": 0.3043728994137006, |
|
"learning_rate": 0.00014287860537593917, |
|
"loss": 0.9837, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.42427745664739885, |
|
"grad_norm": 0.3042147218697011, |
|
"learning_rate": 0.00014251364590584444, |
|
"loss": 0.9576, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.4254335260115607, |
|
"grad_norm": 0.32773524321463, |
|
"learning_rate": 0.00014214799393244166, |
|
"loss": 0.9356, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.42658959537572255, |
|
"grad_norm": 0.30439292824288355, |
|
"learning_rate": 0.00014178165541182312, |
|
"loss": 0.9421, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.4277456647398844, |
|
"grad_norm": 0.321382456218326, |
|
"learning_rate": 0.00014141463631126442, |
|
"loss": 0.9515, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.42890173410404625, |
|
"grad_norm": 0.3130786437336031, |
|
"learning_rate": 0.0001410469426091269, |
|
"loss": 0.9715, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.4300578034682081, |
|
"grad_norm": 0.3135399317680074, |
|
"learning_rate": 0.00014067858029476063, |
|
"loss": 0.9474, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.43121387283236995, |
|
"grad_norm": 0.31368812931362966, |
|
"learning_rate": 0.00014030955536840656, |
|
"loss": 1.0225, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.4323699421965318, |
|
"grad_norm": 0.3332336708705887, |
|
"learning_rate": 0.00013993987384109898, |
|
"loss": 1.0098, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.43352601156069365, |
|
"grad_norm": 0.2950594206550405, |
|
"learning_rate": 0.00013956954173456747, |
|
"loss": 0.9846, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.4346820809248555, |
|
"grad_norm": 0.3001574947847011, |
|
"learning_rate": 0.000139198565081139, |
|
"loss": 0.9853, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.43583815028901735, |
|
"grad_norm": 0.29311121484460284, |
|
"learning_rate": 0.00013882694992363936, |
|
"loss": 1.0175, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.4369942196531792, |
|
"grad_norm": 0.3286518580874968, |
|
"learning_rate": 0.00013845470231529502, |
|
"loss": 0.9845, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.43815028901734104, |
|
"grad_norm": 0.31669776510548286, |
|
"learning_rate": 0.00013808182831963442, |
|
"loss": 1.0096, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.4393063583815029, |
|
"grad_norm": 0.3230500170692119, |
|
"learning_rate": 0.00013770833401038912, |
|
"loss": 0.9652, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.44046242774566474, |
|
"grad_norm": 0.314774115101565, |
|
"learning_rate": 0.0001373342254713951, |
|
"loss": 0.9884, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.4416184971098266, |
|
"grad_norm": 0.309673258526753, |
|
"learning_rate": 0.00013695950879649338, |
|
"loss": 0.9617, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.44277456647398844, |
|
"grad_norm": 0.31688401964004287, |
|
"learning_rate": 0.00013658419008943088, |
|
"loss": 1.0007, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.4439306358381503, |
|
"grad_norm": 0.3115831394799577, |
|
"learning_rate": 0.00013620827546376112, |
|
"loss": 0.9837, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.44508670520231214, |
|
"grad_norm": 0.3134574472279371, |
|
"learning_rate": 0.00013583177104274435, |
|
"loss": 0.9748, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.446242774566474, |
|
"grad_norm": 0.3388852510725773, |
|
"learning_rate": 0.00013545468295924812, |
|
"loss": 0.9825, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.44739884393063584, |
|
"grad_norm": 0.33068272114069625, |
|
"learning_rate": 0.00013507701735564716, |
|
"loss": 0.9552, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.4485549132947977, |
|
"grad_norm": 0.3070498417777377, |
|
"learning_rate": 0.00013469878038372348, |
|
"loss": 0.9842, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.44971098265895953, |
|
"grad_norm": 0.32088136083821, |
|
"learning_rate": 0.00013431997820456592, |
|
"loss": 0.9635, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.4508670520231214, |
|
"grad_norm": 0.317988833081837, |
|
"learning_rate": 0.00013394061698847022, |
|
"loss": 0.9922, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.45202312138728323, |
|
"grad_norm": 0.29873515381181037, |
|
"learning_rate": 0.000133560702914838, |
|
"loss": 0.9808, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.4531791907514451, |
|
"grad_norm": 0.334307242594275, |
|
"learning_rate": 0.00013318024217207652, |
|
"loss": 0.9285, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.45433526011560693, |
|
"grad_norm": 0.3161167520009514, |
|
"learning_rate": 0.00013279924095749768, |
|
"loss": 0.9721, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.4554913294797688, |
|
"grad_norm": 0.3219859959358273, |
|
"learning_rate": 0.00013241770547721703, |
|
"loss": 1.008, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.45664739884393063, |
|
"grad_norm": 0.27935961874420406, |
|
"learning_rate": 0.00013203564194605284, |
|
"loss": 0.9502, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4578034682080925, |
|
"grad_norm": 0.29913347538052254, |
|
"learning_rate": 0.0001316530565874248, |
|
"loss": 0.9791, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.45895953757225433, |
|
"grad_norm": 0.30294269390330414, |
|
"learning_rate": 0.00013126995563325254, |
|
"loss": 0.9763, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.4601156069364162, |
|
"grad_norm": 0.32096224239126736, |
|
"learning_rate": 0.00013088634532385424, |
|
"loss": 0.9238, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.461271676300578, |
|
"grad_norm": 0.2960586714061201, |
|
"learning_rate": 0.000130502231907845, |
|
"loss": 0.9533, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.4624277456647399, |
|
"grad_norm": 0.29803737045431256, |
|
"learning_rate": 0.000130117621642035, |
|
"loss": 0.9526, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4635838150289017, |
|
"grad_norm": 0.31720967984437226, |
|
"learning_rate": 0.00012973252079132749, |
|
"loss": 0.9566, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.4647398843930636, |
|
"grad_norm": 0.31204560106706253, |
|
"learning_rate": 0.00012934693562861692, |
|
"loss": 0.9821, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.4658959537572254, |
|
"grad_norm": 0.3452586478382497, |
|
"learning_rate": 0.00012896087243468673, |
|
"loss": 0.9866, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.46705202312138727, |
|
"grad_norm": 0.30419391343270963, |
|
"learning_rate": 0.00012857433749810691, |
|
"loss": 0.9465, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.4682080924855491, |
|
"grad_norm": 0.302216494494177, |
|
"learning_rate": 0.00012818733711513164, |
|
"loss": 0.9928, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.46936416184971097, |
|
"grad_norm": 0.29660145267520094, |
|
"learning_rate": 0.00012779987758959683, |
|
"loss": 0.9714, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4705202312138728, |
|
"grad_norm": 0.3375993332751583, |
|
"learning_rate": 0.00012741196523281728, |
|
"loss": 1.004, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.47167630057803467, |
|
"grad_norm": 0.31685124172490736, |
|
"learning_rate": 0.0001270236063634839, |
|
"loss": 0.9686, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.4728323699421965, |
|
"grad_norm": 0.30517277761996336, |
|
"learning_rate": 0.00012663480730756095, |
|
"loss": 0.97, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.47398843930635837, |
|
"grad_norm": 0.3075134986191579, |
|
"learning_rate": 0.00012624557439818275, |
|
"loss": 0.9535, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4751445086705202, |
|
"grad_norm": 0.2914116111037525, |
|
"learning_rate": 0.00012585591397555078, |
|
"loss": 0.9549, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.47630057803468207, |
|
"grad_norm": 0.3065733883077486, |
|
"learning_rate": 0.00012546583238683015, |
|
"loss": 0.9694, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4774566473988439, |
|
"grad_norm": 0.30076466916700556, |
|
"learning_rate": 0.00012507533598604632, |
|
"loss": 0.9802, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.47861271676300576, |
|
"grad_norm": 0.29670240314259055, |
|
"learning_rate": 0.00012468443113398175, |
|
"loss": 0.9366, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.4797687861271676, |
|
"grad_norm": 0.31183074363125884, |
|
"learning_rate": 0.00012429312419807198, |
|
"loss": 0.966, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.48092485549132946, |
|
"grad_norm": 0.31278790481596425, |
|
"learning_rate": 0.00012390142155230217, |
|
"loss": 0.9893, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.4820809248554913, |
|
"grad_norm": 0.28207826631174193, |
|
"learning_rate": 0.0001235093295771032, |
|
"loss": 0.9472, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.48323699421965316, |
|
"grad_norm": 0.32635523820738965, |
|
"learning_rate": 0.00012311685465924774, |
|
"loss": 0.9089, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.484393063583815, |
|
"grad_norm": 0.2977916871662523, |
|
"learning_rate": 0.00012272400319174607, |
|
"loss": 0.9834, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.48554913294797686, |
|
"grad_norm": 0.31990291106992935, |
|
"learning_rate": 0.00012233078157374217, |
|
"loss": 0.9312, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4867052023121387, |
|
"grad_norm": 0.33073575866363214, |
|
"learning_rate": 0.00012193719621040942, |
|
"loss": 0.9795, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.48786127167630056, |
|
"grad_norm": 0.2915838062263623, |
|
"learning_rate": 0.00012154325351284618, |
|
"loss": 0.9789, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.4890173410404624, |
|
"grad_norm": 0.33822747094942934, |
|
"learning_rate": 0.00012114895989797144, |
|
"loss": 0.9304, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.49017341040462425, |
|
"grad_norm": 0.32370602693562334, |
|
"learning_rate": 0.00012075432178842021, |
|
"loss": 0.9428, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4913294797687861, |
|
"grad_norm": 0.32375527811459415, |
|
"learning_rate": 0.00012035934561243905, |
|
"loss": 0.9718, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.49248554913294795, |
|
"grad_norm": 0.2943351586407433, |
|
"learning_rate": 0.00011996403780378123, |
|
"loss": 0.9712, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4936416184971098, |
|
"grad_norm": 0.3223304229208655, |
|
"learning_rate": 0.00011956840480160194, |
|
"loss": 1.0046, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.49479768786127165, |
|
"grad_norm": 0.3032804365412004, |
|
"learning_rate": 0.00011917245305035354, |
|
"loss": 0.9596, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4959537572254335, |
|
"grad_norm": 0.3086608217360584, |
|
"learning_rate": 0.00011877618899968037, |
|
"loss": 0.9473, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.49710982658959535, |
|
"grad_norm": 0.3131187277547376, |
|
"learning_rate": 0.00011837961910431383, |
|
"loss": 1.0065, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4982658959537572, |
|
"grad_norm": 0.2975605931413944, |
|
"learning_rate": 0.00011798274982396726, |
|
"loss": 0.9481, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.49942196531791905, |
|
"grad_norm": 0.29498275298057963, |
|
"learning_rate": 0.00011758558762323067, |
|
"loss": 0.9884, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.500578034682081, |
|
"grad_norm": 0.2906355713880922, |
|
"learning_rate": 0.00011718813897146535, |
|
"loss": 0.9643, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.5017341040462427, |
|
"grad_norm": 0.2879177716708955, |
|
"learning_rate": 0.00011679041034269869, |
|
"loss": 0.9496, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5028901734104047, |
|
"grad_norm": 0.3107002014222183, |
|
"learning_rate": 0.00011639240821551858, |
|
"loss": 0.9489, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.5040462427745664, |
|
"grad_norm": 0.30854297451303886, |
|
"learning_rate": 0.00011599413907296785, |
|
"loss": 0.9887, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.5052023121387283, |
|
"grad_norm": 0.3250596343211611, |
|
"learning_rate": 0.00011559560940243888, |
|
"loss": 0.9421, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.5063583815028901, |
|
"grad_norm": 0.303124134082483, |
|
"learning_rate": 0.00011519682569556758, |
|
"loss": 0.967, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.507514450867052, |
|
"grad_norm": 0.29292499319855175, |
|
"learning_rate": 0.00011479779444812808, |
|
"loss": 0.9679, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.5086705202312138, |
|
"grad_norm": 0.30291810874235703, |
|
"learning_rate": 0.00011439852215992647, |
|
"loss": 0.997, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5098265895953757, |
|
"grad_norm": 0.3234308605182878, |
|
"learning_rate": 0.0001139990153346953, |
|
"loss": 0.9876, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.5109826589595375, |
|
"grad_norm": 0.3137214941805028, |
|
"learning_rate": 0.00011359928047998744, |
|
"loss": 1.0407, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5121387283236994, |
|
"grad_norm": 0.32171251618436913, |
|
"learning_rate": 0.0001131993241070701, |
|
"loss": 0.9783, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.5132947976878612, |
|
"grad_norm": 0.2971713793214721, |
|
"learning_rate": 0.00011279915273081876, |
|
"loss": 0.9678, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5144508670520231, |
|
"grad_norm": 0.30876666041737444, |
|
"learning_rate": 0.00011239877286961122, |
|
"loss": 0.9717, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.5156069364161849, |
|
"grad_norm": 0.31611543232380335, |
|
"learning_rate": 0.00011199819104522114, |
|
"loss": 0.9611, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5167630057803468, |
|
"grad_norm": 0.31365800007794736, |
|
"learning_rate": 0.000111597413782712, |
|
"loss": 0.986, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.5179190751445086, |
|
"grad_norm": 0.3387876838248837, |
|
"learning_rate": 0.00011119644761033078, |
|
"loss": 0.9865, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5190751445086705, |
|
"grad_norm": 0.3090392049931908, |
|
"learning_rate": 0.00011079529905940163, |
|
"loss": 0.9264, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.5202312138728323, |
|
"grad_norm": 0.30547601371038785, |
|
"learning_rate": 0.0001103939746642194, |
|
"loss": 0.9293, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5213872832369942, |
|
"grad_norm": 0.30920860300711217, |
|
"learning_rate": 0.00010999248096194326, |
|
"loss": 0.9759, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.522543352601156, |
|
"grad_norm": 0.30207153503695156, |
|
"learning_rate": 0.00010959082449249026, |
|
"loss": 0.9557, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5236994219653179, |
|
"grad_norm": 0.29504681849985004, |
|
"learning_rate": 0.00010918901179842877, |
|
"loss": 0.9686, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.5248554913294797, |
|
"grad_norm": 0.29286119267320176, |
|
"learning_rate": 0.00010878704942487183, |
|
"loss": 1.0042, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5260115606936416, |
|
"grad_norm": 0.3062243965654378, |
|
"learning_rate": 0.00010838494391937064, |
|
"loss": 0.9784, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5271676300578034, |
|
"grad_norm": 0.3079900592590067, |
|
"learning_rate": 0.00010798270183180794, |
|
"loss": 0.9503, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5283236994219653, |
|
"grad_norm": 0.3154259872984876, |
|
"learning_rate": 0.0001075803297142911, |
|
"loss": 0.9509, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.5294797687861271, |
|
"grad_norm": 0.29229323286742326, |
|
"learning_rate": 0.00010717783412104568, |
|
"loss": 0.9557, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.530635838150289, |
|
"grad_norm": 0.31601404726519217, |
|
"learning_rate": 0.00010677522160830848, |
|
"loss": 0.9042, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.5317919075144508, |
|
"grad_norm": 0.28505318471127816, |
|
"learning_rate": 0.00010637249873422077, |
|
"loss": 0.9692, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5329479768786127, |
|
"grad_norm": 0.32485305181812835, |
|
"learning_rate": 0.00010596967205872154, |
|
"loss": 1.0065, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.5341040462427745, |
|
"grad_norm": 0.3078039739362063, |
|
"learning_rate": 0.00010556674814344059, |
|
"loss": 0.9284, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.5352601156069364, |
|
"grad_norm": 0.3208549342399588, |
|
"learning_rate": 0.00010516373355159159, |
|
"loss": 0.9477, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.5364161849710982, |
|
"grad_norm": 0.30748525424656054, |
|
"learning_rate": 0.00010476063484786535, |
|
"loss": 0.9629, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.5375722543352601, |
|
"grad_norm": 0.35275688331568944, |
|
"learning_rate": 0.0001043574585983227, |
|
"loss": 1.0113, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5387283236994219, |
|
"grad_norm": 0.3253346439794717, |
|
"learning_rate": 0.00010395421137028761, |
|
"loss": 1.0346, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.5398843930635838, |
|
"grad_norm": 0.3095697055293057, |
|
"learning_rate": 0.00010355089973224026, |
|
"loss": 0.9546, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.5410404624277456, |
|
"grad_norm": 0.307599147895419, |
|
"learning_rate": 0.00010314753025370991, |
|
"loss": 0.9836, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.5421965317919075, |
|
"grad_norm": 0.31630009518917934, |
|
"learning_rate": 0.00010274410950516815, |
|
"loss": 1.0071, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.5433526011560693, |
|
"grad_norm": 0.2834312244927633, |
|
"learning_rate": 0.00010234064405792154, |
|
"loss": 0.9489, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5445086705202312, |
|
"grad_norm": 0.2970129068571585, |
|
"learning_rate": 0.0001019371404840048, |
|
"loss": 0.9351, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.545664739884393, |
|
"grad_norm": 0.3025660920447834, |
|
"learning_rate": 0.0001015336053560737, |
|
"loss": 0.99, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.5468208092485549, |
|
"grad_norm": 0.30633618369872967, |
|
"learning_rate": 0.00010113004524729799, |
|
"loss": 0.954, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.5479768786127167, |
|
"grad_norm": 0.28629344395996326, |
|
"learning_rate": 0.00010072646673125432, |
|
"loss": 0.9783, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.5491329479768786, |
|
"grad_norm": 0.3097159968894246, |
|
"learning_rate": 0.00010032287638181919, |
|
"loss": 1.0303, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5502890173410404, |
|
"grad_norm": 0.30322928513172714, |
|
"learning_rate": 9.991928077306183e-05, |
|
"loss": 1.0, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.5514450867052023, |
|
"grad_norm": 0.4644501347952281, |
|
"learning_rate": 9.951568647913718e-05, |
|
"loss": 0.9294, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.5526011560693641, |
|
"grad_norm": 0.3124185608435605, |
|
"learning_rate": 9.911210007417869e-05, |
|
"loss": 0.9847, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.553757225433526, |
|
"grad_norm": 0.32991063517383223, |
|
"learning_rate": 9.870852813219143e-05, |
|
"loss": 0.9755, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.5549132947976878, |
|
"grad_norm": 0.31527757887354024, |
|
"learning_rate": 9.830497722694478e-05, |
|
"loss": 0.9819, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5560693641618497, |
|
"grad_norm": 0.3094199650313883, |
|
"learning_rate": 9.790145393186541e-05, |
|
"loss": 0.9409, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.5572254335260116, |
|
"grad_norm": 0.3055144703983288, |
|
"learning_rate": 9.749796481993042e-05, |
|
"loss": 0.9674, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.5583815028901734, |
|
"grad_norm": 0.31607857296755776, |
|
"learning_rate": 9.709451646355996e-05, |
|
"loss": 1.0174, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.5595375722543353, |
|
"grad_norm": 0.2975421743209585, |
|
"learning_rate": 9.669111543451033e-05, |
|
"loss": 0.9683, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.5606936416184971, |
|
"grad_norm": 0.30435556233851097, |
|
"learning_rate": 9.628776830376698e-05, |
|
"loss": 0.9539, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.561849710982659, |
|
"grad_norm": 0.31164699307502913, |
|
"learning_rate": 9.588448164143739e-05, |
|
"loss": 0.9716, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.5630057803468208, |
|
"grad_norm": 0.3029837051614153, |
|
"learning_rate": 9.548126201664398e-05, |
|
"loss": 0.9488, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.5641618497109827, |
|
"grad_norm": 0.30375216989550236, |
|
"learning_rate": 9.507811599741735e-05, |
|
"loss": 0.9241, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.5653179190751445, |
|
"grad_norm": 0.3044100811963364, |
|
"learning_rate": 9.467505015058901e-05, |
|
"loss": 1.0212, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.5664739884393064, |
|
"grad_norm": 0.3079385379059537, |
|
"learning_rate": 9.427207104168467e-05, |
|
"loss": 0.9341, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5676300578034682, |
|
"grad_norm": 0.3012241533223519, |
|
"learning_rate": 9.386918523481709e-05, |
|
"loss": 0.9533, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.5687861271676301, |
|
"grad_norm": 0.32392283471513106, |
|
"learning_rate": 9.346639929257916e-05, |
|
"loss": 0.9888, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.5699421965317919, |
|
"grad_norm": 0.28835183242553575, |
|
"learning_rate": 9.306371977593726e-05, |
|
"loss": 0.9847, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.5710982658959538, |
|
"grad_norm": 0.29592049768177997, |
|
"learning_rate": 9.26611532441241e-05, |
|
"loss": 0.9075, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5722543352601156, |
|
"grad_norm": 0.30510640464990846, |
|
"learning_rate": 9.225870625453192e-05, |
|
"loss": 0.9276, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5734104046242775, |
|
"grad_norm": 0.32217649840985024, |
|
"learning_rate": 9.18563853626059e-05, |
|
"loss": 0.9936, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5745664739884393, |
|
"grad_norm": 0.2945778811908368, |
|
"learning_rate": 9.145419712173713e-05, |
|
"loss": 0.9775, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.5757225433526012, |
|
"grad_norm": 0.2937161972776823, |
|
"learning_rate": 9.105214808315588e-05, |
|
"loss": 0.9293, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.576878612716763, |
|
"grad_norm": 0.34400282833354867, |
|
"learning_rate": 9.065024479582513e-05, |
|
"loss": 1.0045, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.5780346820809249, |
|
"grad_norm": 0.30233509314710183, |
|
"learning_rate": 9.024849380633359e-05, |
|
"loss": 0.9786, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5791907514450867, |
|
"grad_norm": 0.29454389871868664, |
|
"learning_rate": 8.984690165878921e-05, |
|
"loss": 0.9584, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.5803468208092486, |
|
"grad_norm": 0.30071228762049557, |
|
"learning_rate": 8.944547489471265e-05, |
|
"loss": 0.955, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.5815028901734104, |
|
"grad_norm": 0.3169803125653554, |
|
"learning_rate": 8.904422005293052e-05, |
|
"loss": 1.0198, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.5826589595375723, |
|
"grad_norm": 0.31831872317303483, |
|
"learning_rate": 8.864314366946913e-05, |
|
"loss": 0.9781, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5838150289017341, |
|
"grad_norm": 0.30646509617401063, |
|
"learning_rate": 8.824225227744782e-05, |
|
"loss": 0.9556, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.584971098265896, |
|
"grad_norm": 0.28426966152836436, |
|
"learning_rate": 8.784155240697254e-05, |
|
"loss": 0.9811, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5861271676300578, |
|
"grad_norm": 0.29076631427987554, |
|
"learning_rate": 8.74410505850297e-05, |
|
"loss": 0.9653, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.5872832369942197, |
|
"grad_norm": 0.2993968457362386, |
|
"learning_rate": 8.704075333537963e-05, |
|
"loss": 0.9267, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5884393063583815, |
|
"grad_norm": 0.28060562517633875, |
|
"learning_rate": 8.66406671784503e-05, |
|
"loss": 0.9767, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.5895953757225434, |
|
"grad_norm": 0.29394163379907895, |
|
"learning_rate": 8.624079863123135e-05, |
|
"loss": 0.9692, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5907514450867052, |
|
"grad_norm": 0.29752047867631554, |
|
"learning_rate": 8.584115420716777e-05, |
|
"loss": 1.0218, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.5919075144508671, |
|
"grad_norm": 0.2960489971126818, |
|
"learning_rate": 8.544174041605363e-05, |
|
"loss": 0.9386, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5930635838150289, |
|
"grad_norm": 0.2991003815095411, |
|
"learning_rate": 8.504256376392647e-05, |
|
"loss": 0.951, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.5942196531791908, |
|
"grad_norm": 0.2875329275503883, |
|
"learning_rate": 8.464363075296095e-05, |
|
"loss": 0.9595, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.5953757225433526, |
|
"grad_norm": 0.29629340379738117, |
|
"learning_rate": 8.424494788136303e-05, |
|
"loss": 0.946, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5965317919075145, |
|
"grad_norm": 0.3039710490176798, |
|
"learning_rate": 8.384652164326432e-05, |
|
"loss": 0.9297, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5976878612716763, |
|
"grad_norm": 0.2867197601664578, |
|
"learning_rate": 8.344835852861595e-05, |
|
"loss": 0.9655, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.5988439306358382, |
|
"grad_norm": 0.3021141932586307, |
|
"learning_rate": 8.305046502308319e-05, |
|
"loss": 0.9388, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.32608595836134247, |
|
"learning_rate": 8.265284760793957e-05, |
|
"loss": 0.948, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.6011560693641619, |
|
"grad_norm": 0.3077259465134535, |
|
"learning_rate": 8.225551275996138e-05, |
|
"loss": 1.0123, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6023121387283237, |
|
"grad_norm": 0.28311149702035393, |
|
"learning_rate": 8.185846695132227e-05, |
|
"loss": 0.9456, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.6034682080924856, |
|
"grad_norm": 0.31276946079278556, |
|
"learning_rate": 8.146171664948769e-05, |
|
"loss": 0.9755, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6046242774566474, |
|
"grad_norm": 0.3264513747288462, |
|
"learning_rate": 8.10652683171095e-05, |
|
"loss": 0.9619, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.6057803468208093, |
|
"grad_norm": 0.30285428517053464, |
|
"learning_rate": 8.066912841192099e-05, |
|
"loss": 0.9344, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6069364161849711, |
|
"grad_norm": 0.3644501014383341, |
|
"learning_rate": 8.027330338663132e-05, |
|
"loss": 0.9794, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.608092485549133, |
|
"grad_norm": 0.28904372054279964, |
|
"learning_rate": 7.987779968882061e-05, |
|
"loss": 0.941, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6092485549132948, |
|
"grad_norm": 0.2895705859655337, |
|
"learning_rate": 7.9482623760835e-05, |
|
"loss": 0.951, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.6104046242774567, |
|
"grad_norm": 0.29749813183970186, |
|
"learning_rate": 7.908778203968146e-05, |
|
"loss": 0.9244, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6115606936416185, |
|
"grad_norm": 0.31971185227728377, |
|
"learning_rate": 7.869328095692312e-05, |
|
"loss": 0.9645, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.6127167630057804, |
|
"grad_norm": 0.2873929264676909, |
|
"learning_rate": 7.829912693857454e-05, |
|
"loss": 0.9739, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.6138728323699422, |
|
"grad_norm": 0.2962385037449908, |
|
"learning_rate": 7.79053264049968e-05, |
|
"loss": 1.0025, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.6150289017341041, |
|
"grad_norm": 0.2889472954071748, |
|
"learning_rate": 7.751188577079327e-05, |
|
"loss": 0.9764, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6161849710982659, |
|
"grad_norm": 0.2914115227852295, |
|
"learning_rate": 7.711881144470481e-05, |
|
"loss": 0.9575, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.6173410404624278, |
|
"grad_norm": 0.31913799827615985, |
|
"learning_rate": 7.672610982950546e-05, |
|
"loss": 0.9684, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.6184971098265896, |
|
"grad_norm": 0.3005365331976947, |
|
"learning_rate": 7.633378732189833e-05, |
|
"loss": 0.941, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.6196531791907515, |
|
"grad_norm": 0.29855395413135466, |
|
"learning_rate": 7.594185031241115e-05, |
|
"loss": 0.9504, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.6208092485549133, |
|
"grad_norm": 0.30079166608986607, |
|
"learning_rate": 7.555030518529227e-05, |
|
"loss": 0.9489, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.6219653179190752, |
|
"grad_norm": 0.2835216761875895, |
|
"learning_rate": 7.515915831840682e-05, |
|
"loss": 1.0283, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.623121387283237, |
|
"grad_norm": 0.296763030995749, |
|
"learning_rate": 7.476841608313253e-05, |
|
"loss": 0.9495, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.6242774566473989, |
|
"grad_norm": 0.2965441757959662, |
|
"learning_rate": 7.437808484425614e-05, |
|
"loss": 0.9207, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6254335260115607, |
|
"grad_norm": 0.28563226791333124, |
|
"learning_rate": 7.398817095986978e-05, |
|
"loss": 0.9529, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.6265895953757226, |
|
"grad_norm": 0.2958876716229884, |
|
"learning_rate": 7.359868078126714e-05, |
|
"loss": 0.9415, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.6277456647398844, |
|
"grad_norm": 0.29947263183760775, |
|
"learning_rate": 7.320962065284032e-05, |
|
"loss": 0.9153, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.6289017341040463, |
|
"grad_norm": 0.2921294166805471, |
|
"learning_rate": 7.282099691197632e-05, |
|
"loss": 1.0061, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.630057803468208, |
|
"grad_norm": 0.3065806154787742, |
|
"learning_rate": 7.243281588895374e-05, |
|
"loss": 0.9713, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.63121387283237, |
|
"grad_norm": 0.29670093707685785, |
|
"learning_rate": 7.204508390683991e-05, |
|
"loss": 0.9152, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.6323699421965318, |
|
"grad_norm": 0.28334644508416545, |
|
"learning_rate": 7.165780728138769e-05, |
|
"loss": 0.9216, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.6335260115606937, |
|
"grad_norm": 0.2983458223496864, |
|
"learning_rate": 7.127099232093252e-05, |
|
"loss": 0.9684, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.6346820809248555, |
|
"grad_norm": 0.297707639389454, |
|
"learning_rate": 7.08846453262901e-05, |
|
"loss": 0.9677, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.6358381502890174, |
|
"grad_norm": 0.3030445642630547, |
|
"learning_rate": 7.049877259065312e-05, |
|
"loss": 0.991, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6369942196531792, |
|
"grad_norm": 0.2967929219140181, |
|
"learning_rate": 7.011338039948925e-05, |
|
"loss": 0.9331, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.638150289017341, |
|
"grad_norm": 0.30678542010472665, |
|
"learning_rate": 6.972847503043864e-05, |
|
"loss": 0.9952, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.6393063583815028, |
|
"grad_norm": 0.2869522581890146, |
|
"learning_rate": 6.934406275321147e-05, |
|
"loss": 0.9817, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.6404624277456648, |
|
"grad_norm": 0.30227078806254565, |
|
"learning_rate": 6.896014982948602e-05, |
|
"loss": 0.9713, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.6416184971098265, |
|
"grad_norm": 0.29642633769940174, |
|
"learning_rate": 6.857674251280671e-05, |
|
"loss": 0.9495, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6427745664739885, |
|
"grad_norm": 0.3324961966488245, |
|
"learning_rate": 6.819384704848199e-05, |
|
"loss": 1.0679, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.6439306358381502, |
|
"grad_norm": 0.2888690447316259, |
|
"learning_rate": 6.781146967348284e-05, |
|
"loss": 0.941, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.6450867052023121, |
|
"grad_norm": 0.29579888471073373, |
|
"learning_rate": 6.742961661634115e-05, |
|
"loss": 1.0323, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.6462427745664739, |
|
"grad_norm": 0.2989300466555907, |
|
"learning_rate": 6.704829409704809e-05, |
|
"loss": 0.9723, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.6473988439306358, |
|
"grad_norm": 0.27150342274150757, |
|
"learning_rate": 6.666750832695306e-05, |
|
"loss": 0.9397, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6485549132947976, |
|
"grad_norm": 0.29998837345134693, |
|
"learning_rate": 6.628726550866227e-05, |
|
"loss": 0.9835, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.6497109826589595, |
|
"grad_norm": 0.28399979161419353, |
|
"learning_rate": 6.59075718359378e-05, |
|
"loss": 0.9509, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.6508670520231213, |
|
"grad_norm": 0.30370562141401386, |
|
"learning_rate": 6.552843349359688e-05, |
|
"loss": 0.967, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.6520231213872832, |
|
"grad_norm": 0.3363515271195273, |
|
"learning_rate": 6.514985665741073e-05, |
|
"loss": 1.0054, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.653179190751445, |
|
"grad_norm": 0.2941850764424752, |
|
"learning_rate": 6.477184749400438e-05, |
|
"loss": 0.9958, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6543352601156069, |
|
"grad_norm": 0.2845955247940358, |
|
"learning_rate": 6.439441216075605e-05, |
|
"loss": 0.9199, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.6554913294797687, |
|
"grad_norm": 0.30264607648319775, |
|
"learning_rate": 6.401755680569683e-05, |
|
"loss": 0.9974, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.6566473988439306, |
|
"grad_norm": 0.3049902419448789, |
|
"learning_rate": 6.36412875674105e-05, |
|
"loss": 0.9412, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.6578034682080924, |
|
"grad_norm": 0.28427694016822674, |
|
"learning_rate": 6.326561057493376e-05, |
|
"loss": 0.9618, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.6589595375722543, |
|
"grad_norm": 0.2923557343391648, |
|
"learning_rate": 6.28905319476561e-05, |
|
"loss": 0.9738, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6601156069364161, |
|
"grad_norm": 0.3011922931000182, |
|
"learning_rate": 6.251605779522032e-05, |
|
"loss": 0.9547, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.661271676300578, |
|
"grad_norm": 0.2778141274186397, |
|
"learning_rate": 6.214219421742295e-05, |
|
"loss": 1.0062, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.6624277456647398, |
|
"grad_norm": 0.28425833137988876, |
|
"learning_rate": 6.176894730411483e-05, |
|
"loss": 0.9529, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.6635838150289017, |
|
"grad_norm": 0.2914968347637112, |
|
"learning_rate": 6.139632313510212e-05, |
|
"loss": 0.944, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.6647398843930635, |
|
"grad_norm": 0.2910244571300189, |
|
"learning_rate": 6.1024327780046944e-05, |
|
"loss": 1.0063, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6658959537572254, |
|
"grad_norm": 0.28730856037506486, |
|
"learning_rate": 6.065296729836879e-05, |
|
"loss": 0.978, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.6670520231213873, |
|
"grad_norm": 0.26692983480937005, |
|
"learning_rate": 6.028224773914575e-05, |
|
"loss": 0.953, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.6682080924855491, |
|
"grad_norm": 0.28871757324400055, |
|
"learning_rate": 5.991217514101586e-05, |
|
"loss": 0.9275, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.669364161849711, |
|
"grad_norm": 0.29493246673699086, |
|
"learning_rate": 5.9542755532078856e-05, |
|
"loss": 1.0001, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.6705202312138728, |
|
"grad_norm": 0.3100505532341507, |
|
"learning_rate": 5.917399492979805e-05, |
|
"loss": 0.9716, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6716763005780347, |
|
"grad_norm": 0.296746629997667, |
|
"learning_rate": 5.880589934090206e-05, |
|
"loss": 0.987, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.6728323699421965, |
|
"grad_norm": 0.3175494758298602, |
|
"learning_rate": 5.843847476128722e-05, |
|
"loss": 0.9643, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.6739884393063584, |
|
"grad_norm": 0.310484738297623, |
|
"learning_rate": 5.807172717591984e-05, |
|
"loss": 0.9322, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.6751445086705202, |
|
"grad_norm": 0.29571778911697455, |
|
"learning_rate": 5.770566255873866e-05, |
|
"loss": 0.929, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.6763005780346821, |
|
"grad_norm": 0.2876079644684902, |
|
"learning_rate": 5.734028687255751e-05, |
|
"loss": 0.9644, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6774566473988439, |
|
"grad_norm": 0.3002944071515893, |
|
"learning_rate": 5.697560606896839e-05, |
|
"loss": 0.9987, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.6786127167630058, |
|
"grad_norm": 0.2931767870621979, |
|
"learning_rate": 5.6611626088244194e-05, |
|
"loss": 0.9474, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.6797687861271676, |
|
"grad_norm": 0.28720352206351696, |
|
"learning_rate": 5.6248352859242314e-05, |
|
"loss": 1.0355, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.6809248554913295, |
|
"grad_norm": 0.3191718472968375, |
|
"learning_rate": 5.588579229930784e-05, |
|
"loss": 0.9699, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.6820809248554913, |
|
"grad_norm": 0.30266420054049115, |
|
"learning_rate": 5.552395031417712e-05, |
|
"loss": 0.9358, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6832369942196532, |
|
"grad_norm": 0.30130721873082394, |
|
"learning_rate": 5.516283279788183e-05, |
|
"loss": 0.9496, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.684393063583815, |
|
"grad_norm": 0.2929120042219513, |
|
"learning_rate": 5.4802445632652634e-05, |
|
"loss": 0.9248, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.6855491329479769, |
|
"grad_norm": 0.31437058476192303, |
|
"learning_rate": 5.444279468882358e-05, |
|
"loss": 0.9745, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.6867052023121387, |
|
"grad_norm": 0.2878711246279315, |
|
"learning_rate": 5.408388582473651e-05, |
|
"loss": 1.0008, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6878612716763006, |
|
"grad_norm": 0.2808303288729568, |
|
"learning_rate": 5.3725724886645526e-05, |
|
"loss": 0.9381, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6890173410404624, |
|
"grad_norm": 0.3006592351672636, |
|
"learning_rate": 5.3368317708621674e-05, |
|
"loss": 1.0076, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.6901734104046243, |
|
"grad_norm": 0.30870133430305197, |
|
"learning_rate": 5.3011670112458224e-05, |
|
"loss": 0.9616, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.6913294797687861, |
|
"grad_norm": 0.29489554268370943, |
|
"learning_rate": 5.2655787907575436e-05, |
|
"loss": 0.908, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.692485549132948, |
|
"grad_norm": 0.30595244192322063, |
|
"learning_rate": 5.230067689092629e-05, |
|
"loss": 0.9902, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.6936416184971098, |
|
"grad_norm": 0.3007538157511167, |
|
"learning_rate": 5.19463428469019e-05, |
|
"loss": 1.0259, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6947976878612717, |
|
"grad_norm": 0.28815906202526853, |
|
"learning_rate": 5.159279154723715e-05, |
|
"loss": 0.9497, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.6959537572254335, |
|
"grad_norm": 0.2752889596438775, |
|
"learning_rate": 5.124002875091704e-05, |
|
"loss": 0.9586, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6971098265895954, |
|
"grad_norm": 0.2850467130558078, |
|
"learning_rate": 5.088806020408252e-05, |
|
"loss": 0.9049, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.6982658959537572, |
|
"grad_norm": 0.2750258968098293, |
|
"learning_rate": 5.053689163993703e-05, |
|
"loss": 0.937, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6994219653179191, |
|
"grad_norm": 0.29519726643012756, |
|
"learning_rate": 5.018652877865322e-05, |
|
"loss": 0.9325, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.7005780346820809, |
|
"grad_norm": 0.31869143160605945, |
|
"learning_rate": 4.983697732727964e-05, |
|
"loss": 0.9484, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7017341040462428, |
|
"grad_norm": 0.30604777371620057, |
|
"learning_rate": 4.948824297964774e-05, |
|
"loss": 0.9497, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.7028901734104046, |
|
"grad_norm": 0.2781484111225009, |
|
"learning_rate": 4.914033141627931e-05, |
|
"loss": 0.9732, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7040462427745665, |
|
"grad_norm": 0.287979860552131, |
|
"learning_rate": 4.87932483042937e-05, |
|
"loss": 0.9513, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.7052023121387283, |
|
"grad_norm": 0.28553452094386605, |
|
"learning_rate": 4.8446999297315764e-05, |
|
"loss": 0.9528, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.7063583815028902, |
|
"grad_norm": 0.26876115159064556, |
|
"learning_rate": 4.810159003538365e-05, |
|
"loss": 0.9513, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.707514450867052, |
|
"grad_norm": 0.29148448420091455, |
|
"learning_rate": 4.775702614485678e-05, |
|
"loss": 0.9732, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.7086705202312139, |
|
"grad_norm": 0.2858967419780267, |
|
"learning_rate": 4.7413313238324556e-05, |
|
"loss": 0.9874, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.7098265895953757, |
|
"grad_norm": 0.2853440571230289, |
|
"learning_rate": 4.707045691451456e-05, |
|
"loss": 0.9365, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.7109826589595376, |
|
"grad_norm": 0.2756362587234919, |
|
"learning_rate": 4.6728462758201574e-05, |
|
"loss": 0.918, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.7121387283236994, |
|
"grad_norm": 0.2933985662205089, |
|
"learning_rate": 4.638733634011663e-05, |
|
"loss": 0.9652, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.7132947976878613, |
|
"grad_norm": 0.3088785959772121, |
|
"learning_rate": 4.604708321685618e-05, |
|
"loss": 0.9468, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.7144508670520231, |
|
"grad_norm": 0.2897414636494522, |
|
"learning_rate": 4.5707708930791514e-05, |
|
"loss": 0.9136, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.715606936416185, |
|
"grad_norm": 0.2977453761954692, |
|
"learning_rate": 4.536921900997872e-05, |
|
"loss": 0.9684, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.7167630057803468, |
|
"grad_norm": 0.31568624217549895, |
|
"learning_rate": 4.5031618968068325e-05, |
|
"loss": 0.9804, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.7179190751445087, |
|
"grad_norm": 0.2937121832237494, |
|
"learning_rate": 4.4694914304215796e-05, |
|
"loss": 0.9923, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.7190751445086705, |
|
"grad_norm": 0.31223322912252777, |
|
"learning_rate": 4.4359110502991773e-05, |
|
"loss": 0.9493, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.7202312138728324, |
|
"grad_norm": 0.2948338805474285, |
|
"learning_rate": 4.402421303429274e-05, |
|
"loss": 1.0058, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.7213872832369942, |
|
"grad_norm": 0.2998471688331991, |
|
"learning_rate": 4.3690227353251944e-05, |
|
"loss": 0.9793, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.7225433526011561, |
|
"grad_norm": 0.30105760378227486, |
|
"learning_rate": 4.335715890015067e-05, |
|
"loss": 0.9636, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.7236994219653179, |
|
"grad_norm": 0.298969154483284, |
|
"learning_rate": 4.302501310032937e-05, |
|
"loss": 0.9849, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.7248554913294798, |
|
"grad_norm": 0.30686646202523543, |
|
"learning_rate": 4.26937953640995e-05, |
|
"loss": 0.943, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.7260115606936416, |
|
"grad_norm": 0.2906008606262198, |
|
"learning_rate": 4.236351108665537e-05, |
|
"loss": 0.9584, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.7271676300578035, |
|
"grad_norm": 0.2965620261809724, |
|
"learning_rate": 4.203416564798608e-05, |
|
"loss": 0.9376, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.7283236994219653, |
|
"grad_norm": 0.29569547507554944, |
|
"learning_rate": 4.170576441278815e-05, |
|
"loss": 1.0158, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7294797687861272, |
|
"grad_norm": 0.2873361360138618, |
|
"learning_rate": 4.137831273037793e-05, |
|
"loss": 0.956, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.730635838150289, |
|
"grad_norm": 0.28863435820628536, |
|
"learning_rate": 4.1051815934604465e-05, |
|
"loss": 0.9456, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.7317919075144509, |
|
"grad_norm": 0.30820662139709337, |
|
"learning_rate": 4.072627934376292e-05, |
|
"loss": 0.9627, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.7329479768786127, |
|
"grad_norm": 0.2733290689658499, |
|
"learning_rate": 4.0401708260507495e-05, |
|
"loss": 0.9896, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.7341040462427746, |
|
"grad_norm": 0.29448552437974784, |
|
"learning_rate": 4.00781079717653e-05, |
|
"loss": 0.9817, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7352601156069364, |
|
"grad_norm": 0.31792876220744604, |
|
"learning_rate": 3.975548374865034e-05, |
|
"loss": 0.98, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.7364161849710983, |
|
"grad_norm": 0.2867123384035597, |
|
"learning_rate": 3.943384084637732e-05, |
|
"loss": 0.9845, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.7375722543352601, |
|
"grad_norm": 0.29809867628254705, |
|
"learning_rate": 3.9113184504176426e-05, |
|
"loss": 0.9589, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.738728323699422, |
|
"grad_norm": 0.2962633412794635, |
|
"learning_rate": 3.879351994520774e-05, |
|
"loss": 0.9644, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.7398843930635838, |
|
"grad_norm": 0.3173965169375924, |
|
"learning_rate": 3.847485237647614e-05, |
|
"loss": 0.9243, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7410404624277457, |
|
"grad_norm": 0.2802160296625643, |
|
"learning_rate": 3.815718698874672e-05, |
|
"loss": 0.9627, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.7421965317919075, |
|
"grad_norm": 0.30606251064274426, |
|
"learning_rate": 3.7840528956459956e-05, |
|
"loss": 1.0133, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.7433526011560694, |
|
"grad_norm": 0.29567079239489624, |
|
"learning_rate": 3.752488343764751e-05, |
|
"loss": 1.0064, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.7445086705202312, |
|
"grad_norm": 0.2908365758289181, |
|
"learning_rate": 3.721025557384845e-05, |
|
"loss": 0.939, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.7456647398843931, |
|
"grad_norm": 0.2821772422176111, |
|
"learning_rate": 3.689665049002513e-05, |
|
"loss": 0.9176, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7468208092485549, |
|
"grad_norm": 0.28323988627298013, |
|
"learning_rate": 3.658407329447986e-05, |
|
"loss": 0.9504, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.7479768786127168, |
|
"grad_norm": 0.3052577077686943, |
|
"learning_rate": 3.627252907877184e-05, |
|
"loss": 0.963, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.7491329479768786, |
|
"grad_norm": 0.29287895420892424, |
|
"learning_rate": 3.5962022917633976e-05, |
|
"loss": 0.9758, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.7502890173410405, |
|
"grad_norm": 0.26219424592081725, |
|
"learning_rate": 3.56525598688904e-05, |
|
"loss": 0.9501, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.7514450867052023, |
|
"grad_norm": 0.2999575076011054, |
|
"learning_rate": 3.534414497337406e-05, |
|
"loss": 0.9979, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7526011560693642, |
|
"grad_norm": 0.30401739845864295, |
|
"learning_rate": 3.503678325484448e-05, |
|
"loss": 0.96, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.753757225433526, |
|
"grad_norm": 0.2861448274685923, |
|
"learning_rate": 3.473047971990605e-05, |
|
"loss": 0.9956, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.7549132947976879, |
|
"grad_norm": 0.2749463140020902, |
|
"learning_rate": 3.442523935792651e-05, |
|
"loss": 1.0207, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.7560693641618497, |
|
"grad_norm": 0.2987193247584773, |
|
"learning_rate": 3.4121067140955455e-05, |
|
"loss": 0.9469, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.7572254335260116, |
|
"grad_norm": 0.29389883736035816, |
|
"learning_rate": 3.3817968023643766e-05, |
|
"loss": 0.9934, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7583815028901734, |
|
"grad_norm": 0.28653966462371155, |
|
"learning_rate": 3.351594694316239e-05, |
|
"loss": 0.9634, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.7595375722543353, |
|
"grad_norm": 0.29816187415165, |
|
"learning_rate": 3.321500881912225e-05, |
|
"loss": 0.9398, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.7606936416184971, |
|
"grad_norm": 0.289319657394509, |
|
"learning_rate": 3.29151585534941e-05, |
|
"loss": 0.8928, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.761849710982659, |
|
"grad_norm": 0.29521868021236264, |
|
"learning_rate": 3.261640103052849e-05, |
|
"loss": 0.9653, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.7630057803468208, |
|
"grad_norm": 0.29934248191757035, |
|
"learning_rate": 3.23187411166764e-05, |
|
"loss": 0.97, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7641618497109827, |
|
"grad_norm": 0.32270679766275234, |
|
"learning_rate": 3.2022183660509916e-05, |
|
"loss": 0.9495, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.7653179190751445, |
|
"grad_norm": 0.30814076544629976, |
|
"learning_rate": 3.172673349264316e-05, |
|
"loss": 0.9897, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.7664739884393064, |
|
"grad_norm": 0.2731527495101352, |
|
"learning_rate": 3.143239542565365e-05, |
|
"loss": 0.9922, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.7676300578034682, |
|
"grad_norm": 0.29822125577085, |
|
"learning_rate": 3.113917425400406e-05, |
|
"loss": 0.974, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.7687861271676301, |
|
"grad_norm": 0.27831179956286994, |
|
"learning_rate": 3.084707475396385e-05, |
|
"loss": 0.9309, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7699421965317919, |
|
"grad_norm": 0.30061057019675924, |
|
"learning_rate": 3.05561016835317e-05, |
|
"loss": 0.9889, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.7710982658959538, |
|
"grad_norm": 0.2839150865870422, |
|
"learning_rate": 3.026625978235793e-05, |
|
"loss": 0.9279, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.7722543352601156, |
|
"grad_norm": 0.28191298234209533, |
|
"learning_rate": 2.9977553771667178e-05, |
|
"loss": 0.9043, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.7734104046242775, |
|
"grad_norm": 0.2839643289974997, |
|
"learning_rate": 2.968998835418174e-05, |
|
"loss": 0.958, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.7745664739884393, |
|
"grad_norm": 0.2727626163667981, |
|
"learning_rate": 2.9403568214044687e-05, |
|
"loss": 1.0045, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7757225433526012, |
|
"grad_norm": 0.2829586617421094, |
|
"learning_rate": 2.9118298016743815e-05, |
|
"loss": 0.9477, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.776878612716763, |
|
"grad_norm": 0.2863725325553883, |
|
"learning_rate": 2.8834182409035527e-05, |
|
"loss": 1.0095, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.7780346820809249, |
|
"grad_norm": 0.2981971073978521, |
|
"learning_rate": 2.8551226018869105e-05, |
|
"loss": 1.0128, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.7791907514450868, |
|
"grad_norm": 0.2783020472744863, |
|
"learning_rate": 2.8269433455311378e-05, |
|
"loss": 0.9581, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.7803468208092486, |
|
"grad_norm": 0.29194953070328256, |
|
"learning_rate": 2.798880930847173e-05, |
|
"loss": 0.9402, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7815028901734105, |
|
"grad_norm": 0.31785104915649315, |
|
"learning_rate": 2.7709358149427113e-05, |
|
"loss": 0.9912, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.7826589595375723, |
|
"grad_norm": 0.27416031521109446, |
|
"learning_rate": 2.7431084530147834e-05, |
|
"loss": 0.9413, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.7838150289017342, |
|
"grad_norm": 0.3023857012875686, |
|
"learning_rate": 2.7153992983423283e-05, |
|
"loss": 1.0101, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.784971098265896, |
|
"grad_norm": 0.27883119977909965, |
|
"learning_rate": 2.687808802278805e-05, |
|
"loss": 0.9699, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.7861271676300579, |
|
"grad_norm": 0.28656925239378356, |
|
"learning_rate": 2.6603374142448467e-05, |
|
"loss": 0.9459, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7872832369942196, |
|
"grad_norm": 0.2805856931300408, |
|
"learning_rate": 2.632985581720947e-05, |
|
"loss": 0.9347, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.7884393063583816, |
|
"grad_norm": 0.28925155839879374, |
|
"learning_rate": 2.6057537502401598e-05, |
|
"loss": 0.9549, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.7895953757225433, |
|
"grad_norm": 0.30326180151228377, |
|
"learning_rate": 2.5786423633808487e-05, |
|
"loss": 0.9954, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.7907514450867053, |
|
"grad_norm": 0.310861152386562, |
|
"learning_rate": 2.5516518627594542e-05, |
|
"loss": 0.981, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.791907514450867, |
|
"grad_norm": 0.29730609595809665, |
|
"learning_rate": 2.524782688023305e-05, |
|
"loss": 0.9933, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.793063583815029, |
|
"grad_norm": 0.29011083826669604, |
|
"learning_rate": 2.4980352768434643e-05, |
|
"loss": 1.0396, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.7942196531791907, |
|
"grad_norm": 0.2941491077374515, |
|
"learning_rate": 2.4714100649075833e-05, |
|
"loss": 0.9676, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.7953757225433526, |
|
"grad_norm": 0.28795285220192574, |
|
"learning_rate": 2.4449074859128197e-05, |
|
"loss": 0.9545, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.7965317919075144, |
|
"grad_norm": 0.2973713924271482, |
|
"learning_rate": 2.4185279715587704e-05, |
|
"loss": 0.925, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.7976878612716763, |
|
"grad_norm": 0.28132269243331093, |
|
"learning_rate": 2.39227195154043e-05, |
|
"loss": 0.974, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.7988439306358381, |
|
"grad_norm": 0.3130593570967486, |
|
"learning_rate": 2.366139853541197e-05, |
|
"loss": 0.9429, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.28725553924621783, |
|
"learning_rate": 2.340132103225916e-05, |
|
"loss": 0.9609, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.8011560693641618, |
|
"grad_norm": 0.2927313493050735, |
|
"learning_rate": 2.3142491242339338e-05, |
|
"loss": 0.9801, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.8023121387283237, |
|
"grad_norm": 0.2959751791171696, |
|
"learning_rate": 2.288491338172196e-05, |
|
"loss": 0.9983, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.8034682080924855, |
|
"grad_norm": 0.29009100429119816, |
|
"learning_rate": 2.262859164608393e-05, |
|
"loss": 0.9435, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.8046242774566474, |
|
"grad_norm": 0.28065545174572615, |
|
"learning_rate": 2.2373530210641103e-05, |
|
"loss": 0.9664, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.8057803468208092, |
|
"grad_norm": 0.3079082233043266, |
|
"learning_rate": 2.2119733230080408e-05, |
|
"loss": 0.9339, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.8069364161849711, |
|
"grad_norm": 0.2994856214599522, |
|
"learning_rate": 2.186720483849206e-05, |
|
"loss": 0.9642, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.8080924855491329, |
|
"grad_norm": 0.3150825598363239, |
|
"learning_rate": 2.1615949149302305e-05, |
|
"loss": 0.9391, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.8092485549132948, |
|
"grad_norm": 0.28663989647188765, |
|
"learning_rate": 2.1365970255206402e-05, |
|
"loss": 0.9533, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8104046242774566, |
|
"grad_norm": 0.3064963810102998, |
|
"learning_rate": 2.1117272228101902e-05, |
|
"loss": 0.9269, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.8115606936416185, |
|
"grad_norm": 0.2888662994868131, |
|
"learning_rate": 2.0869859119022328e-05, |
|
"loss": 0.9684, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.8127167630057803, |
|
"grad_norm": 0.29449459192953004, |
|
"learning_rate": 2.0623734958071296e-05, |
|
"loss": 0.9588, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.8138728323699422, |
|
"grad_norm": 0.28505307744008485, |
|
"learning_rate": 2.037890375435677e-05, |
|
"loss": 0.969, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.815028901734104, |
|
"grad_norm": 0.264553046638561, |
|
"learning_rate": 2.0135369495925714e-05, |
|
"loss": 0.8865, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.8161849710982659, |
|
"grad_norm": 0.2854615552898905, |
|
"learning_rate": 1.9893136149699287e-05, |
|
"loss": 0.9606, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.8173410404624277, |
|
"grad_norm": 0.30132043472669484, |
|
"learning_rate": 1.9652207661408073e-05, |
|
"loss": 0.9783, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.8184971098265896, |
|
"grad_norm": 0.28158707938088035, |
|
"learning_rate": 1.941258795552785e-05, |
|
"loss": 0.9702, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.8196531791907514, |
|
"grad_norm": 0.3117678798282903, |
|
"learning_rate": 1.917428093521576e-05, |
|
"loss": 0.9381, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.8208092485549133, |
|
"grad_norm": 0.2721446216347946, |
|
"learning_rate": 1.8937290482246606e-05, |
|
"loss": 0.9294, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.8219653179190751, |
|
"grad_norm": 0.274947743569076, |
|
"learning_rate": 1.870162045694971e-05, |
|
"loss": 0.9874, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.823121387283237, |
|
"grad_norm": 0.29164661845373113, |
|
"learning_rate": 1.8467274698145942e-05, |
|
"loss": 0.9641, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.8242774566473988, |
|
"grad_norm": 0.2857254550756751, |
|
"learning_rate": 1.8234257023085234e-05, |
|
"loss": 0.9629, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.8254335260115607, |
|
"grad_norm": 0.28344620289201494, |
|
"learning_rate": 1.8002571227384467e-05, |
|
"loss": 0.9213, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.8265895953757225, |
|
"grad_norm": 0.2940307676845195, |
|
"learning_rate": 1.777222108496558e-05, |
|
"loss": 0.9567, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.8277456647398844, |
|
"grad_norm": 0.29951372831586726, |
|
"learning_rate": 1.7543210347994022e-05, |
|
"loss": 0.9813, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.8289017341040462, |
|
"grad_norm": 0.2878040658280256, |
|
"learning_rate": 1.7315542746817825e-05, |
|
"loss": 0.9152, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.8300578034682081, |
|
"grad_norm": 0.30016211006512195, |
|
"learning_rate": 1.7089221989906633e-05, |
|
"loss": 0.9656, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.8312138728323699, |
|
"grad_norm": 0.2744245197404456, |
|
"learning_rate": 1.6864251763791428e-05, |
|
"loss": 1.0234, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.8323699421965318, |
|
"grad_norm": 0.28064881967639976, |
|
"learning_rate": 1.664063573300446e-05, |
|
"loss": 1.0006, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8335260115606936, |
|
"grad_norm": 0.28500326120194347, |
|
"learning_rate": 1.6418377540019536e-05, |
|
"loss": 0.9442, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.8346820809248555, |
|
"grad_norm": 0.27741047130235014, |
|
"learning_rate": 1.6197480805192634e-05, |
|
"loss": 0.9173, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.8358381502890173, |
|
"grad_norm": 0.2952544695479881, |
|
"learning_rate": 1.5977949126703084e-05, |
|
"loss": 0.9546, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.8369942196531792, |
|
"grad_norm": 0.2753923526465453, |
|
"learning_rate": 1.5759786080494743e-05, |
|
"loss": 0.9922, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.838150289017341, |
|
"grad_norm": 0.2713440737201456, |
|
"learning_rate": 1.554299522021796e-05, |
|
"loss": 0.9215, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.8393063583815029, |
|
"grad_norm": 0.2924976737824655, |
|
"learning_rate": 1.5327580077171587e-05, |
|
"loss": 0.9553, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.8404624277456647, |
|
"grad_norm": 0.28640397728398614, |
|
"learning_rate": 1.5113544160245397e-05, |
|
"loss": 0.9395, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.8416184971098266, |
|
"grad_norm": 0.2859545519050961, |
|
"learning_rate": 1.4900890955863067e-05, |
|
"loss": 0.9711, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.8427745664739884, |
|
"grad_norm": 0.3089713704921713, |
|
"learning_rate": 1.4689623927925289e-05, |
|
"loss": 1.0092, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.8439306358381503, |
|
"grad_norm": 0.2837926097401027, |
|
"learning_rate": 1.4479746517753335e-05, |
|
"loss": 1.0052, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8450867052023121, |
|
"grad_norm": 0.30261005244224143, |
|
"learning_rate": 1.4271262144033116e-05, |
|
"loss": 0.9991, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.846242774566474, |
|
"grad_norm": 0.2674381336270983, |
|
"learning_rate": 1.4064174202759407e-05, |
|
"loss": 0.9552, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.8473988439306358, |
|
"grad_norm": 0.2767756118448426, |
|
"learning_rate": 1.3858486067180465e-05, |
|
"loss": 0.9573, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.8485549132947977, |
|
"grad_norm": 0.2836915452259556, |
|
"learning_rate": 1.3654201087743279e-05, |
|
"loss": 0.9466, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.8497109826589595, |
|
"grad_norm": 0.30898236268087276, |
|
"learning_rate": 1.3451322592038774e-05, |
|
"loss": 0.9379, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.8508670520231214, |
|
"grad_norm": 0.30463841544406706, |
|
"learning_rate": 1.3249853884747753e-05, |
|
"loss": 0.9506, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.8520231213872832, |
|
"grad_norm": 0.29760105531487135, |
|
"learning_rate": 1.3049798247587064e-05, |
|
"loss": 0.9603, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.8531791907514451, |
|
"grad_norm": 0.2910112980743013, |
|
"learning_rate": 1.2851158939256002e-05, |
|
"loss": 0.9903, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.8543352601156069, |
|
"grad_norm": 0.28051123703308095, |
|
"learning_rate": 1.2653939195383446e-05, |
|
"loss": 0.9552, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.8554913294797688, |
|
"grad_norm": 0.32807409572083474, |
|
"learning_rate": 1.2458142228474967e-05, |
|
"loss": 0.9391, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8566473988439306, |
|
"grad_norm": 0.2857511643393787, |
|
"learning_rate": 1.2263771227860555e-05, |
|
"loss": 0.9249, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.8578034682080925, |
|
"grad_norm": 0.3130878799401169, |
|
"learning_rate": 1.2070829359642743e-05, |
|
"loss": 0.9801, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.8589595375722543, |
|
"grad_norm": 0.2857834049135047, |
|
"learning_rate": 1.1879319766644969e-05, |
|
"loss": 0.9881, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.8601156069364162, |
|
"grad_norm": 0.28295250626185514, |
|
"learning_rate": 1.168924556836034e-05, |
|
"loss": 0.9212, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.861271676300578, |
|
"grad_norm": 0.270720741833164, |
|
"learning_rate": 1.1500609860900934e-05, |
|
"loss": 0.9208, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.8624277456647399, |
|
"grad_norm": 0.3310546829847928, |
|
"learning_rate": 1.131341571694724e-05, |
|
"loss": 0.9134, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.8635838150289017, |
|
"grad_norm": 0.2906866125100536, |
|
"learning_rate": 1.1127666185698183e-05, |
|
"loss": 0.9292, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.8647398843930636, |
|
"grad_norm": 0.2857402985603114, |
|
"learning_rate": 1.0943364292821478e-05, |
|
"loss": 0.9782, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.8658959537572254, |
|
"grad_norm": 0.30621748628878964, |
|
"learning_rate": 1.0760513040404275e-05, |
|
"loss": 0.9538, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.8670520231213873, |
|
"grad_norm": 0.3076325870742748, |
|
"learning_rate": 1.0579115406904327e-05, |
|
"loss": 0.9814, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8682080924855491, |
|
"grad_norm": 0.2875679417313269, |
|
"learning_rate": 1.0399174347101404e-05, |
|
"loss": 0.9682, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.869364161849711, |
|
"grad_norm": 0.2827349143068363, |
|
"learning_rate": 1.0220692792049169e-05, |
|
"loss": 0.9715, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.8705202312138728, |
|
"grad_norm": 0.2898950516764476, |
|
"learning_rate": 1.0043673649027518e-05, |
|
"loss": 0.9809, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.8716763005780347, |
|
"grad_norm": 0.29127323235770597, |
|
"learning_rate": 9.86811980149519e-06, |
|
"loss": 0.9738, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.8728323699421965, |
|
"grad_norm": 0.289489961872677, |
|
"learning_rate": 9.694034109042694e-06, |
|
"loss": 1.0206, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.8739884393063584, |
|
"grad_norm": 0.2820602963126248, |
|
"learning_rate": 9.521419407345878e-06, |
|
"loss": 0.8967, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.8751445086705202, |
|
"grad_norm": 0.31061259437004146, |
|
"learning_rate": 9.350278508119636e-06, |
|
"loss": 0.9786, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.8763005780346821, |
|
"grad_norm": 0.2815496774647879, |
|
"learning_rate": 9.180614199072146e-06, |
|
"loss": 0.9485, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.8774566473988439, |
|
"grad_norm": 0.3518473115040322, |
|
"learning_rate": 9.012429243859487e-06, |
|
"loss": 0.9903, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.8786127167630058, |
|
"grad_norm": 0.2916712847778055, |
|
"learning_rate": 8.845726382040597e-06, |
|
"loss": 0.9397, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8797687861271676, |
|
"grad_norm": 0.29392008724784624, |
|
"learning_rate": 8.680508329032589e-06, |
|
"loss": 0.9629, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.8809248554913295, |
|
"grad_norm": 0.28601993585254176, |
|
"learning_rate": 8.516777776066643e-06, |
|
"loss": 0.982, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.8820809248554913, |
|
"grad_norm": 0.28346541614728327, |
|
"learning_rate": 8.354537390144057e-06, |
|
"loss": 0.9446, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.8832369942196532, |
|
"grad_norm": 0.289284064978837, |
|
"learning_rate": 8.19378981399287e-06, |
|
"loss": 0.9647, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.884393063583815, |
|
"grad_norm": 0.2867377684057541, |
|
"learning_rate": 8.034537666024822e-06, |
|
"loss": 0.9455, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.8855491329479769, |
|
"grad_norm": 0.2756548624705791, |
|
"learning_rate": 7.876783540292599e-06, |
|
"loss": 1.0273, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.8867052023121387, |
|
"grad_norm": 0.2936449029949148, |
|
"learning_rate": 7.720530006447736e-06, |
|
"loss": 0.9742, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.8878612716763006, |
|
"grad_norm": 0.297967509970897, |
|
"learning_rate": 7.565779609698631e-06, |
|
"loss": 0.9329, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.8890173410404625, |
|
"grad_norm": 0.2942735618729235, |
|
"learning_rate": 7.412534870769116e-06, |
|
"loss": 0.9581, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.8901734104046243, |
|
"grad_norm": 0.28806737344976613, |
|
"learning_rate": 7.260798285857484e-06, |
|
"loss": 1.0075, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8913294797687862, |
|
"grad_norm": 0.2831951777045236, |
|
"learning_rate": 7.110572326595711e-06, |
|
"loss": 0.9971, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.892485549132948, |
|
"grad_norm": 0.28520928967193043, |
|
"learning_rate": 6.961859440009233e-06, |
|
"loss": 1.0001, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.8936416184971099, |
|
"grad_norm": 0.27689057228854386, |
|
"learning_rate": 6.8146620484771495e-06, |
|
"loss": 0.9889, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.8947976878612717, |
|
"grad_norm": 0.285699685858991, |
|
"learning_rate": 6.668982549692649e-06, |
|
"loss": 0.9478, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.8959537572254336, |
|
"grad_norm": 0.2754348125812895, |
|
"learning_rate": 6.524823316624063e-06, |
|
"loss": 0.9343, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8971098265895954, |
|
"grad_norm": 0.29989809168873266, |
|
"learning_rate": 6.382186697476167e-06, |
|
"loss": 0.9789, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.8982658959537573, |
|
"grad_norm": 0.2828138414197748, |
|
"learning_rate": 6.2410750156518985e-06, |
|
"loss": 1.0062, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.8994219653179191, |
|
"grad_norm": 0.2889085637993367, |
|
"learning_rate": 6.101490569714574e-06, |
|
"loss": 0.9696, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.900578034682081, |
|
"grad_norm": 0.29960325302848234, |
|
"learning_rate": 5.963435633350412e-06, |
|
"loss": 0.9162, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.9017341040462428, |
|
"grad_norm": 0.3005589289503204, |
|
"learning_rate": 5.826912455331468e-06, |
|
"loss": 0.9903, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.9028901734104047, |
|
"grad_norm": 0.2928261089403607, |
|
"learning_rate": 5.691923259479093e-06, |
|
"loss": 0.9736, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.9040462427745665, |
|
"grad_norm": 0.28429998893881614, |
|
"learning_rate": 5.558470244627634e-06, |
|
"loss": 0.9711, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.9052023121387284, |
|
"grad_norm": 0.2784468816364227, |
|
"learning_rate": 5.4265555845886215e-06, |
|
"loss": 0.9811, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.9063583815028902, |
|
"grad_norm": 0.2872050965542848, |
|
"learning_rate": 5.29618142811541e-06, |
|
"loss": 0.954, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.9075144508670521, |
|
"grad_norm": 0.2948554706499198, |
|
"learning_rate": 5.16734989886809e-06, |
|
"loss": 0.9811, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.9086705202312139, |
|
"grad_norm": 0.28399320997388067, |
|
"learning_rate": 5.040063095379011e-06, |
|
"loss": 0.9175, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.9098265895953758, |
|
"grad_norm": 0.2855808882463355, |
|
"learning_rate": 4.914323091018535e-06, |
|
"loss": 0.9901, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.9109826589595376, |
|
"grad_norm": 0.3021329131025513, |
|
"learning_rate": 4.790131933961206e-06, |
|
"loss": 0.9613, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.9121387283236995, |
|
"grad_norm": 0.2862589716791116, |
|
"learning_rate": 4.6674916471524995e-06, |
|
"loss": 0.9961, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.9132947976878613, |
|
"grad_norm": 0.2998472712073513, |
|
"learning_rate": 4.546404228275824e-06, |
|
"loss": 0.9845, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.9144508670520232, |
|
"grad_norm": 0.28603652316011496, |
|
"learning_rate": 4.426871649719932e-06, |
|
"loss": 0.9765, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.915606936416185, |
|
"grad_norm": 0.2872761079408887, |
|
"learning_rate": 4.3088958585468686e-06, |
|
"loss": 0.9997, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.9167630057803469, |
|
"grad_norm": 0.2873199733628149, |
|
"learning_rate": 4.192478776460229e-06, |
|
"loss": 1.0064, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.9179190751445087, |
|
"grad_norm": 0.27515435582137365, |
|
"learning_rate": 4.077622299773831e-06, |
|
"loss": 0.9189, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.9190751445086706, |
|
"grad_norm": 0.27999010046883294, |
|
"learning_rate": 3.96432829938086e-06, |
|
"loss": 0.9885, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.9202312138728324, |
|
"grad_norm": 0.2635891865916151, |
|
"learning_rate": 3.8525986207233465e-06, |
|
"loss": 0.9352, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.9213872832369943, |
|
"grad_norm": 0.28633688125843376, |
|
"learning_rate": 3.742435083762186e-06, |
|
"loss": 0.9599, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.922543352601156, |
|
"grad_norm": 0.2826637970887437, |
|
"learning_rate": 3.633839482947421e-06, |
|
"loss": 0.9948, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.923699421965318, |
|
"grad_norm": 0.30153590546133957, |
|
"learning_rate": 3.526813587189026e-06, |
|
"loss": 0.9804, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.9248554913294798, |
|
"grad_norm": 0.2858269823618572, |
|
"learning_rate": 3.4213591398281175e-06, |
|
"loss": 0.974, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9260115606936417, |
|
"grad_norm": 0.2820688133929364, |
|
"learning_rate": 3.3174778586085643e-06, |
|
"loss": 0.9566, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.9271676300578034, |
|
"grad_norm": 0.3000844480451751, |
|
"learning_rate": 3.2151714356489225e-06, |
|
"loss": 1.0118, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.9283236994219654, |
|
"grad_norm": 0.2790666100533931, |
|
"learning_rate": 3.114441537415014e-06, |
|
"loss": 0.9533, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.9294797687861271, |
|
"grad_norm": 0.29749566810305794, |
|
"learning_rate": 3.0152898046926557e-06, |
|
"loss": 0.9569, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.930635838150289, |
|
"grad_norm": 0.28219907290129675, |
|
"learning_rate": 2.917717852560997e-06, |
|
"loss": 0.9512, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.9317919075144508, |
|
"grad_norm": 0.29888043306041123, |
|
"learning_rate": 2.8217272703661923e-06, |
|
"loss": 1.0231, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.9329479768786128, |
|
"grad_norm": 0.26919643142805166, |
|
"learning_rate": 2.727319621695501e-06, |
|
"loss": 0.9873, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.9341040462427745, |
|
"grad_norm": 0.29380224096591, |
|
"learning_rate": 2.6344964443518526e-06, |
|
"loss": 0.9385, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.9352601156069364, |
|
"grad_norm": 0.28299962979190113, |
|
"learning_rate": 2.5432592503288e-06, |
|
"loss": 0.9292, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.9364161849710982, |
|
"grad_norm": 0.29962326178158855, |
|
"learning_rate": 2.453609525785816e-06, |
|
"loss": 0.9773, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9375722543352601, |
|
"grad_norm": 0.32360192327986553, |
|
"learning_rate": 2.3655487310241585e-06, |
|
"loss": 0.9529, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.9387283236994219, |
|
"grad_norm": 0.30410924441522585, |
|
"learning_rate": 2.279078300463089e-06, |
|
"loss": 1.0106, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.9398843930635838, |
|
"grad_norm": 0.30431377413637145, |
|
"learning_rate": 2.1941996426164344e-06, |
|
"loss": 0.9735, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.9410404624277456, |
|
"grad_norm": 0.2903468628517127, |
|
"learning_rate": 2.1109141400697418e-06, |
|
"loss": 0.972, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.9421965317919075, |
|
"grad_norm": 0.2954528106164249, |
|
"learning_rate": 2.029223149457682e-06, |
|
"loss": 0.9662, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.9433526011560693, |
|
"grad_norm": 0.27439734246785746, |
|
"learning_rate": 1.949128001441969e-06, |
|
"loss": 0.9509, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.9445086705202312, |
|
"grad_norm": 0.27172305484407105, |
|
"learning_rate": 1.8706300006896882e-06, |
|
"loss": 0.8628, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.945664739884393, |
|
"grad_norm": 0.27216092212065524, |
|
"learning_rate": 1.7937304258520692e-06, |
|
"loss": 0.9785, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.9468208092485549, |
|
"grad_norm": 0.2918352349773759, |
|
"learning_rate": 1.718430529543613e-06, |
|
"loss": 0.9571, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.9479768786127167, |
|
"grad_norm": 0.272999376680257, |
|
"learning_rate": 1.6447315383217643e-06, |
|
"loss": 0.918, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9491329479768786, |
|
"grad_norm": 0.27901699608664005, |
|
"learning_rate": 1.5726346526668156e-06, |
|
"loss": 0.9528, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.9502890173410404, |
|
"grad_norm": 0.28289047639247433, |
|
"learning_rate": 1.5021410469624465e-06, |
|
"loss": 0.9067, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.9514450867052023, |
|
"grad_norm": 0.2825322282067992, |
|
"learning_rate": 1.4332518694765707e-06, |
|
"loss": 0.8735, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.9526011560693641, |
|
"grad_norm": 0.28837647678016226, |
|
"learning_rate": 1.3659682423425968e-06, |
|
"loss": 0.9432, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.953757225433526, |
|
"grad_norm": 0.2876129817030531, |
|
"learning_rate": 1.30029126154122e-06, |
|
"loss": 0.9268, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.9549132947976878, |
|
"grad_norm": 0.2792371887058164, |
|
"learning_rate": 1.236221996882514e-06, |
|
"loss": 0.9726, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.9560693641618497, |
|
"grad_norm": 0.29099718253951457, |
|
"learning_rate": 1.1737614919885008e-06, |
|
"loss": 0.9209, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.9572254335260115, |
|
"grad_norm": 0.2995617326772327, |
|
"learning_rate": 1.1129107642761872e-06, |
|
"loss": 0.9187, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.9583815028901734, |
|
"grad_norm": 0.2762400721384854, |
|
"learning_rate": 1.053670804940987e-06, |
|
"loss": 0.9142, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.9595375722543352, |
|
"grad_norm": 0.2931863784593866, |
|
"learning_rate": 9.960425789405258e-07, |
|
"loss": 0.9084, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9606936416184971, |
|
"grad_norm": 0.2880567882354411, |
|
"learning_rate": 9.400270249789955e-07, |
|
"loss": 0.9868, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.9618497109826589, |
|
"grad_norm": 0.2871636561882668, |
|
"learning_rate": 8.856250554918344e-07, |
|
"loss": 0.9152, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.9630057803468208, |
|
"grad_norm": 0.28517050924588744, |
|
"learning_rate": 8.328375566308166e-07, |
|
"loss": 0.9757, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.9641618497109826, |
|
"grad_norm": 0.28587044682235146, |
|
"learning_rate": 7.816653882496971e-07, |
|
"loss": 0.9537, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.9653179190751445, |
|
"grad_norm": 0.29706875055091925, |
|
"learning_rate": 7.321093838901449e-07, |
|
"loss": 0.9247, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.9664739884393063, |
|
"grad_norm": 0.2861635930847044, |
|
"learning_rate": 6.841703507682206e-07, |
|
"loss": 0.8905, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.9676300578034682, |
|
"grad_norm": 0.2813627882340819, |
|
"learning_rate": 6.378490697611761e-07, |
|
"loss": 0.9749, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.96878612716763, |
|
"grad_norm": 0.2865848575770194, |
|
"learning_rate": 5.93146295394742e-07, |
|
"loss": 0.9443, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.9699421965317919, |
|
"grad_norm": 0.27882077648739195, |
|
"learning_rate": 5.500627558308713e-07, |
|
"loss": 0.9398, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.9710982658959537, |
|
"grad_norm": 0.3065313177380281, |
|
"learning_rate": 5.085991528558487e-07, |
|
"loss": 0.9236, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9722543352601156, |
|
"grad_norm": 0.2871839420940439, |
|
"learning_rate": 4.687561618688663e-07, |
|
"loss": 0.9676, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.9734104046242774, |
|
"grad_norm": 0.2637381920338272, |
|
"learning_rate": 4.3053443187103247e-07, |
|
"loss": 0.8815, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.9745664739884393, |
|
"grad_norm": 0.285755856830912, |
|
"learning_rate": 3.9393458545479157e-07, |
|
"loss": 0.9926, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.9757225433526011, |
|
"grad_norm": 0.30359812477643444, |
|
"learning_rate": 3.589572187937651e-07, |
|
"loss": 0.9604, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.976878612716763, |
|
"grad_norm": 0.29778116804881444, |
|
"learning_rate": 3.2560290163307083e-07, |
|
"loss": 0.9788, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.9780346820809248, |
|
"grad_norm": 0.2794224102916479, |
|
"learning_rate": 2.93872177280019e-07, |
|
"loss": 0.9427, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.9791907514450867, |
|
"grad_norm": 0.3008883017130643, |
|
"learning_rate": 2.637655625952973e-07, |
|
"loss": 0.9475, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.9803468208092485, |
|
"grad_norm": 0.28456190197348324, |
|
"learning_rate": 2.3528354798451058e-07, |
|
"loss": 0.9461, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.9815028901734104, |
|
"grad_norm": 0.2829229588306782, |
|
"learning_rate": 2.0842659739019887e-07, |
|
"loss": 0.9524, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.9826589595375722, |
|
"grad_norm": 0.283105235302632, |
|
"learning_rate": 1.8319514828430973e-07, |
|
"loss": 0.9474, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9838150289017341, |
|
"grad_norm": 0.27690208166745106, |
|
"learning_rate": 1.5958961166104847e-07, |
|
"loss": 0.9351, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.9849710982658959, |
|
"grad_norm": 0.25712842285219684, |
|
"learning_rate": 1.3761037203017245e-07, |
|
"loss": 0.914, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.9861271676300578, |
|
"grad_norm": 0.2735837511043363, |
|
"learning_rate": 1.1725778741076276e-07, |
|
"loss": 0.8924, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.9872832369942196, |
|
"grad_norm": 0.2820464817023967, |
|
"learning_rate": 9.853218932536212e-08, |
|
"loss": 0.9359, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.9884393063583815, |
|
"grad_norm": 0.27653871874823904, |
|
"learning_rate": 8.143388279460151e-08, |
|
"loss": 0.9704, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.9895953757225433, |
|
"grad_norm": 0.2836276601553615, |
|
"learning_rate": 6.596314633219303e-08, |
|
"loss": 0.9966, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.9907514450867052, |
|
"grad_norm": 0.27880904921583405, |
|
"learning_rate": 5.2120231940433474e-08, |
|
"loss": 0.9227, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.991907514450867, |
|
"grad_norm": 0.2657505634085022, |
|
"learning_rate": 3.990536510608544e-08, |
|
"loss": 0.9657, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.9930635838150289, |
|
"grad_norm": 0.2908473414800192, |
|
"learning_rate": 2.9318744796669183e-08, |
|
"loss": 0.9206, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.9942196531791907, |
|
"grad_norm": 0.2957217704373017, |
|
"learning_rate": 2.036054345729843e-08, |
|
"loss": 0.9528, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9953757225433526, |
|
"grad_norm": 0.2910406474246415, |
|
"learning_rate": 1.3030907007793857e-08, |
|
"loss": 1.0005, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.9965317919075144, |
|
"grad_norm": 0.2796254503042765, |
|
"learning_rate": 7.329954840362696e-09, |
|
"loss": 1.0259, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.9976878612716763, |
|
"grad_norm": 0.2844197423108749, |
|
"learning_rate": 3.257779817600337e-09, |
|
"loss": 0.9363, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.9988439306358381, |
|
"grad_norm": 0.3388694141182201, |
|
"learning_rate": 8.144482710248369e-10, |
|
"loss": 1.0117, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.29406054557823574, |
|
"learning_rate": 0.0, |
|
"loss": 0.9703, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 3.3948, |
|
"eval_samples_per_second": 2.946, |
|
"eval_steps_per_second": 0.884, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 4325, |
|
"total_flos": 1.71674607550464e+16, |
|
"train_loss": 0.9719910388461427, |
|
"train_runtime": 20047.9922, |
|
"train_samples_per_second": 3.452, |
|
"train_steps_per_second": 0.216 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 4325, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.71674607550464e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|