{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9987239472564866,
  "eval_steps": 500,
  "global_step": 587,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0017014036580178648, "grad_norm": 6.9911926276762415, "learning_rate": 1.111111111111111e-06, "loss": 1.7618, "step": 1 },
    { "epoch": 0.0034028073160357296, "grad_norm": 5.967485415913987, "learning_rate": 2.222222222222222e-06, "loss": 1.816, "step": 2 },
    { "epoch": 0.005104210974053594, "grad_norm": 5.237633301735092, "learning_rate": 3.3333333333333333e-06, "loss": 1.7219, "step": 3 },
    { "epoch": 0.006805614632071459, "grad_norm": 5.640571574498427, "learning_rate": 4.444444444444444e-06, "loss": 1.8444, "step": 4 },
    { "epoch": 0.008507018290089324, "grad_norm": 4.459110210993377, "learning_rate": 5.555555555555557e-06, "loss": 1.705, "step": 5 },
    { "epoch": 0.010208421948107189, "grad_norm": 4.47503295247886, "learning_rate": 6.666666666666667e-06, "loss": 1.5832, "step": 6 },
    { "epoch": 0.011909825606125054, "grad_norm": 4.084281383937613, "learning_rate": 7.77777777777778e-06, "loss": 1.5393, "step": 7 },
    { "epoch": 0.013611229264142918, "grad_norm": 3.669506717278801, "learning_rate": 8.888888888888888e-06, "loss": 1.4783, "step": 8 },
    { "epoch": 0.015312632922160783, "grad_norm": 2.7312576917476763, "learning_rate": 1e-05, "loss": 1.3517, "step": 9 },
    { "epoch": 0.017014036580178648, "grad_norm": 3.675807427395712, "learning_rate": 1.1111111111111113e-05, "loss": 1.3203, "step": 10 },
    { "epoch": 0.01871544023819651, "grad_norm": 2.610967088571799, "learning_rate": 1.2222222222222224e-05, "loss": 1.184, "step": 11 },
    { "epoch": 0.020416843896214378, "grad_norm": 3.3404049751198204, "learning_rate": 1.3333333333333333e-05, "loss": 1.2124, "step": 12 },
    { "epoch": 0.02211824755423224, "grad_norm": 2.749767662527906, "learning_rate": 1.4444444444444446e-05, "loss": 1.0995, "step": 13 },
    { "epoch": 0.023819651212250107, "grad_norm": 3.372563247688594, "learning_rate": 1.555555555555556e-05, "loss": 1.0566, "step": 14 },
    { "epoch": 0.02552105487026797, "grad_norm": 3.6606428955549446, "learning_rate": 1.6666666666666667e-05, "loss": 0.9939, "step": 15 },
    { "epoch": 0.027222458528285837, "grad_norm": 2.6528456388837065, "learning_rate": 1.7777777777777777e-05, "loss": 0.8771, "step": 16 },
    { "epoch": 0.0289238621863037, "grad_norm": 3.4243275408942195, "learning_rate": 1.888888888888889e-05, "loss": 0.888, "step": 17 },
    { "epoch": 0.030625265844321566, "grad_norm": 2.609060463016623, "learning_rate": 2e-05, "loss": 0.6589, "step": 18 },
    { "epoch": 0.03232666950233943, "grad_norm": 2.9242170923508106, "learning_rate": 1.9999847579243196e-05, "loss": 0.7496, "step": 19 },
    { "epoch": 0.034028073160357296, "grad_norm": 2.6492050387366466, "learning_rate": 1.9999390321619196e-05, "loss": 0.6022, "step": 20 },
    { "epoch": 0.03572947681837516, "grad_norm": 2.550062716080491, "learning_rate": 1.9998628241067113e-05, "loss": 0.6182, "step": 21 },
    { "epoch": 0.03743088047639302, "grad_norm": 2.1416608664408936, "learning_rate": 1.9997561360818322e-05, "loss": 0.5865, "step": 22 },
    { "epoch": 0.03913228413441089, "grad_norm": 2.7322643613244613, "learning_rate": 1.999618971339577e-05, "loss": 0.5472, "step": 23 },
    { "epoch": 0.040833687792428755, "grad_norm": 2.2213657292081224, "learning_rate": 1.9994513340612957e-05, "loss": 0.5155, "step": 24 },
    { "epoch": 0.04253509145044662, "grad_norm": 2.303603778443028, "learning_rate": 1.9992532293572688e-05, "loss": 0.4958, "step": 25 },
    { "epoch": 0.04423649510846448, "grad_norm": 1.7534676737667882, "learning_rate": 1.9990246632665503e-05, "loss": 0.3881, "step": 26 },
    { "epoch": 0.04593789876648235, "grad_norm": 3.2948686963336242, "learning_rate": 1.998765642756783e-05, "loss": 0.4327, "step": 27 },
    { "epoch": 0.047639302424500214, "grad_norm": 1.779238054183184, "learning_rate": 1.9984761757239878e-05, "loss": 0.3997, "step": 28 },
    { "epoch": 0.04934070608251808, "grad_norm": 1.9200152427050028, "learning_rate": 1.998156270992321e-05, "loss": 0.3813, "step": 29 },
    { "epoch": 0.05104210974053594, "grad_norm": 2.001461463300215, "learning_rate": 1.9978059383138073e-05, "loss": 0.4106, "step": 30 },
    { "epoch": 0.05274351339855381, "grad_norm": 1.7813646875429028, "learning_rate": 1.997425188368041e-05, "loss": 0.3742, "step": 31 },
    { "epoch": 0.05444491705657167, "grad_norm": 1.995594487847021, "learning_rate": 1.9970140327618612e-05, "loss": 0.3599, "step": 32 },
    { "epoch": 0.05614632071458953, "grad_norm": 1.6098560851301231, "learning_rate": 1.9965724840289972e-05, "loss": 0.2282, "step": 33 },
    { "epoch": 0.0578477243726074, "grad_norm": 1.5795395141948803, "learning_rate": 1.9961005556296875e-05, "loss": 0.3284, "step": 34 },
    { "epoch": 0.059549128030625266, "grad_norm": 1.797868976276165, "learning_rate": 1.9955982619502693e-05, "loss": 0.288, "step": 35 },
    { "epoch": 0.06125053168864313, "grad_norm": 1.9470932909182852, "learning_rate": 1.9950656183027392e-05, "loss": 0.3571, "step": 36 },
    { "epoch": 0.06295193534666099, "grad_norm": 1.6744432746691509, "learning_rate": 1.994502640924286e-05, "loss": 0.3255, "step": 37 },
    { "epoch": 0.06465333900467886, "grad_norm": 1.1682129416657814, "learning_rate": 1.993909346976798e-05, "loss": 0.2146, "step": 38 },
    { "epoch": 0.06635474266269673, "grad_norm": 1.6768837338494562, "learning_rate": 1.993285754546338e-05, "loss": 0.2801, "step": 39 },
    { "epoch": 0.06805614632071459, "grad_norm": 1.2215370489316386, "learning_rate": 1.9926318826425905e-05, "loss": 0.2286, "step": 40 },
    { "epoch": 0.06975754997873246, "grad_norm": 1.3879380238369607, "learning_rate": 1.9919477511982873e-05, "loss": 0.2383, "step": 41 },
    { "epoch": 0.07145895363675032, "grad_norm": 1.406525770078526, "learning_rate": 1.991233381068594e-05, "loss": 0.2455, "step": 42 },
    { "epoch": 0.07316035729476818, "grad_norm": 1.5179467212387738, "learning_rate": 1.990488794030478e-05, "loss": 0.2933, "step": 43 },
    { "epoch": 0.07486176095278604, "grad_norm": 1.6385645309602779, "learning_rate": 1.9897140127820432e-05, "loss": 0.2177, "step": 44 },
    { "epoch": 0.07656316461080391, "grad_norm": 1.1617109957020149, "learning_rate": 1.9889090609418384e-05, "loss": 0.2346, "step": 45 },
    { "epoch": 0.07826456826882178, "grad_norm": 1.2407685878736125, "learning_rate": 1.9880739630481376e-05, "loss": 0.206, "step": 46 },
    { "epoch": 0.07996597192683964, "grad_norm": 1.385423536690397, "learning_rate": 1.9872087445581912e-05, "loss": 0.2126, "step": 47 },
    { "epoch": 0.08166737558485751, "grad_norm": 1.1282002542462168, "learning_rate": 1.9863134318474504e-05, "loss": 0.1781, "step": 48 },
    { "epoch": 0.08336877924287538, "grad_norm": 1.551014126299438, "learning_rate": 1.985388052208764e-05, "loss": 0.2023, "step": 49 },
    { "epoch": 0.08507018290089324, "grad_norm": 1.3643327541760562, "learning_rate": 1.9844326338515444e-05, "loss": 0.205, "step": 50 },
    { "epoch": 0.0867715865589111, "grad_norm": 1.3588992640234894, "learning_rate": 1.9834472059009097e-05, "loss": 0.1885, "step": 51 },
    { "epoch": 0.08847299021692896, "grad_norm": 1.1226842944866124, "learning_rate": 1.982431798396794e-05, "loss": 0.1957, "step": 52 },
    { "epoch": 0.09017439387494683, "grad_norm": 1.5828760965632824, "learning_rate": 1.9813864422930345e-05, "loss": 0.2829, "step": 53 },
    { "epoch": 0.0918757975329647, "grad_norm": 1.205011521011545, "learning_rate": 1.9803111694564246e-05, "loss": 0.1703, "step": 54 },
    { "epoch": 0.09357720119098256, "grad_norm": 1.2862224239348294, "learning_rate": 1.9792060126657437e-05, "loss": 0.1896, "step": 55 },
    { "epoch": 0.09527860484900043, "grad_norm": 0.996017309514186, "learning_rate": 1.9780710056107587e-05, "loss": 0.1431, "step": 56 },
    { "epoch": 0.0969800085070183, "grad_norm": 1.3079914833792219, "learning_rate": 1.976906182891197e-05, "loss": 0.1675, "step": 57 },
    { "epoch": 0.09868141216503616, "grad_norm": 1.2961500263200691, "learning_rate": 1.97571158001569e-05, "loss": 0.2208, "step": 58 },
    { "epoch": 0.10038281582305401, "grad_norm": 1.1298713738909105, "learning_rate": 1.9744872334006936e-05, "loss": 0.177, "step": 59 },
    { "epoch": 0.10208421948107188, "grad_norm": 1.2798494623271113, "learning_rate": 1.973233180369374e-05, "loss": 0.1858, "step": 60 },
    { "epoch": 0.10378562313908975, "grad_norm": 1.1639949502743814, "learning_rate": 1.9719494591504747e-05, "loss": 0.1374, "step": 61 },
    { "epoch": 0.10548702679710761, "grad_norm": 1.0484473870712556, "learning_rate": 1.9706361088771474e-05, "loss": 0.1738, "step": 62 },
    { "epoch": 0.10718843045512548, "grad_norm": 1.2241999313712146, "learning_rate": 1.96929316958576e-05, "loss": 0.1978, "step": 63 },
    { "epoch": 0.10888983411314335, "grad_norm": 1.4772394862229892, "learning_rate": 1.9679206822146776e-05, "loss": 0.2373, "step": 64 },
    { "epoch": 0.11059123777116121, "grad_norm": 1.097080265423446, "learning_rate": 1.9665186886030135e-05, "loss": 0.1733, "step": 65 },
    { "epoch": 0.11229264142917907, "grad_norm": 0.9660234098323358, "learning_rate": 1.9650872314893523e-05, "loss": 0.1725, "step": 66 },
    { "epoch": 0.11399404508719693, "grad_norm": 1.1576510656012562, "learning_rate": 1.9636263545104498e-05, "loss": 0.2095, "step": 67 },
    { "epoch": 0.1156954487452148, "grad_norm": 1.4925958803001196, "learning_rate": 1.962136102199901e-05, "loss": 0.2475, "step": 68 },
    { "epoch": 0.11739685240323267, "grad_norm": 1.2355340147204608, "learning_rate": 1.9606165199867822e-05, "loss": 0.147, "step": 69 },
    { "epoch": 0.11909825606125053, "grad_norm": 1.3588414143397163, "learning_rate": 1.959067654194268e-05, "loss": 0.247, "step": 70 },
    { "epoch": 0.1207996597192684, "grad_norm": 1.2247972500018036, "learning_rate": 1.9574895520382183e-05, "loss": 0.2284, "step": 71 },
    { "epoch": 0.12250106337728627, "grad_norm": 1.3201500380185944, "learning_rate": 1.955882261625737e-05, "loss": 0.2222, "step": 72 },
    { "epoch": 0.12420246703530413, "grad_norm": 1.0481960090401325, "learning_rate": 1.9542458319537094e-05, "loss": 0.1605, "step": 73 },
    { "epoch": 0.12590387069332198, "grad_norm": 0.9723275202327377, "learning_rate": 1.9525803129073046e-05, "loss": 0.1214, "step": 74 },
    { "epoch": 0.12760527435133986, "grad_norm": 1.1334387179775511, "learning_rate": 1.9508857552584574e-05, "loss": 0.1726, "step": 75 },
    { "epoch": 0.12930667800935772, "grad_norm": 1.3165643507080502, "learning_rate": 1.9491622106643195e-05, "loss": 0.1892, "step": 76 },
    { "epoch": 0.1310080816673756, "grad_norm": 1.054590261412754, "learning_rate": 1.9474097316656856e-05, "loss": 0.1585, "step": 77 },
    { "epoch": 0.13270948532539345, "grad_norm": 1.376136178400177, "learning_rate": 1.9456283716853906e-05, "loss": 0.2075, "step": 78 },
    { "epoch": 0.1344108889834113, "grad_norm": 1.224747493019771, "learning_rate": 1.9438181850266815e-05, "loss": 0.2071, "step": 79 },
    { "epoch": 0.13611229264142918, "grad_norm": 1.1012108063521493, "learning_rate": 1.941979226871563e-05, "loss": 0.1641, "step": 80 },
    { "epoch": 0.13781369629944704, "grad_norm": 1.0608643422778392, "learning_rate": 1.9401115532791134e-05, "loss": 0.1759, "step": 81 },
    { "epoch": 0.13951509995746492, "grad_norm": 1.166727863718526, "learning_rate": 1.938215221183777e-05, "loss": 0.1873, "step": 82 },
    { "epoch": 0.14121650361548277, "grad_norm": 1.2536986042781049, "learning_rate": 1.936290288393629e-05, "loss": 0.1715, "step": 83 },
    { "epoch": 0.14291790727350065, "grad_norm": 1.099668804865605, "learning_rate": 1.9343368135886112e-05, "loss": 0.2042, "step": 84 },
    { "epoch": 0.1446193109315185, "grad_norm": 0.9910009741184473, "learning_rate": 1.932354856318746e-05, "loss": 0.1656, "step": 85 },
    { "epoch": 0.14632071458953635, "grad_norm": 1.7641820391925394, "learning_rate": 1.9303444770023184e-05, "loss": 0.2039, "step": 86 },
    { "epoch": 0.14802211824755424, "grad_norm": 1.4259122950718577, "learning_rate": 1.9283057369240358e-05, "loss": 0.2408, "step": 87 },
    { "epoch": 0.1497235219055721, "grad_norm": 1.2173305326266373, "learning_rate": 1.9262386982331596e-05, "loss": 0.1942, "step": 88 },
    { "epoch": 0.15142492556358997, "grad_norm": 1.16090353646738, "learning_rate": 1.9241434239416093e-05, "loss": 0.1882, "step": 89 },
    { "epoch": 0.15312632922160782, "grad_norm": 1.0574403120559566, "learning_rate": 1.922019977922045e-05, "loss": 0.1709, "step": 90 },
    { "epoch": 0.1548277328796257, "grad_norm": 2.0338394122982755, "learning_rate": 1.919868424905915e-05, "loss": 0.1898, "step": 91 },
    { "epoch": 0.15652913653764355, "grad_norm": 1.002950600937973, "learning_rate": 1.9176888304814882e-05, "loss": 0.128, "step": 92 },
    { "epoch": 0.15823054019566143, "grad_norm": 1.4905981697484776, "learning_rate": 1.9154812610918503e-05, "loss": 0.2261, "step": 93 },
    { "epoch": 0.1599319438536793, "grad_norm": 3.6410188347303554, "learning_rate": 1.913245784032881e-05, "loss": 0.1545, "step": 94 },
    { "epoch": 0.16163334751169714, "grad_norm": 0.9791986885782344, "learning_rate": 1.9109824674512014e-05, "loss": 0.143, "step": 95 },
    { "epoch": 0.16333475116971502, "grad_norm": 1.616911556183482, "learning_rate": 1.9086913803420966e-05, "loss": 0.2317, "step": 96 },
    { "epoch": 0.16503615482773287, "grad_norm": 1.8630327951285053, "learning_rate": 1.906372592547413e-05, "loss": 0.2108, "step": 97 },
    { "epoch": 0.16673755848575075, "grad_norm": 1.4145751439464074, "learning_rate": 1.9040261747534282e-05, "loss": 0.1645, "step": 98 },
    { "epoch": 0.1684389621437686, "grad_norm": 1.6383401430353326, "learning_rate": 1.9016521984886984e-05, "loss": 0.1964, "step": 99 },
    { "epoch": 0.1701403658017865, "grad_norm": 1.1305872234143883, "learning_rate": 1.8992507361218743e-05, "loss": 0.1719, "step": 100 },
    { "epoch": 0.17184176945980434, "grad_norm": 0.9212110648235973, "learning_rate": 1.8968218608594987e-05, "loss": 0.1041, "step": 101 },
    { "epoch": 0.1735431731178222, "grad_norm": 1.4867476390524645, "learning_rate": 1.8943656467437726e-05, "loss": 0.2201, "step": 102 },
    { "epoch": 0.17524457677584007, "grad_norm": 1.232901326719632, "learning_rate": 1.8918821686502992e-05, "loss": 0.1844, "step": 103 },
    { "epoch": 0.17694598043385792, "grad_norm": 0.9249397462332263, "learning_rate": 1.8893715022858e-05, "loss": 0.1266, "step": 104 },
    { "epoch": 0.1786473840918758, "grad_norm": 1.10293476258483, "learning_rate": 1.886833724185809e-05, "loss": 0.1702, "step": 105 },
    { "epoch": 0.18034878774989366, "grad_norm": 0.9437280287141773, "learning_rate": 1.8842689117123377e-05, "loss": 0.1091, "step": 106 },
    { "epoch": 0.18205019140791154, "grad_norm": 1.1163247821615636, "learning_rate": 1.8816771430515178e-05, "loss": 0.1705, "step": 107 },
    { "epoch": 0.1837515950659294, "grad_norm": 1.173349634021317, "learning_rate": 1.8790584972112174e-05, "loss": 0.1617, "step": 108 },
    { "epoch": 0.18545299872394724, "grad_norm": 1.6711842060073634, "learning_rate": 1.876413054018633e-05, "loss": 0.273, "step": 109 },
    { "epoch": 0.18715440238196512, "grad_norm": 1.241159770325543, "learning_rate": 1.873740894117854e-05, "loss": 0.1867, "step": 110 },
    { "epoch": 0.18885580603998298, "grad_norm": 0.9298106529169693, "learning_rate": 1.8710420989674093e-05, "loss": 0.1442, "step": 111 },
    { "epoch": 0.19055720969800086, "grad_norm": 0.8514950506084779, "learning_rate": 1.8683167508377775e-05, "loss": 0.1394, "step": 112 },
    { "epoch": 0.1922586133560187, "grad_norm": 1.159669352047277, "learning_rate": 1.8655649328088836e-05, "loss": 0.1174, "step": 113 },
    { "epoch": 0.1939600170140366, "grad_norm": 1.117625315370739, "learning_rate": 1.862786728767565e-05, "loss": 0.1603, "step": 114 },
    { "epoch": 0.19566142067205444, "grad_norm": 1.3416593805973367, "learning_rate": 1.8599822234050143e-05, "loss": 0.1875, "step": 115 },
    { "epoch": 0.19736282433007232, "grad_norm": 0.9921414446012113, "learning_rate": 1.8571515022141974e-05, "loss": 0.158, "step": 116 },
    { "epoch": 0.19906422798809018, "grad_norm": 1.2663199688342266, "learning_rate": 1.8542946514872478e-05, "loss": 0.1639, "step": 117 },
    { "epoch": 0.20076563164610803, "grad_norm": 1.502373927040382, "learning_rate": 1.851411758312835e-05, "loss": 0.1102, "step": 118 },
    { "epoch": 0.2024670353041259, "grad_norm": 0.9112340285667759, "learning_rate": 1.8485029105735112e-05, "loss": 0.1373, "step": 119 },
    { "epoch": 0.20416843896214376, "grad_norm": 0.9308775387978712, "learning_rate": 1.8455681969430307e-05, "loss": 0.1367, "step": 120 },
    { "epoch": 0.20586984262016164, "grad_norm": 1.0498855289840079, "learning_rate": 1.8426077068836487e-05, "loss": 0.1484, "step": 121 },
    { "epoch": 0.2075712462781795, "grad_norm": 0.7911114454233587, "learning_rate": 1.839621530643392e-05, "loss": 0.117, "step": 122 },
    { "epoch": 0.20927264993619737, "grad_norm": 1.0125626321631322, "learning_rate": 1.8366097592533095e-05, "loss": 0.1112, "step": 123 },
    { "epoch": 0.21097405359421523, "grad_norm": 1.2070151224720589, "learning_rate": 1.8335724845246948e-05, "loss": 0.2101, "step": 124 },
    { "epoch": 0.21267545725223308, "grad_norm": 0.8912405247679976, "learning_rate": 1.830509799046292e-05, "loss": 0.1536, "step": 125 },
    { "epoch": 0.21437686091025096, "grad_norm": 0.943905994134866, "learning_rate": 1.8274217961814682e-05, "loss": 0.1234, "step": 126 },
    { "epoch": 0.2160782645682688, "grad_norm": 1.0629115609465312, "learning_rate": 1.8243085700653698e-05, "loss": 0.164, "step": 127 },
    { "epoch": 0.2177796682262867, "grad_norm": 0.9167444126586589, "learning_rate": 1.821170215602053e-05, "loss": 0.1366, "step": 128 },
    { "epoch": 0.21948107188430455, "grad_norm": 1.3561117487616148, "learning_rate": 1.818006828461591e-05, "loss": 0.1491, "step": 129 },
    { "epoch": 0.22118247554232243, "grad_norm": 1.0924892277382052, "learning_rate": 1.8148185050771554e-05, "loss": 0.1609, "step": 130 },
    { "epoch": 0.22288387920034028, "grad_norm": 0.8222865380048915, "learning_rate": 1.8116053426420793e-05, "loss": 0.1542, "step": 131 },
    { "epoch": 0.22458528285835813, "grad_norm": 0.9936845495660046, "learning_rate": 1.8083674391068925e-05, "loss": 0.1603, "step": 132 },
    { "epoch": 0.226286686516376, "grad_norm": 0.9125563797717536, "learning_rate": 1.8051048931763366e-05, "loss": 0.1351, "step": 133 },
    { "epoch": 0.22798809017439386, "grad_norm": 0.8848502502479684, "learning_rate": 1.8018178043063554e-05, "loss": 0.139, "step": 134 },
    { "epoch": 0.22968949383241175, "grad_norm": 0.8829591378486032, "learning_rate": 1.798506272701064e-05, "loss": 0.1071, "step": 135 },
    { "epoch": 0.2313908974904296, "grad_norm": 1.2518522764811424, "learning_rate": 1.795170399309692e-05, "loss": 0.1845, "step": 136 },
    { "epoch": 0.23309230114844748, "grad_norm": 1.1836157164291814, "learning_rate": 1.7918102858235103e-05, "loss": 0.1843, "step": 137 },
    { "epoch": 0.23479370480646533, "grad_norm": 1.0028146137981997, "learning_rate": 1.7884260346727257e-05, "loss": 0.1523, "step": 138 },
    { "epoch": 0.2364951084644832, "grad_norm": 0.9312803934338035, "learning_rate": 1.7850177490233635e-05, "loss": 0.1505, "step": 139 },
    { "epoch": 0.23819651212250106, "grad_norm": 1.4062348205690975, "learning_rate": 1.7815855327741185e-05, "loss": 0.184, "step": 140 },
    { "epoch": 0.23989791578051892, "grad_norm": 1.202402031733493, "learning_rate": 1.7781294905531908e-05, "loss": 0.1635, "step": 141 },
    { "epoch": 0.2415993194385368, "grad_norm": 1.0855843964177576, "learning_rate": 1.774649727715094e-05, "loss": 0.1504, "step": 142 },
    { "epoch": 0.24330072309655465, "grad_norm": 0.776506687452474, "learning_rate": 1.7711463503374466e-05, "loss": 0.0911, "step": 143 },
    { "epoch": 0.24500212675457253, "grad_norm": 0.9545701276843519, "learning_rate": 1.7676194652177333e-05, "loss": 0.1245, "step": 144 },
    { "epoch": 0.24670353041259038, "grad_norm": 1.014164593806905, "learning_rate": 1.764069179870055e-05, "loss": 0.181, "step": 145 },
    { "epoch": 0.24840493407060826, "grad_norm": 0.7754970071101359, "learning_rate": 1.760495602521847e-05, "loss": 0.114, "step": 146 },
    { "epoch": 0.2501063377286261, "grad_norm": 1.1564504154977566, "learning_rate": 1.756898842110582e-05, "loss": 0.209, "step": 147 },
    { "epoch": 0.25180774138664397, "grad_norm": 1.1287141860391865, "learning_rate": 1.753279008280449e-05, "loss": 0.1298, "step": 148 },
    { "epoch": 0.2535091450446618, "grad_norm": 0.9691449559553242, "learning_rate": 1.74963621137901e-05, "loss": 0.1258, "step": 149 },
    { "epoch": 0.25521054870267973, "grad_norm": 1.035658869069728, "learning_rate": 1.7459705624538383e-05, "loss": 0.1419, "step": 150 },
    { "epoch": 0.2569119523606976, "grad_norm": 1.0753488541101328, "learning_rate": 1.7422821732491297e-05, "loss": 0.135, "step": 151 },
    { "epoch": 0.25861335601871543, "grad_norm": 0.8229354151040922, "learning_rate": 1.7385711562022988e-05, "loss": 0.1164, "step": 152 },
    { "epoch": 0.2603147596767333, "grad_norm": 1.016446132319777, "learning_rate": 1.734837624440551e-05, "loss": 0.1661, "step": 153 },
    { "epoch": 0.2620161633347512, "grad_norm": 0.9558420891892299, "learning_rate": 1.731081691777434e-05, "loss": 0.1197, "step": 154 },
    { "epoch": 0.26371756699276905, "grad_norm": 0.9986826502725739, "learning_rate": 1.7273034727093677e-05, "loss": 0.1654, "step": 155 },
    { "epoch": 0.2654189706507869, "grad_norm": 1.2437727283048936, "learning_rate": 1.7235030824121542e-05, "loss": 0.1472, "step": 156 },
    { "epoch": 0.26712037430880475, "grad_norm": 0.9201621015876281, "learning_rate": 1.7196806367374656e-05, "loss": 0.1427, "step": 157 },
    { "epoch": 0.2688217779668226, "grad_norm": 0.8981299082841511, "learning_rate": 1.7158362522093153e-05, "loss": 0.1356, "step": 158 },
    { "epoch": 0.2705231816248405, "grad_norm": 0.8438284299933941, "learning_rate": 1.7119700460205026e-05, "loss": 0.1537, "step": 159 },
    { "epoch": 0.27222458528285837, "grad_norm": 0.9992590548158468, "learning_rate": 1.7080821360290426e-05, "loss": 0.1149, "step": 160 },
    { "epoch": 0.2739259889408762, "grad_norm": 1.0293167471941735, "learning_rate": 1.7041726407545716e-05, "loss": 0.1018, "step": 161 },
    { "epoch": 0.27562739259889407, "grad_norm": 1.2090694788101504, "learning_rate": 1.7002416793747354e-05, "loss": 0.1409, "step": 162 },
    { "epoch": 0.2773287962569119, "grad_norm": 1.0236086029383877, "learning_rate": 1.696289371721556e-05, "loss": 0.1702, "step": 163 },
    { "epoch": 0.27903019991492983, "grad_norm": 1.3417300786636221, "learning_rate": 1.692315838277778e-05, "loss": 0.2483, "step": 164 },
    { "epoch": 0.2807316035729477, "grad_norm": 1.0989869821812306, "learning_rate": 1.6883212001731956e-05, "loss": 0.1782, "step": 165 },
    { "epoch": 0.28243300723096554, "grad_norm": 0.529248875869627, "learning_rate": 1.6843055791809623e-05, "loss": 0.0617, "step": 166 },
    { "epoch": 0.2841344108889834, "grad_norm": 0.912576853390358, "learning_rate": 1.680269097713876e-05, "loss": 0.1415, "step": 167 },
    { "epoch": 0.2858358145470013, "grad_norm": 0.9909341183986241, "learning_rate": 1.6762118788206488e-05, "loss": 0.1234, "step": 168 },
    { "epoch": 0.28753721820501915, "grad_norm": 1.2344432350943482, "learning_rate": 1.6721340461821555e-05, "loss": 0.1964, "step": 169 },
    { "epoch": 0.289238621863037, "grad_norm": 1.3663387049811897, "learning_rate": 1.6680357241076632e-05, "loss": 0.1883, "step": 170 },
    { "epoch": 0.29094002552105486, "grad_norm": 1.059631703359694, "learning_rate": 1.6639170375310422e-05, "loss": 0.1116, "step": 171 },
    { "epoch": 0.2926414291790727, "grad_norm": 1.0508255410491976, "learning_rate": 1.6597781120069584e-05, "loss": 0.1662, "step": 172 },
    { "epoch": 0.2943428328370906, "grad_norm": 1.0537186165554597, "learning_rate": 1.655619073707043e-05, "loss": 0.1497, "step": 173 },
    { "epoch": 0.29604423649510847, "grad_norm": 0.7500117116797194, "learning_rate": 1.6514400494160498e-05, "loss": 0.1014, "step": 174 },
    { "epoch": 0.2977456401531263, "grad_norm": 0.9365762295606946, "learning_rate": 1.6472411665279872e-05, "loss": 0.1692, "step": 175 },
    { "epoch": 0.2994470438111442, "grad_norm": 0.9936157472994437, "learning_rate": 1.643022553042237e-05, "loss": 0.1329, "step": 176 },
    { "epoch": 0.3011484474691621, "grad_norm": 0.9502764427701426, "learning_rate": 1.6387843375596513e-05, "loss": 0.1497, "step": 177 },
    { "epoch": 0.30284985112717994, "grad_norm": 0.9034816217862629, "learning_rate": 1.634526649278632e-05, "loss": 0.1341, "step": 178 },
    { "epoch": 0.3045512547851978, "grad_norm": 1.2235420334078282, "learning_rate": 1.630249617991194e-05, "loss": 0.1691, "step": 179 },
    { "epoch": 0.30625265844321564, "grad_norm": 1.070819167390794, "learning_rate": 1.6259533740790055e-05, "loss": 0.1893, "step": 180 },
    { "epoch": 0.3079540621012335, "grad_norm": 1.0149277093510711, "learning_rate": 1.6216380485094164e-05, "loss": 0.1581, "step": 181 },
    { "epoch": 0.3096554657592514, "grad_norm": 1.2727184403130332, "learning_rate": 1.617303772831465e-05, "loss": 0.1918, "step": 182 },
    { "epoch": 0.31135686941726926, "grad_norm": 1.0175891234730905, "learning_rate": 1.6129506791718665e-05, "loss": 0.1503, "step": 183 },
    { "epoch": 0.3130582730752871, "grad_norm": 0.8269197562005047, "learning_rate": 1.6085789002309873e-05, "loss": 0.1324, "step": 184 },
    { "epoch": 0.31475967673330496, "grad_norm": 1.1533751660178018, "learning_rate": 1.6041885692787985e-05, "loss": 0.1574, "step": 185 },
    { "epoch": 0.31646108039132287, "grad_norm": 0.8735591976051513, "learning_rate": 1.599779820150813e-05, "loss": 0.1139, "step": 186 },
    { "epoch": 0.3181624840493407, "grad_norm": 6.2499538759448034, "learning_rate": 1.5953527872440063e-05, "loss": 0.2183, "step": 187 },
    { "epoch": 0.3198638877073586, "grad_norm": 0.904041081802588, "learning_rate": 1.5909076055127202e-05, "loss": 0.1619, "step": 188 },
    { "epoch": 0.3215652913653764, "grad_norm": 1.4719450867461397, "learning_rate": 1.5864444104645473e-05, "loss": 0.1781, "step": 189 },
    { "epoch": 0.3232666950233943, "grad_norm": 1.2964548761824104, "learning_rate": 1.581963338156201e-05, "loss": 0.2205, "step": 190 },
    { "epoch": 0.3249680986814122, "grad_norm": 0.9295125169199553, "learning_rate": 1.5774645251893673e-05, "loss": 0.1244, "step": 191 },
    { "epoch": 0.32666950233943004, "grad_norm": 1.0204273767596037, "learning_rate": 1.5729481087065423e-05, "loss": 0.1404, "step": 192 },
    { "epoch": 0.3283709059974479, "grad_norm": 0.8337505371214884, "learning_rate": 1.5684142263868493e-05, "loss": 0.1473, "step": 193 },
    { "epoch": 0.33007230965546575, "grad_norm": 1.0928052339478989, "learning_rate": 1.5638630164418435e-05, "loss": 0.137, "step": 194 },
    { "epoch": 0.3317737133134836, "grad_norm": 1.6830022787415706, "learning_rate": 1.5592946176112973e-05, "loss": 0.1891, "step": 195 },
    { "epoch": 0.3334751169715015, "grad_norm": 1.0495686844810206, "learning_rate": 1.554709169158972e-05, "loss": 0.184, "step": 196 },
    { "epoch": 0.33517652062951936, "grad_norm": 1.011837190532919, "learning_rate": 1.550106810868373e-05, "loss": 0.1265, "step": 197 },
    { "epoch": 0.3368779242875372, "grad_norm": 1.1498721398868388, "learning_rate": 1.5454876830384868e-05, "loss": 0.1531, "step": 198 },
    { "epoch": 0.33857932794555506, "grad_norm": 1.2864468646292577, "learning_rate": 1.540851926479505e-05, "loss": 0.1889, "step": 199 },
    { "epoch": 0.340280731603573, "grad_norm": 1.0553252849977297, "learning_rate": 1.536199682508533e-05, "loss": 0.1345, "step": 200 },
    { "epoch": 0.3419821352615908, "grad_norm": 0.889196583250441, "learning_rate": 1.531531092945279e-05, "loss": 0.1102, "step": 201 },
    { "epoch": 0.3436835389196087, "grad_norm": 1.1994000234252027, "learning_rate": 1.526846300107734e-05, "loss": 0.2369, "step": 202 },
    { "epoch": 0.34538494257762653, "grad_norm": 0.9518805403077071, "learning_rate": 1.5221454468078336e-05, "loss": 0.1712, "step": 203 },
    { "epoch": 0.3470863462356444, "grad_norm": 1.2937076434972201, "learning_rate": 1.5174286763470995e-05, "loss": 0.1848, "step": 204 },
    { "epoch": 0.3487877498936623, "grad_norm": 0.7845825707725936, "learning_rate": 1.5126961325122773e-05, "loss": 0.1097, "step": 205 },
    { "epoch": 0.35048915355168014, "grad_norm": 1.0471712678416134, "learning_rate": 1.5079479595709493e-05, "loss": 0.2015, "step": 206 },
    { "epoch": 0.352190557209698, "grad_norm": 0.6389298881241116, "learning_rate": 1.5031843022671377e-05, "loss": 0.0863, "step": 207 },
    { "epoch": 0.35389196086771585, "grad_norm": 0.9730117762878676, "learning_rate": 1.4984053058168936e-05, "loss": 0.1233, "step": 208 },
    { "epoch": 0.35559336452573376, "grad_norm": 1.0321283116382443, "learning_rate": 1.4936111159038677e-05, "loss": 0.1655, "step": 209 },
    { "epoch": 0.3572947681837516, "grad_norm": 1.1161778201265327, "learning_rate": 1.4888018786748713e-05, "loss": 0.18, "step": 210 },
    { "epoch": 0.35899617184176946, "grad_norm": 1.240719081282556, "learning_rate": 1.4839777407354194e-05, "loss": 0.2124, "step": 211 },
    { "epoch": 0.3606975754997873, "grad_norm": 0.9976402494534861, "learning_rate": 1.4791388491452637e-05, "loss": 0.1894, "step": 212 },
    { "epoch": 0.36239897915780517, "grad_norm": 1.1346780693826046, "learning_rate": 1.4742853514139076e-05, "loss": 0.181, "step": 213 },
    { "epoch": 0.3641003828158231, "grad_norm": 1.082220133582736, "learning_rate": 1.4694173954961105e-05, "loss": 0.1781, "step": 214 },
    { "epoch": 0.36580178647384093, "grad_norm": 0.9799894414710469, "learning_rate": 1.4645351297873774e-05, "loss": 0.1446, "step": 215 },
    { "epoch": 0.3675031901318588, "grad_norm": 0.9322926757789775, "learning_rate": 1.4596387031194354e-05, "loss": 0.1458, "step": 216 },
    { "epoch": 0.36920459378987663, "grad_norm": 0.9669419010934909, "learning_rate": 1.4547282647556964e-05, "loss": 0.1854, "step": 217 },
    { "epoch": 0.3709059974478945, "grad_norm": 0.8073117793398128, "learning_rate": 1.449803964386706e-05, "loss": 0.1436, "step": 218 },
    { "epoch": 0.3726074011059124, "grad_norm": 0.7928236938106081, "learning_rate": 1.4448659521255823e-05, "loss": 0.134, "step": 219 },
    { "epoch": 0.37430880476393025, "grad_norm": 1.1023563168423864, "learning_rate": 1.4399143785034388e-05, "loss": 0.1806, "step": 220 },
    { "epoch": 0.3760102084219481, "grad_norm": 0.8661885088173654, "learning_rate": 1.4349493944647953e-05, "loss": 0.1252, "step": 221 },
    { "epoch": 0.37771161207996595, "grad_norm": 1.297973502891389, "learning_rate": 1.4299711513629759e-05, "loss": 0.1964, "step": 222 },
    { "epoch": 0.37941301573798386, "grad_norm": 1.1335119776385323, "learning_rate": 1.4249798009554979e-05, "loss": 0.1982, "step": 223 },
    { "epoch": 0.3811144193960017, "grad_norm": 1.1726247095139843, "learning_rate": 1.419975495399442e-05, "loss": 0.199, "step": 224 },
    { "epoch": 0.38281582305401957, "grad_norm": 0.8891933996157408, "learning_rate": 1.4149583872468165e-05, "loss": 0.1292, "step": 225 },
    { "epoch": 0.3845172267120374, "grad_norm": 0.7603931038731861, "learning_rate": 1.4099286294399051e-05, "loss": 0.0952, "step": 226 },
    { "epoch": 0.38621863037005527, "grad_norm": 1.1672152863793608, "learning_rate": 1.404886375306607e-05, "loss": 0.1611, "step": 227 },
    { "epoch": 0.3879200340280732, "grad_norm": 0.8620430776020629, "learning_rate": 1.3998317785557597e-05, "loss": 0.1348, "step": 228 },
    { "epoch": 0.38962143768609103, "grad_norm": 0.9552569121402584, "learning_rate": 1.3947649932724563e-05, "loss": 0.1726, "step": 229 },
    { "epoch": 0.3913228413441089, "grad_norm": 0.6526188073680776, "learning_rate": 1.3896861739133456e-05, "loss": 0.0828, "step": 230 },
    { "epoch": 0.39302424500212674, "grad_norm": 1.1261254351408145, "learning_rate": 1.384595475301926e-05, "loss": 0.1666, "step": 231 },
    { "epoch": 0.39472564866014465, "grad_norm": 1.0213490251792958, "learning_rate": 1.3794930526238246e-05, "loss": 0.1238, "step": 232 },
    { "epoch": 0.3964270523181625, "grad_norm": 1.0422843362362886, "learning_rate": 1.3743790614220664e-05, "loss": 0.1542, "step": 233 },
    { "epoch": 0.39812845597618035, "grad_norm": 1.1712283445792941, "learning_rate": 1.3692536575923334e-05, "loss": 0.1619, "step": 234 },
    { "epoch": 0.3998298596341982, "grad_norm": 0.9722112293600863, "learning_rate": 1.3641169973782117e-05, "loss": 0.1487, "step": 235 },
    { "epoch": 0.40153126329221606, "grad_norm": 0.9748390707529867, "learning_rate": 1.3589692373664288e-05, "loss": 0.1281, "step": 236 },
    { "epoch": 0.40323266695023396, "grad_norm": 1.5657080809883168, "learning_rate": 1.3538105344820798e-05, "loss": 0.1728, "step": 237 },
    { "epoch": 0.4049340706082518, "grad_norm": 0.8408747728353582, "learning_rate": 1.3486410459838448e-05, "loss": 0.1316, "step": 238 },
    { "epoch": 0.40663547426626967, "grad_norm": 0.9029876737747633, "learning_rate": 1.343460929459193e-05, "loss": 0.157, "step": 239 },
    { "epoch": 0.4083368779242875, "grad_norm": 1.1908098440690353, "learning_rate": 1.3382703428195812e-05, "loss": 0.2164, "step": 240 },
    { "epoch": 0.4100382815823054, "grad_norm": 0.7354672073569528, "learning_rate": 1.3330694442956376e-05, "loss": 0.1052, "step": 241 },
    { "epoch": 0.4117396852403233, "grad_norm": 0.7769967136278536, "learning_rate": 1.3278583924323405e-05, "loss": 0.1007, "step": 242 },
    { "epoch": 0.41344108889834114, "grad_norm": 0.7667344440078264, "learning_rate": 1.3226373460841835e-05, "loss": 0.1383, "step": 243 },
    { "epoch": 0.415142492556359, "grad_norm": 0.795735262361974, "learning_rate": 1.3174064644103334e-05, "loss": 0.1151, "step": 244 },
    { "epoch": 0.41684389621437684, "grad_norm": 0.6474341608732114, "learning_rate": 1.3121659068697797e-05, "loss": 0.0632, "step": 245 },
    { "epoch": 0.41854529987239475, "grad_norm": 1.2790933846234822, "learning_rate": 1.306915833216471e-05, "loss": 0.1294, "step": 246 },
    { "epoch": 0.4202467035304126, "grad_norm": 1.0004086926301767, "learning_rate": 1.3016564034944473e-05, "loss": 0.1272, "step": 247 },
    { "epoch": 0.42194810718843045, "grad_norm": 1.2932792230934473, "learning_rate": 1.29638777803296e-05, "loss": 0.201, "step": 248 },
    { "epoch": 0.4236495108464483, "grad_norm": 0.8319761692099532, "learning_rate": 1.2911101174415861e-05, "loss": 0.1297, "step": 249 },
    { "epoch": 0.42535091450446616, "grad_norm": 0.9446815368539271, "learning_rate": 1.2858235826053294e-05, "loss": 0.1336, "step": 250 },
    { "epoch": 0.42705231816248407, "grad_norm": 1.016975563258099, "learning_rate": 1.2805283346797179e-05, "loss": 0.1411, "step": 251 },
    { "epoch": 0.4287537218205019, "grad_norm": 0.732577169895616, "learning_rate": 1.2752245350858905e-05, "loss": 0.1278, "step": 252 },
    { "epoch": 0.4304551254785198, "grad_norm": 0.6594182477673913, "learning_rate": 1.2699123455056777e-05, "loss": 0.0643, "step": 253 },
    { "epoch": 0.4321565291365376, "grad_norm": 1.3288334602081366, "learning_rate": 1.26459192787667e-05, "loss": 0.1937, "step": 254 },
    { "epoch": 0.43385793279455553, "grad_norm": 0.9662932023952568, "learning_rate": 1.2592634443872842e-05, "loss": 0.1635, "step": 255 },
    { "epoch": 0.4355593364525734, "grad_norm": 1.0490078641576344, "learning_rate": 1.2539270574718172e-05, "loss": 0.1725, "step": 256 },
    { "epoch": 0.43726074011059124, "grad_norm": 1.024114729528168, "learning_rate": 1.2485829298054952e-05, "loss": 0.1616, "step": 257 },
    { "epoch": 0.4389621437686091, "grad_norm": 1.032340498139421, "learning_rate": 1.2432312242995158e-05, "loss": 0.2006, "step": 258 },
    { "epoch": 0.44066354742662694, "grad_norm": 0.9662483951159501, "learning_rate": 1.2378721040960788e-05, "loss": 0.2017, "step": 259 },
    { "epoch": 0.44236495108464485, "grad_norm": 1.7452442995933304, "learning_rate": 1.232505732563416e-05, "loss": 0.2189, "step": 260 },
    { "epoch": 0.4440663547426627, "grad_norm": 1.4701622612115262, "learning_rate": 1.2271322732908091e-05, "loss": 0.1274, "step": 261 },
    { "epoch": 0.44576775840068056, "grad_norm": 1.3198760014504118, "learning_rate": 1.2217518900836045e-05, "loss": 0.1733, "step": 262 },
    { "epoch": 0.4474691620586984, "grad_norm": 0.7648660349856883, "learning_rate": 1.2163647469582181e-05, "loss": 0.0914, "step": 263 },
    { "epoch": 0.44917056571671626, "grad_norm": 1.007435841972258, "learning_rate": 1.210971008137136e-05, "loss": 0.1805, "step": 264 },
    { "epoch": 0.45087196937473417, "grad_norm": 1.215388205761567, "learning_rate": 1.2055708380439089e-05, "loss": 0.2073, "step": 265 },
    { "epoch": 0.452573373032752, "grad_norm": 1.1544616644141075, "learning_rate": 1.2001644012981392e-05, "loss": 0.1398, "step": 266 },
    { "epoch": 0.4542747766907699, "grad_norm": 1.0533254551023599, "learning_rate": 1.1947518627104637e-05, "loss": 0.1951, "step": 267 },
    { "epoch": 0.45597618034878773, "grad_norm": 0.9812187726267535, "learning_rate": 1.1893333872775275e-05, "loss": 0.1742, "step": 268 },
    { "epoch": 0.45767758400680564, "grad_norm": 0.7325445025398932, "learning_rate": 1.1839091401769559e-05, "loss": 0.1051, "step": 269 },
    { "epoch": 0.4593789876648235, "grad_norm": 0.8301737461261519, "learning_rate": 1.1784792867623179e-05, "loss": 0.1138, "step": 270 },
    { "epoch": 0.46108039132284134, "grad_norm": 0.9822341756525031, "learning_rate": 1.1730439925580876e-05, "loss": 0.1822, "step": 271 },
    { "epoch": 0.4627817949808592, "grad_norm": 1.1111507637182583, "learning_rate": 1.1676034232545963e-05, "loss": 0.1669, "step": 272 },
    { "epoch": 0.46448319863887705, "grad_norm": 1.1574890172808852, "learning_rate": 1.1621577447029816e-05, "loss": 0.1194, "step": 273 },
    { "epoch": 0.46618460229689496, "grad_norm": 2.0493950053963674, "learning_rate": 1.1567071229101332e-05, "loss": 0.1955, "step": 274 },
    { "epoch": 0.4678860059549128, "grad_norm": 0.9605842671373481, "learning_rate": 1.1512517240336304e-05, "loss": 0.1463, "step": 275 },
    { "epoch": 0.46958740961293066, "grad_norm": 0.9613890557759487, "learning_rate": 1.1457917143766786e-05, "loss": 0.1348, "step": 276 },
    { "epoch": 0.4712888132709485, "grad_norm": 0.8592623226268763, "learning_rate": 1.1403272603830384e-05, "loss": 0.122, "step": 277 },
    { "epoch": 0.4729902169289664, "grad_norm": 0.823977210047947, "learning_rate": 1.1348585286319529e-05, "loss": 0.0959, "step": 278 },
    { "epoch": 0.4746916205869843, "grad_norm": 1.2394663199353901, "learning_rate": 1.1293856858330678e-05, "loss": 0.1515, "step": 279 },
    { "epoch": 0.47639302424500213, "grad_norm": 0.9415416594734269, "learning_rate": 1.1239088988213522e-05, "loss": 0.1402, "step": 280 },
    { "epoch": 0.47809442790302, "grad_norm": 0.6692152879666028, "learning_rate": 1.11842833455201e-05, "loss": 0.1038, "step": 281 },
    { "epoch": 0.47979583156103783, "grad_norm": 1.279963114211638, "learning_rate": 1.1129441600953916e-05, "loss": 0.179, "step": 282 },
    { "epoch": 0.48149723521905574, "grad_norm": 1.0561646858074096, "learning_rate": 1.1074565426319014e-05, "loss": 0.1793, "step": 283 },
    { "epoch": 0.4831986388770736, "grad_norm": 0.9873861580062439, "learning_rate": 1.101965649446901e-05, "loss": 0.1266, "step": 284 },
    { "epoch": 0.48490004253509145, "grad_norm": 1.3992617370873837, "learning_rate": 1.0964716479256094e-05, "loss": 0.1779, "step": 285 },
    { "epoch": 0.4866014461931093, "grad_norm": 0.6047306625037067, "learning_rate": 1.0909747055480004e-05, "loss": 0.0748, "step": 286 },
    { "epoch": 0.4883028498511272, "grad_norm": 0.9243553903907195, "learning_rate": 1.0854749898836974e-05, "loss": 0.0768, "step": 287 },
    { "epoch": 0.49000425350914506, "grad_norm": 0.981134680334624, "learning_rate": 1.0799726685868648e-05, "loss": 0.1082, "step": 288 },
    { "epoch": 0.4917056571671629, "grad_norm": 0.8947071035708567, "learning_rate": 1.0744679093910987e-05, "loss": 0.1516, "step": 289 },
    { "epoch": 0.49340706082518077, "grad_norm": 0.9633671611183833, "learning_rate": 1.0689608801043107e-05, "loss": 0.1431, "step": 290 },
    { "epoch": 0.4951084644831986, "grad_norm": 0.9783028097642653, "learning_rate": 1.063451748603616e-05, "loss": 0.1725, "step": 291 },
    { "epoch": 0.4968098681412165, "grad_norm": 1.1011125326638287, "learning_rate": 1.0579406828302124e-05, "loss": 0.1559, "step": 292 },
    { "epoch": 0.4985112717992344, "grad_norm": 1.3678905512011608, "learning_rate": 1.0524278507842637e-05, "loss": 0.2389, "step": 293 },
    { "epoch": 0.5002126754572522, "grad_norm": 0.9590558986038028, "learning_rate": 1.0469134205197762e-05, "loss": 0.167, "step": 294 },
    { "epoch": 0.5019140791152701, "grad_norm": 0.7852144914992837, "learning_rate": 1.0413975601394765e-05, "loss": 0.14, "step": 295 },
    { "epoch": 0.5036154827732879, "grad_norm": 0.9312639100976492, "learning_rate": 1.0358804377896876e-05, "loss": 0.1787, "step": 296 },
    { "epoch": 0.5053168864313058, "grad_norm": 1.1537236217667737, "learning_rate": 1.0303622216552022e-05, "loss": 0.1578, "step": 297 },
    { "epoch": 0.5070182900893236, "grad_norm": 0.6075562366273578, "learning_rate": 1.0248430799541564e-05, "loss": 0.0764, "step": 298 },
    { "epoch": 0.5087196937473416, "grad_norm": 1.1242541214557573, "learning_rate": 1.019323180932901e-05, "loss": 0.1921, "step": 299 },
    { "epoch": 0.5104210974053595, "grad_norm": 1.4234304726043698, "learning_rate": 1.013802692860873e-05, "loss": 0.1666, "step": 300 },
    { "epoch": 0.5121225010633773, "grad_norm": 1.2757943782273478, "learning_rate": 1.0082817840254667e-05, "loss": 0.1887, "step": 301 },
    { "epoch": 0.5138239047213952, "grad_norm": 0.996005178297661, "learning_rate": 1.0027606227269026e-05, "loss": 0.1747, "step": 302 },
    { "epoch": 0.515525308379413, "grad_norm": 0.7387713580838607, "learning_rate": 9.972393772730975e-06, "loss": 0.1085, "step": 303 },
    { "epoch": 0.5172267120374309, "grad_norm": 1.2995451243064031, "learning_rate": 9.917182159745335e-06, "loss": 0.1821, "step": 304 },
    { "epoch": 0.5189281156954487, "grad_norm": 1.0921442436054, "learning_rate": 9.861973071391272e-06, "loss": 0.1875, "step": 305 },
    { "epoch": 0.5206295193534666, "grad_norm": 0.8460606336221084, "learning_rate": 9.806768190670994e-06, "loss": 0.128, "step": 306 },
    { "epoch": 0.5223309230114844, "grad_norm": 1.3555278122778684, "learning_rate": 9.751569200458438e-06, "loss": 0.2287, "step": 307 },
    { "epoch": 0.5240323266695024, "grad_norm": 0.8944437738286967, "learning_rate": 9.69637778344798e-06, "loss": 0.1808, "step": 308 },
    { "epoch": 0.5257337303275202, "grad_norm": 1.0653760514010453, "learning_rate": 9.641195622103126e-06, "loss": 0.1212, "step": 309 },
    { "epoch": 0.5274351339855381, "grad_norm": 0.9102431997252817, "learning_rate": 9.586024398605238e-06, "loss": 0.1433, "step": 310 },
    { "epoch": 0.529136537643556, "grad_norm": 0.6764029808372342, "learning_rate": 9.530865794802243e-06, "loss": 0.0951, "step": 311 },
    { "epoch": 0.5308379413015738, "grad_norm": 0.9520815336747558, "learning_rate": 9.475721492157365e-06, "loss": 0.1077, "step": 312 },
    { "epoch": 0.5325393449595917, "grad_norm": 1.063523212464188, "learning_rate": 9.420593171697876e-06, "loss": 0.2007, "step": 313 },
    { "epoch": 0.5342407486176095, "grad_norm": 1.0407079644610695, "learning_rate": 9.365482513963844e-06, "loss": 0.1712, "step": 314 },
    { "epoch": 0.5359421522756274, "grad_norm": 0.8825609862775818, "learning_rate": 9.310391198956896e-06, "loss": 0.1273, "step": 315 },
    { "epoch": 0.5376435559336452, "grad_norm": 1.938917918187333, "learning_rate": 9.255320906089017e-06, "loss": 0.0991, "step": 316 },
    { "epoch": 0.5393449595916632, "grad_norm": 0.7107490419953799, "learning_rate": 9.200273314131356e-06, "loss": 0.0971, "step": 317 },
    { "epoch": 0.541046363249681, "grad_norm": 0.8846209277092395, "learning_rate": 9.145250101163032e-06, "loss": 0.113, "step": 318 },
    { "epoch": 0.5427477669076989, "grad_norm": 1.4146454732178861, "learning_rate": 9.090252944520002e-06, "loss": 0.2643, "step": 319 },
    { "epoch": 0.5444491705657167, "grad_norm": 0.8100753678201862, "learning_rate": 9.035283520743911e-06, "loss": 0.1318, "step": 320 },
    { "epoch": 0.5461505742237346, "grad_norm": 0.8667951679113716, "learning_rate": 8.980343505530988e-06, "loss": 0.1278, "step": 321 },
    { "epoch": 0.5478519778817524, "grad_norm": 1.147668283457312, "learning_rate": 8.925434573680986e-06, "loss": 0.1609, "step": 322 },
    { "epoch": 0.5495533815397703, "grad_norm": 1.131156467858024, "learning_rate": 8.870558399046086e-06, "loss": 0.146, "step": 323 },
    { "epoch": 0.5512547851977881, "grad_norm": 0.86377239081248, "learning_rate": 8.815716654479903e-06, "loss": 0.1308, "step": 324 },
    { "epoch": 0.552956188855806, "grad_norm": 0.9720608614924628, "learning_rate": 8.76091101178648e-06, "loss": 0.165, "step": 325 },
    { "epoch": 0.5546575925138238, "grad_norm": 0.8608166282424758, "learning_rate": 8.706143141669324e-06, "loss": 0.1218, "step": 326 },
    { "epoch": 0.5563589961718418, "grad_norm": 1.1970393819234055, "learning_rate": 8.651414713680474e-06, "loss": 0.1962, "step": 327 },
    { "epoch": 0.5580603998298597, "grad_norm": 0.8612813600703227, "learning_rate": 8.59672739616962e-06, "loss": 0.1417, "step": 328 },
    { "epoch": 0.5597618034878775, "grad_norm": 1.0102595912727714, "learning_rate": 8.542082856233216e-06, "loss": 0.1564, "step": 329 },
    { "epoch": 0.5614632071458954, "grad_norm": 0.9406498527873578, "learning_rate": 8.487482759663696e-06, "loss": 0.149, "step": 330 },
    { "epoch": 0.5631646108039132, "grad_norm": 0.8125094662451422, "learning_rate": 8.43292877089867e-06, "loss": 0.1179, "step": 331 },
    { "epoch": 0.5648660144619311, "grad_norm": 1.021325838301208, "learning_rate": 8.378422552970185e-06, "loss": 0.1969, "step": 332 },
    { "epoch": 0.5665674181199489, "grad_norm": 0.8810448268824426, "learning_rate": 8.32396576745404e-06, "loss": 0.1531, "step": 333 },
    { "epoch": 0.5682688217779668, "grad_norm": 0.8255230324993776, "learning_rate": 8.269560074419126e-06, "loss": 0.1082, "step": 334 },
    { "epoch": 0.5699702254359846, "grad_norm": 1.133323698725157, "learning_rate": 8.215207132376824e-06, "loss": 0.1874, "step": 335 },
    { "epoch": 0.5716716290940026, "grad_norm": 0.9575951327422774, "learning_rate": 8.160908598230448e-06, "loss": 0.1554, "step": 336 },
    { "epoch": 0.5733730327520205, "grad_norm": 0.7406219285140028, "learning_rate": 8.10666612722473e-06, "loss": 0.121, "step": 337 },
    { "epoch": 0.5750744364100383, "grad_norm": 0.7152136920800151, "learning_rate": 8.052481372895363e-06, "loss": 0.1093, "step": 338 },
    { "epoch": 0.5767758400680562, "grad_norm": 1.0053517443579167, "learning_rate": 7.998355987018606e-06, "loss": 0.193, "step": 339 },
    { "epoch": 0.578477243726074, "grad_norm": 0.761575926299739, "learning_rate": 7.944291619560914e-06, "loss": 0.0975, "step": 340 },
    { "epoch": 0.5801786473840919, "grad_norm": 0.7751700853107011, "learning_rate": 7.890289918628644e-06, "loss": 0.1028, "step": 341 },
    { "epoch": 0.5818800510421097, "grad_norm": 0.8869639117177723, "learning_rate": 7.836352530417824e-06, "loss": 0.1134, "step": 342 },
    { "epoch": 0.5835814547001276, "grad_norm": 1.0462667085672495, "learning_rate": 7.782481099163958e-06, "loss": 0.1548, "step": 343 },
    { "epoch": 0.5852828583581454, "grad_norm": 0.7001102008399384, "learning_rate": 7.728677267091912e-06, "loss": 0.11, "step": 344 },
    { "epoch": 0.5869842620161634, "grad_norm": 0.7186481834763698, "learning_rate": 7.674942674365847e-06, "loss": 0.1133, "step": 345 },
    { "epoch": 0.5886856656741812, "grad_norm": 0.6682605278304701, "learning_rate": 7.621278959039217e-06, "loss": 0.093, "step": 346 },
    { "epoch": 0.5903870693321991, "grad_norm": 0.9377055159259335, "learning_rate": 7.567687757004843e-06, "loss": 0.0935, "step": 347 },
    { "epoch": 0.5920884729902169, "grad_norm": 0.9075218346520041, "learning_rate": 7.514170701945047e-06, "loss": 0.1305, "step": 348 },
    { "epoch": 0.5937898766482348, "grad_norm": 1.2321152146728744, "learning_rate": 7.460729425281831e-06, "loss": 0.1567, "step": 349 },
    { "epoch": 0.5954912803062526, "grad_norm": 0.6819589386563149, "learning_rate": 7.407365556127162e-06, "loss": 0.1071, "step": 350 },
    { "epoch": 0.5971926839642705, "grad_norm": 0.8602007969363553, "learning_rate": 7.354080721233303e-06, "loss": 0.0992, "step": 351 },
    { "epoch": 0.5988940876222884, "grad_norm": 0.8186635068032451, "learning_rate": 7.300876544943227e-06, "loss": 0.1026, "step": 352 },
    { "epoch": 0.6005954912803062, "grad_norm": 0.9440660658297884, "learning_rate": 7.247754649141097e-06, "loss": 0.1397, "step": 353 },
    { "epoch": 0.6022968949383242, "grad_norm": 0.7406658935971615, "learning_rate": 7.194716653202826e-06, "loss": 0.1235, "step": 354 },
    { "epoch": 0.603998298596342, "grad_norm": 1.023190929228503, "learning_rate": 7.1417641739467104e-06, "loss": 0.1499, "step": 355 },
    { "epoch": 0.6056997022543599, "grad_norm": 0.9452102068231549, "learning_rate": 7.088898825584139e-06, "loss": 0.1302, "step": 356 },
    { "epoch": 0.6074011059123777, "grad_norm": 0.7457983213357819, "learning_rate": 7.036122219670398e-06, "loss": 0.1384, "step": 357 },
    { "epoch": 0.6091025095703956, "grad_norm": 0.7495503225450667, "learning_rate": 6.9834359650555305e-06, "loss": 0.1231, "step": 358 },
    { "epoch": 0.6108039132284134, "grad_norm": 0.7997916558501216, "learning_rate": 6.930841667835295e-06, "loss": 0.1194, "step": 359 },
    { "epoch": 0.6125053168864313, "grad_norm": 0.9510354173960561, "learning_rate": 6.878340931302208e-06, "loss": 0.1495, "step": 360 },
    { "epoch": 0.6142067205444491, "grad_norm": 0.9065606885758422, "learning_rate": 6.825935355896669e-06, "loss": 0.1441, "step": 361 },
    { "epoch": 0.615908124202467, "grad_norm": 1.2836448147049502, "learning_rate": 6.773626539158171e-06, "loss": 0.2146, "step": 362 },
    { "epoch": 0.617609527860485, "grad_norm": 0.9624335481976285, "learning_rate": 6.721416075676601e-06, "loss": 0.1606, "step": 363 },
    { "epoch": 0.6193109315185028, "grad_norm": 0.6357303231994696, "learning_rate": 6.669305557043626e-06, "loss": 0.0998, "step": 364 },
    { "epoch": 0.6210123351765207, "grad_norm": 1.3683425531744842, "learning_rate": 6.617296571804191e-06, "loss": 0.2092, "step": 365 },
    { "epoch": 0.6227137388345385, "grad_norm": 0.9206216658476813, "learning_rate": 6.565390705408072e-06, "loss": 0.1049, "step": 366 },
    { "epoch": 0.6244151424925564, "grad_norm": 1.0820482105025058, "learning_rate": 6.513589540161556e-06, "loss": 0.1302, "step": 367 },
    { "epoch": 0.6261165461505742, "grad_norm": 0.7483207917292378, "learning_rate": 6.461894655179204e-06, "loss": 0.1249, "step": 368 },
    { "epoch": 0.6278179498085921, "grad_norm": 1.1325083196229184, "learning_rate": 6.410307626335717e-06,
|
"loss": 0.159, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.6295193534666099, |
|
"grad_norm": 1.243950543231958, |
|
"learning_rate": 6.358830026217887e-06, |
|
"loss": 0.179, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6312207571246278, |
|
"grad_norm": 0.9764890716406548, |
|
"learning_rate": 6.30746342407667e-06, |
|
"loss": 0.1553, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.6329221607826457, |
|
"grad_norm": 0.876268059085391, |
|
"learning_rate": 6.256209385779341e-06, |
|
"loss": 0.1273, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.6346235644406636, |
|
"grad_norm": 1.0844520376132052, |
|
"learning_rate": 6.205069473761756e-06, |
|
"loss": 0.1335, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.6363249680986814, |
|
"grad_norm": 1.0020799137598357, |
|
"learning_rate": 6.154045246980742e-06, |
|
"loss": 0.1193, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.6380263717566993, |
|
"grad_norm": 0.855728002090308, |
|
"learning_rate": 6.1031382608665456e-06, |
|
"loss": 0.1108, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.6397277754147171, |
|
"grad_norm": 0.8773888809471199, |
|
"learning_rate": 6.052350067275441e-06, |
|
"loss": 0.1282, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.641429179072735, |
|
"grad_norm": 0.9006481684565109, |
|
"learning_rate": 6.001682214442406e-06, |
|
"loss": 0.1301, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.6431305827307529, |
|
"grad_norm": 0.9584790584065768, |
|
"learning_rate": 5.951136246933933e-06, |
|
"loss": 0.1487, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.6448319863887707, |
|
"grad_norm": 1.104605217113324, |
|
"learning_rate": 5.900713705600951e-06, |
|
"loss": 0.1759, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.6465333900467886, |
|
"grad_norm": 0.9879344636473479, |
|
"learning_rate": 5.850416127531841e-06, |
|
"loss": 0.1377, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6482347937048064, |
|
"grad_norm": 1.1463063310140142, |
|
"learning_rate": 5.800245046005585e-06, |
|
"loss": 0.1488, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.6499361973628244, |
|
"grad_norm": 0.8327925347275126, |
|
"learning_rate": 5.750201990445024e-06, |
|
"loss": 0.1441, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.6516376010208422, |
|
"grad_norm": 0.789671883690968, |
|
"learning_rate": 5.70028848637024e-06, |
|
"loss": 0.1302, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.6533390046788601, |
|
"grad_norm": 0.9198435092469637, |
|
"learning_rate": 5.650506055352052e-06, |
|
"loss": 0.1164, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.6550404083368779, |
|
"grad_norm": 0.9179626567799303, |
|
"learning_rate": 5.600856214965613e-06, |
|
"loss": 0.1362, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6567418119948958, |
|
"grad_norm": 0.8767201265948665, |
|
"learning_rate": 5.551340478744176e-06, |
|
"loss": 0.154, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.6584432156529136, |
|
"grad_norm": 0.9544128683831228, |
|
"learning_rate": 5.501960356132945e-06, |
|
"loss": 0.156, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.6601446193109315, |
|
"grad_norm": 0.9549458997168718, |
|
"learning_rate": 5.4527173524430395e-06, |
|
"loss": 0.1645, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.6618460229689493, |
|
"grad_norm": 0.8850775843370307, |
|
"learning_rate": 5.403612968805649e-06, |
|
"loss": 0.0994, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.6635474266269672, |
|
"grad_norm": 1.1024037997608411, |
|
"learning_rate": 5.354648702126229e-06, |
|
"loss": 0.1951, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6652488302849852, |
|
"grad_norm": 1.2094664207353945, |
|
"learning_rate": 5.305826045038899e-06, |
|
"loss": 0.1328, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.666950233943003, |
|
"grad_norm": 0.9462460124134728, |
|
"learning_rate": 5.257146485860927e-06, |
|
"loss": 0.1769, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.6686516376010209, |
|
"grad_norm": 0.8786684281110547, |
|
"learning_rate": 5.208611508547367e-06, |
|
"loss": 0.146, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.6703530412590387, |
|
"grad_norm": 0.7842667507919817, |
|
"learning_rate": 5.160222592645808e-06, |
|
"loss": 0.117, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.6720544449170566, |
|
"grad_norm": 1.1357067632204891, |
|
"learning_rate": 5.111981213251293e-06, |
|
"loss": 0.1792, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6737558485750744, |
|
"grad_norm": 0.9388409193539972, |
|
"learning_rate": 5.063888840961325e-06, |
|
"loss": 0.1562, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.6754572522330923, |
|
"grad_norm": 0.801123367757538, |
|
"learning_rate": 5.015946941831064e-06, |
|
"loss": 0.1296, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.6771586558911101, |
|
"grad_norm": 0.8535813470137829, |
|
"learning_rate": 4.968156977328626e-06, |
|
"loss": 0.1484, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.678860059549128, |
|
"grad_norm": 1.2248435999982104, |
|
"learning_rate": 4.920520404290512e-06, |
|
"loss": 0.1464, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.680561463207146, |
|
"grad_norm": 0.7210259524074228, |
|
"learning_rate": 4.87303867487723e-06, |
|
"loss": 0.1224, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6822628668651638, |
|
"grad_norm": 0.7005598922136381, |
|
"learning_rate": 4.825713236529005e-06, |
|
"loss": 0.0804, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.6839642705231816, |
|
"grad_norm": 0.7084992648825957, |
|
"learning_rate": 4.778545531921668e-06, |
|
"loss": 0.1118, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.6856656741811995, |
|
"grad_norm": 0.7988193456386986, |
|
"learning_rate": 4.731536998922657e-06, |
|
"loss": 0.1481, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.6873670778392174, |
|
"grad_norm": 0.5955639940984955, |
|
"learning_rate": 4.684689070547216e-06, |
|
"loss": 0.0999, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.6890684814972352, |
|
"grad_norm": 0.5771691889562411, |
|
"learning_rate": 4.638003174914675e-06, |
|
"loss": 0.0875, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6907698851552531, |
|
"grad_norm": 0.9925814202496841, |
|
"learning_rate": 4.591480735204953e-06, |
|
"loss": 0.1349, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.6924712888132709, |
|
"grad_norm": 0.8994659904668703, |
|
"learning_rate": 4.545123169615134e-06, |
|
"loss": 0.1548, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.6941726924712888, |
|
"grad_norm": 0.9580142998259727, |
|
"learning_rate": 4.49893189131627e-06, |
|
"loss": 0.1587, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.6958740961293067, |
|
"grad_norm": 0.6788465105557742, |
|
"learning_rate": 4.45290830841028e-06, |
|
"loss": 0.0926, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.6975754997873246, |
|
"grad_norm": 0.9024039513933189, |
|
"learning_rate": 4.407053823887033e-06, |
|
"loss": 0.1529, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6992769034453424, |
|
"grad_norm": 0.8820937053345296, |
|
"learning_rate": 4.361369835581569e-06, |
|
"loss": 0.1462, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.7009783071033603, |
|
"grad_norm": 0.895339461723458, |
|
"learning_rate": 4.315857736131508e-06, |
|
"loss": 0.122, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.7026797107613781, |
|
"grad_norm": 0.9930787706041434, |
|
"learning_rate": 4.2705189129345814e-06, |
|
"loss": 0.1714, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.704381114419396, |
|
"grad_norm": 0.7941569999620287, |
|
"learning_rate": 4.225354748106328e-06, |
|
"loss": 0.1183, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.7060825180774138, |
|
"grad_norm": 1.045533327522596, |
|
"learning_rate": 4.180366618437996e-06, |
|
"loss": 0.1748, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.7077839217354317, |
|
"grad_norm": 0.8163342078381849, |
|
"learning_rate": 4.13555589535453e-06, |
|
"loss": 0.1101, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.7094853253934496, |
|
"grad_norm": 0.6685227131497675, |
|
"learning_rate": 4.0909239448727985e-06, |
|
"loss": 0.1116, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.7111867290514675, |
|
"grad_norm": 1.042711187603995, |
|
"learning_rate": 4.046472127559937e-06, |
|
"loss": 0.1476, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.7128881327094854, |
|
"grad_norm": 0.7990571644153376, |
|
"learning_rate": 4.002201798491875e-06, |
|
"loss": 0.1241, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.7145895363675032, |
|
"grad_norm": 0.8607452302823387, |
|
"learning_rate": 3.958114307212018e-06, |
|
"loss": 0.1479, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.7162909400255211, |
|
"grad_norm": 0.9017446318408956, |
|
"learning_rate": 3.91421099769013e-06, |
|
"loss": 0.1281, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.7179923436835389, |
|
"grad_norm": 0.8608148447694013, |
|
"learning_rate": 3.870493208281337e-06, |
|
"loss": 0.1392, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.7196937473415568, |
|
"grad_norm": 0.9732198553868028, |
|
"learning_rate": 3.826962271685351e-06, |
|
"loss": 0.1443, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.7213951509995746, |
|
"grad_norm": 0.7026930248963273, |
|
"learning_rate": 3.7836195149058386e-06, |
|
"loss": 0.1159, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.7230965546575925, |
|
"grad_norm": 1.0616878292696266, |
|
"learning_rate": 3.7404662592099483e-06, |
|
"loss": 0.178, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7247979583156103, |
|
"grad_norm": 0.879922946105996, |
|
"learning_rate": 3.697503820088063e-06, |
|
"loss": 0.1345, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.7264993619736282, |
|
"grad_norm": 0.8118424189657202, |
|
"learning_rate": 3.654733507213678e-06, |
|
"loss": 0.1107, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.7282007656316462, |
|
"grad_norm": 1.0124821315311636, |
|
"learning_rate": 3.61215662440349e-06, |
|
"loss": 0.1597, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.729902169289664, |
|
"grad_norm": 0.584812477581454, |
|
"learning_rate": 3.5697744695776326e-06, |
|
"loss": 0.0792, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.7316035729476819, |
|
"grad_norm": 1.0457097189478148, |
|
"learning_rate": 3.5275883347201336e-06, |
|
"loss": 0.1489, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7333049766056997, |
|
"grad_norm": 0.9729869947589316, |
|
"learning_rate": 3.4855995058395066e-06, |
|
"loss": 0.1275, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.7350063802637176, |
|
"grad_norm": 0.8868662034897864, |
|
"learning_rate": 3.443809262929575e-06, |
|
"loss": 0.168, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.7367077839217354, |
|
"grad_norm": 0.9125187516121847, |
|
"learning_rate": 3.4022188799304214e-06, |
|
"loss": 0.1623, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.7384091875797533, |
|
"grad_norm": 0.8317592980564871, |
|
"learning_rate": 3.36082962468958e-06, |
|
"loss": 0.1231, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.7401105912377711, |
|
"grad_norm": 0.7739839576951205, |
|
"learning_rate": 3.3196427589233725e-06, |
|
"loss": 0.12, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.741811994895789, |
|
"grad_norm": 0.797509591834182, |
|
"learning_rate": 3.2786595381784512e-06, |
|
"loss": 0.1299, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.7435133985538069, |
|
"grad_norm": 1.2352332695500554, |
|
"learning_rate": 3.2378812117935154e-06, |
|
"loss": 0.2158, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.7452148022118248, |
|
"grad_norm": 0.9970358273478818, |
|
"learning_rate": 3.1973090228612404e-06, |
|
"loss": 0.1889, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.7469162058698426, |
|
"grad_norm": 0.9175281342866968, |
|
"learning_rate": 3.15694420819038e-06, |
|
"loss": 0.1451, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.7486176095278605, |
|
"grad_norm": 0.7771607469867583, |
|
"learning_rate": 3.116787998268046e-06, |
|
"loss": 0.1273, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.7503190131858783, |
|
"grad_norm": 1.1386198810950086, |
|
"learning_rate": 3.076841617222228e-06, |
|
"loss": 0.1755, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.7520204168438962, |
|
"grad_norm": 0.7884645481853416, |
|
"learning_rate": 3.0371062827844434e-06, |
|
"loss": 0.137, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.753721820501914, |
|
"grad_norm": 1.1651926590842914, |
|
"learning_rate": 2.997583206252647e-06, |
|
"loss": 0.1388, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.7554232241599319, |
|
"grad_norm": 1.1937560745179017, |
|
"learning_rate": 2.958273592454285e-06, |
|
"loss": 0.1742, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.7571246278179498, |
|
"grad_norm": 1.0502635521989399, |
|
"learning_rate": 2.9191786397095778e-06, |
|
"loss": 0.1498, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7588260314759677, |
|
"grad_norm": 0.7584831550160862, |
|
"learning_rate": 2.880299539794975e-06, |
|
"loss": 0.1086, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.7605274351339856, |
|
"grad_norm": 1.1225417606344352, |
|
"learning_rate": 2.841637477906851e-06, |
|
"loss": 0.1934, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.7622288387920034, |
|
"grad_norm": 0.9497099717053716, |
|
"learning_rate": 2.803193632625346e-06, |
|
"loss": 0.164, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.7639302424500213, |
|
"grad_norm": 0.6837798268193367, |
|
"learning_rate": 2.7649691758784603e-06, |
|
"loss": 0.1114, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.7656316461080391, |
|
"grad_norm": 0.6950024172256357, |
|
"learning_rate": 2.7269652729063233e-06, |
|
"loss": 0.0977, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.767333049766057, |
|
"grad_norm": 0.9354904788771492, |
|
"learning_rate": 2.689183082225659e-06, |
|
"loss": 0.1591, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.7690344534240748, |
|
"grad_norm": 1.2402009468321933, |
|
"learning_rate": 2.65162375559449e-06, |
|
"loss": 0.1772, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.7707358570820927, |
|
"grad_norm": 0.7465094532185766, |
|
"learning_rate": 2.614288437977014e-06, |
|
"loss": 0.1195, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.7724372607401105, |
|
"grad_norm": 1.0226055457032026, |
|
"learning_rate": 2.5771782675087078e-06, |
|
"loss": 0.2099, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.7741386643981285, |
|
"grad_norm": 0.9551942400194852, |
|
"learning_rate": 2.5402943754616182e-06, |
|
"loss": 0.173, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.7758400680561464, |
|
"grad_norm": 0.6594348635504637, |
|
"learning_rate": 2.5036378862099e-06, |
|
"loss": 0.0883, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.7775414717141642, |
|
"grad_norm": 0.7901774411413286, |
|
"learning_rate": 2.467209917195513e-06, |
|
"loss": 0.1237, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.7792428753721821, |
|
"grad_norm": 0.8910357359774559, |
|
"learning_rate": 2.4310115788941855e-06, |
|
"loss": 0.1289, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.7809442790301999, |
|
"grad_norm": 1.136218990072521, |
|
"learning_rate": 2.3950439747815357e-06, |
|
"loss": 0.1519, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.7826456826882178, |
|
"grad_norm": 0.8023078317049139, |
|
"learning_rate": 2.359308201299454e-06, |
|
"loss": 0.1147, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7843470863462356, |
|
"grad_norm": 0.7751079437436228, |
|
"learning_rate": 2.3238053478226665e-06, |
|
"loss": 0.1497, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.7860484900042535, |
|
"grad_norm": 0.6377332784824975, |
|
"learning_rate": 2.2885364966255372e-06, |
|
"loss": 0.1229, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.7877498936622713, |
|
"grad_norm": 1.2952811713921435, |
|
"learning_rate": 2.2535027228490582e-06, |
|
"loss": 0.1986, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.7894512973202893, |
|
"grad_norm": 0.9212470224777787, |
|
"learning_rate": 2.2187050944680942e-06, |
|
"loss": 0.178, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.7911527009783071, |
|
"grad_norm": 1.093266320340803, |
|
"learning_rate": 2.18414467225882e-06, |
|
"loss": 0.1485, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.792854104636325, |
|
"grad_norm": 1.2237974652268393, |
|
"learning_rate": 2.1498225097663695e-06, |
|
"loss": 0.1855, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.7945555082943428, |
|
"grad_norm": 1.010538661263038, |
|
"learning_rate": 2.115739653272747e-06, |
|
"loss": 0.1374, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.7962569119523607, |
|
"grad_norm": 0.8405255367131824, |
|
"learning_rate": 2.0818971417649013e-06, |
|
"loss": 0.121, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.7979583156103786, |
|
"grad_norm": 1.609522304609243, |
|
"learning_rate": 2.048296006903081e-06, |
|
"loss": 0.196, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.7996597192683964, |
|
"grad_norm": 0.8809611996542851, |
|
"learning_rate": 2.0149372729893646e-06, |
|
"loss": 0.0851, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.8013611229264143, |
|
"grad_norm": 1.0627552307732382, |
|
"learning_rate": 1.981821956936448e-06, |
|
"loss": 0.1401, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.8030625265844321, |
|
"grad_norm": 1.005097101735857, |
|
"learning_rate": 1.9489510682366363e-06, |
|
"loss": 0.1487, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.8047639302424501, |
|
"grad_norm": 0.6705089050955295, |
|
"learning_rate": 1.916325608931079e-06, |
|
"loss": 0.0876, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.8064653339004679, |
|
"grad_norm": 0.8633874336922923, |
|
"learning_rate": 1.8839465735792095e-06, |
|
"loss": 0.1301, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.8081667375584858, |
|
"grad_norm": 1.1059783421414893, |
|
"learning_rate": 1.8518149492284477e-06, |
|
"loss": 0.1877, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.8098681412165036, |
|
"grad_norm": 0.8023451929643662, |
|
"learning_rate": 1.8199317153840933e-06, |
|
"loss": 0.1397, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.8115695448745215, |
|
"grad_norm": 1.2934630086997319, |
|
"learning_rate": 1.7882978439794708e-06, |
|
"loss": 0.1874, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.8132709485325393, |
|
"grad_norm": 0.8838382576359304, |
|
"learning_rate": 1.756914299346304e-06, |
|
"loss": 0.1401, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.8149723521905572, |
|
"grad_norm": 0.7756837276148775, |
|
"learning_rate": 1.7257820381853197e-06, |
|
"loss": 0.1112, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.816673755848575, |
|
"grad_norm": 0.7565281231601331, |
|
"learning_rate": 1.6949020095370816e-06, |
|
"loss": 0.1233, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.8183751595065929, |
|
"grad_norm": 0.853948523291977, |
|
"learning_rate": 1.6642751547530512e-06, |
|
"loss": 0.1039, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.8200765631646108, |
|
"grad_norm": 0.8352361826998009, |
|
"learning_rate": 1.6339024074669107e-06, |
|
"loss": 0.1534, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.8217779668226287, |
|
"grad_norm": 0.8688740479788196, |
|
"learning_rate": 1.6037846935660807e-06, |
|
"loss": 0.1406, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.8234793704806466, |
|
"grad_norm": 0.8005917780121297, |
|
"learning_rate": 1.5739229311635152e-06, |
|
"loss": 0.1378, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.8251807741386644, |
|
"grad_norm": 0.7689221194951362, |
|
"learning_rate": 1.5443180305696948e-06, |
|
"loss": 0.1257, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8268821777966823, |
|
"grad_norm": 0.7665161442309016, |
|
"learning_rate": 1.5149708942648922e-06, |
|
"loss": 0.1179, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.8285835814547001, |
|
"grad_norm": 1.169882211334333, |
|
"learning_rate": 1.4858824168716524e-06, |
|
"loss": 0.1262, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.830284985112718, |
|
"grad_norm": 1.0029808688868465, |
|
"learning_rate": 1.4570534851275241e-06, |
|
"loss": 0.1709, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.8319863887707358, |
|
"grad_norm": 0.9018685205076322, |
|
"learning_rate": 1.4284849778580279e-06, |
|
"loss": 0.1275, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.8336877924287537, |
|
"grad_norm": 0.6540418058871127, |
|
"learning_rate": 1.4001777659498584e-06, |
|
"loss": 0.0756, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8353891960867715, |
|
"grad_norm": 1.5133534763569585, |
|
"learning_rate": 1.3721327123243533e-06, |
|
"loss": 0.1723, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.8370905997447895, |
|
"grad_norm": 0.8207160486641186, |
|
"learning_rate": 1.3443506719111666e-06, |
|
"loss": 0.0973, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.8387920034028074, |
|
"grad_norm": 0.8940821425158962, |
|
"learning_rate": 1.3168324916222296e-06, |
|
"loss": 0.1626, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.8404934070608252, |
|
"grad_norm": 0.9443999012934854, |
|
"learning_rate": 1.28957901032591e-06, |
|
"loss": 0.1581, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.8421948107188431, |
|
"grad_norm": 0.7851253215751375, |
|
"learning_rate": 1.2625910588214608e-06, |
|
"loss": 0.1166, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8438962143768609, |
|
"grad_norm": 0.888618596471637, |
|
"learning_rate": 1.2358694598136755e-06, |
|
"loss": 0.1159, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.8455976180348788, |
|
"grad_norm": 1.4198001597035892, |
|
"learning_rate": 1.2094150278878303e-06, |
|
"loss": 0.166, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.8472990216928966, |
|
"grad_norm": 0.5631587010169332, |
|
"learning_rate": 1.1832285694848255e-06, |
|
"loss": 0.0981, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.8490004253509145, |
|
"grad_norm": 0.8202300136945526, |
|
"learning_rate": 1.1573108828766255e-06, |
|
"loss": 0.1413, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.8507018290089323, |
|
"grad_norm": 0.9023011380495052, |
|
"learning_rate": 1.1316627581419137e-06, |
|
"loss": 0.1455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8524032326669503, |
|
"grad_norm": 1.235848904880368, |
|
"learning_rate": 1.1062849771420025e-06, |
|
"loss": 0.2096, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.8541046363249681, |
|
"grad_norm": 1.0464082421195386, |
|
"learning_rate": 1.0811783134970132e-06, |
|
"loss": 0.1646, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.855806039982986, |
|
"grad_norm": 1.0819062403332675, |
|
"learning_rate": 1.0563435325622762e-06, |
|
"loss": 0.1738, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.8575074436410038, |
|
"grad_norm": 1.473560386930757, |
|
"learning_rate": 1.0317813914050157e-06, |
|
"loss": 0.1692, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.8592088472990217, |
|
"grad_norm": 0.978349330529313, |
|
"learning_rate": 1.007492638781259e-06, |
|
"loss": 0.1513, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8609102509570395, |
|
"grad_norm": 1.179643658189341, |
|
"learning_rate": 9.834780151130196e-07, |
|
"loss": 0.166, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.8626116546150574, |
|
"grad_norm": 0.766029475252831, |
|
"learning_rate": 9.597382524657173e-07, |
|
"loss": 0.1134, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.8643130582730753, |
|
"grad_norm": 0.9024327918183763, |
|
"learning_rate": 9.362740745258736e-07, |
|
"loss": 0.1149, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.8660144619310931, |
|
"grad_norm": 0.7739449751707675, |
|
"learning_rate": 9.13086196579035e-07, |
|
"loss": 0.1597, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.8677158655891111, |
|
"grad_norm": 0.9425165834042365, |
|
"learning_rate": 8.901753254879885e-07, |
|
"loss": 0.1085, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8694172692471289, |
|
"grad_norm": 0.7591763618142894, |
|
"learning_rate": 8.67542159671192e-07, |
|
"loss": 0.0942, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.8711186729051468, |
|
"grad_norm": 1.025073614077726, |
|
"learning_rate": 8.451873890814988e-07, |
|
"loss": 0.1793, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.8728200765631646, |
|
"grad_norm": 0.7371355385468613, |
|
"learning_rate": 8.231116951851204e-07, |
|
"loss": 0.096, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.8745214802211825, |
|
"grad_norm": 1.101658847251625, |
|
"learning_rate": 8.013157509408509e-07, |
|
"loss": 0.1348, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.8762228838792003, |
|
"grad_norm": 1.2039979968776104, |
|
"learning_rate": 7.79800220779554e-07, |
|
"loss": 0.2153, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8779242875372182, |
|
"grad_norm": 1.006732552857591, |
|
"learning_rate": 7.585657605839059e-07, |
|
"loss": 0.1703, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.879625691195236, |
|
"grad_norm": 1.0145016790131924, |
|
"learning_rate": 7.376130176684082e-07, |
|
"loss": 0.1912, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.8813270948532539, |
|
"grad_norm": 0.6883624712674228, |
|
"learning_rate": 7.169426307596428e-07, |
|
"loss": 0.1084, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.8830284985112719, |
|
"grad_norm": 0.6860856488272892, |
|
"learning_rate": 6.965552299768186e-07, |
|
"loss": 0.1126, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.8847299021692897, |
|
"grad_norm": 0.6619502831120293, |
|
"learning_rate": 6.764514368125419e-07, |
|
"loss": 0.1024, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8864313058273076, |
|
"grad_norm": 0.7410446301641695, |
|
"learning_rate": 6.566318641138902e-07, |
|
"loss": 0.1221, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.8881327094853254, |
|
"grad_norm": 0.49451324336653424, |
|
"learning_rate": 6.370971160637129e-07, |
|
"loss": 0.0695, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.8898341131433433, |
|
"grad_norm": 1.0914793541244312, |
|
"learning_rate": 6.178477881622325e-07, |
|
"loss": 0.1097, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.8915355168013611, |
|
"grad_norm": 0.9323787575824379, |
|
"learning_rate": 5.98884467208869e-07, |
|
"loss": 0.1511, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.893236920459379, |
|
"grad_norm": 0.9363542835251712, |
|
"learning_rate": 5.802077312843723e-07, |
|
"loss": 0.1318, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8949383241173968, |
|
"grad_norm": 0.603259894149764, |
|
"learning_rate": 5.618181497331865e-07, |
|
"loss": 0.0853, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.8966397277754147, |
|
"grad_norm": 0.7303483060977899, |
|
"learning_rate": 5.437162831460962e-07, |
|
"loss": 0.1045, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.8983411314334325, |
|
"grad_norm": 0.8018716290849719, |
|
"learning_rate": 5.259026833431468e-07, |
|
"loss": 0.0994, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.9000425350914505, |
|
"grad_norm": 0.6742736755775497, |
|
"learning_rate": 5.083778933568073e-07, |
|
"loss": 0.1319, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.9017439387494683, |
|
"grad_norm": 1.1269218361928497, |
|
"learning_rate": 4.911424474154314e-07, |
|
"loss": 0.179, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.9034453424074862, |
|
"grad_norm": 0.8025742885030408, |
|
"learning_rate": 4.741968709269573e-07, |
|
"loss": 0.1092, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.905146746065504, |
|
"grad_norm": 0.9305265429776364, |
|
"learning_rate": 4.575416804629085e-07, |
|
"loss": 0.1735, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.9068481497235219, |
|
"grad_norm": 0.6809160303515213, |
|
"learning_rate": 4.411773837426303e-07, |
|
"loss": 0.1104, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.9085495533815398, |
|
"grad_norm": 0.9045295991595489, |
|
"learning_rate": 4.2510447961782055e-07, |
|
"loss": 0.1734, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.9102509570395576, |
|
"grad_norm": 0.8166470828418425, |
|
"learning_rate": 4.093234580573202e-07, |
|
"loss": 0.1523, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.9119523606975755, |
|
"grad_norm": 0.708095759490807, |
|
"learning_rate": 3.938348001321812e-07, |
|
"loss": 0.0868, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.9136537643555933, |
|
"grad_norm": 0.6245594804666802, |
|
"learning_rate": 3.786389780009958e-07, |
|
"loss": 0.0807, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.9153551680136113, |
|
"grad_norm": 0.7746580581966721, |
|
"learning_rate": 3.637364548955047e-07, |
|
"loss": 0.0765, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.9170565716716291, |
|
"grad_norm": 0.8160416161103592, |
|
"learning_rate": 3.491276851064784e-07, |
|
"loss": 0.1216, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.918757975329647, |
|
"grad_norm": 0.8270184476140136, |
|
"learning_rate": 3.3481311396986626e-07, |
|
"loss": 0.1134, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.9204593789876648, |
|
"grad_norm": 1.0313796709120198, |
|
"learning_rate": 3.2079317785322363e-07, |
|
"loss": 0.1766, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.9221607826456827, |
|
"grad_norm": 1.0201847755126947, |
|
"learning_rate": 3.0706830414240164e-07, |
|
"loss": 0.1729, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.9238621863037005, |
|
"grad_norm": 0.7776265936946829, |
|
"learning_rate": 2.9363891122853097e-07, |
|
"loss": 0.1241, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.9255635899617184, |
|
"grad_norm": 0.7554593104785499, |
|
"learning_rate": 2.805054084952552e-07, |
|
"loss": 0.1108, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.9272649936197362, |
|
"grad_norm": 0.9610672965393776, |
|
"learning_rate": 2.6766819630626216e-07, |
|
"loss": 0.1401, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9289663972777541, |
|
"grad_norm": 0.6998221281429967, |
|
"learning_rate": 2.5512766599306903e-07, |
|
"loss": 0.0957, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.9306678009357721, |
|
"grad_norm": 0.8390384737658598, |
|
"learning_rate": 2.4288419984310086e-07, |
|
"loss": 0.1168, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.9323692045937899, |
|
"grad_norm": 0.9236463852965615, |
|
"learning_rate": 2.3093817108803318e-07, |
|
"loss": 0.1634, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.9340706082518078, |
|
"grad_norm": 0.7447775141776533, |
|
"learning_rate": 2.1928994389241454e-07, |
|
"loss": 0.1004, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.9357720119098256, |
|
"grad_norm": 0.7255330710827995, |
|
"learning_rate": 2.0793987334256637e-07, |
|
"loss": 0.1272, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.9374734155678435, |
|
"grad_norm": 0.9390672914885052, |
|
"learning_rate": 1.968883054357562e-07, |
|
"loss": 0.1273, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.9391748192258613, |
|
"grad_norm": 0.9272533734619594, |
|
"learning_rate": 1.861355770696549e-07, |
|
"loss": 0.1338, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.9408762228838792, |
|
"grad_norm": 0.8915590435767178, |
|
"learning_rate": 1.7568201603205827e-07, |
|
"loss": 0.1478, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.942577626541897, |
|
"grad_norm": 0.833580814718019, |
|
"learning_rate": 1.6552794099090718e-07, |
|
"loss": 0.1641, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.9442790301999149, |
|
"grad_norm": 0.7262763652837203, |
|
"learning_rate": 1.5567366148455887e-07, |
|
"loss": 0.0812, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9459804338579328, |
|
"grad_norm": 0.6130128705786809, |
|
"learning_rate": 1.4611947791236314e-07, |
|
"loss": 0.084, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.9476818375159507, |
|
"grad_norm": 0.9063134327194557, |
|
"learning_rate": 1.3686568152549539e-07, |
|
"loss": 0.1301, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.9493832411739686, |
|
"grad_norm": 1.1026165821069918, |
|
"learning_rate": 1.2791255441809037e-07, |
|
"loss": 0.1414, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.9510846448319864, |
|
"grad_norm": 0.9387139772419816, |
|
"learning_rate": 1.1926036951862563e-07, |
|
"loss": 0.1326, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.9527860484900043, |
|
"grad_norm": 0.5842678864666834, |
|
"learning_rate": 1.109093905816172e-07, |
|
"loss": 0.102, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9544874521480221, |
|
"grad_norm": 0.9322995560475983, |
|
"learning_rate": 1.0285987217957038e-07, |
|
"loss": 0.171, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.95618885580604, |
|
"grad_norm": 0.8354465318723641, |
|
"learning_rate": 9.511205969522263e-08, |
|
"loss": 0.1443, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.9578902594640578, |
|
"grad_norm": 0.9920641234095123, |
|
"learning_rate": 8.76661893140629e-08, |
|
"loss": 0.1504, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.9595916631220757, |
|
"grad_norm": 0.9511533047381777, |
|
"learning_rate": 8.052248801712958e-08, |
|
"loss": 0.151, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.9612930667800936, |
|
"grad_norm": 0.9354186186695939, |
|
"learning_rate": 7.36811735740961e-08, |
|
"loss": 0.1229, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9629944704381115, |
|
"grad_norm": 0.6362057824024612, |
|
"learning_rate": 6.714245453662504e-08, |
|
"loss": 0.091, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.9646958740961293, |
|
"grad_norm": 1.0083389751537988, |
|
"learning_rate": 6.090653023201997e-08, |
|
"loss": 0.1456, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.9663972777541472, |
|
"grad_norm": 0.8907631394850228, |
|
"learning_rate": 5.497359075714026e-08, |
|
"loss": 0.1594, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.968098681412165, |
|
"grad_norm": 0.7531697026641528, |
|
"learning_rate": 4.934381697261015e-08, |
|
"loss": 0.1197, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.9698000850701829, |
|
"grad_norm": 0.8062893099107721, |
|
"learning_rate": 4.401738049730653e-08, |
|
"loss": 0.0974, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9715014887282007, |
|
"grad_norm": 1.2377725717443462, |
|
"learning_rate": 3.899444370312533e-08, |
|
"loss": 0.2178, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.9732028923862186, |
|
"grad_norm": 0.9551200579988982, |
|
"learning_rate": 3.4275159710032146e-08, |
|
"loss": 0.1566, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.9749042960442365, |
|
"grad_norm": 0.8038517484152471, |
|
"learning_rate": 2.9859672381392644e-08, |
|
"loss": 0.1219, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.9766056997022544, |
|
"grad_norm": 0.8743135576113755, |
|
"learning_rate": 2.574811631959273e-08, |
|
"loss": 0.1568, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.9783071033602723, |
|
"grad_norm": 0.8522700286101278, |
|
"learning_rate": 2.1940616861929608e-08, |
|
"loss": 0.1487, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9800085070182901, |
|
"grad_norm": 0.8224250512901891, |
|
"learning_rate": 1.8437290076792624e-08, |
|
"loss": 0.1498, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.981709910676308, |
|
"grad_norm": 0.8848861332502466, |
|
"learning_rate": 1.5238242760126088e-08, |
|
"loss": 0.1504, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.9834113143343258, |
|
"grad_norm": 1.1465336786229596, |
|
"learning_rate": 1.234357243217188e-08, |
|
"loss": 0.2006, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.9851127179923437, |
|
"grad_norm": 0.8142904720294101, |
|
"learning_rate": 9.753367334499608e-09, |
|
"loss": 0.1086, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.9868141216503615, |
|
"grad_norm": 1.0733470427784377, |
|
"learning_rate": 7.467706427312093e-09, |
|
"loss": 0.1226, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9885155253083794, |
|
"grad_norm": 0.7419763368783449, |
|
"learning_rate": 5.486659387043958e-09, |
|
"loss": 0.072, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.9902169289663972, |
|
"grad_norm": 0.9725691812544829, |
|
"learning_rate": 3.810286604232216e-09, |
|
"loss": 0.1285, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.9919183326244151, |
|
"grad_norm": 1.006297166436942, |
|
"learning_rate": 2.4386391816777488e-09, |
|
"loss": 0.1627, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.993619736282433, |
|
"grad_norm": 2.3873068728832902, |
|
"learning_rate": 1.3717589328898773e-09, |
|
"loss": 0.102, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.9953211399404509, |
|
"grad_norm": 1.0284942567315343, |
|
"learning_rate": 6.096783808062778e-10, |
|
"loss": 0.1859, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9970225435984688, |
|
"grad_norm": 0.8620294782566316, |
|
"learning_rate": 1.524207568059932e-10, |
|
"loss": 0.1134, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.9987239472564866, |
|
"grad_norm": 0.9608067188231881, |
|
"learning_rate": 0.0, |
|
"loss": 0.1686, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.9987239472564866, |
|
"step": 587, |
|
"total_flos": 521029723553792.0, |
|
"train_loss": 0.19605895173712118, |
|
"train_runtime": 4311.1152, |
|
"train_samples_per_second": 17.449, |
|
"train_steps_per_second": 0.136 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 587, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 521029723553792.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |