|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.5071225071225074, |
|
"eval_steps": 500, |
|
"global_step": 440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0056657223796034, |
|
"grad_norm": 24.973131796915897, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 1.8537, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0113314447592068, |
|
"grad_norm": 32.79573813738381, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.0212, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0169971671388102, |
|
"grad_norm": 23.800880905805656, |
|
"learning_rate": 3e-06, |
|
"loss": 2.1456, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0226628895184136, |
|
"grad_norm": 19.091198715081358, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 1.9808, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.028328611898016998, |
|
"grad_norm": 14.124470348172405, |
|
"learning_rate": 5e-06, |
|
"loss": 2.1825, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0339943342776204, |
|
"grad_norm": 11.461608032959802, |
|
"learning_rate": 6e-06, |
|
"loss": 1.6353, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.039660056657223795, |
|
"grad_norm": 10.354681496346823, |
|
"learning_rate": 7e-06, |
|
"loss": 1.9076, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0453257790368272, |
|
"grad_norm": 10.167669680172194, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 1.4754, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.05099150141643059, |
|
"grad_norm": 7.5541696713086255, |
|
"learning_rate": 9e-06, |
|
"loss": 1.6213, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.056657223796033995, |
|
"grad_norm": 4.087852973173369, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5217, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06232294617563739, |
|
"grad_norm": 4.071392878063137, |
|
"learning_rate": 9.999948174819623e-06, |
|
"loss": 1.6551, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0679886685552408, |
|
"grad_norm": 5.2075402015034, |
|
"learning_rate": 9.999792700352826e-06, |
|
"loss": 1.4474, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.07365439093484419, |
|
"grad_norm": 3.6492933345906637, |
|
"learning_rate": 9.999533579822611e-06, |
|
"loss": 1.5585, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07932011331444759, |
|
"grad_norm": 6.482920810973195, |
|
"learning_rate": 9.999170818600562e-06, |
|
"loss": 1.3317, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.08498583569405099, |
|
"grad_norm": 4.137365745831386, |
|
"learning_rate": 9.998704424206747e-06, |
|
"loss": 1.4029, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0906515580736544, |
|
"grad_norm": 4.745717244720069, |
|
"learning_rate": 9.998134406309555e-06, |
|
"loss": 1.6586, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.09631728045325778, |
|
"grad_norm": 5.4377770096801346, |
|
"learning_rate": 9.997460776725497e-06, |
|
"loss": 1.365, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.10198300283286119, |
|
"grad_norm": 3.317130182493388, |
|
"learning_rate": 9.996683549418964e-06, |
|
"loss": 1.4956, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.10764872521246459, |
|
"grad_norm": 1.7845609616841893, |
|
"learning_rate": 9.995802740501933e-06, |
|
"loss": 1.3472, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.11331444759206799, |
|
"grad_norm": 14.387033772755194, |
|
"learning_rate": 9.994818368233639e-06, |
|
"loss": 1.4116, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11898016997167139, |
|
"grad_norm": 6.920700020611593, |
|
"learning_rate": 9.993730453020187e-06, |
|
"loss": 1.2776, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.12464589235127478, |
|
"grad_norm": 6.05951274644599, |
|
"learning_rate": 9.99253901741414e-06, |
|
"loss": 1.4433, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.13031161473087818, |
|
"grad_norm": 3.0541449788715935, |
|
"learning_rate": 9.991244086114046e-06, |
|
"loss": 1.3396, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1359773371104816, |
|
"grad_norm": 1.8438099140328046, |
|
"learning_rate": 9.989845685963917e-06, |
|
"loss": 1.3061, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.141643059490085, |
|
"grad_norm": 4.048301070320613, |
|
"learning_rate": 9.988343845952697e-06, |
|
"loss": 1.2283, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.14730878186968838, |
|
"grad_norm": 3.5627296346591457, |
|
"learning_rate": 9.986738597213633e-06, |
|
"loss": 1.2865, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.1529745042492918, |
|
"grad_norm": 2.237494567304501, |
|
"learning_rate": 9.98502997302365e-06, |
|
"loss": 1.3233, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.15864022662889518, |
|
"grad_norm": 3.479719952104877, |
|
"learning_rate": 9.983218008802648e-06, |
|
"loss": 1.3033, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1643059490084986, |
|
"grad_norm": 2.066121083229141, |
|
"learning_rate": 9.98130274211278e-06, |
|
"loss": 1.3326, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.16997167138810199, |
|
"grad_norm": 4.090684571263736, |
|
"learning_rate": 9.979284212657658e-06, |
|
"loss": 1.3102, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17563739376770537, |
|
"grad_norm": 2.369637256277251, |
|
"learning_rate": 9.977162462281544e-06, |
|
"loss": 1.4067, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.1813031161473088, |
|
"grad_norm": 1.4378564529803546, |
|
"learning_rate": 9.97493753496848e-06, |
|
"loss": 1.2409, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.18696883852691218, |
|
"grad_norm": 1.810353068849482, |
|
"learning_rate": 9.972609476841368e-06, |
|
"loss": 1.2659, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.19263456090651557, |
|
"grad_norm": 2.954930884156565, |
|
"learning_rate": 9.970178336161018e-06, |
|
"loss": 1.3727, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.19830028328611898, |
|
"grad_norm": 2.053307140265503, |
|
"learning_rate": 9.967644163325157e-06, |
|
"loss": 1.3463, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.20396600566572237, |
|
"grad_norm": 1.8032124432327943, |
|
"learning_rate": 9.965007010867366e-06, |
|
"loss": 1.1998, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.2096317280453258, |
|
"grad_norm": 1.4952983263862012, |
|
"learning_rate": 9.962266933456008e-06, |
|
"loss": 1.2829, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.21529745042492918, |
|
"grad_norm": 1.3649794008291625, |
|
"learning_rate": 9.959423987893086e-06, |
|
"loss": 1.2056, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.22096317280453256, |
|
"grad_norm": 1.4380773398306634, |
|
"learning_rate": 9.956478233113066e-06, |
|
"loss": 1.29, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.22662889518413598, |
|
"grad_norm": 1.6072540934424309, |
|
"learning_rate": 9.953429730181653e-06, |
|
"loss": 1.2593, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23229461756373937, |
|
"grad_norm": 1.6010739399694889, |
|
"learning_rate": 9.95027854229454e-06, |
|
"loss": 1.2117, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.23796033994334279, |
|
"grad_norm": 1.2474393925785745, |
|
"learning_rate": 9.947024734776076e-06, |
|
"loss": 1.2022, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.24362606232294617, |
|
"grad_norm": 1.4019264249340568, |
|
"learning_rate": 9.943668375077926e-06, |
|
"loss": 1.2365, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.24929178470254956, |
|
"grad_norm": 1.5087040675714003, |
|
"learning_rate": 9.940209532777666e-06, |
|
"loss": 1.274, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.254957507082153, |
|
"grad_norm": 1.1953570915609946, |
|
"learning_rate": 9.93664827957735e-06, |
|
"loss": 1.2526, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.26062322946175637, |
|
"grad_norm": 1.4826450819224886, |
|
"learning_rate": 9.932984689302012e-06, |
|
"loss": 1.1978, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.26628895184135976, |
|
"grad_norm": 1.1937833972167977, |
|
"learning_rate": 9.929218837898143e-06, |
|
"loss": 1.1816, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2719546742209632, |
|
"grad_norm": 1.1238100782353855, |
|
"learning_rate": 9.925350803432112e-06, |
|
"loss": 1.1931, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.2776203966005666, |
|
"grad_norm": 1.3338900623153498, |
|
"learning_rate": 9.921380666088558e-06, |
|
"loss": 1.1978, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.28328611898017, |
|
"grad_norm": 1.3236848667289738, |
|
"learning_rate": 9.917308508168712e-06, |
|
"loss": 1.2551, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.28895184135977336, |
|
"grad_norm": 1.425578635546673, |
|
"learning_rate": 9.913134414088698e-06, |
|
"loss": 1.2441, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.29461756373937675, |
|
"grad_norm": 1.171581674684746, |
|
"learning_rate": 9.908858470377793e-06, |
|
"loss": 1.2369, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3002832861189802, |
|
"grad_norm": 1.1564744150302062, |
|
"learning_rate": 9.904480765676617e-06, |
|
"loss": 1.209, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.3059490084985836, |
|
"grad_norm": 1.1357504524893798, |
|
"learning_rate": 9.9000013907353e-06, |
|
"loss": 1.2152, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.311614730878187, |
|
"grad_norm": 1.0498825437855333, |
|
"learning_rate": 9.895420438411616e-06, |
|
"loss": 1.2043, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.31728045325779036, |
|
"grad_norm": 1.6465219316145685, |
|
"learning_rate": 9.890738003669029e-06, |
|
"loss": 1.2289, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.32294617563739375, |
|
"grad_norm": 1.711551232749367, |
|
"learning_rate": 9.885954183574753e-06, |
|
"loss": 1.1831, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3286118980169972, |
|
"grad_norm": 1.2636664413259953, |
|
"learning_rate": 9.881069077297724e-06, |
|
"loss": 1.2061, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3342776203966006, |
|
"grad_norm": 1.4260407982081962, |
|
"learning_rate": 9.876082786106546e-06, |
|
"loss": 1.1998, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.33994334277620397, |
|
"grad_norm": 1.95604739866899, |
|
"learning_rate": 9.870995413367397e-06, |
|
"loss": 1.2215, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.34560906515580736, |
|
"grad_norm": 1.2316545141521473, |
|
"learning_rate": 9.865807064541878e-06, |
|
"loss": 1.1599, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.35127478753541075, |
|
"grad_norm": 1.1178440688886253, |
|
"learning_rate": 9.860517847184837e-06, |
|
"loss": 1.1907, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.35694050991501414, |
|
"grad_norm": 1.305376049095191, |
|
"learning_rate": 9.855127870942131e-06, |
|
"loss": 1.1474, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3626062322946176, |
|
"grad_norm": 1.0495122657744762, |
|
"learning_rate": 9.849637247548356e-06, |
|
"loss": 1.2424, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.36827195467422097, |
|
"grad_norm": 1.141538926125254, |
|
"learning_rate": 9.844046090824533e-06, |
|
"loss": 1.1689, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.37393767705382436, |
|
"grad_norm": 1.26961257521241, |
|
"learning_rate": 9.83835451667574e-06, |
|
"loss": 1.2106, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.37960339943342775, |
|
"grad_norm": 1.081533609255719, |
|
"learning_rate": 9.832562643088724e-06, |
|
"loss": 1.1834, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.38526912181303113, |
|
"grad_norm": 1.443083776392187, |
|
"learning_rate": 9.826670590129442e-06, |
|
"loss": 1.1505, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.3909348441926346, |
|
"grad_norm": 1.135777382976375, |
|
"learning_rate": 9.820678479940573e-06, |
|
"loss": 1.1489, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.39660056657223797, |
|
"grad_norm": 1.8779005247112062, |
|
"learning_rate": 9.814586436738998e-06, |
|
"loss": 1.1643, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.40226628895184136, |
|
"grad_norm": 1.7980060811236744, |
|
"learning_rate": 9.808394586813209e-06, |
|
"loss": 1.1594, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.40793201133144474, |
|
"grad_norm": 2.572405910372765, |
|
"learning_rate": 9.802103058520704e-06, |
|
"loss": 1.1854, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.41359773371104813, |
|
"grad_norm": 2.0253448122778606, |
|
"learning_rate": 9.795711982285317e-06, |
|
"loss": 1.1826, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.4192634560906516, |
|
"grad_norm": 6.483254642683073, |
|
"learning_rate": 9.78922149059452e-06, |
|
"loss": 1.1646, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.42492917847025496, |
|
"grad_norm": 1.2964281102887218, |
|
"learning_rate": 9.782631717996675e-06, |
|
"loss": 1.2379, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.43059490084985835, |
|
"grad_norm": 1.9517402996335103, |
|
"learning_rate": 9.775942801098241e-06, |
|
"loss": 1.164, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.43626062322946174, |
|
"grad_norm": 3.064531007561859, |
|
"learning_rate": 9.76915487856095e-06, |
|
"loss": 1.1418, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.44192634560906513, |
|
"grad_norm": 1.5009905490397355, |
|
"learning_rate": 9.762268091098926e-06, |
|
"loss": 1.1653, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4475920679886686, |
|
"grad_norm": 1.104518219439204, |
|
"learning_rate": 9.755282581475769e-06, |
|
"loss": 1.2025, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.45325779036827196, |
|
"grad_norm": 7.807500502849419, |
|
"learning_rate": 9.748198494501598e-06, |
|
"loss": 1.148, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45892351274787535, |
|
"grad_norm": 6.196503908242147, |
|
"learning_rate": 9.741015977030046e-06, |
|
"loss": 1.1819, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.46458923512747874, |
|
"grad_norm": 2.2714978855142736, |
|
"learning_rate": 9.733735177955219e-06, |
|
"loss": 1.1907, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4702549575070821, |
|
"grad_norm": 1.834743890260826, |
|
"learning_rate": 9.72635624820861e-06, |
|
"loss": 1.1381, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.47592067988668557, |
|
"grad_norm": 1.28470626171519, |
|
"learning_rate": 9.71887934075596e-06, |
|
"loss": 1.2079, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.48158640226628896, |
|
"grad_norm": 6.197048819949928, |
|
"learning_rate": 9.711304610594104e-06, |
|
"loss": 1.1272, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.48725212464589235, |
|
"grad_norm": 3.412508821399008, |
|
"learning_rate": 9.703632214747742e-06, |
|
"loss": 1.2382, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.49291784702549574, |
|
"grad_norm": 1.57336480270559, |
|
"learning_rate": 9.695862312266195e-06, |
|
"loss": 1.157, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.4985835694050991, |
|
"grad_norm": 7.383065472181884, |
|
"learning_rate": 9.687995064220102e-06, |
|
"loss": 1.1684, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5042492917847026, |
|
"grad_norm": 7.508526165016783, |
|
"learning_rate": 9.680030633698083e-06, |
|
"loss": 1.155, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.509915014164306, |
|
"grad_norm": 9.25317664016253, |
|
"learning_rate": 9.671969185803357e-06, |
|
"loss": 1.1452, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5155807365439093, |
|
"grad_norm": 2.2525643431971876, |
|
"learning_rate": 9.66381088765032e-06, |
|
"loss": 1.1505, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.5212464589235127, |
|
"grad_norm": 1.4721293586733248, |
|
"learning_rate": 9.65555590836108e-06, |
|
"loss": 1.1812, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.5269121813031161, |
|
"grad_norm": 2.6949100034582103, |
|
"learning_rate": 9.647204419061957e-06, |
|
"loss": 1.1739, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5325779036827195, |
|
"grad_norm": 2.027029228479332, |
|
"learning_rate": 9.638756592879923e-06, |
|
"loss": 1.1335, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.5382436260623229, |
|
"grad_norm": 1.8382974162119243, |
|
"learning_rate": 9.630212604939026e-06, |
|
"loss": 1.1298, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5439093484419264, |
|
"grad_norm": 1.2086577711922202, |
|
"learning_rate": 9.621572632356754e-06, |
|
"loss": 1.167, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5495750708215298, |
|
"grad_norm": 1.2819489966676616, |
|
"learning_rate": 9.61283685424036e-06, |
|
"loss": 1.1151, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.5552407932011332, |
|
"grad_norm": 1.6800709750196126, |
|
"learning_rate": 9.604005451683154e-06, |
|
"loss": 1.1945, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.5609065155807366, |
|
"grad_norm": 1.3375384173734144, |
|
"learning_rate": 9.59507860776075e-06, |
|
"loss": 1.1621, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.56657223796034, |
|
"grad_norm": 2.188062868326175, |
|
"learning_rate": 9.586056507527266e-06, |
|
"loss": 1.1555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5722379603399433, |
|
"grad_norm": 1.3814102048227788, |
|
"learning_rate": 9.57693933801149e-06, |
|
"loss": 1.1733, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5779036827195467, |
|
"grad_norm": 1.8014483071872645, |
|
"learning_rate": 9.567727288213005e-06, |
|
"loss": 1.1964, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5835694050991501, |
|
"grad_norm": 1.1912746031738484, |
|
"learning_rate": 9.558420549098269e-06, |
|
"loss": 1.2144, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5892351274787535, |
|
"grad_norm": 3.034007485521762, |
|
"learning_rate": 9.549019313596652e-06, |
|
"loss": 1.1321, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.5949008498583569, |
|
"grad_norm": 1.866729945439932, |
|
"learning_rate": 9.539523776596446e-06, |
|
"loss": 1.1539, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6005665722379604, |
|
"grad_norm": 1.5773392319922173, |
|
"learning_rate": 9.529934134940819e-06, |
|
"loss": 1.1373, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.6062322946175638, |
|
"grad_norm": 1.6561757646401918, |
|
"learning_rate": 9.520250587423733e-06, |
|
"loss": 1.1788, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.6118980169971672, |
|
"grad_norm": 1.2809743948171723, |
|
"learning_rate": 9.510473334785828e-06, |
|
"loss": 1.1509, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6175637393767706, |
|
"grad_norm": 3.3019220495325405, |
|
"learning_rate": 9.500602579710256e-06, |
|
"loss": 1.1879, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.623229461756374, |
|
"grad_norm": 1.5241985081276304, |
|
"learning_rate": 9.490638526818482e-06, |
|
"loss": 1.1114, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6288951841359773, |
|
"grad_norm": 2.053104975498995, |
|
"learning_rate": 9.480581382666041e-06, |
|
"loss": 1.2417, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6345609065155807, |
|
"grad_norm": 1.450461775862418, |
|
"learning_rate": 9.470431355738257e-06, |
|
"loss": 1.0761, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.6402266288951841, |
|
"grad_norm": 2.831772615909268, |
|
"learning_rate": 9.460188656445921e-06, |
|
"loss": 1.1684, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.6458923512747875, |
|
"grad_norm": 1.5478096558601282, |
|
"learning_rate": 9.449853497120928e-06, |
|
"loss": 1.1695, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6515580736543909, |
|
"grad_norm": 1.6582616402814803, |
|
"learning_rate": 9.439426092011877e-06, |
|
"loss": 1.1099, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.6572237960339944, |
|
"grad_norm": 1.0617767973732541, |
|
"learning_rate": 9.428906657279629e-06, |
|
"loss": 1.1584, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.6628895184135978, |
|
"grad_norm": 1.6822664727814025, |
|
"learning_rate": 9.418295410992821e-06, |
|
"loss": 1.1527, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6685552407932012, |
|
"grad_norm": 1.1837357577931802, |
|
"learning_rate": 9.407592573123359e-06, |
|
"loss": 1.187, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.6742209631728046, |
|
"grad_norm": 1.690006148754325, |
|
"learning_rate": 9.396798365541841e-06, |
|
"loss": 1.1023, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.6798866855524079, |
|
"grad_norm": 1.2755747770023382, |
|
"learning_rate": 9.385913012012972e-06, |
|
"loss": 1.1779, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6855524079320113, |
|
"grad_norm": 1.0625930962823409, |
|
"learning_rate": 9.374936738190913e-06, |
|
"loss": 1.1586, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.6912181303116147, |
|
"grad_norm": 1.4107647400186194, |
|
"learning_rate": 9.363869771614615e-06, |
|
"loss": 1.1227, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6968838526912181, |
|
"grad_norm": 1.4237393729227041, |
|
"learning_rate": 9.35271234170309e-06, |
|
"loss": 1.1526, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7025495750708215, |
|
"grad_norm": 1.239081728465614, |
|
"learning_rate": 9.341464679750669e-06, |
|
"loss": 1.1676, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.7082152974504249, |
|
"grad_norm": 1.2250609811941313, |
|
"learning_rate": 9.330127018922195e-06, |
|
"loss": 1.1549, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7138810198300283, |
|
"grad_norm": 1.0079463118549998, |
|
"learning_rate": 9.318699594248192e-06, |
|
"loss": 1.0825, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7195467422096318, |
|
"grad_norm": 1.1822482076914111, |
|
"learning_rate": 9.307182642620001e-06, |
|
"loss": 1.1699, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.7252124645892352, |
|
"grad_norm": 1.192585782341377, |
|
"learning_rate": 9.295576402784858e-06, |
|
"loss": 1.1864, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.7308781869688386, |
|
"grad_norm": 1.1793876225801334, |
|
"learning_rate": 9.283881115340957e-06, |
|
"loss": 1.1592, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7365439093484419, |
|
"grad_norm": 1.4328581990598621, |
|
"learning_rate": 9.272097022732444e-06, |
|
"loss": 1.1264, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7422096317280453, |
|
"grad_norm": 1.4063460821599099, |
|
"learning_rate": 9.260224369244414e-06, |
|
"loss": 1.1582, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.7478753541076487, |
|
"grad_norm": 1.3928551806399836, |
|
"learning_rate": 9.248263400997826e-06, |
|
"loss": 1.1036, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7535410764872521, |
|
"grad_norm": 1.0443812793505807, |
|
"learning_rate": 9.236214365944418e-06, |
|
"loss": 1.1809, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.7592067988668555, |
|
"grad_norm": 7.4865021772015234, |
|
"learning_rate": 9.224077513861556e-06, |
|
"loss": 1.1432, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.7648725212464589, |
|
"grad_norm": 4.687727924279942, |
|
"learning_rate": 9.211853096347059e-06, |
|
"loss": 1.1436, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7705382436260623, |
|
"grad_norm": 1.7813513129483227, |
|
"learning_rate": 9.199541366813984e-06, |
|
"loss": 1.2003, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.7762039660056658, |
|
"grad_norm": 1.1574866856711652, |
|
"learning_rate": 9.18714258048537e-06, |
|
"loss": 1.0949, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.7818696883852692, |
|
"grad_norm": 1.5923532949818175, |
|
"learning_rate": 9.174656994388957e-06, |
|
"loss": 1.1312, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7875354107648725, |
|
"grad_norm": 1.4090405021331738, |
|
"learning_rate": 9.16208486735184e-06, |
|
"loss": 1.1371, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.7932011331444759, |
|
"grad_norm": 1.1066958591085674, |
|
"learning_rate": 9.149426459995127e-06, |
|
"loss": 1.1892, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7988668555240793, |
|
"grad_norm": 1.3806489023187403, |
|
"learning_rate": 9.136682034728508e-06, |
|
"loss": 1.1203, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8045325779036827, |
|
"grad_norm": 1.4492241915768966, |
|
"learning_rate": 9.123851855744842e-06, |
|
"loss": 1.1606, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.8101983002832861, |
|
"grad_norm": 1.2880006738591805, |
|
"learning_rate": 9.110936189014668e-06, |
|
"loss": 1.1363, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.8158640226628895, |
|
"grad_norm": 1.4252322295071467, |
|
"learning_rate": 9.097935302280682e-06, |
|
"loss": 1.1299, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8215297450424929, |
|
"grad_norm": 1.1051239821774794, |
|
"learning_rate": 9.08484946505221e-06, |
|
"loss": 1.1855, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8271954674220963, |
|
"grad_norm": 1.1582328438262173, |
|
"learning_rate": 9.0716789485996e-06, |
|
"loss": 1.1173, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.8328611898016998, |
|
"grad_norm": 1.1514645858243073, |
|
"learning_rate": 9.058424025948609e-06, |
|
"loss": 1.0758, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8385269121813032, |
|
"grad_norm": 1.9099023373890425, |
|
"learning_rate": 9.045084971874738e-06, |
|
"loss": 1.1502, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.8441926345609065, |
|
"grad_norm": 1.4883203974156398, |
|
"learning_rate": 9.03166206289754e-06, |
|
"loss": 1.1244, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.8498583569405099, |
|
"grad_norm": 1.2439793782301596, |
|
"learning_rate": 9.018155577274891e-06, |
|
"loss": 1.1188, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8555240793201133, |
|
"grad_norm": 0.9842320904106822, |
|
"learning_rate": 9.004565794997209e-06, |
|
"loss": 1.0915, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.8611898016997167, |
|
"grad_norm": 1.1256206443075392, |
|
"learning_rate": 8.990892997781661e-06, |
|
"loss": 1.1418, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.8668555240793201, |
|
"grad_norm": 1.4668868690697237, |
|
"learning_rate": 8.977137469066321e-06, |
|
"loss": 1.1439, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8725212464589235, |
|
"grad_norm": 1.0357963651071045, |
|
"learning_rate": 8.963299494004292e-06, |
|
"loss": 1.1489, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.8781869688385269, |
|
"grad_norm": 1.2279259538562963, |
|
"learning_rate": 8.949379359457795e-06, |
|
"loss": 1.148, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.8838526912181303, |
|
"grad_norm": 1.279164021341607, |
|
"learning_rate": 8.935377353992222e-06, |
|
"loss": 1.1291, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8895184135977338, |
|
"grad_norm": 1.0117872914387078, |
|
"learning_rate": 8.921293767870157e-06, |
|
"loss": 1.1029, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.8951841359773371, |
|
"grad_norm": 1.0385739682984056, |
|
"learning_rate": 8.907128893045359e-06, |
|
"loss": 1.1378, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.9008498583569405, |
|
"grad_norm": 0.9862798736503189, |
|
"learning_rate": 8.892883023156703e-06, |
|
"loss": 1.1247, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9065155807365439, |
|
"grad_norm": 1.0052226052209343, |
|
"learning_rate": 8.8785564535221e-06, |
|
"loss": 1.1396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9121813031161473, |
|
"grad_norm": 1.0025191403649947, |
|
"learning_rate": 8.86414948113237e-06, |
|
"loss": 1.1072, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.9178470254957507, |
|
"grad_norm": 1.0190829556170014, |
|
"learning_rate": 8.849662404645097e-06, |
|
"loss": 1.0692, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9235127478753541, |
|
"grad_norm": 1.065083676666634, |
|
"learning_rate": 8.835095524378413e-06, |
|
"loss": 1.0839, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.9291784702549575, |
|
"grad_norm": 2.75250829153078, |
|
"learning_rate": 8.820449142304805e-06, |
|
"loss": 1.0976, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.9348441926345609, |
|
"grad_norm": 1.11457337735503, |
|
"learning_rate": 8.805723562044825e-06, |
|
"loss": 1.1383, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9405099150141643, |
|
"grad_norm": 1.223823647150824, |
|
"learning_rate": 8.790919088860815e-06, |
|
"loss": 1.1331, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.9461756373937678, |
|
"grad_norm": 0.9688685956053592, |
|
"learning_rate": 8.776036029650573e-06, |
|
"loss": 1.1168, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.9518413597733711, |
|
"grad_norm": 1.0407006447195224, |
|
"learning_rate": 8.76107469294099e-06, |
|
"loss": 1.1353, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9575070821529745, |
|
"grad_norm": 1.477166466547593, |
|
"learning_rate": 8.746035388881655e-06, |
|
"loss": 1.146, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.9631728045325779, |
|
"grad_norm": 1.1923873158431406, |
|
"learning_rate": 8.730918429238429e-06, |
|
"loss": 1.1513, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9688385269121813, |
|
"grad_norm": 1.2104600261128056, |
|
"learning_rate": 8.715724127386971e-06, |
|
"loss": 1.0846, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9745042492917847, |
|
"grad_norm": 1.026649259168152, |
|
"learning_rate": 8.70045279830626e-06, |
|
"loss": 1.0987, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.9801699716713881, |
|
"grad_norm": 1.1324270741577538, |
|
"learning_rate": 8.685104758572047e-06, |
|
"loss": 1.1884, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.9858356940509915, |
|
"grad_norm": 1.1264630127825281, |
|
"learning_rate": 8.669680326350303e-06, |
|
"loss": 1.1505, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9915014164305949, |
|
"grad_norm": 1.0463584307162723, |
|
"learning_rate": 8.65417982139062e-06, |
|
"loss": 1.1194, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.9971671388101983, |
|
"grad_norm": 1.1195551791308074, |
|
"learning_rate": 8.638603565019588e-06, |
|
"loss": 1.1228, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.0113636363636365, |
|
"grad_norm": 1.7869848977800533, |
|
"learning_rate": 8.622951880134122e-06, |
|
"loss": 1.0017, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0170454545454546, |
|
"grad_norm": 1.8967548711721598, |
|
"learning_rate": 8.60722509119478e-06, |
|
"loss": 1.0646, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.0227272727272727, |
|
"grad_norm": 2.7719840532515856, |
|
"learning_rate": 8.59142352421903e-06, |
|
"loss": 0.9887, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.0284090909090908, |
|
"grad_norm": 1.8480101734746917, |
|
"learning_rate": 8.575547506774498e-06, |
|
"loss": 1.0262, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0340909090909092, |
|
"grad_norm": 1.4999444026158775, |
|
"learning_rate": 8.559597367972168e-06, |
|
"loss": 0.9829, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.0397727272727273, |
|
"grad_norm": 1.38809085421665, |
|
"learning_rate": 8.543573438459573e-06, |
|
"loss": 1.0144, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 1.2624399470463477, |
|
"learning_rate": 8.527476050413922e-06, |
|
"loss": 0.9867, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.0511363636363635, |
|
"grad_norm": 7.342610894443344, |
|
"learning_rate": 8.511305537535238e-06, |
|
"loss": 0.9866, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.0568181818181819, |
|
"grad_norm": 8.705248219538825, |
|
"learning_rate": 8.49506223503941e-06, |
|
"loss": 0.9728, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 2.0263962989089936, |
|
"learning_rate": 8.47874647965128e-06, |
|
"loss": 0.9965, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.0681818181818181, |
|
"grad_norm": 2.13351438929688, |
|
"learning_rate": 8.462358609597629e-06, |
|
"loss": 1.0024, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.0738636363636365, |
|
"grad_norm": 2.0005753741817736, |
|
"learning_rate": 8.445898964600188e-06, |
|
"loss": 0.993, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.0795454545454546, |
|
"grad_norm": 2.084050032615475, |
|
"learning_rate": 8.429367885868582e-06, |
|
"loss": 0.9958, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0852272727272727, |
|
"grad_norm": 1.7516330808766072, |
|
"learning_rate": 8.412765716093273e-06, |
|
"loss": 1.0554, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 1.2861019981619892, |
|
"learning_rate": 8.396092799438429e-06, |
|
"loss": 1.013, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 1.0965909090909092, |
|
"grad_norm": 1.4381225932886976, |
|
"learning_rate": 8.379349481534822e-06, |
|
"loss": 0.9797, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.1022727272727273, |
|
"grad_norm": 1.8623594079891328, |
|
"learning_rate": 8.362536109472637e-06, |
|
"loss": 1.0018, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 1.1079545454545454, |
|
"grad_norm": 1.5115381108478676, |
|
"learning_rate": 8.345653031794292e-06, |
|
"loss": 1.016, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.1136363636363635, |
|
"grad_norm": 1.193026650866575, |
|
"learning_rate": 8.328700598487203e-06, |
|
"loss": 0.9977, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1193181818181819, |
|
"grad_norm": 1.080840404605079, |
|
"learning_rate": 8.31167916097654e-06, |
|
"loss": 0.9982, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 1.244418182887263, |
|
"learning_rate": 8.294589072117925e-06, |
|
"loss": 1.0206, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 1.1306818181818181, |
|
"grad_norm": 1.054116651622593, |
|
"learning_rate": 8.277430686190137e-06, |
|
"loss": 0.9932, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 1.6708346020909142, |
|
"learning_rate": 8.260204358887753e-06, |
|
"loss": 0.9867, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 1.1420454545454546, |
|
"grad_norm": 1.764380671950815, |
|
"learning_rate": 8.24291044731378e-06, |
|
"loss": 1.0255, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.1477272727272727, |
|
"grad_norm": 1.4610852940462264, |
|
"learning_rate": 8.225549309972256e-06, |
|
"loss": 1.0016, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.1534090909090908, |
|
"grad_norm": 1.3465974910520928, |
|
"learning_rate": 8.208121306760806e-06, |
|
"loss": 0.9942, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.1590909090909092, |
|
"grad_norm": 3.407109598217383, |
|
"learning_rate": 8.190626798963198e-06, |
|
"loss": 0.9595, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.1647727272727273, |
|
"grad_norm": 3.4569449045424228, |
|
"learning_rate": 8.173066149241839e-06, |
|
"loss": 0.9679, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.1704545454545454, |
|
"grad_norm": 3.5722389574790623, |
|
"learning_rate": 8.155439721630265e-06, |
|
"loss": 1.0112, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.1761363636363638, |
|
"grad_norm": 1.7368368324960894, |
|
"learning_rate": 8.137747881525593e-06, |
|
"loss": 0.9658, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 3.5425491105943365, |
|
"learning_rate": 8.119990995680942e-06, |
|
"loss": 1.0097, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 4.277519958399436, |
|
"learning_rate": 8.102169432197842e-06, |
|
"loss": 1.0525, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.1931818181818181, |
|
"grad_norm": 1.5253776819790414, |
|
"learning_rate": 8.084283560518584e-06, |
|
"loss": 1.0257, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 1.1988636363636362, |
|
"grad_norm": 2.393941181872517, |
|
"learning_rate": 8.066333751418582e-06, |
|
"loss": 0.9519, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2045454545454546, |
|
"grad_norm": 1.8648154402777406, |
|
"learning_rate": 8.048320376998675e-06, |
|
"loss": 1.0314, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 1.2102272727272727, |
|
"grad_norm": 1.1560926115738988, |
|
"learning_rate": 8.030243810677408e-06, |
|
"loss": 1.0079, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.2159090909090908, |
|
"grad_norm": 1.9861708806007312, |
|
"learning_rate": 8.012104427183313e-06, |
|
"loss": 0.9712, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2215909090909092, |
|
"grad_norm": 1.6176603802315128, |
|
"learning_rate": 7.993902602547113e-06, |
|
"loss": 1.0604, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 1.206136483858858, |
|
"learning_rate": 7.97563871409395e-06, |
|
"loss": 0.9968, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.2329545454545454, |
|
"grad_norm": 1.0849650106469113, |
|
"learning_rate": 7.957313140435545e-06, |
|
"loss": 1.0013, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2386363636363638, |
|
"grad_norm": 1.2530592258144626, |
|
"learning_rate": 7.938926261462366e-06, |
|
"loss": 1.0392, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.2443181818181819, |
|
"grad_norm": 1.4528013728950318, |
|
"learning_rate": 7.920478458335738e-06, |
|
"loss": 0.945, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 1.1182010469150763, |
|
"learning_rate": 7.901970113479956e-06, |
|
"loss": 0.9755, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.2556818181818181, |
|
"grad_norm": 1.274158214216111, |
|
"learning_rate": 7.883401610574338e-06, |
|
"loss": 0.9827, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.2613636363636362, |
|
"grad_norm": 1.4460645426911298, |
|
"learning_rate": 7.86477333454529e-06, |
|
"loss": 1.0233, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 1.2670454545454546, |
|
"grad_norm": 1.004043430975716, |
|
"learning_rate": 7.84608567155832e-06, |
|
"loss": 0.988, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 1.1277928768546195, |
|
"learning_rate": 7.82733900901003e-06, |
|
"loss": 1.0092, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 1.2784090909090908, |
|
"grad_norm": 1.30174465678015, |
|
"learning_rate": 7.808533735520087e-06, |
|
"loss": 1.0023, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.2840909090909092, |
|
"grad_norm": 1.155122280361969, |
|
"learning_rate": 7.789670240923169e-06, |
|
"loss": 0.9938, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.2897727272727273, |
|
"grad_norm": 1.1535920929699675, |
|
"learning_rate": 7.770748916260875e-06, |
|
"loss": 1.0215, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.2954545454545454, |
|
"grad_norm": 1.7495637702269113, |
|
"learning_rate": 7.751770153773635e-06, |
|
"loss": 0.9776, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 1.3011363636363638, |
|
"grad_norm": 1.2776922576240242, |
|
"learning_rate": 7.732734346892561e-06, |
|
"loss": 0.9716, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.3068181818181819, |
|
"grad_norm": 1.3172404492877499, |
|
"learning_rate": 7.71364189023131e-06, |
|
"loss": 0.9928, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 1.0320305867343866, |
|
"learning_rate": 7.69449317957788e-06, |
|
"loss": 0.9544, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 0.9917633137560159, |
|
"learning_rate": 7.675288611886423e-06, |
|
"loss": 0.9762, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.3238636363636362, |
|
"grad_norm": 0.8750459875550817, |
|
"learning_rate": 7.656028585269017e-06, |
|
"loss": 0.9649, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.3295454545454546, |
|
"grad_norm": 1.0172245413205394, |
|
"learning_rate": 7.636713498987405e-06, |
|
"loss": 0.9915, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 1.3352272727272727, |
|
"grad_norm": 1.1026610095660114, |
|
"learning_rate": 7.617343753444714e-06, |
|
"loss": 0.9167, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.3409090909090908, |
|
"grad_norm": 0.9838674494365538, |
|
"learning_rate": 7.597919750177168e-06, |
|
"loss": 0.9978, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.3465909090909092, |
|
"grad_norm": 0.9922575875228704, |
|
"learning_rate": 7.5784418918457605e-06, |
|
"loss": 1.0052, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.3522727272727273, |
|
"grad_norm": 0.9776223871792626, |
|
"learning_rate": 7.5589105822278944e-06, |
|
"loss": 1.0096, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.3579545454545454, |
|
"grad_norm": 1.4258305295766374, |
|
"learning_rate": 7.539326226209032e-06, |
|
"loss": 1.0458, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 1.0015058561164187, |
|
"learning_rate": 7.519689229774282e-06, |
|
"loss": 1.0248, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 1.3693181818181819, |
|
"grad_norm": 1.0082049852889665, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.9766, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 0.904307095617801, |
|
"learning_rate": 7.4802589450453415e-06, |
|
"loss": 1.029, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 1.3806818181818181, |
|
"grad_norm": 0.9709949750288794, |
|
"learning_rate": 7.4604664741437975e-06, |
|
"loss": 0.9803, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.3863636363636362, |
|
"grad_norm": 0.9137049440782995, |
|
"learning_rate": 7.440622997594718e-06, |
|
"loss": 0.9838, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3920454545454546, |
|
"grad_norm": 0.955522616879317, |
|
"learning_rate": 7.420728926754803e-06, |
|
"loss": 0.9841, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.3977272727272727, |
|
"grad_norm": 0.8924545271105511, |
|
"learning_rate": 7.400784674029579e-06, |
|
"loss": 0.9747, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4034090909090908, |
|
"grad_norm": 0.9275527221675671, |
|
"learning_rate": 7.380790652864842e-06, |
|
"loss": 1.0203, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 0.9480980891308645, |
|
"learning_rate": 7.360747277738094e-06, |
|
"loss": 0.9923, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 1.4147727272727273, |
|
"grad_norm": 0.8427849664059336, |
|
"learning_rate": 7.340654964149947e-06, |
|
"loss": 0.9806, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.4204545454545454, |
|
"grad_norm": 0.9076953250803492, |
|
"learning_rate": 7.320514128615511e-06, |
|
"loss": 0.9982, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4261363636363638, |
|
"grad_norm": 1.0540250139165377, |
|
"learning_rate": 7.300325188655762e-06, |
|
"loss": 0.9902, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.4318181818181819, |
|
"grad_norm": 0.9954503040475974, |
|
"learning_rate": 7.280088562788879e-06, |
|
"loss": 0.9809, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 1.4375, |
|
"grad_norm": 0.9967393104089797, |
|
"learning_rate": 7.259804670521579e-06, |
|
"loss": 1.0, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.4431818181818181, |
|
"grad_norm": 0.9891797210154472, |
|
"learning_rate": 7.2394739323404105e-06, |
|
"loss": 1.0005, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 1.4488636363636362, |
|
"grad_norm": 1.1178308003268749, |
|
"learning_rate": 7.219096769703045e-06, |
|
"loss": 0.9868, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 1.0000809761609377, |
|
"learning_rate": 7.198673605029529e-06, |
|
"loss": 0.9648, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.4602272727272727, |
|
"grad_norm": 0.9396228245111997, |
|
"learning_rate": 7.178204861693546e-06, |
|
"loss": 1.0009, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.4659090909090908, |
|
"grad_norm": 1.055214770002229, |
|
"learning_rate": 7.15769096401362e-06, |
|
"loss": 0.9478, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 1.4715909090909092, |
|
"grad_norm": 1.0750160280057304, |
|
"learning_rate": 7.137132337244329e-06, |
|
"loss": 0.958, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.4772727272727273, |
|
"grad_norm": 1.0648150711699151, |
|
"learning_rate": 7.116529407567489e-06, |
|
"loss": 0.9828, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.4829545454545454, |
|
"grad_norm": 1.1192077304577122, |
|
"learning_rate": 7.095882602083321e-06, |
|
"loss": 0.9707, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.4886363636363638, |
|
"grad_norm": 1.1092309283046025, |
|
"learning_rate": 7.075192348801591e-06, |
|
"loss": 0.9842, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.4943181818181819, |
|
"grad_norm": 1.0585087928308756, |
|
"learning_rate": 7.054459076632742e-06, |
|
"loss": 1.0636, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 1.041991357364786, |
|
"learning_rate": 7.033683215379002e-06, |
|
"loss": 0.9753, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 1.5056818181818183, |
|
"grad_norm": 0.9720414152268064, |
|
"learning_rate": 7.012865195725473e-06, |
|
"loss": 0.9916, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.5113636363636362, |
|
"grad_norm": 1.1265716150738212, |
|
"learning_rate": 6.9920054492312086e-06, |
|
"loss": 1.0678, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.5170454545454546, |
|
"grad_norm": 1.0711823881169122, |
|
"learning_rate": 6.971104408320253e-06, |
|
"loss": 0.9776, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.5227272727272727, |
|
"grad_norm": 1.1256078273217827, |
|
"learning_rate": 6.950162506272697e-06, |
|
"loss": 0.9904, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.5284090909090908, |
|
"grad_norm": 0.9811471547098307, |
|
"learning_rate": 6.9291801772156775e-06, |
|
"loss": 0.987, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.5340909090909092, |
|
"grad_norm": 1.205853115403329, |
|
"learning_rate": 6.9081578561143924e-06, |
|
"loss": 0.9352, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 1.5397727272727273, |
|
"grad_norm": 0.9564252171879485, |
|
"learning_rate": 6.887095978763072e-06, |
|
"loss": 1.0099, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 0.9739638011221726, |
|
"learning_rate": 6.865994981775958e-06, |
|
"loss": 0.9186, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 1.5511363636363638, |
|
"grad_norm": 1.3776679228140132, |
|
"learning_rate": 6.844855302578236e-06, |
|
"loss": 1.0077, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.5568181818181817, |
|
"grad_norm": 1.0125445825014543, |
|
"learning_rate": 6.823677379396984e-06, |
|
"loss": 0.9993, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 0.9892499359106408, |
|
"learning_rate": 6.802461651252073e-06, |
|
"loss": 0.9571, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.5681818181818183, |
|
"grad_norm": 1.0831674501266864, |
|
"learning_rate": 6.781208557947085e-06, |
|
"loss": 1.0061, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.5738636363636362, |
|
"grad_norm": 0.9356751500366064, |
|
"learning_rate": 6.759918540060173e-06, |
|
"loss": 0.979, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.5795454545454546, |
|
"grad_norm": 1.0557115003350075, |
|
"learning_rate": 6.738592038934946e-06, |
|
"loss": 0.9961, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 1.5852272727272727, |
|
"grad_norm": 1.2599637679261655, |
|
"learning_rate": 6.717229496671307e-06, |
|
"loss": 0.9753, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 1.0507134323091725, |
|
"learning_rate": 6.6958313561163046e-06, |
|
"loss": 0.9425, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.5965909090909092, |
|
"grad_norm": 0.9631905231298211, |
|
"learning_rate": 6.674398060854931e-06, |
|
"loss": 1.055, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.6022727272727273, |
|
"grad_norm": 0.9131560827453628, |
|
"learning_rate": 6.652930055200948e-06, |
|
"loss": 0.9929, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 1.6079545454545454, |
|
"grad_norm": 0.9138134537225251, |
|
"learning_rate": 6.631427784187658e-06, |
|
"loss": 0.952, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.6136363636363638, |
|
"grad_norm": 0.9436608998471452, |
|
"learning_rate": 6.609891693558692e-06, |
|
"loss": 1.0371, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 1.6193181818181817, |
|
"grad_norm": 1.077730549555469, |
|
"learning_rate": 6.588322229758764e-06, |
|
"loss": 1.0231, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 0.8542525239275349, |
|
"learning_rate": 6.566719839924412e-06, |
|
"loss": 0.9908, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.6306818181818183, |
|
"grad_norm": 0.9390889918397101, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 0.9965, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 1.3403721698995363, |
|
"learning_rate": 6.523418074102117e-06, |
|
"loss": 0.9865, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.6420454545454546, |
|
"grad_norm": 0.9787534693003979, |
|
"learning_rate": 6.501719595762903e-06, |
|
"loss": 0.995, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.6477272727272727, |
|
"grad_norm": 0.8866152592349634, |
|
"learning_rate": 6.479989986668118e-06, |
|
"loss": 0.9846, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 1.6534090909090908, |
|
"grad_norm": 0.8915138418235523, |
|
"learning_rate": 6.458229697274125e-06, |
|
"loss": 1.0373, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.6590909090909092, |
|
"grad_norm": 0.9633872591030624, |
|
"learning_rate": 6.436439178673296e-06, |
|
"loss": 0.9864, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.6647727272727273, |
|
"grad_norm": 0.9836814915125117, |
|
"learning_rate": 6.41461888258465e-06, |
|
"loss": 0.9555, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.6704545454545454, |
|
"grad_norm": 0.9708188501717393, |
|
"learning_rate": 6.392769261344502e-06, |
|
"loss": 0.9448, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 1.6761363636363638, |
|
"grad_norm": 0.8777800692748914, |
|
"learning_rate": 6.370890767897078e-06, |
|
"loss": 1.0044, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 1.0244121250661828, |
|
"learning_rate": 6.348983855785122e-06, |
|
"loss": 0.9802, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 1.0027302545771752, |
|
"learning_rate": 6.3270489791405055e-06, |
|
"loss": 0.9562, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.6931818181818183, |
|
"grad_norm": 1.7051161806513946, |
|
"learning_rate": 6.305086592674802e-06, |
|
"loss": 0.9892, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6988636363636362, |
|
"grad_norm": 1.12580729447642, |
|
"learning_rate": 6.283097151669869e-06, |
|
"loss": 0.9821, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.7045454545454546, |
|
"grad_norm": 0.9839470381373491, |
|
"learning_rate": 6.261081111968403e-06, |
|
"loss": 0.9916, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 1.7102272727272727, |
|
"grad_norm": 1.0613072641616672, |
|
"learning_rate": 6.2390389299645e-06, |
|
"loss": 0.9783, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7159090909090908, |
|
"grad_norm": 0.9792881716793711, |
|
"learning_rate": 6.216971062594179e-06, |
|
"loss": 1.0007, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 1.7215909090909092, |
|
"grad_norm": 1.1054016241161089, |
|
"learning_rate": 6.1948779673259256e-06, |
|
"loss": 1.0079, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 1.2013950643084332, |
|
"learning_rate": 6.172760102151195e-06, |
|
"loss": 1.0137, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7329545454545454, |
|
"grad_norm": 1.0486842583129228, |
|
"learning_rate": 6.1506179255749335e-06, |
|
"loss": 0.9611, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 1.7386363636363638, |
|
"grad_norm": 0.9879084512426718, |
|
"learning_rate": 6.128451896606054e-06, |
|
"loss": 0.987, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.7443181818181817, |
|
"grad_norm": 0.8702171126549813, |
|
"learning_rate": 6.106262474747939e-06, |
|
"loss": 1.0354, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.9479994120475482, |
|
"learning_rate": 6.084050119988905e-06, |
|
"loss": 0.9687, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 1.7556818181818183, |
|
"grad_norm": 0.841865035975423, |
|
"learning_rate": 6.061815292792666e-06, |
|
"loss": 0.9692, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 1.7613636363636362, |
|
"grad_norm": 1.1986107322286728, |
|
"learning_rate": 6.039558454088796e-06, |
|
"loss": 0.9869, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.7670454545454546, |
|
"grad_norm": 0.9606223972077408, |
|
"learning_rate": 6.0172800652631706e-06, |
|
"loss": 1.0164, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 0.8967627253652938, |
|
"learning_rate": 5.994980588148391e-06, |
|
"loss": 1.043, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 1.7784090909090908, |
|
"grad_norm": 0.7941576266062421, |
|
"learning_rate": 5.972660485014231e-06, |
|
"loss": 0.9485, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.7840909090909092, |
|
"grad_norm": 1.0936763123716517, |
|
"learning_rate": 5.950320218558037e-06, |
|
"loss": 0.9886, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 1.7897727272727273, |
|
"grad_norm": 1.0795280588915757, |
|
"learning_rate": 5.927960251895146e-06, |
|
"loss": 1.0174, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 1.7954545454545454, |
|
"grad_norm": 0.8880700856278866, |
|
"learning_rate": 5.905581048549279e-06, |
|
"loss": 0.9825, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.8011363636363638, |
|
"grad_norm": 0.8742464433982793, |
|
"learning_rate": 5.883183072442938e-06, |
|
"loss": 0.9392, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 1.8068181818181817, |
|
"grad_norm": 0.9015845437433646, |
|
"learning_rate": 5.860766787887781e-06, |
|
"loss": 0.9507, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.8125, |
|
"grad_norm": 0.8777902350206828, |
|
"learning_rate": 5.838332659575005e-06, |
|
"loss": 1.0214, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 0.9432419707404883, |
|
"learning_rate": 5.815881152565712e-06, |
|
"loss": 0.9913, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 1.8238636363636362, |
|
"grad_norm": 1.554034736388586, |
|
"learning_rate": 5.793412732281258e-06, |
|
"loss": 0.9762, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.8295454545454546, |
|
"grad_norm": 0.9581038943273897, |
|
"learning_rate": 5.7709278644936164e-06, |
|
"loss": 0.9848, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.8352272727272727, |
|
"grad_norm": 0.8898637306384684, |
|
"learning_rate": 5.7484270153157215e-06, |
|
"loss": 0.9396, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.8409090909090908, |
|
"grad_norm": 1.0203919143753812, |
|
"learning_rate": 5.725910651191798e-06, |
|
"loss": 1.0037, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 1.8465909090909092, |
|
"grad_norm": 0.8907537657379099, |
|
"learning_rate": 5.703379238887703e-06, |
|
"loss": 0.9609, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8522727272727273, |
|
"grad_norm": 1.114214216754724, |
|
"learning_rate": 5.680833245481234e-06, |
|
"loss": 0.9412, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.8579545454545454, |
|
"grad_norm": 1.0249614863719094, |
|
"learning_rate": 5.6582731383524625e-06, |
|
"loss": 1.0452, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 0.9715196988270898, |
|
"learning_rate": 5.63569938517404e-06, |
|
"loss": 1.0453, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.8693181818181817, |
|
"grad_norm": 1.1613903786334339, |
|
"learning_rate": 5.613112453901493e-06, |
|
"loss": 0.9735, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 1.059608988677026, |
|
"learning_rate": 5.590512812763541e-06, |
|
"loss": 0.9618, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 1.8806818181818183, |
|
"grad_norm": 1.0952964220643884, |
|
"learning_rate": 5.567900930252375e-06, |
|
"loss": 0.9793, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.8863636363636362, |
|
"grad_norm": 1.014146750998599, |
|
"learning_rate": 5.5452772751139496e-06, |
|
"loss": 0.9863, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 1.8920454545454546, |
|
"grad_norm": 0.9663339556094782, |
|
"learning_rate": 5.522642316338268e-06, |
|
"loss": 1.0089, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 1.8977272727272727, |
|
"grad_norm": 0.9872369642699137, |
|
"learning_rate": 5.49999652314966e-06, |
|
"loss": 1.0105, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.9034090909090908, |
|
"grad_norm": 0.9388637738282897, |
|
"learning_rate": 5.477340364997051e-06, |
|
"loss": 0.9993, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 1.005111659331097, |
|
"learning_rate": 5.454674311544236e-06, |
|
"loss": 1.024, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.9147727272727273, |
|
"grad_norm": 1.1189249784542552, |
|
"learning_rate": 5.431998832660136e-06, |
|
"loss": 0.9167, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.9204545454545454, |
|
"grad_norm": 0.8754985353482484, |
|
"learning_rate": 5.409314398409067e-06, |
|
"loss": 0.9509, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 1.9261363636363638, |
|
"grad_norm": 1.0077105144422567, |
|
"learning_rate": 5.386621479040985e-06, |
|
"loss": 0.9802, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 1.9318181818181817, |
|
"grad_norm": 1.014077284312571, |
|
"learning_rate": 5.363920544981749e-06, |
|
"loss": 1.0046, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.9375, |
|
"grad_norm": 0.8813929725147835, |
|
"learning_rate": 5.341212066823356e-06, |
|
"loss": 1.006, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.9431818181818183, |
|
"grad_norm": 0.9749444900176537, |
|
"learning_rate": 5.3184965153142e-06, |
|
"loss": 0.987, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 1.9488636363636362, |
|
"grad_norm": 0.9433156213620226, |
|
"learning_rate": 5.295774361349299e-06, |
|
"loss": 0.9846, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 0.9268456057648533, |
|
"learning_rate": 5.27304607596055e-06, |
|
"loss": 0.9845, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 1.9602272727272727, |
|
"grad_norm": 0.8554873129583374, |
|
"learning_rate": 5.250312130306946e-06, |
|
"loss": 0.9835, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 1.9659090909090908, |
|
"grad_norm": 1.018982780208351, |
|
"learning_rate": 5.227572995664819e-06, |
|
"loss": 0.9825, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.9715909090909092, |
|
"grad_norm": 0.9391997048223797, |
|
"learning_rate": 5.204829143418072e-06, |
|
"loss": 1.0199, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 1.9772727272727273, |
|
"grad_norm": 1.0146418881124983, |
|
"learning_rate": 5.182081045048404e-06, |
|
"loss": 1.0376, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 1.9829545454545454, |
|
"grad_norm": 1.0574567491158355, |
|
"learning_rate": 5.159329172125533e-06, |
|
"loss": 0.9434, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.9886363636363638, |
|
"grad_norm": 0.8123284335215641, |
|
"learning_rate": 5.136573996297431e-06, |
|
"loss": 0.9802, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 1.9943181818181817, |
|
"grad_norm": 0.9618851741092689, |
|
"learning_rate": 5.113815989280528e-06, |
|
"loss": 1.0419, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.8632945643175781, |
|
"learning_rate": 5.091055622849958e-06, |
|
"loss": 0.976, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 2.005698005698006, |
|
"grad_norm": 1.6043377134817856, |
|
"learning_rate": 5.068293368829755e-06, |
|
"loss": 0.8913, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.011396011396011, |
|
"grad_norm": 1.3331364304662667, |
|
"learning_rate": 5.045529699083092e-06, |
|
"loss": 0.8424, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 2.017094017094017, |
|
"grad_norm": 1.100343372994173, |
|
"learning_rate": 5.022765085502478e-06, |
|
"loss": 0.8664, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.022792022792023, |
|
"grad_norm": 1.2647408619538267, |
|
"learning_rate": 5e-06, |
|
"loss": 0.8975, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.0284900284900287, |
|
"grad_norm": 1.3692030374819484, |
|
"learning_rate": 4.977234914497522e-06, |
|
"loss": 0.8659, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.034188034188034, |
|
"grad_norm": 1.04165152843705, |
|
"learning_rate": 4.9544703009169115e-06, |
|
"loss": 0.8465, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.03988603988604, |
|
"grad_norm": 1.069447973622135, |
|
"learning_rate": 4.931706631170246e-06, |
|
"loss": 0.8254, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.0455840455840457, |
|
"grad_norm": 1.1882943942044963, |
|
"learning_rate": 4.9089443771500435e-06, |
|
"loss": 0.8759, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 0.9445235142025882, |
|
"learning_rate": 4.886184010719472e-06, |
|
"loss": 0.8761, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.056980056980057, |
|
"grad_norm": 0.9617221724763185, |
|
"learning_rate": 4.863426003702572e-06, |
|
"loss": 0.822, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 2.0626780626780628, |
|
"grad_norm": 0.9901232814378744, |
|
"learning_rate": 4.840670827874468e-06, |
|
"loss": 0.8423, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.0683760683760686, |
|
"grad_norm": 0.8710776051974528, |
|
"learning_rate": 4.817918954951598e-06, |
|
"loss": 0.8415, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.074074074074074, |
|
"grad_norm": 1.2482792899259578, |
|
"learning_rate": 4.795170856581929e-06, |
|
"loss": 0.8921, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.07977207977208, |
|
"grad_norm": 1.1169049347453446, |
|
"learning_rate": 4.772427004335183e-06, |
|
"loss": 0.8731, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.0854700854700856, |
|
"grad_norm": 1.0557231424552356, |
|
"learning_rate": 4.749687869693056e-06, |
|
"loss": 0.8622, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.091168091168091, |
|
"grad_norm": 0.9181343036612701, |
|
"learning_rate": 4.7269539240394505e-06, |
|
"loss": 0.8653, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.096866096866097, |
|
"grad_norm": 0.9543401797100639, |
|
"learning_rate": 4.7042256386507e-06, |
|
"loss": 0.8419, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.1025641025641026, |
|
"grad_norm": 1.192131842860604, |
|
"learning_rate": 4.681503484685803e-06, |
|
"loss": 0.9153, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.1082621082621085, |
|
"grad_norm": 0.9650701175336839, |
|
"learning_rate": 4.6587879331766465e-06, |
|
"loss": 0.8422, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.113960113960114, |
|
"grad_norm": 0.9343115020962703, |
|
"learning_rate": 4.636079455018253e-06, |
|
"loss": 0.8433, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.1196581196581197, |
|
"grad_norm": 0.9058357605337869, |
|
"learning_rate": 4.613378520959016e-06, |
|
"loss": 0.8587, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.1253561253561255, |
|
"grad_norm": 0.9303289966062062, |
|
"learning_rate": 4.5906856015909365e-06, |
|
"loss": 0.8799, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 2.131054131054131, |
|
"grad_norm": 0.9993338551104146, |
|
"learning_rate": 4.568001167339866e-06, |
|
"loss": 0.8789, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.1367521367521367, |
|
"grad_norm": 1.003313234824171, |
|
"learning_rate": 4.545325688455766e-06, |
|
"loss": 0.8285, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.1424501424501425, |
|
"grad_norm": 0.9365672809002463, |
|
"learning_rate": 4.52265963500295e-06, |
|
"loss": 0.8561, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.148148148148148, |
|
"grad_norm": 0.8712507036248811, |
|
"learning_rate": 4.500003476850341e-06, |
|
"loss": 0.8262, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 2.1538461538461537, |
|
"grad_norm": 0.9228004881023822, |
|
"learning_rate": 4.477357683661734e-06, |
|
"loss": 0.8766, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.1595441595441596, |
|
"grad_norm": 1.057083805253911, |
|
"learning_rate": 4.454722724886051e-06, |
|
"loss": 0.8653, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 2.1652421652421654, |
|
"grad_norm": 0.9682059205532203, |
|
"learning_rate": 4.432099069747625e-06, |
|
"loss": 0.8305, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.1709401709401708, |
|
"grad_norm": 0.7938300778290989, |
|
"learning_rate": 4.40948718723646e-06, |
|
"loss": 0.8526, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.1766381766381766, |
|
"grad_norm": 0.992854757801764, |
|
"learning_rate": 4.386887546098509e-06, |
|
"loss": 0.7915, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.1823361823361824, |
|
"grad_norm": 1.1405534353610247, |
|
"learning_rate": 4.364300614825963e-06, |
|
"loss": 0.8756, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 2.1880341880341883, |
|
"grad_norm": 0.9074206322121355, |
|
"learning_rate": 4.341726861647537e-06, |
|
"loss": 0.8786, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.1937321937321936, |
|
"grad_norm": 0.9106405803513904, |
|
"learning_rate": 4.319166754518768e-06, |
|
"loss": 0.8736, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.1994301994301995, |
|
"grad_norm": 0.9498694178857152, |
|
"learning_rate": 4.296620761112299e-06, |
|
"loss": 0.8382, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.2051282051282053, |
|
"grad_norm": 0.9662171207890898, |
|
"learning_rate": 4.274089348808202e-06, |
|
"loss": 0.846, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.2108262108262107, |
|
"grad_norm": 0.9597347828021979, |
|
"learning_rate": 4.251572984684281e-06, |
|
"loss": 0.8565, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.2165242165242165, |
|
"grad_norm": 1.0199048543960996, |
|
"learning_rate": 4.229072135506384e-06, |
|
"loss": 0.8634, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 0.8699841121610784, |
|
"learning_rate": 4.206587267718743e-06, |
|
"loss": 0.8704, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.2279202279202277, |
|
"grad_norm": 0.9870860597778771, |
|
"learning_rate": 4.18411884743429e-06, |
|
"loss": 0.9155, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 2.2336182336182335, |
|
"grad_norm": 0.9765675083733482, |
|
"learning_rate": 4.161667340424996e-06, |
|
"loss": 0.9111, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.2393162393162394, |
|
"grad_norm": 1.0450993205368777, |
|
"learning_rate": 4.139233212112221e-06, |
|
"loss": 0.8791, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.245014245014245, |
|
"grad_norm": 1.1146726034384589, |
|
"learning_rate": 4.116816927557063e-06, |
|
"loss": 0.8808, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.2507122507122506, |
|
"grad_norm": 0.9072001670881498, |
|
"learning_rate": 4.094418951450721e-06, |
|
"loss": 0.855, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.2564102564102564, |
|
"grad_norm": 0.928713607803712, |
|
"learning_rate": 4.072039748104856e-06, |
|
"loss": 0.8895, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.262108262108262, |
|
"grad_norm": 0.9633556898613354, |
|
"learning_rate": 4.0496797814419655e-06, |
|
"loss": 0.8809, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 2.267806267806268, |
|
"grad_norm": 0.8844497867372285, |
|
"learning_rate": 4.0273395149857705e-06, |
|
"loss": 0.841, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.2735042735042734, |
|
"grad_norm": 0.9239145256816056, |
|
"learning_rate": 4.0050194118516095e-06, |
|
"loss": 0.8251, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.2792022792022792, |
|
"grad_norm": 1.1068686883079584, |
|
"learning_rate": 3.982719934736832e-06, |
|
"loss": 0.8515, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.284900284900285, |
|
"grad_norm": 1.178223126387429, |
|
"learning_rate": 3.960441545911205e-06, |
|
"loss": 0.886, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 2.2905982905982905, |
|
"grad_norm": 0.8243442773624833, |
|
"learning_rate": 3.9381847072073346e-06, |
|
"loss": 0.8073, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.2962962962962963, |
|
"grad_norm": 0.8877251522703663, |
|
"learning_rate": 3.915949880011096e-06, |
|
"loss": 0.8376, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 2.301994301994302, |
|
"grad_norm": 1.1086289853786166, |
|
"learning_rate": 3.893737525252063e-06, |
|
"loss": 0.835, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 0.9736495968403257, |
|
"learning_rate": 3.871548103393947e-06, |
|
"loss": 0.8366, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.3133903133903133, |
|
"grad_norm": 0.883727910369667, |
|
"learning_rate": 3.849382074425069e-06, |
|
"loss": 0.8788, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.319088319088319, |
|
"grad_norm": 0.9302042209091447, |
|
"learning_rate": 3.827239897848805e-06, |
|
"loss": 0.8105, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 2.324786324786325, |
|
"grad_norm": 0.9816375724049557, |
|
"learning_rate": 3.805122032674077e-06, |
|
"loss": 0.8801, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.3304843304843303, |
|
"grad_norm": 0.9068093342113286, |
|
"learning_rate": 3.7830289374058214e-06, |
|
"loss": 0.8926, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 2.336182336182336, |
|
"grad_norm": 0.970100166469761, |
|
"learning_rate": 3.7609610700355014e-06, |
|
"loss": 0.8172, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.341880341880342, |
|
"grad_norm": 0.8283355970207111, |
|
"learning_rate": 3.7389188880315962e-06, |
|
"loss": 0.8541, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.347578347578348, |
|
"grad_norm": 0.836387825954222, |
|
"learning_rate": 3.7169028483301333e-06, |
|
"loss": 0.8566, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.353276353276353, |
|
"grad_norm": 0.9704274187846976, |
|
"learning_rate": 3.6949134073251993e-06, |
|
"loss": 0.856, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.358974358974359, |
|
"grad_norm": 0.8667279540573334, |
|
"learning_rate": 3.6729510208594954e-06, |
|
"loss": 0.896, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.364672364672365, |
|
"grad_norm": 0.9194321407732738, |
|
"learning_rate": 3.6510161442148783e-06, |
|
"loss": 0.8993, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 0.8956254209520699, |
|
"learning_rate": 3.6291092321029244e-06, |
|
"loss": 0.871, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.376068376068376, |
|
"grad_norm": 0.8944268521885398, |
|
"learning_rate": 3.6072307386554983e-06, |
|
"loss": 0.8958, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.381766381766382, |
|
"grad_norm": 0.8881931841978906, |
|
"learning_rate": 3.58538111741535e-06, |
|
"loss": 0.8718, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.3874643874643873, |
|
"grad_norm": 0.8513595068343849, |
|
"learning_rate": 3.5635608213267063e-06, |
|
"loss": 0.8484, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 2.393162393162393, |
|
"grad_norm": 0.9552616565495209, |
|
"learning_rate": 3.5417703027258752e-06, |
|
"loss": 0.8576, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.398860398860399, |
|
"grad_norm": 0.884306660742374, |
|
"learning_rate": 3.5200100133318836e-06, |
|
"loss": 0.8623, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 2.4045584045584047, |
|
"grad_norm": 0.8217549127604973, |
|
"learning_rate": 3.4982804042370977e-06, |
|
"loss": 0.8789, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.41025641025641, |
|
"grad_norm": 0.9177953454550434, |
|
"learning_rate": 3.476581925897885e-06, |
|
"loss": 0.8761, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.415954415954416, |
|
"grad_norm": 0.9191232531329524, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 0.8381, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.421652421652422, |
|
"grad_norm": 0.8942193186940697, |
|
"learning_rate": 3.4332801600755895e-06, |
|
"loss": 0.9022, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.427350427350427, |
|
"grad_norm": 1.2155759035608542, |
|
"learning_rate": 3.4116777702412374e-06, |
|
"loss": 0.8673, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.433048433048433, |
|
"grad_norm": 0.928253119658496, |
|
"learning_rate": 3.39010830644131e-06, |
|
"loss": 0.8412, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.438746438746439, |
|
"grad_norm": 0.8976369958824371, |
|
"learning_rate": 3.3685722158123435e-06, |
|
"loss": 0.8572, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 0.9342007055562026, |
|
"learning_rate": 3.3470699447990527e-06, |
|
"loss": 0.8389, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 2.45014245014245, |
|
"grad_norm": 0.9368785720862421, |
|
"learning_rate": 3.3256019391450696e-06, |
|
"loss": 0.8447, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.455840455840456, |
|
"grad_norm": 0.8602147398886509, |
|
"learning_rate": 3.3041686438836984e-06, |
|
"loss": 0.8314, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 2.4615384615384617, |
|
"grad_norm": 0.7971529130684335, |
|
"learning_rate": 3.2827705033286937e-06, |
|
"loss": 0.8075, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.467236467236467, |
|
"grad_norm": 0.9022354930189497, |
|
"learning_rate": 3.261407961065056e-06, |
|
"loss": 0.864, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 2.472934472934473, |
|
"grad_norm": 0.8412103377280404, |
|
"learning_rate": 3.2400814599398283e-06, |
|
"loss": 0.825, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.4786324786324787, |
|
"grad_norm": 0.963324698161768, |
|
"learning_rate": 3.2187914420529176e-06, |
|
"loss": 0.8245, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.484330484330484, |
|
"grad_norm": 0.8974616882015672, |
|
"learning_rate": 3.197538348747927e-06, |
|
"loss": 0.8574, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.49002849002849, |
|
"grad_norm": 0.8375456208735425, |
|
"learning_rate": 3.176322620603018e-06, |
|
"loss": 0.8567, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 2.4957264957264957, |
|
"grad_norm": 0.8637885686817552, |
|
"learning_rate": 3.1551446974217643e-06, |
|
"loss": 0.8348, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.5014245014245016, |
|
"grad_norm": 0.8964567431940926, |
|
"learning_rate": 3.1340050182240438e-06, |
|
"loss": 0.8614, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 2.5071225071225074, |
|
"grad_norm": 1.0153388506539311, |
|
"learning_rate": 3.1129040212369286e-06, |
|
"loss": 0.8288, |
|
"step": 440 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 88, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.6500642935537664e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |