|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9998000399920016, |
|
"eval_steps": 500, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0007998400319936012, |
|
"grad_norm": 4.033497738180381, |
|
"learning_rate": 0.0, |
|
"loss": -0.0349, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0015996800639872025, |
|
"grad_norm": 3.51489589336218, |
|
"learning_rate": 3.8110282485354675e-07, |
|
"loss": -0.0542, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0023995200959808036, |
|
"grad_norm": 3.185205419075282, |
|
"learning_rate": 6.040336863117743e-07, |
|
"loss": 0.0277, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.003199360127974405, |
|
"grad_norm": 2.6795442503962392, |
|
"learning_rate": 7.622056497070935e-07, |
|
"loss": 0.0346, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.003999200159968006, |
|
"grad_norm": 4.306548503833335, |
|
"learning_rate": 8.84893356068388e-07, |
|
"loss": -0.0123, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.004799040191961607, |
|
"grad_norm": 3.232420399829724, |
|
"learning_rate": 9.85136511165321e-07, |
|
"loss": -0.0277, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.005598880223955209, |
|
"grad_norm": 4.556647322380419, |
|
"learning_rate": 1.0698908911626617e-06, |
|
"loss": -0.0824, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00639872025594881, |
|
"grad_norm": 4.75835426891596, |
|
"learning_rate": 1.1433084745606403e-06, |
|
"loss": -0.0487, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.007198560287942412, |
|
"grad_norm": 3.671579551985109, |
|
"learning_rate": 1.2080673726235485e-06, |
|
"loss": -0.101, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.007998400319936013, |
|
"grad_norm": 5.478829821928343, |
|
"learning_rate": 1.2659961809219347e-06, |
|
"loss": 0.0194, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.008798240351929614, |
|
"grad_norm": 4.697076805586441, |
|
"learning_rate": 1.318399162250352e-06, |
|
"loss": -0.0713, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.009598080383923215, |
|
"grad_norm": 4.668341585855885, |
|
"learning_rate": 1.366239336018868e-06, |
|
"loss": -0.0365, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.010397920415916816, |
|
"grad_norm": 4.0841275175924725, |
|
"learning_rate": 1.4102480297838326e-06, |
|
"loss": -0.0814, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.011197760447910418, |
|
"grad_norm": 3.5676724545793244, |
|
"learning_rate": 1.4509937160162082e-06, |
|
"loss": -0.0815, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01199760047990402, |
|
"grad_norm": 3.467762437060136, |
|
"learning_rate": 1.4889270423801623e-06, |
|
"loss": -0.0654, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01279744051189762, |
|
"grad_norm": 2.8225474286212457, |
|
"learning_rate": 1.524411299414187e-06, |
|
"loss": -0.062, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.013597280543891222, |
|
"grad_norm": 5.079798214426631, |
|
"learning_rate": 1.5577436352844088e-06, |
|
"loss": -0.1394, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.014397120575884824, |
|
"grad_norm": 4.339673002027499, |
|
"learning_rate": 1.5891701974770953e-06, |
|
"loss": -0.042, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.015196960607878424, |
|
"grad_norm": 4.245593535465187, |
|
"learning_rate": 1.6188971751464532e-06, |
|
"loss": 0.0082, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.015996800639872025, |
|
"grad_norm": 2.975134158916166, |
|
"learning_rate": 1.6470990057754815e-06, |
|
"loss": -0.0294, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.016796640671865627, |
|
"grad_norm": 3.331212004539707, |
|
"learning_rate": 1.673924577474436e-06, |
|
"loss": -0.0544, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01759648070385923, |
|
"grad_norm": 4.545480625771349, |
|
"learning_rate": 1.6995019871038986e-06, |
|
"loss": -0.0731, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01839632073585283, |
|
"grad_norm": 3.7461986348064595, |
|
"learning_rate": 1.7239422398533632e-06, |
|
"loss": -0.1278, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01919616076784643, |
|
"grad_norm": 2.8335266795291543, |
|
"learning_rate": 1.7473421608724147e-06, |
|
"loss": -0.083, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01999600079984003, |
|
"grad_norm": 3.68991966387542, |
|
"learning_rate": 1.769786712136776e-06, |
|
"loss": -0.0695, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.020795840831833633, |
|
"grad_norm": 4.133751803923456, |
|
"learning_rate": 1.7913508546373795e-06, |
|
"loss": -0.0445, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.021595680863827234, |
|
"grad_norm": 5.000412923928387, |
|
"learning_rate": 1.812101058935323e-06, |
|
"loss": 0.1318, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.022395520895820836, |
|
"grad_norm": 3.5992183048553557, |
|
"learning_rate": 1.832096540869755e-06, |
|
"loss": -0.1249, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.023195360927814438, |
|
"grad_norm": 3.427019545745099, |
|
"learning_rate": 1.8513902803279621e-06, |
|
"loss": -0.1743, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02399520095980804, |
|
"grad_norm": 4.725372315931298, |
|
"learning_rate": 1.8700298672337092e-06, |
|
"loss": 0.0091, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.024795040991801638, |
|
"grad_norm": 5.254587258373332, |
|
"learning_rate": 1.888058208767457e-06, |
|
"loss": -0.0653, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02559488102379524, |
|
"grad_norm": 8.849971672055872, |
|
"learning_rate": 1.905514124267734e-06, |
|
"loss": -0.0049, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.026394721055788842, |
|
"grad_norm": 3.4635929572339874, |
|
"learning_rate": 1.922432848562126e-06, |
|
"loss": -0.089, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.027194561087782444, |
|
"grad_norm": 5.9169701870000715, |
|
"learning_rate": 1.9388464601379558e-06, |
|
"loss": -0.1119, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.027994401119776045, |
|
"grad_norm": 5.188979707658736, |
|
"learning_rate": 1.9547842472310495e-06, |
|
"loss": -0.1121, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.028794241151769647, |
|
"grad_norm": 4.896152090964042, |
|
"learning_rate": 1.970273022330642e-06, |
|
"loss": -0.0525, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02959408118376325, |
|
"grad_norm": 5.994962166737604, |
|
"learning_rate": 1.9853373935840096e-06, |
|
"loss": -0.1089, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.030393921215756847, |
|
"grad_norm": 5.576677749054259, |
|
"learning_rate": 2e-06, |
|
"loss": 0.0304, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03119376124775045, |
|
"grad_norm": 4.313822225686198, |
|
"learning_rate": 2e-06, |
|
"loss": -0.1328, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03199360127974405, |
|
"grad_norm": 3.81381102986674, |
|
"learning_rate": 1.998349834983498e-06, |
|
"loss": -0.0792, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03279344131173765, |
|
"grad_norm": 4.839236808694631, |
|
"learning_rate": 1.996699669966997e-06, |
|
"loss": -0.056, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.033593281343731254, |
|
"grad_norm": 3.77305458921913, |
|
"learning_rate": 1.995049504950495e-06, |
|
"loss": -0.0339, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03439312137572485, |
|
"grad_norm": 5.867440087459917, |
|
"learning_rate": 1.9933993399339932e-06, |
|
"loss": -0.0176, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03519296140771846, |
|
"grad_norm": 6.4685114572374465, |
|
"learning_rate": 1.991749174917492e-06, |
|
"loss": 0.0402, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.035992801439712056, |
|
"grad_norm": 11.48483869174446, |
|
"learning_rate": 1.99009900990099e-06, |
|
"loss": -0.0702, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03679264147170566, |
|
"grad_norm": 4.2414264593452735, |
|
"learning_rate": 1.9884488448844884e-06, |
|
"loss": -0.1241, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03759248150369926, |
|
"grad_norm": 4.949735715342123, |
|
"learning_rate": 1.9867986798679866e-06, |
|
"loss": -0.0996, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03839232153569286, |
|
"grad_norm": 5.06186964090094, |
|
"learning_rate": 1.9851485148514852e-06, |
|
"loss": -0.1133, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.039192161567686463, |
|
"grad_norm": 3.9056723205659183, |
|
"learning_rate": 1.9834983498349835e-06, |
|
"loss": -0.1631, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03999200159968006, |
|
"grad_norm": 5.51223763254555, |
|
"learning_rate": 1.9818481848184817e-06, |
|
"loss": 0.0191, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04079184163167367, |
|
"grad_norm": 3.7107878002289, |
|
"learning_rate": 1.98019801980198e-06, |
|
"loss": -0.0271, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.041591681663667265, |
|
"grad_norm": 5.618046340756691, |
|
"learning_rate": 1.9785478547854786e-06, |
|
"loss": -0.0227, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04239152169566087, |
|
"grad_norm": 3.7602961019841468, |
|
"learning_rate": 1.976897689768977e-06, |
|
"loss": -0.126, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04319136172765447, |
|
"grad_norm": 4.322826902384424, |
|
"learning_rate": 1.975247524752475e-06, |
|
"loss": -0.1196, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04399120175964807, |
|
"grad_norm": 3.6276654934086565, |
|
"learning_rate": 1.9735973597359733e-06, |
|
"loss": -0.1098, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04479104179164167, |
|
"grad_norm": 3.729759012982189, |
|
"learning_rate": 1.971947194719472e-06, |
|
"loss": -0.2248, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04559088182363527, |
|
"grad_norm": 5.552584092947439, |
|
"learning_rate": 1.97029702970297e-06, |
|
"loss": -0.0821, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.046390721855628876, |
|
"grad_norm": 6.016002296406734, |
|
"learning_rate": 1.9686468646864684e-06, |
|
"loss": -0.1302, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.047190561887622474, |
|
"grad_norm": 6.7453871612622995, |
|
"learning_rate": 1.966996699669967e-06, |
|
"loss": -0.0652, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04799040191961608, |
|
"grad_norm": 4.874246979289447, |
|
"learning_rate": 1.9653465346534653e-06, |
|
"loss": -0.0409, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04879024195160968, |
|
"grad_norm": 3.894046979082966, |
|
"learning_rate": 1.9636963696369635e-06, |
|
"loss": 0.021, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.049590081983603276, |
|
"grad_norm": 3.829546481539617, |
|
"learning_rate": 1.962046204620462e-06, |
|
"loss": -0.246, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.05038992201559688, |
|
"grad_norm": 5.021080021581999, |
|
"learning_rate": 1.9603960396039604e-06, |
|
"loss": -0.0029, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.05118976204759048, |
|
"grad_norm": 4.084832649304883, |
|
"learning_rate": 1.9587458745874586e-06, |
|
"loss": -0.1911, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.051989602079584085, |
|
"grad_norm": 3.4567077830219595, |
|
"learning_rate": 1.9570957095709572e-06, |
|
"loss": 0.0388, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.052789442111577684, |
|
"grad_norm": 4.523151395245523, |
|
"learning_rate": 1.9554455445544555e-06, |
|
"loss": -0.0422, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05358928214357129, |
|
"grad_norm": 4.574942149645985, |
|
"learning_rate": 1.9537953795379537e-06, |
|
"loss": 0.0052, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05438912217556489, |
|
"grad_norm": 5.884212332415378, |
|
"learning_rate": 1.952145214521452e-06, |
|
"loss": -0.0622, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.055188962207558485, |
|
"grad_norm": 3.172106804857128, |
|
"learning_rate": 1.95049504950495e-06, |
|
"loss": -0.0936, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05598880223955209, |
|
"grad_norm": 4.882587885458746, |
|
"learning_rate": 1.948844884488449e-06, |
|
"loss": -0.1034, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05678864227154569, |
|
"grad_norm": 3.290096020906111, |
|
"learning_rate": 1.947194719471947e-06, |
|
"loss": -0.126, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.057588482303539294, |
|
"grad_norm": 4.175847937084437, |
|
"learning_rate": 1.9455445544554453e-06, |
|
"loss": -0.0295, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05838832233553289, |
|
"grad_norm": 4.774862772782205, |
|
"learning_rate": 1.943894389438944e-06, |
|
"loss": -0.0737, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0591881623675265, |
|
"grad_norm": 4.866413673374395, |
|
"learning_rate": 1.942244224422442e-06, |
|
"loss": 0.0154, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.059988002399520096, |
|
"grad_norm": 3.244110640100742, |
|
"learning_rate": 1.9405940594059404e-06, |
|
"loss": -0.0876, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.060787842431513694, |
|
"grad_norm": 4.94642971249312, |
|
"learning_rate": 1.938943894389439e-06, |
|
"loss": -0.0634, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0615876824635073, |
|
"grad_norm": 3.1477348357592705, |
|
"learning_rate": 1.9372937293729373e-06, |
|
"loss": -0.0934, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0623875224955009, |
|
"grad_norm": 4.180278871715678, |
|
"learning_rate": 1.9356435643564355e-06, |
|
"loss": -0.0158, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0631873625274945, |
|
"grad_norm": 4.73751736841566, |
|
"learning_rate": 1.933993399339934e-06, |
|
"loss": 0.0419, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0639872025594881, |
|
"grad_norm": 3.7318625198178577, |
|
"learning_rate": 1.9323432343234324e-06, |
|
"loss": -0.0981, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0647870425914817, |
|
"grad_norm": 3.5344903982736016, |
|
"learning_rate": 1.9306930693069306e-06, |
|
"loss": 0.028, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0655868826234753, |
|
"grad_norm": 3.6655427915390653, |
|
"learning_rate": 1.9290429042904292e-06, |
|
"loss": -0.0343, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06638672265546891, |
|
"grad_norm": 3.8402537750787817, |
|
"learning_rate": 1.9273927392739275e-06, |
|
"loss": -0.0479, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06718656268746251, |
|
"grad_norm": 6.194776167870759, |
|
"learning_rate": 1.9257425742574257e-06, |
|
"loss": 0.0046, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06798640271945611, |
|
"grad_norm": 5.366879383554931, |
|
"learning_rate": 1.924092409240924e-06, |
|
"loss": -0.1113, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0687862427514497, |
|
"grad_norm": 3.9997732575047547, |
|
"learning_rate": 1.922442244224422e-06, |
|
"loss": 0.0446, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06958608278344332, |
|
"grad_norm": 9.73377697425672, |
|
"learning_rate": 1.920792079207921e-06, |
|
"loss": -0.0569, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.07038592281543692, |
|
"grad_norm": 5.0689420802437875, |
|
"learning_rate": 1.919141914191419e-06, |
|
"loss": -0.0352, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.07118576284743051, |
|
"grad_norm": 8.98640262446026, |
|
"learning_rate": 1.9174917491749173e-06, |
|
"loss": 0.041, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.07198560287942411, |
|
"grad_norm": 5.63457538673664, |
|
"learning_rate": 1.9158415841584155e-06, |
|
"loss": -0.0193, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.07278544291141771, |
|
"grad_norm": 4.290130537843607, |
|
"learning_rate": 1.914191419141914e-06, |
|
"loss": -0.0742, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.07358528294341132, |
|
"grad_norm": 4.0945792486692465, |
|
"learning_rate": 1.9125412541254124e-06, |
|
"loss": 0.029, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.07438512297540492, |
|
"grad_norm": 4.96670528541929, |
|
"learning_rate": 1.9108910891089106e-06, |
|
"loss": -0.1134, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07518496300739852, |
|
"grad_norm": 5.027466862141088, |
|
"learning_rate": 1.9092409240924093e-06, |
|
"loss": -0.0508, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07598480303939212, |
|
"grad_norm": 9.573722686429775, |
|
"learning_rate": 1.9075907590759075e-06, |
|
"loss": -0.1592, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07678464307138572, |
|
"grad_norm": 5.764961349212166, |
|
"learning_rate": 1.9059405940594057e-06, |
|
"loss": -0.0122, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07758448310337933, |
|
"grad_norm": 3.3502664260820247, |
|
"learning_rate": 1.9042904290429044e-06, |
|
"loss": -0.2179, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07838432313537293, |
|
"grad_norm": 3.967491851586746, |
|
"learning_rate": 1.9026402640264026e-06, |
|
"loss": -0.0234, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07918416316736653, |
|
"grad_norm": 4.552540817957586, |
|
"learning_rate": 1.9009900990099008e-06, |
|
"loss": -0.1591, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07998400319936012, |
|
"grad_norm": 10.823710544953496, |
|
"learning_rate": 1.8993399339933993e-06, |
|
"loss": 0.0374, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.08078384323135374, |
|
"grad_norm": 4.806468007236691, |
|
"learning_rate": 1.8976897689768975e-06, |
|
"loss": -0.0565, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.08158368326334733, |
|
"grad_norm": 3.634545747480329, |
|
"learning_rate": 1.896039603960396e-06, |
|
"loss": -0.0762, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.08238352329534093, |
|
"grad_norm": 3.2837047295849597, |
|
"learning_rate": 1.8943894389438944e-06, |
|
"loss": -0.0491, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.08318336332733453, |
|
"grad_norm": 5.176653817957751, |
|
"learning_rate": 1.8927392739273926e-06, |
|
"loss": -0.1934, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.08398320335932813, |
|
"grad_norm": 6.107024303996945, |
|
"learning_rate": 1.8910891089108908e-06, |
|
"loss": -0.1129, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.08478304339132174, |
|
"grad_norm": 4.489176343037952, |
|
"learning_rate": 1.8894389438943895e-06, |
|
"loss": 0.0883, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.08558288342331534, |
|
"grad_norm": 4.318302618280909, |
|
"learning_rate": 1.8877887788778877e-06, |
|
"loss": -0.1609, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.08638272345530894, |
|
"grad_norm": 4.634209202008312, |
|
"learning_rate": 1.886138613861386e-06, |
|
"loss": -0.0457, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08718256348730254, |
|
"grad_norm": 3.630881832190838, |
|
"learning_rate": 1.8844884488448844e-06, |
|
"loss": -0.1382, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08798240351929613, |
|
"grad_norm": 3.886065281514502, |
|
"learning_rate": 1.8828382838283828e-06, |
|
"loss": -0.0535, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08878224355128975, |
|
"grad_norm": 3.647392695144741, |
|
"learning_rate": 1.881188118811881e-06, |
|
"loss": -0.0809, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08958208358328335, |
|
"grad_norm": 3.9753438884802463, |
|
"learning_rate": 1.8795379537953795e-06, |
|
"loss": -0.0791, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.09038192361527694, |
|
"grad_norm": 4.473252382488765, |
|
"learning_rate": 1.8778877887788777e-06, |
|
"loss": -0.0723, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.09118176364727054, |
|
"grad_norm": 4.928253206993449, |
|
"learning_rate": 1.876237623762376e-06, |
|
"loss": 0.0125, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.09198160367926415, |
|
"grad_norm": 4.557945800338486, |
|
"learning_rate": 1.8745874587458746e-06, |
|
"loss": -0.0889, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.09278144371125775, |
|
"grad_norm": 5.830924417742841, |
|
"learning_rate": 1.8729372937293728e-06, |
|
"loss": 0.0504, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.09358128374325135, |
|
"grad_norm": 6.4722171650631655, |
|
"learning_rate": 1.8712871287128713e-06, |
|
"loss": -0.008, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.09438112377524495, |
|
"grad_norm": 3.1676413558574428, |
|
"learning_rate": 1.8696369636963695e-06, |
|
"loss": -0.0483, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.09518096380723855, |
|
"grad_norm": 5.1310816710504845, |
|
"learning_rate": 1.867986798679868e-06, |
|
"loss": -0.0513, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.09598080383923216, |
|
"grad_norm": 3.620114816562482, |
|
"learning_rate": 1.8663366336633664e-06, |
|
"loss": -0.1453, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.09678064387122576, |
|
"grad_norm": 6.0676794834569865, |
|
"learning_rate": 1.8646864686468646e-06, |
|
"loss": -0.0194, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.09758048390321936, |
|
"grad_norm": 6.414733331241253, |
|
"learning_rate": 1.8630363036303628e-06, |
|
"loss": -0.0488, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09838032393521295, |
|
"grad_norm": 4.6846628376767905, |
|
"learning_rate": 1.8613861386138615e-06, |
|
"loss": -0.0195, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.09918016396720655, |
|
"grad_norm": 3.235246476419315, |
|
"learning_rate": 1.8597359735973597e-06, |
|
"loss": -0.0942, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09998000399920016, |
|
"grad_norm": 5.3470459527801495, |
|
"learning_rate": 1.858085808580858e-06, |
|
"loss": 0.0276, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.10077984403119376, |
|
"grad_norm": 3.9287996597379995, |
|
"learning_rate": 1.8564356435643564e-06, |
|
"loss": 0.0306, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.10157968406318736, |
|
"grad_norm": 4.995425229535215, |
|
"learning_rate": 1.8547854785478546e-06, |
|
"loss": 0.0087, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.10237952409518096, |
|
"grad_norm": 4.573732944820577, |
|
"learning_rate": 1.853135313531353e-06, |
|
"loss": -0.1424, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.10317936412717456, |
|
"grad_norm": 4.55020470630308, |
|
"learning_rate": 1.8514851485148515e-06, |
|
"loss": -0.0559, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.10397920415916817, |
|
"grad_norm": 4.41698840906731, |
|
"learning_rate": 1.8498349834983497e-06, |
|
"loss": -0.0188, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.10477904419116177, |
|
"grad_norm": 5.223224115420677, |
|
"learning_rate": 1.848184818481848e-06, |
|
"loss": -0.1098, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.10557888422315537, |
|
"grad_norm": 3.8011698979898005, |
|
"learning_rate": 1.8465346534653466e-06, |
|
"loss": -0.0328, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.10637872425514897, |
|
"grad_norm": 3.1746565925932835, |
|
"learning_rate": 1.8448844884488448e-06, |
|
"loss": -0.0525, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.10717856428714258, |
|
"grad_norm": 3.9995360105342903, |
|
"learning_rate": 1.843234323432343e-06, |
|
"loss": -0.0665, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.10797840431913618, |
|
"grad_norm": 4.722040708955319, |
|
"learning_rate": 1.8415841584158415e-06, |
|
"loss": -0.1398, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.10877824435112977, |
|
"grad_norm": 4.002530013096379, |
|
"learning_rate": 1.83993399339934e-06, |
|
"loss": -0.0609, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.10957808438312337, |
|
"grad_norm": 4.23131853552439, |
|
"learning_rate": 1.8382838283828382e-06, |
|
"loss": -0.0109, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.11037792441511697, |
|
"grad_norm": 5.324154803758963, |
|
"learning_rate": 1.8366336633663366e-06, |
|
"loss": -0.0251, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.11117776444711058, |
|
"grad_norm": 4.297402311394241, |
|
"learning_rate": 1.8349834983498348e-06, |
|
"loss": -0.0589, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.11197760447910418, |
|
"grad_norm": 4.454960816079389, |
|
"learning_rate": 1.833333333333333e-06, |
|
"loss": -0.1049, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.11277744451109778, |
|
"grad_norm": 4.709849875744532, |
|
"learning_rate": 1.8316831683168317e-06, |
|
"loss": -0.0279, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.11357728454309138, |
|
"grad_norm": 3.9184959414442724, |
|
"learning_rate": 1.83003300330033e-06, |
|
"loss": -0.1333, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.11437712457508498, |
|
"grad_norm": 4.362164005140024, |
|
"learning_rate": 1.8283828382838282e-06, |
|
"loss": -0.0821, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.11517696460707859, |
|
"grad_norm": 3.814336740776002, |
|
"learning_rate": 1.8267326732673266e-06, |
|
"loss": -0.0764, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.11597680463907219, |
|
"grad_norm": 4.1087265373281925, |
|
"learning_rate": 1.825082508250825e-06, |
|
"loss": 0.0595, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.11677664467106579, |
|
"grad_norm": 5.05463448309474, |
|
"learning_rate": 1.8234323432343233e-06, |
|
"loss": -0.0749, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.11757648470305938, |
|
"grad_norm": 7.009438010420224, |
|
"learning_rate": 1.8217821782178217e-06, |
|
"loss": -0.1623, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.118376324735053, |
|
"grad_norm": 5.86862518535322, |
|
"learning_rate": 1.82013201320132e-06, |
|
"loss": -0.1914, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1191761647670466, |
|
"grad_norm": 8.568812361586986, |
|
"learning_rate": 1.8184818481848184e-06, |
|
"loss": -0.0496, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.11997600479904019, |
|
"grad_norm": 9.02774053582229, |
|
"learning_rate": 1.8168316831683168e-06, |
|
"loss": -0.022, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.12077584483103379, |
|
"grad_norm": 5.51491933312306, |
|
"learning_rate": 1.815181518151815e-06, |
|
"loss": -0.1322, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.12157568486302739, |
|
"grad_norm": 5.304215018308479, |
|
"learning_rate": 1.8135313531353133e-06, |
|
"loss": -0.0676, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.122375524895021, |
|
"grad_norm": 3.9922542678415565, |
|
"learning_rate": 1.811881188118812e-06, |
|
"loss": 0.0184, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.1231753649270146, |
|
"grad_norm": 4.724197779779715, |
|
"learning_rate": 1.8102310231023102e-06, |
|
"loss": -0.1204, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.1239752049590082, |
|
"grad_norm": 5.76455405608935, |
|
"learning_rate": 1.8085808580858084e-06, |
|
"loss": -0.1421, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1247750449910018, |
|
"grad_norm": 10.161753692062435, |
|
"learning_rate": 1.8069306930693068e-06, |
|
"loss": 0.0592, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.1255748850229954, |
|
"grad_norm": 3.667923249601308, |
|
"learning_rate": 1.805280528052805e-06, |
|
"loss": -0.0988, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.126374725054989, |
|
"grad_norm": 4.515737987543515, |
|
"learning_rate": 1.8036303630363035e-06, |
|
"loss": 0.0522, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.1271745650869826, |
|
"grad_norm": 3.336996513422035, |
|
"learning_rate": 1.801980198019802e-06, |
|
"loss": -0.09, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.1279744051189762, |
|
"grad_norm": 3.969953099317271, |
|
"learning_rate": 1.8003300330033002e-06, |
|
"loss": -0.0435, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.1287742451509698, |
|
"grad_norm": 4.549949209747214, |
|
"learning_rate": 1.7986798679867984e-06, |
|
"loss": -0.0613, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.1295740851829634, |
|
"grad_norm": 3.759639050223288, |
|
"learning_rate": 1.797029702970297e-06, |
|
"loss": -0.0784, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.130373925214957, |
|
"grad_norm": 4.619365249559499, |
|
"learning_rate": 1.7953795379537953e-06, |
|
"loss": -0.0111, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.1311737652469506, |
|
"grad_norm": 4.114791027895229, |
|
"learning_rate": 1.7937293729372935e-06, |
|
"loss": -0.0327, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.13197360527894422, |
|
"grad_norm": 3.8956026767168836, |
|
"learning_rate": 1.792079207920792e-06, |
|
"loss": -0.1106, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.13277344531093782, |
|
"grad_norm": 4.818435179721396, |
|
"learning_rate": 1.7904290429042904e-06, |
|
"loss": -0.0034, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.13357328534293142, |
|
"grad_norm": 6.763152130893218, |
|
"learning_rate": 1.7887788778877888e-06, |
|
"loss": 0.0651, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.13437312537492502, |
|
"grad_norm": 4.097132792098502, |
|
"learning_rate": 1.787128712871287e-06, |
|
"loss": -0.0269, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.13517296540691862, |
|
"grad_norm": 4.706830462846675, |
|
"learning_rate": 1.7854785478547853e-06, |
|
"loss": 0.0558, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.13597280543891221, |
|
"grad_norm": 4.254134691338051, |
|
"learning_rate": 1.783828382838284e-06, |
|
"loss": -0.0046, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1367726454709058, |
|
"grad_norm": 5.457939580250951, |
|
"learning_rate": 1.7821782178217822e-06, |
|
"loss": -0.0379, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.1375724855028994, |
|
"grad_norm": 3.2577166280201544, |
|
"learning_rate": 1.7805280528052804e-06, |
|
"loss": -0.0993, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.138372325534893, |
|
"grad_norm": 5.551040160162887, |
|
"learning_rate": 1.7788778877887789e-06, |
|
"loss": -0.0543, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.13917216556688664, |
|
"grad_norm": 3.69149537962834, |
|
"learning_rate": 1.777227722772277e-06, |
|
"loss": -0.0443, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.13997200559888023, |
|
"grad_norm": 4.4643620642536455, |
|
"learning_rate": 1.7755775577557755e-06, |
|
"loss": -0.0449, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.14077184563087383, |
|
"grad_norm": 3.5240643064279977, |
|
"learning_rate": 1.773927392739274e-06, |
|
"loss": -0.0928, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.14157168566286743, |
|
"grad_norm": 5.981016645991625, |
|
"learning_rate": 1.7722772277227722e-06, |
|
"loss": 0.0686, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.14237152569486103, |
|
"grad_norm": 4.336791468199441, |
|
"learning_rate": 1.7706270627062704e-06, |
|
"loss": -0.0617, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.14317136572685463, |
|
"grad_norm": 3.678032699373225, |
|
"learning_rate": 1.768976897689769e-06, |
|
"loss": -0.1058, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.14397120575884823, |
|
"grad_norm": 8.431078918847803, |
|
"learning_rate": 1.7673267326732673e-06, |
|
"loss": -0.034, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.14477104579084182, |
|
"grad_norm": 4.90238148952107, |
|
"learning_rate": 1.7656765676567655e-06, |
|
"loss": 0.0263, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.14557088582283542, |
|
"grad_norm": 4.1587161441545115, |
|
"learning_rate": 1.764026402640264e-06, |
|
"loss": -0.0128, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.14637072585482905, |
|
"grad_norm": 4.255313468888732, |
|
"learning_rate": 1.7623762376237624e-06, |
|
"loss": -0.0138, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.14717056588682265, |
|
"grad_norm": 6.24454443290786, |
|
"learning_rate": 1.7607260726072606e-06, |
|
"loss": -0.0941, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.14797040591881624, |
|
"grad_norm": 4.293655354485335, |
|
"learning_rate": 1.759075907590759e-06, |
|
"loss": -0.0621, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.14877024595080984, |
|
"grad_norm": 4.224321769134034, |
|
"learning_rate": 1.7574257425742573e-06, |
|
"loss": -0.0214, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.14957008598280344, |
|
"grad_norm": 3.7629471117165827, |
|
"learning_rate": 1.7557755775577555e-06, |
|
"loss": -0.0735, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.15036992601479704, |
|
"grad_norm": 4.511985288731285, |
|
"learning_rate": 1.7541254125412542e-06, |
|
"loss": -0.1376, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.15116976604679064, |
|
"grad_norm": 4.701449783409153, |
|
"learning_rate": 1.7524752475247524e-06, |
|
"loss": -0.1208, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.15196960607878424, |
|
"grad_norm": 7.169693891516351, |
|
"learning_rate": 1.7508250825082506e-06, |
|
"loss": 0.0103, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.15276944611077783, |
|
"grad_norm": 3.6302391864591126, |
|
"learning_rate": 1.749174917491749e-06, |
|
"loss": -0.036, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.15356928614277143, |
|
"grad_norm": 8.15707311459662, |
|
"learning_rate": 1.7475247524752475e-06, |
|
"loss": -0.0226, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.15436912617476506, |
|
"grad_norm": 4.001526302961896, |
|
"learning_rate": 1.7458745874587458e-06, |
|
"loss": -0.0587, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.15516896620675866, |
|
"grad_norm": 4.468601251007179, |
|
"learning_rate": 1.7442244224422442e-06, |
|
"loss": -0.1388, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.15596880623875226, |
|
"grad_norm": 4.107118632559092, |
|
"learning_rate": 1.7425742574257424e-06, |
|
"loss": -0.0961, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.15676864627074585, |
|
"grad_norm": 3.4961373949789665, |
|
"learning_rate": 1.7409240924092409e-06, |
|
"loss": -0.0567, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.15756848630273945, |
|
"grad_norm": 4.144654814148264, |
|
"learning_rate": 1.7392739273927393e-06, |
|
"loss": -0.1309, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.15836832633473305, |
|
"grad_norm": 3.6625054473315664, |
|
"learning_rate": 1.7376237623762375e-06, |
|
"loss": -0.0208, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.15916816636672665, |
|
"grad_norm": 4.664494531197071, |
|
"learning_rate": 1.7359735973597358e-06, |
|
"loss": 0.0178, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.15996800639872025, |
|
"grad_norm": 6.383022272218445, |
|
"learning_rate": 1.7343234323432342e-06, |
|
"loss": -0.0616, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.16076784643071385, |
|
"grad_norm": 5.505206158317875, |
|
"learning_rate": 1.7326732673267326e-06, |
|
"loss": -0.0452, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.16156768646270747, |
|
"grad_norm": 3.5601606217056765, |
|
"learning_rate": 1.7310231023102309e-06, |
|
"loss": 0.0225, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.16236752649470107, |
|
"grad_norm": 4.408138222273653, |
|
"learning_rate": 1.7293729372937293e-06, |
|
"loss": -0.1139, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.16316736652669467, |
|
"grad_norm": 3.2562884601218087, |
|
"learning_rate": 1.7277227722772275e-06, |
|
"loss": 0.0087, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.16396720655868827, |
|
"grad_norm": 4.350781355214131, |
|
"learning_rate": 1.726072607260726e-06, |
|
"loss": -0.0885, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.16476704659068186, |
|
"grad_norm": 3.3568949216522475, |
|
"learning_rate": 1.7244224422442244e-06, |
|
"loss": -0.0134, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.16556688662267546, |
|
"grad_norm": 6.798474914966856, |
|
"learning_rate": 1.7227722772277227e-06, |
|
"loss": -0.0945, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.16636672665466906, |
|
"grad_norm": 4.577665859282248, |
|
"learning_rate": 1.7211221122112209e-06, |
|
"loss": -0.1607, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.16716656668666266, |
|
"grad_norm": 6.460632243204499, |
|
"learning_rate": 1.7194719471947195e-06, |
|
"loss": -0.0235, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.16796640671865626, |
|
"grad_norm": 4.306267256349224, |
|
"learning_rate": 1.7178217821782178e-06, |
|
"loss": -0.0059, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.16876624675064986, |
|
"grad_norm": 3.0483507543879105, |
|
"learning_rate": 1.716171617161716e-06, |
|
"loss": -0.111, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.16956608678264348, |
|
"grad_norm": 5.737336519193611, |
|
"learning_rate": 1.7145214521452144e-06, |
|
"loss": 0.018, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.17036592681463708, |
|
"grad_norm": 3.7845990191052734, |
|
"learning_rate": 1.7128712871287127e-06, |
|
"loss": -0.0926, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.17116576684663068, |
|
"grad_norm": 3.669531800966666, |
|
"learning_rate": 1.711221122112211e-06, |
|
"loss": -0.0776, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.17196560687862428, |
|
"grad_norm": 4.005323920134325, |
|
"learning_rate": 1.7095709570957095e-06, |
|
"loss": -0.0399, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.17276544691061788, |
|
"grad_norm": 2.8598435648570186, |
|
"learning_rate": 1.7079207920792078e-06, |
|
"loss": -0.0548, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.17356528694261147, |
|
"grad_norm": 4.139220262158334, |
|
"learning_rate": 1.7062706270627062e-06, |
|
"loss": 0.0686, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.17436512697460507, |
|
"grad_norm": 4.988425208682803, |
|
"learning_rate": 1.7046204620462046e-06, |
|
"loss": -0.0028, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.17516496700659867, |
|
"grad_norm": 3.4806124639328164, |
|
"learning_rate": 1.7029702970297029e-06, |
|
"loss": -0.0914, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.17596480703859227, |
|
"grad_norm": 6.013581164060899, |
|
"learning_rate": 1.7013201320132013e-06, |
|
"loss": -0.0207, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.1767646470705859, |
|
"grad_norm": 6.048232130793178, |
|
"learning_rate": 1.6996699669966995e-06, |
|
"loss": 0.0301, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.1775644871025795, |
|
"grad_norm": 4.206288334982141, |
|
"learning_rate": 1.698019801980198e-06, |
|
"loss": -0.0503, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.1783643271345731, |
|
"grad_norm": 4.383148234898824, |
|
"learning_rate": 1.6963696369636964e-06, |
|
"loss": 0.0425, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.1791641671665667, |
|
"grad_norm": 4.013900208301416, |
|
"learning_rate": 1.6947194719471947e-06, |
|
"loss": -0.0873, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.1799640071985603, |
|
"grad_norm": 3.729807083009099, |
|
"learning_rate": 1.6930693069306929e-06, |
|
"loss": -0.0124, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1807638472305539, |
|
"grad_norm": 4.739805223350201, |
|
"learning_rate": 1.6914191419141915e-06, |
|
"loss": -0.0965, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.18156368726254749, |
|
"grad_norm": 3.684225018193131, |
|
"learning_rate": 1.6897689768976898e-06, |
|
"loss": -0.0899, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.18236352729454108, |
|
"grad_norm": 4.647773349022286, |
|
"learning_rate": 1.688118811881188e-06, |
|
"loss": -0.1433, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.18316336732653468, |
|
"grad_norm": 4.314549940205055, |
|
"learning_rate": 1.6864686468646864e-06, |
|
"loss": -0.0987, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.1839632073585283, |
|
"grad_norm": 6.602144366923463, |
|
"learning_rate": 1.6848184818481847e-06, |
|
"loss": -0.0855, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.1847630473905219, |
|
"grad_norm": 4.611073533381248, |
|
"learning_rate": 1.683168316831683e-06, |
|
"loss": -0.1262, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.1855628874225155, |
|
"grad_norm": 4.9020247032635655, |
|
"learning_rate": 1.6815181518151815e-06, |
|
"loss": -0.1706, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.1863627274545091, |
|
"grad_norm": 4.16092482080365, |
|
"learning_rate": 1.6798679867986798e-06, |
|
"loss": -0.009, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.1871625674865027, |
|
"grad_norm": 3.5906088992190277, |
|
"learning_rate": 1.678217821782178e-06, |
|
"loss": 0.0999, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.1879624075184963, |
|
"grad_norm": 4.005270108795308, |
|
"learning_rate": 1.6765676567656767e-06, |
|
"loss": -0.0993, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.1887622475504899, |
|
"grad_norm": 6.563769408476828, |
|
"learning_rate": 1.6749174917491749e-06, |
|
"loss": -0.0193, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.1895620875824835, |
|
"grad_norm": 3.380070162840573, |
|
"learning_rate": 1.6732673267326731e-06, |
|
"loss": -0.0809, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.1903619276144771, |
|
"grad_norm": 4.931354996369631, |
|
"learning_rate": 1.6716171617161716e-06, |
|
"loss": -0.0658, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.1911617676464707, |
|
"grad_norm": 4.710207450817461, |
|
"learning_rate": 1.66996699669967e-06, |
|
"loss": -0.1167, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.19196160767846432, |
|
"grad_norm": 3.361685025176525, |
|
"learning_rate": 1.6683168316831682e-06, |
|
"loss": -0.1245, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.19276144771045792, |
|
"grad_norm": 3.767676589968502, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": -0.1638, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.19356128774245152, |
|
"grad_norm": 3.7460434704410575, |
|
"learning_rate": 1.6650165016501649e-06, |
|
"loss": -0.1207, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.1943611277744451, |
|
"grad_norm": 3.7655100191535413, |
|
"learning_rate": 1.6633663366336631e-06, |
|
"loss": 0.0038, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.1951609678064387, |
|
"grad_norm": 4.3387270640143685, |
|
"learning_rate": 1.6617161716171618e-06, |
|
"loss": -0.0234, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.1959608078384323, |
|
"grad_norm": 4.729420704117281, |
|
"learning_rate": 1.66006600660066e-06, |
|
"loss": -0.1446, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.1967606478704259, |
|
"grad_norm": 11.46352939122658, |
|
"learning_rate": 1.6584158415841582e-06, |
|
"loss": -0.0447, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.1975604879024195, |
|
"grad_norm": 4.6392172787916355, |
|
"learning_rate": 1.6567656765676567e-06, |
|
"loss": -0.1279, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.1983603279344131, |
|
"grad_norm": 7.81945174107532, |
|
"learning_rate": 1.6551155115511551e-06, |
|
"loss": -0.1788, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.19916016796640673, |
|
"grad_norm": 4.257894476705108, |
|
"learning_rate": 1.6534653465346533e-06, |
|
"loss": -0.0386, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.19996000799840033, |
|
"grad_norm": 3.9255930993081094, |
|
"learning_rate": 1.6518151815181518e-06, |
|
"loss": -0.0204, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.20075984803039393, |
|
"grad_norm": 8.61324493331346, |
|
"learning_rate": 1.65016501650165e-06, |
|
"loss": 0.0872, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.20155968806238753, |
|
"grad_norm": 3.7965562474708525, |
|
"learning_rate": 1.6485148514851484e-06, |
|
"loss": -0.0834, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.20235952809438112, |
|
"grad_norm": 4.327305685228189, |
|
"learning_rate": 1.6468646864686469e-06, |
|
"loss": -0.0639, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.20315936812637472, |
|
"grad_norm": 3.461407011747761, |
|
"learning_rate": 1.6452145214521451e-06, |
|
"loss": -0.1243, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.20395920815836832, |
|
"grad_norm": 5.164636623307167, |
|
"learning_rate": 1.6435643564356433e-06, |
|
"loss": -0.0318, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.20475904819036192, |
|
"grad_norm": 4.411537190722961, |
|
"learning_rate": 1.641914191419142e-06, |
|
"loss": -0.1533, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.20555888822235552, |
|
"grad_norm": 4.832045065537041, |
|
"learning_rate": 1.6402640264026402e-06, |
|
"loss": -0.0931, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.20635872825434912, |
|
"grad_norm": 4.133203614158014, |
|
"learning_rate": 1.6386138613861385e-06, |
|
"loss": -0.0951, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.20715856828634274, |
|
"grad_norm": 4.649558155027992, |
|
"learning_rate": 1.636963696369637e-06, |
|
"loss": -0.0459, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.20795840831833634, |
|
"grad_norm": 3.7050574045200126, |
|
"learning_rate": 1.6353135313531351e-06, |
|
"loss": -0.1324, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.20875824835032994, |
|
"grad_norm": 4.406446520163225, |
|
"learning_rate": 1.6336633663366336e-06, |
|
"loss": -0.0903, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.20955808838232354, |
|
"grad_norm": 4.150658998676116, |
|
"learning_rate": 1.632013201320132e-06, |
|
"loss": 0.052, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.21035792841431714, |
|
"grad_norm": 4.637643800546993, |
|
"learning_rate": 1.6303630363036302e-06, |
|
"loss": -0.1008, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.21115776844631073, |
|
"grad_norm": 4.356392007316505, |
|
"learning_rate": 1.6287128712871285e-06, |
|
"loss": -0.0666, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.21195760847830433, |
|
"grad_norm": 4.2232050225914675, |
|
"learning_rate": 1.6270627062706271e-06, |
|
"loss": -0.034, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.21275744851029793, |
|
"grad_norm": 4.621467065766651, |
|
"learning_rate": 1.6254125412541253e-06, |
|
"loss": -0.1166, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.21355728854229153, |
|
"grad_norm": 3.6996328893459385, |
|
"learning_rate": 1.6237623762376238e-06, |
|
"loss": -0.0651, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.21435712857428516, |
|
"grad_norm": 6.476232651431598, |
|
"learning_rate": 1.622112211221122e-06, |
|
"loss": -0.087, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.21515696860627875, |
|
"grad_norm": 3.3774511125642115, |
|
"learning_rate": 1.6204620462046205e-06, |
|
"loss": -0.0541, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.21595680863827235, |
|
"grad_norm": 8.039893341875281, |
|
"learning_rate": 1.6188118811881189e-06, |
|
"loss": 0.0426, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.21675664867026595, |
|
"grad_norm": 3.45288250792369, |
|
"learning_rate": 1.6171617161716171e-06, |
|
"loss": -0.071, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.21755648870225955, |
|
"grad_norm": 5.813145099240533, |
|
"learning_rate": 1.6155115511551154e-06, |
|
"loss": 0.0774, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.21835632873425315, |
|
"grad_norm": 3.4988010260216202, |
|
"learning_rate": 1.6138613861386138e-06, |
|
"loss": -0.0378, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.21915616876624675, |
|
"grad_norm": 4.136529473287242, |
|
"learning_rate": 1.6122112211221122e-06, |
|
"loss": 0.0215, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.21995600879824034, |
|
"grad_norm": 3.9538185204867884, |
|
"learning_rate": 1.6105610561056105e-06, |
|
"loss": -0.1014, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.22075584883023394, |
|
"grad_norm": 4.987429074808495, |
|
"learning_rate": 1.608910891089109e-06, |
|
"loss": -0.0326, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.22155568886222757, |
|
"grad_norm": 3.1011116987800595, |
|
"learning_rate": 1.6072607260726071e-06, |
|
"loss": -0.0742, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.22235552889422117, |
|
"grad_norm": 4.87646247250274, |
|
"learning_rate": 1.6056105610561056e-06, |
|
"loss": -0.0488, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.22315536892621476, |
|
"grad_norm": 5.406703510997709, |
|
"learning_rate": 1.603960396039604e-06, |
|
"loss": -0.0998, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.22395520895820836, |
|
"grad_norm": 4.199602090060885, |
|
"learning_rate": 1.6023102310231022e-06, |
|
"loss": 0.0787, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.22475504899020196, |
|
"grad_norm": 6.262166502823287, |
|
"learning_rate": 1.6006600660066005e-06, |
|
"loss": 0.0123, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.22555488902219556, |
|
"grad_norm": 5.4085402736640225, |
|
"learning_rate": 1.5990099009900991e-06, |
|
"loss": -0.1219, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.22635472905418916, |
|
"grad_norm": 14.35407252989058, |
|
"learning_rate": 1.5973597359735973e-06, |
|
"loss": -0.0427, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.22715456908618276, |
|
"grad_norm": 5.560237467243524, |
|
"learning_rate": 1.5957095709570956e-06, |
|
"loss": -0.0363, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.22795440911817635, |
|
"grad_norm": 5.376214533362693, |
|
"learning_rate": 1.594059405940594e-06, |
|
"loss": -0.1198, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.22875424915016995, |
|
"grad_norm": 7.872347430401011, |
|
"learning_rate": 1.5924092409240922e-06, |
|
"loss": 0.02, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.22955408918216358, |
|
"grad_norm": 4.079731942515135, |
|
"learning_rate": 1.5907590759075907e-06, |
|
"loss": -0.1465, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.23035392921415718, |
|
"grad_norm": 4.054081807256331, |
|
"learning_rate": 1.5891089108910891e-06, |
|
"loss": -0.097, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.23115376924615078, |
|
"grad_norm": 5.668828140611865, |
|
"learning_rate": 1.5874587458745874e-06, |
|
"loss": -0.0113, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.23195360927814437, |
|
"grad_norm": 4.222209049226612, |
|
"learning_rate": 1.5858085808580856e-06, |
|
"loss": -0.0565, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.23275344931013797, |
|
"grad_norm": 3.7308714963795735, |
|
"learning_rate": 1.5841584158415842e-06, |
|
"loss": -0.0311, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.23355328934213157, |
|
"grad_norm": 3.9384379405107914, |
|
"learning_rate": 1.5825082508250825e-06, |
|
"loss": -0.0921, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.23435312937412517, |
|
"grad_norm": 4.208635426370359, |
|
"learning_rate": 1.5808580858085807e-06, |
|
"loss": -0.1037, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.23515296940611877, |
|
"grad_norm": 4.471661666164002, |
|
"learning_rate": 1.5792079207920791e-06, |
|
"loss": -0.048, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.23595280943811237, |
|
"grad_norm": 5.922123322526879, |
|
"learning_rate": 1.5775577557755776e-06, |
|
"loss": -0.0822, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.236752649470106, |
|
"grad_norm": 3.9336004171911596, |
|
"learning_rate": 1.5759075907590758e-06, |
|
"loss": -0.0751, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.2375524895020996, |
|
"grad_norm": 2.9881202405051086, |
|
"learning_rate": 1.5742574257425742e-06, |
|
"loss": -0.0694, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.2383523295340932, |
|
"grad_norm": 7.04293625200489, |
|
"learning_rate": 1.5726072607260725e-06, |
|
"loss": -0.1209, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.2391521695660868, |
|
"grad_norm": 4.3791375350104165, |
|
"learning_rate": 1.5709570957095707e-06, |
|
"loss": -0.0704, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.23995200959808038, |
|
"grad_norm": 4.4299869604327835, |
|
"learning_rate": 1.5693069306930694e-06, |
|
"loss": 0.0578, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.24075184963007398, |
|
"grad_norm": 5.721847612449816, |
|
"learning_rate": 1.5676567656765676e-06, |
|
"loss": 0.0124, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.24155168966206758, |
|
"grad_norm": 4.346519849517093, |
|
"learning_rate": 1.5660066006600658e-06, |
|
"loss": -0.0676, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.24235152969406118, |
|
"grad_norm": 4.166900068739509, |
|
"learning_rate": 1.5643564356435643e-06, |
|
"loss": -0.0379, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.24315136972605478, |
|
"grad_norm": 4.171740126126224, |
|
"learning_rate": 1.5627062706270627e-06, |
|
"loss": -0.0369, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.2439512097580484, |
|
"grad_norm": 4.571373866809776, |
|
"learning_rate": 1.561056105610561e-06, |
|
"loss": -0.0423, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.244751049790042, |
|
"grad_norm": 4.687528076087793, |
|
"learning_rate": 1.5594059405940594e-06, |
|
"loss": -0.0427, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.2455508898220356, |
|
"grad_norm": 4.099266935733802, |
|
"learning_rate": 1.5577557755775576e-06, |
|
"loss": -0.1168, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.2463507298540292, |
|
"grad_norm": 4.76705500925925, |
|
"learning_rate": 1.556105610561056e-06, |
|
"loss": -0.0726, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.2471505698860228, |
|
"grad_norm": 7.726027050692815, |
|
"learning_rate": 1.5544554455445545e-06, |
|
"loss": -0.0858, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.2479504099180164, |
|
"grad_norm": 4.588817621333979, |
|
"learning_rate": 1.5528052805280527e-06, |
|
"loss": 0.0889, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.24875024995001, |
|
"grad_norm": 5.351566242300243, |
|
"learning_rate": 1.551155115511551e-06, |
|
"loss": -0.143, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.2495500899820036, |
|
"grad_norm": 6.279051438632601, |
|
"learning_rate": 1.5495049504950496e-06, |
|
"loss": 0.0312, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.2503499300139972, |
|
"grad_norm": 4.251123392069971, |
|
"learning_rate": 1.5478547854785478e-06, |
|
"loss": -0.0477, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.2511497700459908, |
|
"grad_norm": 4.255617580398947, |
|
"learning_rate": 1.546204620462046e-06, |
|
"loss": -0.1445, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.2519496100779844, |
|
"grad_norm": 3.979778076387235, |
|
"learning_rate": 1.5445544554455445e-06, |
|
"loss": -0.0937, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.252749450109978, |
|
"grad_norm": 5.547095237980292, |
|
"learning_rate": 1.5429042904290427e-06, |
|
"loss": -0.0091, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.2535492901419716, |
|
"grad_norm": 5.863554498962612, |
|
"learning_rate": 1.5412541254125414e-06, |
|
"loss": -0.0883, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.2543491301739652, |
|
"grad_norm": 3.9832799266815533, |
|
"learning_rate": 1.5396039603960396e-06, |
|
"loss": -0.1173, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.2551489702059588, |
|
"grad_norm": 4.961222194402448, |
|
"learning_rate": 1.5379537953795378e-06, |
|
"loss": -0.0647, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.2559488102379524, |
|
"grad_norm": 2.7901444246654945, |
|
"learning_rate": 1.5363036303630363e-06, |
|
"loss": -0.0873, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.25674865026994603, |
|
"grad_norm": 4.6616454131308265, |
|
"learning_rate": 1.5346534653465347e-06, |
|
"loss": -0.1283, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.2575484903019396, |
|
"grad_norm": 4.32603696177896, |
|
"learning_rate": 1.533003300330033e-06, |
|
"loss": -0.0748, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.25834833033393323, |
|
"grad_norm": 4.653928241866685, |
|
"learning_rate": 1.5313531353135314e-06, |
|
"loss": -0.1215, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.2591481703659268, |
|
"grad_norm": 4.476046494175142, |
|
"learning_rate": 1.5297029702970296e-06, |
|
"loss": -0.0247, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.2599480103979204, |
|
"grad_norm": 5.41605277862076, |
|
"learning_rate": 1.528052805280528e-06, |
|
"loss": 0.0055, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.260747850429914, |
|
"grad_norm": 7.359939974664472, |
|
"learning_rate": 1.5264026402640265e-06, |
|
"loss": -0.0994, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.2615476904619076, |
|
"grad_norm": 3.7953460503418794, |
|
"learning_rate": 1.5247524752475247e-06, |
|
"loss": 0.0777, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.2623475304939012, |
|
"grad_norm": 4.375620334787856, |
|
"learning_rate": 1.523102310231023e-06, |
|
"loss": -0.1532, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.2631473705258948, |
|
"grad_norm": 5.690054518936246, |
|
"learning_rate": 1.5214521452145214e-06, |
|
"loss": -0.0744, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.26394721055788845, |
|
"grad_norm": 4.095859129867475, |
|
"learning_rate": 1.5198019801980198e-06, |
|
"loss": -0.1342, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.264747050589882, |
|
"grad_norm": 5.261928086906211, |
|
"learning_rate": 1.518151815181518e-06, |
|
"loss": -0.0327, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.26554689062187564, |
|
"grad_norm": 3.723958703243353, |
|
"learning_rate": 1.5165016501650165e-06, |
|
"loss": -0.0919, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.2663467306538692, |
|
"grad_norm": 7.064342427249925, |
|
"learning_rate": 1.5148514851485147e-06, |
|
"loss": -0.3055, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.26714657068586284, |
|
"grad_norm": 5.094162064249706, |
|
"learning_rate": 1.5132013201320131e-06, |
|
"loss": -0.1551, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.2679464107178564, |
|
"grad_norm": 5.182464177568643, |
|
"learning_rate": 1.5115511551155116e-06, |
|
"loss": -0.1656, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.26874625074985004, |
|
"grad_norm": 4.205631232130195, |
|
"learning_rate": 1.5099009900990098e-06, |
|
"loss": -0.1339, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.2695460907818436, |
|
"grad_norm": 3.4595951551287234, |
|
"learning_rate": 1.508250825082508e-06, |
|
"loss": -0.0259, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.27034593081383723, |
|
"grad_norm": 4.040524953973991, |
|
"learning_rate": 1.5066006600660067e-06, |
|
"loss": 0.0424, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.27114577084583086, |
|
"grad_norm": 3.3792775209230044, |
|
"learning_rate": 1.504950495049505e-06, |
|
"loss": -0.0252, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.27194561087782443, |
|
"grad_norm": 5.329937489556339, |
|
"learning_rate": 1.5033003300330032e-06, |
|
"loss": -0.1064, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.27274545090981805, |
|
"grad_norm": 3.8366500907383, |
|
"learning_rate": 1.5016501650165016e-06, |
|
"loss": -0.1327, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.2735452909418116, |
|
"grad_norm": 3.4211397121327334, |
|
"learning_rate": 1.5e-06, |
|
"loss": -0.0469, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.27434513097380525, |
|
"grad_norm": 7.190396728605877, |
|
"learning_rate": 1.4983498349834983e-06, |
|
"loss": -0.0455, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.2751449710057988, |
|
"grad_norm": 5.208941899667468, |
|
"learning_rate": 1.4966996699669967e-06, |
|
"loss": -0.0118, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.27594481103779245, |
|
"grad_norm": 5.0666099160345635, |
|
"learning_rate": 1.495049504950495e-06, |
|
"loss": -0.08, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.276744651069786, |
|
"grad_norm": 3.445240945570377, |
|
"learning_rate": 1.4933993399339932e-06, |
|
"loss": 0.0043, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.27754449110177964, |
|
"grad_norm": 6.719396089938185, |
|
"learning_rate": 1.4917491749174918e-06, |
|
"loss": -0.0528, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.27834433113377327, |
|
"grad_norm": 4.948551220275233, |
|
"learning_rate": 1.49009900990099e-06, |
|
"loss": -0.0351, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.27914417116576684, |
|
"grad_norm": 4.198757242081244, |
|
"learning_rate": 1.4884488448844883e-06, |
|
"loss": -0.1767, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.27994401119776047, |
|
"grad_norm": 4.020517893591624, |
|
"learning_rate": 1.4867986798679867e-06, |
|
"loss": -0.0777, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.28074385122975404, |
|
"grad_norm": 7.665385125345826, |
|
"learning_rate": 1.4851485148514852e-06, |
|
"loss": -0.0675, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.28154369126174766, |
|
"grad_norm": 4.359035902610134, |
|
"learning_rate": 1.4834983498349834e-06, |
|
"loss": 0.1022, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.28234353129374123, |
|
"grad_norm": 4.515833866344382, |
|
"learning_rate": 1.4818481848184818e-06, |
|
"loss": -0.1318, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.28314337132573486, |
|
"grad_norm": 10.48643307447715, |
|
"learning_rate": 1.48019801980198e-06, |
|
"loss": -0.0624, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.28394321135772843, |
|
"grad_norm": 3.9055137245563167, |
|
"learning_rate": 1.4785478547854785e-06, |
|
"loss": 0.0131, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.28474305138972206, |
|
"grad_norm": 5.064555563223541, |
|
"learning_rate": 1.476897689768977e-06, |
|
"loss": -0.0848, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.2855428914217157, |
|
"grad_norm": 6.403904331900866, |
|
"learning_rate": 1.4752475247524752e-06, |
|
"loss": -0.1231, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.28634273145370925, |
|
"grad_norm": 4.4680198659839405, |
|
"learning_rate": 1.4735973597359734e-06, |
|
"loss": -0.0751, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.2871425714857029, |
|
"grad_norm": 7.88048544071049, |
|
"learning_rate": 1.4719471947194718e-06, |
|
"loss": -0.0111, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.28794241151769645, |
|
"grad_norm": 4.041245481168213, |
|
"learning_rate": 1.4702970297029703e-06, |
|
"loss": -0.0219, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.2887422515496901, |
|
"grad_norm": 3.2378522821181748, |
|
"learning_rate": 1.4686468646864685e-06, |
|
"loss": 0.0154, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.28954209158168365, |
|
"grad_norm": 5.187324980575399, |
|
"learning_rate": 1.466996699669967e-06, |
|
"loss": 0.0768, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.2903419316136773, |
|
"grad_norm": 3.892629574264858, |
|
"learning_rate": 1.4653465346534652e-06, |
|
"loss": -0.1757, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.29114177164567084, |
|
"grad_norm": 4.66291997005039, |
|
"learning_rate": 1.4636963696369636e-06, |
|
"loss": -0.0897, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.29194161167766447, |
|
"grad_norm": 4.490266870328807, |
|
"learning_rate": 1.462046204620462e-06, |
|
"loss": -0.1638, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.2927414517096581, |
|
"grad_norm": 7.248644471878, |
|
"learning_rate": 1.4603960396039603e-06, |
|
"loss": -0.1413, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.29354129174165167, |
|
"grad_norm": 13.65496051906939, |
|
"learning_rate": 1.4587458745874585e-06, |
|
"loss": -0.0144, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.2943411317736453, |
|
"grad_norm": 3.0336626027850593, |
|
"learning_rate": 1.4570957095709572e-06, |
|
"loss": -0.008, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.29514097180563886, |
|
"grad_norm": 7.873854054225054, |
|
"learning_rate": 1.4554455445544554e-06, |
|
"loss": 0.1034, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.2959408118376325, |
|
"grad_norm": 3.727594324731175, |
|
"learning_rate": 1.4537953795379538e-06, |
|
"loss": -0.1401, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.29674065186962606, |
|
"grad_norm": 5.229701446706082, |
|
"learning_rate": 1.452145214521452e-06, |
|
"loss": -0.1203, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.2975404919016197, |
|
"grad_norm": 4.492128268970922, |
|
"learning_rate": 1.4504950495049503e-06, |
|
"loss": 0.0004, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.29834033193361326, |
|
"grad_norm": 5.077090301738471, |
|
"learning_rate": 1.448844884488449e-06, |
|
"loss": 0.0219, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2991401719656069, |
|
"grad_norm": 4.796744776644939, |
|
"learning_rate": 1.4471947194719472e-06, |
|
"loss": -0.0064, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.29994001199760045, |
|
"grad_norm": 7.42447528462134, |
|
"learning_rate": 1.4455445544554454e-06, |
|
"loss": 0.07, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3007398520295941, |
|
"grad_norm": 3.848638759590051, |
|
"learning_rate": 1.4438943894389438e-06, |
|
"loss": -0.0777, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.3015396920615877, |
|
"grad_norm": 4.256980996790008, |
|
"learning_rate": 1.4422442244224423e-06, |
|
"loss": -0.1766, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.3023395320935813, |
|
"grad_norm": 3.961327287203466, |
|
"learning_rate": 1.4405940594059405e-06, |
|
"loss": -0.0571, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.3031393721255749, |
|
"grad_norm": 5.478690567895318, |
|
"learning_rate": 1.438943894389439e-06, |
|
"loss": 0.0013, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.3039392121575685, |
|
"grad_norm": 3.8685538296119106, |
|
"learning_rate": 1.4372937293729372e-06, |
|
"loss": -0.0135, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3047390521895621, |
|
"grad_norm": 3.712350805091167, |
|
"learning_rate": 1.4356435643564356e-06, |
|
"loss": -0.0965, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.30553889222155567, |
|
"grad_norm": 4.12545866294737, |
|
"learning_rate": 1.433993399339934e-06, |
|
"loss": 0.0192, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.3063387322535493, |
|
"grad_norm": 3.9826126090375085, |
|
"learning_rate": 1.4323432343234323e-06, |
|
"loss": 0.0096, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.30713857228554287, |
|
"grad_norm": 5.253969236088526, |
|
"learning_rate": 1.4306930693069305e-06, |
|
"loss": 0.0596, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.3079384123175365, |
|
"grad_norm": 4.369221167744991, |
|
"learning_rate": 1.4290429042904292e-06, |
|
"loss": -0.0586, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.3087382523495301, |
|
"grad_norm": 3.386456014084215, |
|
"learning_rate": 1.4273927392739274e-06, |
|
"loss": -0.1952, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.3095380923815237, |
|
"grad_norm": 4.175162288229841, |
|
"learning_rate": 1.4257425742574256e-06, |
|
"loss": -0.1559, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.3103379324135173, |
|
"grad_norm": 4.07269720996871, |
|
"learning_rate": 1.424092409240924e-06, |
|
"loss": -0.0591, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.3111377724455109, |
|
"grad_norm": 3.873233515579836, |
|
"learning_rate": 1.4224422442244223e-06, |
|
"loss": -0.0649, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.3119376124775045, |
|
"grad_norm": 5.33165026969968, |
|
"learning_rate": 1.4207920792079207e-06, |
|
"loss": -0.0568, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3127374525094981, |
|
"grad_norm": 5.644618937197355, |
|
"learning_rate": 1.4191419141914192e-06, |
|
"loss": -0.0425, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.3135372925414917, |
|
"grad_norm": 4.609038777130941, |
|
"learning_rate": 1.4174917491749174e-06, |
|
"loss": -0.0991, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.3143371325734853, |
|
"grad_norm": 5.362814464107483, |
|
"learning_rate": 1.4158415841584156e-06, |
|
"loss": -0.0377, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.3151369726054789, |
|
"grad_norm": 4.1100020129716315, |
|
"learning_rate": 1.4141914191419143e-06, |
|
"loss": -0.0176, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.31593681263747253, |
|
"grad_norm": 3.6462471572713198, |
|
"learning_rate": 1.4125412541254125e-06, |
|
"loss": 0.0183, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.3167366526694661, |
|
"grad_norm": 3.425535847868438, |
|
"learning_rate": 1.4108910891089107e-06, |
|
"loss": -0.1166, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.3175364927014597, |
|
"grad_norm": 4.023065583159361, |
|
"learning_rate": 1.4092409240924092e-06, |
|
"loss": -0.069, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.3183363327334533, |
|
"grad_norm": 4.435192529053884, |
|
"learning_rate": 1.4075907590759076e-06, |
|
"loss": -0.1024, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.3191361727654469, |
|
"grad_norm": 4.351874787170239, |
|
"learning_rate": 1.4059405940594058e-06, |
|
"loss": -0.1381, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.3199360127974405, |
|
"grad_norm": 5.114118048590294, |
|
"learning_rate": 1.4042904290429043e-06, |
|
"loss": -0.027, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3207358528294341, |
|
"grad_norm": 6.62264310550409, |
|
"learning_rate": 1.4026402640264025e-06, |
|
"loss": -0.1372, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.3215356928614277, |
|
"grad_norm": 5.515472496453124, |
|
"learning_rate": 1.4009900990099007e-06, |
|
"loss": 0.0028, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.3223355328934213, |
|
"grad_norm": 5.43524070368167, |
|
"learning_rate": 1.3993399339933994e-06, |
|
"loss": -0.1192, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.32313537292541494, |
|
"grad_norm": 4.309916510249054, |
|
"learning_rate": 1.3976897689768976e-06, |
|
"loss": -0.049, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.3239352129574085, |
|
"grad_norm": 4.392826058059571, |
|
"learning_rate": 1.3960396039603959e-06, |
|
"loss": -0.1248, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.32473505298940214, |
|
"grad_norm": 5.384606404349416, |
|
"learning_rate": 1.3943894389438943e-06, |
|
"loss": -0.0248, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.3255348930213957, |
|
"grad_norm": 5.369884451931867, |
|
"learning_rate": 1.3927392739273927e-06, |
|
"loss": 0.0453, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.32633473305338934, |
|
"grad_norm": 3.799887635426884, |
|
"learning_rate": 1.391089108910891e-06, |
|
"loss": 0.0924, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.3271345730853829, |
|
"grad_norm": 5.151153079821819, |
|
"learning_rate": 1.3894389438943894e-06, |
|
"loss": -0.1524, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.32793441311737653, |
|
"grad_norm": 4.9429474730234935, |
|
"learning_rate": 1.3877887788778876e-06, |
|
"loss": -0.0066, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.3287342531493701, |
|
"grad_norm": 3.8669767688401637, |
|
"learning_rate": 1.386138613861386e-06, |
|
"loss": -0.0998, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.32953409318136373, |
|
"grad_norm": 4.1249285605053165, |
|
"learning_rate": 1.3844884488448845e-06, |
|
"loss": -0.1198, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.33033393321335736, |
|
"grad_norm": 4.264021911092433, |
|
"learning_rate": 1.3828382838283827e-06, |
|
"loss": -0.027, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.3311337732453509, |
|
"grad_norm": 8.192155984704781, |
|
"learning_rate": 1.381188118811881e-06, |
|
"loss": 0.0037, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.33193361327734455, |
|
"grad_norm": 4.842071045333458, |
|
"learning_rate": 1.3795379537953794e-06, |
|
"loss": -0.0183, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.3327334533093381, |
|
"grad_norm": 5.69008602834876, |
|
"learning_rate": 1.3778877887788779e-06, |
|
"loss": -0.037, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.33353329334133175, |
|
"grad_norm": 3.4506588237689044, |
|
"learning_rate": 1.376237623762376e-06, |
|
"loss": -0.1827, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.3343331333733253, |
|
"grad_norm": 4.276859677588479, |
|
"learning_rate": 1.3745874587458745e-06, |
|
"loss": -0.1397, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.33513297340531895, |
|
"grad_norm": 5.137955642134524, |
|
"learning_rate": 1.3729372937293728e-06, |
|
"loss": 0.0313, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.3359328134373125, |
|
"grad_norm": 5.626427193889533, |
|
"learning_rate": 1.3712871287128714e-06, |
|
"loss": 0.0585, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.33673265346930614, |
|
"grad_norm": 4.259015114708382, |
|
"learning_rate": 1.3696369636963696e-06, |
|
"loss": -0.1352, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.3375324935012997, |
|
"grad_norm": 3.900501996524311, |
|
"learning_rate": 1.3679867986798679e-06, |
|
"loss": -0.1541, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.33833233353329334, |
|
"grad_norm": 21.342155698599925, |
|
"learning_rate": 1.3663366336633663e-06, |
|
"loss": 0.0163, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.33913217356528697, |
|
"grad_norm": 9.376314198251674, |
|
"learning_rate": 1.3646864686468647e-06, |
|
"loss": -0.1147, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.33993201359728054, |
|
"grad_norm": 3.9556694436435773, |
|
"learning_rate": 1.363036303630363e-06, |
|
"loss": 0.0607, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.34073185362927416, |
|
"grad_norm": 4.413407376716041, |
|
"learning_rate": 1.3613861386138614e-06, |
|
"loss": 0.0269, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.34153169366126773, |
|
"grad_norm": 4.5745629523971285, |
|
"learning_rate": 1.3597359735973596e-06, |
|
"loss": -0.1232, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.34233153369326136, |
|
"grad_norm": 6.482169711595175, |
|
"learning_rate": 1.3580858085808579e-06, |
|
"loss": -0.0933, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.34313137372525493, |
|
"grad_norm": 4.614948794989073, |
|
"learning_rate": 1.3564356435643565e-06, |
|
"loss": -0.0881, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.34393121375724856, |
|
"grad_norm": 4.902443350581836, |
|
"learning_rate": 1.3547854785478547e-06, |
|
"loss": -0.0492, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3447310537892421, |
|
"grad_norm": 4.293832374460016, |
|
"learning_rate": 1.353135313531353e-06, |
|
"loss": -0.0988, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.34553089382123575, |
|
"grad_norm": 4.239300667652253, |
|
"learning_rate": 1.3514851485148514e-06, |
|
"loss": -0.0395, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3463307338532294, |
|
"grad_norm": 6.000658634911202, |
|
"learning_rate": 1.3498349834983499e-06, |
|
"loss": -0.0687, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.34713057388522295, |
|
"grad_norm": 4.533327665512432, |
|
"learning_rate": 1.348184818481848e-06, |
|
"loss": -0.0457, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3479304139172166, |
|
"grad_norm": 4.966203144811649, |
|
"learning_rate": 1.3465346534653465e-06, |
|
"loss": -0.0891, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.34873025394921014, |
|
"grad_norm": 3.160979702375991, |
|
"learning_rate": 1.3448844884488448e-06, |
|
"loss": 0.0509, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.34953009398120377, |
|
"grad_norm": 7.163626654459487, |
|
"learning_rate": 1.3432343234323432e-06, |
|
"loss": -0.1026, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.35032993401319734, |
|
"grad_norm": 4.10929586240042, |
|
"learning_rate": 1.3415841584158416e-06, |
|
"loss": -0.0346, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.35112977404519097, |
|
"grad_norm": 7.444864169509166, |
|
"learning_rate": 1.3399339933993399e-06, |
|
"loss": -0.084, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.35192961407718454, |
|
"grad_norm": 4.279436158804133, |
|
"learning_rate": 1.338283828382838e-06, |
|
"loss": -0.0753, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.35272945410917816, |
|
"grad_norm": 7.0310221317242965, |
|
"learning_rate": 1.3366336633663367e-06, |
|
"loss": 0.0822, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.3535292941411718, |
|
"grad_norm": 3.546380500099962, |
|
"learning_rate": 1.334983498349835e-06, |
|
"loss": -0.0826, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.35432913417316536, |
|
"grad_norm": 3.978575910618056, |
|
"learning_rate": 1.3333333333333332e-06, |
|
"loss": -0.0183, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.355128974205159, |
|
"grad_norm": 4.893702894351932, |
|
"learning_rate": 1.3316831683168316e-06, |
|
"loss": 0.0513, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.35592881423715256, |
|
"grad_norm": 4.712476792012751, |
|
"learning_rate": 1.3300330033003299e-06, |
|
"loss": 0.0161, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.3567286542691462, |
|
"grad_norm": 4.363095681693482, |
|
"learning_rate": 1.3283828382838283e-06, |
|
"loss": -0.0878, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.35752849430113975, |
|
"grad_norm": 3.6779713769559206, |
|
"learning_rate": 1.3267326732673268e-06, |
|
"loss": -0.0884, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.3583283343331334, |
|
"grad_norm": 4.691244638726057, |
|
"learning_rate": 1.325082508250825e-06, |
|
"loss": 0.0164, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.35912817436512695, |
|
"grad_norm": 3.9918624835208574, |
|
"learning_rate": 1.3234323432343232e-06, |
|
"loss": -0.0623, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.3599280143971206, |
|
"grad_norm": 4.3423857158760475, |
|
"learning_rate": 1.3217821782178219e-06, |
|
"loss": -0.0166, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.3607278544291142, |
|
"grad_norm": 3.3557272335230266, |
|
"learning_rate": 1.32013201320132e-06, |
|
"loss": -0.1243, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.3615276944611078, |
|
"grad_norm": 4.121010209091045, |
|
"learning_rate": 1.3184818481848183e-06, |
|
"loss": 0.0157, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.3623275344931014, |
|
"grad_norm": 5.61777014754645, |
|
"learning_rate": 1.3168316831683168e-06, |
|
"loss": 0.0009, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.36312737452509497, |
|
"grad_norm": 3.9497241442966673, |
|
"learning_rate": 1.3151815181518152e-06, |
|
"loss": -0.0748, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.3639272145570886, |
|
"grad_norm": 3.78165099484685, |
|
"learning_rate": 1.3135313531353134e-06, |
|
"loss": 0.0137, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.36472705458908217, |
|
"grad_norm": 3.8365555088656573, |
|
"learning_rate": 1.3118811881188119e-06, |
|
"loss": 0.0, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.3655268946210758, |
|
"grad_norm": 3.9613296946642933, |
|
"learning_rate": 1.31023102310231e-06, |
|
"loss": -0.068, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.36632673465306936, |
|
"grad_norm": 3.558717962079936, |
|
"learning_rate": 1.3085808580858083e-06, |
|
"loss": -0.1112, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.367126574685063, |
|
"grad_norm": 4.93902023669042, |
|
"learning_rate": 1.306930693069307e-06, |
|
"loss": -0.0433, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.3679264147170566, |
|
"grad_norm": 4.69421251966819, |
|
"learning_rate": 1.3052805280528052e-06, |
|
"loss": 0.0994, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3687262547490502, |
|
"grad_norm": 6.3721851791610336, |
|
"learning_rate": 1.3036303630363034e-06, |
|
"loss": -0.1078, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.3695260947810438, |
|
"grad_norm": 3.389060929800596, |
|
"learning_rate": 1.3019801980198019e-06, |
|
"loss": -0.088, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.3703259348130374, |
|
"grad_norm": 5.748513070947605, |
|
"learning_rate": 1.3003300330033003e-06, |
|
"loss": -0.029, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.371125774845031, |
|
"grad_norm": 4.690511727792042, |
|
"learning_rate": 1.2986798679867985e-06, |
|
"loss": -0.0756, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.3719256148770246, |
|
"grad_norm": 5.217441052047622, |
|
"learning_rate": 1.297029702970297e-06, |
|
"loss": -0.0748, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3727254549090182, |
|
"grad_norm": 4.240980113487688, |
|
"learning_rate": 1.2953795379537952e-06, |
|
"loss": -0.0008, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.3735252949410118, |
|
"grad_norm": 4.743889341456478, |
|
"learning_rate": 1.2937293729372937e-06, |
|
"loss": -0.0671, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.3743251349730054, |
|
"grad_norm": 4.473362389672442, |
|
"learning_rate": 1.292079207920792e-06, |
|
"loss": 0.0101, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.375124975004999, |
|
"grad_norm": 4.197750015674087, |
|
"learning_rate": 1.2904290429042903e-06, |
|
"loss": 0.0175, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.3759248150369926, |
|
"grad_norm": 6.425414954415456, |
|
"learning_rate": 1.2887788778877888e-06, |
|
"loss": -0.0783, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3767246550689862, |
|
"grad_norm": 2.9864850798252855, |
|
"learning_rate": 1.2871287128712872e-06, |
|
"loss": -0.0884, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.3775244951009798, |
|
"grad_norm": 5.261385424958508, |
|
"learning_rate": 1.2854785478547854e-06, |
|
"loss": -0.09, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.3783243351329734, |
|
"grad_norm": 4.958045993888585, |
|
"learning_rate": 1.2838283828382839e-06, |
|
"loss": -0.0682, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.379124175164967, |
|
"grad_norm": 4.7190018138263605, |
|
"learning_rate": 1.282178217821782e-06, |
|
"loss": -0.1395, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.3799240151969606, |
|
"grad_norm": 5.882864051380202, |
|
"learning_rate": 1.2805280528052803e-06, |
|
"loss": -0.1829, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.3807238552289542, |
|
"grad_norm": 3.7556665205378352, |
|
"learning_rate": 1.278877887788779e-06, |
|
"loss": -0.021, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.3815236952609478, |
|
"grad_norm": 4.086321431606577, |
|
"learning_rate": 1.2772277227722772e-06, |
|
"loss": -0.0382, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.3823235352929414, |
|
"grad_norm": 4.616776862820448, |
|
"learning_rate": 1.2755775577557754e-06, |
|
"loss": -0.1779, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.383123375324935, |
|
"grad_norm": 4.004332580198827, |
|
"learning_rate": 1.2739273927392739e-06, |
|
"loss": -0.0252, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.38392321535692864, |
|
"grad_norm": 4.624789258949781, |
|
"learning_rate": 1.2722772277227723e-06, |
|
"loss": -0.0274, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.3847230553889222, |
|
"grad_norm": 4.107644532644881, |
|
"learning_rate": 1.2706270627062705e-06, |
|
"loss": -0.0706, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.38552289542091583, |
|
"grad_norm": 5.606536912327608, |
|
"learning_rate": 1.268976897689769e-06, |
|
"loss": -0.1579, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.3863227354529094, |
|
"grad_norm": 3.661768864377637, |
|
"learning_rate": 1.2673267326732672e-06, |
|
"loss": -0.0483, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.38712257548490303, |
|
"grad_norm": 4.163789722318428, |
|
"learning_rate": 1.2656765676567657e-06, |
|
"loss": -0.1628, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.3879224155168966, |
|
"grad_norm": 5.862521290689618, |
|
"learning_rate": 1.264026402640264e-06, |
|
"loss": -0.0378, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.3887222555488902, |
|
"grad_norm": 4.451191371926914, |
|
"learning_rate": 1.2623762376237623e-06, |
|
"loss": 0.041, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.3895220955808838, |
|
"grad_norm": 6.554041470323983, |
|
"learning_rate": 1.2607260726072606e-06, |
|
"loss": -0.0089, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.3903219356128774, |
|
"grad_norm": 4.958459911280161, |
|
"learning_rate": 1.259075907590759e-06, |
|
"loss": -0.0351, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.39112177564487105, |
|
"grad_norm": 5.5754285433841595, |
|
"learning_rate": 1.2574257425742574e-06, |
|
"loss": -0.0866, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.3919216156768646, |
|
"grad_norm": 4.927561354349523, |
|
"learning_rate": 1.2557755775577557e-06, |
|
"loss": 0.0114, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.39272145570885825, |
|
"grad_norm": 4.275369657183623, |
|
"learning_rate": 1.2541254125412541e-06, |
|
"loss": 0.0731, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.3935212957408518, |
|
"grad_norm": 4.553288397020381, |
|
"learning_rate": 1.2524752475247523e-06, |
|
"loss": -0.0366, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.39432113577284544, |
|
"grad_norm": 4.3640356820358415, |
|
"learning_rate": 1.2508250825082508e-06, |
|
"loss": -0.026, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.395120975804839, |
|
"grad_norm": 6.781778763227194, |
|
"learning_rate": 1.2491749174917492e-06, |
|
"loss": 0.0272, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.39592081583683264, |
|
"grad_norm": 4.147600624744722, |
|
"learning_rate": 1.2475247524752474e-06, |
|
"loss": -0.0533, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.3967206558688262, |
|
"grad_norm": 7.925587764087279, |
|
"learning_rate": 1.2458745874587457e-06, |
|
"loss": -0.0023, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.39752049590081984, |
|
"grad_norm": 3.9471683782785267, |
|
"learning_rate": 1.2442244224422443e-06, |
|
"loss": -0.0624, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.39832033593281346, |
|
"grad_norm": 5.046330000323796, |
|
"learning_rate": 1.2425742574257426e-06, |
|
"loss": -0.1152, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.39912017596480703, |
|
"grad_norm": 3.797212185428219, |
|
"learning_rate": 1.2409240924092408e-06, |
|
"loss": -0.0566, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.39992001599680066, |
|
"grad_norm": 5.219397955775355, |
|
"learning_rate": 1.2392739273927392e-06, |
|
"loss": -0.0473, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.40071985602879423, |
|
"grad_norm": 4.888043487068187, |
|
"learning_rate": 1.2376237623762375e-06, |
|
"loss": -0.0766, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.40151969606078786, |
|
"grad_norm": 4.812490488018197, |
|
"learning_rate": 1.2359735973597359e-06, |
|
"loss": -0.0063, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.4023195360927814, |
|
"grad_norm": 4.137421603194797, |
|
"learning_rate": 1.2343234323432343e-06, |
|
"loss": -0.0894, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.40311937612477505, |
|
"grad_norm": 4.446466976487968, |
|
"learning_rate": 1.2326732673267326e-06, |
|
"loss": -0.1032, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.4039192161567686, |
|
"grad_norm": 4.517828908408806, |
|
"learning_rate": 1.2310231023102308e-06, |
|
"loss": -0.0366, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.40471905618876225, |
|
"grad_norm": 4.025363379714323, |
|
"learning_rate": 1.2293729372937294e-06, |
|
"loss": -0.0322, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.4055188962207559, |
|
"grad_norm": 3.0065985273378026, |
|
"learning_rate": 1.2277227722772277e-06, |
|
"loss": -0.1451, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.40631873625274945, |
|
"grad_norm": 6.102992165053075, |
|
"learning_rate": 1.226072607260726e-06, |
|
"loss": -0.0947, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.4071185762847431, |
|
"grad_norm": 5.373085923158729, |
|
"learning_rate": 1.2244224422442243e-06, |
|
"loss": -0.038, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.40791841631673664, |
|
"grad_norm": 6.507842394701745, |
|
"learning_rate": 1.2227722772277228e-06, |
|
"loss": -0.0416, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.40871825634873027, |
|
"grad_norm": 6.211682775156014, |
|
"learning_rate": 1.221122112211221e-06, |
|
"loss": -0.0155, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.40951809638072384, |
|
"grad_norm": 3.253438304578216, |
|
"learning_rate": 1.2194719471947194e-06, |
|
"loss": -0.0971, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.41031793641271747, |
|
"grad_norm": 3.562574820820311, |
|
"learning_rate": 1.2178217821782177e-06, |
|
"loss": -0.0982, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.41111777644471104, |
|
"grad_norm": 3.5746182911507067, |
|
"learning_rate": 1.216171617161716e-06, |
|
"loss": -0.023, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.41191761647670466, |
|
"grad_norm": 3.488381447372906, |
|
"learning_rate": 1.2145214521452146e-06, |
|
"loss": 0.1215, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.41271745650869823, |
|
"grad_norm": 3.1641673883077788, |
|
"learning_rate": 1.2128712871287128e-06, |
|
"loss": -0.0269, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.41351729654069186, |
|
"grad_norm": 4.13780306256476, |
|
"learning_rate": 1.211221122112211e-06, |
|
"loss": -0.08, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.4143171365726855, |
|
"grad_norm": 5.23630954806011, |
|
"learning_rate": 1.2095709570957095e-06, |
|
"loss": 0.0683, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.41511697660467906, |
|
"grad_norm": 5.08041039318514, |
|
"learning_rate": 1.207920792079208e-06, |
|
"loss": -0.0634, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.4159168166366727, |
|
"grad_norm": 4.328106931793288, |
|
"learning_rate": 1.2062706270627063e-06, |
|
"loss": -0.1251, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.41671665666866625, |
|
"grad_norm": 3.8685318893058978, |
|
"learning_rate": 1.2046204620462046e-06, |
|
"loss": -0.0397, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.4175164967006599, |
|
"grad_norm": 4.228873306898751, |
|
"learning_rate": 1.2029702970297028e-06, |
|
"loss": -0.0097, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.41831633673265345, |
|
"grad_norm": 8.545101114191558, |
|
"learning_rate": 1.2013201320132014e-06, |
|
"loss": 0.0257, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.4191161767646471, |
|
"grad_norm": 4.940963303084406, |
|
"learning_rate": 1.1996699669966997e-06, |
|
"loss": -0.0218, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.41991601679664065, |
|
"grad_norm": 4.486186548838174, |
|
"learning_rate": 1.198019801980198e-06, |
|
"loss": -0.1274, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.42071585682863427, |
|
"grad_norm": 6.117424152809813, |
|
"learning_rate": 1.1963696369636963e-06, |
|
"loss": -0.0412, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.4215156968606279, |
|
"grad_norm": 6.026088581435606, |
|
"learning_rate": 1.1947194719471948e-06, |
|
"loss": -0.1461, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.42231553689262147, |
|
"grad_norm": 4.391032244166686, |
|
"learning_rate": 1.193069306930693e-06, |
|
"loss": 0.0956, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.4231153769246151, |
|
"grad_norm": 5.139889742785653, |
|
"learning_rate": 1.1914191419141915e-06, |
|
"loss": -0.0998, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.42391521695660866, |
|
"grad_norm": 5.63979191849408, |
|
"learning_rate": 1.1897689768976897e-06, |
|
"loss": -0.0672, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.4247150569886023, |
|
"grad_norm": 8.323803093358931, |
|
"learning_rate": 1.188118811881188e-06, |
|
"loss": -0.1988, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.42551489702059586, |
|
"grad_norm": 3.5224991117629263, |
|
"learning_rate": 1.1864686468646866e-06, |
|
"loss": -0.0976, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.4263147370525895, |
|
"grad_norm": 3.3222865799787407, |
|
"learning_rate": 1.1848184818481848e-06, |
|
"loss": -0.1625, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.42711457708458306, |
|
"grad_norm": 4.473219337166838, |
|
"learning_rate": 1.183168316831683e-06, |
|
"loss": -0.102, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.4279144171165767, |
|
"grad_norm": 4.255445918061684, |
|
"learning_rate": 1.1815181518151815e-06, |
|
"loss": 0.0347, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.4287142571485703, |
|
"grad_norm": 5.273596279438336, |
|
"learning_rate": 1.17986798679868e-06, |
|
"loss": -0.0233, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.4295140971805639, |
|
"grad_norm": 3.2928329204452167, |
|
"learning_rate": 1.1782178217821781e-06, |
|
"loss": -0.1795, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.4303139372125575, |
|
"grad_norm": 3.1943187707330676, |
|
"learning_rate": 1.1765676567656766e-06, |
|
"loss": -0.1193, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.4311137772445511, |
|
"grad_norm": 4.457107636902936, |
|
"learning_rate": 1.1749174917491748e-06, |
|
"loss": -0.0256, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.4319136172765447, |
|
"grad_norm": 4.508728040150466, |
|
"learning_rate": 1.1732673267326732e-06, |
|
"loss": -0.1272, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.4327134573085383, |
|
"grad_norm": 4.679176366336832, |
|
"learning_rate": 1.1716171617161717e-06, |
|
"loss": -0.0107, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.4335132973405319, |
|
"grad_norm": 4.572704243632147, |
|
"learning_rate": 1.16996699669967e-06, |
|
"loss": -0.0189, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.43431313737252547, |
|
"grad_norm": 3.823996049360206, |
|
"learning_rate": 1.1683168316831681e-06, |
|
"loss": 0.0071, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.4351129774045191, |
|
"grad_norm": 4.448417665137879, |
|
"learning_rate": 1.1666666666666668e-06, |
|
"loss": 0.0018, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.4359128174365127, |
|
"grad_norm": 3.7067784825161625, |
|
"learning_rate": 1.165016501650165e-06, |
|
"loss": -0.0643, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.4367126574685063, |
|
"grad_norm": 4.304960211061566, |
|
"learning_rate": 1.1633663366336632e-06, |
|
"loss": 0.0434, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.4375124975004999, |
|
"grad_norm": 4.809624741919171, |
|
"learning_rate": 1.1617161716171617e-06, |
|
"loss": -0.1175, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.4383123375324935, |
|
"grad_norm": 3.572402442577118, |
|
"learning_rate": 1.16006600660066e-06, |
|
"loss": 0.0641, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.4391121775644871, |
|
"grad_norm": 3.1323439706728173, |
|
"learning_rate": 1.1584158415841584e-06, |
|
"loss": -0.1315, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.4399120175964807, |
|
"grad_norm": 6.63310206919076, |
|
"learning_rate": 1.1567656765676568e-06, |
|
"loss": -0.1572, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.4407118576284743, |
|
"grad_norm": 5.7194336862922475, |
|
"learning_rate": 1.155115511551155e-06, |
|
"loss": -0.0498, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.4415116976604679, |
|
"grad_norm": 5.0355458371512976, |
|
"learning_rate": 1.1534653465346533e-06, |
|
"loss": -0.0343, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.4423115376924615, |
|
"grad_norm": 4.479813545297925, |
|
"learning_rate": 1.151815181518152e-06, |
|
"loss": -0.052, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.44311137772445514, |
|
"grad_norm": 5.515043665694904, |
|
"learning_rate": 1.1501650165016501e-06, |
|
"loss": -0.0962, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.4439112177564487, |
|
"grad_norm": 4.35349503345848, |
|
"learning_rate": 1.1485148514851484e-06, |
|
"loss": -0.1718, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.44471105778844233, |
|
"grad_norm": 4.324313582265245, |
|
"learning_rate": 1.1468646864686468e-06, |
|
"loss": -0.1059, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.4455108978204359, |
|
"grad_norm": 6.603357917371442, |
|
"learning_rate": 1.1452145214521452e-06, |
|
"loss": -0.0179, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.44631073785242953, |
|
"grad_norm": 4.731847308612818, |
|
"learning_rate": 1.1435643564356435e-06, |
|
"loss": -0.0285, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.4471105778844231, |
|
"grad_norm": 3.2387655192879, |
|
"learning_rate": 1.141914191419142e-06, |
|
"loss": 0.0157, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.4479104179164167, |
|
"grad_norm": 4.11046977381839, |
|
"learning_rate": 1.1402640264026401e-06, |
|
"loss": -0.0738, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.4487102579484103, |
|
"grad_norm": 4.7767170879491765, |
|
"learning_rate": 1.1386138613861384e-06, |
|
"loss": -0.1826, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.4495100979804039, |
|
"grad_norm": 4.712736226037487, |
|
"learning_rate": 1.136963696369637e-06, |
|
"loss": -0.0453, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.45030993801239755, |
|
"grad_norm": 5.2823586287855795, |
|
"learning_rate": 1.1353135313531353e-06, |
|
"loss": -0.0112, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.4511097780443911, |
|
"grad_norm": 4.313375854007458, |
|
"learning_rate": 1.1336633663366335e-06, |
|
"loss": -0.1529, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.45190961807638474, |
|
"grad_norm": 3.661113275988152, |
|
"learning_rate": 1.132013201320132e-06, |
|
"loss": 0.0182, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.4527094581083783, |
|
"grad_norm": 3.3641831166871015, |
|
"learning_rate": 1.1303630363036304e-06, |
|
"loss": -0.0967, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.45350929814037194, |
|
"grad_norm": 3.5187461731044634, |
|
"learning_rate": 1.1287128712871286e-06, |
|
"loss": -0.0115, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.4543091381723655, |
|
"grad_norm": 5.055238201441692, |
|
"learning_rate": 1.127062706270627e-06, |
|
"loss": -0.0701, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.45510897820435914, |
|
"grad_norm": 5.048998878882335, |
|
"learning_rate": 1.1254125412541253e-06, |
|
"loss": -0.0982, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.4559088182363527, |
|
"grad_norm": 6.706995417966311, |
|
"learning_rate": 1.123762376237624e-06, |
|
"loss": -0.0323, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.45670865826834633, |
|
"grad_norm": 5.717361031375047, |
|
"learning_rate": 1.1221122112211221e-06, |
|
"loss": -0.0251, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4575084983003399, |
|
"grad_norm": 5.073568794377317, |
|
"learning_rate": 1.1204620462046204e-06, |
|
"loss": -0.2165, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.45830833833233353, |
|
"grad_norm": 4.340499430104141, |
|
"learning_rate": 1.1188118811881188e-06, |
|
"loss": -0.1008, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.45910817836432716, |
|
"grad_norm": 11.188522206922801, |
|
"learning_rate": 1.117161716171617e-06, |
|
"loss": -0.0362, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.45990801839632073, |
|
"grad_norm": 8.96889120914533, |
|
"learning_rate": 1.1155115511551155e-06, |
|
"loss": 0.0492, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.46070785842831435, |
|
"grad_norm": 4.7623433805729825, |
|
"learning_rate": 1.113861386138614e-06, |
|
"loss": -0.0462, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4615076984603079, |
|
"grad_norm": 4.809552169223393, |
|
"learning_rate": 1.1122112211221121e-06, |
|
"loss": -0.0298, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.46230753849230155, |
|
"grad_norm": 4.199314588295882, |
|
"learning_rate": 1.1105610561056104e-06, |
|
"loss": -0.1501, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4631073785242951, |
|
"grad_norm": 3.531450924178731, |
|
"learning_rate": 1.108910891089109e-06, |
|
"loss": -0.0589, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.46390721855628875, |
|
"grad_norm": 4.247443159328166, |
|
"learning_rate": 1.1072607260726073e-06, |
|
"loss": -0.0362, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4647070585882823, |
|
"grad_norm": 3.4845953217374346, |
|
"learning_rate": 1.1056105610561055e-06, |
|
"loss": -0.0171, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.46550689862027594, |
|
"grad_norm": 6.269891746353069, |
|
"learning_rate": 1.103960396039604e-06, |
|
"loss": -0.0552, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.46630673865226957, |
|
"grad_norm": 4.110925186237378, |
|
"learning_rate": 1.1023102310231024e-06, |
|
"loss": -0.1525, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.46710657868426314, |
|
"grad_norm": 4.027706037445169, |
|
"learning_rate": 1.1006600660066006e-06, |
|
"loss": -0.1283, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.46790641871625677, |
|
"grad_norm": 6.085436348609565, |
|
"learning_rate": 1.099009900990099e-06, |
|
"loss": 0.0152, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.46870625874825034, |
|
"grad_norm": 4.403688809188704, |
|
"learning_rate": 1.0973597359735973e-06, |
|
"loss": -0.0462, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.46950609878024396, |
|
"grad_norm": 3.7204758942669924, |
|
"learning_rate": 1.0957095709570955e-06, |
|
"loss": -0.0472, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.47030593881223753, |
|
"grad_norm": 4.16282077045935, |
|
"learning_rate": 1.0940594059405941e-06, |
|
"loss": -0.1098, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.47110577884423116, |
|
"grad_norm": 4.490618781800236, |
|
"learning_rate": 1.0924092409240924e-06, |
|
"loss": -0.0323, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.47190561887622473, |
|
"grad_norm": 3.83450769320751, |
|
"learning_rate": 1.0907590759075906e-06, |
|
"loss": -0.0482, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.47270545890821836, |
|
"grad_norm": 3.951497100856045, |
|
"learning_rate": 1.089108910891089e-06, |
|
"loss": -0.1206, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.473505298940212, |
|
"grad_norm": 4.798754196622245, |
|
"learning_rate": 1.0874587458745875e-06, |
|
"loss": 0.0584, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.47430513897220555, |
|
"grad_norm": 4.437199971976538, |
|
"learning_rate": 1.0858085808580857e-06, |
|
"loss": -0.0666, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.4751049790041992, |
|
"grad_norm": 5.877945506525689, |
|
"learning_rate": 1.0841584158415842e-06, |
|
"loss": -0.0795, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.47590481903619275, |
|
"grad_norm": 4.9827179740392165, |
|
"learning_rate": 1.0825082508250824e-06, |
|
"loss": -0.145, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.4767046590681864, |
|
"grad_norm": 4.579893011650921, |
|
"learning_rate": 1.0808580858085808e-06, |
|
"loss": 0.0438, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.47750449910017995, |
|
"grad_norm": 4.6741113317873975, |
|
"learning_rate": 1.0792079207920793e-06, |
|
"loss": -0.0471, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.4783043391321736, |
|
"grad_norm": 4.45102253856279, |
|
"learning_rate": 1.0775577557755775e-06, |
|
"loss": -0.0705, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.47910417916416714, |
|
"grad_norm": 7.735572429403455, |
|
"learning_rate": 1.0759075907590757e-06, |
|
"loss": -0.0004, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.47990401919616077, |
|
"grad_norm": 4.3890790831054645, |
|
"learning_rate": 1.0742574257425744e-06, |
|
"loss": -0.11, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.4807038592281544, |
|
"grad_norm": 3.750715441802859, |
|
"learning_rate": 1.0726072607260726e-06, |
|
"loss": -0.0943, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.48150369926014797, |
|
"grad_norm": 3.65650652124133, |
|
"learning_rate": 1.0709570957095708e-06, |
|
"loss": -0.169, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.4823035392921416, |
|
"grad_norm": 4.55201247427091, |
|
"learning_rate": 1.0693069306930693e-06, |
|
"loss": -0.0753, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.48310337932413516, |
|
"grad_norm": 4.55776208974576, |
|
"learning_rate": 1.0676567656765675e-06, |
|
"loss": -0.1347, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.4839032193561288, |
|
"grad_norm": 4.48864117831728, |
|
"learning_rate": 1.066006600660066e-06, |
|
"loss": -0.0974, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.48470305938812236, |
|
"grad_norm": 3.9363005491507668, |
|
"learning_rate": 1.0643564356435644e-06, |
|
"loss": -0.0079, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.485502899420116, |
|
"grad_norm": 4.252862078789536, |
|
"learning_rate": 1.0627062706270626e-06, |
|
"loss": -0.0136, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.48630273945210956, |
|
"grad_norm": 4.543019341197776, |
|
"learning_rate": 1.0610561056105608e-06, |
|
"loss": -0.1351, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.4871025794841032, |
|
"grad_norm": 5.655845163937271, |
|
"learning_rate": 1.0594059405940595e-06, |
|
"loss": -0.0266, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.4879024195160968, |
|
"grad_norm": 11.681523554473147, |
|
"learning_rate": 1.0577557755775577e-06, |
|
"loss": -0.1126, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.4887022595480904, |
|
"grad_norm": 4.307554643653013, |
|
"learning_rate": 1.056105610561056e-06, |
|
"loss": -0.0647, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.489502099580084, |
|
"grad_norm": 5.732899988046993, |
|
"learning_rate": 1.0544554455445544e-06, |
|
"loss": -0.0934, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.4903019396120776, |
|
"grad_norm": 4.150445541916088, |
|
"learning_rate": 1.0528052805280528e-06, |
|
"loss": -0.1406, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.4911017796440712, |
|
"grad_norm": 5.1082669144378725, |
|
"learning_rate": 1.051155115511551e-06, |
|
"loss": 0.0558, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.49190161967606477, |
|
"grad_norm": 7.67678160227598, |
|
"learning_rate": 1.0495049504950495e-06, |
|
"loss": 0.0088, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4927014597080584, |
|
"grad_norm": 3.354349182723264, |
|
"learning_rate": 1.0478547854785477e-06, |
|
"loss": -0.1359, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.49350129974005197, |
|
"grad_norm": 4.725187848913473, |
|
"learning_rate": 1.046204620462046e-06, |
|
"loss": -0.0762, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.4943011397720456, |
|
"grad_norm": 5.7805399690672825, |
|
"learning_rate": 1.0445544554455446e-06, |
|
"loss": 0.0578, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.49510097980403917, |
|
"grad_norm": 4.179300735975294, |
|
"learning_rate": 1.0429042904290428e-06, |
|
"loss": 0.005, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.4959008198360328, |
|
"grad_norm": 5.098901682751034, |
|
"learning_rate": 1.0412541254125413e-06, |
|
"loss": -0.0156, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4967006598680264, |
|
"grad_norm": 8.248543813099444, |
|
"learning_rate": 1.0396039603960395e-06, |
|
"loss": -0.0457, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.49750049990002, |
|
"grad_norm": 4.864094199749138, |
|
"learning_rate": 1.037953795379538e-06, |
|
"loss": 0.0829, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.4983003399320136, |
|
"grad_norm": 4.692602610129244, |
|
"learning_rate": 1.0363036303630364e-06, |
|
"loss": -0.0396, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.4991001799640072, |
|
"grad_norm": 3.696934414641692, |
|
"learning_rate": 1.0346534653465346e-06, |
|
"loss": -0.1939, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.4999000199960008, |
|
"grad_norm": 5.491055932283412, |
|
"learning_rate": 1.0330033003300328e-06, |
|
"loss": -0.0639, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.5006998600279944, |
|
"grad_norm": 3.92108657024522, |
|
"learning_rate": 1.0313531353135315e-06, |
|
"loss": -0.0969, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.501499700059988, |
|
"grad_norm": 5.243975134143899, |
|
"learning_rate": 1.0297029702970297e-06, |
|
"loss": -0.0206, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.5022995400919816, |
|
"grad_norm": 4.328763949057532, |
|
"learning_rate": 1.028052805280528e-06, |
|
"loss": -0.1081, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.5030993801239753, |
|
"grad_norm": 4.197218364380273, |
|
"learning_rate": 1.0264026402640264e-06, |
|
"loss": -0.0932, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.5038992201559688, |
|
"grad_norm": 4.300285400855226, |
|
"learning_rate": 1.0247524752475248e-06, |
|
"loss": -0.1016, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.5046990601879624, |
|
"grad_norm": 4.4977354073528675, |
|
"learning_rate": 1.023102310231023e-06, |
|
"loss": -0.0846, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.505498900219956, |
|
"grad_norm": 4.303063503427107, |
|
"learning_rate": 1.0214521452145215e-06, |
|
"loss": -0.0724, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.5062987402519497, |
|
"grad_norm": 6.073276027125735, |
|
"learning_rate": 1.0198019801980197e-06, |
|
"loss": -0.0326, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.5070985802839432, |
|
"grad_norm": 4.973392453051079, |
|
"learning_rate": 1.018151815181518e-06, |
|
"loss": 0.0189, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.5078984203159368, |
|
"grad_norm": 4.811288854611089, |
|
"learning_rate": 1.0165016501650166e-06, |
|
"loss": -0.1415, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.5086982603479304, |
|
"grad_norm": 4.853499316182358, |
|
"learning_rate": 1.0148514851485148e-06, |
|
"loss": -0.0896, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.509498100379924, |
|
"grad_norm": 3.839359784599905, |
|
"learning_rate": 1.013201320132013e-06, |
|
"loss": -0.0358, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.5102979404119176, |
|
"grad_norm": 4.783165043297262, |
|
"learning_rate": 1.0115511551155115e-06, |
|
"loss": -0.0102, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.5110977804439112, |
|
"grad_norm": 5.184943614862812, |
|
"learning_rate": 1.00990099009901e-06, |
|
"loss": -0.1354, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.5118976204759048, |
|
"grad_norm": 39.49636862757923, |
|
"learning_rate": 1.0082508250825082e-06, |
|
"loss": 0.0023, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.5126974605078984, |
|
"grad_norm": 4.083973866781674, |
|
"learning_rate": 1.0066006600660066e-06, |
|
"loss": -0.0357, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.5134973005398921, |
|
"grad_norm": 5.817761080874723, |
|
"learning_rate": 1.0049504950495048e-06, |
|
"loss": -0.0441, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.5142971405718856, |
|
"grad_norm": 8.811705641420119, |
|
"learning_rate": 1.0033003300330033e-06, |
|
"loss": 0.1013, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.5150969806038792, |
|
"grad_norm": 4.211618456869653, |
|
"learning_rate": 1.0016501650165017e-06, |
|
"loss": -0.1781, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.5158968206358728, |
|
"grad_norm": 6.430696021299668, |
|
"learning_rate": 1e-06, |
|
"loss": -0.0475, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.5166966606678665, |
|
"grad_norm": 4.287574273625528, |
|
"learning_rate": 9.983498349834984e-07, |
|
"loss": -0.0304, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.51749650069986, |
|
"grad_norm": 2.916483591782696, |
|
"learning_rate": 9.966996699669966e-07, |
|
"loss": -0.1574, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.5182963407318536, |
|
"grad_norm": 5.071406876730617, |
|
"learning_rate": 9.95049504950495e-07, |
|
"loss": -0.0227, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.5190961807638472, |
|
"grad_norm": 3.863107212570657, |
|
"learning_rate": 9.933993399339933e-07, |
|
"loss": -0.0452, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.5198960207958409, |
|
"grad_norm": 4.234640528387632, |
|
"learning_rate": 9.917491749174917e-07, |
|
"loss": -0.0816, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.5206958608278345, |
|
"grad_norm": 4.255603238118902, |
|
"learning_rate": 9.9009900990099e-07, |
|
"loss": -0.1311, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.521495700859828, |
|
"grad_norm": 4.723143092604518, |
|
"learning_rate": 9.884488448844884e-07, |
|
"loss": -0.1076, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.5222955408918216, |
|
"grad_norm": 4.609676855516043, |
|
"learning_rate": 9.867986798679866e-07, |
|
"loss": -0.0468, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.5230953809238152, |
|
"grad_norm": 4.648497611546731, |
|
"learning_rate": 9.85148514851485e-07, |
|
"loss": 0.066, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.5238952209558089, |
|
"grad_norm": 4.444890919088204, |
|
"learning_rate": 9.834983498349835e-07, |
|
"loss": -0.0954, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.5246950609878024, |
|
"grad_norm": 3.789960680030435, |
|
"learning_rate": 9.818481848184817e-07, |
|
"loss": -0.0846, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.525494901019796, |
|
"grad_norm": 7.20767352956141, |
|
"learning_rate": 9.801980198019802e-07, |
|
"loss": -0.0689, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.5262947410517896, |
|
"grad_norm": 3.8251645221108883, |
|
"learning_rate": 9.785478547854786e-07, |
|
"loss": -0.0026, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.5270945810837833, |
|
"grad_norm": 10.44191187281191, |
|
"learning_rate": 9.768976897689768e-07, |
|
"loss": -0.1835, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.5278944211157769, |
|
"grad_norm": 6.355832474610427, |
|
"learning_rate": 9.75247524752475e-07, |
|
"loss": -0.0168, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.5286942611477704, |
|
"grad_norm": 3.864986648832606, |
|
"learning_rate": 9.735973597359735e-07, |
|
"loss": -0.0699, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.529494101179764, |
|
"grad_norm": 9.07974895718424, |
|
"learning_rate": 9.71947194719472e-07, |
|
"loss": -0.0013, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.5302939412117577, |
|
"grad_norm": 10.325195266318097, |
|
"learning_rate": 9.702970297029702e-07, |
|
"loss": -0.1641, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.5310937812437513, |
|
"grad_norm": 4.441568363287549, |
|
"learning_rate": 9.686468646864686e-07, |
|
"loss": 0.0397, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.5318936212757448, |
|
"grad_norm": 5.1229672005813605, |
|
"learning_rate": 9.66996699669967e-07, |
|
"loss": -0.1918, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.5326934613077384, |
|
"grad_norm": 4.721345907443594, |
|
"learning_rate": 9.653465346534653e-07, |
|
"loss": -0.0215, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.533493301339732, |
|
"grad_norm": 7.118557074848867, |
|
"learning_rate": 9.636963696369637e-07, |
|
"loss": -0.0039, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.5342931413717257, |
|
"grad_norm": 4.7832103026691755, |
|
"learning_rate": 9.62046204620462e-07, |
|
"loss": -0.0221, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.5350929814037193, |
|
"grad_norm": 6.3240275362924505, |
|
"learning_rate": 9.603960396039604e-07, |
|
"loss": -0.0409, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.5358928214357128, |
|
"grad_norm": 4.027979744126303, |
|
"learning_rate": 9.587458745874586e-07, |
|
"loss": -0.0503, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.5366926614677064, |
|
"grad_norm": 4.74302864470449, |
|
"learning_rate": 9.57095709570957e-07, |
|
"loss": -0.0521, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.5374925014997001, |
|
"grad_norm": 4.985380734947703, |
|
"learning_rate": 9.554455445544553e-07, |
|
"loss": -0.082, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.5382923415316937, |
|
"grad_norm": 4.472849852523853, |
|
"learning_rate": 9.537953795379537e-07, |
|
"loss": 0.1016, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.5390921815636872, |
|
"grad_norm": 3.308647938974776, |
|
"learning_rate": 9.521452145214522e-07, |
|
"loss": -0.0457, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.5398920215956808, |
|
"grad_norm": 4.677054809392709, |
|
"learning_rate": 9.504950495049504e-07, |
|
"loss": -0.1658, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.5406918616276745, |
|
"grad_norm": 4.067875284374342, |
|
"learning_rate": 9.488448844884487e-07, |
|
"loss": 0.0196, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.5414917016596681, |
|
"grad_norm": 3.730643734647644, |
|
"learning_rate": 9.471947194719472e-07, |
|
"loss": -0.0894, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.5422915416916617, |
|
"grad_norm": 5.204622803431674, |
|
"learning_rate": 9.455445544554454e-07, |
|
"loss": -0.0538, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.5430913817236552, |
|
"grad_norm": 5.261361679954622, |
|
"learning_rate": 9.438943894389439e-07, |
|
"loss": -0.026, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.5438912217556489, |
|
"grad_norm": 4.057248991454938, |
|
"learning_rate": 9.422442244224422e-07, |
|
"loss": -0.0785, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.5446910617876425, |
|
"grad_norm": 5.580346613410825, |
|
"learning_rate": 9.405940594059405e-07, |
|
"loss": -0.0335, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.5454909018196361, |
|
"grad_norm": 3.287439521262259, |
|
"learning_rate": 9.389438943894389e-07, |
|
"loss": -0.014, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.5462907418516296, |
|
"grad_norm": 4.383770462305995, |
|
"learning_rate": 9.372937293729373e-07, |
|
"loss": -0.1349, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.5470905818836233, |
|
"grad_norm": 3.570830393151609, |
|
"learning_rate": 9.356435643564356e-07, |
|
"loss": -0.1603, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.5478904219156169, |
|
"grad_norm": 3.9301633755259076, |
|
"learning_rate": 9.33993399339934e-07, |
|
"loss": -0.0728, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.5486902619476105, |
|
"grad_norm": 4.577722525237458, |
|
"learning_rate": 9.323432343234323e-07, |
|
"loss": -0.0686, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.5494901019796041, |
|
"grad_norm": 3.4290177162671704, |
|
"learning_rate": 9.306930693069307e-07, |
|
"loss": -0.0002, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.5502899420115976, |
|
"grad_norm": 3.7664714194362574, |
|
"learning_rate": 9.29042904290429e-07, |
|
"loss": 0.0181, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.5510897820435913, |
|
"grad_norm": 3.53486758576286, |
|
"learning_rate": 9.273927392739273e-07, |
|
"loss": 0.0046, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.5518896220755849, |
|
"grad_norm": 5.533353761076084, |
|
"learning_rate": 9.257425742574257e-07, |
|
"loss": -0.1142, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.5526894621075785, |
|
"grad_norm": 3.618813464410366, |
|
"learning_rate": 9.24092409240924e-07, |
|
"loss": -0.0533, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.553489302139572, |
|
"grad_norm": 3.2399813948203064, |
|
"learning_rate": 9.224422442244224e-07, |
|
"loss": -0.2655, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.5542891421715657, |
|
"grad_norm": 4.777938394039966, |
|
"learning_rate": 9.207920792079208e-07, |
|
"loss": -0.1287, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.5550889822035593, |
|
"grad_norm": 4.753607333838816, |
|
"learning_rate": 9.191419141914191e-07, |
|
"loss": -0.0197, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.5558888222355529, |
|
"grad_norm": 4.574962995202333, |
|
"learning_rate": 9.174917491749174e-07, |
|
"loss": -0.0882, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.5566886622675465, |
|
"grad_norm": 5.716789868568477, |
|
"learning_rate": 9.158415841584159e-07, |
|
"loss": -0.0978, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.5574885022995401, |
|
"grad_norm": 5.1621141783698805, |
|
"learning_rate": 9.141914191419141e-07, |
|
"loss": -0.0492, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.5582883423315337, |
|
"grad_norm": 5.066721910041668, |
|
"learning_rate": 9.125412541254125e-07, |
|
"loss": -0.1249, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.5590881823635273, |
|
"grad_norm": 6.6614345364199, |
|
"learning_rate": 9.108910891089109e-07, |
|
"loss": -0.0784, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.5598880223955209, |
|
"grad_norm": 7.651991800546116, |
|
"learning_rate": 9.092409240924092e-07, |
|
"loss": 0.0583, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.5606878624275144, |
|
"grad_norm": 5.556872832637128, |
|
"learning_rate": 9.075907590759075e-07, |
|
"loss": -0.0337, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.5614877024595081, |
|
"grad_norm": 10.302249877529787, |
|
"learning_rate": 9.05940594059406e-07, |
|
"loss": -0.0318, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.5622875424915017, |
|
"grad_norm": 6.446831319626775, |
|
"learning_rate": 9.042904290429042e-07, |
|
"loss": -0.032, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.5630873825234953, |
|
"grad_norm": 3.1431425262284156, |
|
"learning_rate": 9.026402640264025e-07, |
|
"loss": -0.1257, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.563887222555489, |
|
"grad_norm": 6.8613589813033755, |
|
"learning_rate": 9.00990099009901e-07, |
|
"loss": -0.0899, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.5646870625874825, |
|
"grad_norm": 4.727075571003651, |
|
"learning_rate": 8.993399339933992e-07, |
|
"loss": -0.0103, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.5654869026194761, |
|
"grad_norm": 3.757033554841126, |
|
"learning_rate": 8.976897689768976e-07, |
|
"loss": -0.1201, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.5662867426514697, |
|
"grad_norm": 3.6568326567325586, |
|
"learning_rate": 8.96039603960396e-07, |
|
"loss": -0.0442, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.5670865826834633, |
|
"grad_norm": 4.422118271245446, |
|
"learning_rate": 8.943894389438944e-07, |
|
"loss": -0.1491, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.5678864227154569, |
|
"grad_norm": 4.090528079313399, |
|
"learning_rate": 8.927392739273927e-07, |
|
"loss": -0.1213, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.5686862627474505, |
|
"grad_norm": 6.203118235394611, |
|
"learning_rate": 8.910891089108911e-07, |
|
"loss": -0.0415, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.5694861027794441, |
|
"grad_norm": 4.192997526379617, |
|
"learning_rate": 8.894389438943894e-07, |
|
"loss": -0.0378, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.5702859428114377, |
|
"grad_norm": 4.532567536428998, |
|
"learning_rate": 8.877887788778878e-07, |
|
"loss": -0.0275, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.5710857828434314, |
|
"grad_norm": 4.168577112275988, |
|
"learning_rate": 8.861386138613861e-07, |
|
"loss": -0.0494, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5718856228754249, |
|
"grad_norm": 7.436541583714728, |
|
"learning_rate": 8.844884488448845e-07, |
|
"loss": 0.0338, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5726854629074185, |
|
"grad_norm": 4.3341221213745555, |
|
"learning_rate": 8.828382838283828e-07, |
|
"loss": -0.1824, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5734853029394121, |
|
"grad_norm": 5.774496226610818, |
|
"learning_rate": 8.811881188118812e-07, |
|
"loss": -0.0764, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5742851429714058, |
|
"grad_norm": 4.53339855951246, |
|
"learning_rate": 8.795379537953795e-07, |
|
"loss": -0.0494, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5750849830033993, |
|
"grad_norm": 3.7011611817540118, |
|
"learning_rate": 8.778877887788778e-07, |
|
"loss": -0.1227, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5758848230353929, |
|
"grad_norm": 4.117237995569142, |
|
"learning_rate": 8.762376237623762e-07, |
|
"loss": -0.0782, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5766846630673865, |
|
"grad_norm": 3.347109381610254, |
|
"learning_rate": 8.745874587458745e-07, |
|
"loss": -0.1517, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5774845030993802, |
|
"grad_norm": 3.9587273384893447, |
|
"learning_rate": 8.729372937293729e-07, |
|
"loss": -0.0647, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5782843431313738, |
|
"grad_norm": 4.073013317525639, |
|
"learning_rate": 8.712871287128712e-07, |
|
"loss": -0.0354, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5790841831633673, |
|
"grad_norm": 4.360786018214523, |
|
"learning_rate": 8.696369636963697e-07, |
|
"loss": -0.0845, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5798840231953609, |
|
"grad_norm": 4.911325926898916, |
|
"learning_rate": 8.679867986798679e-07, |
|
"loss": -0.0581, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5806838632273545, |
|
"grad_norm": 7.374565499647674, |
|
"learning_rate": 8.663366336633663e-07, |
|
"loss": 0.0876, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5814837032593482, |
|
"grad_norm": 5.228899924882716, |
|
"learning_rate": 8.646864686468647e-07, |
|
"loss": -0.0188, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5822835432913417, |
|
"grad_norm": 5.076190474367137, |
|
"learning_rate": 8.63036303630363e-07, |
|
"loss": -0.0482, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5830833833233353, |
|
"grad_norm": 3.8391392043031067, |
|
"learning_rate": 8.613861386138613e-07, |
|
"loss": -0.0351, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.5838832233553289, |
|
"grad_norm": 4.720664943150823, |
|
"learning_rate": 8.597359735973598e-07, |
|
"loss": -0.0919, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.5846830633873226, |
|
"grad_norm": 3.696825036479519, |
|
"learning_rate": 8.58085808580858e-07, |
|
"loss": -0.0491, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.5854829034193162, |
|
"grad_norm": 4.957633580857494, |
|
"learning_rate": 8.564356435643563e-07, |
|
"loss": -0.0084, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.5862827434513097, |
|
"grad_norm": 4.145591204807576, |
|
"learning_rate": 8.547854785478548e-07, |
|
"loss": -0.0777, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.5870825834833033, |
|
"grad_norm": 6.008926427229853, |
|
"learning_rate": 8.531353135313531e-07, |
|
"loss": -0.0285, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.587882423515297, |
|
"grad_norm": 6.457940104008622, |
|
"learning_rate": 8.514851485148514e-07, |
|
"loss": -0.1126, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.5886822635472906, |
|
"grad_norm": 4.150286303835989, |
|
"learning_rate": 8.498349834983498e-07, |
|
"loss": -0.0633, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.5894821035792841, |
|
"grad_norm": 4.765817707105298, |
|
"learning_rate": 8.481848184818482e-07, |
|
"loss": -0.1274, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.5902819436112777, |
|
"grad_norm": 2.944307272093047, |
|
"learning_rate": 8.465346534653464e-07, |
|
"loss": -0.1143, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.5910817836432714, |
|
"grad_norm": 4.301068842918969, |
|
"learning_rate": 8.448844884488449e-07, |
|
"loss": -0.0303, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.591881623675265, |
|
"grad_norm": 3.5345181895694724, |
|
"learning_rate": 8.432343234323432e-07, |
|
"loss": -0.0369, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.5926814637072585, |
|
"grad_norm": 3.6758939784537477, |
|
"learning_rate": 8.415841584158416e-07, |
|
"loss": -0.0435, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.5934813037392521, |
|
"grad_norm": 9.280032916175081, |
|
"learning_rate": 8.399339933993399e-07, |
|
"loss": -0.077, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.5942811437712457, |
|
"grad_norm": 3.691324223005634, |
|
"learning_rate": 8.382838283828383e-07, |
|
"loss": 0.0384, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.5950809838032394, |
|
"grad_norm": 5.13228312974853, |
|
"learning_rate": 8.366336633663366e-07, |
|
"loss": -0.0101, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.595880823835233, |
|
"grad_norm": 3.127958499467288, |
|
"learning_rate": 8.34983498349835e-07, |
|
"loss": 0.0036, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.5966806638672265, |
|
"grad_norm": 4.371581567674568, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": -0.088, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.5974805038992201, |
|
"grad_norm": 3.7498910252313786, |
|
"learning_rate": 8.316831683168316e-07, |
|
"loss": -0.0249, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.5982803439312138, |
|
"grad_norm": 9.554152491664782, |
|
"learning_rate": 8.3003300330033e-07, |
|
"loss": -0.0718, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.5990801839632074, |
|
"grad_norm": 4.450073267301403, |
|
"learning_rate": 8.283828382838283e-07, |
|
"loss": -0.038, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.5998800239952009, |
|
"grad_norm": 4.885796549111672, |
|
"learning_rate": 8.267326732673267e-07, |
|
"loss": -0.0279, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.6006798640271945, |
|
"grad_norm": 4.729710058959354, |
|
"learning_rate": 8.25082508250825e-07, |
|
"loss": -0.0415, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.6014797040591882, |
|
"grad_norm": 5.310403387692374, |
|
"learning_rate": 8.234323432343234e-07, |
|
"loss": -0.1964, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.6022795440911818, |
|
"grad_norm": 4.462129183196535, |
|
"learning_rate": 8.217821782178217e-07, |
|
"loss": -0.0289, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.6030793841231754, |
|
"grad_norm": 4.343559206058792, |
|
"learning_rate": 8.201320132013201e-07, |
|
"loss": 0.0155, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.6038792241551689, |
|
"grad_norm": 7.14111860643498, |
|
"learning_rate": 8.184818481848184e-07, |
|
"loss": 0.076, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.6046790641871626, |
|
"grad_norm": 5.741261351757093, |
|
"learning_rate": 8.168316831683168e-07, |
|
"loss": -0.0684, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.6054789042191562, |
|
"grad_norm": 4.841100743497433, |
|
"learning_rate": 8.151815181518151e-07, |
|
"loss": 0.0251, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.6062787442511498, |
|
"grad_norm": 5.776828704222559, |
|
"learning_rate": 8.135313531353136e-07, |
|
"loss": 0.0922, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.6070785842831433, |
|
"grad_norm": 5.171546395456714, |
|
"learning_rate": 8.118811881188119e-07, |
|
"loss": 0.0378, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.607878424315137, |
|
"grad_norm": 4.5497593071961475, |
|
"learning_rate": 8.102310231023102e-07, |
|
"loss": -0.0091, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.6086782643471306, |
|
"grad_norm": 3.4008260116242837, |
|
"learning_rate": 8.085808580858086e-07, |
|
"loss": -0.0513, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.6094781043791242, |
|
"grad_norm": 5.9528170929025475, |
|
"learning_rate": 8.069306930693069e-07, |
|
"loss": -0.0846, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.6102779444111178, |
|
"grad_norm": 8.59254741230532, |
|
"learning_rate": 8.052805280528052e-07, |
|
"loss": 0.0454, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.6110777844431113, |
|
"grad_norm": 5.92388092933109, |
|
"learning_rate": 8.036303630363036e-07, |
|
"loss": -0.0677, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.611877624475105, |
|
"grad_norm": 4.5071343981279375, |
|
"learning_rate": 8.01980198019802e-07, |
|
"loss": -0.0598, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.6126774645070986, |
|
"grad_norm": 5.095952967655762, |
|
"learning_rate": 8.003300330033002e-07, |
|
"loss": -0.1077, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.6134773045390922, |
|
"grad_norm": 3.686767117360266, |
|
"learning_rate": 7.986798679867987e-07, |
|
"loss": -0.0509, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.6142771445710857, |
|
"grad_norm": 4.709313867244328, |
|
"learning_rate": 7.97029702970297e-07, |
|
"loss": -0.0384, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.6150769846030794, |
|
"grad_norm": 4.606485786769665, |
|
"learning_rate": 7.953795379537953e-07, |
|
"loss": -0.0639, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.615876824635073, |
|
"grad_norm": 4.126308811511686, |
|
"learning_rate": 7.937293729372937e-07, |
|
"loss": -0.1454, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.6166766646670666, |
|
"grad_norm": 5.0891862328923985, |
|
"learning_rate": 7.920792079207921e-07, |
|
"loss": 0.0552, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.6174765046990602, |
|
"grad_norm": 4.348447825302712, |
|
"learning_rate": 7.904290429042903e-07, |
|
"loss": -0.0914, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.6182763447310538, |
|
"grad_norm": 4.069062786121328, |
|
"learning_rate": 7.887788778877888e-07, |
|
"loss": -0.0, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.6190761847630474, |
|
"grad_norm": 4.293180451800697, |
|
"learning_rate": 7.871287128712871e-07, |
|
"loss": -0.2021, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.619876024795041, |
|
"grad_norm": 6.832958614340714, |
|
"learning_rate": 7.854785478547854e-07, |
|
"loss": -0.049, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.6206758648270346, |
|
"grad_norm": 4.62295713929861, |
|
"learning_rate": 7.838283828382838e-07, |
|
"loss": -0.0099, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.6214757048590281, |
|
"grad_norm": 16.021103293232393, |
|
"learning_rate": 7.821782178217821e-07, |
|
"loss": -0.0229, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.6222755448910218, |
|
"grad_norm": 6.878826117062365, |
|
"learning_rate": 7.805280528052805e-07, |
|
"loss": 0.0852, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.6230753849230154, |
|
"grad_norm": 3.5925006851057595, |
|
"learning_rate": 7.788778877887788e-07, |
|
"loss": -0.0838, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.623875224955009, |
|
"grad_norm": 4.312199015410901, |
|
"learning_rate": 7.772277227722772e-07, |
|
"loss": -0.1938, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.6246750649870026, |
|
"grad_norm": 4.427437777822434, |
|
"learning_rate": 7.755775577557755e-07, |
|
"loss": -0.1088, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.6254749050189962, |
|
"grad_norm": 3.391720502521415, |
|
"learning_rate": 7.739273927392739e-07, |
|
"loss": -0.1204, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.6262747450509898, |
|
"grad_norm": 3.3125718306904512, |
|
"learning_rate": 7.722772277227722e-07, |
|
"loss": -0.0828, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.6270745850829834, |
|
"grad_norm": 5.402117257600779, |
|
"learning_rate": 7.706270627062707e-07, |
|
"loss": -0.054, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.627874425114977, |
|
"grad_norm": 4.489840589382479, |
|
"learning_rate": 7.689768976897689e-07, |
|
"loss": -0.0368, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.6286742651469706, |
|
"grad_norm": 3.848546702562119, |
|
"learning_rate": 7.673267326732673e-07, |
|
"loss": -0.0842, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.6294741051789642, |
|
"grad_norm": 4.3003301981629765, |
|
"learning_rate": 7.656765676567657e-07, |
|
"loss": -0.1575, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.6302739452109578, |
|
"grad_norm": 3.234095759477404, |
|
"learning_rate": 7.64026402640264e-07, |
|
"loss": -0.1448, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.6310737852429514, |
|
"grad_norm": 4.587363650091271, |
|
"learning_rate": 7.623762376237624e-07, |
|
"loss": -0.0806, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.6318736252749451, |
|
"grad_norm": 4.91701458923025, |
|
"learning_rate": 7.607260726072607e-07, |
|
"loss": 0.0289, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.6326734653069386, |
|
"grad_norm": 4.26117605640356, |
|
"learning_rate": 7.59075907590759e-07, |
|
"loss": -0.0442, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.6334733053389322, |
|
"grad_norm": 4.714328336316601, |
|
"learning_rate": 7.574257425742574e-07, |
|
"loss": -0.1182, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.6342731453709258, |
|
"grad_norm": 4.8043256421789975, |
|
"learning_rate": 7.557755775577558e-07, |
|
"loss": -0.0532, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.6350729854029195, |
|
"grad_norm": 4.981158806362152, |
|
"learning_rate": 7.54125412541254e-07, |
|
"loss": -0.0322, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.635872825434913, |
|
"grad_norm": 6.600526915815604, |
|
"learning_rate": 7.524752475247525e-07, |
|
"loss": -0.1059, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.6366726654669066, |
|
"grad_norm": 4.989184316503347, |
|
"learning_rate": 7.508250825082508e-07, |
|
"loss": -0.0888, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.6374725054989002, |
|
"grad_norm": 4.683126825596764, |
|
"learning_rate": 7.491749174917491e-07, |
|
"loss": -0.0425, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.6382723455308938, |
|
"grad_norm": 2.835338855117646, |
|
"learning_rate": 7.475247524752475e-07, |
|
"loss": -0.1188, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.6390721855628875, |
|
"grad_norm": 3.334782349034145, |
|
"learning_rate": 7.458745874587459e-07, |
|
"loss": -0.0085, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.639872025594881, |
|
"grad_norm": 3.8950834045490677, |
|
"learning_rate": 7.442244224422441e-07, |
|
"loss": -0.0866, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.6406718656268746, |
|
"grad_norm": 3.7954504535302047, |
|
"learning_rate": 7.425742574257426e-07, |
|
"loss": -0.1718, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.6414717056588682, |
|
"grad_norm": 3.9147882759458907, |
|
"learning_rate": 7.409240924092409e-07, |
|
"loss": -0.1172, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.6422715456908619, |
|
"grad_norm": 4.14195212922553, |
|
"learning_rate": 7.392739273927392e-07, |
|
"loss": -0.2055, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.6430713857228554, |
|
"grad_norm": 5.564167929906255, |
|
"learning_rate": 7.376237623762376e-07, |
|
"loss": -0.0587, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.643871225754849, |
|
"grad_norm": 5.104003509197404, |
|
"learning_rate": 7.359735973597359e-07, |
|
"loss": -0.1599, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.6446710657868426, |
|
"grad_norm": 3.249110466926901, |
|
"learning_rate": 7.343234323432343e-07, |
|
"loss": 0.0254, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.6454709058188363, |
|
"grad_norm": 5.330488201062819, |
|
"learning_rate": 7.326732673267326e-07, |
|
"loss": -0.0932, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.6462707458508299, |
|
"grad_norm": 3.391881050876262, |
|
"learning_rate": 7.31023102310231e-07, |
|
"loss": -0.0045, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.6470705858828234, |
|
"grad_norm": 4.729176906400958, |
|
"learning_rate": 7.293729372937293e-07, |
|
"loss": 0.0309, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.647870425914817, |
|
"grad_norm": 4.873305784391995, |
|
"learning_rate": 7.277227722772277e-07, |
|
"loss": -0.016, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.6486702659468107, |
|
"grad_norm": 4.737325724513948, |
|
"learning_rate": 7.26072607260726e-07, |
|
"loss": -0.0555, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.6494701059788043, |
|
"grad_norm": 4.700178573137915, |
|
"learning_rate": 7.244224422442245e-07, |
|
"loss": -0.0054, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.6502699460107978, |
|
"grad_norm": 3.8303301007119375, |
|
"learning_rate": 7.227722772277227e-07, |
|
"loss": -0.0237, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.6510697860427914, |
|
"grad_norm": 3.9787912774229404, |
|
"learning_rate": 7.211221122112211e-07, |
|
"loss": -0.0158, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.651869626074785, |
|
"grad_norm": 4.3995808661843805, |
|
"learning_rate": 7.194719471947195e-07, |
|
"loss": 0.0276, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.6526694661067787, |
|
"grad_norm": 4.052268422009291, |
|
"learning_rate": 7.178217821782178e-07, |
|
"loss": -0.0178, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.6534693061387723, |
|
"grad_norm": 4.162149014725009, |
|
"learning_rate": 7.161716171617161e-07, |
|
"loss": -0.0343, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.6542691461707658, |
|
"grad_norm": 4.733808124710197, |
|
"learning_rate": 7.145214521452146e-07, |
|
"loss": -0.0719, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.6550689862027594, |
|
"grad_norm": 4.367810691084756, |
|
"learning_rate": 7.128712871287128e-07, |
|
"loss": -0.1031, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.6558688262347531, |
|
"grad_norm": 3.393663864026175, |
|
"learning_rate": 7.112211221122111e-07, |
|
"loss": -0.0469, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.6566686662667467, |
|
"grad_norm": 6.593078579096907, |
|
"learning_rate": 7.095709570957096e-07, |
|
"loss": -0.0492, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.6574685062987402, |
|
"grad_norm": 5.90593512770552, |
|
"learning_rate": 7.079207920792078e-07, |
|
"loss": 0.017, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.6582683463307338, |
|
"grad_norm": 4.280214281867913, |
|
"learning_rate": 7.062706270627063e-07, |
|
"loss": -0.0432, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.6590681863627275, |
|
"grad_norm": 8.423741404535653, |
|
"learning_rate": 7.046204620462046e-07, |
|
"loss": -0.0291, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.6598680263947211, |
|
"grad_norm": 5.038317153573228, |
|
"learning_rate": 7.029702970297029e-07, |
|
"loss": -0.093, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.6606678664267147, |
|
"grad_norm": 6.706344720432834, |
|
"learning_rate": 7.013201320132013e-07, |
|
"loss": -0.1315, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.6614677064587082, |
|
"grad_norm": 3.716489601764274, |
|
"learning_rate": 6.996699669966997e-07, |
|
"loss": -0.0825, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.6622675464907019, |
|
"grad_norm": 4.158836764412884, |
|
"learning_rate": 6.980198019801979e-07, |
|
"loss": -0.0161, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.6630673865226955, |
|
"grad_norm": 3.733283485919958, |
|
"learning_rate": 6.963696369636964e-07, |
|
"loss": -0.0594, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.6638672265546891, |
|
"grad_norm": 6.968567713247902, |
|
"learning_rate": 6.947194719471947e-07, |
|
"loss": -0.0441, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.6646670665866826, |
|
"grad_norm": 5.008730323701448, |
|
"learning_rate": 6.93069306930693e-07, |
|
"loss": -0.1307, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.6654669066186762, |
|
"grad_norm": 4.2175650733942955, |
|
"learning_rate": 6.914191419141914e-07, |
|
"loss": -0.0771, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.6662667466506699, |
|
"grad_norm": 5.402322742920563, |
|
"learning_rate": 6.897689768976897e-07, |
|
"loss": -0.0841, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.6670665866826635, |
|
"grad_norm": 5.646942573991696, |
|
"learning_rate": 6.88118811881188e-07, |
|
"loss": -0.0622, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.667866426714657, |
|
"grad_norm": 13.649360926832344, |
|
"learning_rate": 6.864686468646864e-07, |
|
"loss": -0.0637, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.6686662667466506, |
|
"grad_norm": 5.461268948386568, |
|
"learning_rate": 6.848184818481848e-07, |
|
"loss": -0.0241, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.6694661067786443, |
|
"grad_norm": 4.011621520471584, |
|
"learning_rate": 6.831683168316831e-07, |
|
"loss": -0.0422, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.6702659468106379, |
|
"grad_norm": 20.835014010983784, |
|
"learning_rate": 6.815181518151815e-07, |
|
"loss": -0.126, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.6710657868426315, |
|
"grad_norm": 5.041368060556288, |
|
"learning_rate": 6.798679867986798e-07, |
|
"loss": -0.1016, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.671865626874625, |
|
"grad_norm": 5.834292995896152, |
|
"learning_rate": 6.782178217821783e-07, |
|
"loss": -0.0738, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.6726654669066187, |
|
"grad_norm": 3.5120723151753985, |
|
"learning_rate": 6.765676567656765e-07, |
|
"loss": -0.0504, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.6734653069386123, |
|
"grad_norm": 2.901517891733533, |
|
"learning_rate": 6.749174917491749e-07, |
|
"loss": -0.0698, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.6742651469706059, |
|
"grad_norm": 3.465841087435974, |
|
"learning_rate": 6.732673267326733e-07, |
|
"loss": -0.1227, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.6750649870025994, |
|
"grad_norm": 4.536588693958206, |
|
"learning_rate": 6.716171617161716e-07, |
|
"loss": -0.0602, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.675864827034593, |
|
"grad_norm": 10.903656834330391, |
|
"learning_rate": 6.699669966996699e-07, |
|
"loss": -0.1289, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.6766646670665867, |
|
"grad_norm": 7.296365266758308, |
|
"learning_rate": 6.683168316831684e-07, |
|
"loss": -0.1561, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.6774645070985803, |
|
"grad_norm": 4.412331570876947, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": -0.0958, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.6782643471305739, |
|
"grad_norm": 2.8672230897612345, |
|
"learning_rate": 6.650165016501649e-07, |
|
"loss": -0.1643, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.6790641871625674, |
|
"grad_norm": 4.5674033793568904, |
|
"learning_rate": 6.633663366336634e-07, |
|
"loss": -0.1336, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.6798640271945611, |
|
"grad_norm": 4.381540695320094, |
|
"learning_rate": 6.617161716171616e-07, |
|
"loss": -0.0132, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.6806638672265547, |
|
"grad_norm": 5.664059132542103, |
|
"learning_rate": 6.6006600660066e-07, |
|
"loss": -0.0285, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.6814637072585483, |
|
"grad_norm": 5.7461780617115, |
|
"learning_rate": 6.584158415841584e-07, |
|
"loss": 0.0047, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.6822635472905418, |
|
"grad_norm": 4.320720191192789, |
|
"learning_rate": 6.567656765676567e-07, |
|
"loss": -0.1447, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.6830633873225355, |
|
"grad_norm": 3.108737285911658, |
|
"learning_rate": 6.55115511551155e-07, |
|
"loss": -0.1611, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.6838632273545291, |
|
"grad_norm": 5.023876212557061, |
|
"learning_rate": 6.534653465346535e-07, |
|
"loss": -0.1529, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.6846630673865227, |
|
"grad_norm": 4.545192341611211, |
|
"learning_rate": 6.518151815181517e-07, |
|
"loss": -0.0456, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.6854629074185163, |
|
"grad_norm": 5.267884265007784, |
|
"learning_rate": 6.501650165016502e-07, |
|
"loss": -0.0288, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.6862627474505099, |
|
"grad_norm": 5.101939165542976, |
|
"learning_rate": 6.485148514851485e-07, |
|
"loss": -0.0036, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.6870625874825035, |
|
"grad_norm": 3.55089136672625, |
|
"learning_rate": 6.468646864686468e-07, |
|
"loss": -0.0762, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6878624275144971, |
|
"grad_norm": 4.121292066096188, |
|
"learning_rate": 6.452145214521452e-07, |
|
"loss": -0.0858, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6886622675464907, |
|
"grad_norm": 4.411771304555708, |
|
"learning_rate": 6.435643564356436e-07, |
|
"loss": -0.0359, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6894621075784843, |
|
"grad_norm": 4.163024887578695, |
|
"learning_rate": 6.419141914191419e-07, |
|
"loss": -0.1036, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6902619476104779, |
|
"grad_norm": 3.9253552561550307, |
|
"learning_rate": 6.402640264026402e-07, |
|
"loss": -0.092, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6910617876424715, |
|
"grad_norm": 3.3756432096953906, |
|
"learning_rate": 6.386138613861386e-07, |
|
"loss": 0.0002, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6918616276744651, |
|
"grad_norm": 4.4942197763700245, |
|
"learning_rate": 6.369636963696369e-07, |
|
"loss": 0.0044, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6926614677064588, |
|
"grad_norm": 4.469003096662444, |
|
"learning_rate": 6.353135313531353e-07, |
|
"loss": -0.0027, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6934613077384523, |
|
"grad_norm": 3.212779461175308, |
|
"learning_rate": 6.336633663366336e-07, |
|
"loss": -0.0255, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6942611477704459, |
|
"grad_norm": 3.473986685130551, |
|
"learning_rate": 6.32013201320132e-07, |
|
"loss": -0.0171, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6950609878024395, |
|
"grad_norm": 4.216176017792983, |
|
"learning_rate": 6.303630363036303e-07, |
|
"loss": -0.0673, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6958608278344331, |
|
"grad_norm": 12.098304541476889, |
|
"learning_rate": 6.287128712871287e-07, |
|
"loss": 0.0742, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6966606678664267, |
|
"grad_norm": 8.083305542363899, |
|
"learning_rate": 6.270627062706271e-07, |
|
"loss": 0.0401, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6974605078984203, |
|
"grad_norm": 4.685734734341067, |
|
"learning_rate": 6.254125412541254e-07, |
|
"loss": -0.0235, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6982603479304139, |
|
"grad_norm": 5.85956379904162, |
|
"learning_rate": 6.237623762376237e-07, |
|
"loss": -0.1556, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.6990601879624075, |
|
"grad_norm": 3.61321328588953, |
|
"learning_rate": 6.221122112211222e-07, |
|
"loss": -0.093, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.6998600279944012, |
|
"grad_norm": 3.528560458155755, |
|
"learning_rate": 6.204620462046204e-07, |
|
"loss": -0.0322, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.7006598680263947, |
|
"grad_norm": 6.51982029204985, |
|
"learning_rate": 6.188118811881187e-07, |
|
"loss": 0.0667, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.7014597080583883, |
|
"grad_norm": 4.530213336089527, |
|
"learning_rate": 6.171617161716172e-07, |
|
"loss": -0.0434, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.7022595480903819, |
|
"grad_norm": 6.616076811947681, |
|
"learning_rate": 6.155115511551154e-07, |
|
"loss": 0.0219, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.7030593881223756, |
|
"grad_norm": 4.1562171877376075, |
|
"learning_rate": 6.138613861386138e-07, |
|
"loss": -0.0648, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.7038592281543691, |
|
"grad_norm": 4.419647626337558, |
|
"learning_rate": 6.122112211221122e-07, |
|
"loss": -0.1112, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.7046590681863627, |
|
"grad_norm": 4.940388667457944, |
|
"learning_rate": 6.105610561056105e-07, |
|
"loss": -0.0162, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.7054589082183563, |
|
"grad_norm": 6.023947292183416, |
|
"learning_rate": 6.089108910891088e-07, |
|
"loss": -0.0435, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.70625874825035, |
|
"grad_norm": 7.1901426018379935, |
|
"learning_rate": 6.072607260726073e-07, |
|
"loss": -0.001, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.7070585882823436, |
|
"grad_norm": 5.858167518812244, |
|
"learning_rate": 6.056105610561055e-07, |
|
"loss": -0.0017, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.7078584283143371, |
|
"grad_norm": 4.6643313740940835, |
|
"learning_rate": 6.03960396039604e-07, |
|
"loss": -0.0808, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.7086582683463307, |
|
"grad_norm": 3.964900755824796, |
|
"learning_rate": 6.023102310231023e-07, |
|
"loss": -0.1257, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.7094581083783243, |
|
"grad_norm": 3.6312620074127797, |
|
"learning_rate": 6.006600660066007e-07, |
|
"loss": -0.044, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.710257948410318, |
|
"grad_norm": 4.46804227760141, |
|
"learning_rate": 5.99009900990099e-07, |
|
"loss": -0.0864, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.7110577884423115, |
|
"grad_norm": 3.9877331513072884, |
|
"learning_rate": 5.973597359735974e-07, |
|
"loss": -0.0555, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.7118576284743051, |
|
"grad_norm": 5.71452000001456, |
|
"learning_rate": 5.957095709570957e-07, |
|
"loss": -0.0516, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.7126574685062987, |
|
"grad_norm": 4.729225707280767, |
|
"learning_rate": 5.94059405940594e-07, |
|
"loss": -0.1252, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.7134573085382924, |
|
"grad_norm": 10.531702048903348, |
|
"learning_rate": 5.924092409240924e-07, |
|
"loss": -0.0143, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.714257148570286, |
|
"grad_norm": 4.337061940699697, |
|
"learning_rate": 5.907590759075907e-07, |
|
"loss": -0.0256, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.7150569886022795, |
|
"grad_norm": 4.553139268045056, |
|
"learning_rate": 5.891089108910891e-07, |
|
"loss": 0.0592, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.7158568286342731, |
|
"grad_norm": 3.65289282929829, |
|
"learning_rate": 5.874587458745874e-07, |
|
"loss": 0.0011, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.7166566686662668, |
|
"grad_norm": 2.969949901428, |
|
"learning_rate": 5.858085808580858e-07, |
|
"loss": -0.1288, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.7174565086982604, |
|
"grad_norm": 5.316965178875907, |
|
"learning_rate": 5.841584158415841e-07, |
|
"loss": -0.0688, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.7182563487302539, |
|
"grad_norm": 6.112915886146603, |
|
"learning_rate": 5.825082508250825e-07, |
|
"loss": -0.1923, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.7190561887622475, |
|
"grad_norm": 4.072265156624673, |
|
"learning_rate": 5.808580858085808e-07, |
|
"loss": -0.1749, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.7198560287942412, |
|
"grad_norm": 4.286524287381163, |
|
"learning_rate": 5.792079207920792e-07, |
|
"loss": -0.0736, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7206558688262348, |
|
"grad_norm": 6.654813369667659, |
|
"learning_rate": 5.775577557755775e-07, |
|
"loss": -0.1185, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.7214557088582284, |
|
"grad_norm": 5.075962580453491, |
|
"learning_rate": 5.75907590759076e-07, |
|
"loss": 0.0889, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.7222555488902219, |
|
"grad_norm": 5.581410015072146, |
|
"learning_rate": 5.742574257425742e-07, |
|
"loss": -0.0718, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.7230553889222155, |
|
"grad_norm": 7.760040178489886, |
|
"learning_rate": 5.726072607260726e-07, |
|
"loss": -0.0227, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.7238552289542092, |
|
"grad_norm": 4.491264765964933, |
|
"learning_rate": 5.70957095709571e-07, |
|
"loss": -0.0838, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.7246550689862028, |
|
"grad_norm": 4.876358038696258, |
|
"learning_rate": 5.693069306930692e-07, |
|
"loss": -0.172, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.7254549090181963, |
|
"grad_norm": 3.9354286195012422, |
|
"learning_rate": 5.676567656765676e-07, |
|
"loss": -0.0793, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.7262547490501899, |
|
"grad_norm": 3.51016598192195, |
|
"learning_rate": 5.66006600660066e-07, |
|
"loss": 0.0052, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.7270545890821836, |
|
"grad_norm": 3.6895775842146166, |
|
"learning_rate": 5.643564356435643e-07, |
|
"loss": -0.0167, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.7278544291141772, |
|
"grad_norm": 4.773443293841103, |
|
"learning_rate": 5.627062706270626e-07, |
|
"loss": -0.0359, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.7286542691461708, |
|
"grad_norm": 4.278237278247243, |
|
"learning_rate": 5.610561056105611e-07, |
|
"loss": -0.054, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.7294541091781643, |
|
"grad_norm": 10.27415077431224, |
|
"learning_rate": 5.594059405940594e-07, |
|
"loss": -0.0075, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.730253949210158, |
|
"grad_norm": 4.2766288821859755, |
|
"learning_rate": 5.577557755775577e-07, |
|
"loss": 0.0493, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.7310537892421516, |
|
"grad_norm": 3.5281016400546275, |
|
"learning_rate": 5.561056105610561e-07, |
|
"loss": -0.0766, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.7318536292741452, |
|
"grad_norm": 3.8674946364382223, |
|
"learning_rate": 5.544554455445545e-07, |
|
"loss": -0.0442, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.7326534693061387, |
|
"grad_norm": 5.4465871711884395, |
|
"learning_rate": 5.528052805280527e-07, |
|
"loss": -0.0841, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.7334533093381324, |
|
"grad_norm": 3.530741427097772, |
|
"learning_rate": 5.511551155115512e-07, |
|
"loss": -0.0629, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.734253149370126, |
|
"grad_norm": 3.965321298788348, |
|
"learning_rate": 5.495049504950495e-07, |
|
"loss": -0.0573, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.7350529894021196, |
|
"grad_norm": 4.295307109186891, |
|
"learning_rate": 5.478547854785477e-07, |
|
"loss": -0.1381, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.7358528294341132, |
|
"grad_norm": 3.8500617084264257, |
|
"learning_rate": 5.462046204620462e-07, |
|
"loss": -0.049, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.7366526694661067, |
|
"grad_norm": 4.8663143152337005, |
|
"learning_rate": 5.445544554455445e-07, |
|
"loss": -0.0553, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.7374525094981004, |
|
"grad_norm": 3.4389426238655476, |
|
"learning_rate": 5.429042904290429e-07, |
|
"loss": -0.1896, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.738252349530094, |
|
"grad_norm": 5.019665193069423, |
|
"learning_rate": 5.412541254125412e-07, |
|
"loss": 0.1007, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.7390521895620876, |
|
"grad_norm": 3.9100586900916126, |
|
"learning_rate": 5.396039603960396e-07, |
|
"loss": -0.0697, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.7398520295940811, |
|
"grad_norm": 3.919136094051066, |
|
"learning_rate": 5.379537953795379e-07, |
|
"loss": -0.0907, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.7406518696260748, |
|
"grad_norm": 4.124863593418168, |
|
"learning_rate": 5.363036303630363e-07, |
|
"loss": -0.0569, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.7414517096580684, |
|
"grad_norm": 3.9668145454046977, |
|
"learning_rate": 5.346534653465346e-07, |
|
"loss": -0.0045, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.742251549690062, |
|
"grad_norm": 4.794421236275003, |
|
"learning_rate": 5.33003300330033e-07, |
|
"loss": -0.1284, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.7430513897220556, |
|
"grad_norm": 5.586609443850984, |
|
"learning_rate": 5.313531353135313e-07, |
|
"loss": -0.0346, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.7438512297540492, |
|
"grad_norm": 5.408030486873039, |
|
"learning_rate": 5.297029702970297e-07, |
|
"loss": -0.1473, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.7446510697860428, |
|
"grad_norm": 3.506888694865617, |
|
"learning_rate": 5.28052805280528e-07, |
|
"loss": -0.0018, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.7454509098180364, |
|
"grad_norm": 4.703351899310227, |
|
"learning_rate": 5.264026402640264e-07, |
|
"loss": 0.0077, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.74625074985003, |
|
"grad_norm": 5.030173808558858, |
|
"learning_rate": 5.247524752475247e-07, |
|
"loss": -0.1189, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.7470505898820236, |
|
"grad_norm": 6.14459022838033, |
|
"learning_rate": 5.23102310231023e-07, |
|
"loss": 0.0135, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.7478504299140172, |
|
"grad_norm": 3.162063833925453, |
|
"learning_rate": 5.214521452145214e-07, |
|
"loss": -0.0648, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.7486502699460108, |
|
"grad_norm": 3.322990934843452, |
|
"learning_rate": 5.198019801980198e-07, |
|
"loss": -0.0444, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.7494501099780044, |
|
"grad_norm": 7.281580577762579, |
|
"learning_rate": 5.181518151815182e-07, |
|
"loss": -0.079, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.750249950009998, |
|
"grad_norm": 4.995689441346887, |
|
"learning_rate": 5.165016501650164e-07, |
|
"loss": -0.0494, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.7510497900419916, |
|
"grad_norm": 3.662981016059356, |
|
"learning_rate": 5.148514851485149e-07, |
|
"loss": -0.1306, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.7518496300739852, |
|
"grad_norm": 4.564675844346983, |
|
"learning_rate": 5.132013201320132e-07, |
|
"loss": 0.0849, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.7526494701059788, |
|
"grad_norm": 4.127114880881102, |
|
"learning_rate": 5.115511551155115e-07, |
|
"loss": -0.1003, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.7534493101379725, |
|
"grad_norm": 7.580284959172436, |
|
"learning_rate": 5.099009900990099e-07, |
|
"loss": 0.0127, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.754249150169966, |
|
"grad_norm": 3.8477764594959223, |
|
"learning_rate": 5.082508250825083e-07, |
|
"loss": -0.0247, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.7550489902019596, |
|
"grad_norm": 5.134480523239938, |
|
"learning_rate": 5.066006600660065e-07, |
|
"loss": -0.064, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.7558488302339532, |
|
"grad_norm": 4.184980403074917, |
|
"learning_rate": 5.04950495049505e-07, |
|
"loss": -0.0374, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.7566486702659468, |
|
"grad_norm": 4.095109087896494, |
|
"learning_rate": 5.033003300330033e-07, |
|
"loss": -0.0667, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.7574485102979404, |
|
"grad_norm": 4.493509273952016, |
|
"learning_rate": 5.016501650165016e-07, |
|
"loss": -0.046, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.758248350329934, |
|
"grad_norm": 4.37283031614021, |
|
"learning_rate": 5e-07, |
|
"loss": -0.0662, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.7590481903619276, |
|
"grad_norm": 4.509842387488209, |
|
"learning_rate": 4.983498349834983e-07, |
|
"loss": -0.0951, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.7598480303939212, |
|
"grad_norm": 5.100099502789577, |
|
"learning_rate": 4.966996699669966e-07, |
|
"loss": -0.1086, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.7606478704259149, |
|
"grad_norm": 4.034320460393353, |
|
"learning_rate": 4.95049504950495e-07, |
|
"loss": -0.0263, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.7614477104579084, |
|
"grad_norm": 4.593555887493731, |
|
"learning_rate": 4.933993399339933e-07, |
|
"loss": -0.1636, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.762247550489902, |
|
"grad_norm": 4.526550802808929, |
|
"learning_rate": 4.917491749174918e-07, |
|
"loss": -0.0163, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.7630473905218956, |
|
"grad_norm": 5.515431862710494, |
|
"learning_rate": 4.900990099009901e-07, |
|
"loss": 0.0348, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.7638472305538893, |
|
"grad_norm": 4.2009128577609145, |
|
"learning_rate": 4.884488448844884e-07, |
|
"loss": -0.2542, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.7646470705858828, |
|
"grad_norm": 4.271019064733657, |
|
"learning_rate": 4.867986798679868e-07, |
|
"loss": -0.1869, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.7654469106178764, |
|
"grad_norm": 2.77377368877347, |
|
"learning_rate": 4.851485148514851e-07, |
|
"loss": -0.0735, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.76624675064987, |
|
"grad_norm": 6.0608792055595995, |
|
"learning_rate": 4.834983498349835e-07, |
|
"loss": -0.0099, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.7670465906818636, |
|
"grad_norm": 4.371986441183748, |
|
"learning_rate": 4.818481848184819e-07, |
|
"loss": -0.0748, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.7678464307138573, |
|
"grad_norm": 3.611071563569357, |
|
"learning_rate": 4.801980198019802e-07, |
|
"loss": -0.085, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.7686462707458508, |
|
"grad_norm": 5.1052303944455915, |
|
"learning_rate": 4.785478547854785e-07, |
|
"loss": 0.0156, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.7694461107778444, |
|
"grad_norm": 4.3911870956367505, |
|
"learning_rate": 4.768976897689769e-07, |
|
"loss": -0.0685, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.770245950809838, |
|
"grad_norm": 4.259527634413816, |
|
"learning_rate": 4.752475247524752e-07, |
|
"loss": -0.0837, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.7710457908418317, |
|
"grad_norm": 4.970224413650343, |
|
"learning_rate": 4.735973597359736e-07, |
|
"loss": -0.1512, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.7718456308738252, |
|
"grad_norm": 5.7445596582962395, |
|
"learning_rate": 4.7194719471947193e-07, |
|
"loss": -0.0713, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.7726454709058188, |
|
"grad_norm": 4.543014932858551, |
|
"learning_rate": 4.7029702970297026e-07, |
|
"loss": -0.1337, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.7734453109378124, |
|
"grad_norm": 4.451634421927441, |
|
"learning_rate": 4.6864686468646865e-07, |
|
"loss": -0.1269, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.7742451509698061, |
|
"grad_norm": 5.379063157263456, |
|
"learning_rate": 4.66996699669967e-07, |
|
"loss": -0.0862, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.7750449910017997, |
|
"grad_norm": 10.70668137767473, |
|
"learning_rate": 4.6534653465346537e-07, |
|
"loss": 0.0881, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.7758448310337932, |
|
"grad_norm": 3.7167032202960177, |
|
"learning_rate": 4.6369636963696365e-07, |
|
"loss": -0.017, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.7766446710657868, |
|
"grad_norm": 4.634801839565637, |
|
"learning_rate": 4.62046204620462e-07, |
|
"loss": -0.1011, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.7774445110977805, |
|
"grad_norm": 3.7911455543889354, |
|
"learning_rate": 4.603960396039604e-07, |
|
"loss": -0.1149, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.7782443511297741, |
|
"grad_norm": 3.824528464380088, |
|
"learning_rate": 4.587458745874587e-07, |
|
"loss": 0.0263, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.7790441911617676, |
|
"grad_norm": 3.978602397648478, |
|
"learning_rate": 4.5709570957095705e-07, |
|
"loss": -0.0273, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.7798440311937612, |
|
"grad_norm": 3.9315702808225206, |
|
"learning_rate": 4.5544554455445543e-07, |
|
"loss": -0.1133, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.7806438712257548, |
|
"grad_norm": 4.4594711325427845, |
|
"learning_rate": 4.5379537953795377e-07, |
|
"loss": -0.0891, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.7814437112577485, |
|
"grad_norm": 4.03305817498308, |
|
"learning_rate": 4.521452145214521e-07, |
|
"loss": -0.1069, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.7822435512897421, |
|
"grad_norm": 4.444308363632601, |
|
"learning_rate": 4.504950495049505e-07, |
|
"loss": -0.1274, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.7830433913217356, |
|
"grad_norm": 3.8559207670610953, |
|
"learning_rate": 4.488448844884488e-07, |
|
"loss": -0.1089, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.7838432313537292, |
|
"grad_norm": 6.852327449815295, |
|
"learning_rate": 4.471947194719472e-07, |
|
"loss": -0.0608, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.7846430713857229, |
|
"grad_norm": 5.172264061437722, |
|
"learning_rate": 4.4554455445544555e-07, |
|
"loss": -0.0064, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.7854429114177165, |
|
"grad_norm": 4.338134868672705, |
|
"learning_rate": 4.438943894389439e-07, |
|
"loss": 0.0261, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.78624275144971, |
|
"grad_norm": 4.7120432908120975, |
|
"learning_rate": 4.4224422442244227e-07, |
|
"loss": -0.006, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.7870425914817036, |
|
"grad_norm": 4.093388665246742, |
|
"learning_rate": 4.405940594059406e-07, |
|
"loss": -0.1195, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.7878424315136973, |
|
"grad_norm": 3.9779679788074565, |
|
"learning_rate": 4.389438943894389e-07, |
|
"loss": -0.0318, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.7886422715456909, |
|
"grad_norm": 5.681211915009173, |
|
"learning_rate": 4.3729372937293727e-07, |
|
"loss": -0.0715, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.7894421115776845, |
|
"grad_norm": 3.6961307708427875, |
|
"learning_rate": 4.356435643564356e-07, |
|
"loss": -0.1238, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.790241951609678, |
|
"grad_norm": 5.551081220452864, |
|
"learning_rate": 4.3399339933993394e-07, |
|
"loss": -0.0353, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.7910417916416717, |
|
"grad_norm": 4.1540822277204725, |
|
"learning_rate": 4.3234323432343233e-07, |
|
"loss": -0.1044, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.7918416316736653, |
|
"grad_norm": 5.690644788629102, |
|
"learning_rate": 4.3069306930693066e-07, |
|
"loss": -0.0824, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.7926414717056589, |
|
"grad_norm": 5.090727863244342, |
|
"learning_rate": 4.29042904290429e-07, |
|
"loss": -0.0275, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.7934413117376524, |
|
"grad_norm": 4.9291753971455705, |
|
"learning_rate": 4.273927392739274e-07, |
|
"loss": -0.0339, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.794241151769646, |
|
"grad_norm": 6.803186332065206, |
|
"learning_rate": 4.257425742574257e-07, |
|
"loss": -0.0342, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.7950409918016397, |
|
"grad_norm": 5.804775232239933, |
|
"learning_rate": 4.240924092409241e-07, |
|
"loss": -0.0866, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.7958408318336333, |
|
"grad_norm": 4.750722930505078, |
|
"learning_rate": 4.2244224422442244e-07, |
|
"loss": -0.1836, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.7966406718656269, |
|
"grad_norm": 6.716244820472627, |
|
"learning_rate": 4.207920792079208e-07, |
|
"loss": -0.1024, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.7974405118976204, |
|
"grad_norm": 3.722729260908509, |
|
"learning_rate": 4.1914191419141916e-07, |
|
"loss": -0.0993, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.7982403519296141, |
|
"grad_norm": 3.311724877987371, |
|
"learning_rate": 4.174917491749175e-07, |
|
"loss": -0.104, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.7990401919616077, |
|
"grad_norm": 5.521959854449801, |
|
"learning_rate": 4.158415841584158e-07, |
|
"loss": -0.1918, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.7998400319936013, |
|
"grad_norm": 5.022786099690795, |
|
"learning_rate": 4.1419141914191417e-07, |
|
"loss": -0.044, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8006398720255948, |
|
"grad_norm": 3.85176033301189, |
|
"learning_rate": 4.125412541254125e-07, |
|
"loss": -0.1389, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 0.8014397120575885, |
|
"grad_norm": 3.850320054659798, |
|
"learning_rate": 4.1089108910891084e-07, |
|
"loss": -0.0621, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 0.8022395520895821, |
|
"grad_norm": 5.125417738846334, |
|
"learning_rate": 4.092409240924092e-07, |
|
"loss": 0.024, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 0.8030393921215757, |
|
"grad_norm": 5.550789325018317, |
|
"learning_rate": 4.0759075907590756e-07, |
|
"loss": -0.0596, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 0.8038392321535693, |
|
"grad_norm": 2.987341879008025, |
|
"learning_rate": 4.0594059405940595e-07, |
|
"loss": 0.0174, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.8046390721855629, |
|
"grad_norm": 3.840920872539408, |
|
"learning_rate": 4.042904290429043e-07, |
|
"loss": -0.1812, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 0.8054389122175565, |
|
"grad_norm": 4.5904346361674495, |
|
"learning_rate": 4.026402640264026e-07, |
|
"loss": -0.153, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 0.8062387522495501, |
|
"grad_norm": 4.716177536008103, |
|
"learning_rate": 4.00990099009901e-07, |
|
"loss": -0.0347, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 0.8070385922815437, |
|
"grad_norm": 3.8819770758540106, |
|
"learning_rate": 3.9933993399339934e-07, |
|
"loss": -0.0491, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 0.8078384323135372, |
|
"grad_norm": 4.337114713855018, |
|
"learning_rate": 3.9768976897689767e-07, |
|
"loss": -0.076, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.8086382723455309, |
|
"grad_norm": 5.085958876323165, |
|
"learning_rate": 3.9603960396039606e-07, |
|
"loss": -0.1292, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 0.8094381123775245, |
|
"grad_norm": 4.225428966888881, |
|
"learning_rate": 3.943894389438944e-07, |
|
"loss": -0.0349, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 0.8102379524095181, |
|
"grad_norm": 3.132492848210798, |
|
"learning_rate": 3.927392739273927e-07, |
|
"loss": 0.0267, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 0.8110377924415118, |
|
"grad_norm": 4.728427487496938, |
|
"learning_rate": 3.9108910891089106e-07, |
|
"loss": 0.0251, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 0.8118376324735053, |
|
"grad_norm": 3.231169950869779, |
|
"learning_rate": 3.894389438943894e-07, |
|
"loss": -0.0048, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.8126374725054989, |
|
"grad_norm": 5.4315342239443645, |
|
"learning_rate": 3.8778877887788773e-07, |
|
"loss": 0.0017, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 0.8134373125374925, |
|
"grad_norm": 4.1495886173643015, |
|
"learning_rate": 3.861386138613861e-07, |
|
"loss": -0.0606, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 0.8142371525694861, |
|
"grad_norm": 4.571814448385221, |
|
"learning_rate": 3.8448844884488445e-07, |
|
"loss": 0.0305, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 0.8150369926014797, |
|
"grad_norm": 5.376749508040782, |
|
"learning_rate": 3.8283828382838284e-07, |
|
"loss": -0.0529, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 0.8158368326334733, |
|
"grad_norm": 4.234529949886336, |
|
"learning_rate": 3.811881188118812e-07, |
|
"loss": -0.0884, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.8166366726654669, |
|
"grad_norm": 5.103075536241735, |
|
"learning_rate": 3.795379537953795e-07, |
|
"loss": -0.0217, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 0.8174365126974605, |
|
"grad_norm": 4.089650205096865, |
|
"learning_rate": 3.778877887788779e-07, |
|
"loss": -0.1912, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 0.8182363527294542, |
|
"grad_norm": 4.315512669100621, |
|
"learning_rate": 3.7623762376237623e-07, |
|
"loss": -0.1096, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 0.8190361927614477, |
|
"grad_norm": 5.730342268116768, |
|
"learning_rate": 3.7458745874587457e-07, |
|
"loss": -0.0613, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 0.8198360327934413, |
|
"grad_norm": 4.482232090208855, |
|
"learning_rate": 3.7293729372937295e-07, |
|
"loss": -0.0025, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.8206358728254349, |
|
"grad_norm": 4.242971330310737, |
|
"learning_rate": 3.712871287128713e-07, |
|
"loss": -0.0786, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 0.8214357128574286, |
|
"grad_norm": 4.11563596499524, |
|
"learning_rate": 3.696369636963696e-07, |
|
"loss": -0.082, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 0.8222355528894221, |
|
"grad_norm": 9.332422439085308, |
|
"learning_rate": 3.6798679867986796e-07, |
|
"loss": 0.0022, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 0.8230353929214157, |
|
"grad_norm": 4.067536269426566, |
|
"learning_rate": 3.663366336633663e-07, |
|
"loss": -0.0361, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 0.8238352329534093, |
|
"grad_norm": 6.044500944552922, |
|
"learning_rate": 3.6468646864686463e-07, |
|
"loss": 0.1496, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.824635072985403, |
|
"grad_norm": 4.30205099295344, |
|
"learning_rate": 3.63036303630363e-07, |
|
"loss": -0.045, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 0.8254349130173965, |
|
"grad_norm": 4.012282824952302, |
|
"learning_rate": 3.6138613861386135e-07, |
|
"loss": -0.1718, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 0.8262347530493901, |
|
"grad_norm": 4.159359564634324, |
|
"learning_rate": 3.5973597359735974e-07, |
|
"loss": -0.0454, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 0.8270345930813837, |
|
"grad_norm": 3.814387620245282, |
|
"learning_rate": 3.5808580858085807e-07, |
|
"loss": -0.0466, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 0.8278344331133773, |
|
"grad_norm": 6.904216207979962, |
|
"learning_rate": 3.564356435643564e-07, |
|
"loss": -0.1518, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.828634273145371, |
|
"grad_norm": 5.250865501632598, |
|
"learning_rate": 3.547854785478548e-07, |
|
"loss": -0.0118, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 0.8294341131773645, |
|
"grad_norm": 5.846194811534598, |
|
"learning_rate": 3.5313531353135313e-07, |
|
"loss": 0.0351, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 0.8302339532093581, |
|
"grad_norm": 4.460630924092106, |
|
"learning_rate": 3.5148514851485146e-07, |
|
"loss": -0.2443, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 0.8310337932413517, |
|
"grad_norm": 4.1447157895119995, |
|
"learning_rate": 3.4983498349834985e-07, |
|
"loss": -0.0134, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 0.8318336332733454, |
|
"grad_norm": 3.5252220895483517, |
|
"learning_rate": 3.481848184818482e-07, |
|
"loss": -0.0577, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.8326334733053389, |
|
"grad_norm": 4.361902741479118, |
|
"learning_rate": 3.465346534653465e-07, |
|
"loss": 0.0104, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 0.8334333133373325, |
|
"grad_norm": 4.461915879260683, |
|
"learning_rate": 3.4488448844884485e-07, |
|
"loss": -0.0156, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 0.8342331533693261, |
|
"grad_norm": 4.834838939615413, |
|
"learning_rate": 3.432343234323432e-07, |
|
"loss": -0.0306, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 0.8350329934013198, |
|
"grad_norm": 4.457492333115142, |
|
"learning_rate": 3.415841584158416e-07, |
|
"loss": -0.0158, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 0.8358328334333134, |
|
"grad_norm": 6.418129824325349, |
|
"learning_rate": 3.399339933993399e-07, |
|
"loss": 0.023, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.8366326734653069, |
|
"grad_norm": 5.631846859681406, |
|
"learning_rate": 3.3828382838283824e-07, |
|
"loss": -0.0842, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 0.8374325134973005, |
|
"grad_norm": 4.893647743608584, |
|
"learning_rate": 3.3663366336633663e-07, |
|
"loss": 0.0169, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 0.8382323535292941, |
|
"grad_norm": 3.327224537992695, |
|
"learning_rate": 3.3498349834983497e-07, |
|
"loss": 0.0234, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 0.8390321935612878, |
|
"grad_norm": 3.334769765979331, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": -0.0878, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 0.8398320335932813, |
|
"grad_norm": 4.036389763362471, |
|
"learning_rate": 3.316831683168317e-07, |
|
"loss": -0.0402, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.8406318736252749, |
|
"grad_norm": 3.653874204118681, |
|
"learning_rate": 3.3003300330033e-07, |
|
"loss": -0.0631, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 0.8414317136572685, |
|
"grad_norm": 4.88359310166619, |
|
"learning_rate": 3.2838283828382836e-07, |
|
"loss": -0.0544, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 0.8422315536892622, |
|
"grad_norm": 6.462333703622296, |
|
"learning_rate": 3.2673267326732674e-07, |
|
"loss": -0.0678, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 0.8430313937212558, |
|
"grad_norm": 7.009021395345441, |
|
"learning_rate": 3.250825082508251e-07, |
|
"loss": 0.0099, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 0.8438312337532493, |
|
"grad_norm": 4.2767377032125875, |
|
"learning_rate": 3.234323432343234e-07, |
|
"loss": 0.0297, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.8446310737852429, |
|
"grad_norm": 9.79641552202019, |
|
"learning_rate": 3.217821782178218e-07, |
|
"loss": 0.0968, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.8454309138172366, |
|
"grad_norm": 4.669605737417231, |
|
"learning_rate": 3.201320132013201e-07, |
|
"loss": 0.0097, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 0.8462307538492302, |
|
"grad_norm": 3.1819061861624807, |
|
"learning_rate": 3.1848184818481847e-07, |
|
"loss": -0.0776, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 0.8470305938812237, |
|
"grad_norm": 4.1774987880629695, |
|
"learning_rate": 3.168316831683168e-07, |
|
"loss": -0.0806, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 0.8478304339132173, |
|
"grad_norm": 4.454569906758588, |
|
"learning_rate": 3.1518151815181514e-07, |
|
"loss": -0.0526, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.848630273945211, |
|
"grad_norm": 3.527299815228531, |
|
"learning_rate": 3.1353135313531353e-07, |
|
"loss": -0.1065, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 0.8494301139772046, |
|
"grad_norm": 4.481801002071373, |
|
"learning_rate": 3.1188118811881186e-07, |
|
"loss": -0.0022, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 0.8502299540091982, |
|
"grad_norm": 4.3147168197624755, |
|
"learning_rate": 3.102310231023102e-07, |
|
"loss": -0.011, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 0.8510297940411917, |
|
"grad_norm": 3.812340279657359, |
|
"learning_rate": 3.085808580858086e-07, |
|
"loss": -0.2093, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 0.8518296340731853, |
|
"grad_norm": 5.097295358094463, |
|
"learning_rate": 3.069306930693069e-07, |
|
"loss": -0.0048, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.852629474105179, |
|
"grad_norm": 5.088642578790314, |
|
"learning_rate": 3.0528052805280525e-07, |
|
"loss": -0.1315, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 0.8534293141371726, |
|
"grad_norm": 4.052723785754238, |
|
"learning_rate": 3.0363036303630364e-07, |
|
"loss": -0.1133, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 0.8542291541691661, |
|
"grad_norm": 5.193579179546016, |
|
"learning_rate": 3.01980198019802e-07, |
|
"loss": -0.0787, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 0.8550289942011597, |
|
"grad_norm": 3.030054387526671, |
|
"learning_rate": 3.0033003300330036e-07, |
|
"loss": -0.0677, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 0.8558288342331534, |
|
"grad_norm": 9.135373007054904, |
|
"learning_rate": 2.986798679867987e-07, |
|
"loss": -0.1171, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.856628674265147, |
|
"grad_norm": 3.3785319743939013, |
|
"learning_rate": 2.97029702970297e-07, |
|
"loss": -0.0803, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 0.8574285142971406, |
|
"grad_norm": 3.9077882713350762, |
|
"learning_rate": 2.9537953795379537e-07, |
|
"loss": -0.0513, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 0.8582283543291341, |
|
"grad_norm": 4.038560493011451, |
|
"learning_rate": 2.937293729372937e-07, |
|
"loss": -0.0518, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 0.8590281943611278, |
|
"grad_norm": 3.800775478942818, |
|
"learning_rate": 2.9207920792079203e-07, |
|
"loss": -0.1613, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 0.8598280343931214, |
|
"grad_norm": 5.948071515082444, |
|
"learning_rate": 2.904290429042904e-07, |
|
"loss": -0.0834, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.860627874425115, |
|
"grad_norm": 4.190899280558429, |
|
"learning_rate": 2.8877887788778876e-07, |
|
"loss": -0.103, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 0.8614277144571085, |
|
"grad_norm": 4.910742773988997, |
|
"learning_rate": 2.871287128712871e-07, |
|
"loss": -0.0388, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 0.8622275544891022, |
|
"grad_norm": 7.707730353888358, |
|
"learning_rate": 2.854785478547855e-07, |
|
"loss": -0.1477, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 0.8630273945210958, |
|
"grad_norm": 4.297638426499125, |
|
"learning_rate": 2.838283828382838e-07, |
|
"loss": 0.0134, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 0.8638272345530894, |
|
"grad_norm": 6.145968919540754, |
|
"learning_rate": 2.8217821782178215e-07, |
|
"loss": 0.0021, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.864627074585083, |
|
"grad_norm": 3.7854604687393296, |
|
"learning_rate": 2.8052805280528054e-07, |
|
"loss": -0.1547, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 0.8654269146170765, |
|
"grad_norm": 3.991674461171312, |
|
"learning_rate": 2.7887788778877887e-07, |
|
"loss": -0.0668, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 0.8662267546490702, |
|
"grad_norm": 4.795940892228349, |
|
"learning_rate": 2.7722772277227726e-07, |
|
"loss": 0.0146, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 0.8670265946810638, |
|
"grad_norm": 4.944098976058084, |
|
"learning_rate": 2.755775577557756e-07, |
|
"loss": -0.0523, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 0.8678264347130574, |
|
"grad_norm": 5.005504629817167, |
|
"learning_rate": 2.7392739273927387e-07, |
|
"loss": 0.0065, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.8686262747450509, |
|
"grad_norm": 4.33635141957305, |
|
"learning_rate": 2.7227722772277226e-07, |
|
"loss": 0.0051, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 0.8694261147770446, |
|
"grad_norm": 6.827221005304679, |
|
"learning_rate": 2.706270627062706e-07, |
|
"loss": -0.1068, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 0.8702259548090382, |
|
"grad_norm": 3.763012118037954, |
|
"learning_rate": 2.6897689768976893e-07, |
|
"loss": 0.0675, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 0.8710257948410318, |
|
"grad_norm": 3.476547412625268, |
|
"learning_rate": 2.673267326732673e-07, |
|
"loss": -0.1067, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 0.8718256348730254, |
|
"grad_norm": 3.622631746348685, |
|
"learning_rate": 2.6567656765676565e-07, |
|
"loss": -0.0829, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.872625474905019, |
|
"grad_norm": 4.835180762619133, |
|
"learning_rate": 2.64026402640264e-07, |
|
"loss": -0.0761, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 0.8734253149370126, |
|
"grad_norm": 4.188641976033946, |
|
"learning_rate": 2.623762376237624e-07, |
|
"loss": -0.114, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 0.8742251549690062, |
|
"grad_norm": 4.833712113544916, |
|
"learning_rate": 2.607260726072607e-07, |
|
"loss": -0.1158, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 0.8750249950009998, |
|
"grad_norm": 3.6494839656219935, |
|
"learning_rate": 2.590759075907591e-07, |
|
"loss": -0.048, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 0.8758248350329934, |
|
"grad_norm": 3.9306302162750857, |
|
"learning_rate": 2.5742574257425743e-07, |
|
"loss": -0.0928, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.876624675064987, |
|
"grad_norm": 3.7010390446563517, |
|
"learning_rate": 2.5577557755775576e-07, |
|
"loss": 0.0242, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 0.8774245150969806, |
|
"grad_norm": 3.641273539002507, |
|
"learning_rate": 2.5412541254125415e-07, |
|
"loss": -0.1014, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 0.8782243551289742, |
|
"grad_norm": 4.233409363271656, |
|
"learning_rate": 2.524752475247525e-07, |
|
"loss": -0.0404, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 0.8790241951609679, |
|
"grad_norm": 4.188973466495453, |
|
"learning_rate": 2.508250825082508e-07, |
|
"loss": -0.0684, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 0.8798240351929614, |
|
"grad_norm": 5.017584397195866, |
|
"learning_rate": 2.4917491749174916e-07, |
|
"loss": -0.0368, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.880623875224955, |
|
"grad_norm": 3.9510700176873566, |
|
"learning_rate": 2.475247524752475e-07, |
|
"loss": -0.1375, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 0.8814237152569486, |
|
"grad_norm": 5.84233851394486, |
|
"learning_rate": 2.458745874587459e-07, |
|
"loss": -0.1067, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 0.8822235552889423, |
|
"grad_norm": 5.406949565806744, |
|
"learning_rate": 2.442244224422442e-07, |
|
"loss": -0.0962, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 0.8830233953209358, |
|
"grad_norm": 5.315262379239265, |
|
"learning_rate": 2.4257425742574255e-07, |
|
"loss": -0.066, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 0.8838232353529294, |
|
"grad_norm": 4.836530658291514, |
|
"learning_rate": 2.4092409240924093e-07, |
|
"loss": -0.0646, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.884623075384923, |
|
"grad_norm": 3.293455547222145, |
|
"learning_rate": 2.3927392739273927e-07, |
|
"loss": -0.1701, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 0.8854229154169166, |
|
"grad_norm": 4.709525078481242, |
|
"learning_rate": 2.376237623762376e-07, |
|
"loss": -0.0504, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 0.8862227554489103, |
|
"grad_norm": 4.295657231556702, |
|
"learning_rate": 2.3597359735973596e-07, |
|
"loss": -0.1419, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 0.8870225954809038, |
|
"grad_norm": 3.682029286721376, |
|
"learning_rate": 2.3432343234323433e-07, |
|
"loss": -0.0927, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 0.8878224355128974, |
|
"grad_norm": 7.500929711256007, |
|
"learning_rate": 2.3267326732673269e-07, |
|
"loss": -0.001, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.888622275544891, |
|
"grad_norm": 3.370577280876358, |
|
"learning_rate": 2.31023102310231e-07, |
|
"loss": -0.0219, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 0.8894221155768847, |
|
"grad_norm": 4.603247549338215, |
|
"learning_rate": 2.2937293729372936e-07, |
|
"loss": -0.0407, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 0.8902219556088782, |
|
"grad_norm": 3.033292259385364, |
|
"learning_rate": 2.2772277227722772e-07, |
|
"loss": -0.0612, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 0.8910217956408718, |
|
"grad_norm": 5.654397566299044, |
|
"learning_rate": 2.2607260726072605e-07, |
|
"loss": -0.0081, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 0.8918216356728654, |
|
"grad_norm": 4.7463861069291235, |
|
"learning_rate": 2.244224422442244e-07, |
|
"loss": 0.0324, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.8926214757048591, |
|
"grad_norm": 3.6795751469461697, |
|
"learning_rate": 2.2277227722772277e-07, |
|
"loss": -0.082, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 0.8934213157368527, |
|
"grad_norm": 6.7304826361036385, |
|
"learning_rate": 2.2112211221122113e-07, |
|
"loss": -0.1421, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 0.8942211557688462, |
|
"grad_norm": 4.6084848352584, |
|
"learning_rate": 2.1947194719471944e-07, |
|
"loss": -0.0154, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 0.8950209958008398, |
|
"grad_norm": 3.3241186250160673, |
|
"learning_rate": 2.178217821782178e-07, |
|
"loss": -0.0463, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 0.8958208358328335, |
|
"grad_norm": 4.285630706698749, |
|
"learning_rate": 2.1617161716171616e-07, |
|
"loss": -0.0102, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.8966206758648271, |
|
"grad_norm": 3.7442923024099266, |
|
"learning_rate": 2.145214521452145e-07, |
|
"loss": -0.0737, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 0.8974205158968206, |
|
"grad_norm": 4.067618329578387, |
|
"learning_rate": 2.1287128712871286e-07, |
|
"loss": -0.0694, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 0.8982203559288142, |
|
"grad_norm": 4.012101702069505, |
|
"learning_rate": 2.1122112211221122e-07, |
|
"loss": 0.013, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 0.8990201959608078, |
|
"grad_norm": 3.927004556177739, |
|
"learning_rate": 2.0957095709570958e-07, |
|
"loss": 0.0032, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 0.8998200359928015, |
|
"grad_norm": 4.153485289274271, |
|
"learning_rate": 2.079207920792079e-07, |
|
"loss": 0.0691, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.9006198760247951, |
|
"grad_norm": 3.0852405388784936, |
|
"learning_rate": 2.0627062706270625e-07, |
|
"loss": -0.152, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 0.9014197160567886, |
|
"grad_norm": 5.668232907029322, |
|
"learning_rate": 2.046204620462046e-07, |
|
"loss": -0.0051, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 0.9022195560887822, |
|
"grad_norm": 4.016353081034237, |
|
"learning_rate": 2.0297029702970297e-07, |
|
"loss": 0.004, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 0.9030193961207759, |
|
"grad_norm": 5.222331571223233, |
|
"learning_rate": 2.013201320132013e-07, |
|
"loss": -0.0398, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 0.9038192361527695, |
|
"grad_norm": 6.050674073050146, |
|
"learning_rate": 1.9966996699669967e-07, |
|
"loss": -0.0201, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.904619076184763, |
|
"grad_norm": 3.6129712440915336, |
|
"learning_rate": 1.9801980198019803e-07, |
|
"loss": -0.0873, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 0.9054189162167566, |
|
"grad_norm": 3.9706646963831527, |
|
"learning_rate": 1.9636963696369634e-07, |
|
"loss": -0.0425, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 0.9062187562487503, |
|
"grad_norm": 3.575800911347383, |
|
"learning_rate": 1.947194719471947e-07, |
|
"loss": -0.0728, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 0.9070185962807439, |
|
"grad_norm": 4.9127144958415165, |
|
"learning_rate": 1.9306930693069306e-07, |
|
"loss": -0.1456, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 0.9078184363127374, |
|
"grad_norm": 4.630716245217967, |
|
"learning_rate": 1.9141914191419142e-07, |
|
"loss": -0.1385, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.908618276344731, |
|
"grad_norm": 3.7522928222148413, |
|
"learning_rate": 1.8976897689768976e-07, |
|
"loss": -0.0473, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 0.9094181163767247, |
|
"grad_norm": 3.702942791411621, |
|
"learning_rate": 1.8811881188118812e-07, |
|
"loss": -0.0115, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 0.9102179564087183, |
|
"grad_norm": 4.215778349737591, |
|
"learning_rate": 1.8646864686468648e-07, |
|
"loss": 0.0369, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 0.9110177964407119, |
|
"grad_norm": 4.312725558809124, |
|
"learning_rate": 1.848184818481848e-07, |
|
"loss": -0.0821, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 0.9118176364727054, |
|
"grad_norm": 4.321449833697151, |
|
"learning_rate": 1.8316831683168315e-07, |
|
"loss": -0.1315, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.912617476504699, |
|
"grad_norm": 3.2649224360601234, |
|
"learning_rate": 1.815181518151815e-07, |
|
"loss": -0.1925, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 0.9134173165366927, |
|
"grad_norm": 3.760790622253671, |
|
"learning_rate": 1.7986798679867987e-07, |
|
"loss": -0.0013, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 0.9142171565686863, |
|
"grad_norm": 4.157207407424451, |
|
"learning_rate": 1.782178217821782e-07, |
|
"loss": -0.0581, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 0.9150169966006798, |
|
"grad_norm": 5.763265152647982, |
|
"learning_rate": 1.7656765676567656e-07, |
|
"loss": -0.0789, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 0.9158168366326734, |
|
"grad_norm": 5.000391563342132, |
|
"learning_rate": 1.7491749174917492e-07, |
|
"loss": 0.017, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.9166166766646671, |
|
"grad_norm": 5.503835207911807, |
|
"learning_rate": 1.7326732673267326e-07, |
|
"loss": -0.0498, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 0.9174165166966607, |
|
"grad_norm": 4.892439922885906, |
|
"learning_rate": 1.716171617161716e-07, |
|
"loss": -0.0188, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 0.9182163567286543, |
|
"grad_norm": 4.1251081972670915, |
|
"learning_rate": 1.6996699669966995e-07, |
|
"loss": -0.1601, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 0.9190161967606478, |
|
"grad_norm": 5.234413078511215, |
|
"learning_rate": 1.6831683168316832e-07, |
|
"loss": -0.0751, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 0.9198160367926415, |
|
"grad_norm": 5.21079968866447, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"loss": 0.0173, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.9206158768246351, |
|
"grad_norm": 4.287315805109782, |
|
"learning_rate": 1.65016501650165e-07, |
|
"loss": -0.0245, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 0.9214157168566287, |
|
"grad_norm": 2.89403897319458, |
|
"learning_rate": 1.6336633663366337e-07, |
|
"loss": -0.1234, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 0.9222155568886222, |
|
"grad_norm": 5.421448264794362, |
|
"learning_rate": 1.617161716171617e-07, |
|
"loss": -0.0738, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 0.9230153969206158, |
|
"grad_norm": 5.340841829330172, |
|
"learning_rate": 1.6006600660066004e-07, |
|
"loss": -0.0978, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 0.9238152369526095, |
|
"grad_norm": 5.139659477858416, |
|
"learning_rate": 1.584158415841584e-07, |
|
"loss": -0.0053, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.9246150769846031, |
|
"grad_norm": 3.5692046134784676, |
|
"learning_rate": 1.5676567656765676e-07, |
|
"loss": -0.078, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 0.9254149170165967, |
|
"grad_norm": 3.6214949664373624, |
|
"learning_rate": 1.551155115511551e-07, |
|
"loss": -0.1948, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 0.9262147570485902, |
|
"grad_norm": 3.6462100170455516, |
|
"learning_rate": 1.5346534653465346e-07, |
|
"loss": -0.1466, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 0.9270145970805839, |
|
"grad_norm": 4.042490583513813, |
|
"learning_rate": 1.5181518151815182e-07, |
|
"loss": -0.0494, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 0.9278144371125775, |
|
"grad_norm": 3.570518304081052, |
|
"learning_rate": 1.5016501650165018e-07, |
|
"loss": -0.0203, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.9286142771445711, |
|
"grad_norm": 4.860609480391736, |
|
"learning_rate": 1.485148514851485e-07, |
|
"loss": -0.0608, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 0.9294141171765646, |
|
"grad_norm": 3.6503782154768336, |
|
"learning_rate": 1.4686468646864685e-07, |
|
"loss": -0.1209, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 0.9302139572085583, |
|
"grad_norm": 5.2679977730736915, |
|
"learning_rate": 1.452145214521452e-07, |
|
"loss": -0.2231, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 0.9310137972405519, |
|
"grad_norm": 4.077205627405786, |
|
"learning_rate": 1.4356435643564355e-07, |
|
"loss": -0.0918, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 0.9318136372725455, |
|
"grad_norm": 4.7895431899614245, |
|
"learning_rate": 1.419141914191419e-07, |
|
"loss": -0.0527, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.9326134773045391, |
|
"grad_norm": 5.279018314307402, |
|
"learning_rate": 1.4026402640264027e-07, |
|
"loss": 0.0147, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 0.9334133173365327, |
|
"grad_norm": 4.336258829943017, |
|
"learning_rate": 1.3861386138613863e-07, |
|
"loss": -0.0938, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 0.9342131573685263, |
|
"grad_norm": 4.54870086400182, |
|
"learning_rate": 1.3696369636963694e-07, |
|
"loss": -0.1337, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 0.9350129974005199, |
|
"grad_norm": 4.5996184978678105, |
|
"learning_rate": 1.353135313531353e-07, |
|
"loss": -0.0564, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 0.9358128374325135, |
|
"grad_norm": 3.580175353715861, |
|
"learning_rate": 1.3366336633663366e-07, |
|
"loss": -0.0917, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.936612677464507, |
|
"grad_norm": 4.236520821746979, |
|
"learning_rate": 1.32013201320132e-07, |
|
"loss": -0.0952, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 0.9374125174965007, |
|
"grad_norm": 4.45059914145225, |
|
"learning_rate": 1.3036303630363035e-07, |
|
"loss": -0.0501, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 0.9382123575284943, |
|
"grad_norm": 3.895112295625407, |
|
"learning_rate": 1.2871287128712872e-07, |
|
"loss": -0.0248, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 0.9390121975604879, |
|
"grad_norm": 3.446049410323525, |
|
"learning_rate": 1.2706270627062708e-07, |
|
"loss": -0.0609, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 0.9398120375924816, |
|
"grad_norm": 4.440477903266653, |
|
"learning_rate": 1.254125412541254e-07, |
|
"loss": -0.1183, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.9406118776244751, |
|
"grad_norm": 8.879875026201496, |
|
"learning_rate": 1.2376237623762375e-07, |
|
"loss": 0.0741, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 0.9414117176564687, |
|
"grad_norm": 3.251289551995566, |
|
"learning_rate": 1.221122112211221e-07, |
|
"loss": -0.2012, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 0.9422115576884623, |
|
"grad_norm": 5.090866489665643, |
|
"learning_rate": 1.2046204620462047e-07, |
|
"loss": -0.0937, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 0.943011397720456, |
|
"grad_norm": 5.634824692335556, |
|
"learning_rate": 1.188118811881188e-07, |
|
"loss": -0.0674, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 0.9438112377524495, |
|
"grad_norm": 7.237946586468722, |
|
"learning_rate": 1.1716171617161716e-07, |
|
"loss": 0.0063, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.9446110777844431, |
|
"grad_norm": 5.322093424679851, |
|
"learning_rate": 1.155115511551155e-07, |
|
"loss": 0.0182, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 0.9454109178164367, |
|
"grad_norm": 5.685444219272491, |
|
"learning_rate": 1.1386138613861386e-07, |
|
"loss": -0.0409, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 0.9462107578484303, |
|
"grad_norm": 4.89343356574685, |
|
"learning_rate": 1.122112211221122e-07, |
|
"loss": -0.1669, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 0.947010597880424, |
|
"grad_norm": 2.976462375028197, |
|
"learning_rate": 1.1056105610561057e-07, |
|
"loss": -0.1014, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 0.9478104379124175, |
|
"grad_norm": 5.789816947507454, |
|
"learning_rate": 1.089108910891089e-07, |
|
"loss": -0.0268, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.9486102779444111, |
|
"grad_norm": 4.547707602704605, |
|
"learning_rate": 1.0726072607260725e-07, |
|
"loss": -0.0252, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 0.9494101179764047, |
|
"grad_norm": 3.669232395567316, |
|
"learning_rate": 1.0561056105610561e-07, |
|
"loss": -0.0721, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 0.9502099580083984, |
|
"grad_norm": 4.725560206551925, |
|
"learning_rate": 1.0396039603960394e-07, |
|
"loss": -0.0374, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 0.9510097980403919, |
|
"grad_norm": 5.642201380367948, |
|
"learning_rate": 1.023102310231023e-07, |
|
"loss": -0.0524, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 0.9518096380723855, |
|
"grad_norm": 5.1228456388588555, |
|
"learning_rate": 1.0066006600660065e-07, |
|
"loss": -0.0122, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.9526094781043791, |
|
"grad_norm": 3.5927188886776995, |
|
"learning_rate": 9.900990099009901e-08, |
|
"loss": -0.1424, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.9534093181363728, |
|
"grad_norm": 3.815501173104961, |
|
"learning_rate": 9.735973597359735e-08, |
|
"loss": -0.1578, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 0.9542091581683664, |
|
"grad_norm": 3.474045980664194, |
|
"learning_rate": 9.570957095709571e-08, |
|
"loss": -0.0597, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 0.9550089982003599, |
|
"grad_norm": 4.782969446108987, |
|
"learning_rate": 9.405940594059406e-08, |
|
"loss": -0.0957, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 0.9558088382323535, |
|
"grad_norm": 4.74265001734386, |
|
"learning_rate": 9.24092409240924e-08, |
|
"loss": -0.0267, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.9566086782643471, |
|
"grad_norm": 3.661437568587583, |
|
"learning_rate": 9.075907590759075e-08, |
|
"loss": -0.1227, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 0.9574085182963408, |
|
"grad_norm": 4.279576775134146, |
|
"learning_rate": 8.91089108910891e-08, |
|
"loss": 0.0111, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 0.9582083583283343, |
|
"grad_norm": 4.169597103863264, |
|
"learning_rate": 8.745874587458746e-08, |
|
"loss": 0.0377, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 0.9590081983603279, |
|
"grad_norm": 6.207479567569039, |
|
"learning_rate": 8.58085808580858e-08, |
|
"loss": 0.0014, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 0.9598080383923215, |
|
"grad_norm": 3.5423075974898586, |
|
"learning_rate": 8.415841584158416e-08, |
|
"loss": -0.1664, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9606078784243152, |
|
"grad_norm": 3.6165897542858887, |
|
"learning_rate": 8.25082508250825e-08, |
|
"loss": -0.0884, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 0.9614077184563088, |
|
"grad_norm": 4.517734549618362, |
|
"learning_rate": 8.085808580858085e-08, |
|
"loss": -0.1076, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 0.9622075584883023, |
|
"grad_norm": 4.307866136219069, |
|
"learning_rate": 7.92079207920792e-08, |
|
"loss": -0.115, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 0.9630073985202959, |
|
"grad_norm": 4.922270840667124, |
|
"learning_rate": 7.755775577557755e-08, |
|
"loss": -0.055, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 0.9638072385522896, |
|
"grad_norm": 4.385179502669176, |
|
"learning_rate": 7.590759075907591e-08, |
|
"loss": -0.1254, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.9646070785842832, |
|
"grad_norm": 3.9178495988004443, |
|
"learning_rate": 7.425742574257424e-08, |
|
"loss": -0.1016, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 0.9654069186162767, |
|
"grad_norm": 3.3585705170911515, |
|
"learning_rate": 7.26072607260726e-08, |
|
"loss": -0.0209, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 0.9662067586482703, |
|
"grad_norm": 3.815494549495067, |
|
"learning_rate": 7.095709570957095e-08, |
|
"loss": -0.0635, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 0.967006598680264, |
|
"grad_norm": 8.403899931437618, |
|
"learning_rate": 6.930693069306931e-08, |
|
"loss": -0.0465, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 0.9678064387122576, |
|
"grad_norm": 4.29568964473992, |
|
"learning_rate": 6.765676567656765e-08, |
|
"loss": -0.0332, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.9686062787442512, |
|
"grad_norm": 3.4280415193587235, |
|
"learning_rate": 6.6006600660066e-08, |
|
"loss": -0.0803, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 0.9694061187762447, |
|
"grad_norm": 4.6120423955763625, |
|
"learning_rate": 6.435643564356436e-08, |
|
"loss": -0.0619, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 0.9702059588082383, |
|
"grad_norm": 6.424876752925553, |
|
"learning_rate": 6.27062706270627e-08, |
|
"loss": -0.1442, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 0.971005798840232, |
|
"grad_norm": 5.485217081397391, |
|
"learning_rate": 6.105610561056105e-08, |
|
"loss": -0.0939, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 0.9718056388722256, |
|
"grad_norm": 6.774111317136949, |
|
"learning_rate": 5.94059405940594e-08, |
|
"loss": -0.0439, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.9726054789042191, |
|
"grad_norm": 5.096515115630733, |
|
"learning_rate": 5.775577557755775e-08, |
|
"loss": -0.0734, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 0.9734053189362127, |
|
"grad_norm": 4.159248360440637, |
|
"learning_rate": 5.61056105610561e-08, |
|
"loss": -0.0121, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 0.9742051589682064, |
|
"grad_norm": 7.334455086425638, |
|
"learning_rate": 5.445544554455445e-08, |
|
"loss": -0.1328, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 0.9750049990002, |
|
"grad_norm": 5.126425754126674, |
|
"learning_rate": 5.2805280528052805e-08, |
|
"loss": -0.123, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 0.9758048390321936, |
|
"grad_norm": 4.215981033934002, |
|
"learning_rate": 5.115511551155115e-08, |
|
"loss": -0.0326, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.9766046790641871, |
|
"grad_norm": 5.727858996419284, |
|
"learning_rate": 4.950495049504951e-08, |
|
"loss": -0.078, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 0.9774045190961808, |
|
"grad_norm": 4.2278775409875475, |
|
"learning_rate": 4.7854785478547855e-08, |
|
"loss": -0.1444, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 0.9782043591281744, |
|
"grad_norm": 5.1475401401668455, |
|
"learning_rate": 4.62046204620462e-08, |
|
"loss": -0.0758, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 0.979004199160168, |
|
"grad_norm": 4.979540737683743, |
|
"learning_rate": 4.455445544554455e-08, |
|
"loss": -0.0669, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 0.9798040391921615, |
|
"grad_norm": 3.4515888370253385, |
|
"learning_rate": 4.29042904290429e-08, |
|
"loss": -0.0937, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.9806038792241552, |
|
"grad_norm": 6.288776500193402, |
|
"learning_rate": 4.125412541254125e-08, |
|
"loss": 0.1102, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 0.9814037192561488, |
|
"grad_norm": 5.48415137375722, |
|
"learning_rate": 3.96039603960396e-08, |
|
"loss": -0.0964, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 0.9822035592881424, |
|
"grad_norm": 4.355868334038742, |
|
"learning_rate": 3.7953795379537955e-08, |
|
"loss": -0.0328, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 0.9830033993201359, |
|
"grad_norm": 4.721491339476331, |
|
"learning_rate": 3.63036303630363e-08, |
|
"loss": -0.0365, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 0.9838032393521295, |
|
"grad_norm": 6.339518521675752, |
|
"learning_rate": 3.465346534653466e-08, |
|
"loss": -0.0573, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.9846030793841232, |
|
"grad_norm": 4.434922235230731, |
|
"learning_rate": 3.3003300330033e-08, |
|
"loss": -0.1461, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 0.9854029194161168, |
|
"grad_norm": 5.310987908083999, |
|
"learning_rate": 3.135313531353135e-08, |
|
"loss": -0.0746, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.9862027594481104, |
|
"grad_norm": 5.686966755780067, |
|
"learning_rate": 2.97029702970297e-08, |
|
"loss": -0.0133, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 0.9870025994801039, |
|
"grad_norm": 4.108463781012627, |
|
"learning_rate": 2.805280528052805e-08, |
|
"loss": 0.0749, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 0.9878024395120976, |
|
"grad_norm": 3.6672659008615764, |
|
"learning_rate": 2.6402640264026403e-08, |
|
"loss": -0.1047, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.9886022795440912, |
|
"grad_norm": 3.9834854628962146, |
|
"learning_rate": 2.4752475247524754e-08, |
|
"loss": -0.0921, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 0.9894021195760848, |
|
"grad_norm": 3.5139198067318054, |
|
"learning_rate": 2.31023102310231e-08, |
|
"loss": -0.1611, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 0.9902019596080783, |
|
"grad_norm": 4.1541924223616356, |
|
"learning_rate": 2.145214521452145e-08, |
|
"loss": -0.0147, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 0.991001799640072, |
|
"grad_norm": 6.337275466101498, |
|
"learning_rate": 1.98019801980198e-08, |
|
"loss": -0.0767, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 0.9918016396720656, |
|
"grad_norm": 82.20527671342789, |
|
"learning_rate": 1.815181518151815e-08, |
|
"loss": -0.0431, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.9926014797040592, |
|
"grad_norm": 2.6922514851959494, |
|
"learning_rate": 1.65016501650165e-08, |
|
"loss": -0.0195, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 0.9934013197360528, |
|
"grad_norm": 3.6925978256211747, |
|
"learning_rate": 1.485148514851485e-08, |
|
"loss": -0.1541, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 0.9942011597680463, |
|
"grad_norm": 4.79635865852686, |
|
"learning_rate": 1.3201320132013201e-08, |
|
"loss": -0.1104, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 0.99500099980004, |
|
"grad_norm": 4.327272847702339, |
|
"learning_rate": 1.155115511551155e-08, |
|
"loss": -0.0807, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 0.9958008398320336, |
|
"grad_norm": 4.256644720520306, |
|
"learning_rate": 9.9009900990099e-09, |
|
"loss": -0.0278, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.9966006798640272, |
|
"grad_norm": 3.586841344680467, |
|
"learning_rate": 8.25082508250825e-09, |
|
"loss": 0.0033, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 0.9974005198960207, |
|
"grad_norm": 4.084312918321821, |
|
"learning_rate": 6.600660066006601e-09, |
|
"loss": -0.0053, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 0.9982003599280144, |
|
"grad_norm": 3.983696015790867, |
|
"learning_rate": 4.95049504950495e-09, |
|
"loss": -0.0229, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 0.999000199960008, |
|
"grad_norm": 4.1770415014644104, |
|
"learning_rate": 3.3003300330033003e-09, |
|
"loss": -0.1898, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 0.9998000399920016, |
|
"grad_norm": 4.645732496504415, |
|
"learning_rate": 1.6501650165016502e-09, |
|
"loss": 0.0221, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.9998000399920016, |
|
"step": 1250, |
|
"total_flos": 208730583859200.0, |
|
"train_loss": -0.05900815903544426, |
|
"train_runtime": 14539.7464, |
|
"train_samples_per_second": 11.005, |
|
"train_steps_per_second": 0.086 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 208730583859200.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|