{ |
|
"best_metric": 1.322394609451294, |
|
"best_model_checkpoint": "output/output__lora/checkpoint-400", |
|
"epoch": 0.139640425903299, |
|
"eval_steps": 100, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00034910106475824753, |
|
"grad_norm": 2.6783504486083984, |
|
"learning_rate": 0.0, |
|
"loss": 1.5271, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0006982021295164951, |
|
"grad_norm": 1.3333820104599, |
|
"learning_rate": 8.859191006777897e-06, |
|
"loss": 1.3963, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0010473031942747426, |
|
"grad_norm": 1.2807133197784424, |
|
"learning_rate": 1.4041485532469073e-05, |
|
"loss": 1.4192, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0013964042590329901, |
|
"grad_norm": 1.1956514120101929, |
|
"learning_rate": 1.7718382013555794e-05, |
|
"loss": 1.5083, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0017455053237912376, |
|
"grad_norm": 1.2733005285263062, |
|
"learning_rate": 2.0570404496611053e-05, |
|
"loss": 1.4963, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0020946063885494853, |
|
"grad_norm": 0.8666600584983826, |
|
"learning_rate": 2.2900676539246968e-05, |
|
"loss": 1.5552, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0024437074533077328, |
|
"grad_norm": 0.7445533275604248, |
|
"learning_rate": 2.4870893478326387e-05, |
|
"loss": 1.2858, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0027928085180659802, |
|
"grad_norm": 0.8400186896324158, |
|
"learning_rate": 2.6577573020333684e-05, |
|
"loss": 1.3413, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0031419095828242277, |
|
"grad_norm": 0.8454774618148804, |
|
"learning_rate": 2.8082971064938146e-05, |
|
"loss": 1.467, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.003491010647582475, |
|
"grad_norm": 0.8853550553321838, |
|
"learning_rate": 2.9429595503388953e-05, |
|
"loss": 1.4477, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0038401117123407227, |
|
"grad_norm": 1.4953877925872803, |
|
"learning_rate": 3.064776548439465e-05, |
|
"loss": 1.4012, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0041892127770989706, |
|
"grad_norm": 0.8356307148933411, |
|
"learning_rate": 3.1759867546024865e-05, |
|
"loss": 1.3855, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.004538313841857218, |
|
"grad_norm": 0.7591987252235413, |
|
"learning_rate": 3.2782902272079295e-05, |
|
"loss": 1.3561, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0048874149066154655, |
|
"grad_norm": 0.9811077117919922, |
|
"learning_rate": 3.373008448510428e-05, |
|
"loss": 1.3175, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.005236515971373713, |
|
"grad_norm": 0.8403587341308594, |
|
"learning_rate": 3.4611890029080124e-05, |
|
"loss": 1.341, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0055856170361319605, |
|
"grad_norm": 0.750234067440033, |
|
"learning_rate": 3.543676402711159e-05, |
|
"loss": 1.4247, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.005934718100890208, |
|
"grad_norm": 0.7567417621612549, |
|
"learning_rate": 3.621161404374383e-05, |
|
"loss": 1.416, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.006283819165648455, |
|
"grad_norm": 0.7126427292823792, |
|
"learning_rate": 3.694216207171603e-05, |
|
"loss": 1.4426, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.006632920230406703, |
|
"grad_norm": 0.7808831930160522, |
|
"learning_rate": 3.76332012245438e-05, |
|
"loss": 1.4287, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.00698202129516495, |
|
"grad_norm": 0.6165328025817871, |
|
"learning_rate": 3.8288786510166846e-05, |
|
"loss": 1.3391, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007331122359923198, |
|
"grad_norm": 0.7212307453155518, |
|
"learning_rate": 3.8912379010795455e-05, |
|
"loss": 1.3375, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.007680223424681445, |
|
"grad_norm": 0.6797880530357361, |
|
"learning_rate": 3.9506956491172545e-05, |
|
"loss": 1.2713, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.008029324489439693, |
|
"grad_norm": 0.7757507562637329, |
|
"learning_rate": 4.007509939970292e-05, |
|
"loss": 1.3599, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.008378425554197941, |
|
"grad_norm": 0.539090096950531, |
|
"learning_rate": 4.061905855280276e-05, |
|
"loss": 1.5154, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.008727526618956188, |
|
"grad_norm": 0.652180552482605, |
|
"learning_rate": 4.1140808993222106e-05, |
|
"loss": 1.3438, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.009076627683714436, |
|
"grad_norm": 0.7319611310958862, |
|
"learning_rate": 4.164209327885719e-05, |
|
"loss": 1.5033, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.009425728748472683, |
|
"grad_norm": 0.702570378780365, |
|
"learning_rate": 4.2124456597407214e-05, |
|
"loss": 1.2238, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.009774829813230931, |
|
"grad_norm": 0.6835883855819702, |
|
"learning_rate": 4.258927549188218e-05, |
|
"loss": 1.3648, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.010123930877989178, |
|
"grad_norm": 0.6773353219032288, |
|
"learning_rate": 4.303778154313212e-05, |
|
"loss": 1.3074, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.010473031942747426, |
|
"grad_norm": 0.6387542486190796, |
|
"learning_rate": 4.347108103585803e-05, |
|
"loss": 1.2265, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010822133007505673, |
|
"grad_norm": 0.6249099969863892, |
|
"learning_rate": 4.389017139879164e-05, |
|
"loss": 1.3321, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.011171234072263921, |
|
"grad_norm": 0.7121676802635193, |
|
"learning_rate": 4.429595503388948e-05, |
|
"loss": 1.3729, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.011520335137022168, |
|
"grad_norm": 0.7367205619812012, |
|
"learning_rate": 4.468925101686371e-05, |
|
"loss": 1.3937, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.011869436201780416, |
|
"grad_norm": 0.6183043718338013, |
|
"learning_rate": 4.507080505052173e-05, |
|
"loss": 1.4321, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.012218537266538662, |
|
"grad_norm": 1.1439142227172852, |
|
"learning_rate": 4.544129797493744e-05, |
|
"loss": 1.3515, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.01256763833129691, |
|
"grad_norm": 0.7980801463127136, |
|
"learning_rate": 4.5801353078493936e-05, |
|
"loss": 1.3929, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.012916739396055157, |
|
"grad_norm": 0.8890343904495239, |
|
"learning_rate": 4.615154240700883e-05, |
|
"loss": 1.2895, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.013265840460813406, |
|
"grad_norm": 0.7107703685760498, |
|
"learning_rate": 4.6492392231321696e-05, |
|
"loss": 1.3054, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.013614941525571652, |
|
"grad_norm": 0.605403482913971, |
|
"learning_rate": 4.682438780454837e-05, |
|
"loss": 1.3817, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0139640425903299, |
|
"grad_norm": 0.6489142775535583, |
|
"learning_rate": 4.714797751694474e-05, |
|
"loss": 1.4109, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.014313143655088147, |
|
"grad_norm": 0.5896831750869751, |
|
"learning_rate": 4.7463576537657414e-05, |
|
"loss": 1.3383, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.014662244719846396, |
|
"grad_norm": 0.8319935202598572, |
|
"learning_rate": 4.777157001757336e-05, |
|
"loss": 1.4239, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.015011345784604642, |
|
"grad_norm": 0.6128418445587158, |
|
"learning_rate": 4.8072315915252694e-05, |
|
"loss": 1.3541, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.01536044684936289, |
|
"grad_norm": 0.6820589900016785, |
|
"learning_rate": 4.8366147497950435e-05, |
|
"loss": 1.2663, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.015709547914121137, |
|
"grad_norm": 0.8375743627548218, |
|
"learning_rate": 4.8653375561549195e-05, |
|
"loss": 1.3803, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.016058648978879386, |
|
"grad_norm": 0.6585806608200073, |
|
"learning_rate": 4.8934290406480814e-05, |
|
"loss": 1.3143, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.016407750043637634, |
|
"grad_norm": 0.7528412342071533, |
|
"learning_rate": 4.920916360113129e-05, |
|
"loss": 1.293, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.016756851108395882, |
|
"grad_norm": 0.6918306946754456, |
|
"learning_rate": 4.947824955958066e-05, |
|
"loss": 1.4991, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.017105952173154127, |
|
"grad_norm": 0.6764557361602783, |
|
"learning_rate": 4.9741786956652774e-05, |
|
"loss": 1.2755, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.017455053237912375, |
|
"grad_norm": 0.6525936722755432, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3897, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.017804154302670624, |
|
"grad_norm": 0.627804160118103, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3027, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.018153255367428872, |
|
"grad_norm": 0.8060218095779419, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3477, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.018502356432187117, |
|
"grad_norm": 0.6655098795890808, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3631, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.018851457496945365, |
|
"grad_norm": 0.7165637016296387, |
|
"learning_rate": 5e-05, |
|
"loss": 1.347, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.019200558561703614, |
|
"grad_norm": 0.6562020778656006, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3535, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.019549659626461862, |
|
"grad_norm": 0.7588657736778259, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3291, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.019898760691220107, |
|
"grad_norm": 0.6295105814933777, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3542, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.020247861755978355, |
|
"grad_norm": 1.339097023010254, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3649, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.020596962820736604, |
|
"grad_norm": 0.6976660490036011, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2852, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.020946063885494852, |
|
"grad_norm": 0.7590420246124268, |
|
"learning_rate": 5e-05, |
|
"loss": 1.354, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.021295164950253097, |
|
"grad_norm": 0.6279817819595337, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2537, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.021644266015011345, |
|
"grad_norm": 0.6099221110343933, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2423, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.021993367079769593, |
|
"grad_norm": 0.6252647638320923, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3667, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.022342468144527842, |
|
"grad_norm": 0.8939846158027649, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2889, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.022691569209286087, |
|
"grad_norm": 0.85840904712677, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3747, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.023040670274044335, |
|
"grad_norm": 0.8478113412857056, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3417, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.023389771338802583, |
|
"grad_norm": 0.6869573593139648, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4033, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.02373887240356083, |
|
"grad_norm": 0.6566379070281982, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3617, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.02408797346831908, |
|
"grad_norm": 0.6871697306632996, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2932, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.024437074533077325, |
|
"grad_norm": 0.7102701663970947, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4062, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.024786175597835573, |
|
"grad_norm": 0.8392966985702515, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1992, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.02513527666259382, |
|
"grad_norm": 0.670971155166626, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4131, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.02548437772735207, |
|
"grad_norm": 0.7271628975868225, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2928, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.025833478792110315, |
|
"grad_norm": 0.7184221744537354, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2239, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.026182579856868563, |
|
"grad_norm": 0.5685485005378723, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2692, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.02653168092162681, |
|
"grad_norm": 0.5677881836891174, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2951, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.02688078198638506, |
|
"grad_norm": 0.6896436810493469, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3297, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.027229883051143305, |
|
"grad_norm": 0.6284964084625244, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2402, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.027578984115901553, |
|
"grad_norm": 0.618015468120575, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2999, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0279280851806598, |
|
"grad_norm": 0.7585094571113586, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3378, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02827718624541805, |
|
"grad_norm": 0.6674929857254028, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3585, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.028626287310176295, |
|
"grad_norm": 0.583121120929718, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3236, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.028975388374934543, |
|
"grad_norm": 0.661668062210083, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3264, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.02932448943969279, |
|
"grad_norm": 0.8168457746505737, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3132, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.02967359050445104, |
|
"grad_norm": 0.6123843193054199, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3224, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.030022691569209285, |
|
"grad_norm": 0.7081793546676636, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3641, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.030371792633967533, |
|
"grad_norm": 0.7772612571716309, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3634, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.03072089369872578, |
|
"grad_norm": 0.603370726108551, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4486, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.03106999476348403, |
|
"grad_norm": 0.6567598581314087, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4228, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.031419095828242274, |
|
"grad_norm": 0.6245101690292358, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2928, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.031768196893000526, |
|
"grad_norm": 0.7198782563209534, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3304, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.03211729795775877, |
|
"grad_norm": 0.526452898979187, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3418, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.032466399022517016, |
|
"grad_norm": 0.7534317374229431, |
|
"learning_rate": 5e-05, |
|
"loss": 1.333, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.03281550008727527, |
|
"grad_norm": 0.5721869468688965, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1849, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.03316460115203351, |
|
"grad_norm": 0.6943261027336121, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3263, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.033513702216791764, |
|
"grad_norm": 0.5904171466827393, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3103, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.03386280328155001, |
|
"grad_norm": 0.7743117809295654, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3633, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.034211904346308254, |
|
"grad_norm": 1.298839807510376, |
|
"learning_rate": 5e-05, |
|
"loss": 1.335, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.034561005411066506, |
|
"grad_norm": 0.7134571671485901, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4154, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.03491010647582475, |
|
"grad_norm": 0.6801385879516602, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03491010647582475, |
|
"eval_loss": 1.337953805923462, |
|
"eval_runtime": 3305.6905, |
|
"eval_samples_per_second": 6.932, |
|
"eval_steps_per_second": 0.867, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.035259207540582996, |
|
"grad_norm": 1.0192288160324097, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2821, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.03560830860534125, |
|
"grad_norm": 0.6322550773620605, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3561, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.03595740967009949, |
|
"grad_norm": 0.6499407291412354, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3164, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.036306510734857744, |
|
"grad_norm": 0.7576645612716675, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2924, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.03665561179961599, |
|
"grad_norm": 0.6215568780899048, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2551, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.037004712864374234, |
|
"grad_norm": 0.6197790503501892, |
|
"learning_rate": 5e-05, |
|
"loss": 1.317, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.037353813929132486, |
|
"grad_norm": 0.677772045135498, |
|
"learning_rate": 5e-05, |
|
"loss": 1.428, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.03770291499389073, |
|
"grad_norm": 0.6386198401451111, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4206, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.038052016058648976, |
|
"grad_norm": 1.113053798675537, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3992, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.03840111712340723, |
|
"grad_norm": 0.668409526348114, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3358, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03875021818816547, |
|
"grad_norm": 0.6381022930145264, |
|
"learning_rate": 5e-05, |
|
"loss": 1.245, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.039099319252923724, |
|
"grad_norm": 0.7082274556159973, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3107, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.03944842031768197, |
|
"grad_norm": 0.6497403979301453, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3174, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.039797521382440214, |
|
"grad_norm": 0.7390655279159546, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2791, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.040146622447198466, |
|
"grad_norm": 0.6828505992889404, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3903, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.04049572351195671, |
|
"grad_norm": 0.6913119554519653, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3147, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.04084482457671496, |
|
"grad_norm": 0.6394439339637756, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3308, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.04119392564147321, |
|
"grad_norm": 0.6368663907051086, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3021, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.04154302670623145, |
|
"grad_norm": 0.625417947769165, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4122, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.041892127770989704, |
|
"grad_norm": 0.5640509724617004, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3216, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.04224122883574795, |
|
"grad_norm": 0.6355682611465454, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2522, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.042590329900506194, |
|
"grad_norm": 2.130183696746826, |
|
"learning_rate": 5e-05, |
|
"loss": 1.398, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.042939430965264445, |
|
"grad_norm": 0.7858290672302246, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3543, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.04328853203002269, |
|
"grad_norm": 0.6912608742713928, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3338, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.04363763309478094, |
|
"grad_norm": 0.6326834559440613, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2968, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.04398673415953919, |
|
"grad_norm": 0.6076151728630066, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2705, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.04433583522429743, |
|
"grad_norm": 0.767652153968811, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3601, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.044684936289055684, |
|
"grad_norm": 0.621769905090332, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2834, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.04503403735381393, |
|
"grad_norm": 0.6216384768486023, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3322, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.04538313841857217, |
|
"grad_norm": 0.626325249671936, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4601, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.045732239483330425, |
|
"grad_norm": 0.8063498735427856, |
|
"learning_rate": 5e-05, |
|
"loss": 1.293, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.04608134054808867, |
|
"grad_norm": 1.117038369178772, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3635, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.04643044161284692, |
|
"grad_norm": 1.4540647268295288, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3346, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.04677954267760517, |
|
"grad_norm": 0.6695774793624878, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4109, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.04712864374236341, |
|
"grad_norm": 0.8146533370018005, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3515, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.04747774480712166, |
|
"grad_norm": 0.6705998778343201, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2752, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.04782684587187991, |
|
"grad_norm": 0.7589219808578491, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4393, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.04817594693663816, |
|
"grad_norm": 0.9603825807571411, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4609, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.048525048001396405, |
|
"grad_norm": 0.6351510286331177, |
|
"learning_rate": 5e-05, |
|
"loss": 1.371, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.04887414906615465, |
|
"grad_norm": 0.5652881860733032, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2845, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.0492232501309129, |
|
"grad_norm": 0.7579118609428406, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2526, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.04957235119567115, |
|
"grad_norm": 0.7851598262786865, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3379, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.04992145226042939, |
|
"grad_norm": 0.5865357518196106, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4802, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.05027055332518764, |
|
"grad_norm": 1.3862611055374146, |
|
"learning_rate": 5e-05, |
|
"loss": 1.357, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.05061965438994589, |
|
"grad_norm": 0.6249399185180664, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2587, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.05096875545470414, |
|
"grad_norm": 0.5966644883155823, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3534, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.051317856519462385, |
|
"grad_norm": 0.6312971711158752, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1815, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.05166695758422063, |
|
"grad_norm": 0.6539703011512756, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3946, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.05201605864897888, |
|
"grad_norm": 0.8756076097488403, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2384, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.052365159713737126, |
|
"grad_norm": 0.7149311304092407, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2998, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.05271426077849537, |
|
"grad_norm": 0.79525226354599, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3376, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.05306336184325362, |
|
"grad_norm": 0.6921191811561584, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3461, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.05341246290801187, |
|
"grad_norm": 0.7444896697998047, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4089, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.05376156397277012, |
|
"grad_norm": 0.6216670274734497, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3402, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.054110665037528365, |
|
"grad_norm": 0.5917710661888123, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3253, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.05445976610228661, |
|
"grad_norm": 0.8648408055305481, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4447, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.05480886716704486, |
|
"grad_norm": 0.6752570271492004, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3097, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.055157968231803106, |
|
"grad_norm": 0.5603750944137573, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4177, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.05550706929656136, |
|
"grad_norm": 0.6317929029464722, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3509, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.0558561703613196, |
|
"grad_norm": 0.6017687320709229, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3471, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05620527142607785, |
|
"grad_norm": 0.6761009693145752, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4473, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.0565543724908361, |
|
"grad_norm": 0.7266319990158081, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2896, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.056903473555594344, |
|
"grad_norm": 0.6436321139335632, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2812, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.05725257462035259, |
|
"grad_norm": 0.9664864540100098, |
|
"learning_rate": 5e-05, |
|
"loss": 1.294, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.05760167568511084, |
|
"grad_norm": 0.6690096855163574, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2801, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.057950776749869086, |
|
"grad_norm": 0.6227753162384033, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3384, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.05829987781462734, |
|
"grad_norm": 0.7900117039680481, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3424, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.05864897887938558, |
|
"grad_norm": 0.6928064823150635, |
|
"learning_rate": 5e-05, |
|
"loss": 1.296, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.05899807994414383, |
|
"grad_norm": 0.8754634261131287, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4471, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.05934718100890208, |
|
"grad_norm": 0.5537067651748657, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2825, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.059696282073660324, |
|
"grad_norm": 0.6705783009529114, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3768, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.06004538313841857, |
|
"grad_norm": 0.5732744932174683, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3309, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.06039448420317682, |
|
"grad_norm": 1.120721459388733, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3702, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.060743585267935066, |
|
"grad_norm": 0.7755718231201172, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3425, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.06109268633269332, |
|
"grad_norm": 0.5984740257263184, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4886, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.06144178739745156, |
|
"grad_norm": 0.7374542951583862, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3667, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.06179088846220981, |
|
"grad_norm": 0.5558515787124634, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3737, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.06213998952696806, |
|
"grad_norm": 0.700268566608429, |
|
"learning_rate": 5e-05, |
|
"loss": 1.364, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.062489090591726304, |
|
"grad_norm": 0.5781232118606567, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3443, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.06283819165648455, |
|
"grad_norm": 0.7157448530197144, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3702, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.0631872927212428, |
|
"grad_norm": 0.5329631567001343, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1786, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.06353639378600105, |
|
"grad_norm": 0.5949011445045471, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3809, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.0638854948507593, |
|
"grad_norm": 0.6756107807159424, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2792, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.06423459591551754, |
|
"grad_norm": 0.7747790813446045, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3714, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.06458369698027579, |
|
"grad_norm": 1.1907461881637573, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3055, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.06493279804503403, |
|
"grad_norm": 0.5747818946838379, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2003, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.06528189910979229, |
|
"grad_norm": 0.614464521408081, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3108, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.06563100017455054, |
|
"grad_norm": 0.6040724515914917, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2371, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.06598010123930878, |
|
"grad_norm": 0.6369174122810364, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1662, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.06632920230406703, |
|
"grad_norm": 0.6132228374481201, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3257, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.06667830336882527, |
|
"grad_norm": 0.6686124801635742, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3757, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.06702740443358353, |
|
"grad_norm": 0.6709855794906616, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3341, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.06737650549834177, |
|
"grad_norm": 0.5295905470848083, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2587, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.06772560656310002, |
|
"grad_norm": 0.6111523509025574, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3365, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.06807470762785826, |
|
"grad_norm": 0.5655878782272339, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3265, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.06842380869261651, |
|
"grad_norm": 0.6125257015228271, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3475, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.06877290975737475, |
|
"grad_norm": 0.6268573999404907, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3002, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.06912201082213301, |
|
"grad_norm": 0.7267619967460632, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4104, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.06947111188689126, |
|
"grad_norm": 0.5741710066795349, |
|
"learning_rate": 5e-05, |
|
"loss": 1.318, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.0698202129516495, |
|
"grad_norm": 0.6447280049324036, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3477, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0698202129516495, |
|
"eval_loss": 1.3300124406814575, |
|
"eval_runtime": 3301.7334, |
|
"eval_samples_per_second": 6.941, |
|
"eval_steps_per_second": 0.868, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.07016931401640775, |
|
"grad_norm": 1.4164685010910034, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4048, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.07051841508116599, |
|
"grad_norm": 0.5867809057235718, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4018, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.07086751614592425, |
|
"grad_norm": 0.6882596611976624, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2737, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.0712166172106825, |
|
"grad_norm": 0.6038634181022644, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2399, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.07156571827544074, |
|
"grad_norm": 0.6428863406181335, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3729, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.07191481934019898, |
|
"grad_norm": 0.7008076906204224, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3353, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.07226392040495723, |
|
"grad_norm": 0.6662419438362122, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3442, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.07261302146971549, |
|
"grad_norm": 0.7249788045883179, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2526, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.07296212253447373, |
|
"grad_norm": 0.6323925852775574, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2929, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.07331122359923198, |
|
"grad_norm": 0.8273724317550659, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5291, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.07366032466399022, |
|
"grad_norm": 0.8445104956626892, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2417, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.07400942572874847, |
|
"grad_norm": 0.6157236695289612, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3739, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.07435852679350673, |
|
"grad_norm": 0.6917769312858582, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3078, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.07470762785826497, |
|
"grad_norm": 0.7838917970657349, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3086, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.07505672892302322, |
|
"grad_norm": 0.6962039470672607, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3907, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.07540582998778146, |
|
"grad_norm": 0.6962039470672607, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3615, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.0757549310525397, |
|
"grad_norm": 0.6687365770339966, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3408, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.07610403211729795, |
|
"grad_norm": 0.5566404461860657, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2872, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.07645313318205621, |
|
"grad_norm": 0.6419705748558044, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2883, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.07680223424681445, |
|
"grad_norm": 0.7758398652076721, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3832, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.0771513353115727, |
|
"grad_norm": 0.9763804078102112, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3414, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.07750043637633094, |
|
"grad_norm": 0.8815904259681702, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3297, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.07784953744108919, |
|
"grad_norm": 0.590263307094574, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3401, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.07819863850584745, |
|
"grad_norm": 0.677057147026062, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2449, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.07854773957060569, |
|
"grad_norm": 1.5185271501541138, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3127, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.07889684063536394, |
|
"grad_norm": 0.5751495957374573, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1587, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.07924594170012218, |
|
"grad_norm": 0.8122138977050781, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2316, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.07959504276488043, |
|
"grad_norm": 0.6675130724906921, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3539, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.07994414382963869, |
|
"grad_norm": 0.8163532614707947, |
|
"learning_rate": 5e-05, |
|
"loss": 1.328, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.08029324489439693, |
|
"grad_norm": 0.8377723693847656, |
|
"learning_rate": 5e-05, |
|
"loss": 1.353, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.08064234595915518, |
|
"grad_norm": 0.7325611710548401, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3396, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.08099144702391342, |
|
"grad_norm": 0.8941824436187744, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2906, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.08134054808867167, |
|
"grad_norm": 0.6284440159797668, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4264, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.08168964915342992, |
|
"grad_norm": 0.689984917640686, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3696, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.08203875021818817, |
|
"grad_norm": 0.5813177227973938, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2931, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.08238785128294641, |
|
"grad_norm": 0.5287997126579285, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3264, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.08273695234770466, |
|
"grad_norm": 0.7944268584251404, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2708, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.0830860534124629, |
|
"grad_norm": 0.534864068031311, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2535, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.08343515447722115, |
|
"grad_norm": 0.6260988712310791, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2757, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.08378425554197941, |
|
"grad_norm": 0.579078197479248, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2906, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.08413335660673765, |
|
"grad_norm": 0.5578561425209045, |
|
"learning_rate": 5e-05, |
|
"loss": 1.289, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.0844824576714959, |
|
"grad_norm": 0.626961350440979, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2807, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.08483155873625414, |
|
"grad_norm": 0.782669186592102, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3933, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.08518065980101239, |
|
"grad_norm": 0.6670363545417786, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2732, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.08552976086577065, |
|
"grad_norm": 0.7201350331306458, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2962, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.08587886193052889, |
|
"grad_norm": 0.6021212339401245, |
|
"learning_rate": 5e-05, |
|
"loss": 1.35, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.08622796299528714, |
|
"grad_norm": 0.8081540465354919, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3568, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.08657706406004538, |
|
"grad_norm": 0.5358250737190247, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4603, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.08692616512480363, |
|
"grad_norm": 0.6927733421325684, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2506, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.08727526618956188, |
|
"grad_norm": 0.6187159419059753, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3497, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.08762436725432013, |
|
"grad_norm": 0.6304159760475159, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3087, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.08797346831907837, |
|
"grad_norm": 0.6446660161018372, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3424, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.08832256938383662, |
|
"grad_norm": 0.6535473465919495, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3471, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.08867167044859486, |
|
"grad_norm": 0.601290225982666, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3557, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.08902077151335312, |
|
"grad_norm": 0.641854465007782, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3138, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.08936987257811137, |
|
"grad_norm": 0.5452507138252258, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2898, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.08971897364286961, |
|
"grad_norm": 0.5870373249053955, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2953, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.09006807470762786, |
|
"grad_norm": 0.5798627734184265, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2973, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.0904171757723861, |
|
"grad_norm": 0.5798627734184265, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3628, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.09076627683714435, |
|
"grad_norm": 0.7382280230522156, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3111, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.0911153779019026, |
|
"grad_norm": 0.6882988810539246, |
|
"learning_rate": 5e-05, |
|
"loss": 1.329, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.09146447896666085, |
|
"grad_norm": 0.6590788960456848, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3089, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.0918135800314191, |
|
"grad_norm": 0.682006299495697, |
|
"learning_rate": 5e-05, |
|
"loss": 1.344, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.09216268109617734, |
|
"grad_norm": 0.6040222644805908, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3919, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.09251178216093559, |
|
"grad_norm": 0.5964936017990112, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3397, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.09286088322569384, |
|
"grad_norm": 0.5645217299461365, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3488, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.09320998429045209, |
|
"grad_norm": 0.7771989703178406, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3485, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.09355908535521033, |
|
"grad_norm": 0.6003885865211487, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3109, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.09390818641996858, |
|
"grad_norm": 0.5627903938293457, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2906, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.09425728748472682, |
|
"grad_norm": 0.6381875276565552, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3063, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.09460638854948508, |
|
"grad_norm": 1.2558772563934326, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2985, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.09495548961424333, |
|
"grad_norm": 0.6977007389068604, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4955, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.09530459067900157, |
|
"grad_norm": 0.7846536040306091, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4439, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.09565369174375982, |
|
"grad_norm": 0.7036994695663452, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1942, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.09600279280851806, |
|
"grad_norm": 0.6119917631149292, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3607, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.09635189387327632, |
|
"grad_norm": 0.6243535280227661, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3029, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.09670099493803457, |
|
"grad_norm": 0.5424296855926514, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2995, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.09705009600279281, |
|
"grad_norm": 0.7677564024925232, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2686, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.09739919706755105, |
|
"grad_norm": 0.625275194644928, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2897, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.0977482981323093, |
|
"grad_norm": 0.5734910368919373, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3298, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.09809739919706754, |
|
"grad_norm": 0.660658061504364, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2643, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.0984465002618258, |
|
"grad_norm": 0.679891049861908, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3189, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.09879560132658405, |
|
"grad_norm": 0.6248694658279419, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1688, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.0991447023913423, |
|
"grad_norm": 0.6428897380828857, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3274, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.09949380345610054, |
|
"grad_norm": 0.586065411567688, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3852, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.09984290452085878, |
|
"grad_norm": 0.5755594372749329, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3665, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.10019200558561704, |
|
"grad_norm": 0.7748963236808777, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4551, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.10054110665037529, |
|
"grad_norm": 0.6308531165122986, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2793, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.10089020771513353, |
|
"grad_norm": 0.6195006966590881, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3649, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.10123930877989178, |
|
"grad_norm": 0.6098636984825134, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2956, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.10158840984465002, |
|
"grad_norm": 0.8072320818901062, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3469, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.10193751090940828, |
|
"grad_norm": 0.6090126633644104, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2958, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.10228661197416652, |
|
"grad_norm": 0.5718780159950256, |
|
"learning_rate": 5e-05, |
|
"loss": 1.363, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.10263571303892477, |
|
"grad_norm": 0.7197532653808594, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3868, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.10298481410368301, |
|
"grad_norm": 0.5578592419624329, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2627, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.10333391516844126, |
|
"grad_norm": 0.730226457118988, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3182, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.10368301623319952, |
|
"grad_norm": 0.6234796047210693, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1777, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.10403211729795776, |
|
"grad_norm": 0.5563578009605408, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3275, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.10438121836271601, |
|
"grad_norm": 0.6864249110221863, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2813, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.10473031942747425, |
|
"grad_norm": 0.8850319385528564, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3057, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.10473031942747425, |
|
"eval_loss": 1.3255380392074585, |
|
"eval_runtime": 3311.4237, |
|
"eval_samples_per_second": 6.92, |
|
"eval_steps_per_second": 0.865, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.1050794204922325, |
|
"grad_norm": 0.9439303278923035, |
|
"learning_rate": 5e-05, |
|
"loss": 1.281, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.10542852155699074, |
|
"grad_norm": 0.6651242971420288, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3492, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.105777622621749, |
|
"grad_norm": 0.9047183394432068, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4246, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.10612672368650725, |
|
"grad_norm": 0.6983138918876648, |
|
"learning_rate": 5e-05, |
|
"loss": 1.324, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.10647582475126549, |
|
"grad_norm": 0.6347063779830933, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3389, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.10682492581602374, |
|
"grad_norm": 0.6051842570304871, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3278, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.10717402688078198, |
|
"grad_norm": 0.9355935454368591, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2663, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.10752312794554024, |
|
"grad_norm": 1.0706268548965454, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3142, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.10787222901029848, |
|
"grad_norm": 0.8131638765335083, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3445, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.10822133007505673, |
|
"grad_norm": 0.5791985392570496, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2746, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.10857043113981497, |
|
"grad_norm": 0.5536484718322754, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2613, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.10891953220457322, |
|
"grad_norm": 0.7847089767456055, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4607, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.10926863326933148, |
|
"grad_norm": 0.7828165888786316, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4399, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.10961773433408972, |
|
"grad_norm": 0.5692522525787354, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3044, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.10996683539884797, |
|
"grad_norm": 0.5592648386955261, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3211, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.11031593646360621, |
|
"grad_norm": 0.7055444717407227, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2944, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.11066503752836446, |
|
"grad_norm": 0.5370152592658997, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2776, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.11101413859312272, |
|
"grad_norm": 0.6320214867591858, |
|
"learning_rate": 5e-05, |
|
"loss": 1.347, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.11136323965788096, |
|
"grad_norm": 0.6425771713256836, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5038, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.1117123407226392, |
|
"grad_norm": 0.585542619228363, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3573, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.11206144178739745, |
|
"grad_norm": 0.5627699494361877, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2693, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.1124105428521557, |
|
"grad_norm": 0.6050506830215454, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2787, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.11275964391691394, |
|
"grad_norm": 0.6247337460517883, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4146, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.1131087449816722, |
|
"grad_norm": 0.7732966542243958, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2626, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.11345784604643044, |
|
"grad_norm": 0.5666255354881287, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4219, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.11380694711118869, |
|
"grad_norm": 0.5973132848739624, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3522, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.11415604817594693, |
|
"grad_norm": 0.8540626764297485, |
|
"learning_rate": 5e-05, |
|
"loss": 1.304, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.11450514924070518, |
|
"grad_norm": 0.574573278427124, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3487, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.11485425030546344, |
|
"grad_norm": 0.5949917435646057, |
|
"learning_rate": 5e-05, |
|
"loss": 1.254, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.11520335137022168, |
|
"grad_norm": 0.6005589365959167, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3073, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.11555245243497993, |
|
"grad_norm": 0.5026714205741882, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2418, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.11590155349973817, |
|
"grad_norm": 0.7160278558731079, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3437, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.11625065456449642, |
|
"grad_norm": 0.6049554347991943, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4858, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.11659975562925468, |
|
"grad_norm": 0.7706385254859924, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3971, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.11694885669401292, |
|
"grad_norm": 0.6254088282585144, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3359, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.11729795775877117, |
|
"grad_norm": 0.5904930830001831, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3262, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.11764705882352941, |
|
"grad_norm": 1.9982556104660034, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3656, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.11799615988828766, |
|
"grad_norm": 0.5776758790016174, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2654, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.1183452609530459, |
|
"grad_norm": 0.6094497442245483, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3505, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.11869436201780416, |
|
"grad_norm": 0.9940481185913086, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2853, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1190434630825624, |
|
"grad_norm": 1.1043668985366821, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2813, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.11939256414732065, |
|
"grad_norm": 0.5494128465652466, |
|
"learning_rate": 5e-05, |
|
"loss": 1.202, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.1197416652120789, |
|
"grad_norm": 0.6436132192611694, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2898, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.12009076627683714, |
|
"grad_norm": 0.6878450512886047, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3392, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.1204398673415954, |
|
"grad_norm": 0.5806905627250671, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2221, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.12078896840635364, |
|
"grad_norm": 0.5916112065315247, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2761, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.12113806947111189, |
|
"grad_norm": 0.5216647386550903, |
|
"learning_rate": 5e-05, |
|
"loss": 1.223, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.12148717053587013, |
|
"grad_norm": 0.707747220993042, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2933, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.12183627160062838, |
|
"grad_norm": 0.6644443273544312, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3367, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.12218537266538664, |
|
"grad_norm": 0.7112720012664795, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2368, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.12253447373014488, |
|
"grad_norm": 0.6551552414894104, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3348, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.12288357479490312, |
|
"grad_norm": 0.5377748012542725, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2859, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.12323267585966137, |
|
"grad_norm": 0.580769956111908, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2442, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.12358177692441961, |
|
"grad_norm": 0.6772916316986084, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2994, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.12393087798917787, |
|
"grad_norm": 0.6245989799499512, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2093, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.12427997905393612, |
|
"grad_norm": 0.6136452555656433, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2258, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.12462908011869436, |
|
"grad_norm": 0.5786277055740356, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2856, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.12497818118345261, |
|
"grad_norm": 0.5986611247062683, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4524, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.12532728224821085, |
|
"grad_norm": 0.6240454316139221, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3325, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.1256763833129691, |
|
"grad_norm": 0.6426084041595459, |
|
"learning_rate": 5e-05, |
|
"loss": 1.219, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.12602548437772734, |
|
"grad_norm": 0.6227401494979858, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3342, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.1263745854424856, |
|
"grad_norm": 0.7462456226348877, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3747, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.12672368650724386, |
|
"grad_norm": 0.7022641897201538, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2957, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.1270727875720021, |
|
"grad_norm": 0.657645046710968, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3125, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.12742188863676035, |
|
"grad_norm": 0.662497878074646, |
|
"learning_rate": 5e-05, |
|
"loss": 1.321, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.1277709897015186, |
|
"grad_norm": 0.6295817494392395, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3814, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.12812009076627684, |
|
"grad_norm": 0.7357390522956848, |
|
"learning_rate": 5e-05, |
|
"loss": 1.374, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.12846919183103508, |
|
"grad_norm": 0.6728739142417908, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1957, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.12881829289579333, |
|
"grad_norm": 0.6290231943130493, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2948, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.12916739396055157, |
|
"grad_norm": 1.0889554023742676, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3465, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.12951649502530982, |
|
"grad_norm": 0.6978388428688049, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2898, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.12986559609006806, |
|
"grad_norm": 1.0806949138641357, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2656, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.1302146971548263, |
|
"grad_norm": 0.5989696979522705, |
|
"learning_rate": 5e-05, |
|
"loss": 1.354, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.13056379821958458, |
|
"grad_norm": 0.5808868408203125, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2911, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.13091289928434283, |
|
"grad_norm": 0.6175510883331299, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3392, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.13126200034910107, |
|
"grad_norm": 0.7896063923835754, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3598, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.13161110141385932, |
|
"grad_norm": 0.6890353560447693, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2259, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.13196020247861756, |
|
"grad_norm": 0.7264868021011353, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3747, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.1323093035433758, |
|
"grad_norm": 0.5779114365577698, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2566, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.13265840460813405, |
|
"grad_norm": 0.6164990067481995, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3123, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1330075056728923, |
|
"grad_norm": 0.5990901589393616, |
|
"learning_rate": 5e-05, |
|
"loss": 1.399, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.13335660673765054, |
|
"grad_norm": 0.5799390077590942, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2697, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.13370570780240879, |
|
"grad_norm": 0.6446252465248108, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3321, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.13405480886716706, |
|
"grad_norm": 0.5626406669616699, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2867, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.1344039099319253, |
|
"grad_norm": 0.5967420935630798, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3514, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.13475301099668355, |
|
"grad_norm": 0.622344434261322, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2814, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.1351021120614418, |
|
"grad_norm": 0.5952975749969482, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3616, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.13545121312620004, |
|
"grad_norm": 1.6270025968551636, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3057, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.13580031419095828, |
|
"grad_norm": 0.6453176736831665, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2203, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.13614941525571653, |
|
"grad_norm": 0.6074663400650024, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2705, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.13649851632047477, |
|
"grad_norm": 0.5617640018463135, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2692, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.13684761738523302, |
|
"grad_norm": 0.5138052701950073, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2914, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.13719671844999126, |
|
"grad_norm": 0.6522411108016968, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3055, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.1375458195147495, |
|
"grad_norm": 0.6821246147155762, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2674, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.13789492057950778, |
|
"grad_norm": 0.6284828186035156, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2842, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.13824402164426602, |
|
"grad_norm": 0.6461937427520752, |
|
"learning_rate": 5e-05, |
|
"loss": 1.305, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.13859312270902427, |
|
"grad_norm": 0.8084800243377686, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3539, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.1389422237737825, |
|
"grad_norm": 0.5511135458946228, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2364, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.13929132483854076, |
|
"grad_norm": 0.6121107339859009, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3212, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.139640425903299, |
|
"grad_norm": 0.5705773234367371, |
|
"learning_rate": 5e-05, |
|
"loss": 1.3116, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.139640425903299, |
|
"eval_loss": 1.322394609451294, |
|
"eval_runtime": 3311.45, |
|
"eval_samples_per_second": 6.92, |
|
"eval_steps_per_second": 0.865, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.139640425903299, |
|
"step": 400, |
|
"total_flos": 8.590417732871127e+17, |
|
"train_loss": 1.3312159395217895, |
|
"train_runtime": 17991.8527, |
|
"train_samples_per_second": 1.779, |
|
"train_steps_per_second": 0.056 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 8.590417732871127e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |