{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 5772,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0005197505197505198, "grad_norm": 37.059080589232245, "learning_rate": 3.4602076124567476e-08, "loss": 2.0466, "step": 1 },
    { "epoch": 0.002598752598752599, "grad_norm": 37.255614130391415, "learning_rate": 1.730103806228374e-07, "loss": 2.0444, "step": 5 },
    { "epoch": 0.005197505197505198, "grad_norm": 38.706692454199526, "learning_rate": 3.460207612456748e-07, "loss": 2.0231, "step": 10 },
    { "epoch": 0.007796257796257797, "grad_norm": 38.51739451223501, "learning_rate": 5.190311418685121e-07, "loss": 1.9458, "step": 15 },
    { "epoch": 0.010395010395010396, "grad_norm": 10.810954056905189, "learning_rate": 6.920415224913496e-07, "loss": 1.842, "step": 20 },
    { "epoch": 0.012993762993762994, "grad_norm": 5.592591424491592, "learning_rate": 8.650519031141868e-07, "loss": 1.736, "step": 25 },
    { "epoch": 0.015592515592515593, "grad_norm": 4.235861784422552, "learning_rate": 1.0380622837370243e-06, "loss": 1.6427, "step": 30 },
    { "epoch": 0.018191268191268192, "grad_norm": 3.005957983709911, "learning_rate": 1.2110726643598616e-06, "loss": 1.4661, "step": 35 },
    { "epoch": 0.02079002079002079, "grad_norm": 2.1399033031159416, "learning_rate": 1.3840830449826992e-06, "loss": 1.3665, "step": 40 },
    { "epoch": 0.02338877338877339, "grad_norm": 2.311859726684216, "learning_rate": 1.5570934256055365e-06, "loss": 1.2455, "step": 45 },
    { "epoch": 0.02598752598752599, "grad_norm": 2.2333969372651588, "learning_rate": 1.7301038062283736e-06, "loss": 1.1655, "step": 50 },
    { "epoch": 0.028586278586278588, "grad_norm": 1.4673684609803286, "learning_rate": 1.9031141868512112e-06, "loss": 1.0897, "step": 55 },
    { "epoch": 0.031185031185031187, "grad_norm": 1.219673178403078, "learning_rate": 2.0761245674740485e-06, "loss": 1.0397, "step": 60 },
    { "epoch": 0.033783783783783786, "grad_norm": 1.177265012600064, "learning_rate": 2.249134948096886e-06, "loss": 0.9918, "step": 65 },
    { "epoch": 0.036382536382536385, "grad_norm": 1.1401627512076926, "learning_rate": 2.4221453287197232e-06, "loss": 0.9519, "step": 70 },
    { "epoch": 0.03898128898128898, "grad_norm": 1.104171712944849, "learning_rate": 2.5951557093425604e-06, "loss": 0.9419, "step": 75 },
    { "epoch": 0.04158004158004158, "grad_norm": 1.153098727216341, "learning_rate": 2.7681660899653983e-06, "loss": 0.9108, "step": 80 },
    { "epoch": 0.04417879417879418, "grad_norm": 1.1902270145957274, "learning_rate": 2.9411764705882355e-06, "loss": 0.8827, "step": 85 },
    { "epoch": 0.04677754677754678, "grad_norm": 1.246541694659634, "learning_rate": 3.114186851211073e-06, "loss": 0.8805, "step": 90 },
    { "epoch": 0.04937629937629938, "grad_norm": 1.3692651531313507, "learning_rate": 3.28719723183391e-06, "loss": 0.872, "step": 95 },
    { "epoch": 0.05197505197505198, "grad_norm": 1.2906179501144006, "learning_rate": 3.4602076124567473e-06, "loss": 0.8603, "step": 100 },
    { "epoch": 0.05457380457380458, "grad_norm": 1.1975871593644642, "learning_rate": 3.6332179930795853e-06, "loss": 0.8401, "step": 105 },
    { "epoch": 0.057172557172557176, "grad_norm": 1.0702701275027335, "learning_rate": 3.8062283737024224e-06, "loss": 0.8599, "step": 110 },
    { "epoch": 0.059771309771309775, "grad_norm": 1.2062361616415083, "learning_rate": 3.9792387543252595e-06, "loss": 0.8437, "step": 115 },
    { "epoch": 0.062370062370062374, "grad_norm": 1.0287364509894605, "learning_rate": 4.152249134948097e-06, "loss": 0.822, "step": 120 },
    { "epoch": 0.06496881496881497, "grad_norm": 1.3847381271884296, "learning_rate": 4.325259515570935e-06, "loss": 0.8193, "step": 125 },
    { "epoch": 0.06756756756756757, "grad_norm": 1.2610205489076338, "learning_rate": 4.498269896193772e-06, "loss": 0.8202, "step": 130 },
    { "epoch": 0.07016632016632017, "grad_norm": 1.2296632878962366, "learning_rate": 4.67128027681661e-06, "loss": 0.8231, "step": 135 },
    { "epoch": 0.07276507276507277, "grad_norm": 1.2968286734442396, "learning_rate": 4.8442906574394464e-06, "loss": 0.8196, "step": 140 },
    { "epoch": 0.07536382536382537, "grad_norm": 1.1170405804017387, "learning_rate": 5.017301038062284e-06, "loss": 0.8174, "step": 145 },
    { "epoch": 0.07796257796257797, "grad_norm": 1.0601393778580994, "learning_rate": 5.190311418685121e-06, "loss": 0.8095, "step": 150 },
    { "epoch": 0.08056133056133057, "grad_norm": 1.0897540737796731, "learning_rate": 5.363321799307959e-06, "loss": 0.7995, "step": 155 },
    { "epoch": 0.08316008316008316, "grad_norm": 1.2121810987520705, "learning_rate": 5.536332179930797e-06, "loss": 0.8207, "step": 160 },
    { "epoch": 0.08575883575883576, "grad_norm": 1.1378189701749455, "learning_rate": 5.709342560553633e-06, "loss": 0.7946, "step": 165 },
    { "epoch": 0.08835758835758836, "grad_norm": 1.0503529917982035, "learning_rate": 5.882352941176471e-06, "loss": 0.8116, "step": 170 },
    { "epoch": 0.09095634095634096, "grad_norm": 1.128045536223591, "learning_rate": 6.055363321799308e-06, "loss": 0.7943, "step": 175 },
    { "epoch": 0.09355509355509356, "grad_norm": 1.0062924807045572, "learning_rate": 6.228373702422146e-06, "loss": 0.7908, "step": 180 },
    { "epoch": 0.09615384615384616, "grad_norm": 1.0645389567201315, "learning_rate": 6.401384083044984e-06, "loss": 0.7961, "step": 185 },
    { "epoch": 0.09875259875259876, "grad_norm": 1.1414748404258819, "learning_rate": 6.57439446366782e-06, "loss": 0.7847, "step": 190 },
    { "epoch": 0.10135135135135136, "grad_norm": 1.205384007751443, "learning_rate": 6.747404844290658e-06, "loss": 0.7751, "step": 195 },
    { "epoch": 0.10395010395010396, "grad_norm": 1.1367398433720104, "learning_rate": 6.9204152249134946e-06, "loss": 0.7919, "step": 200 },
    { "epoch": 0.10654885654885655, "grad_norm": 0.9307012296511041, "learning_rate": 7.093425605536333e-06, "loss": 0.7901, "step": 205 },
    { "epoch": 0.10914760914760915, "grad_norm": 1.0367934940766987, "learning_rate": 7.2664359861591705e-06, "loss": 0.7895, "step": 210 },
    { "epoch": 0.11174636174636175, "grad_norm": 0.96847574603506, "learning_rate": 7.439446366782007e-06, "loss": 0.7883, "step": 215 },
    { "epoch": 0.11434511434511435, "grad_norm": 1.1618158896817028, "learning_rate": 7.612456747404845e-06, "loss": 0.7849, "step": 220 },
    { "epoch": 0.11694386694386695, "grad_norm": 2.5717464884960584, "learning_rate": 7.785467128027683e-06, "loss": 0.7826, "step": 225 },
    { "epoch": 0.11954261954261955, "grad_norm": 1.0453668836748238, "learning_rate": 7.958477508650519e-06, "loss": 0.7682, "step": 230 },
    { "epoch": 0.12214137214137215, "grad_norm": 1.0156117659063706, "learning_rate": 8.131487889273357e-06, "loss": 0.7622, "step": 235 },
    { "epoch": 0.12474012474012475, "grad_norm": 0.9498681526378566, "learning_rate": 8.304498269896194e-06, "loss": 0.7861, "step": 240 },
    { "epoch": 0.12733887733887733, "grad_norm": 1.0843119677144408, "learning_rate": 8.477508650519032e-06, "loss": 0.7804, "step": 245 },
    { "epoch": 0.12993762993762994, "grad_norm": 1.1528568447769787, "learning_rate": 8.65051903114187e-06, "loss": 0.7639, "step": 250 },
    { "epoch": 0.13253638253638253, "grad_norm": 1.1295719141693836, "learning_rate": 8.823529411764707e-06, "loss": 0.7816, "step": 255 },
    { "epoch": 0.13513513513513514, "grad_norm": 0.978783892464181, "learning_rate": 8.996539792387544e-06, "loss": 0.7672, "step": 260 },
    { "epoch": 0.13773388773388773, "grad_norm": 0.9379534418690467, "learning_rate": 9.16955017301038e-06, "loss": 0.7702, "step": 265 },
    { "epoch": 0.14033264033264034, "grad_norm": 1.1416793594082861, "learning_rate": 9.34256055363322e-06, "loss": 0.7738, "step": 270 },
    { "epoch": 0.14293139293139293, "grad_norm": 0.9977084295945086, "learning_rate": 9.515570934256057e-06, "loss": 0.7696, "step": 275 },
    { "epoch": 0.14553014553014554, "grad_norm": 1.037149328356884, "learning_rate": 9.688581314878893e-06, "loss": 0.7674, "step": 280 },
    { "epoch": 0.14812889812889812, "grad_norm": 1.0165544577935077, "learning_rate": 9.86159169550173e-06, "loss": 0.7554, "step": 285 },
    { "epoch": 0.15072765072765074, "grad_norm": 0.9713706199068332, "learning_rate": 1.0034602076124568e-05, "loss": 0.7771, "step": 290 },
    { "epoch": 0.15332640332640332, "grad_norm": 1.0519199834853972, "learning_rate": 1.0207612456747407e-05, "loss": 0.7652, "step": 295 },
    { "epoch": 0.15592515592515593, "grad_norm": 0.8942757233736588, "learning_rate": 1.0380622837370241e-05, "loss": 0.754, "step": 300 },
    { "epoch": 0.15852390852390852, "grad_norm": 0.9760267256597028, "learning_rate": 1.055363321799308e-05, "loss": 0.7597, "step": 305 },
    { "epoch": 0.16112266112266113, "grad_norm": 0.8750773264970739, "learning_rate": 1.0726643598615918e-05, "loss": 0.7552, "step": 310 },
    { "epoch": 0.16372141372141372, "grad_norm": 0.9317612467807546, "learning_rate": 1.0899653979238756e-05, "loss": 0.7619, "step": 315 },
    { "epoch": 0.16632016632016633, "grad_norm": 0.9585051662580469, "learning_rate": 1.1072664359861593e-05, "loss": 0.7562, "step": 320 },
    { "epoch": 0.16891891891891891, "grad_norm": 0.9833066117827967, "learning_rate": 1.124567474048443e-05, "loss": 0.7668, "step": 325 },
    { "epoch": 0.17151767151767153, "grad_norm": 0.9999136205277245, "learning_rate": 1.1418685121107267e-05, "loss": 0.7563, "step": 330 },
    { "epoch": 0.1741164241164241, "grad_norm": 1.063190427210389, "learning_rate": 1.1591695501730104e-05, "loss": 0.7463, "step": 335 },
    { "epoch": 0.17671517671517672, "grad_norm": 0.9998506539481437, "learning_rate": 1.1764705882352942e-05, "loss": 0.7533, "step": 340 },
    { "epoch": 0.1793139293139293, "grad_norm": 0.929168293634566, "learning_rate": 1.1937716262975781e-05, "loss": 0.7466, "step": 345 },
    { "epoch": 0.18191268191268192, "grad_norm": 1.018693960607738, "learning_rate": 1.2110726643598615e-05, "loss": 0.7489, "step": 350 },
    { "epoch": 0.1845114345114345, "grad_norm": 0.9657161121572101, "learning_rate": 1.2283737024221455e-05, "loss": 0.7639, "step": 355 },
    { "epoch": 0.18711018711018712, "grad_norm": 0.9274247630285816, "learning_rate": 1.2456747404844292e-05, "loss": 0.7458, "step": 360 },
    { "epoch": 0.1897089397089397, "grad_norm": 0.8709049483455183, "learning_rate": 1.262975778546713e-05, "loss": 0.7542, "step": 365 },
    { "epoch": 0.19230769230769232, "grad_norm": 0.8529475456705145, "learning_rate": 1.2802768166089967e-05, "loss": 0.7615, "step": 370 },
    { "epoch": 0.1949064449064449, "grad_norm": 0.8834877993689659, "learning_rate": 1.2975778546712803e-05, "loss": 0.7555, "step": 375 },
    { "epoch": 0.19750519750519752, "grad_norm": 0.8612036241498346, "learning_rate": 1.314878892733564e-05, "loss": 0.7455, "step": 380 },
    { "epoch": 0.2001039501039501, "grad_norm": 0.972005034702574, "learning_rate": 1.3321799307958478e-05, "loss": 0.7335, "step": 385 },
    { "epoch": 0.20270270270270271, "grad_norm": 0.8405468852505008, "learning_rate": 1.3494809688581316e-05, "loss": 0.7454, "step": 390 },
    { "epoch": 0.2053014553014553, "grad_norm": 0.94483984497754, "learning_rate": 1.3667820069204153e-05, "loss": 0.7509, "step": 395 },
    { "epoch": 0.2079002079002079, "grad_norm": 0.8568496193733218, "learning_rate": 1.3840830449826989e-05, "loss": 0.7386, "step": 400 },
    { "epoch": 0.2104989604989605, "grad_norm": 0.9305490201344858, "learning_rate": 1.4013840830449827e-05, "loss": 0.7325, "step": 405 },
    { "epoch": 0.2130977130977131, "grad_norm": 0.8391743588987977, "learning_rate": 1.4186851211072666e-05, "loss": 0.7394, "step": 410 },
    { "epoch": 0.2156964656964657, "grad_norm": 0.8904148072363134, "learning_rate": 1.4359861591695503e-05, "loss": 0.7659, "step": 415 },
    { "epoch": 0.2182952182952183, "grad_norm": 0.9494764558208273, "learning_rate": 1.4532871972318341e-05, "loss": 0.7303, "step": 420 },
    { "epoch": 0.2208939708939709, "grad_norm": 0.8729324500601073, "learning_rate": 1.4705882352941179e-05, "loss": 0.7464, "step": 425 },
    { "epoch": 0.2234927234927235, "grad_norm": 0.9426426724996545, "learning_rate": 1.4878892733564014e-05, "loss": 0.7425, "step": 430 },
    { "epoch": 0.2260914760914761, "grad_norm": 0.8397393999023687, "learning_rate": 1.5051903114186852e-05, "loss": 0.7225, "step": 435 },
    { "epoch": 0.2286902286902287, "grad_norm": 0.822001337030522, "learning_rate": 1.522491349480969e-05, "loss": 0.7514, "step": 440 },
    { "epoch": 0.2312889812889813, "grad_norm": 0.8196560129735319, "learning_rate": 1.539792387543253e-05, "loss": 0.7455, "step": 445 },
    { "epoch": 0.2338877338877339, "grad_norm": 0.9001216187487245, "learning_rate": 1.5570934256055366e-05, "loss": 0.7523, "step": 450 },
    { "epoch": 0.23648648648648649, "grad_norm": 0.9230142554852074, "learning_rate": 1.57439446366782e-05, "loss": 0.7569, "step": 455 },
    { "epoch": 0.2390852390852391, "grad_norm": 0.8290174186484409, "learning_rate": 1.5916955017301038e-05, "loss": 0.7428, "step": 460 },
    { "epoch": 0.24168399168399168, "grad_norm": 0.829715213003188, "learning_rate": 1.6089965397923876e-05, "loss": 0.7457, "step": 465 },
    { "epoch": 0.2442827442827443, "grad_norm": 0.8794988465121758, "learning_rate": 1.6262975778546713e-05, "loss": 0.7427, "step": 470 },
    { "epoch": 0.24688149688149688, "grad_norm": 0.860878867890723, "learning_rate": 1.6435986159169554e-05, "loss": 0.727, "step": 475 },
    { "epoch": 0.2494802494802495, "grad_norm": 0.8488363967170557, "learning_rate": 1.6608996539792388e-05, "loss": 0.7341, "step": 480 },
    { "epoch": 0.2520790020790021, "grad_norm": 0.9024495405776305, "learning_rate": 1.6782006920415226e-05, "loss": 0.7445, "step": 485 },
    { "epoch": 0.25467775467775466, "grad_norm": 0.8297075062381525, "learning_rate": 1.6955017301038063e-05, "loss": 0.7618, "step": 490 },
    { "epoch": 0.25727650727650725, "grad_norm": 0.9522709115263103, "learning_rate": 1.71280276816609e-05, "loss": 0.7524, "step": 495 },
    { "epoch": 0.2598752598752599, "grad_norm": 0.8862001527957881, "learning_rate": 1.730103806228374e-05, "loss": 0.7392, "step": 500 },
    { "epoch": 0.2624740124740125, "grad_norm": 0.8190856457278167, "learning_rate": 1.7474048442906576e-05, "loss": 0.7348, "step": 505 },
    { "epoch": 0.26507276507276506, "grad_norm": 0.8084717872755484, "learning_rate": 1.7647058823529414e-05, "loss": 0.7555, "step": 510 },
    { "epoch": 0.26767151767151764, "grad_norm": 0.8143004262657276, "learning_rate": 1.782006920415225e-05, "loss": 0.7493, "step": 515 },
    { "epoch": 0.2702702702702703, "grad_norm": 0.7439901950641524, "learning_rate": 1.799307958477509e-05, "loss": 0.7264, "step": 520 },
    { "epoch": 0.27286902286902287, "grad_norm": 0.7802119529056604, "learning_rate": 1.8166089965397926e-05, "loss": 0.7484, "step": 525 },
    { "epoch": 0.27546777546777546, "grad_norm": 0.7671220875794853, "learning_rate": 1.833910034602076e-05, "loss": 0.7365, "step": 530 },
    { "epoch": 0.27806652806652804, "grad_norm": 0.8013129805690585, "learning_rate": 1.8512110726643598e-05, "loss": 0.7586, "step": 535 },
    { "epoch": 0.2806652806652807, "grad_norm": 0.7731421991496061, "learning_rate": 1.868512110726644e-05, "loss": 0.7521, "step": 540 },
    { "epoch": 0.28326403326403327, "grad_norm": 0.8183545102345747, "learning_rate": 1.8858131487889276e-05, "loss": 0.7379, "step": 545 },
    { "epoch": 0.28586278586278585, "grad_norm": 0.761114380449014, "learning_rate": 1.9031141868512114e-05, "loss": 0.7489, "step": 550 },
    { "epoch": 0.28846153846153844, "grad_norm": 0.797967905949635, "learning_rate": 1.9204152249134948e-05, "loss": 0.7475, "step": 555 },
    { "epoch": 0.2910602910602911, "grad_norm": 0.8141772027308778, "learning_rate": 1.9377162629757786e-05, "loss": 0.7403, "step": 560 },
    { "epoch": 0.29365904365904366, "grad_norm": 1.0002732271715242, "learning_rate": 1.9550173010380623e-05, "loss": 0.7446, "step": 565 },
    { "epoch": 0.29625779625779625, "grad_norm": 0.7424317625579876, "learning_rate": 1.972318339100346e-05, "loss": 0.7432, "step": 570 },
    { "epoch": 0.29885654885654883, "grad_norm": 0.7975265418685308, "learning_rate": 1.98961937716263e-05, "loss": 0.7425, "step": 575 },
    { "epoch": 0.30145530145530147, "grad_norm": 0.8527920792318469, "learning_rate": 1.9999992683122277e-05, "loss": 0.7313, "step": 580 },
    { "epoch": 0.30405405405405406, "grad_norm": 0.7826703424284943, "learning_rate": 1.9999910368370826e-05, "loss": 0.7404, "step": 585 },
    { "epoch": 0.30665280665280664, "grad_norm": 0.7942647670210833, "learning_rate": 1.9999736593526133e-05, "loss": 0.7263, "step": 590 },
    { "epoch": 0.3092515592515592, "grad_norm": 0.7552220975281737, "learning_rate": 1.999947136017756e-05, "loss": 0.7353, "step": 595 },
    { "epoch": 0.31185031185031187, "grad_norm": 0.7790597350916263, "learning_rate": 1.9999114670750955e-05, "loss": 0.7478, "step": 600 },
    { "epoch": 0.31444906444906445, "grad_norm": 0.7982754500449706, "learning_rate": 1.9998666528508632e-05, "loss": 0.7414, "step": 605 },
    { "epoch": 0.31704781704781704, "grad_norm": 0.8159770553033799, "learning_rate": 1.9998126937549343e-05, "loss": 0.7285, "step": 610 },
    { "epoch": 0.3196465696465696, "grad_norm": 0.8888821616512309, "learning_rate": 1.9997495902808233e-05, "loss": 0.751, "step": 615 },
    { "epoch": 0.32224532224532226, "grad_norm": 0.7544060206964511, "learning_rate": 1.9996773430056806e-05, "loss": 0.7385, "step": 620 },
    { "epoch": 0.32484407484407485, "grad_norm": 0.7895944868586088, "learning_rate": 1.9995959525902856e-05, "loss": 0.7369, "step": 625 },
    { "epoch": 0.32744282744282743, "grad_norm": 0.7602727085172243, "learning_rate": 1.999505419779044e-05, "loss": 0.757, "step": 630 },
    { "epoch": 0.33004158004158, "grad_norm": 0.8764699729246701, "learning_rate": 1.9994057453999754e-05, "loss": 0.738, "step": 635 },
    { "epoch": 0.33264033264033266, "grad_norm": 0.7647288391752125, "learning_rate": 1.9992969303647124e-05, "loss": 0.7478, "step": 640 },
    { "epoch": 0.33523908523908524, "grad_norm": 0.7069726215488147, "learning_rate": 1.999178975668486e-05, "loss": 0.7149, "step": 645 },
    { "epoch": 0.33783783783783783, "grad_norm": 0.6497220082269107, "learning_rate": 1.9990518823901213e-05, "loss": 0.7496, "step": 650 },
    { "epoch": 0.3404365904365904, "grad_norm": 0.6610573730423013, "learning_rate": 1.9989156516920248e-05, "loss": 0.7297, "step": 655 },
    { "epoch": 0.34303534303534305, "grad_norm": 0.6668607876517594, "learning_rate": 1.9987702848201748e-05, "loss": 0.7193, "step": 660 },
    { "epoch": 0.34563409563409564, "grad_norm": 0.7860493254567829, "learning_rate": 1.99861578310411e-05, "loss": 0.7374, "step": 665 },
    { "epoch": 0.3482328482328482, "grad_norm": 0.8925562926124014, "learning_rate": 1.9984521479569176e-05, "loss": 0.7237, "step": 670 },
    { "epoch": 0.3508316008316008, "grad_norm": 0.7672501463240459, "learning_rate": 1.9982793808752193e-05, "loss": 0.7306, "step": 675 },
    { "epoch": 0.35343035343035345, "grad_norm": 0.7502385743686751, "learning_rate": 1.9980974834391583e-05, "loss": 0.7406, "step": 680 },
    { "epoch": 0.35602910602910603, "grad_norm": 0.7564023469276626, "learning_rate": 1.997906457312386e-05, "loss": 0.7354, "step": 685 },
    { "epoch": 0.3586278586278586, "grad_norm": 0.7147365409493106, "learning_rate": 1.9977063042420438e-05, "loss": 0.7312, "step": 690 },
    { "epoch": 0.3612266112266112, "grad_norm": 0.8321336652388966, "learning_rate": 1.9974970260587507e-05, "loss": 0.7364, "step": 695 },
    { "epoch": 0.36382536382536385, "grad_norm": 0.6733104515770179, "learning_rate": 1.9972786246765832e-05, "loss": 0.7273, "step": 700 },
    { "epoch": 0.36642411642411643, "grad_norm": 0.6923993722045795, "learning_rate": 1.9970511020930612e-05, "loss": 0.7259, "step": 705 },
    { "epoch": 0.369022869022869, "grad_norm": 0.7106843030691585, "learning_rate": 1.9968144603891272e-05, "loss": 0.7409, "step": 710 },
    { "epoch": 0.3716216216216216, "grad_norm": 0.6074610563242314, "learning_rate": 1.9965687017291268e-05, "loss": 0.7237, "step": 715 },
    { "epoch": 0.37422037422037424, "grad_norm": 0.6798181846974808, "learning_rate": 1.9963138283607918e-05, "loss": 0.7189, "step": 720 },
    { "epoch": 0.3768191268191268, "grad_norm": 0.7233959402973988, "learning_rate": 1.996049842615217e-05, "loss": 0.7524, "step": 725 },
    { "epoch": 0.3794178794178794, "grad_norm": 0.7818955262414797, "learning_rate": 1.9957767469068405e-05, "loss": 0.7259, "step": 730 },
    { "epoch": 0.382016632016632, "grad_norm": 0.7248772563760029, "learning_rate": 1.9954945437334204e-05, "loss": 0.7312, "step": 735 },
    { "epoch": 0.38461538461538464, "grad_norm": 0.7079790287253178, "learning_rate": 1.9952032356760125e-05, "loss": 0.7041, "step": 740 },
    { "epoch": 0.3872141372141372, "grad_norm": 0.7390341417388404, "learning_rate": 1.994902825398947e-05, "loss": 0.7133, "step": 745 },
    { "epoch": 0.3898128898128898, "grad_norm": 0.8111822578128921, "learning_rate": 1.9945933156498043e-05, "loss": 0.729, "step": 750 },
    { "epoch": 0.3924116424116424, "grad_norm": 0.7769690688975751, "learning_rate": 1.9942747092593877e-05, "loss": 0.715, "step": 755 },
    { "epoch": 0.39501039501039503, "grad_norm": 0.6870020477467483, "learning_rate": 1.9939470091417012e-05, "loss": 0.7132, "step": 760 },
    { "epoch": 0.3976091476091476, "grad_norm": 0.6911704853875393, "learning_rate": 1.99361021829392e-05, "loss": 0.7206, "step": 765 },
    { "epoch": 0.4002079002079002, "grad_norm": 0.6600143593403244, "learning_rate": 1.993264339796363e-05, "loss": 0.7145, "step": 770 },
    { "epoch": 0.4028066528066528, "grad_norm": 0.6726432015084747, "learning_rate": 1.992909376812468e-05, "loss": 0.739, "step": 775 },
    { "epoch": 0.40540540540540543, "grad_norm": 0.70615782283502, "learning_rate": 1.9925453325887574e-05, "loss": 0.7222, "step": 780 },
    { "epoch": 0.408004158004158, "grad_norm": 0.5887807785936404, "learning_rate": 1.992172210454814e-05, "loss": 0.7221, "step": 785 },
    { "epoch": 0.4106029106029106, "grad_norm": 0.6357715206079563, "learning_rate": 1.991790013823246e-05, "loss": 0.717, "step": 790 },
    { "epoch": 0.4132016632016632, "grad_norm": 0.6710571738289492, "learning_rate": 1.9913987461896597e-05, "loss": 0.7299, "step": 795 },
    { "epoch": 0.4158004158004158, "grad_norm": 0.66534846887862, "learning_rate": 1.990998411132624e-05, "loss": 0.719, "step": 800 },
    { "epoch": 0.4183991683991684, "grad_norm": 0.6658127042254826, "learning_rate": 1.9905890123136396e-05, "loss": 0.7156, "step": 805 },
    { "epoch": 0.420997920997921, "grad_norm": 0.7461519732525459, "learning_rate": 1.990170553477106e-05, "loss": 0.7281, "step": 810 },
    { "epoch": 0.4235966735966736, "grad_norm": 0.7960568157470115, "learning_rate": 1.9897430384502857e-05, "loss": 0.7229, "step": 815 },
    { "epoch": 0.4261954261954262, "grad_norm": 0.7377717323529744, "learning_rate": 1.9893064711432702e-05, "loss": 0.7207, "step": 820 },
    { "epoch": 0.4287941787941788, "grad_norm": 0.6333020229736416, "learning_rate": 1.988860855548944e-05, "loss": 0.7104, "step": 825 },
    { "epoch": 0.4313929313929314, "grad_norm": 0.6099863945288464, "learning_rate": 1.988406195742948e-05, "loss": 0.7203, "step": 830 },
    { "epoch": 0.433991683991684, "grad_norm": 0.6785164904650527, "learning_rate": 1.987942495883642e-05, "loss": 0.711, "step": 835 },
    { "epoch": 0.4365904365904366, "grad_norm": 0.6622509941324428, "learning_rate": 1.9874697602120682e-05, "loss": 0.7325, "step": 840 },
    { "epoch": 0.4391891891891892, "grad_norm": 0.6613983198156271, "learning_rate": 1.986987993051909e-05, "loss": 0.7233, "step": 845 },
    { "epoch": 0.4417879417879418, "grad_norm": 0.6956512036405856, "learning_rate": 1.9864971988094515e-05, "loss": 0.7207, "step": 850 },
    { "epoch": 0.44438669438669437, "grad_norm": 0.6797121388808018, "learning_rate": 1.9859973819735443e-05, "loss": 0.7359, "step": 855 },
    { "epoch": 0.446985446985447, "grad_norm": 0.6634402820404799, "learning_rate": 1.9854885471155586e-05, "loss": 0.7094, "step": 860 },
    { "epoch": 0.4495841995841996, "grad_norm": 0.6337794885487144, "learning_rate": 1.9849706988893433e-05, "loss": 0.7276, "step": 865 },
    { "epoch": 0.4521829521829522, "grad_norm": 0.6734603186331721, "learning_rate": 1.9844438420311863e-05, "loss": 0.7142, "step": 870 },
    { "epoch": 0.45478170478170477, "grad_norm": 0.7066192892075979, "learning_rate": 1.9839079813597687e-05, "loss": 0.7149, "step": 875 },
    { "epoch": 0.4573804573804574, "grad_norm": 0.6964600685285819, "learning_rate": 1.9833631217761204e-05, "loss": 0.7281, "step": 880 },
    { "epoch": 0.45997920997921, "grad_norm": 0.7109456157271579, "learning_rate": 1.9828092682635774e-05, "loss": 0.7332, "step": 885 },
    { "epoch": 0.4625779625779626, "grad_norm": 0.669236169004855, "learning_rate": 1.9822464258877345e-05, "loss": 0.7293, "step": 890 },
    { "epoch": 0.46517671517671516, "grad_norm": 0.6046679594816758, "learning_rate": 1.9816745997963996e-05, "loss": 0.706, "step": 895 },
    { "epoch": 0.4677754677754678, "grad_norm": 0.6553388635341802, "learning_rate": 1.981093795219546e-05, "loss": 0.7136, "step": 900 },
    { "epoch": 0.4703742203742204, "grad_norm": 0.6699423752938592, "learning_rate": 1.980504017469265e-05, "loss": 0.7056, "step": 905 },
    { "epoch": 0.47297297297297297, "grad_norm": 0.637041202100537, "learning_rate": 1.9799052719397188e-05, "loss": 0.7221, "step": 910 },
    { "epoch": 0.47557172557172556, "grad_norm": 0.657103082344547, "learning_rate": 1.979297564107088e-05, "loss": 0.7271, "step": 915 },
    { "epoch": 0.4781704781704782, "grad_norm": 0.6885064889983316, "learning_rate": 1.978680899529524e-05, "loss": 0.7159, "step": 920 },
    { "epoch": 0.4807692307692308, "grad_norm": 0.7043512832125569, "learning_rate": 1.9780552838470976e-05, "loss": 0.7057, "step": 925 },
    { "epoch": 0.48336798336798337, "grad_norm": 0.6627639804543833, "learning_rate": 1.977420722781746e-05, "loss": 0.7194, "step": 930 },
    { "epoch": 0.48596673596673595, "grad_norm": 0.7069767251302125, "learning_rate": 1.976777222137224e-05, "loss": 0.7144, "step": 935 },
    { "epoch": 0.4885654885654886, "grad_norm": 0.6090332104645865, "learning_rate": 1.9761247877990465e-05, "loss": 0.7161, "step": 940 },
    { "epoch": 0.4911642411642412, "grad_norm": 0.7085090978015706, "learning_rate": 1.9754634257344376e-05, "loss": 0.733, "step": 945 },
    { "epoch": 0.49376299376299376, "grad_norm": 0.6858008371045625, "learning_rate": 1.9747931419922756e-05, "loss": 0.7271, "step": 950 },
    { "epoch": 0.49636174636174635, "grad_norm": 0.6543468152194417, "learning_rate": 1.974113942703036e-05, "loss": 0.7052, "step": 955 },
    { "epoch": 0.498960498960499, "grad_norm": 0.7013937541029002, "learning_rate": 1.9734258340787376e-05, "loss": 0.7233, "step": 960 },
    { "epoch": 0.5015592515592515, "grad_norm": 0.6660787930797433, "learning_rate": 1.9727288224128852e-05, "loss": 0.7196, "step": 965 },
    { "epoch": 0.5041580041580042, "grad_norm": 0.6541474437978503, "learning_rate": 1.972022914080411e-05, "loss": 0.7061, "step": 970 },
    { "epoch": 0.5067567567567568, "grad_norm": 0.66883512467633, "learning_rate": 1.971308115537617e-05, "loss": 0.7146, "step": 975 },
    { "epoch": 0.5093555093555093, "grad_norm": 0.6381037219289445, "learning_rate": 1.970584433322116e-05, "loss": 0.7087, "step": 980 },
    { "epoch": 0.511954261954262, "grad_norm": 1.1110934200883047, "learning_rate": 1.969851874052771e-05, "loss": 0.73, "step": 985 },
    { "epoch": 0.5145530145530145, "grad_norm": 0.6310880004216817, "learning_rate": 1.969110444429637e-05, "loss": 0.7183, "step": 990 },
    { "epoch": 0.5171517671517671, "grad_norm": 0.6410220872400427, "learning_rate": 1.9683601512338963e-05, "loss": 0.7086, "step": 995 },
    { "epoch": 0.5197505197505198, "grad_norm": 0.5996299242899708, "learning_rate": 1.9676010013277994e-05, "loss": 0.7267, "step": 1000 },
    { "epoch": 0.5223492723492723, "grad_norm": 0.6590949790761541, "learning_rate": 1.9668330016546004e-05, "loss": 0.7374, "step": 1005 },
    { "epoch": 0.524948024948025, "grad_norm": 0.6221180388276606, "learning_rate": 1.9660561592384946e-05, "loss": 0.7249, "step": 1010 },
    { "epoch": 0.5275467775467776, "grad_norm": 0.6026562255053431, "learning_rate": 1.965270481184553e-05, "loss": 0.7092, "step": 1015 },
    { "epoch": 0.5301455301455301, "grad_norm": 0.6076685489615162, "learning_rate": 1.9644759746786598e-05, "loss": 0.7144, "step": 1020 },
    { "epoch": 0.5327442827442828, "grad_norm": 0.6466092379791593, "learning_rate": 1.9636726469874437e-05, "loss": 0.7021, "step": 1025 },
    { "epoch": 0.5353430353430353, "grad_norm": 0.6614878159031965, "learning_rate": 1.962860505458213e-05, "loss": 0.7147, "step": 1030 },
    { "epoch": 0.5379417879417879, "grad_norm": 0.642038696775677, "learning_rate": 1.962039557518888e-05, "loss": 0.7064, "step": 1035 },
    { "epoch": 0.5405405405405406, "grad_norm": 0.6053359618387539, "learning_rate": 1.961209810677934e-05, "loss": 0.7103, "step": 1040 },
    { "epoch": 0.5431392931392931, "grad_norm": 0.6208076453451457, "learning_rate": 1.960371272524291e-05, "loss": 0.717, "step": 1045 },
    { "epoch": 0.5457380457380457, "grad_norm": 0.6668283857181149, "learning_rate": 1.9595239507273058e-05, "loss": 0.7048, "step": 1050 },
    { "epoch": 0.5483367983367984, "grad_norm": 0.6229130725064413, "learning_rate": 1.9586678530366607e-05, "loss": 0.7159, "step": 1055 },
    { "epoch": 0.5509355509355509, "grad_norm": 0.5933645289790093, "learning_rate": 1.9578029872823038e-05, "loss": 0.7131, "step": 1060 },
    { "epoch": 0.5535343035343036, "grad_norm": 0.5857860624157782, "learning_rate": 1.9569293613743753e-05, "loss": 0.7037, "step": 1065 },
    { "epoch": 0.5561330561330561, "grad_norm": 0.6425150123453736, "learning_rate": 1.9560469833031383e-05, "loss": 0.7098, "step": 1070 },
    { "epoch": 0.5587318087318087, "grad_norm": 0.619116481041439, "learning_rate": 1.955155861138903e-05, "loss": 0.7176, "step": 1075 },
    { "epoch": 0.5613305613305614, "grad_norm": 0.635662450753945, "learning_rate": 1.9542560030319543e-05, "loss": 0.7104, "step": 1080 },
    { "epoch": 0.5639293139293139, "grad_norm": 0.6273126414001168, "learning_rate": 1.9533474172124763e-05, "loss": 0.7144, "step": 1085 },
    { "epoch": 0.5665280665280665, "grad_norm": 0.5746605470244842, "learning_rate": 1.952430111990478e-05, "loss": 0.7058, "step": 1090 },
    { "epoch": 0.5691268191268192, "grad_norm": 0.616041790905867, "learning_rate": 1.9515040957557162e-05, "loss": 0.7144, "step": 1095 },
    { "epoch": 0.5717255717255717, "grad_norm": 0.5604195549287683, "learning_rate": 1.950569376977621e-05, "loss": 0.7045, "step": 1100 },
    { "epoch": 0.5743243243243243, "grad_norm": 0.5873428291768331, "learning_rate": 1.9496259642052146e-05, "loss": 0.7121, "step": 1105 },
    { "epoch": 0.5769230769230769, "grad_norm": 0.6316260348752082, "learning_rate": 1.9486738660670373e-05, "loss": 0.7147, "step": 1110 },
    { "epoch": 0.5795218295218295, "grad_norm": 0.5874076313405716, "learning_rate": 1.9477130912710648e-05, "loss": 0.7279, "step": 1115 },
    { "epoch": 0.5821205821205822, "grad_norm": 0.6676704495322479, "learning_rate": 1.9467436486046317e-05, "loss": 0.7103, "step": 1120 },
    { "epoch": 0.5847193347193347, "grad_norm": 0.5827519526288305, "learning_rate": 1.9457655469343482e-05, "loss": 0.7014, "step": 1125 },
    { "epoch": 0.5873180873180873, "grad_norm": 0.6037871799524279, "learning_rate": 1.944778795206023e-05, "loss": 0.7053, "step": 1130 },
    { "epoch": 0.58991683991684, "grad_norm": 0.5712930594682987, "learning_rate": 1.9437834024445762e-05, "loss": 0.7177, "step": 1135 },
    { "epoch": 0.5925155925155925, "grad_norm": 0.5970793241519367, "learning_rate": 1.9427793777539615e-05, "loss": 0.7127, "step": 1140 },
    { "epoch": 0.5951143451143451, "grad_norm": 0.6209626687697077, "learning_rate": 1.9417667303170803e-05, "loss": 0.7063, "step": 1145 },
    { "epoch": 0.5977130977130977, "grad_norm": 0.606055879020448, "learning_rate": 1.940745469395698e-05, "loss": 0.695, "step": 1150 },
    { "epoch": 0.6003118503118503, "grad_norm": 0.6188361477212587, "learning_rate": 1.9397156043303608e-05, "loss": 0.6966, "step": 1155 },
    { "epoch": 0.6029106029106029, "grad_norm": 0.6290824936609826, "learning_rate": 1.9386771445403086e-05, "loss": 0.7031, "step": 1160 },
    { "epoch": 0.6055093555093555, "grad_norm": 0.6122110814030293, "learning_rate": 1.9376300995233894e-05, "loss": 0.7083, "step": 1165 },
    { "epoch": 0.6081081081081081, "grad_norm": 0.6273881899574172, "learning_rate": 1.9365744788559725e-05, "loss": 0.7023, "step": 1170 },
    { "epoch": 0.6107068607068608, "grad_norm": 0.5897253804349057, "learning_rate": 1.9355102921928606e-05, "loss": 0.7141, "step": 1175 },
    { "epoch": 0.6133056133056133, "grad_norm": 0.6492672708845987, "learning_rate": 1.9344375492672024e-05, "loss": 0.7097, "step": 1180 },
    { "epoch": 0.6159043659043659, "grad_norm": 0.5984352418529706, "learning_rate": 1.9333562598904027e-05, "loss": 0.7068, "step": 1185 },
    { "epoch": 0.6185031185031185, "grad_norm": 0.6401953574979402, "learning_rate": 1.9322664339520328e-05, "loss": 0.7007, "step": 1190 },
    { "epoch": 0.6211018711018711, "grad_norm": 0.6292156047444384, "learning_rate": 1.93116808141974e-05, "loss": 0.7114, "step": 1195 },
    { "epoch": 0.6237006237006237, "grad_norm": 0.6298280430381119, "learning_rate": 1.9300612123391574e-05, "loss": 0.7224, "step": 1200 },
    { "epoch": 0.6262993762993763, "grad_norm": 0.5647290693137603, "learning_rate": 1.92894583683381e-05, "loss": 0.7029, "step": 1205 },
    { "epoch": 0.6288981288981289, "grad_norm": 0.5843497274151073, "learning_rate": 1.927821965105024e-05, "loss": 0.6935, "step": 1210 },
    { "epoch": 0.6314968814968815, "grad_norm": 0.5742329412422685, "learning_rate": 1.9266896074318335e-05, "loss": 0.6921, "step": 1215 },
    { "epoch": 0.6340956340956341, "grad_norm": 0.6198304134928966, "learning_rate": 1.925548774170885e-05, "loss": 0.7022, "step": 1220 },
    { "epoch": 0.6366943866943867, "grad_norm": 0.5927355008313566, "learning_rate": 1.924399475756343e-05, "loss": 0.7043, "step": 1225 },
    { "epoch": 0.6392931392931392, "grad_norm": 0.5675856487929543, "learning_rate": 1.9232417226997964e-05, "loss": 0.6979, "step": 1230 },
    { "epoch": 0.6418918918918919, "grad_norm": 0.5801729136751573, "learning_rate": 1.9220755255901604e-05, "loss": 0.7128, "step": 1235 },
    { "epoch": 0.6444906444906445, "grad_norm": 0.5455856005670234, "learning_rate": 1.92090089509358e-05, "loss": 0.7154, "step": 1240 },
    { "epoch": 0.6470893970893971, "grad_norm": 0.578411372283767, "learning_rate": 1.9197178419533328e-05, "loss": 0.726, "step": 1245 },
    { "epoch": 0.6496881496881497, "grad_norm": 0.6165732640247198, "learning_rate": 1.918526376989731e-05, "loss": 0.7097, "step": 1250 },
    { "epoch": 0.6522869022869023, "grad_norm": 0.579722849123064, "learning_rate": 1.9173265111000218e-05, "loss": 0.7181, "step": 1255 },
    { "epoch": 0.6548856548856549, "grad_norm": 0.6384864268465269, "learning_rate": 1.9161182552582885e-05, "loss": 0.7048, "step": 1260 },
    { "epoch": 0.6574844074844075, "grad_norm": 0.5442756986247173, "learning_rate": 1.9149016205153494e-05, "loss": 0.6983, "step": 1265 },
    { "epoch": 0.66008316008316, "grad_norm": 0.5876328008368029, "learning_rate": 1.9136766179986566e-05, "loss": 0.7058, "step": 1270 },
    { "epoch": 0.6626819126819127, "grad_norm": 0.556923286518879, "learning_rate": 1.9124432589121945e-05, "loss": 0.7048, "step": 1275 },
    { "epoch": 0.6652806652806653, "grad_norm": 0.5614004903256652, "learning_rate": 1.9112015545363793e-05, "loss": 0.703, "step": 1280 },
    { "epoch": 0.6678794178794178, "grad_norm": 0.6029085739466059, "learning_rate": 1.9099515162279515e-05, "loss": 0.7149, "step": 1285 },
    { "epoch": 0.6704781704781705, "grad_norm": 0.5542833475447663, "learning_rate": 1.9086931554198756e-05, "loss": 0.7059, "step": 1290 },
    { "epoch": 0.6730769230769231, "grad_norm": 0.596418111214614, "learning_rate": 1.907426483621235e-05, "loss": 0.7187, "step": 1295 },
    { "epoch": 0.6756756756756757, "grad_norm": 0.5964335487077739, "learning_rate": 1.9061515124171254e-05, "loss": 0.7023, "step": 1300 },
    { "epoch": 0.6782744282744283, "grad_norm": 0.6527135172773815, "learning_rate": 1.90486825346855e-05, "loss": 0.6985, "step": 1305 },
    { "epoch": 0.6808731808731808, "grad_norm": 0.5854908036035414, "learning_rate": 1.9035767185123118e-05, "loss": 0.7097, "step": 1310 },
    { "epoch": 0.6834719334719335, "grad_norm": 0.5630331628185049, "learning_rate": 1.9022769193609077e-05, "loss": 0.6973, "step": 1315 },
    { "epoch": 0.6860706860706861, "grad_norm": 0.5872323843899289, "learning_rate": 1.900968867902419e-05, "loss": 0.7069, "step": 1320 },
    { "epoch": 0.6886694386694386, "grad_norm": 0.5845474538391455, "learning_rate": 1.899652576100405e-05, "loss": 0.7169, "step": 1325 },
    { "epoch": 0.6912681912681913, "grad_norm": 0.6164999248623418, "learning_rate": 1.8983280559937896e-05, "loss": 0.7005, "step": 1330 },
    { "epoch": 0.6938669438669439, "grad_norm": 0.6124510306800306, "learning_rate": 1.896995319696755e-05, "loss": 0.701, "step": 1335 },
    { "epoch": 0.6964656964656964, "grad_norm": 0.6267010331850633, "learning_rate": 1.8956543793986287e-05, "loss": 0.7164, "step": 1340 },
    { "epoch": 0.6990644490644491, "grad_norm": 0.5961399562877898, "learning_rate": 1.8943052473637734e-05, "loss": 0.7213, "step": 1345 },
    { "epoch": 0.7016632016632016, "grad_norm": 0.6174817199293855, "learning_rate": 1.8929479359314742e-05, "loss": 0.6985, "step": 1350 },
    { "epoch": 0.7042619542619543, "grad_norm": 0.5851247926140993, "learning_rate": 1.891582457515825e-05, "loss": 0.6935, "step": 1355 },
    { "epoch": 0.7068607068607069, "grad_norm": 0.5776477138388799, "learning_rate": 1.890208824605616e-05, "loss": 0.708, "step": 1360 },
    { "epoch": 0.7094594594594594, "grad_norm": 0.5309187069380664, "learning_rate": 1.888827049764219e-05, "loss": 0.7003, "step": 1365 },
    { "epoch": 0.7120582120582121, "grad_norm": 0.5496529326574807, "learning_rate": 1.8874371456294732e-05, "loss": 0.6999, "step": 1370 },
    { "epoch": 0.7146569646569647, "grad_norm": 0.5339269514909717, "learning_rate": 1.8860391249135692e-05, "loss": 0.6966, "step": 1375 },
    { "epoch": 0.7172557172557172, "grad_norm": 0.5427973139574223, "learning_rate": 1.884633000402931e-05, "loss": 0.6936, "step": 1380 },
    { "epoch": 0.7198544698544699, "grad_norm": 0.5672590602791164, "learning_rate": 1.883218784958103e-05, "loss": 0.689, "step": 1385 },
    { "epoch": 0.7224532224532224, "grad_norm": 0.5402673826941384, "learning_rate": 1.8817964915136277e-05, "loss": 0.7072, "step": 1390 },
    { "epoch": 0.725051975051975, "grad_norm": 0.5601951835510618, "learning_rate": 1.8803661330779316e-05, "loss": 0.7059, "step": 1395 },
    { "epoch": 0.7276507276507277, "grad_norm": 0.5857868039965994, "learning_rate": 1.8789277227332025e-05, "loss": 0.6799, "step": 1400 },
    { "epoch": 0.7302494802494802, "grad_norm": 0.5347885501584507, "learning_rate": 1.877481273635274e-05, "loss": 0.6956, "step": 1405 },
    { "epoch": 0.7328482328482329, "grad_norm": 0.5502849975189612, "learning_rate": 1.8760267990135007e-05, "loss": 0.7059, "step": 1410 },
    { "epoch": 0.7354469854469855, "grad_norm": 0.5178257228797314, "learning_rate": 1.874564312170641e-05, "loss": 0.7019, "step": 1415 },
    { "epoch": 0.738045738045738, "grad_norm": 0.5607208259193451, "learning_rate": 1.8730938264827322e-05, "loss": 0.6963, "step": 1420 },
    { "epoch": 0.7406444906444907, "grad_norm": 0.5821162244405798, "learning_rate": 1.8716153553989716e-05, "loss": 0.6965, "step": 1425 },
    { "epoch": 0.7432432432432432, "grad_norm": 0.5495747594677731, "learning_rate": 1.8701289124415902e-05, "loss": 0.6963, "step": 1430 },
    { "epoch": 0.7458419958419958, "grad_norm": 0.528297292924797, "learning_rate": 1.868634511205731e-05, "loss": 0.6917, "step": 1435 },
    { "epoch": 0.7484407484407485, "grad_norm": 0.5326976953811587, "learning_rate": 1.8671321653593244e-05, "loss": 0.6989, "step": 1440 },
    { "epoch": 0.751039501039501, "grad_norm": 0.5584186177167862, "learning_rate": 1.8656218886429624e-05, "loss": 0.7031, "step": 1445 },
    { "epoch": 0.7536382536382537, "grad_norm": 0.5570198454055475, "learning_rate": 1.8641036948697736e-05, "loss": 0.7023, "step": 1450 },
    { "epoch": 0.7562370062370062, "grad_norm": 0.6760644666500142, "learning_rate": 1.8625775979252976e-05, "loss": 0.6789, "step": 1455 },
    { "epoch": 0.7588357588357588, "grad_norm": 0.5638434234347486, "learning_rate": 1.8610436117673557e-05, "loss": 0.6986, "step": 1460 },
    { "epoch": 0.7614345114345115, "grad_norm": 0.5493778378867652, "learning_rate": 1.8595017504259253e-05, "loss": 0.6785, "step": 1465 },
    { "epoch": 0.764033264033264, "grad_norm": 0.6031826832296197, "learning_rate": 1.8579520280030118e-05, "loss": 0.6995, "step": 1470 },
    { "epoch": 0.7666320166320166, "grad_norm": 0.5143780295012962, "learning_rate": 1.8563944586725175e-05, "loss": 0.6846, "step": 1475 },
    { "epoch": 0.7692307692307693, "grad_norm": 0.5539515728601708, "learning_rate": 1.8548290566801132e-05, "loss": 0.7238, "step": 1480 },
    { "epoch": 0.7718295218295218, "grad_norm": 0.5421409786755411, "learning_rate": 1.853255836343109e-05, "loss": 0.6999, "step": 1485 },
    { "epoch": 0.7744282744282744, "grad_norm": 0.6141673616193241, "learning_rate": 1.8516748120503217e-05, "loss": 0.6899, "step": 1490 },
    { "epoch": 0.777027027027027, "grad_norm": 0.6088321493956566, "learning_rate": 1.8500859982619438e-05, "loss": 0.6985, "step": 1495 },
    { "epoch": 0.7796257796257796, "grad_norm": 0.5792987579663321, "learning_rate": 1.848489409509411e-05, "loss": 0.7015, "step": 1500 },
    { "epoch": 0.7822245322245323, "grad_norm": 0.5889071004078938, "learning_rate": 1.84688506039527e-05, "loss": 0.6961, "step": 1505 },
    { "epoch": 0.7848232848232848, "grad_norm": 0.6583485451018368, "learning_rate": 1.845272965593045e-05, "loss": 0.6999, "step": 1510 },
    { "epoch": 0.7874220374220374, "grad_norm": 0.5605926278169279, "learning_rate": 1.843653139847101e-05, "loss": 0.6862, "step": 1515 },
    { "epoch": 0.7900207900207901, "grad_norm": 0.5528728709462963, "learning_rate": 1.842025597972513e-05, "loss": 0.697, "step": 1520 },
    { "epoch": 0.7926195426195426, "grad_norm": 0.5793992149063935, "learning_rate": 1.840390354854927e-05, "loss": 0.6981, "step": 1525 },
    { "epoch": 0.7952182952182952, "grad_norm": 0.5629064758499602, "learning_rate": 1.8387474254504265e-05, "loss": 0.6847, "step": 1530 },
    { "epoch": 0.7978170478170478, "grad_norm": 0.5625360669791298, "learning_rate": 1.8370968247853933e-05, "loss": 0.7102, "step": 1535 },
    { "epoch": 0.8004158004158004, "grad_norm": 0.575364667753087, "learning_rate": 1.8354385679563723e-05, "loss": 0.7028, "step": 1540 },
    { "epoch": 0.803014553014553, "grad_norm": 0.5391664994143878, "learning_rate": 1.8337726701299313e-05, "loss": 0.6972, "step": 1545 },
    { "epoch": 0.8056133056133056, "grad_norm": 0.5411008753649549, "learning_rate": 1.8320991465425243e-05, "loss": 0.6903, "step": 1550 },
    { "epoch": 0.8082120582120582, "grad_norm": 0.5247464270778599, "learning_rate": 1.8304180125003505e-05, "loss": 0.6892, "step": 1555 },
    { "epoch": 0.8108108108108109, "grad_norm": 0.5616645790978936, "learning_rate": 1.8287292833792157e-05, "loss": 0.6996, "step": 1560 },
    { "epoch": 0.8134095634095634, "grad_norm": 0.5496955252051037, "learning_rate": 1.8270329746243903e-05, "loss": 0.7093, "step": 1565 },
    { "epoch": 0.816008316008316, "grad_norm": 0.5727569676124988, "learning_rate": 1.8253291017504694e-05, "loss": 0.6921, "step": 1570 },
    { "epoch": 0.8186070686070686, "grad_norm": 0.5394235138224174, "learning_rate": 1.8236176803412296e-05, "loss": 0.6915, "step": 1575 },
    { "epoch": 0.8212058212058212, "grad_norm": 0.5545057246411995, "learning_rate": 1.8218987260494877e-05, "loss": 0.7076, "step": 1580 },
    { "epoch": 0.8238045738045738, "grad_norm": 0.5440533432959407, "learning_rate": 1.820172254596956e-05, "loss": 0.6765, "step": 1585 },
    { "epoch": 0.8264033264033264, "grad_norm": 0.5572733236733112, "learning_rate": 1.8184382817741005e-05, "loss": 0.699, "step": 1590 },
    { "epoch": 0.829002079002079, "grad_norm": 0.5531936584129153, "learning_rate": 1.816696823439995e-05, "loss": 0.6921, "step": 1595 },
    { "epoch": 0.8316008316008316, "grad_norm": 0.5885460971318106, "learning_rate": 1.814947895522176e-05, "loss": 0.7058, "step": 1600 },
    { "epoch": 0.8341995841995842, "grad_norm": 0.5258234834971192, "learning_rate": 1.8131915140164985e-05, "loss": 0.7075, "step": 1605 },
    { "epoch": 0.8367983367983368, "grad_norm": 0.6039050150490132, "learning_rate": 1.8114276949869877e-05, "loss": 0.7022, "step": 1610 },
    { "epoch": 0.8393970893970893, "grad_norm": 0.5562997078883312, "learning_rate": 1.809656454565693e-05, "loss": 0.7079, "step": 1615 },
    { "epoch": 0.841995841995842, "grad_norm": 0.5537066778477406, "learning_rate": 1.8078778089525423e-05, "loss": 0.6982, "step": 1620 },
    { "epoch": 0.8445945945945946, "grad_norm": 0.5336085274714755, "learning_rate": 1.80609177441519e-05, "loss": 0.6813, "step": 1625 },
    { "epoch": 0.8471933471933472, "grad_norm": 0.5921356105703777, "learning_rate": 1.8042983672888706e-05, "loss": 0.6982, "step": 1630 },
    { "epoch": 0.8497920997920998, "grad_norm": 0.5823716073373996, "learning_rate": 1.8024976039762507e-05, "loss": 0.7007, "step": 1635 },
    { "epoch": 0.8523908523908524, "grad_norm": 0.5413045444506639, "learning_rate": 1.8006895009472747e-05, "loss": 0.693, "step": 1640 },
    { "epoch": 0.854989604989605, "grad_norm": 0.5634810470157456, "learning_rate": 1.7988740747390182e-05, "loss": 0.6973, "step": 1645 },
    { "epoch": 0.8575883575883576, "grad_norm": 0.5742831274552044, "learning_rate": 1.797051341955536e-05, "loss": 0.6983, "step": 1650 },
    { "epoch": 0.8601871101871101, "grad_norm": 0.5870076478023661, "learning_rate": 1.7952213192677074e-05, "loss": 0.7139, "step": 1655 },
    { "epoch": 0.8627858627858628, "grad_norm": 0.5730171778092863, "learning_rate": 1.7933840234130878e-05, "loss": 0.7048, "step": 1660 },
    { "epoch": 0.8653846153846154, "grad_norm": 0.5104051114581488, "learning_rate": 1.7915394711957523e-05, "loss": 0.7073, "step": 1665 },
    { "epoch": 0.867983367983368, "grad_norm": 0.5580415382036159, "learning_rate": 1.7896876794861443e-05, "loss": 0.6942, "step": 1670 },
    { "epoch": 0.8705821205821206, "grad_norm": 0.5837643043381491, "learning_rate": 1.7878286652209196e-05, "loss": 0.7025, "step": 1675 },
    { "epoch": 0.8731808731808732, "grad_norm": 0.5423350178745967, "learning_rate": 1.785962445402792e-05, "loss": 0.6952, "step": 1680 },
    { "epoch": 0.8757796257796258, "grad_norm": 0.5729568854084454, "learning_rate": 1.7840890371003795e-05, "loss": 0.6966, "step": 1685 },
    { "epoch": 0.8783783783783784, "grad_norm": 0.5553594551886265, "learning_rate": 1.782208457448044e-05, "loss": 0.7013, "step": 1690 },
    { "epoch": 0.8809771309771309, "grad_norm": 0.535467096093925, "learning_rate": 1.7803207236457404e-05, "loss": 0.7082, "step": 1695 },
    { "epoch": 0.8835758835758836, "grad_norm": 0.5486284072585131, "learning_rate": 1.778425852958853e-05, "loss": 0.6666, "step": 1700 },
    { "epoch": 0.8861746361746362, "grad_norm": 0.5078876333931026, "learning_rate": 1.7765238627180424e-05, "loss": 0.6894, "step": 1705 },
    { "epoch": 0.8887733887733887, "grad_norm": 0.5667869632736022, "learning_rate": 1.7746147703190857e-05, "loss": 0.704, "step": 1710 },
    { "epoch": 0.8913721413721414, "grad_norm": 0.574959887360108, "learning_rate": 1.7726985932227156e-05, "loss": 0.7107, "step": 1715 },
    { "epoch": 0.893970893970894, "grad_norm": 0.5224789850325783, "learning_rate": 1.7707753489544628e-05, "loss": 0.7047, "step": 1720 },
    { "epoch": 0.8965696465696466, "grad_norm": 0.5527415568002146, "learning_rate": 1.768845055104495e-05, "loss": 0.7091, "step": 1725 },
    { "epoch": 0.8991683991683992, "grad_norm": 0.5812394569236012, "learning_rate": 1.7669077293274564e-05, "loss": 0.6862, "step": 1730 },
    { "epoch": 0.9017671517671517, "grad_norm": 0.5235725170689791, "learning_rate": 1.764963389342305e-05, "loss": 0.6811, "step": 1735 },
    { "epoch": 0.9043659043659044, "grad_norm": 0.5171430012007782, "learning_rate": 1.7630120529321518e-05, "loss": 0.6846, "step": 1740 },
    { "epoch": 0.906964656964657, "grad_norm": 0.528693038590328, "learning_rate": 1.7610537379440987e-05, "loss": 0.6915, "step": 1745 },
    { "epoch": 0.9095634095634095, "grad_norm": 0.5364540922380395, "learning_rate": 1.759088462289072e-05, "loss": 0.684, "step": 1750 },
    { "epoch": 0.9121621621621622, "grad_norm": 0.5660558568326042, "learning_rate": 1.7571162439416632e-05, "loss": 0.6955, "step": 1755 },
    { "epoch": 0.9147609147609148, "grad_norm": 0.5672957617937873, "learning_rate": 1.755137100939961e-05, "loss": 0.6988, "step": 1760 },
    { "epoch": 0.9173596673596673, "grad_norm": 0.5530873304373302, "learning_rate": 1.753151051385388e-05, "loss": 0.6833, "step": 1765 },
    { "epoch": 0.91995841995842, "grad_norm": 0.5178111657664748, "learning_rate": 1.7511581134425347e-05, "loss": 0.7073, "step": 1770 },
    { "epoch": 0.9225571725571725, "grad_norm": 0.5721288757020301, "learning_rate": 1.7491583053389937e-05, "loss": 0.6863, "step": 1775 },
    { "epoch": 0.9251559251559252, "grad_norm": 0.5726074152322754, "learning_rate": 1.7471516453651925e-05, "loss": 0.6885, "step": 1780 },
    { "epoch": 0.9277546777546778, "grad_norm": 0.5569113799035454, "learning_rate": 1.7451381518742264e-05, "loss": 0.6919, "step": 1785 },
    { "epoch": 0.9303534303534303, "grad_norm": 0.5349940876609687, "learning_rate": 1.7431178432816905e-05, "loss": 0.6888, "step": 1790 },
    { "epoch": 0.932952182952183, "grad_norm": 0.48085486147721074, "learning_rate": 1.7410907380655118e-05, "loss": 0.6892, "step": 1795 },
    { "epoch": 0.9355509355509356, "grad_norm": 0.5162470850450532, "learning_rate": 1.7390568547657797e-05, "loss": 0.6844, "step": 1800 },
    { "epoch": 0.9381496881496881, "grad_norm": 0.5500432932817269, "learning_rate": 1.7370162119845768e-05, "loss": 0.677, "step": 1805 },
    { "epoch": 0.9407484407484408, "grad_norm": 0.5691270831237378, "learning_rate": 1.734968828385808e-05, "loss": 0.6816, "step": 1810 },
    { "epoch": 0.9433471933471933, "grad_norm": 0.5353076421264558, "learning_rate": 1.7329147226950303e-05, "loss": 0.6825, "step": 1815 },
    { "epoch": 0.9459459459459459, "grad_norm": 0.552477154180168, "learning_rate": 1.7308539136992823e-05, "loss": 0.6893, "step": 1820 },
    { "epoch": 0.9485446985446986, "grad_norm": 0.5280777987730796, "learning_rate": 1.7287864202469117e-05, "loss": 0.7004, "step": 1825 },
    { "epoch": 0.9511434511434511, "grad_norm": 0.5437828698378319, "learning_rate": 1.7267122612474013e-05, "loss": 0.6761, "step": 1830 },
    { "epoch": 0.9537422037422038,
|
"grad_norm": 0.5687279165024458, |
|
"learning_rate": 1.7246314556711994e-05, |
|
"loss": 0.6894, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.9563409563409564, |
|
"grad_norm": 0.5740312633264971, |
|
"learning_rate": 1.7225440225495436e-05, |
|
"loss": 0.6914, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9589397089397089, |
|
"grad_norm": 0.5573795518397149, |
|
"learning_rate": 1.720449980974288e-05, |
|
"loss": 0.6771, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 0.5351408449090207, |
|
"learning_rate": 1.7183493500977277e-05, |
|
"loss": 0.6932, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9641372141372141, |
|
"grad_norm": 0.5528674527887268, |
|
"learning_rate": 1.7162421491324247e-05, |
|
"loss": 0.6836, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.9667359667359667, |
|
"grad_norm": 0.5406423387911308, |
|
"learning_rate": 1.7141283973510313e-05, |
|
"loss": 0.691, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9693347193347194, |
|
"grad_norm": 0.5420681920741066, |
|
"learning_rate": 1.712008114086115e-05, |
|
"loss": 0.7039, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.9719334719334719, |
|
"grad_norm": 0.496824504759365, |
|
"learning_rate": 1.7098813187299786e-05, |
|
"loss": 0.692, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9745322245322245, |
|
"grad_norm": 0.5324381566943999, |
|
"learning_rate": 1.707748030734488e-05, |
|
"loss": 0.6776, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.9771309771309772, |
|
"grad_norm": 0.5230259681581492, |
|
"learning_rate": 1.7056082696108896e-05, |
|
"loss": 0.6847, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9797297297297297, |
|
"grad_norm": 0.5404603350045258, |
|
"learning_rate": 1.7034620549296336e-05, |
|
"loss": 0.6896, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.9823284823284824, |
|
"grad_norm": 0.5198497643717813, |
|
"learning_rate": 1.701309406320196e-05, |
|
"loss": 0.6676, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9849272349272349, |
|
"grad_norm": 0.49415192104030464, |
|
"learning_rate": 1.699150343470897e-05, |
|
"loss": 0.6839, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.9875259875259875, |
|
"grad_norm": 0.5044834255400932, |
|
"learning_rate": 1.696984886128723e-05, |
|
"loss": 0.6913, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9901247401247402, |
|
"grad_norm": 0.5398617776602235, |
|
"learning_rate": 1.6948130540991443e-05, |
|
"loss": 0.6874, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.9927234927234927, |
|
"grad_norm": 0.5225589262440207, |
|
"learning_rate": 1.6926348672459347e-05, |
|
"loss": 0.6822, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9953222453222453, |
|
"grad_norm": 0.5351932477818484, |
|
"learning_rate": 1.6904503454909905e-05, |
|
"loss": 0.668, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.997920997920998, |
|
"grad_norm": 0.5596299212706576, |
|
"learning_rate": 1.688259508814147e-05, |
|
"loss": 0.6884, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.7763931155204773, |
|
"eval_runtime": 106.5617, |
|
"eval_samples_per_second": 77.045, |
|
"eval_steps_per_second": 1.211, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 1.0005197505197505, |
|
"grad_norm": 0.7015242091925799, |
|
"learning_rate": 1.6860623772529964e-05, |
|
"loss": 0.6682, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.003118503118503, |
|
"grad_norm": 0.6034586283492043, |
|
"learning_rate": 1.6838589709027043e-05, |
|
"loss": 0.6255, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0057172557172558, |
|
"grad_norm": 0.6673777384785803, |
|
"learning_rate": 1.681649309915827e-05, |
|
"loss": 0.615, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.0083160083160083, |
|
"grad_norm": 0.55969047689154, |
|
"learning_rate": 1.6794334145021252e-05, |
|
"loss": 0.6276, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0109147609147608, |
|
"grad_norm": 0.5655945464921515, |
|
"learning_rate": 1.677211304928381e-05, |
|
"loss": 0.6072, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.0135135135135136, |
|
"grad_norm": 0.5544352654106766, |
|
"learning_rate": 1.6749830015182106e-05, |
|
"loss": 0.604, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0161122661122661, |
|
"grad_norm": 0.5513580613594744, |
|
"learning_rate": 1.6727485246518813e-05, |
|
"loss": 0.6087, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.0187110187110187, |
|
"grad_norm": 0.5475994898582014, |
|
"learning_rate": 1.6705078947661224e-05, |
|
"loss": 0.6125, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0213097713097714, |
|
"grad_norm": 0.5469347962338588, |
|
"learning_rate": 1.668261132353939e-05, |
|
"loss": 0.6079, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.023908523908524, |
|
"grad_norm": 0.6609443347963427, |
|
"learning_rate": 1.6660082579644257e-05, |
|
"loss": 0.6085, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.0265072765072765, |
|
"grad_norm": 0.5735256704279655, |
|
"learning_rate": 1.6637492922025767e-05, |
|
"loss": 0.5988, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.0291060291060292, |
|
"grad_norm": 0.5604570622446723, |
|
"learning_rate": 1.6614842557291003e-05, |
|
"loss": 0.605, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0317047817047817, |
|
"grad_norm": 0.5716162653407055, |
|
"learning_rate": 1.6592131692602257e-05, |
|
"loss": 0.6199, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.0343035343035343, |
|
"grad_norm": 0.572255735683218, |
|
"learning_rate": 1.6569360535675177e-05, |
|
"loss": 0.6136, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0369022869022868, |
|
"grad_norm": 0.5548434711803698, |
|
"learning_rate": 1.654652929477684e-05, |
|
"loss": 0.6292, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.0395010395010396, |
|
"grad_norm": 0.5372326277134161, |
|
"learning_rate": 1.6523638178723863e-05, |
|
"loss": 0.615, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.042099792099792, |
|
"grad_norm": 0.5629251525957598, |
|
"learning_rate": 1.6500687396880483e-05, |
|
"loss": 0.5994, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.0446985446985446, |
|
"grad_norm": 0.5386319104306212, |
|
"learning_rate": 1.6477677159156647e-05, |
|
"loss": 0.6074, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0472972972972974, |
|
"grad_norm": 0.5344519142234625, |
|
"learning_rate": 1.6454607676006085e-05, |
|
"loss": 0.6093, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.04989604989605, |
|
"grad_norm": 0.5911538942849666, |
|
"learning_rate": 1.64314791584244e-05, |
|
"loss": 0.6219, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0524948024948024, |
|
"grad_norm": 0.5747632841380809, |
|
"learning_rate": 1.6408291817947126e-05, |
|
"loss": 0.6229, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.0550935550935552, |
|
"grad_norm": 0.5553970996601802, |
|
"learning_rate": 1.6385045866647797e-05, |
|
"loss": 0.6131, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0576923076923077, |
|
"grad_norm": 0.54652804470796, |
|
"learning_rate": 1.6361741517136e-05, |
|
"loss": 0.6189, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.0602910602910602, |
|
"grad_norm": 0.5652320668164962, |
|
"learning_rate": 1.633837898255545e-05, |
|
"loss": 0.6206, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.062889812889813, |
|
"grad_norm": 0.5377492795503913, |
|
"learning_rate": 1.631495847658202e-05, |
|
"loss": 0.6246, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.0654885654885655, |
|
"grad_norm": 0.5701190357161201, |
|
"learning_rate": 1.6291480213421796e-05, |
|
"loss": 0.6151, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.068087318087318, |
|
"grad_norm": 0.5661751524816903, |
|
"learning_rate": 1.626794440780911e-05, |
|
"loss": 0.6155, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.0706860706860706, |
|
"grad_norm": 0.563344281114797, |
|
"learning_rate": 1.62443512750046e-05, |
|
"loss": 0.6065, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0732848232848233, |
|
"grad_norm": 0.5605142234460203, |
|
"learning_rate": 1.6220701030793203e-05, |
|
"loss": 0.6182, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.0758835758835759, |
|
"grad_norm": 0.5276428593516702, |
|
"learning_rate": 1.6196993891482216e-05, |
|
"loss": 0.622, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0784823284823284, |
|
"grad_norm": 0.5855580679475535, |
|
"learning_rate": 1.6173230073899303e-05, |
|
"loss": 0.613, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.5464688542980072, |
|
"learning_rate": 1.6149409795390503e-05, |
|
"loss": 0.6109, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0836798336798337, |
|
"grad_norm": 0.523861090524199, |
|
"learning_rate": 1.6125533273818257e-05, |
|
"loss": 0.5932, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.0862785862785862, |
|
"grad_norm": 0.5800436769814354, |
|
"learning_rate": 1.6101600727559423e-05, |
|
"loss": 0.5974, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.088877338877339, |
|
"grad_norm": 0.5314763619677401, |
|
"learning_rate": 1.6077612375503244e-05, |
|
"loss": 0.6233, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.0914760914760915, |
|
"grad_norm": 0.5292466384443512, |
|
"learning_rate": 1.605356843704938e-05, |
|
"loss": 0.6082, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.094074844074844, |
|
"grad_norm": 0.5877188139637917, |
|
"learning_rate": 1.6029469132105886e-05, |
|
"loss": 0.6255, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.0966735966735968, |
|
"grad_norm": 0.5760369419098388, |
|
"learning_rate": 1.6005314681087208e-05, |
|
"loss": 0.6157, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0992723492723493, |
|
"grad_norm": 0.630752472432159, |
|
"learning_rate": 1.598110530491216e-05, |
|
"loss": 0.6175, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.1018711018711018, |
|
"grad_norm": 0.5608953698596442, |
|
"learning_rate": 1.595684122500191e-05, |
|
"loss": 0.6177, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1044698544698546, |
|
"grad_norm": 0.573695393031942, |
|
"learning_rate": 1.593252266327794e-05, |
|
"loss": 0.6243, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.107068607068607, |
|
"grad_norm": 0.5935701794951512, |
|
"learning_rate": 1.590814984216004e-05, |
|
"loss": 0.6134, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.1096673596673596, |
|
"grad_norm": 0.5895903291761935, |
|
"learning_rate": 1.588372298456426e-05, |
|
"loss": 0.6082, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.1122661122661124, |
|
"grad_norm": 0.5711362021969438, |
|
"learning_rate": 1.5859242313900866e-05, |
|
"loss": 0.6048, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.114864864864865, |
|
"grad_norm": 0.5761578602169135, |
|
"learning_rate": 1.583470805407231e-05, |
|
"loss": 0.619, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.1174636174636174, |
|
"grad_norm": 0.5567866303525553, |
|
"learning_rate": 1.581012042947117e-05, |
|
"loss": 0.6112, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.12006237006237, |
|
"grad_norm": 0.5493991058746482, |
|
"learning_rate": 1.578547966497811e-05, |
|
"loss": 0.5976, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.1226611226611227, |
|
"grad_norm": 0.5652320554508646, |
|
"learning_rate": 1.57607859859598e-05, |
|
"loss": 0.6048, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1252598752598753, |
|
"grad_norm": 0.5401641304994612, |
|
"learning_rate": 1.57360396182669e-05, |
|
"loss": 0.6082, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.1278586278586278, |
|
"grad_norm": 0.5606818865719918, |
|
"learning_rate": 1.5711240788231933e-05, |
|
"loss": 0.6039, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1304573804573805, |
|
"grad_norm": 0.6007383546804671, |
|
"learning_rate": 1.5686389722667273e-05, |
|
"loss": 0.6047, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.133056133056133, |
|
"grad_norm": 0.5715756539794042, |
|
"learning_rate": 1.5661486648863027e-05, |
|
"loss": 0.6252, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1356548856548856, |
|
"grad_norm": 0.6079845247405427, |
|
"learning_rate": 1.563653179458499e-05, |
|
"loss": 0.6099, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.1382536382536383, |
|
"grad_norm": 0.5663226785265596, |
|
"learning_rate": 1.5611525388072525e-05, |
|
"loss": 0.5996, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1408523908523909, |
|
"grad_norm": 0.6108898947357355, |
|
"learning_rate": 1.5586467658036526e-05, |
|
"loss": 0.6209, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.1434511434511434, |
|
"grad_norm": 0.6420427561575582, |
|
"learning_rate": 1.556135883365727e-05, |
|
"loss": 0.6038, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1460498960498962, |
|
"grad_norm": 0.587335250663389, |
|
"learning_rate": 1.5536199144582354e-05, |
|
"loss": 0.6242, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.1486486486486487, |
|
"grad_norm": 0.5910496137391441, |
|
"learning_rate": 1.5510988820924598e-05, |
|
"loss": 0.6069, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1512474012474012, |
|
"grad_norm": 0.5655552313228328, |
|
"learning_rate": 1.5485728093259923e-05, |
|
"loss": 0.6225, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 0.554660591831712, |
|
"learning_rate": 1.5460417192625245e-05, |
|
"loss": 0.6121, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1564449064449065, |
|
"grad_norm": 0.5924033501687683, |
|
"learning_rate": 1.5435056350516376e-05, |
|
"loss": 0.6108, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.159043659043659, |
|
"grad_norm": 0.5678195976691061, |
|
"learning_rate": 1.54096457988859e-05, |
|
"loss": 0.6146, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1616424116424116, |
|
"grad_norm": 0.582762073846593, |
|
"learning_rate": 1.5384185770141027e-05, |
|
"loss": 0.6116, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.1642411642411643, |
|
"grad_norm": 0.5520839459854381, |
|
"learning_rate": 1.535867649714152e-05, |
|
"loss": 0.6167, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1668399168399168, |
|
"grad_norm": 0.5394988505298011, |
|
"learning_rate": 1.533311821319751e-05, |
|
"loss": 0.6173, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.1694386694386694, |
|
"grad_norm": 0.5490162258104867, |
|
"learning_rate": 1.5307511152067397e-05, |
|
"loss": 0.6195, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1720374220374221, |
|
"grad_norm": 0.5176946937084966, |
|
"learning_rate": 1.5281855547955704e-05, |
|
"loss": 0.6063, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.1746361746361746, |
|
"grad_norm": 0.5697232320984311, |
|
"learning_rate": 1.5256151635510925e-05, |
|
"loss": 0.6132, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1772349272349272, |
|
"grad_norm": 0.5408355403813135, |
|
"learning_rate": 1.5230399649823389e-05, |
|
"loss": 0.6202, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.17983367983368, |
|
"grad_norm": 0.5504776040838202, |
|
"learning_rate": 1.5204599826423108e-05, |
|
"loss": 0.6121, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1824324324324325, |
|
"grad_norm": 0.5337013368651256, |
|
"learning_rate": 1.5178752401277628e-05, |
|
"loss": 0.616, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.185031185031185, |
|
"grad_norm": 0.562149132935065, |
|
"learning_rate": 1.5152857610789854e-05, |
|
"loss": 0.6097, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1876299376299375, |
|
"grad_norm": 0.5909197735161369, |
|
"learning_rate": 1.5126915691795905e-05, |
|
"loss": 0.6188, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.1902286902286903, |
|
"grad_norm": 0.5535938243322149, |
|
"learning_rate": 1.5100926881562936e-05, |
|
"loss": 0.6137, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1928274428274428, |
|
"grad_norm": 0.544767406909682, |
|
"learning_rate": 1.5074891417786993e-05, |
|
"loss": 0.6133, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.1954261954261955, |
|
"grad_norm": 0.5459850942463099, |
|
"learning_rate": 1.5048809538590789e-05, |
|
"loss": 0.613, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.198024948024948, |
|
"grad_norm": 0.5873358493955128, |
|
"learning_rate": 1.5022681482521579e-05, |
|
"loss": 0.6156, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.2006237006237006, |
|
"grad_norm": 0.5644324461104552, |
|
"learning_rate": 1.499650748854895e-05, |
|
"loss": 0.6155, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2032224532224531, |
|
"grad_norm": 0.5531535214490884, |
|
"learning_rate": 1.4970287796062642e-05, |
|
"loss": 0.6191, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.2058212058212059, |
|
"grad_norm": 0.5509179294326446, |
|
"learning_rate": 1.494402264487035e-05, |
|
"loss": 0.614, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2084199584199584, |
|
"grad_norm": 0.5585470168515849, |
|
"learning_rate": 1.491771227519555e-05, |
|
"loss": 0.6139, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.211018711018711, |
|
"grad_norm": 0.5129593419686834, |
|
"learning_rate": 1.4891356927675284e-05, |
|
"loss": 0.6089, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2136174636174637, |
|
"grad_norm": 0.5920443075253277, |
|
"learning_rate": 1.4864956843357967e-05, |
|
"loss": 0.63, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.2162162162162162, |
|
"grad_norm": 0.5559902991412571, |
|
"learning_rate": 1.4838512263701184e-05, |
|
"loss": 0.6228, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.2188149688149688, |
|
"grad_norm": 0.5643995055948857, |
|
"learning_rate": 1.4812023430569467e-05, |
|
"loss": 0.619, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.2214137214137215, |
|
"grad_norm": 0.5742853786867631, |
|
"learning_rate": 1.4785490586232108e-05, |
|
"loss": 0.6245, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.224012474012474, |
|
"grad_norm": 0.5778953782438334, |
|
"learning_rate": 1.4758913973360919e-05, |
|
"loss": 0.6227, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.2266112266112266, |
|
"grad_norm": 0.5925914426786582, |
|
"learning_rate": 1.4732293835028038e-05, |
|
"loss": 0.6107, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2292099792099793, |
|
"grad_norm": 0.5895371651072315, |
|
"learning_rate": 1.4705630414703669e-05, |
|
"loss": 0.6057, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.2318087318087318, |
|
"grad_norm": 0.6081772444953167, |
|
"learning_rate": 1.4678923956253894e-05, |
|
"loss": 0.6424, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2344074844074844, |
|
"grad_norm": 0.5933961879145944, |
|
"learning_rate": 1.4652174703938422e-05, |
|
"loss": 0.6128, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.237006237006237, |
|
"grad_norm": 0.6054620771138413, |
|
"learning_rate": 1.4625382902408356e-05, |
|
"loss": 0.6084, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2396049896049897, |
|
"grad_norm": 0.5776932281070712, |
|
"learning_rate": 1.4598548796703953e-05, |
|
"loss": 0.6217, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.2422037422037422, |
|
"grad_norm": 0.5591153237371339, |
|
"learning_rate": 1.4571672632252404e-05, |
|
"loss": 0.6059, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2448024948024947, |
|
"grad_norm": 0.5667751253010028, |
|
"learning_rate": 1.4544754654865553e-05, |
|
"loss": 0.6269, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.2474012474012475, |
|
"grad_norm": 0.5510576618147843, |
|
"learning_rate": 1.4517795110737687e-05, |
|
"loss": 0.6175, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.5653685584114336, |
|
"learning_rate": 1.4490794246443249e-05, |
|
"loss": 0.6141, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.2525987525987525, |
|
"grad_norm": 0.569054506821339, |
|
"learning_rate": 1.446375230893462e-05, |
|
"loss": 0.6132, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.255197505197505, |
|
"grad_norm": 0.5530850077073164, |
|
"learning_rate": 1.4436669545539824e-05, |
|
"loss": 0.6112, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.2577962577962578, |
|
"grad_norm": 0.5413151446394687, |
|
"learning_rate": 1.4409546203960284e-05, |
|
"loss": 0.6032, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2603950103950103, |
|
"grad_norm": 0.5230951552758679, |
|
"learning_rate": 1.4382382532268566e-05, |
|
"loss": 0.6144, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.262993762993763, |
|
"grad_norm": 0.541771918919958, |
|
"learning_rate": 1.4355178778906085e-05, |
|
"loss": 0.6234, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.2655925155925156, |
|
"grad_norm": 0.5203001197628181, |
|
"learning_rate": 1.4327935192680857e-05, |
|
"loss": 0.6045, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.2681912681912682, |
|
"grad_norm": 0.5440655504089812, |
|
"learning_rate": 1.4300652022765207e-05, |
|
"loss": 0.6139, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2707900207900207, |
|
"grad_norm": 0.6149133483770466, |
|
"learning_rate": 1.4273329518693497e-05, |
|
"loss": 0.6145, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.2733887733887734, |
|
"grad_norm": 0.6021509402407774, |
|
"learning_rate": 1.4245967930359848e-05, |
|
"loss": 0.6159, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.275987525987526, |
|
"grad_norm": 0.5913158357105107, |
|
"learning_rate": 1.4218567508015841e-05, |
|
"loss": 0.6168, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.2785862785862787, |
|
"grad_norm": 0.5618432626028342, |
|
"learning_rate": 1.4191128502268242e-05, |
|
"loss": 0.6152, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2811850311850312, |
|
"grad_norm": 0.5249984782845095, |
|
"learning_rate": 1.4163651164076705e-05, |
|
"loss": 0.6086, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.2837837837837838, |
|
"grad_norm": 0.5672830278319703, |
|
"learning_rate": 1.4136135744751468e-05, |
|
"loss": 0.6114, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2863825363825363, |
|
"grad_norm": 0.5682891875562709, |
|
"learning_rate": 1.4108582495951077e-05, |
|
"loss": 0.6148, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.288981288981289, |
|
"grad_norm": 0.5615341097983116, |
|
"learning_rate": 1.408099166968005e-05, |
|
"loss": 0.6111, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2915800415800416, |
|
"grad_norm": 0.5497563938968811, |
|
"learning_rate": 1.4053363518286613e-05, |
|
"loss": 0.6088, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.2941787941787941, |
|
"grad_norm": 0.5582405570031684, |
|
"learning_rate": 1.4025698294460362e-05, |
|
"loss": 0.6136, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2967775467775469, |
|
"grad_norm": 0.6011380118880273, |
|
"learning_rate": 1.3997996251229948e-05, |
|
"loss": 0.6186, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.2993762993762994, |
|
"grad_norm": 0.5496562610843831, |
|
"learning_rate": 1.3970257641960795e-05, |
|
"loss": 0.6182, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.301975051975052, |
|
"grad_norm": 0.5687796275549053, |
|
"learning_rate": 1.3942482720352761e-05, |
|
"loss": 0.6157, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.3045738045738045, |
|
"grad_norm": 0.574298920577317, |
|
"learning_rate": 1.3914671740437811e-05, |
|
"loss": 0.6136, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3071725571725572, |
|
"grad_norm": 0.5542768495449328, |
|
"learning_rate": 1.3886824956577702e-05, |
|
"loss": 0.6031, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.3097713097713097, |
|
"grad_norm": 0.5666521327715712, |
|
"learning_rate": 1.3858942623461664e-05, |
|
"loss": 0.6062, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3123700623700625, |
|
"grad_norm": 0.5383202751991224, |
|
"learning_rate": 1.3831024996104065e-05, |
|
"loss": 0.6119, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.314968814968815, |
|
"grad_norm": 0.550924324768737, |
|
"learning_rate": 1.3803072329842073e-05, |
|
"loss": 0.6218, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.3175675675675675, |
|
"grad_norm": 0.5715325257279636, |
|
"learning_rate": 1.3775084880333323e-05, |
|
"loss": 0.6197, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.32016632016632, |
|
"grad_norm": 0.5516314324953223, |
|
"learning_rate": 1.3747062903553582e-05, |
|
"loss": 0.5983, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.3227650727650728, |
|
"grad_norm": 0.5587681122677882, |
|
"learning_rate": 1.3719006655794414e-05, |
|
"loss": 0.6104, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.3253638253638254, |
|
"grad_norm": 0.5529619265877077, |
|
"learning_rate": 1.3690916393660815e-05, |
|
"loss": 0.6232, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3279625779625779, |
|
"grad_norm": 0.6007892832321496, |
|
"learning_rate": 1.3662792374068896e-05, |
|
"loss": 0.6246, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.3305613305613306, |
|
"grad_norm": 0.5102078314524738, |
|
"learning_rate": 1.3634634854243503e-05, |
|
"loss": 0.6037, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.3331600831600832, |
|
"grad_norm": 0.5068981925325898, |
|
"learning_rate": 1.3606444091715883e-05, |
|
"loss": 0.6056, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.3357588357588357, |
|
"grad_norm": 0.5201200155890484, |
|
"learning_rate": 1.3578220344321325e-05, |
|
"loss": 0.6088, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3383575883575882, |
|
"grad_norm": 0.5411417638449072, |
|
"learning_rate": 1.3549963870196796e-05, |
|
"loss": 0.606, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.340956340956341, |
|
"grad_norm": 0.5169808096315553, |
|
"learning_rate": 1.3521674927778594e-05, |
|
"loss": 0.6278, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3435550935550935, |
|
"grad_norm": 0.5658934679962141, |
|
"learning_rate": 1.3493353775799967e-05, |
|
"loss": 0.6067, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"grad_norm": 0.5724238241800808, |
|
"learning_rate": 1.3465000673288757e-05, |
|
"loss": 0.6003, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3487525987525988, |
|
"grad_norm": 0.6105368545978801, |
|
"learning_rate": 1.3436615879565025e-05, |
|
"loss": 0.616, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.5188576936304327, |
|
"learning_rate": 1.340819965423869e-05, |
|
"loss": 0.6283, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3539501039501038, |
|
"grad_norm": 0.4959836182939828, |
|
"learning_rate": 1.3379752257207144e-05, |
|
"loss": 0.6157, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.3565488565488566, |
|
"grad_norm": 0.5769448388897034, |
|
"learning_rate": 1.3351273948652872e-05, |
|
"loss": 0.6133, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3591476091476091, |
|
"grad_norm": 0.5647777721810548, |
|
"learning_rate": 1.3322764989041086e-05, |
|
"loss": 0.6047, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.3617463617463619, |
|
"grad_norm": 0.5362269489941972, |
|
"learning_rate": 1.329422563911734e-05, |
|
"loss": 0.6244, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3643451143451144, |
|
"grad_norm": 0.5876277649004987, |
|
"learning_rate": 1.326565615990513e-05, |
|
"loss": 0.6094, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.366943866943867, |
|
"grad_norm": 0.5771702605216373, |
|
"learning_rate": 1.3237056812703517e-05, |
|
"loss": 0.6162, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3695426195426195, |
|
"grad_norm": 0.5206111176210121, |
|
"learning_rate": 1.3208427859084743e-05, |
|
"loss": 0.5991, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.3721413721413722, |
|
"grad_norm": 0.5703420517094763, |
|
"learning_rate": 1.3179769560891837e-05, |
|
"loss": 0.6158, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3747401247401247, |
|
"grad_norm": 0.5075630462180919, |
|
"learning_rate": 1.315108218023621e-05, |
|
"loss": 0.6157, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.3773388773388773, |
|
"grad_norm": 0.5278204198500884, |
|
"learning_rate": 1.3122365979495259e-05, |
|
"loss": 0.611, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.37993762993763, |
|
"grad_norm": 0.5830494022632724, |
|
"learning_rate": 1.3093621221309982e-05, |
|
"loss": 0.6226, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.3825363825363826, |
|
"grad_norm": 0.5567019594449695, |
|
"learning_rate": 1.3064848168582562e-05, |
|
"loss": 0.6128, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.385135135135135, |
|
"grad_norm": 0.5218600131647313, |
|
"learning_rate": 1.3036047084473964e-05, |
|
"loss": 0.6164, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.3877338877338876, |
|
"grad_norm": 0.5550941890937359, |
|
"learning_rate": 1.3007218232401535e-05, |
|
"loss": 0.6178, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3903326403326404, |
|
"grad_norm": 0.5140778619937807, |
|
"learning_rate": 1.2978361876036586e-05, |
|
"loss": 0.6015, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.392931392931393, |
|
"grad_norm": 0.5704426484745836, |
|
"learning_rate": 1.2949478279301993e-05, |
|
"loss": 0.6218, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3955301455301456, |
|
"grad_norm": 0.573333768381573, |
|
"learning_rate": 1.292056770636976e-05, |
|
"loss": 0.6195, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.3981288981288982, |
|
"grad_norm": 0.5463535484803559, |
|
"learning_rate": 1.2891630421658631e-05, |
|
"loss": 0.619, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4007276507276507, |
|
"grad_norm": 0.5239768140578435, |
|
"learning_rate": 1.2862666689831655e-05, |
|
"loss": 0.5988, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.4033264033264032, |
|
"grad_norm": 0.5254212957357791, |
|
"learning_rate": 1.2833676775793766e-05, |
|
"loss": 0.6089, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.405925155925156, |
|
"grad_norm": 0.4999540179579075, |
|
"learning_rate": 1.2804660944689368e-05, |
|
"loss": 0.6161, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.4085239085239085, |
|
"grad_norm": 0.5566115132096349, |
|
"learning_rate": 1.2775619461899896e-05, |
|
"loss": 0.6182, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.411122661122661, |
|
"grad_norm": 0.5740812598543206, |
|
"learning_rate": 1.2746552593041405e-05, |
|
"loss": 0.598, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.4137214137214138, |
|
"grad_norm": 0.5437551314682787, |
|
"learning_rate": 1.2717460603962132e-05, |
|
"loss": 0.609, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4163201663201663, |
|
"grad_norm": 0.527003171395807, |
|
"learning_rate": 1.268834376074007e-05, |
|
"loss": 0.6097, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.4189189189189189, |
|
"grad_norm": 0.5151895053958203, |
|
"learning_rate": 1.2659202329680515e-05, |
|
"loss": 0.6223, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.4215176715176714, |
|
"grad_norm": 0.5220435941255479, |
|
"learning_rate": 1.2630036577313667e-05, |
|
"loss": 0.6273, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.4241164241164241, |
|
"grad_norm": 0.5541408035311566, |
|
"learning_rate": 1.2600846770392155e-05, |
|
"loss": 0.6115, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4267151767151767, |
|
"grad_norm": 0.5183266262374772, |
|
"learning_rate": 1.2571633175888618e-05, |
|
"loss": 0.6098, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.4293139293139294, |
|
"grad_norm": 0.5467945168613629, |
|
"learning_rate": 1.2542396060993256e-05, |
|
"loss": 0.6129, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.431912681912682, |
|
"grad_norm": 0.5408402850999704, |
|
"learning_rate": 1.2513135693111399e-05, |
|
"loss": 0.6113, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.4345114345114345, |
|
"grad_norm": 0.5481669387572653, |
|
"learning_rate": 1.2483852339861033e-05, |
|
"loss": 0.6032, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.437110187110187, |
|
"grad_norm": 0.5292679934908046, |
|
"learning_rate": 1.2454546269070392e-05, |
|
"loss": 0.6037, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.4397089397089398, |
|
"grad_norm": 0.5744822983902161, |
|
"learning_rate": 1.2425217748775464e-05, |
|
"loss": 0.6099, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 0.5030366381929183, |
|
"learning_rate": 1.239586704721758e-05, |
|
"loss": 0.6067, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.444906444906445, |
|
"grad_norm": 0.5833985268491657, |
|
"learning_rate": 1.2366494432840937e-05, |
|
"loss": 0.6039, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4475051975051976, |
|
"grad_norm": 0.5747742162047574, |
|
"learning_rate": 1.2337100174290142e-05, |
|
"loss": 0.6101, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.45010395010395, |
|
"grad_norm": 0.5356407427398536, |
|
"learning_rate": 1.2307684540407775e-05, |
|
"loss": 0.6055, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4527027027027026, |
|
"grad_norm": 0.5413902409510034, |
|
"learning_rate": 1.2278247800231901e-05, |
|
"loss": 0.6162, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.4553014553014554, |
|
"grad_norm": 0.5361345781691861, |
|
"learning_rate": 1.2248790222993639e-05, |
|
"loss": 0.6132, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.457900207900208, |
|
"grad_norm": 0.48977234406410547, |
|
"learning_rate": 1.221931207811468e-05, |
|
"loss": 0.619, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.4604989604989604, |
|
"grad_norm": 0.5539199421254352, |
|
"learning_rate": 1.2189813635204825e-05, |
|
"loss": 0.6034, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4630977130977132, |
|
"grad_norm": 0.5274980068953669, |
|
"learning_rate": 1.2160295164059529e-05, |
|
"loss": 0.6076, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.4656964656964657, |
|
"grad_norm": 0.5081900105077334, |
|
"learning_rate": 1.2130756934657424e-05, |
|
"loss": 0.6097, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4682952182952183, |
|
"grad_norm": 0.5619754096937638, |
|
"learning_rate": 1.210119921715785e-05, |
|
"loss": 0.6156, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.4708939708939708, |
|
"grad_norm": 0.5058475060346515, |
|
"learning_rate": 1.2071622281898394e-05, |
|
"loss": 0.6119, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4734927234927235, |
|
"grad_norm": 0.528937107568451, |
|
"learning_rate": 1.2042026399392403e-05, |
|
"loss": 0.6034, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.476091476091476, |
|
"grad_norm": 0.5585222059699902, |
|
"learning_rate": 1.2012411840326524e-05, |
|
"loss": 0.6122, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4786902286902288, |
|
"grad_norm": 0.5474471042332577, |
|
"learning_rate": 1.1982778875558215e-05, |
|
"loss": 0.5978, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.4812889812889813, |
|
"grad_norm": 0.5637920526811849, |
|
"learning_rate": 1.1953127776113279e-05, |
|
"loss": 0.6097, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4838877338877339, |
|
"grad_norm": 0.5153160827226365, |
|
"learning_rate": 1.192345881318338e-05, |
|
"loss": 0.6065, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.4864864864864864, |
|
"grad_norm": 0.5089185825931368, |
|
"learning_rate": 1.1893772258123554e-05, |
|
"loss": 0.5955, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.4890852390852392, |
|
"grad_norm": 0.5284121779832783, |
|
"learning_rate": 1.1864068382449756e-05, |
|
"loss": 0.6088, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.4916839916839917, |
|
"grad_norm": 0.5231059878227796, |
|
"learning_rate": 1.1834347457836337e-05, |
|
"loss": 0.5976, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.4942827442827442, |
|
"grad_norm": 0.5517740731632155, |
|
"learning_rate": 1.180460975611359e-05, |
|
"loss": 0.613, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.496881496881497, |
|
"grad_norm": 0.46831838517285146, |
|
"learning_rate": 1.1774855549265245e-05, |
|
"loss": 0.6053, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.4994802494802495, |
|
"grad_norm": 0.527557394883835, |
|
"learning_rate": 1.1745085109426002e-05, |
|
"loss": 0.6174, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.502079002079002, |
|
"grad_norm": 0.5200048942038921, |
|
"learning_rate": 1.171529870887902e-05, |
|
"loss": 0.6066, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5046777546777546, |
|
"grad_norm": 0.5460408265611407, |
|
"learning_rate": 1.1685496620053434e-05, |
|
"loss": 0.6122, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.5072765072765073, |
|
"grad_norm": 0.5171487101859985, |
|
"learning_rate": 1.165567911552187e-05, |
|
"loss": 0.607, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5098752598752598, |
|
"grad_norm": 0.5082429135678129, |
|
"learning_rate": 1.1625846467997952e-05, |
|
"loss": 0.6118, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.5124740124740126, |
|
"grad_norm": 0.536744119246903, |
|
"learning_rate": 1.1595998950333794e-05, |
|
"loss": 0.6228, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.5150727650727651, |
|
"grad_norm": 0.5540864582315153, |
|
"learning_rate": 1.1566136835517518e-05, |
|
"loss": 0.6085, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.5176715176715176, |
|
"grad_norm": 0.5480519199954694, |
|
"learning_rate": 1.1536260396670753e-05, |
|
"loss": 0.6038, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.5202702702702702, |
|
"grad_norm": 0.5320678068411181, |
|
"learning_rate": 1.1506369907046135e-05, |
|
"loss": 0.6027, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.5228690228690227, |
|
"grad_norm": 0.5559206845902772, |
|
"learning_rate": 1.1476465640024814e-05, |
|
"loss": 0.6082, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.5254677754677755, |
|
"grad_norm": 0.5919814949422626, |
|
"learning_rate": 1.1446547869113944e-05, |
|
"loss": 0.5897, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.5280665280665282, |
|
"grad_norm": 0.5327268055659626, |
|
"learning_rate": 1.1416616867944192e-05, |
|
"loss": 0.611, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.5306652806652807, |
|
"grad_norm": 0.4971186426325191, |
|
"learning_rate": 1.1386672910267225e-05, |
|
"loss": 0.6101, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.5332640332640333, |
|
"grad_norm": 0.5640128227568957, |
|
"learning_rate": 1.1356716269953213e-05, |
|
"loss": 0.6199, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5358627858627858, |
|
"grad_norm": 0.5179662541283063, |
|
"learning_rate": 1.1326747220988327e-05, |
|
"loss": 0.6202, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.6423145905392057, |
|
"learning_rate": 1.1296766037472223e-05, |
|
"loss": 0.6144, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.541060291060291, |
|
"grad_norm": 0.5256505864598588, |
|
"learning_rate": 1.1266772993615543e-05, |
|
"loss": 0.6066, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.5436590436590436, |
|
"grad_norm": 0.5209882272221003, |
|
"learning_rate": 1.1236768363737408e-05, |
|
"loss": 0.613, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5462577962577964, |
|
"grad_norm": 0.5139682181751073, |
|
"learning_rate": 1.120675242226289e-05, |
|
"loss": 0.6195, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.5488565488565489, |
|
"grad_norm": 0.5285679185697464, |
|
"learning_rate": 1.1176725443720545e-05, |
|
"loss": 0.6074, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5514553014553014, |
|
"grad_norm": 0.5176763822468469, |
|
"learning_rate": 1.1146687702739855e-05, |
|
"loss": 0.6225, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.554054054054054, |
|
"grad_norm": 0.5346252383786081, |
|
"learning_rate": 1.1116639474048741e-05, |
|
"loss": 0.5955, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.5566528066528067, |
|
"grad_norm": 0.5246377509399082, |
|
"learning_rate": 1.108658103247104e-05, |
|
"loss": 0.6075, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.5592515592515592, |
|
"grad_norm": 0.5852349160305579, |
|
"learning_rate": 1.1056512652924014e-05, |
|
"loss": 0.6102, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.561850311850312, |
|
"grad_norm": 0.5540954218703817, |
|
"learning_rate": 1.1026434610415804e-05, |
|
"loss": 0.6073, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.5644490644490645, |
|
"grad_norm": 0.516164831755444, |
|
"learning_rate": 1.099634718004293e-05, |
|
"loss": 0.6144, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.567047817047817, |
|
"grad_norm": 0.5238437043105261, |
|
"learning_rate": 1.0966250636987776e-05, |
|
"loss": 0.61, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.5696465696465696, |
|
"grad_norm": 0.5499703346154395, |
|
"learning_rate": 1.093614525651608e-05, |
|
"loss": 0.6, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.572245322245322, |
|
"grad_norm": 0.5392038397492541, |
|
"learning_rate": 1.0906031313974392e-05, |
|
"loss": 0.6004, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.5748440748440748, |
|
"grad_norm": 0.5440366683585401, |
|
"learning_rate": 1.0875909084787586e-05, |
|
"loss": 0.6079, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5774428274428276, |
|
"grad_norm": 0.5280604613144251, |
|
"learning_rate": 1.0845778844456319e-05, |
|
"loss": 0.6028, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.5800415800415801, |
|
"grad_norm": 0.5130988979787711, |
|
"learning_rate": 1.0815640868554518e-05, |
|
"loss": 0.6255, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5826403326403327, |
|
"grad_norm": 0.5347614455862642, |
|
"learning_rate": 1.0785495432726864e-05, |
|
"loss": 0.6144, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.5852390852390852, |
|
"grad_norm": 0.5540466808635207, |
|
"learning_rate": 1.0755342812686264e-05, |
|
"loss": 0.618, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.5878378378378377, |
|
"grad_norm": 0.5158267468916651, |
|
"learning_rate": 1.0725183284211335e-05, |
|
"loss": 0.6054, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.5904365904365905, |
|
"grad_norm": 0.5235550308126831, |
|
"learning_rate": 1.0695017123143881e-05, |
|
"loss": 0.6113, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.593035343035343, |
|
"grad_norm": 0.49676274074318394, |
|
"learning_rate": 1.0664844605386357e-05, |
|
"loss": 0.6066, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.5956340956340958, |
|
"grad_norm": 0.5091724259037824, |
|
"learning_rate": 1.0634666006899375e-05, |
|
"loss": 0.6059, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5982328482328483, |
|
"grad_norm": 0.5308888501073562, |
|
"learning_rate": 1.0604481603699146e-05, |
|
"loss": 0.6077, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.6008316008316008, |
|
"grad_norm": 0.5668118121411413, |
|
"learning_rate": 1.0574291671854979e-05, |
|
"loss": 0.6119, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6034303534303533, |
|
"grad_norm": 0.5232440524467463, |
|
"learning_rate": 1.054409648748675e-05, |
|
"loss": 0.6132, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.6060291060291059, |
|
"grad_norm": 0.5326956732038823, |
|
"learning_rate": 1.0513896326762363e-05, |
|
"loss": 0.5957, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6086278586278586, |
|
"grad_norm": 0.5376136523378364, |
|
"learning_rate": 1.0483691465895256e-05, |
|
"loss": 0.5963, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.6112266112266114, |
|
"grad_norm": 0.5590406644575509, |
|
"learning_rate": 1.0453482181141838e-05, |
|
"loss": 0.6114, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.613825363825364, |
|
"grad_norm": 0.5348933441437478, |
|
"learning_rate": 1.0423268748798992e-05, |
|
"loss": 0.626, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.6164241164241164, |
|
"grad_norm": 0.5587808171684693, |
|
"learning_rate": 1.0393051445201518e-05, |
|
"loss": 0.6035, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.619022869022869, |
|
"grad_norm": 0.5217308721418593, |
|
"learning_rate": 1.0362830546719644e-05, |
|
"loss": 0.6007, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 0.5331440823163403, |
|
"learning_rate": 1.0332606329756463e-05, |
|
"loss": 0.6103, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.6242203742203742, |
|
"grad_norm": 0.5354516402513061, |
|
"learning_rate": 1.030237907074542e-05, |
|
"loss": 0.6021, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.6268191268191268, |
|
"grad_norm": 0.5301206175827867, |
|
"learning_rate": 1.0272149046147788e-05, |
|
"loss": 0.6032, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6294178794178795, |
|
"grad_norm": 0.5364702146724981, |
|
"learning_rate": 1.0241916532450133e-05, |
|
"loss": 0.6107, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.632016632016632, |
|
"grad_norm": 0.5030704592075379, |
|
"learning_rate": 1.0211681806161787e-05, |
|
"loss": 0.5984, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6346153846153846, |
|
"grad_norm": 0.5001028568491547, |
|
"learning_rate": 1.0181445143812312e-05, |
|
"loss": 0.6011, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.637214137214137, |
|
"grad_norm": 0.5537298706648461, |
|
"learning_rate": 1.0151206821948985e-05, |
|
"loss": 0.6348, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.6398128898128899, |
|
"grad_norm": 0.5499538795880998, |
|
"learning_rate": 1.0120967117134262e-05, |
|
"loss": 0.6163, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.6424116424116424, |
|
"grad_norm": 0.4944029513235786, |
|
"learning_rate": 1.009072630594324e-05, |
|
"loss": 0.5997, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6450103950103951, |
|
"grad_norm": 0.5560023248781629, |
|
"learning_rate": 1.0060484664961136e-05, |
|
"loss": 0.6066, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.6476091476091477, |
|
"grad_norm": 0.5228794932020453, |
|
"learning_rate": 1.0030242470780769e-05, |
|
"loss": 0.6049, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.6502079002079002, |
|
"grad_norm": 0.5251096124443742, |
|
"learning_rate": 1e-05, |
|
"loss": 0.617, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.6528066528066527, |
|
"grad_norm": 0.504719489023802, |
|
"learning_rate": 9.969757529219236e-06, |
|
"loss": 0.611, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6554054054054053, |
|
"grad_norm": 0.5164130013232197, |
|
"learning_rate": 9.939515335038866e-06, |
|
"loss": 0.6071, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.658004158004158, |
|
"grad_norm": 0.503984804974549, |
|
"learning_rate": 9.909273694056765e-06, |
|
"loss": 0.6098, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6606029106029108, |
|
"grad_norm": 0.5318145254626715, |
|
"learning_rate": 9.879032882865745e-06, |
|
"loss": 0.6046, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.6632016632016633, |
|
"grad_norm": 0.49979486457828537, |
|
"learning_rate": 9.848793178051017e-06, |
|
"loss": 0.5942, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6658004158004158, |
|
"grad_norm": 0.5222561724594693, |
|
"learning_rate": 9.818554856187692e-06, |
|
"loss": 0.6102, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.6683991683991684, |
|
"grad_norm": 0.5119064608955575, |
|
"learning_rate": 9.788318193838218e-06, |
|
"loss": 0.6063, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.6709979209979209, |
|
"grad_norm": 0.49188265798150393, |
|
"learning_rate": 9.758083467549868e-06, |
|
"loss": 0.6007, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.6735966735966736, |
|
"grad_norm": 0.5307992559310489, |
|
"learning_rate": 9.727850953852217e-06, |
|
"loss": 0.6037, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.6761954261954262, |
|
"grad_norm": 0.5456235977768752, |
|
"learning_rate": 9.697620929254584e-06, |
|
"loss": 0.6244, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.678794178794179, |
|
"grad_norm": 0.5088649958340964, |
|
"learning_rate": 9.66739367024354e-06, |
|
"loss": 0.6042, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.6813929313929314, |
|
"grad_norm": 0.4953639561715028, |
|
"learning_rate": 9.63716945328036e-06, |
|
"loss": 0.5938, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.683991683991684, |
|
"grad_norm": 0.49505908823955036, |
|
"learning_rate": 9.606948554798482e-06, |
|
"loss": 0.6144, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.6865904365904365, |
|
"grad_norm": 0.5175987592879167, |
|
"learning_rate": 9.57673125120101e-06, |
|
"loss": 0.6098, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.689189189189189, |
|
"grad_norm": 0.5388656862756696, |
|
"learning_rate": 9.546517818858164e-06, |
|
"loss": 0.6171, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.6917879417879418, |
|
"grad_norm": 0.5153249162580613, |
|
"learning_rate": 9.516308534104744e-06, |
|
"loss": 0.5923, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.6943866943866945, |
|
"grad_norm": 0.5185938318204056, |
|
"learning_rate": 9.486103673237638e-06, |
|
"loss": 0.589, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.696985446985447, |
|
"grad_norm": 0.5735162818769731, |
|
"learning_rate": 9.455903512513257e-06, |
|
"loss": 0.6199, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.6995841995841996, |
|
"grad_norm": 0.5550865974556703, |
|
"learning_rate": 9.425708328145023e-06, |
|
"loss": 0.603, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.7021829521829521, |
|
"grad_norm": 0.5656039764802955, |
|
"learning_rate": 9.395518396300857e-06, |
|
"loss": 0.6036, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.7047817047817047, |
|
"grad_norm": 0.5356649490240522, |
|
"learning_rate": 9.365333993100628e-06, |
|
"loss": 0.5951, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7073804573804574, |
|
"grad_norm": 0.5715455882322491, |
|
"learning_rate": 9.335155394613641e-06, |
|
"loss": 0.5989, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.70997920997921, |
|
"grad_norm": 0.5497264191896297, |
|
"learning_rate": 9.304982876856124e-06, |
|
"loss": 0.6058, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.7125779625779627, |
|
"grad_norm": 0.5051026018528313, |
|
"learning_rate": 9.274816715788668e-06, |
|
"loss": 0.5969, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.7151767151767152, |
|
"grad_norm": 0.5204788821196659, |
|
"learning_rate": 9.244657187313739e-06, |
|
"loss": 0.611, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.7177754677754677, |
|
"grad_norm": 0.48242559624890763, |
|
"learning_rate": 9.214504567273139e-06, |
|
"loss": 0.5893, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.7203742203742203, |
|
"grad_norm": 0.5475293749782204, |
|
"learning_rate": 9.184359131445487e-06, |
|
"loss": 0.6128, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.722972972972973, |
|
"grad_norm": 0.5227512974575209, |
|
"learning_rate": 9.154221155543684e-06, |
|
"loss": 0.5942, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.7255717255717256, |
|
"grad_norm": 0.517359580415827, |
|
"learning_rate": 9.124090915212415e-06, |
|
"loss": 0.5995, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7281704781704783, |
|
"grad_norm": 0.5305121769843365, |
|
"learning_rate": 9.093968686025612e-06, |
|
"loss": 0.618, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"grad_norm": 0.5312713830882955, |
|
"learning_rate": 9.063854743483924e-06, |
|
"loss": 0.5929, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7333679833679834, |
|
"grad_norm": 0.5159635712284465, |
|
"learning_rate": 9.033749363012228e-06, |
|
"loss": 0.5942, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.735966735966736, |
|
"grad_norm": 0.5304955336277648, |
|
"learning_rate": 9.003652819957073e-06, |
|
"loss": 0.5955, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7385654885654884, |
|
"grad_norm": 0.5306004926529849, |
|
"learning_rate": 8.973565389584199e-06, |
|
"loss": 0.6157, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 1.7411642411642412, |
|
"grad_norm": 0.5287399261745209, |
|
"learning_rate": 8.943487347075988e-06, |
|
"loss": 0.5867, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 1.743762993762994, |
|
"grad_norm": 0.5230774773864855, |
|
"learning_rate": 8.91341896752896e-06, |
|
"loss": 0.5894, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 1.7463617463617465, |
|
"grad_norm": 0.5155471622168707, |
|
"learning_rate": 8.883360525951264e-06, |
|
"loss": 0.5958, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 1.748960498960499, |
|
"grad_norm": 0.5133214239778116, |
|
"learning_rate": 8.85331229726015e-06, |
|
"loss": 0.5935, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 1.7515592515592515, |
|
"grad_norm": 0.5319975207166266, |
|
"learning_rate": 8.823274556279455e-06, |
|
"loss": 0.5934, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 1.754158004158004, |
|
"grad_norm": 0.5364931909740585, |
|
"learning_rate": 8.793247577737112e-06, |
|
"loss": 0.6055, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 1.7567567567567568, |
|
"grad_norm": 0.5093682789742844, |
|
"learning_rate": 8.763231636262599e-06, |
|
"loss": 0.5904, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 1.7593555093555093, |
|
"grad_norm": 0.49218365344373355, |
|
"learning_rate": 8.733227006384459e-06, |
|
"loss": 0.6045, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 1.761954261954262, |
|
"grad_norm": 0.5463702062588134, |
|
"learning_rate": 8.703233962527779e-06, |
|
"loss": 0.6039, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 1.7645530145530146, |
|
"grad_norm": 0.5102092525737645, |
|
"learning_rate": 8.673252779011676e-06, |
|
"loss": 0.5887, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 1.7671517671517671, |
|
"grad_norm": 0.5268210778389424, |
|
"learning_rate": 8.643283730046788e-06, |
|
"loss": 0.5983, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.7697505197505197, |
|
"grad_norm": 0.5098708018226924, |
|
"learning_rate": 8.61332708973278e-06, |
|
"loss": 0.6043, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 1.7723492723492722, |
|
"grad_norm": 0.48835524185569673, |
|
"learning_rate": 8.583383132055814e-06, |
|
"loss": 0.6107, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 1.774948024948025, |
|
"grad_norm": 0.5701236303096751, |
|
"learning_rate": 8.55345213088606e-06, |
|
"loss": 0.6033, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 1.7775467775467777, |
|
"grad_norm": 0.5137867247566509, |
|
"learning_rate": 8.52353435997519e-06, |
|
"loss": 0.5988, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 1.7801455301455302, |
|
"grad_norm": 0.5185967787599991, |
|
"learning_rate": 8.49363009295387e-06, |
|
"loss": 0.6027, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 1.7827442827442828, |
|
"grad_norm": 0.5232087879326293, |
|
"learning_rate": 8.46373960332925e-06, |
|
"loss": 0.5958, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 1.7853430353430353, |
|
"grad_norm": 0.5227750785275999, |
|
"learning_rate": 8.433863164482485e-06, |
|
"loss": 0.6087, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 1.7879417879417878, |
|
"grad_norm": 0.4796440456103048, |
|
"learning_rate": 8.404001049666211e-06, |
|
"loss": 0.5961, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 1.7905405405405406, |
|
"grad_norm": 0.5114161067261779, |
|
"learning_rate": 8.37415353200205e-06, |
|
"loss": 0.5975, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 1.793139293139293, |
|
"grad_norm": 0.5368539216036579, |
|
"learning_rate": 8.344320884478133e-06, |
|
"loss": 0.5995, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 1.7957380457380459, |
|
"grad_norm": 0.5251230847938383, |
|
"learning_rate": 8.314503379946569e-06, |
|
"loss": 0.5924, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 1.7983367983367984, |
|
"grad_norm": 0.5125606084891738, |
|
"learning_rate": 8.284701291120984e-06, |
|
"loss": 0.59, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 1.800935550935551, |
|
"grad_norm": 0.5082724750112706, |
|
"learning_rate": 8.254914890574001e-06, |
|
"loss": 0.5783, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 1.8035343035343034, |
|
"grad_norm": 0.5857171673424286, |
|
"learning_rate": 8.225144450734755e-06, |
|
"loss": 0.6159, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 1.806133056133056, |
|
"grad_norm": 0.5189085809502059, |
|
"learning_rate": 8.195390243886414e-06, |
|
"loss": 0.5876, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 1.8087318087318087, |
|
"grad_norm": 0.5054176942242024, |
|
"learning_rate": 8.165652542163668e-06, |
|
"loss": 0.6018, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 1.8113305613305615, |
|
"grad_norm": 0.5245871555142563, |
|
"learning_rate": 8.135931617550245e-06, |
|
"loss": 0.607, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 1.813929313929314, |
|
"grad_norm": 0.5240802764153503, |
|
"learning_rate": 8.106227741876447e-06, |
|
"loss": 0.6074, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 1.8165280665280665, |
|
"grad_norm": 0.5431345881991243, |
|
"learning_rate": 8.076541186816625e-06, |
|
"loss": 0.6002, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 1.819126819126819, |
|
"grad_norm": 0.5192080223913004, |
|
"learning_rate": 8.046872223886723e-06, |
|
"loss": 0.6039, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.8217255717255716, |
|
"grad_norm": 0.5377132118040553, |
|
"learning_rate": 8.017221124441787e-06, |
|
"loss": 0.5866, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 1.8243243243243243, |
|
"grad_norm": 0.4848076231447858, |
|
"learning_rate": 7.98758815967348e-06, |
|
"loss": 0.5926, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 1.8269230769230769, |
|
"grad_norm": 0.49613490454069115, |
|
"learning_rate": 7.957973600607597e-06, |
|
"loss": 0.6029, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 1.8295218295218296, |
|
"grad_norm": 0.49532299518482037, |
|
"learning_rate": 7.92837771810161e-06, |
|
"loss": 0.5893, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 1.8321205821205822, |
|
"grad_norm": 0.5240393625504302, |
|
"learning_rate": 7.898800782842153e-06, |
|
"loss": 0.6044, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 1.8347193347193347, |
|
"grad_norm": 0.5378508353167911, |
|
"learning_rate": 7.86924306534258e-06, |
|
"loss": 0.5892, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 1.8373180873180872, |
|
"grad_norm": 0.5199976481026775, |
|
"learning_rate": 7.839704835940473e-06, |
|
"loss": 0.5982, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 1.83991683991684, |
|
"grad_norm": 0.529729165924642, |
|
"learning_rate": 7.81018636479518e-06, |
|
"loss": 0.6012, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 1.8425155925155925, |
|
"grad_norm": 0.4916121082024032, |
|
"learning_rate": 7.780687921885324e-06, |
|
"loss": 0.5977, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 1.8451143451143452, |
|
"grad_norm": 0.5575500880550704, |
|
"learning_rate": 7.751209777006363e-06, |
|
"loss": 0.6003, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 1.8477130977130978, |
|
"grad_norm": 0.5326362594297853, |
|
"learning_rate": 7.7217521997681e-06, |
|
"loss": 0.6039, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 1.8503118503118503, |
|
"grad_norm": 0.6375012050960875, |
|
"learning_rate": 7.69231545959223e-06, |
|
"loss": 0.5903, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 1.8529106029106028, |
|
"grad_norm": 0.5127398317219315, |
|
"learning_rate": 7.66289982570986e-06, |
|
"loss": 0.6127, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 1.8555093555093554, |
|
"grad_norm": 0.5337342117702417, |
|
"learning_rate": 7.633505567159068e-06, |
|
"loss": 0.6106, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 1.8581081081081081, |
|
"grad_norm": 0.5127146513499672, |
|
"learning_rate": 7.604132952782421e-06, |
|
"loss": 0.593, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 1.8607068607068609, |
|
"grad_norm": 0.5475181259322507, |
|
"learning_rate": 7.574782251224541e-06, |
|
"loss": 0.6087, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 1.8633056133056134, |
|
"grad_norm": 0.5057492212324644, |
|
"learning_rate": 7.545453730929612e-06, |
|
"loss": 0.5961, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 1.865904365904366, |
|
"grad_norm": 0.5313703879609416, |
|
"learning_rate": 7.516147660138968e-06, |
|
"loss": 0.5826, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 1.8685031185031185, |
|
"grad_norm": 0.5198552151529012, |
|
"learning_rate": 7.486864306888608e-06, |
|
"loss": 0.6015, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 1.871101871101871, |
|
"grad_norm": 0.5410281063432927, |
|
"learning_rate": 7.457603939006745e-06, |
|
"loss": 0.6033, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.8737006237006237, |
|
"grad_norm": 0.5557013181252524, |
|
"learning_rate": 7.428366824111386e-06, |
|
"loss": 0.5902, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 1.8762993762993763, |
|
"grad_norm": 0.5633083487264265, |
|
"learning_rate": 7.399153229607849e-06, |
|
"loss": 0.6018, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 1.878898128898129, |
|
"grad_norm": 0.48595216285541615, |
|
"learning_rate": 7.369963422686335e-06, |
|
"loss": 0.594, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 1.8814968814968815, |
|
"grad_norm": 0.5169705729740565, |
|
"learning_rate": 7.340797670319488e-06, |
|
"loss": 0.5899, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 1.884095634095634, |
|
"grad_norm": 0.4970606877334214, |
|
"learning_rate": 7.311656239259934e-06, |
|
"loss": 0.6148, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 1.8866943866943866, |
|
"grad_norm": 0.5178433096469348, |
|
"learning_rate": 7.282539396037868e-06, |
|
"loss": 0.59, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 1.8892931392931391, |
|
"grad_norm": 0.5085741805913727, |
|
"learning_rate": 7.253447406958598e-06, |
|
"loss": 0.5969, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 1.8918918918918919, |
|
"grad_norm": 0.5339633821078309, |
|
"learning_rate": 7.2243805381001084e-06, |
|
"loss": 0.6013, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 1.8944906444906446, |
|
"grad_norm": 0.5142299591444427, |
|
"learning_rate": 7.195339055310635e-06, |
|
"loss": 0.605, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 1.8970893970893972, |
|
"grad_norm": 0.53012102257086, |
|
"learning_rate": 7.166323224206236e-06, |
|
"loss": 0.5934, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 1.8996881496881497, |
|
"grad_norm": 0.5180950494011575, |
|
"learning_rate": 7.13733331016835e-06, |
|
"loss": 0.5967, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 1.9022869022869022, |
|
"grad_norm": 0.5183177508817899, |
|
"learning_rate": 7.108369578341372e-06, |
|
"loss": 0.5823, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 1.9048856548856548, |
|
"grad_norm": 0.5070875844600755, |
|
"learning_rate": 7.079432293630244e-06, |
|
"loss": 0.5956, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 1.9074844074844075, |
|
"grad_norm": 0.6274267217296448, |
|
"learning_rate": 7.050521720698009e-06, |
|
"loss": 0.6114, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 1.91008316008316, |
|
"grad_norm": 0.5602574620780145, |
|
"learning_rate": 7.021638123963415e-06, |
|
"loss": 0.586, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 1.9126819126819128, |
|
"grad_norm": 0.517425035346988, |
|
"learning_rate": 6.992781767598467e-06, |
|
"loss": 0.5937, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 1.9152806652806653, |
|
"grad_norm": 0.49392256995221356, |
|
"learning_rate": 6.9639529155260355e-06, |
|
"loss": 0.5893, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 1.9178794178794178, |
|
"grad_norm": 0.5221453351667464, |
|
"learning_rate": 6.935151831417442e-06, |
|
"loss": 0.5921, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 1.9204781704781704, |
|
"grad_norm": 0.5437297366337159, |
|
"learning_rate": 6.906378778690023e-06, |
|
"loss": 0.5941, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 0.5526585356985603, |
|
"learning_rate": 6.8776340205047446e-06, |
|
"loss": 0.5879, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.9256756756756757, |
|
"grad_norm": 0.47075169096755787, |
|
"learning_rate": 6.848917819763794e-06, |
|
"loss": 0.587, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 1.9282744282744284, |
|
"grad_norm": 0.5318393319444799, |
|
"learning_rate": 6.8202304391081665e-06, |
|
"loss": 0.5961, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 1.930873180873181, |
|
"grad_norm": 0.5429244483259561, |
|
"learning_rate": 6.791572140915258e-06, |
|
"loss": 0.5972, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 1.9334719334719335, |
|
"grad_norm": 0.7824999181116893, |
|
"learning_rate": 6.762943187296487e-06, |
|
"loss": 0.6025, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.936070686070686, |
|
"grad_norm": 0.5506609069987528, |
|
"learning_rate": 6.734343840094877e-06, |
|
"loss": 0.5935, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 1.9386694386694385, |
|
"grad_norm": 0.5393169028265578, |
|
"learning_rate": 6.705774360882662e-06, |
|
"loss": 0.5998, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.9412681912681913, |
|
"grad_norm": 0.5196746072745972, |
|
"learning_rate": 6.677235010958916e-06, |
|
"loss": 0.6024, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 1.943866943866944, |
|
"grad_norm": 0.5161195299496159, |
|
"learning_rate": 6.648726051347132e-06, |
|
"loss": 0.5923, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.9464656964656966, |
|
"grad_norm": 0.5385756600061604, |
|
"learning_rate": 6.6202477427928604e-06, |
|
"loss": 0.5936, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 1.949064449064449, |
|
"grad_norm": 0.5068176753311672, |
|
"learning_rate": 6.591800345761313e-06, |
|
"loss": 0.5857, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 1.9516632016632016, |
|
"grad_norm": 0.5156660031341467, |
|
"learning_rate": 6.563384120434978e-06, |
|
"loss": 0.5998, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 1.9542619542619541, |
|
"grad_norm": 0.5915967761576071, |
|
"learning_rate": 6.5349993267112455e-06, |
|
"loss": 0.5901, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 1.956860706860707, |
|
"grad_norm": 0.5286753068152813, |
|
"learning_rate": 6.506646224200036e-06, |
|
"loss": 0.606, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 1.9594594594594594, |
|
"grad_norm": 0.5510621595391834, |
|
"learning_rate": 6.4783250722214066e-06, |
|
"loss": 0.5996, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 1.9620582120582122, |
|
"grad_norm": 0.5050260932024032, |
|
"learning_rate": 6.450036129803205e-06, |
|
"loss": 0.5811, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 1.9646569646569647, |
|
"grad_norm": 0.5187426414977963, |
|
"learning_rate": 6.42177965567868e-06, |
|
"loss": 0.6012, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 1.9672557172557172, |
|
"grad_norm": 0.5226860354335852, |
|
"learning_rate": 6.393555908284119e-06, |
|
"loss": 0.6002, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 1.9698544698544698, |
|
"grad_norm": 0.5394751859696948, |
|
"learning_rate": 6.3653651457565005e-06, |
|
"loss": 0.6049, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 1.9724532224532223, |
|
"grad_norm": 0.5041393147335839, |
|
"learning_rate": 6.337207625931105e-06, |
|
"loss": 0.5995, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 1.975051975051975, |
|
"grad_norm": 0.533267009949286, |
|
"learning_rate": 6.309083606339184e-06, |
|
"loss": 0.5845, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.9776507276507278, |
|
"grad_norm": 0.5376844644903337, |
|
"learning_rate": 6.28099334420559e-06, |
|
"loss": 0.5889, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 1.9802494802494803, |
|
"grad_norm": 0.5424651587833641, |
|
"learning_rate": 6.252937096446422e-06, |
|
"loss": 0.5931, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 1.9828482328482329, |
|
"grad_norm": 0.5323019777176436, |
|
"learning_rate": 6.224915119666682e-06, |
|
"loss": 0.6001, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 1.9854469854469854, |
|
"grad_norm": 0.5719566751743559, |
|
"learning_rate": 6.196927670157931e-06, |
|
"loss": 0.5969, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.988045738045738, |
|
"grad_norm": 0.5144348819478973, |
|
"learning_rate": 6.168975003895939e-06, |
|
"loss": 0.6027, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 1.9906444906444907, |
|
"grad_norm": 0.5171213896946363, |
|
"learning_rate": 6.141057376538338e-06, |
|
"loss": 0.5986, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 1.9932432432432432, |
|
"grad_norm": 0.5178977499722083, |
|
"learning_rate": 6.113175043422301e-06, |
|
"loss": 0.6069, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 1.995841995841996, |
|
"grad_norm": 0.5264478858379251, |
|
"learning_rate": 6.085328259562195e-06, |
|
"loss": 0.5939, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 1.9984407484407485, |
|
"grad_norm": 0.49088468990078843, |
|
"learning_rate": 6.0575172796472405e-06, |
|
"loss": 0.5899, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.7568330764770508, |
|
"eval_runtime": 106.5795, |
|
"eval_samples_per_second": 77.032, |
|
"eval_steps_per_second": 1.21, |
|
"step": 3848 |
|
}, |
|
{ |
|
"epoch": 2.001039501039501, |
|
"grad_norm": 0.6668925862814008, |
|
"learning_rate": 6.0297423580392055e-06, |
|
"loss": 0.5449, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 2.0036382536382535, |
|
"grad_norm": 0.6496810654128746, |
|
"learning_rate": 6.002003748770055e-06, |
|
"loss": 0.5054, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 2.006237006237006, |
|
"grad_norm": 0.6031011324917133, |
|
"learning_rate": 5.9743017055396424e-06, |
|
"loss": 0.508, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 2.008835758835759, |
|
"grad_norm": 0.6010814236947867, |
|
"learning_rate": 5.9466364817133886e-06, |
|
"loss": 0.5042, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 2.0114345114345116, |
|
"grad_norm": 0.5728886202402685, |
|
"learning_rate": 5.9190083303199505e-06, |
|
"loss": 0.5013, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 2.014033264033264, |
|
"grad_norm": 0.5424146827083851, |
|
"learning_rate": 5.891417504048926e-06, |
|
"loss": 0.5075, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 2.0166320166320166, |
|
"grad_norm": 0.5790413975893587, |
|
"learning_rate": 5.863864255248533e-06, |
|
"loss": 0.5179, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 2.019230769230769, |
|
"grad_norm": 0.5318824862807435, |
|
"learning_rate": 5.836348835923299e-06, |
|
"loss": 0.5068, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 2.0218295218295217, |
|
"grad_norm": 0.550883650215065, |
|
"learning_rate": 5.808871497731758e-06, |
|
"loss": 0.4974, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 2.024428274428274, |
|
"grad_norm": 0.5879882582188948, |
|
"learning_rate": 5.781432491984162e-06, |
|
"loss": 0.5113, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 2.027027027027027, |
|
"grad_norm": 0.5546832686816904, |
|
"learning_rate": 5.754032069640153e-06, |
|
"loss": 0.5063, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.0296257796257797, |
|
"grad_norm": 0.5211604329798696, |
|
"learning_rate": 5.726670481306505e-06, |
|
"loss": 0.5052, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 2.0322245322245323, |
|
"grad_norm": 0.5455878123275217, |
|
"learning_rate": 5.699347977234799e-06, |
|
"loss": 0.5053, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 2.034823284823285, |
|
"grad_norm": 0.5950657410818389, |
|
"learning_rate": 5.672064807319146e-06, |
|
"loss": 0.5152, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 2.0374220374220373, |
|
"grad_norm": 0.5858939065311778, |
|
"learning_rate": 5.644821221093916e-06, |
|
"loss": 0.5059, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 2.04002079002079, |
|
"grad_norm": 0.5347458170039379, |
|
"learning_rate": 5.617617467731438e-06, |
|
"loss": 0.5112, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 2.042619542619543, |
|
"grad_norm": 0.5384308493309783, |
|
"learning_rate": 5.5904537960397155e-06, |
|
"loss": 0.4975, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 2.0452182952182953, |
|
"grad_norm": 0.5730691087208541, |
|
"learning_rate": 5.563330454460179e-06, |
|
"loss": 0.4961, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 2.047817047817048, |
|
"grad_norm": 0.5477717438993087, |
|
"learning_rate": 5.536247691065384e-06, |
|
"loss": 0.5121, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 2.0504158004158004, |
|
"grad_norm": 0.5548351105040114, |
|
"learning_rate": 5.50920575355675e-06, |
|
"loss": 0.5079, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 2.053014553014553, |
|
"grad_norm": 0.5759237090673845, |
|
"learning_rate": 5.482204889262319e-06, |
|
"loss": 0.5093, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 2.0556133056133055, |
|
"grad_norm": 0.5547272581679922, |
|
"learning_rate": 5.455245345134449e-06, |
|
"loss": 0.4965, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 2.0582120582120584, |
|
"grad_norm": 0.5545227019423067, |
|
"learning_rate": 5.428327367747598e-06, |
|
"loss": 0.5056, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 2.060810810810811, |
|
"grad_norm": 0.5475321378756351, |
|
"learning_rate": 5.401451203296049e-06, |
|
"loss": 0.4992, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 2.0634095634095635, |
|
"grad_norm": 0.5571990780758471, |
|
"learning_rate": 5.37461709759165e-06, |
|
"loss": 0.5029, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 2.066008316008316, |
|
"grad_norm": 0.5646750653448925, |
|
"learning_rate": 5.3478252960615794e-06, |
|
"loss": 0.5045, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 2.0686070686070686, |
|
"grad_norm": 0.5748986438531573, |
|
"learning_rate": 5.321076043746108e-06, |
|
"loss": 0.4982, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 2.071205821205821, |
|
"grad_norm": 0.5627371455320099, |
|
"learning_rate": 5.2943695852963325e-06, |
|
"loss": 0.5096, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 2.0738045738045736, |
|
"grad_norm": 0.5535736181815755, |
|
"learning_rate": 5.267706164971966e-06, |
|
"loss": 0.502, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 2.0764033264033266, |
|
"grad_norm": 0.5361674621317485, |
|
"learning_rate": 5.241086026639079e-06, |
|
"loss": 0.5056, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 2.079002079002079, |
|
"grad_norm": 0.5644675385907009, |
|
"learning_rate": 5.214509413767892e-06, |
|
"loss": 0.5142, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.0816008316008316, |
|
"grad_norm": 0.5784423395730652, |
|
"learning_rate": 5.187976569430535e-06, |
|
"loss": 0.5087, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 2.084199584199584, |
|
"grad_norm": 0.592275009867849, |
|
"learning_rate": 5.1614877362988205e-06, |
|
"loss": 0.5027, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 2.0867983367983367, |
|
"grad_norm": 0.5950969421446421, |
|
"learning_rate": 5.1350431566420326e-06, |
|
"loss": 0.5046, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 2.0893970893970892, |
|
"grad_norm": 0.5446288597399254, |
|
"learning_rate": 5.108643072324717e-06, |
|
"loss": 0.5107, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 2.091995841995842, |
|
"grad_norm": 0.5839095060604741, |
|
"learning_rate": 5.082287724804453e-06, |
|
"loss": 0.507, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 2.0945945945945947, |
|
"grad_norm": 0.5801086689129009, |
|
"learning_rate": 5.055977355129653e-06, |
|
"loss": 0.5007, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 2.0971933471933473, |
|
"grad_norm": 0.5394364509572592, |
|
"learning_rate": 5.02971220393736e-06, |
|
"loss": 0.5079, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 2.0997920997921, |
|
"grad_norm": 0.5649634959442216, |
|
"learning_rate": 5.003492511451051e-06, |
|
"loss": 0.5042, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 2.1023908523908523, |
|
"grad_norm": 0.5697263888969452, |
|
"learning_rate": 4.977318517478421e-06, |
|
"loss": 0.5012, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 2.104989604989605, |
|
"grad_norm": 0.6166161420968725, |
|
"learning_rate": 4.951190461409214e-06, |
|
"loss": 0.511, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 2.1075883575883574, |
|
"grad_norm": 0.5624903877149114, |
|
"learning_rate": 4.925108582213013e-06, |
|
"loss": 0.5104, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 2.1101871101871104, |
|
"grad_norm": 0.5731533074752744, |
|
"learning_rate": 4.899073118437063e-06, |
|
"loss": 0.5109, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 2.112785862785863, |
|
"grad_norm": 0.5800809144559984, |
|
"learning_rate": 4.873084308204101e-06, |
|
"loss": 0.4999, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 2.1153846153846154, |
|
"grad_norm": 0.5639967552020521, |
|
"learning_rate": 4.84714238921015e-06, |
|
"loss": 0.4972, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 2.117983367983368, |
|
"grad_norm": 0.5540959676849216, |
|
"learning_rate": 4.821247598722373e-06, |
|
"loss": 0.4887, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 2.1205821205821205, |
|
"grad_norm": 0.5820289593717347, |
|
"learning_rate": 4.7954001735768925e-06, |
|
"loss": 0.4983, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 2.123180873180873, |
|
"grad_norm": 0.5665889825124238, |
|
"learning_rate": 4.7696003501766155e-06, |
|
"loss": 0.4928, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 2.125779625779626, |
|
"grad_norm": 0.5786397074647865, |
|
"learning_rate": 4.7438483644890776e-06, |
|
"loss": 0.509, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 2.1283783783783785, |
|
"grad_norm": 0.5544771675503383, |
|
"learning_rate": 4.718144452044299e-06, |
|
"loss": 0.5088, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 2.130977130977131, |
|
"grad_norm": 0.5842089375560309, |
|
"learning_rate": 4.692488847932601e-06, |
|
"loss": 0.5131, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.1335758835758836, |
|
"grad_norm": 0.5572667236950973, |
|
"learning_rate": 4.666881786802492e-06, |
|
"loss": 0.513, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 2.136174636174636, |
|
"grad_norm": 0.6261912246125306, |
|
"learning_rate": 4.6413235028584804e-06, |
|
"loss": 0.5053, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 2.1387733887733886, |
|
"grad_norm": 0.5702222004267216, |
|
"learning_rate": 4.615814229858969e-06, |
|
"loss": 0.495, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 2.141372141372141, |
|
"grad_norm": 0.5790166866248228, |
|
"learning_rate": 4.590354201114103e-06, |
|
"loss": 0.4973, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 2.143970893970894, |
|
"grad_norm": 0.5603345931162405, |
|
"learning_rate": 4.564943649483625e-06, |
|
"loss": 0.5063, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 2.1465696465696467, |
|
"grad_norm": 0.5569620723069888, |
|
"learning_rate": 4.539582807374756e-06, |
|
"loss": 0.4982, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 2.149168399168399, |
|
"grad_norm": 0.6019143400672264, |
|
"learning_rate": 4.514271906740082e-06, |
|
"loss": 0.5116, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 2.1517671517671517, |
|
"grad_norm": 0.5668221330685952, |
|
"learning_rate": 4.489011179075408e-06, |
|
"loss": 0.4989, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 2.1543659043659042, |
|
"grad_norm": 0.5777522818500115, |
|
"learning_rate": 4.46380085541765e-06, |
|
"loss": 0.4866, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 2.156964656964657, |
|
"grad_norm": 0.5577057043245417, |
|
"learning_rate": 4.438641166342733e-06, |
|
"loss": 0.5048, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 2.1595634095634098, |
|
"grad_norm": 0.5811543313527234, |
|
"learning_rate": 4.413532341963477e-06, |
|
"loss": 0.5024, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 2.1621621621621623, |
|
"grad_norm": 0.5901491166344425, |
|
"learning_rate": 4.388474611927472e-06, |
|
"loss": 0.4985, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 2.164760914760915, |
|
"grad_norm": 0.5537573132486768, |
|
"learning_rate": 4.363468205415014e-06, |
|
"loss": 0.4956, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 2.1673596673596673, |
|
"grad_norm": 0.5660909275231115, |
|
"learning_rate": 4.338513351136977e-06, |
|
"loss": 0.4928, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 2.16995841995842, |
|
"grad_norm": 0.5991376447658537, |
|
"learning_rate": 4.313610277332732e-06, |
|
"loss": 0.499, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 2.1725571725571724, |
|
"grad_norm": 0.6075244421550833, |
|
"learning_rate": 4.288759211768072e-06, |
|
"loss": 0.5033, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 2.1751559251559254, |
|
"grad_norm": 0.5517113456938116, |
|
"learning_rate": 4.263960381733106e-06, |
|
"loss": 0.4951, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 2.177754677754678, |
|
"grad_norm": 0.5677317519142169, |
|
"learning_rate": 4.2392140140401996e-06, |
|
"loss": 0.4978, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 2.1803534303534304, |
|
"grad_norm": 0.569770318980704, |
|
"learning_rate": 4.214520335021896e-06, |
|
"loss": 0.4939, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 2.182952182952183, |
|
"grad_norm": 0.6118856781558967, |
|
"learning_rate": 4.189879570528831e-06, |
|
"loss": 0.5069, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.1855509355509355, |
|
"grad_norm": 0.606019484002795, |
|
"learning_rate": 4.165291945927693e-06, |
|
"loss": 0.5043, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 2.188149688149688, |
|
"grad_norm": 0.5556726937928989, |
|
"learning_rate": 4.140757686099137e-06, |
|
"loss": 0.4868, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 2.1907484407484406, |
|
"grad_norm": 0.5837653560310493, |
|
"learning_rate": 4.116277015435743e-06, |
|
"loss": 0.5015, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 2.1933471933471935, |
|
"grad_norm": 0.588179392816627, |
|
"learning_rate": 4.091850157839963e-06, |
|
"loss": 0.503, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 2.195945945945946, |
|
"grad_norm": 0.5994112141091228, |
|
"learning_rate": 4.067477336722063e-06, |
|
"loss": 0.5124, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 2.1985446985446986, |
|
"grad_norm": 0.6035705377584152, |
|
"learning_rate": 4.043158774998093e-06, |
|
"loss": 0.5089, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 2.201143451143451, |
|
"grad_norm": 0.5688655453887249, |
|
"learning_rate": 4.01889469508784e-06, |
|
"loss": 0.5043, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 2.2037422037422036, |
|
"grad_norm": 0.5917248307889117, |
|
"learning_rate": 3.994685318912794e-06, |
|
"loss": 0.5163, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 2.206340956340956, |
|
"grad_norm": 0.5394197139265716, |
|
"learning_rate": 3.970530867894114e-06, |
|
"loss": 0.5069, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 2.208939708939709, |
|
"grad_norm": 0.576389590721846, |
|
"learning_rate": 3.946431562950624e-06, |
|
"loss": 0.5005, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 2.2115384615384617, |
|
"grad_norm": 0.595056352282733, |
|
"learning_rate": 3.922387624496762e-06, |
|
"loss": 0.5043, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 2.214137214137214, |
|
"grad_norm": 0.5572014241693316, |
|
"learning_rate": 3.89839927244058e-06, |
|
"loss": 0.5074, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 2.2167359667359667, |
|
"grad_norm": 0.5659607082452609, |
|
"learning_rate": 3.87446672618174e-06, |
|
"loss": 0.5078, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 2.2193347193347193, |
|
"grad_norm": 0.5659519225313114, |
|
"learning_rate": 3.850590204609501e-06, |
|
"loss": 0.5042, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 2.221933471933472, |
|
"grad_norm": 0.5520449827126801, |
|
"learning_rate": 3.826769926100699e-06, |
|
"loss": 0.5049, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 2.2245322245322248, |
|
"grad_norm": 0.9464101657600216, |
|
"learning_rate": 3.803006108517786e-06, |
|
"loss": 0.5049, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 2.2271309771309773, |
|
"grad_norm": 0.5678141927512144, |
|
"learning_rate": 3.7792989692068018e-06, |
|
"loss": 0.5035, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 2.22972972972973, |
|
"grad_norm": 0.5706457165389711, |
|
"learning_rate": 3.755648724995404e-06, |
|
"loss": 0.4968, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 2.2323284823284824, |
|
"grad_norm": 0.6171952232229321, |
|
"learning_rate": 3.732055592190893e-06, |
|
"loss": 0.5082, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 2.234927234927235, |
|
"grad_norm": 0.600087841592369, |
|
"learning_rate": 3.7085197865782085e-06, |
|
"loss": 0.5039, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.2375259875259874, |
|
"grad_norm": 0.5625791681960587, |
|
"learning_rate": 3.6850415234179805e-06, |
|
"loss": 0.5041, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 2.24012474012474, |
|
"grad_norm": 0.596631151098558, |
|
"learning_rate": 3.661621017444551e-06, |
|
"loss": 0.5013, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 2.242723492723493, |
|
"grad_norm": 0.5324892002802353, |
|
"learning_rate": 3.638258482863999e-06, |
|
"loss": 0.4958, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 2.2453222453222454, |
|
"grad_norm": 0.5720843807653389, |
|
"learning_rate": 3.6149541333522053e-06, |
|
"loss": 0.4994, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 2.247920997920998, |
|
"grad_norm": 0.5550466004827054, |
|
"learning_rate": 3.5917081820528765e-06, |
|
"loss": 0.5066, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 2.2505197505197505, |
|
"grad_norm": 0.5853024960860177, |
|
"learning_rate": 3.568520841575601e-06, |
|
"loss": 0.4984, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 2.253118503118503, |
|
"grad_norm": 0.5591994418510899, |
|
"learning_rate": 3.5453923239939192e-06, |
|
"loss": 0.5057, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 2.2557172557172556, |
|
"grad_norm": 0.595876343661196, |
|
"learning_rate": 3.5223228408433564e-06, |
|
"loss": 0.4978, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 2.258316008316008, |
|
"grad_norm": 0.5689153116175016, |
|
"learning_rate": 3.499312603119517e-06, |
|
"loss": 0.5045, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 2.260914760914761, |
|
"grad_norm": 0.617131890152672, |
|
"learning_rate": 3.4763618212761376e-06, |
|
"loss": 0.5068, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 2.2635135135135136, |
|
"grad_norm": 0.5756546456394432, |
|
"learning_rate": 3.453470705223162e-06, |
|
"loss": 0.5006, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 2.266112266112266, |
|
"grad_norm": 0.5904214264537652, |
|
"learning_rate": 3.430639464324825e-06, |
|
"loss": 0.509, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 2.2687110187110187, |
|
"grad_norm": 0.5486092941094705, |
|
"learning_rate": 3.407868307397747e-06, |
|
"loss": 0.4956, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 2.271309771309771, |
|
"grad_norm": 0.6341681884960043, |
|
"learning_rate": 3.3851574427090028e-06, |
|
"loss": 0.502, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 2.2739085239085237, |
|
"grad_norm": 0.5816609147620979, |
|
"learning_rate": 3.362507077974234e-06, |
|
"loss": 0.5053, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 2.2765072765072767, |
|
"grad_norm": 0.5992096025731823, |
|
"learning_rate": 3.339917420355746e-06, |
|
"loss": 0.4915, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 2.279106029106029, |
|
"grad_norm": 0.5634477819700985, |
|
"learning_rate": 3.3173886764606133e-06, |
|
"loss": 0.5034, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 2.2817047817047817, |
|
"grad_norm": 0.5407976728647481, |
|
"learning_rate": 3.2949210523387786e-06, |
|
"loss": 0.4999, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 2.2843035343035343, |
|
"grad_norm": 0.562584471586657, |
|
"learning_rate": 3.2725147534811885e-06, |
|
"loss": 0.502, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 2.286902286902287, |
|
"grad_norm": 0.5561376196303791, |
|
"learning_rate": 3.250169984817897e-06, |
|
"loss": 0.4996, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.2895010395010393, |
|
"grad_norm": 0.5288577740017452, |
|
"learning_rate": 3.2278869507161947e-06, |
|
"loss": 0.4923, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 2.2920997920997923, |
|
"grad_norm": 0.5564297800059832, |
|
"learning_rate": 3.2056658549787513e-06, |
|
"loss": 0.5004, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 2.294698544698545, |
|
"grad_norm": 0.5807459489768877, |
|
"learning_rate": 3.1835069008417307e-06, |
|
"loss": 0.513, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 2.2972972972972974, |
|
"grad_norm": 0.5698550653282723, |
|
"learning_rate": 3.1614102909729547e-06, |
|
"loss": 0.5017, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 2.29989604989605, |
|
"grad_norm": 0.5484677379859523, |
|
"learning_rate": 3.139376227470038e-06, |
|
"loss": 0.4948, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 2.3024948024948024, |
|
"grad_norm": 0.6034586450111454, |
|
"learning_rate": 3.1174049118585303e-06, |
|
"loss": 0.5057, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 2.305093555093555, |
|
"grad_norm": 0.6209164341363942, |
|
"learning_rate": 3.0954965450900963e-06, |
|
"loss": 0.5013, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 0.608022280956626, |
|
"learning_rate": 3.0736513275406565e-06, |
|
"loss": 0.5007, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 2.3102910602910605, |
|
"grad_norm": 0.5822912947800326, |
|
"learning_rate": 3.0518694590085608e-06, |
|
"loss": 0.4878, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 2.312889812889813, |
|
"grad_norm": 0.6052596335796735, |
|
"learning_rate": 3.0301511387127746e-06, |
|
"loss": 0.5048, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 2.3154885654885655, |
|
"grad_norm": 0.6109257960539891, |
|
"learning_rate": 3.0084965652910314e-06, |
|
"loss": 0.4979, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 2.318087318087318, |
|
"grad_norm": 0.6001612610617809, |
|
"learning_rate": 2.9869059367980402e-06, |
|
"loss": 0.502, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 2.3206860706860706, |
|
"grad_norm": 0.5748427683895482, |
|
"learning_rate": 2.965379450703665e-06, |
|
"loss": 0.4976, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 2.323284823284823, |
|
"grad_norm": 0.6062682998551074, |
|
"learning_rate": 2.943917303891107e-06, |
|
"loss": 0.51, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 2.3258835758835756, |
|
"grad_norm": 0.5866563557363672, |
|
"learning_rate": 2.92251969265512e-06, |
|
"loss": 0.5063, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 2.3284823284823286, |
|
"grad_norm": 0.5512520483966091, |
|
"learning_rate": 2.9011868127002153e-06, |
|
"loss": 0.4934, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 2.331081081081081, |
|
"grad_norm": 0.5427787073773119, |
|
"learning_rate": 2.879918859138857e-06, |
|
"loss": 0.4909, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 2.3336798336798337, |
|
"grad_norm": 0.5807057669777462, |
|
"learning_rate": 2.8587160264896873e-06, |
|
"loss": 0.4955, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 2.336278586278586, |
|
"grad_norm": 0.6094348111906394, |
|
"learning_rate": 2.8375785086757533e-06, |
|
"loss": 0.5028, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 2.3388773388773387, |
|
"grad_norm": 0.569446343993791, |
|
"learning_rate": 2.8165064990227255e-06, |
|
"loss": 0.4966, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.3414760914760917, |
|
"grad_norm": 0.546949602624272, |
|
"learning_rate": 2.795500190257122e-06, |
|
"loss": 0.5041, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 2.3440748440748442, |
|
"grad_norm": 0.5841136870299933, |
|
"learning_rate": 2.774559774504566e-06, |
|
"loss": 0.5093, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 2.3466735966735968, |
|
"grad_norm": 0.5894084600218413, |
|
"learning_rate": 2.75368544328801e-06, |
|
"loss": 0.5018, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 2.3492723492723493, |
|
"grad_norm": 0.5849212705691518, |
|
"learning_rate": 2.7328773875259905e-06, |
|
"loss": 0.4983, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 2.351871101871102, |
|
"grad_norm": 0.5594245456576148, |
|
"learning_rate": 2.7121357975308893e-06, |
|
"loss": 0.5116, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 2.3544698544698544, |
|
"grad_norm": 0.5904437980074254, |
|
"learning_rate": 2.691460863007178e-06, |
|
"loss": 0.5046, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 2.357068607068607, |
|
"grad_norm": 0.6092333364117684, |
|
"learning_rate": 2.670852773049698e-06, |
|
"loss": 0.492, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 2.35966735966736, |
|
"grad_norm": 0.5406949036065258, |
|
"learning_rate": 2.6503117161419246e-06, |
|
"loss": 0.4966, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 2.3622661122661124, |
|
"grad_norm": 0.6499059905714683, |
|
"learning_rate": 2.6298378801542337e-06, |
|
"loss": 0.4995, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 2.364864864864865, |
|
"grad_norm": 0.5417621572559367, |
|
"learning_rate": 2.6094314523422035e-06, |
|
"loss": 0.4903, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 2.3674636174636174, |
|
"grad_norm": 0.5832045594170597, |
|
"learning_rate": 2.589092619344885e-06, |
|
"loss": 0.4937, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 2.37006237006237, |
|
"grad_norm": 0.570494106023411, |
|
"learning_rate": 2.5688215671830975e-06, |
|
"loss": 0.4967, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 2.3726611226611225, |
|
"grad_norm": 0.5563324888807575, |
|
"learning_rate": 2.54861848125774e-06, |
|
"loss": 0.5039, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 2.375259875259875, |
|
"grad_norm": 0.5891719757564269, |
|
"learning_rate": 2.5284835463480774e-06, |
|
"loss": 0.5009, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 2.377858627858628, |
|
"grad_norm": 0.6000418457824788, |
|
"learning_rate": 2.5084169466100626e-06, |
|
"loss": 0.494, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 2.3804573804573805, |
|
"grad_norm": 0.5612803989317922, |
|
"learning_rate": 2.4884188655746554e-06, |
|
"loss": 0.4974, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 2.383056133056133, |
|
"grad_norm": 0.5574484874125388, |
|
"learning_rate": 2.468489486146125e-06, |
|
"loss": 0.4953, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 2.3856548856548856, |
|
"grad_norm": 0.550628523258081, |
|
"learning_rate": 2.4486289906003935e-06, |
|
"loss": 0.5182, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 2.388253638253638, |
|
"grad_norm": 0.567017209479145, |
|
"learning_rate": 2.4288375605833726e-06, |
|
"loss": 0.4907, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 2.390852390852391, |
|
"grad_norm": 0.5474114054711359, |
|
"learning_rate": 2.4091153771092847e-06, |
|
"loss": 0.4976, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.3934511434511436, |
|
"grad_norm": 0.5567614559206484, |
|
"learning_rate": 2.3894626205590177e-06, |
|
"loss": 0.4925, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 2.396049896049896, |
|
"grad_norm": 0.5620691248378288, |
|
"learning_rate": 2.36987947067848e-06, |
|
"loss": 0.4892, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 2.3986486486486487, |
|
"grad_norm": 0.5471599595016963, |
|
"learning_rate": 2.3503661065769523e-06, |
|
"loss": 0.5006, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 2.401247401247401, |
|
"grad_norm": 0.5643679588409989, |
|
"learning_rate": 2.330922706725437e-06, |
|
"loss": 0.5052, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 2.4038461538461537, |
|
"grad_norm": 0.5992107723526578, |
|
"learning_rate": 2.3115494489550517e-06, |
|
"loss": 0.4944, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 2.4064449064449063, |
|
"grad_norm": 0.5735681525239322, |
|
"learning_rate": 2.292246510455375e-06, |
|
"loss": 0.5023, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 2.4090436590436592, |
|
"grad_norm": 0.5569413415577497, |
|
"learning_rate": 2.2730140677728485e-06, |
|
"loss": 0.5017, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 2.4116424116424118, |
|
"grad_norm": 0.5657509769713301, |
|
"learning_rate": 2.253852296809148e-06, |
|
"loss": 0.5018, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 2.4142411642411643, |
|
"grad_norm": 0.561092028484337, |
|
"learning_rate": 2.234761372819577e-06, |
|
"loss": 0.5005, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 2.416839916839917, |
|
"grad_norm": 0.584135442702734, |
|
"learning_rate": 2.215741470411472e-06, |
|
"loss": 0.495, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 2.4194386694386694, |
|
"grad_norm": 0.5585660724073979, |
|
"learning_rate": 2.196792763542599e-06, |
|
"loss": 0.5045, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 2.422037422037422, |
|
"grad_norm": 0.5584867361238677, |
|
"learning_rate": 2.1779154255195576e-06, |
|
"loss": 0.5018, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 2.4246361746361744, |
|
"grad_norm": 0.566982522139209, |
|
"learning_rate": 2.1591096289962077e-06, |
|
"loss": 0.4911, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 2.4272349272349274, |
|
"grad_norm": 0.560220035509712, |
|
"learning_rate": 2.140375545972081e-06, |
|
"loss": 0.5021, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 2.42983367983368, |
|
"grad_norm": 0.5507532159687185, |
|
"learning_rate": 2.121713347790808e-06, |
|
"loss": 0.5036, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 2.4324324324324325, |
|
"grad_norm": 0.5966472596819247, |
|
"learning_rate": 2.1031232051385606e-06, |
|
"loss": 0.4966, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 2.435031185031185, |
|
"grad_norm": 0.5544285219883713, |
|
"learning_rate": 2.0846052880424783e-06, |
|
"loss": 0.501, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 2.4376299376299375, |
|
"grad_norm": 0.5182057167941686, |
|
"learning_rate": 2.0661597658691226e-06, |
|
"loss": 0.4904, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 2.44022869022869, |
|
"grad_norm": 0.5694712994846337, |
|
"learning_rate": 2.047786807322927e-06, |
|
"loss": 0.4875, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 2.442827442827443, |
|
"grad_norm": 0.5644710268706207, |
|
"learning_rate": 2.029486580444644e-06, |
|
"loss": 0.4919, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.4454261954261955, |
|
"grad_norm": 0.5709123415197537, |
|
"learning_rate": 2.0112592526098173e-06, |
|
"loss": 0.5087, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 2.448024948024948, |
|
"grad_norm": 0.5535461175978135, |
|
"learning_rate": 1.993104990527257e-06, |
|
"loss": 0.4921, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 2.4506237006237006, |
|
"grad_norm": 0.6301006821974645, |
|
"learning_rate": 1.975023960237499e-06, |
|
"loss": 0.4885, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 2.453222453222453, |
|
"grad_norm": 0.5494288053608467, |
|
"learning_rate": 1.957016327111294e-06, |
|
"loss": 0.4906, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.4558212058212057, |
|
"grad_norm": 0.5493564158683376, |
|
"learning_rate": 1.9390822558481014e-06, |
|
"loss": 0.4955, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 2.4584199584199586, |
|
"grad_norm": 0.6043307426388902, |
|
"learning_rate": 1.921221910474579e-06, |
|
"loss": 0.5007, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 2.461018711018711, |
|
"grad_norm": 0.5909390472872661, |
|
"learning_rate": 1.9034354543430677e-06, |
|
"loss": 0.5009, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 2.4636174636174637, |
|
"grad_norm": 0.5601290888435961, |
|
"learning_rate": 1.885723050130127e-06, |
|
"loss": 0.4869, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 2.4662162162162162, |
|
"grad_norm": 0.5671638427007798, |
|
"learning_rate": 1.8680848598350165e-06, |
|
"loss": 0.5002, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 2.4688149688149688, |
|
"grad_norm": 0.5597631082866084, |
|
"learning_rate": 1.8505210447782418e-06, |
|
"loss": 0.5092, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 2.4714137214137213, |
|
"grad_norm": 0.5611497450799863, |
|
"learning_rate": 1.833031765600054e-06, |
|
"loss": 0.5008, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 2.474012474012474, |
|
"grad_norm": 0.5601559085266762, |
|
"learning_rate": 1.8156171822589963e-06, |
|
"loss": 0.4887, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 2.476611226611227, |
|
"grad_norm": 0.555263493680061, |
|
"learning_rate": 1.7982774540304404e-06, |
|
"loss": 0.5112, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 2.4792099792099793, |
|
"grad_norm": 0.5663743347641695, |
|
"learning_rate": 1.781012739505127e-06, |
|
"loss": 0.4907, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 2.481808731808732, |
|
"grad_norm": 0.6155955922535356, |
|
"learning_rate": 1.7638231965877039e-06, |
|
"loss": 0.4836, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 2.4844074844074844, |
|
"grad_norm": 0.5902555495646782, |
|
"learning_rate": 1.7467089824953077e-06, |
|
"loss": 0.5047, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 2.487006237006237, |
|
"grad_norm": 0.5720398120641105, |
|
"learning_rate": 1.7296702537560994e-06, |
|
"loss": 0.5094, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 2.4896049896049894, |
|
"grad_norm": 0.5593330846808308, |
|
"learning_rate": 1.7127071662078455e-06, |
|
"loss": 0.5121, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 2.492203742203742, |
|
"grad_norm": 0.5807674813382018, |
|
"learning_rate": 1.6958198749964983e-06, |
|
"loss": 0.4888, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 2.494802494802495, |
|
"grad_norm": 0.5712031491060828, |
|
"learning_rate": 1.679008534574761e-06, |
|
"loss": 0.485, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.4974012474012475, |
|
"grad_norm": 0.5616832705475885, |
|
"learning_rate": 1.6622732987006884e-06, |
|
"loss": 0.5019, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 0.5536996546706574, |
|
"learning_rate": 1.6456143204362807e-06, |
|
"loss": 0.4933, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 2.5025987525987525, |
|
"grad_norm": 0.5707614937226522, |
|
"learning_rate": 1.6290317521460697e-06, |
|
"loss": 0.4828, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 2.505197505197505, |
|
"grad_norm": 0.5927994991308208, |
|
"learning_rate": 1.6125257454957365e-06, |
|
"loss": 0.4861, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 2.507796257796258, |
|
"grad_norm": 0.5852679815841081, |
|
"learning_rate": 1.5960964514507316e-06, |
|
"loss": 0.4944, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 2.51039501039501, |
|
"grad_norm": 0.5782206531686512, |
|
"learning_rate": 1.5797440202748748e-06, |
|
"loss": 0.4897, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 2.512993762993763, |
|
"grad_norm": 0.5749262146519877, |
|
"learning_rate": 1.5634686015289925e-06, |
|
"loss": 0.5008, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 2.5155925155925156, |
|
"grad_norm": 0.5902984761192304, |
|
"learning_rate": 1.5472703440695524e-06, |
|
"loss": 0.4997, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 2.518191268191268, |
|
"grad_norm": 0.5725171354203544, |
|
"learning_rate": 1.5311493960472978e-06, |
|
"loss": 0.4913, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 2.5207900207900207, |
|
"grad_norm": 0.5489936561056176, |
|
"learning_rate": 1.5151059049058913e-06, |
|
"loss": 0.4965, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 2.523388773388773, |
|
"grad_norm": 0.571188356733997, |
|
"learning_rate": 1.499140017380566e-06, |
|
"loss": 0.4955, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 2.525987525987526, |
|
"grad_norm": 0.543934249979962, |
|
"learning_rate": 1.4832518794967853e-06, |
|
"loss": 0.498, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 2.5285862785862787, |
|
"grad_norm": 0.5779586017866482, |
|
"learning_rate": 1.4674416365689137e-06, |
|
"loss": 0.5079, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 2.5311850311850312, |
|
"grad_norm": 0.573790412674796, |
|
"learning_rate": 1.4517094331988734e-06, |
|
"loss": 0.5071, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 2.5337837837837838, |
|
"grad_norm": 0.5834488347165243, |
|
"learning_rate": 1.4360554132748305e-06, |
|
"loss": 0.493, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 2.5363825363825363, |
|
"grad_norm": 0.5521193528499587, |
|
"learning_rate": 1.4204797199698839e-06, |
|
"loss": 0.4893, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 2.538981288981289, |
|
"grad_norm": 0.5837955107251298, |
|
"learning_rate": 1.4049824957407464e-06, |
|
"loss": 0.4998, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 2.5415800415800414, |
|
"grad_norm": 0.5654955515661542, |
|
"learning_rate": 1.3895638823264447e-06, |
|
"loss": 0.4913, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 2.5441787941787943, |
|
"grad_norm": 0.5743033149419415, |
|
"learning_rate": 1.374224020747027e-06, |
|
"loss": 0.5056, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 2.546777546777547, |
|
"grad_norm": 0.5855881014618302, |
|
"learning_rate": 1.3589630513022656e-06, |
|
"loss": 0.5028, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.5493762993762994, |
|
"grad_norm": 0.6031010192364838, |
|
"learning_rate": 1.3437811135703792e-06, |
|
"loss": 0.4964, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 2.551975051975052, |
|
"grad_norm": 0.5409820031001269, |
|
"learning_rate": 1.328678346406761e-06, |
|
"loss": 0.4946, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 2.5545738045738045, |
|
"grad_norm": 0.5667214248558752, |
|
"learning_rate": 1.3136548879426926e-06, |
|
"loss": 0.492, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 2.5571725571725574, |
|
"grad_norm": 0.5478082998559753, |
|
"learning_rate": 1.2987108755840994e-06, |
|
"loss": 0.4949, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 2.5597713097713095, |
|
"grad_norm": 0.5748275704846928, |
|
"learning_rate": 1.2838464460102862e-06, |
|
"loss": 0.4969, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 2.5623700623700625, |
|
"grad_norm": 0.5561105303734099, |
|
"learning_rate": 1.2690617351726798e-06, |
|
"loss": 0.4967, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 2.564968814968815, |
|
"grad_norm": 0.5847660828596739, |
|
"learning_rate": 1.2543568782935933e-06, |
|
"loss": 0.4893, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 2.5675675675675675, |
|
"grad_norm": 0.5797822737989639, |
|
"learning_rate": 1.2397320098649957e-06, |
|
"loss": 0.5002, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 2.57016632016632, |
|
"grad_norm": 0.5696211912101424, |
|
"learning_rate": 1.225187263647265e-06, |
|
"loss": 0.5056, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 2.5727650727650726, |
|
"grad_norm": 0.6105509252737591, |
|
"learning_rate": 1.210722772667977e-06, |
|
"loss": 0.4786, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 2.5753638253638256, |
|
"grad_norm": 0.5710521831184937, |
|
"learning_rate": 1.196338669220689e-06, |
|
"loss": 0.4895, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 2.577962577962578, |
|
"grad_norm": 0.5601653499624455, |
|
"learning_rate": 1.182035084863724e-06, |
|
"loss": 0.5016, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 2.5805613305613306, |
|
"grad_norm": 0.584343919584128, |
|
"learning_rate": 1.167812150418972e-06, |
|
"loss": 0.5159, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 2.583160083160083, |
|
"grad_norm": 0.6129296779221889, |
|
"learning_rate": 1.1536699959706898e-06, |
|
"loss": 0.5055, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 2.5857588357588357, |
|
"grad_norm": 0.5844416957330778, |
|
"learning_rate": 1.1396087508643106e-06, |
|
"loss": 0.504, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 2.5883575883575882, |
|
"grad_norm": 0.5750509882184978, |
|
"learning_rate": 1.1256285437052684e-06, |
|
"loss": 0.4925, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 2.5909563409563408, |
|
"grad_norm": 0.5486207493135079, |
|
"learning_rate": 1.1117295023578134e-06, |
|
"loss": 0.5079, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 2.5935550935550937, |
|
"grad_norm": 0.5751013655385914, |
|
"learning_rate": 1.0979117539438444e-06, |
|
"loss": 0.4925, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 2.5961538461538463, |
|
"grad_norm": 0.5940615821819871, |
|
"learning_rate": 1.0841754248417535e-06, |
|
"loss": 0.5001, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 2.598752598752599, |
|
"grad_norm": 0.544341637873671, |
|
"learning_rate": 1.0705206406852607e-06, |
|
"loss": 0.5003, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.6013513513513513, |
|
"grad_norm": 0.5688429085624325, |
|
"learning_rate": 1.0569475263622652e-06, |
|
"loss": 0.492, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 2.603950103950104, |
|
"grad_norm": 0.5898916948570275, |
|
"learning_rate": 1.0434562060137154e-06, |
|
"loss": 0.494, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 2.606548856548857, |
|
"grad_norm": 0.5415866533855809, |
|
"learning_rate": 1.030046803032455e-06, |
|
"loss": 0.4904, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 2.609147609147609, |
|
"grad_norm": 0.5719708739269925, |
|
"learning_rate": 1.0167194400621072e-06, |
|
"loss": 0.489, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 2.611746361746362, |
|
"grad_norm": 0.5958211082907041, |
|
"learning_rate": 1.003474238995954e-06, |
|
"loss": 0.4957, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 2.6143451143451144, |
|
"grad_norm": 0.5541558159414748, |
|
"learning_rate": 9.903113209758098e-07, |
|
"loss": 0.4993, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 2.616943866943867, |
|
"grad_norm": 0.5872960411024288, |
|
"learning_rate": 9.772308063909263e-07, |
|
"loss": 0.5105, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 2.6195426195426195, |
|
"grad_norm": 0.5605189291423912, |
|
"learning_rate": 9.642328148768865e-07, |
|
"loss": 0.4963, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 2.622141372141372, |
|
"grad_norm": 0.5607585059207263, |
|
"learning_rate": 9.513174653145052e-07, |
|
"loss": 0.5028, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 2.624740124740125, |
|
"grad_norm": 0.5534539551699679, |
|
"learning_rate": 9.384848758287469e-07, |
|
"loss": 0.4894, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 2.6273388773388775, |
|
"grad_norm": 0.5668591204471706, |
|
"learning_rate": 9.25735163787651e-07, |
|
"loss": 0.5004, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 2.62993762993763, |
|
"grad_norm": 0.5428488027300683, |
|
"learning_rate": 9.13068445801244e-07, |
|
"loss": 0.5028, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 2.6325363825363826, |
|
"grad_norm": 0.5730809692935364, |
|
"learning_rate": 9.004848377204878e-07, |
|
"loss": 0.4961, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 2.635135135135135, |
|
"grad_norm": 0.5287247837670602, |
|
"learning_rate": 8.879844546362093e-07, |
|
"loss": 0.499, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 2.6377338877338876, |
|
"grad_norm": 0.5730710423541429, |
|
"learning_rate": 8.755674108780532e-07, |
|
"loss": 0.4964, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 2.64033264033264, |
|
"grad_norm": 0.5506264010952827, |
|
"learning_rate": 8.632338200134382e-07, |
|
"loss": 0.4936, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 2.642931392931393, |
|
"grad_norm": 0.519210055930055, |
|
"learning_rate": 8.509837948465094e-07, |
|
"loss": 0.49, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 2.6455301455301456, |
|
"grad_norm": 0.5809039207230658, |
|
"learning_rate": 8.388174474171163e-07, |
|
"loss": 0.5033, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 2.648128898128898, |
|
"grad_norm": 0.561018096449011, |
|
"learning_rate": 8.267348889997839e-07, |
|
"loss": 0.5051, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 2.6507276507276507, |
|
"grad_norm": 0.5422624927838648, |
|
"learning_rate": 8.14736230102694e-07, |
|
"loss": 0.4864, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.6533264033264032, |
|
"grad_norm": 0.549824075528394, |
|
"learning_rate": 8.028215804666761e-07, |
|
"loss": 0.5027, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 2.6559251559251558, |
|
"grad_norm": 0.6073556177013598, |
|
"learning_rate": 7.909910490642025e-07, |
|
"loss": 0.4981, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 2.6585239085239083, |
|
"grad_norm": 0.5812550130344551, |
|
"learning_rate": 7.792447440983985e-07, |
|
"loss": 0.504, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 2.6611226611226613, |
|
"grad_norm": 0.5688133990130678, |
|
"learning_rate": 7.675827730020358e-07, |
|
"loss": 0.5004, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 2.663721413721414, |
|
"grad_norm": 0.5617035595950866, |
|
"learning_rate": 7.560052424365716e-07, |
|
"loss": 0.4923, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 2.6663201663201663, |
|
"grad_norm": 0.5835381005107588, |
|
"learning_rate": 7.445122582911546e-07, |
|
"loss": 0.4989, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 2.668918918918919, |
|
"grad_norm": 0.5681826093882452, |
|
"learning_rate": 7.331039256816664e-07, |
|
"loss": 0.5001, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 2.6715176715176714, |
|
"grad_norm": 0.5416547579730493, |
|
"learning_rate": 7.217803489497621e-07, |
|
"loss": 0.4915, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 2.6741164241164244, |
|
"grad_norm": 0.5708948503374369, |
|
"learning_rate": 7.10541631661904e-07, |
|
"loss": 0.506, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 2.6767151767151764, |
|
"grad_norm": 0.5825889025850369, |
|
"learning_rate": 6.993878766084295e-07, |
|
"loss": 0.4978, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 2.6793139293139294, |
|
"grad_norm": 0.5658766831235557, |
|
"learning_rate": 6.883191858026006e-07, |
|
"loss": 0.5002, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 2.681912681912682, |
|
"grad_norm": 0.5560529305298989, |
|
"learning_rate": 6.773356604796744e-07, |
|
"loss": 0.4975, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 2.6845114345114345, |
|
"grad_norm": 0.5708450386103079, |
|
"learning_rate": 6.664374010959739e-07, |
|
"loss": 0.5089, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 2.687110187110187, |
|
"grad_norm": 0.5562285971352838, |
|
"learning_rate": 6.556245073279777e-07, |
|
"loss": 0.5075, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 2.6897089397089395, |
|
"grad_norm": 0.5721605688382857, |
|
"learning_rate": 6.448970780713948e-07, |
|
"loss": 0.4876, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 2.6923076923076925, |
|
"grad_norm": 0.567860262795361, |
|
"learning_rate": 6.342552114402789e-07, |
|
"loss": 0.4968, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 2.694906444906445, |
|
"grad_norm": 0.5512124172540173, |
|
"learning_rate": 6.236990047661074e-07, |
|
"loss": 0.4971, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 2.6975051975051976, |
|
"grad_norm": 0.5567955011645962, |
|
"learning_rate": 6.132285545969141e-07, |
|
"loss": 0.4893, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 2.70010395010395, |
|
"grad_norm": 0.5658501671925406, |
|
"learning_rate": 6.028439566963929e-07, |
|
"loss": 0.4899, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 2.7027027027027026, |
|
"grad_norm": 0.5444634054315433, |
|
"learning_rate": 5.925453060430219e-07, |
|
"loss": 0.4878, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.705301455301455, |
|
"grad_norm": 0.550663548661878, |
|
"learning_rate": 5.823326968292009e-07, |
|
"loss": 0.5009, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 2.7079002079002077, |
|
"grad_norm": 0.6060107034007801, |
|
"learning_rate": 5.722062224603886e-07, |
|
"loss": 0.4946, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 2.7104989604989607, |
|
"grad_norm": 0.58216821945967, |
|
"learning_rate": 5.621659755542408e-07, |
|
"loss": 0.5057, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 2.713097713097713, |
|
"grad_norm": 0.5416674185051638, |
|
"learning_rate": 5.522120479397731e-07, |
|
"loss": 0.4965, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 2.7156964656964657, |
|
"grad_norm": 0.5761995130950316, |
|
"learning_rate": 5.423445306565168e-07, |
|
"loss": 0.5038, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 2.7182952182952183, |
|
"grad_norm": 0.5635042371421582, |
|
"learning_rate": 5.325635139536867e-07, |
|
"loss": 0.4884, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 2.720893970893971, |
|
"grad_norm": 0.5743033588993577, |
|
"learning_rate": 5.228690872893527e-07, |
|
"loss": 0.4934, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 2.7234927234927238, |
|
"grad_norm": 0.5431291593888027, |
|
"learning_rate": 5.132613393296293e-07, |
|
"loss": 0.4921, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 2.726091476091476, |
|
"grad_norm": 0.5702390465003064, |
|
"learning_rate": 5.037403579478551e-07, |
|
"loss": 0.5067, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 2.728690228690229, |
|
"grad_norm": 0.5864949506182338, |
|
"learning_rate": 4.943062302237922e-07, |
|
"loss": 0.5047, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 2.7312889812889813, |
|
"grad_norm": 0.5856655357457804, |
|
"learning_rate": 4.849590424428386e-07, |
|
"loss": 0.498, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 2.733887733887734, |
|
"grad_norm": 0.5592585991123705, |
|
"learning_rate": 4.7569888009522336e-07, |
|
"loss": 0.5062, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 2.7364864864864864, |
|
"grad_norm": 0.5598975614142522, |
|
"learning_rate": 4.665258278752383e-07, |
|
"loss": 0.4922, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 2.739085239085239, |
|
"grad_norm": 0.5672657648159654, |
|
"learning_rate": 4.574399696804588e-07, |
|
"loss": 0.5032, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 2.741683991683992, |
|
"grad_norm": 0.5635739403935113, |
|
"learning_rate": 4.4844138861096954e-07, |
|
"loss": 0.4914, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 2.7442827442827444, |
|
"grad_norm": 0.5707341919153839, |
|
"learning_rate": 4.3953016696861805e-07, |
|
"loss": 0.4955, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 2.746881496881497, |
|
"grad_norm": 0.593682622033041, |
|
"learning_rate": 4.3070638625624884e-07, |
|
"loss": 0.504, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 2.7494802494802495, |
|
"grad_norm": 0.5308025960734446, |
|
"learning_rate": 4.2197012717696604e-07, |
|
"loss": 0.4898, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 2.752079002079002, |
|
"grad_norm": 0.562474477928597, |
|
"learning_rate": 4.133214696333943e-07, |
|
"loss": 0.4919, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 2.7546777546777546, |
|
"grad_norm": 0.5626677048136434, |
|
"learning_rate": 4.047604927269433e-07, |
|
"loss": 0.5041, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.757276507276507, |
|
"grad_norm": 0.5534559012860586, |
|
"learning_rate": 3.9628727475709003e-07, |
|
"loss": 0.5018, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 2.75987525987526, |
|
"grad_norm": 0.5678002530448841, |
|
"learning_rate": 3.879018932206624e-07, |
|
"loss": 0.4795, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 2.7624740124740126, |
|
"grad_norm": 0.5725188711773384, |
|
"learning_rate": 3.796044248111219e-07, |
|
"loss": 0.4825, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 2.765072765072765, |
|
"grad_norm": 0.5449981795766418, |
|
"learning_rate": 3.7139494541787225e-07, |
|
"loss": 0.4966, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 2.7676715176715176, |
|
"grad_norm": 0.5793024671746052, |
|
"learning_rate": 3.632735301255652e-07, |
|
"loss": 0.499, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 2.77027027027027, |
|
"grad_norm": 0.5463699816730897, |
|
"learning_rate": 3.552402532134014e-07, |
|
"loss": 0.4971, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 2.7728690228690227, |
|
"grad_norm": 0.569435124360503, |
|
"learning_rate": 3.472951881544695e-07, |
|
"loss": 0.4965, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 2.7754677754677752, |
|
"grad_norm": 0.5426897702433433, |
|
"learning_rate": 3.3943840761505695e-07, |
|
"loss": 0.5109, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 2.778066528066528, |
|
"grad_norm": 0.5583873172184759, |
|
"learning_rate": 3.316699834539983e-07, |
|
"loss": 0.5025, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 2.7806652806652807, |
|
"grad_norm": 0.589354529655944, |
|
"learning_rate": 3.239899867220064e-07, |
|
"loss": 0.4998, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 2.7832640332640333, |
|
"grad_norm": 0.5254097934455335, |
|
"learning_rate": 3.163984876610371e-07, |
|
"loss": 0.4949, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 2.785862785862786, |
|
"grad_norm": 0.5536550483370661, |
|
"learning_rate": 3.0889555570363216e-07, |
|
"loss": 0.4917, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 2.7884615384615383, |
|
"grad_norm": 0.5994721180940217, |
|
"learning_rate": 3.0148125947229047e-07, |
|
"loss": 0.495, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 2.7910602910602913, |
|
"grad_norm": 0.5652212087251041, |
|
"learning_rate": 2.9415566677884365e-07, |
|
"loss": 0.5029, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 2.7936590436590434, |
|
"grad_norm": 0.567605739930232, |
|
"learning_rate": 2.869188446238336e-07, |
|
"loss": 0.506, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 2.7962577962577964, |
|
"grad_norm": 0.5569576384780233, |
|
"learning_rate": 2.7977085919589253e-07, |
|
"loss": 0.5003, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 2.798856548856549, |
|
"grad_norm": 0.5412543330665912, |
|
"learning_rate": 2.727117758711506e-07, |
|
"loss": 0.4887, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 2.8014553014553014, |
|
"grad_norm": 0.5376966982466084, |
|
"learning_rate": 2.6574165921262605e-07, |
|
"loss": 0.4888, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 2.804054054054054, |
|
"grad_norm": 0.54053951299071, |
|
"learning_rate": 2.588605729696447e-07, |
|
"loss": 0.4919, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 2.8066528066528065, |
|
"grad_norm": 0.5981753062988322, |
|
"learning_rate": 2.5206858007724934e-07, |
|
"loss": 0.4839, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.8092515592515594, |
|
"grad_norm": 0.5725431316908658, |
|
"learning_rate": 2.453657426556244e-07, |
|
"loss": 0.5122, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 2.811850311850312, |
|
"grad_norm": 0.5422874879244404, |
|
"learning_rate": 2.387521220095357e-07, |
|
"loss": 0.4891, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 2.8144490644490645, |
|
"grad_norm": 0.5599975123926269, |
|
"learning_rate": 2.3222777862776046e-07, |
|
"loss": 0.5021, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 2.817047817047817, |
|
"grad_norm": 0.5590054648939673, |
|
"learning_rate": 2.2579277218253926e-07, |
|
"loss": 0.4841, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 2.8196465696465696, |
|
"grad_norm": 0.5504364503745305, |
|
"learning_rate": 2.1944716152902834e-07, |
|
"loss": 0.5002, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 2.822245322245322, |
|
"grad_norm": 0.5797812317339487, |
|
"learning_rate": 2.131910047047625e-07, |
|
"loss": 0.486, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 2.8248440748440746, |
|
"grad_norm": 0.5584561563327431, |
|
"learning_rate": 2.070243589291221e-07, |
|
"loss": 0.4879, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 2.8274428274428276, |
|
"grad_norm": 0.5983591006728118, |
|
"learning_rate": 2.0094728060281454e-07, |
|
"loss": 0.4964, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 2.83004158004158, |
|
"grad_norm": 0.5793345159708853, |
|
"learning_rate": 1.9495982530735035e-07, |
|
"loss": 0.4931, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 2.8326403326403327, |
|
"grad_norm": 0.5521618537806441, |
|
"learning_rate": 1.890620478045435e-07, |
|
"loss": 0.4844, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 2.835239085239085, |
|
"grad_norm": 0.5590622081741721, |
|
"learning_rate": 1.832540020360063e-07, |
|
"loss": 0.4941, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 2.8378378378378377, |
|
"grad_norm": 0.5807487335701172, |
|
"learning_rate": 1.7753574112265526e-07, |
|
"loss": 0.4888, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 2.8404365904365907, |
|
"grad_norm": 0.5896137562543345, |
|
"learning_rate": 1.7190731736422606e-07, |
|
"loss": 0.4983, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 2.8430353430353428, |
|
"grad_norm": 0.5740425049161126, |
|
"learning_rate": 1.6636878223879826e-07, |
|
"loss": 0.4931, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 2.8456340956340958, |
|
"grad_norm": 0.5440884316639071, |
|
"learning_rate": 1.6092018640231688e-07, |
|
"loss": 0.4831, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 2.8482328482328483, |
|
"grad_norm": 0.5781760225759857, |
|
"learning_rate": 1.5556157968813823e-07, |
|
"loss": 0.4988, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 2.850831600831601, |
|
"grad_norm": 0.5889448479356277, |
|
"learning_rate": 1.5029301110656923e-07, |
|
"loss": 0.4885, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 2.8534303534303533, |
|
"grad_norm": 0.6111223279448279, |
|
"learning_rate": 1.4511452884441778e-07, |
|
"loss": 0.5014, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 2.856029106029106, |
|
"grad_norm": 0.5497820384951762, |
|
"learning_rate": 1.400261802645575e-07, |
|
"loss": 0.4951, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 2.858627858627859, |
|
"grad_norm": 0.560040257401693, |
|
"learning_rate": 1.350280119054881e-07, |
|
"loss": 0.4907, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.8612266112266114, |
|
"grad_norm": 0.5832689907805786, |
|
"learning_rate": 1.3012006948091237e-07, |
|
"loss": 0.4919, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 2.863825363825364, |
|
"grad_norm": 0.5550041536720619, |
|
"learning_rate": 1.2530239787932108e-07, |
|
"loss": 0.4841, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 2.8664241164241164, |
|
"grad_norm": 0.5548285935264545, |
|
"learning_rate": 1.2057504116357865e-07, |
|
"loss": 0.4957, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 2.869022869022869, |
|
"grad_norm": 0.5573963669652322, |
|
"learning_rate": 1.1593804257052143e-07, |
|
"loss": 0.5003, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 2.8716216216216215, |
|
"grad_norm": 0.573683837072622, |
|
"learning_rate": 1.1139144451056016e-07, |
|
"loss": 0.4917, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 2.874220374220374, |
|
"grad_norm": 0.5955060126211607, |
|
"learning_rate": 1.0693528856729918e-07, |
|
"loss": 0.5077, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 2.876819126819127, |
|
"grad_norm": 0.5823077256056483, |
|
"learning_rate": 1.025696154971445e-07, |
|
"loss": 0.4879, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 2.8794178794178795, |
|
"grad_norm": 0.578110542140886, |
|
"learning_rate": 9.829446522894193e-08, |
|
"loss": 0.5007, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 2.882016632016632, |
|
"grad_norm": 0.5745290388769638, |
|
"learning_rate": 9.410987686360618e-08, |
|
"loss": 0.4846, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"grad_norm": 0.5645029409864777, |
|
"learning_rate": 9.001588867376343e-08, |
|
"loss": 0.4875, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 2.887214137214137, |
|
"grad_norm": 0.5579791649018835, |
|
"learning_rate": 8.601253810340493e-08, |
|
"loss": 0.498, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 2.88981288981289, |
|
"grad_norm": 0.5352826063441829, |
|
"learning_rate": 8.209986176753947e-08, |
|
"loss": 0.4929, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 2.892411642411642, |
|
"grad_norm": 0.5406028586508593, |
|
"learning_rate": 7.827789545186149e-08, |
|
"loss": 0.493, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 2.895010395010395, |
|
"grad_norm": 0.5735179042030664, |
|
"learning_rate": 7.454667411242677e-08, |
|
"loss": 0.4974, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 2.8976091476091477, |
|
"grad_norm": 0.5884667672124062, |
|
"learning_rate": 7.090623187532286e-08, |
|
"loss": 0.4979, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 2.9002079002079, |
|
"grad_norm": 0.5483035016900611, |
|
"learning_rate": 6.735660203636918e-08, |
|
"loss": 0.4905, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 2.9028066528066527, |
|
"grad_norm": 0.5528971829398494, |
|
"learning_rate": 6.389781706080289e-08, |
|
"loss": 0.5122, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 2.9054054054054053, |
|
"grad_norm": 0.6057996286720458, |
|
"learning_rate": 6.052990858298801e-08, |
|
"loss": 0.5028, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 2.9080041580041582, |
|
"grad_norm": 0.5791623231565692, |
|
"learning_rate": 5.7252907406123436e-08, |
|
"loss": 0.4982, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 2.9106029106029108, |
|
"grad_norm": 0.5627119775940129, |
|
"learning_rate": 5.406684350195979e-08, |
|
"loss": 0.4964, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.9132016632016633, |
|
"grad_norm": 0.5559095452523849, |
|
"learning_rate": 5.0971746010528566e-08, |
|
"loss": 0.5063, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 2.915800415800416, |
|
"grad_norm": 0.5535048888740743, |
|
"learning_rate": 4.7967643239875686e-08, |
|
"loss": 0.501, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 2.9183991683991684, |
|
"grad_norm": 0.5462617861557779, |
|
"learning_rate": 4.505456266579833e-08, |
|
"loss": 0.5031, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 2.920997920997921, |
|
"grad_norm": 0.5384884238791128, |
|
"learning_rate": 4.22325309315963e-08, |
|
"loss": 0.5019, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 2.9235966735966734, |
|
"grad_norm": 0.5773761719166159, |
|
"learning_rate": 3.950157384783104e-08, |
|
"loss": 0.4939, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 2.9261954261954264, |
|
"grad_norm": 0.5595211401825231, |
|
"learning_rate": 3.68617163920848e-08, |
|
"loss": 0.5007, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 2.928794178794179, |
|
"grad_norm": 0.5658405364960404, |
|
"learning_rate": 3.4312982708734065e-08, |
|
"loss": 0.4806, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 2.9313929313929314, |
|
"grad_norm": 0.5503957964422638, |
|
"learning_rate": 3.1855396108730897e-08, |
|
"loss": 0.5014, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 2.933991683991684, |
|
"grad_norm": 0.5453959548244628, |
|
"learning_rate": 2.9488979069387523e-08, |
|
"loss": 0.4894, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 2.9365904365904365, |
|
"grad_norm": 0.5693202599680172, |
|
"learning_rate": 2.721375323416875e-08, |
|
"loss": 0.4966, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 2.939189189189189, |
|
"grad_norm": 0.5709168022581379, |
|
"learning_rate": 2.5029739412497643e-08, |
|
"loss": 0.4887, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 2.9417879417879416, |
|
"grad_norm": 0.5402927981587441, |
|
"learning_rate": 2.293695757956571e-08, |
|
"loss": 0.4968, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 2.9443866943866945, |
|
"grad_norm": 0.629131836957027, |
|
"learning_rate": 2.0935426876144138e-08, |
|
"loss": 0.4891, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 2.946985446985447, |
|
"grad_norm": 0.5667833955309428, |
|
"learning_rate": 1.9025165608418382e-08, |
|
"loss": 0.4975, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 2.9495841995841996, |
|
"grad_norm": 0.5618681057012622, |
|
"learning_rate": 1.7206191247810533e-08, |
|
"loss": 0.4949, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 2.952182952182952, |
|
"grad_norm": 0.5467732554533196, |
|
"learning_rate": 1.5478520430826095e-08, |
|
"loss": 0.4985, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 2.9547817047817047, |
|
"grad_norm": 0.566899525327703, |
|
"learning_rate": 1.3842168958900782e-08, |
|
"loss": 0.4978, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 2.9573804573804576, |
|
"grad_norm": 0.548674729535616, |
|
"learning_rate": 1.229715179825397e-08, |
|
"loss": 0.5092, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 2.9599792099792097, |
|
"grad_norm": 0.5855706553975555, |
|
"learning_rate": 1.0843483079755468e-08, |
|
"loss": 0.5036, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 2.9625779625779627, |
|
"grad_norm": 0.574837155061116, |
|
"learning_rate": 9.481176098788958e-09, |
|
"loss": 0.5036, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.965176715176715, |
|
"grad_norm": 0.5658141647440329, |
|
"learning_rate": 8.210243315140976e-09, |
|
"loss": 0.4972, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 2.9677754677754677, |
|
"grad_norm": 0.5791629723335482, |
|
"learning_rate": 7.030696352878786e-09, |
|
"loss": 0.4942, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 2.9703742203742203, |
|
"grad_norm": 0.5707304439471226, |
|
"learning_rate": 5.942546000244909e-09, |
|
"loss": 0.4946, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 2.972972972972973, |
|
"grad_norm": 0.5873811028122294, |
|
"learning_rate": 4.945802209562755e-09, |
|
"loss": 0.4899, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 2.975571725571726, |
|
"grad_norm": 0.5626457291531141, |
|
"learning_rate": 4.0404740971433655e-09, |
|
"loss": 0.4837, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 2.9781704781704783, |
|
"grad_norm": 0.5482498449859082, |
|
"learning_rate": 3.226569943197699e-09, |
|
"loss": 0.4958, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 2.980769230769231, |
|
"grad_norm": 0.57584453111696, |
|
"learning_rate": 2.5040971917689172e-09, |
|
"loss": 0.5065, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 2.9833679833679834, |
|
"grad_norm": 0.6193525019087672, |
|
"learning_rate": 1.873062450659102e-09, |
|
"loss": 0.4947, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 2.985966735966736, |
|
"grad_norm": 0.5534322935945964, |
|
"learning_rate": 1.3334714913681989e-09, |
|
"loss": 0.4968, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 2.9885654885654884, |
|
"grad_norm": 0.5698273659048839, |
|
"learning_rate": 8.853292490462739e-10, |
|
"loss": 0.4965, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 2.991164241164241, |
|
"grad_norm": 0.6072011011211793, |
|
"learning_rate": 5.286398224413347e-10, |
|
"loss": 0.4982, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 2.993762993762994, |
|
"grad_norm": 0.5483495272455329, |
|
"learning_rate": 2.6340647386935426e-10, |
|
"loss": 0.4905, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 2.9963617463617465, |
|
"grad_norm": 0.5468214431100177, |
|
"learning_rate": 8.963162917763335e-11, |
|
"loss": 0.4943, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 2.998960498960499, |
|
"grad_norm": 0.554348853048081, |
|
"learning_rate": 7.31687772592693e-12, |
|
"loss": 0.4984, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.8089174032211304, |
|
"eval_runtime": 106.5642, |
|
"eval_samples_per_second": 77.043, |
|
"eval_steps_per_second": 1.211, |
|
"step": 5772 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 5772, |
|
"total_flos": 1208539372584960.0, |
|
"train_loss": 0.6196737293559317, |
|
"train_runtime": 16925.3973, |
|
"train_samples_per_second": 21.825, |
|
"train_steps_per_second": 0.341 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 5772, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1208539372584960.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|