|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 3848, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005197505197505198, |
|
"grad_norm": 37.05835422762717, |
|
"learning_rate": 5.194805194805195e-08, |
|
"loss": 2.0466, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002598752598752599, |
|
"grad_norm": 37.36458385503662, |
|
"learning_rate": 2.597402597402598e-07, |
|
"loss": 2.0439, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.005197505197505198, |
|
"grad_norm": 39.070563651666575, |
|
"learning_rate": 5.194805194805196e-07, |
|
"loss": 2.0133, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.007796257796257797, |
|
"grad_norm": 11.361027583243034, |
|
"learning_rate": 7.792207792207792e-07, |
|
"loss": 1.8666, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.010395010395010396, |
|
"grad_norm": 5.863826986672296, |
|
"learning_rate": 1.0389610389610392e-06, |
|
"loss": 1.7677, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.012993762993762994, |
|
"grad_norm": 4.522921155466077, |
|
"learning_rate": 1.2987012987012986e-06, |
|
"loss": 1.6467, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.015592515592515593, |
|
"grad_norm": 3.1296619021160326, |
|
"learning_rate": 1.5584415584415584e-06, |
|
"loss": 1.4777, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.018191268191268192, |
|
"grad_norm": 2.12702049381069, |
|
"learning_rate": 1.8181818181818183e-06, |
|
"loss": 1.3423, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02079002079002079, |
|
"grad_norm": 2.381326820034051, |
|
"learning_rate": 2.0779220779220784e-06, |
|
"loss": 1.2129, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02338877338877339, |
|
"grad_norm": 1.745648282948505, |
|
"learning_rate": 2.337662337662338e-06, |
|
"loss": 1.117, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02598752598752599, |
|
"grad_norm": 1.2151402583346058, |
|
"learning_rate": 2.597402597402597e-06, |
|
"loss": 1.0561, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.028586278586278588, |
|
"grad_norm": 1.1469155851046635, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.9923, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.031185031185031187, |
|
"grad_norm": 1.1501290412070948, |
|
"learning_rate": 3.116883116883117e-06, |
|
"loss": 0.9596, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.033783783783783786, |
|
"grad_norm": 1.2437833266166898, |
|
"learning_rate": 3.376623376623377e-06, |
|
"loss": 0.9281, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.036382536382536385, |
|
"grad_norm": 1.1628173200126817, |
|
"learning_rate": 3.6363636363636366e-06, |
|
"loss": 0.9026, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.03898128898128898, |
|
"grad_norm": 1.1632550654052534, |
|
"learning_rate": 3.896103896103897e-06, |
|
"loss": 0.9004, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04158004158004158, |
|
"grad_norm": 1.2551305602329859, |
|
"learning_rate": 4.155844155844157e-06, |
|
"loss": 0.8755, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04417879417879418, |
|
"grad_norm": 1.1474998623142625, |
|
"learning_rate": 4.415584415584416e-06, |
|
"loss": 0.8516, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.04677754677754678, |
|
"grad_norm": 1.1143075622189529, |
|
"learning_rate": 4.675324675324676e-06, |
|
"loss": 0.8517, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.04937629937629938, |
|
"grad_norm": 1.1206710563724116, |
|
"learning_rate": 4.935064935064935e-06, |
|
"loss": 0.8458, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05197505197505198, |
|
"grad_norm": 1.0838734153124963, |
|
"learning_rate": 5.194805194805194e-06, |
|
"loss": 0.8363, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05457380457380458, |
|
"grad_norm": 1.5339125140531156, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 0.8195, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.057172557172557176, |
|
"grad_norm": 1.1439071531812668, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 0.8394, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.059771309771309775, |
|
"grad_norm": 1.4061392934636887, |
|
"learning_rate": 5.9740259740259746e-06, |
|
"loss": 0.8239, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.062370062370062374, |
|
"grad_norm": 1.1592154562156236, |
|
"learning_rate": 6.233766233766234e-06, |
|
"loss": 0.8039, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06496881496881497, |
|
"grad_norm": 1.0248228760326474, |
|
"learning_rate": 6.493506493506494e-06, |
|
"loss": 0.8022, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.06756756756756757, |
|
"grad_norm": 1.1650242675138267, |
|
"learning_rate": 6.753246753246754e-06, |
|
"loss": 0.8032, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07016632016632017, |
|
"grad_norm": 1.1631059955651202, |
|
"learning_rate": 7.012987012987014e-06, |
|
"loss": 0.8076, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.07276507276507277, |
|
"grad_norm": 0.9623641373070422, |
|
"learning_rate": 7.272727272727273e-06, |
|
"loss": 0.8039, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07536382536382537, |
|
"grad_norm": 1.1042092302715827, |
|
"learning_rate": 7.532467532467533e-06, |
|
"loss": 0.8041, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.07796257796257797, |
|
"grad_norm": 1.034610728564867, |
|
"learning_rate": 7.792207792207793e-06, |
|
"loss": 0.7979, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08056133056133057, |
|
"grad_norm": 1.0441346842973676, |
|
"learning_rate": 8.051948051948052e-06, |
|
"loss": 0.7889, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.08316008316008316, |
|
"grad_norm": 1.1314811400194547, |
|
"learning_rate": 8.311688311688313e-06, |
|
"loss": 0.8095, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08575883575883576, |
|
"grad_norm": 1.083993546178677, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.7838, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.08835758835758836, |
|
"grad_norm": 0.9844284926961596, |
|
"learning_rate": 8.831168831168832e-06, |
|
"loss": 0.8007, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09095634095634096, |
|
"grad_norm": 0.9748756413462465, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.7839, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09355509355509356, |
|
"grad_norm": 1.054078525965627, |
|
"learning_rate": 9.350649350649352e-06, |
|
"loss": 0.7819, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 1.1412040734751607, |
|
"learning_rate": 9.610389610389611e-06, |
|
"loss": 0.7874, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.09875259875259876, |
|
"grad_norm": 1.163328897434868, |
|
"learning_rate": 9.87012987012987e-06, |
|
"loss": 0.7762, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10135135135135136, |
|
"grad_norm": 1.0536579529578631, |
|
"learning_rate": 1.012987012987013e-05, |
|
"loss": 0.7665, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.10395010395010396, |
|
"grad_norm": 1.048914553080904, |
|
"learning_rate": 1.0389610389610389e-05, |
|
"loss": 0.7849, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10654885654885655, |
|
"grad_norm": 0.9748667183768033, |
|
"learning_rate": 1.064935064935065e-05, |
|
"loss": 0.7839, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.10914760914760915, |
|
"grad_norm": 0.9705690959742787, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 0.7827, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11174636174636175, |
|
"grad_norm": 0.9527544873685447, |
|
"learning_rate": 1.116883116883117e-05, |
|
"loss": 0.7823, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.11434511434511435, |
|
"grad_norm": 1.1194233425550948, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.7795, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.11694386694386695, |
|
"grad_norm": 2.120471539383005, |
|
"learning_rate": 1.1688311688311688e-05, |
|
"loss": 0.7778, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.11954261954261955, |
|
"grad_norm": 0.9808803038153814, |
|
"learning_rate": 1.1948051948051949e-05, |
|
"loss": 0.7634, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12214137214137215, |
|
"grad_norm": 0.94060918910957, |
|
"learning_rate": 1.2207792207792208e-05, |
|
"loss": 0.7578, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.12474012474012475, |
|
"grad_norm": 0.9878811168887828, |
|
"learning_rate": 1.2467532467532468e-05, |
|
"loss": 0.7837, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.12733887733887733, |
|
"grad_norm": 1.0526434374561247, |
|
"learning_rate": 1.2727272727272728e-05, |
|
"loss": 0.777, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.12993762993762994, |
|
"grad_norm": 1.067255251886244, |
|
"learning_rate": 1.2987012987012988e-05, |
|
"loss": 0.7606, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13253638253638253, |
|
"grad_norm": 1.019011419752788, |
|
"learning_rate": 1.3246753246753249e-05, |
|
"loss": 0.7783, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.13513513513513514, |
|
"grad_norm": 0.9625368636583931, |
|
"learning_rate": 1.3506493506493508e-05, |
|
"loss": 0.7643, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.13773388773388773, |
|
"grad_norm": 0.8964922122751663, |
|
"learning_rate": 1.3766233766233767e-05, |
|
"loss": 0.769, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.14033264033264034, |
|
"grad_norm": 1.0861405810538973, |
|
"learning_rate": 1.4025974025974028e-05, |
|
"loss": 0.772, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14293139293139293, |
|
"grad_norm": 0.7725004042430159, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.7684, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.14553014553014554, |
|
"grad_norm": 0.9641597887627689, |
|
"learning_rate": 1.4545454545454546e-05, |
|
"loss": 0.7665, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.14812889812889812, |
|
"grad_norm": 0.8799231599980127, |
|
"learning_rate": 1.4805194805194807e-05, |
|
"loss": 0.7553, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.15072765072765074, |
|
"grad_norm": 0.865183845501612, |
|
"learning_rate": 1.5064935064935066e-05, |
|
"loss": 0.776, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15332640332640332, |
|
"grad_norm": 0.9280023423503597, |
|
"learning_rate": 1.5324675324675326e-05, |
|
"loss": 0.7655, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.15592515592515593, |
|
"grad_norm": 0.9412086108813551, |
|
"learning_rate": 1.5584415584415587e-05, |
|
"loss": 0.7541, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15852390852390852, |
|
"grad_norm": 1.049389569363405, |
|
"learning_rate": 1.5844155844155847e-05, |
|
"loss": 0.7609, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.16112266112266113, |
|
"grad_norm": 0.8632998959240495, |
|
"learning_rate": 1.6103896103896105e-05, |
|
"loss": 0.7572, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16372141372141372, |
|
"grad_norm": 0.926940763705136, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 0.763, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.16632016632016633, |
|
"grad_norm": 0.8169022540417774, |
|
"learning_rate": 1.6623376623376627e-05, |
|
"loss": 0.7564, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.16891891891891891, |
|
"grad_norm": 0.8913503941645097, |
|
"learning_rate": 1.6883116883116884e-05, |
|
"loss": 0.768, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.17151767151767153, |
|
"grad_norm": 0.9677009736871893, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.7577, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.1741164241164241, |
|
"grad_norm": 0.891596010196128, |
|
"learning_rate": 1.7402597402597403e-05, |
|
"loss": 0.7482, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.17671517671517672, |
|
"grad_norm": 0.922259390367936, |
|
"learning_rate": 1.7662337662337664e-05, |
|
"loss": 0.7545, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.1793139293139293, |
|
"grad_norm": 0.8012139878255935, |
|
"learning_rate": 1.792207792207792e-05, |
|
"loss": 0.748, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.18191268191268192, |
|
"grad_norm": 0.8885070497602618, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.7506, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1845114345114345, |
|
"grad_norm": 0.8139071250420767, |
|
"learning_rate": 1.8441558441558443e-05, |
|
"loss": 0.7657, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.18711018711018712, |
|
"grad_norm": 0.8597830390928304, |
|
"learning_rate": 1.8701298701298704e-05, |
|
"loss": 0.7482, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1897089397089397, |
|
"grad_norm": 0.8215436811633837, |
|
"learning_rate": 1.896103896103896e-05, |
|
"loss": 0.7563, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 0.7888662364197664, |
|
"learning_rate": 1.9220779220779222e-05, |
|
"loss": 0.7638, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1949064449064449, |
|
"grad_norm": 0.7758295937643592, |
|
"learning_rate": 1.9480519480519483e-05, |
|
"loss": 0.7578, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.19750519750519752, |
|
"grad_norm": 0.7321462182850453, |
|
"learning_rate": 1.974025974025974e-05, |
|
"loss": 0.7478, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2001039501039501, |
|
"grad_norm": 0.8972812591629451, |
|
"learning_rate": 2e-05, |
|
"loss": 0.7364, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.20270270270270271, |
|
"grad_norm": 0.8504197871382875, |
|
"learning_rate": 1.9999897126378044e-05, |
|
"loss": 0.7531, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2053014553014553, |
|
"grad_norm": 1.0008077674950657, |
|
"learning_rate": 1.9999588507628768e-05, |
|
"loss": 0.7555, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.2079002079002079, |
|
"grad_norm": 0.8219960180575271, |
|
"learning_rate": 1.999907415010192e-05, |
|
"loss": 0.7412, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2104989604989605, |
|
"grad_norm": 0.7975912262836815, |
|
"learning_rate": 1.9998354064380263e-05, |
|
"loss": 0.7342, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.2130977130977131, |
|
"grad_norm": 0.7386294167625675, |
|
"learning_rate": 1.9997428265279365e-05, |
|
"loss": 0.7414, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.2156964656964657, |
|
"grad_norm": 0.7996558972435127, |
|
"learning_rate": 1.999629677184728e-05, |
|
"loss": 0.7684, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.2182952182952183, |
|
"grad_norm": 0.8532223218359839, |
|
"learning_rate": 1.999495960736418e-05, |
|
"loss": 0.7318, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2208939708939709, |
|
"grad_norm": 0.8099184350785439, |
|
"learning_rate": 1.999341679934186e-05, |
|
"loss": 0.7483, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2234927234927235, |
|
"grad_norm": 0.8032456938404925, |
|
"learning_rate": 1.999166837952316e-05, |
|
"loss": 0.7464, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2260914760914761, |
|
"grad_norm": 0.7613353132794639, |
|
"learning_rate": 1.998971438388134e-05, |
|
"loss": 0.7241, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.2286902286902287, |
|
"grad_norm": 0.739838242733476, |
|
"learning_rate": 1.9987554852619325e-05, |
|
"loss": 0.7523, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2312889812889813, |
|
"grad_norm": 0.7456120762278995, |
|
"learning_rate": 1.998518983016887e-05, |
|
"loss": 0.7461, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.2338877338877339, |
|
"grad_norm": 0.8064413740728865, |
|
"learning_rate": 1.9982619365189662e-05, |
|
"loss": 0.7524, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23648648648648649, |
|
"grad_norm": 0.8264553841158012, |
|
"learning_rate": 1.9979843510568312e-05, |
|
"loss": 0.7569, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.2390852390852391, |
|
"grad_norm": 0.7700326026252647, |
|
"learning_rate": 1.9976862323417262e-05, |
|
"loss": 0.7421, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24168399168399168, |
|
"grad_norm": 0.8078133521219729, |
|
"learning_rate": 1.9973675865073614e-05, |
|
"loss": 0.7459, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.2442827442827443, |
|
"grad_norm": 0.8834562269681555, |
|
"learning_rate": 1.9970284201097874e-05, |
|
"loss": 0.7419, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.24688149688149688, |
|
"grad_norm": 0.8201197358624366, |
|
"learning_rate": 1.996668740127259e-05, |
|
"loss": 0.7264, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.2494802494802495, |
|
"grad_norm": 0.7832673791812143, |
|
"learning_rate": 1.996288553960093e-05, |
|
"loss": 0.7326, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2520790020790021, |
|
"grad_norm": 0.7491102594223664, |
|
"learning_rate": 1.9958878694305147e-05, |
|
"loss": 0.7423, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.25467775467775466, |
|
"grad_norm": 0.7711246686689462, |
|
"learning_rate": 1.9954666947824983e-05, |
|
"loss": 0.7601, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.25727650727650725, |
|
"grad_norm": 0.8431355399537503, |
|
"learning_rate": 1.9950250386815953e-05, |
|
"loss": 0.7508, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.2598752598752599, |
|
"grad_norm": 0.7491422333746851, |
|
"learning_rate": 1.9945629102147593e-05, |
|
"loss": 0.7365, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2624740124740125, |
|
"grad_norm": 0.6891526342744494, |
|
"learning_rate": 1.9940803188901556e-05, |
|
"loss": 0.7315, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.26507276507276506, |
|
"grad_norm": 0.7000282425722503, |
|
"learning_rate": 1.9935772746369678e-05, |
|
"loss": 0.7524, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.26767151767151764, |
|
"grad_norm": 0.6734603177161373, |
|
"learning_rate": 1.9930537878051927e-05, |
|
"loss": 0.746, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.2702702702702703, |
|
"grad_norm": 0.6632065052188117, |
|
"learning_rate": 1.9925098691654275e-05, |
|
"loss": 0.7225, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.27286902286902287, |
|
"grad_norm": 0.6985881279460795, |
|
"learning_rate": 1.9919455299086485e-05, |
|
"loss": 0.7451, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.27546777546777546, |
|
"grad_norm": 0.6540248071379781, |
|
"learning_rate": 1.99136078164598e-05, |
|
"loss": 0.7322, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.27806652806652804, |
|
"grad_norm": 0.661603660243552, |
|
"learning_rate": 1.9907556364084568e-05, |
|
"loss": 0.7544, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.2806652806652807, |
|
"grad_norm": 0.7446420340679871, |
|
"learning_rate": 1.990130106646775e-05, |
|
"loss": 0.7476, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28326403326403327, |
|
"grad_norm": 0.8425291153171641, |
|
"learning_rate": 1.9894842052310373e-05, |
|
"loss": 0.733, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.28586278586278585, |
|
"grad_norm": 0.6481748553031429, |
|
"learning_rate": 1.9888179454504875e-05, |
|
"loss": 0.7439, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 0.679678646955314, |
|
"learning_rate": 1.9881313410132365e-05, |
|
"loss": 0.7418, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.2910602910602911, |
|
"grad_norm": 0.6504991149281961, |
|
"learning_rate": 1.9874244060459816e-05, |
|
"loss": 0.7338, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29365904365904366, |
|
"grad_norm": 0.7240524101878886, |
|
"learning_rate": 1.986697155093715e-05, |
|
"loss": 0.7384, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.29625779625779625, |
|
"grad_norm": 0.677804845061605, |
|
"learning_rate": 1.9859496031194242e-05, |
|
"loss": 0.7368, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.29885654885654883, |
|
"grad_norm": 0.6836994514744595, |
|
"learning_rate": 1.9851817655037854e-05, |
|
"loss": 0.7361, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.30145530145530147, |
|
"grad_norm": 0.6837922941788586, |
|
"learning_rate": 1.9843936580448457e-05, |
|
"loss": 0.7243, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.30405405405405406, |
|
"grad_norm": 0.7000337048592958, |
|
"learning_rate": 1.983585296957699e-05, |
|
"loss": 0.7329, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.30665280665280664, |
|
"grad_norm": 0.7178435855481708, |
|
"learning_rate": 1.9827566988741525e-05, |
|
"loss": 0.719, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3092515592515592, |
|
"grad_norm": 0.7066252470023224, |
|
"learning_rate": 1.9819078808423825e-05, |
|
"loss": 0.7277, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.31185031185031187, |
|
"grad_norm": 0.7131086872189417, |
|
"learning_rate": 1.981038860326586e-05, |
|
"loss": 0.7408, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31444906444906445, |
|
"grad_norm": 0.6862600699294851, |
|
"learning_rate": 1.980149655206621e-05, |
|
"loss": 0.7338, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.31704781704781704, |
|
"grad_norm": 0.7265535072671346, |
|
"learning_rate": 1.9792402837776377e-05, |
|
"loss": 0.7209, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.3196465696465696, |
|
"grad_norm": 0.7076896535093625, |
|
"learning_rate": 1.978310764749703e-05, |
|
"loss": 0.7442, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.32224532224532226, |
|
"grad_norm": 0.6473108719176737, |
|
"learning_rate": 1.9773611172474143e-05, |
|
"loss": 0.7314, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32484407484407485, |
|
"grad_norm": 0.6958993345177985, |
|
"learning_rate": 1.976391360809507e-05, |
|
"loss": 0.7297, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.32744282744282743, |
|
"grad_norm": 0.6980561880943195, |
|
"learning_rate": 1.9754015153884533e-05, |
|
"loss": 0.7507, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.33004158004158, |
|
"grad_norm": 0.725526089648866, |
|
"learning_rate": 1.974391601350049e-05, |
|
"loss": 0.7316, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.33264033264033266, |
|
"grad_norm": 0.6555475191719049, |
|
"learning_rate": 1.9733616394729975e-05, |
|
"loss": 0.7415, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.33523908523908524, |
|
"grad_norm": 0.7126270164163553, |
|
"learning_rate": 1.9723116509484807e-05, |
|
"loss": 0.7084, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.33783783783783783, |
|
"grad_norm": 0.6175823835566073, |
|
"learning_rate": 1.971241657379723e-05, |
|
"loss": 0.7437, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3404365904365904, |
|
"grad_norm": 0.641705701909066, |
|
"learning_rate": 1.9701516807815472e-05, |
|
"loss": 0.7227, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.34303534303534305, |
|
"grad_norm": 0.6271473392148214, |
|
"learning_rate": 1.9690417435799217e-05, |
|
"loss": 0.7131, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.34563409563409564, |
|
"grad_norm": 0.7368709959215286, |
|
"learning_rate": 1.967911868611498e-05, |
|
"loss": 0.7366, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3482328482328482, |
|
"grad_norm": 0.645812200849196, |
|
"learning_rate": 1.9667620791231422e-05, |
|
"loss": 0.7174, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.3508316008316008, |
|
"grad_norm": 0.6331883051927242, |
|
"learning_rate": 1.965592398771456e-05, |
|
"loss": 0.7255, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.35343035343035345, |
|
"grad_norm": 0.7047104196046573, |
|
"learning_rate": 1.9644028516222912e-05, |
|
"loss": 0.7349, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.35602910602910603, |
|
"grad_norm": 0.6573670034252512, |
|
"learning_rate": 1.9631934621502514e-05, |
|
"loss": 0.7294, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.3586278586278586, |
|
"grad_norm": 0.64012763867233, |
|
"learning_rate": 1.9619642552381924e-05, |
|
"loss": 0.7253, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3612266112266112, |
|
"grad_norm": 0.7085641762966977, |
|
"learning_rate": 1.9607152561767077e-05, |
|
"loss": 0.7302, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.36382536382536385, |
|
"grad_norm": 0.6270235430166686, |
|
"learning_rate": 1.9594464906636083e-05, |
|
"loss": 0.7215, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.36642411642411643, |
|
"grad_norm": 0.6347937985689194, |
|
"learning_rate": 1.958157984803395e-05, |
|
"loss": 0.7198, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.369022869022869, |
|
"grad_norm": 0.6404396997260027, |
|
"learning_rate": 1.956849765106721e-05, |
|
"loss": 0.7355, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.3716216216216216, |
|
"grad_norm": 0.5736442010381354, |
|
"learning_rate": 1.9555218584898457e-05, |
|
"loss": 0.7181, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.37422037422037424, |
|
"grad_norm": 0.6408880853261398, |
|
"learning_rate": 1.954174292274082e-05, |
|
"loss": 0.713, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3768191268191268, |
|
"grad_norm": 0.6668059771868257, |
|
"learning_rate": 1.9528070941852334e-05, |
|
"loss": 0.7477, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3794178794178794, |
|
"grad_norm": 0.6868239822957647, |
|
"learning_rate": 1.9514202923530233e-05, |
|
"loss": 0.7209, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.382016632016632, |
|
"grad_norm": 0.6673930879897976, |
|
"learning_rate": 1.9500139153105183e-05, |
|
"loss": 0.7256, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 0.6481855409470721, |
|
"learning_rate": 1.948587991993537e-05, |
|
"loss": 0.6989, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3872141372141372, |
|
"grad_norm": 0.7003926465709457, |
|
"learning_rate": 1.94714255174006e-05, |
|
"loss": 0.7079, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.3898128898128898, |
|
"grad_norm": 0.7900259673292569, |
|
"learning_rate": 1.945677624289621e-05, |
|
"loss": 0.7238, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3924116424116424, |
|
"grad_norm": 0.6964569074530326, |
|
"learning_rate": 1.9441932397826993e-05, |
|
"loss": 0.7097, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.39501039501039503, |
|
"grad_norm": 0.6487993082304472, |
|
"learning_rate": 1.9426894287600966e-05, |
|
"loss": 0.7079, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.3976091476091476, |
|
"grad_norm": 0.688310939038027, |
|
"learning_rate": 1.9411662221623103e-05, |
|
"loss": 0.7154, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.4002079002079002, |
|
"grad_norm": 0.67435617407303, |
|
"learning_rate": 1.939623651328897e-05, |
|
"loss": 0.7094, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.4028066528066528, |
|
"grad_norm": 0.6183714426118461, |
|
"learning_rate": 1.9380617479978255e-05, |
|
"loss": 0.7341, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.40540540540540543, |
|
"grad_norm": 0.6617362465155076, |
|
"learning_rate": 1.9364805443048266e-05, |
|
"loss": 0.7173, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.408004158004158, |
|
"grad_norm": 0.5678238642277171, |
|
"learning_rate": 1.9348800727827307e-05, |
|
"loss": 0.7168, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.4106029106029106, |
|
"grad_norm": 0.5910729067701429, |
|
"learning_rate": 1.9332603663607983e-05, |
|
"loss": 0.712, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4132016632016632, |
|
"grad_norm": 0.6197819946059432, |
|
"learning_rate": 1.9316214583640427e-05, |
|
"loss": 0.7247, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.4158004158004158, |
|
"grad_norm": 0.6305700579142718, |
|
"learning_rate": 1.929963382512544e-05, |
|
"loss": 0.7139, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.4183991683991684, |
|
"grad_norm": 0.6439900936581443, |
|
"learning_rate": 1.9282861729207555e-05, |
|
"loss": 0.7106, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.420997920997921, |
|
"grad_norm": 0.6990297176777683, |
|
"learning_rate": 1.926589864096803e-05, |
|
"loss": 0.7234, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4235966735966736, |
|
"grad_norm": 0.6967169647297736, |
|
"learning_rate": 1.9248744909417728e-05, |
|
"loss": 0.7178, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.4261954261954262, |
|
"grad_norm": 0.6433170596765804, |
|
"learning_rate": 1.923140088748995e-05, |
|
"loss": 0.7159, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.4287941787941788, |
|
"grad_norm": 0.5983245551968179, |
|
"learning_rate": 1.9213866932033164e-05, |
|
"loss": 0.7057, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4313929313929314, |
|
"grad_norm": 0.6087581373525092, |
|
"learning_rate": 1.9196143403803667e-05, |
|
"loss": 0.7154, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.433991683991684, |
|
"grad_norm": 0.636489262543138, |
|
"learning_rate": 1.9178230667458175e-05, |
|
"loss": 0.7057, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.4365904365904366, |
|
"grad_norm": 0.6439601891575886, |
|
"learning_rate": 1.91601290915463e-05, |
|
"loss": 0.7279, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.4391891891891892, |
|
"grad_norm": 0.6225933290388692, |
|
"learning_rate": 1.9141839048502974e-05, |
|
"loss": 0.7187, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.4417879417879418, |
|
"grad_norm": 0.6035545819895592, |
|
"learning_rate": 1.9123360914640794e-05, |
|
"loss": 0.716, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.44438669438669437, |
|
"grad_norm": 0.6607567088647912, |
|
"learning_rate": 1.9104695070142273e-05, |
|
"loss": 0.7309, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.446985446985447, |
|
"grad_norm": 0.6708322724799578, |
|
"learning_rate": 1.9085841899052014e-05, |
|
"loss": 0.7042, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4495841995841996, |
|
"grad_norm": 0.5887928003237825, |
|
"learning_rate": 1.9066801789268815e-05, |
|
"loss": 0.7227, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.4521829521829522, |
|
"grad_norm": 0.6055286727657766, |
|
"learning_rate": 1.9047575132537694e-05, |
|
"loss": 0.7092, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.45478170478170477, |
|
"grad_norm": 0.6202428781057725, |
|
"learning_rate": 1.902816232444181e-05, |
|
"loss": 0.7104, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.4573804573804574, |
|
"grad_norm": 0.6274570988856961, |
|
"learning_rate": 1.9008563764394334e-05, |
|
"loss": 0.7229, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.45997920997921, |
|
"grad_norm": 0.6605818513974558, |
|
"learning_rate": 1.8988779855630246e-05, |
|
"loss": 0.7286, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.4625779625779626, |
|
"grad_norm": 0.6410281151386927, |
|
"learning_rate": 1.896881100519801e-05, |
|
"loss": 0.725, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.46517671517671516, |
|
"grad_norm": 0.5697116895525344, |
|
"learning_rate": 1.8948657623951224e-05, |
|
"loss": 0.7012, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.4677754677754678, |
|
"grad_norm": 0.6221008774646193, |
|
"learning_rate": 1.8928320126540154e-05, |
|
"loss": 0.7088, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.4703742203742204, |
|
"grad_norm": 0.6783327380563849, |
|
"learning_rate": 1.89077989314032e-05, |
|
"loss": 0.7012, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.47297297297297297, |
|
"grad_norm": 0.6364455758589938, |
|
"learning_rate": 1.8887094460758298e-05, |
|
"loss": 0.7173, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.47557172557172556, |
|
"grad_norm": 0.6423883619688115, |
|
"learning_rate": 1.8866207140594223e-05, |
|
"loss": 0.7221, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.4781704781704782, |
|
"grad_norm": 0.6078098531540564, |
|
"learning_rate": 1.8845137400661832e-05, |
|
"loss": 0.7112, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 0.6855240964444005, |
|
"learning_rate": 1.882388567446522e-05, |
|
"loss": 0.701, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.48336798336798337, |
|
"grad_norm": 0.6214299025960558, |
|
"learning_rate": 1.880245239925279e-05, |
|
"loss": 0.7152, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.48596673596673595, |
|
"grad_norm": 0.7011949443886023, |
|
"learning_rate": 1.878083801600828e-05, |
|
"loss": 0.7099, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.4885654885654886, |
|
"grad_norm": 0.6305195514031546, |
|
"learning_rate": 1.8759042969441666e-05, |
|
"loss": 0.7119, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4911642411642412, |
|
"grad_norm": 0.6284331380737156, |
|
"learning_rate": 1.8737067707980018e-05, |
|
"loss": 0.7283, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.49376299376299376, |
|
"grad_norm": 0.6834839648993508, |
|
"learning_rate": 1.8714912683758292e-05, |
|
"loss": 0.7224, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.49636174636174635, |
|
"grad_norm": 0.5860761907825943, |
|
"learning_rate": 1.869257835261e-05, |
|
"loss": 0.7006, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.498960498960499, |
|
"grad_norm": 0.6589606191667001, |
|
"learning_rate": 1.8670065174057854e-05, |
|
"loss": 0.7179, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5015592515592515, |
|
"grad_norm": 0.6369606574521753, |
|
"learning_rate": 1.8647373611304295e-05, |
|
"loss": 0.715, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.5041580041580042, |
|
"grad_norm": 0.604142259445137, |
|
"learning_rate": 1.862450413122197e-05, |
|
"loss": 0.7014, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5067567567567568, |
|
"grad_norm": 0.6290172817388325, |
|
"learning_rate": 1.8601457204344134e-05, |
|
"loss": 0.71, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.5093555093555093, |
|
"grad_norm": 0.5861860996181917, |
|
"learning_rate": 1.8578233304854952e-05, |
|
"loss": 0.7041, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.511954261954262, |
|
"grad_norm": 0.6582907547385, |
|
"learning_rate": 1.855483291057976e-05, |
|
"loss": 0.7259, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.5145530145530145, |
|
"grad_norm": 0.6056438043018836, |
|
"learning_rate": 1.8531256502975218e-05, |
|
"loss": 0.7141, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5171517671517671, |
|
"grad_norm": 0.6299326122139869, |
|
"learning_rate": 1.850750456711941e-05, |
|
"loss": 0.7037, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.5197505197505198, |
|
"grad_norm": 0.5922842182974687, |
|
"learning_rate": 1.8483577591701876e-05, |
|
"loss": 0.7227, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5223492723492723, |
|
"grad_norm": 0.6335533442723702, |
|
"learning_rate": 1.8459476069013537e-05, |
|
"loss": 0.7331, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.524948024948025, |
|
"grad_norm": 0.603529261601813, |
|
"learning_rate": 1.8435200494936583e-05, |
|
"loss": 0.7204, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.5275467775467776, |
|
"grad_norm": 0.5784152517101885, |
|
"learning_rate": 1.841075136893426e-05, |
|
"loss": 0.7045, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.5301455301455301, |
|
"grad_norm": 0.5775827914904406, |
|
"learning_rate": 1.8386129194040597e-05, |
|
"loss": 0.71, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5327442827442828, |
|
"grad_norm": 0.6037013614687209, |
|
"learning_rate": 1.8361334476850052e-05, |
|
"loss": 0.6973, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5353430353430353, |
|
"grad_norm": 0.5969176345454209, |
|
"learning_rate": 1.8336367727507104e-05, |
|
"loss": 0.7102, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.5379417879417879, |
|
"grad_norm": 0.5880052937664013, |
|
"learning_rate": 1.8311229459695735e-05, |
|
"loss": 0.702, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.5405405405405406, |
|
"grad_norm": 0.5665171424206621, |
|
"learning_rate": 1.828592019062888e-05, |
|
"loss": 0.7059, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5431392931392931, |
|
"grad_norm": 0.5776240714899302, |
|
"learning_rate": 1.826044044103777e-05, |
|
"loss": 0.7124, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.5457380457380457, |
|
"grad_norm": 0.5961454904432945, |
|
"learning_rate": 1.8234790735161233e-05, |
|
"loss": 0.7003, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.5483367983367984, |
|
"grad_norm": 0.61619821889063, |
|
"learning_rate": 1.82089716007349e-05, |
|
"loss": 0.7112, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.5509355509355509, |
|
"grad_norm": 0.5663452621669731, |
|
"learning_rate": 1.8182983568980347e-05, |
|
"loss": 0.7089, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5535343035343036, |
|
"grad_norm": 0.5804355767396245, |
|
"learning_rate": 1.8156827174594157e-05, |
|
"loss": 0.6992, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.5561330561330561, |
|
"grad_norm": 0.6266224913727885, |
|
"learning_rate": 1.8130502955736945e-05, |
|
"loss": 0.7054, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.5587318087318087, |
|
"grad_norm": 0.5683126721009722, |
|
"learning_rate": 1.810401145402225e-05, |
|
"loss": 0.7132, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.5613305613305614, |
|
"grad_norm": 0.6083470685034659, |
|
"learning_rate": 1.8077353214505428e-05, |
|
"loss": 0.7058, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5639293139293139, |
|
"grad_norm": 0.6109321123987571, |
|
"learning_rate": 1.8050528785672403e-05, |
|
"loss": 0.7096, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.5665280665280665, |
|
"grad_norm": 0.5553746362917324, |
|
"learning_rate": 1.8023538719428405e-05, |
|
"loss": 0.7009, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5691268191268192, |
|
"grad_norm": 0.6062718001611305, |
|
"learning_rate": 1.7996383571086614e-05, |
|
"loss": 0.7098, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.5717255717255717, |
|
"grad_norm": 0.5458923136243866, |
|
"learning_rate": 1.796906389935672e-05, |
|
"loss": 0.6999, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5743243243243243, |
|
"grad_norm": 0.5655005675331972, |
|
"learning_rate": 1.7941580266333433e-05, |
|
"loss": 0.7072, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 0.5924848136830811, |
|
"learning_rate": 1.7913933237484936e-05, |
|
"loss": 0.7096, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5795218295218295, |
|
"grad_norm": 0.5639064277581654, |
|
"learning_rate": 1.7886123381641226e-05, |
|
"loss": 0.7238, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.5821205821205822, |
|
"grad_norm": 0.6027713277537942, |
|
"learning_rate": 1.7858151270982422e-05, |
|
"loss": 0.7053, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5847193347193347, |
|
"grad_norm": 0.5726133330263367, |
|
"learning_rate": 1.7830017481026992e-05, |
|
"loss": 0.6965, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.5873180873180873, |
|
"grad_norm": 0.5747640646170223, |
|
"learning_rate": 1.7801722590619905e-05, |
|
"loss": 0.6996, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.58991683991684, |
|
"grad_norm": 0.5720989361581698, |
|
"learning_rate": 1.777326718192074e-05, |
|
"loss": 0.7132, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.5925155925155925, |
|
"grad_norm": 0.6153266265302803, |
|
"learning_rate": 1.7744651840391686e-05, |
|
"loss": 0.7077, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.5951143451143451, |
|
"grad_norm": 0.6101914919623531, |
|
"learning_rate": 1.7715877154785504e-05, |
|
"loss": 0.7017, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.5977130977130977, |
|
"grad_norm": 0.5892227087436086, |
|
"learning_rate": 1.768694371713343e-05, |
|
"loss": 0.6901, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6003118503118503, |
|
"grad_norm": 0.5792852971539604, |
|
"learning_rate": 1.7657852122732958e-05, |
|
"loss": 0.6915, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.6029106029106029, |
|
"grad_norm": 0.5812633503316377, |
|
"learning_rate": 1.7628602970135635e-05, |
|
"loss": 0.6983, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6055093555093555, |
|
"grad_norm": 0.5776629062328037, |
|
"learning_rate": 1.7599196861134706e-05, |
|
"loss": 0.7033, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.6081081081081081, |
|
"grad_norm": 0.6291058371783688, |
|
"learning_rate": 1.7569634400752763e-05, |
|
"loss": 0.6976, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6107068607068608, |
|
"grad_norm": 0.5613902945792599, |
|
"learning_rate": 1.753991619722928e-05, |
|
"loss": 0.7093, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.6133056133056133, |
|
"grad_norm": 0.6383056669880055, |
|
"learning_rate": 1.7510042862008102e-05, |
|
"loss": 0.7049, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6159043659043659, |
|
"grad_norm": 0.6039268449286719, |
|
"learning_rate": 1.7480015009724873e-05, |
|
"loss": 0.702, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.6185031185031185, |
|
"grad_norm": 1.092030131308809, |
|
"learning_rate": 1.7449833258194376e-05, |
|
"loss": 0.6967, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6211018711018711, |
|
"grad_norm": 0.6403075410117294, |
|
"learning_rate": 1.7419498228397826e-05, |
|
"loss": 0.707, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.6237006237006237, |
|
"grad_norm": 0.6802172468521516, |
|
"learning_rate": 1.7389010544470105e-05, |
|
"loss": 0.7178, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6262993762993763, |
|
"grad_norm": 0.5789178063330651, |
|
"learning_rate": 1.7358370833686907e-05, |
|
"loss": 0.6981, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.6288981288981289, |
|
"grad_norm": 0.5616632715895711, |
|
"learning_rate": 1.732757972645183e-05, |
|
"loss": 0.6886, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6314968814968815, |
|
"grad_norm": 0.5702377179814124, |
|
"learning_rate": 1.7296637856283432e-05, |
|
"loss": 0.6876, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.6340956340956341, |
|
"grad_norm": 0.5951629365447559, |
|
"learning_rate": 1.7265545859802154e-05, |
|
"loss": 0.6976, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6366943866943867, |
|
"grad_norm": 0.5872853132760645, |
|
"learning_rate": 1.7234304376717252e-05, |
|
"loss": 0.6992, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6392931392931392, |
|
"grad_norm": 0.570568895020055, |
|
"learning_rate": 1.720291404981364e-05, |
|
"loss": 0.6927, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6418918918918919, |
|
"grad_norm": 0.5451578107308829, |
|
"learning_rate": 1.7171375524938638e-05, |
|
"loss": 0.7083, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.6444906444906445, |
|
"grad_norm": 0.5444907291949577, |
|
"learning_rate": 1.71396894509887e-05, |
|
"loss": 0.7107, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6470893970893971, |
|
"grad_norm": 0.5605468917786897, |
|
"learning_rate": 1.710785647989607e-05, |
|
"loss": 0.7209, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.6496881496881497, |
|
"grad_norm": 0.578999844192668, |
|
"learning_rate": 1.7075877266615347e-05, |
|
"loss": 0.7045, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6522869022869023, |
|
"grad_norm": 0.5894696454082586, |
|
"learning_rate": 1.704375246911004e-05, |
|
"loss": 0.7134, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.6548856548856549, |
|
"grad_norm": 0.6210688729366013, |
|
"learning_rate": 1.7011482748338998e-05, |
|
"loss": 0.6995, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6574844074844075, |
|
"grad_norm": 0.5323403441786221, |
|
"learning_rate": 1.697906876824283e-05, |
|
"loss": 0.6931, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.66008316008316, |
|
"grad_norm": 0.5639934756275331, |
|
"learning_rate": 1.6946511195730254e-05, |
|
"loss": 0.7006, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6626819126819127, |
|
"grad_norm": 0.5421699251312687, |
|
"learning_rate": 1.6913810700664337e-05, |
|
"loss": 0.6995, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.6652806652806653, |
|
"grad_norm": 0.5498793219946022, |
|
"learning_rate": 1.6880967955848762e-05, |
|
"loss": 0.6978, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6678794178794178, |
|
"grad_norm": 0.5867341882206146, |
|
"learning_rate": 1.6847983637013946e-05, |
|
"loss": 0.7097, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.6704781704781705, |
|
"grad_norm": 0.5399055609353709, |
|
"learning_rate": 1.6814858422803162e-05, |
|
"loss": 0.7007, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 0.5807292558166693, |
|
"learning_rate": 1.6781592994758563e-05, |
|
"loss": 0.7129, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.6756756756756757, |
|
"grad_norm": 0.5656285736891911, |
|
"learning_rate": 1.674818803730716e-05, |
|
"loss": 0.697, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6782744282744283, |
|
"grad_norm": 0.6062022250444103, |
|
"learning_rate": 1.671464423774675e-05, |
|
"loss": 0.6933, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.6808731808731808, |
|
"grad_norm": 0.5432055936520285, |
|
"learning_rate": 1.668096228623176e-05, |
|
"loss": 0.7042, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6834719334719335, |
|
"grad_norm": 0.5696528007526865, |
|
"learning_rate": 1.664714287575906e-05, |
|
"loss": 0.6921, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.6860706860706861, |
|
"grad_norm": 0.5723069912815502, |
|
"learning_rate": 1.66131867021537e-05, |
|
"loss": 0.7016, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6886694386694386, |
|
"grad_norm": 0.5709607134990504, |
|
"learning_rate": 1.6579094464054595e-05, |
|
"loss": 0.7113, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.6912681912681913, |
|
"grad_norm": 0.6010864530745209, |
|
"learning_rate": 1.6544866862900146e-05, |
|
"loss": 0.695, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.6938669438669439, |
|
"grad_norm": 0.5800151181524591, |
|
"learning_rate": 1.6510504602913815e-05, |
|
"loss": 0.6952, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.6964656964656964, |
|
"grad_norm": 0.6157566120129612, |
|
"learning_rate": 1.6476008391089628e-05, |
|
"loss": 0.7105, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.6990644490644491, |
|
"grad_norm": 0.5752568805255817, |
|
"learning_rate": 1.644137893717764e-05, |
|
"loss": 0.7161, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.7016632016632016, |
|
"grad_norm": 0.5767294919201806, |
|
"learning_rate": 1.6406616953669317e-05, |
|
"loss": 0.6926, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7042619542619543, |
|
"grad_norm": 0.6028116817361912, |
|
"learning_rate": 1.637172315578289e-05, |
|
"loss": 0.6878, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.7068607068607069, |
|
"grad_norm": 0.5528228994127236, |
|
"learning_rate": 1.6336698261448632e-05, |
|
"loss": 0.702, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.7094594594594594, |
|
"grad_norm": 0.5213097969559868, |
|
"learning_rate": 1.6301542991294094e-05, |
|
"loss": 0.6947, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.7120582120582121, |
|
"grad_norm": 0.5370559355171456, |
|
"learning_rate": 1.626625806862927e-05, |
|
"loss": 0.6938, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7146569646569647, |
|
"grad_norm": 0.5319720586513772, |
|
"learning_rate": 1.623084421943172e-05, |
|
"loss": 0.691, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.7172557172557172, |
|
"grad_norm": 0.5688111345080934, |
|
"learning_rate": 1.6195302172331624e-05, |
|
"loss": 0.688, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.7198544698544699, |
|
"grad_norm": 0.5580488505367257, |
|
"learning_rate": 1.6159632658596815e-05, |
|
"loss": 0.6832, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.7224532224532224, |
|
"grad_norm": 0.5186643724448894, |
|
"learning_rate": 1.6123836412117702e-05, |
|
"loss": 0.7004, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.725051975051975, |
|
"grad_norm": 0.5624601265524769, |
|
"learning_rate": 1.608791416939219e-05, |
|
"loss": 0.7, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.7276507276507277, |
|
"grad_norm": 0.5541391195602022, |
|
"learning_rate": 1.605186666951052e-05, |
|
"loss": 0.6734, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7302494802494802, |
|
"grad_norm": 0.5332911572620831, |
|
"learning_rate": 1.6015694654140076e-05, |
|
"loss": 0.6896, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.7328482328482329, |
|
"grad_norm": 0.5548630655118624, |
|
"learning_rate": 1.59793988675101e-05, |
|
"loss": 0.7001, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7354469854469855, |
|
"grad_norm": 0.5047178273406415, |
|
"learning_rate": 1.5942980056396394e-05, |
|
"loss": 0.6959, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.738045738045738, |
|
"grad_norm": 0.5934685974493243, |
|
"learning_rate": 1.5906438970105965e-05, |
|
"loss": 0.6904, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7406444906444907, |
|
"grad_norm": 0.5527382042918192, |
|
"learning_rate": 1.5869776360461596e-05, |
|
"loss": 0.6905, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.7432432432432432, |
|
"grad_norm": 0.5528065876901935, |
|
"learning_rate": 1.5832992981786362e-05, |
|
"loss": 0.6906, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7458419958419958, |
|
"grad_norm": 0.5265702785034122, |
|
"learning_rate": 1.5796089590888154e-05, |
|
"loss": 0.6857, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.7484407484407485, |
|
"grad_norm": 0.5438966896525291, |
|
"learning_rate": 1.575906694704407e-05, |
|
"loss": 0.6926, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.751039501039501, |
|
"grad_norm": 0.5388783204520624, |
|
"learning_rate": 1.5721925811984795e-05, |
|
"loss": 0.6973, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.7536382536382537, |
|
"grad_norm": 0.5389485246395991, |
|
"learning_rate": 1.568466694987895e-05, |
|
"loss": 0.6962, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7562370062370062, |
|
"grad_norm": 0.5935682313499335, |
|
"learning_rate": 1.5647291127317353e-05, |
|
"loss": 0.6725, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.7588357588357588, |
|
"grad_norm": 0.5822513257760918, |
|
"learning_rate": 1.560979911329725e-05, |
|
"loss": 0.6926, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7614345114345115, |
|
"grad_norm": 0.591628155657702, |
|
"learning_rate": 1.5572191679206503e-05, |
|
"loss": 0.6724, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.764033264033264, |
|
"grad_norm": 0.5308640431230437, |
|
"learning_rate": 1.5534469598807703e-05, |
|
"loss": 0.693, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7666320166320166, |
|
"grad_norm": 0.5305514116153979, |
|
"learning_rate": 1.5496633648222246e-05, |
|
"loss": 0.6781, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 0.5426925004836652, |
|
"learning_rate": 1.54586846059144e-05, |
|
"loss": 0.7164, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7718295218295218, |
|
"grad_norm": 0.523224567107794, |
|
"learning_rate": 1.542062325267525e-05, |
|
"loss": 0.6934, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.7744282744282744, |
|
"grad_norm": 0.5414111517541614, |
|
"learning_rate": 1.5382450371606643e-05, |
|
"loss": 0.6829, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.777027027027027, |
|
"grad_norm": 0.5960821029242948, |
|
"learning_rate": 1.534416674810509e-05, |
|
"loss": 0.6924, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.7796257796257796, |
|
"grad_norm": 0.5781538636667527, |
|
"learning_rate": 1.5305773169845598e-05, |
|
"loss": 0.695, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7822245322245323, |
|
"grad_norm": 0.5764921412336972, |
|
"learning_rate": 1.5267270426765445e-05, |
|
"loss": 0.6887, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.7848232848232848, |
|
"grad_norm": 0.65716343258798, |
|
"learning_rate": 1.5228659311047969e-05, |
|
"loss": 0.6934, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7874220374220374, |
|
"grad_norm": 0.5421938070789174, |
|
"learning_rate": 1.5189940617106231e-05, |
|
"loss": 0.6791, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.7900207900207901, |
|
"grad_norm": 0.5269524597532019, |
|
"learning_rate": 1.5151115141566686e-05, |
|
"loss": 0.6901, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.7926195426195426, |
|
"grad_norm": 0.5653499412081372, |
|
"learning_rate": 1.5112183683252797e-05, |
|
"loss": 0.6915, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.7952182952182952, |
|
"grad_norm": 0.5439207742834801, |
|
"learning_rate": 1.5073147043168589e-05, |
|
"loss": 0.6775, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.7978170478170478, |
|
"grad_norm": 0.5489788736531123, |
|
"learning_rate": 1.5034006024482164e-05, |
|
"loss": 0.7036, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.8004158004158004, |
|
"grad_norm": 0.5550872564267669, |
|
"learning_rate": 1.4994761432509201e-05, |
|
"loss": 0.6959, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.803014553014553, |
|
"grad_norm": 0.5490474276501166, |
|
"learning_rate": 1.4955414074696369e-05, |
|
"loss": 0.6906, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.8056133056133056, |
|
"grad_norm": 0.5125906404793841, |
|
"learning_rate": 1.4915964760604698e-05, |
|
"loss": 0.6828, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8082120582120582, |
|
"grad_norm": 0.5203971242963676, |
|
"learning_rate": 1.4876414301892963e-05, |
|
"loss": 0.6825, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.8108108108108109, |
|
"grad_norm": 0.5658527313991132, |
|
"learning_rate": 1.4836763512300946e-05, |
|
"loss": 0.6928, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8134095634095634, |
|
"grad_norm": 0.535836579476803, |
|
"learning_rate": 1.4797013207632718e-05, |
|
"loss": 0.7018, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.816008316008316, |
|
"grad_norm": 0.5645034857163748, |
|
"learning_rate": 1.475716420573984e-05, |
|
"loss": 0.685, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.8186070686070686, |
|
"grad_norm": 0.5217450999852206, |
|
"learning_rate": 1.4717217326504542e-05, |
|
"loss": 0.6849, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.8212058212058212, |
|
"grad_norm": 0.5480983222260942, |
|
"learning_rate": 1.467717339182286e-05, |
|
"loss": 0.7002, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8238045738045738, |
|
"grad_norm": 0.542270358042675, |
|
"learning_rate": 1.4637033225587707e-05, |
|
"loss": 0.6694, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.8264033264033264, |
|
"grad_norm": 0.5543644512440328, |
|
"learning_rate": 1.4596797653671947e-05, |
|
"loss": 0.6921, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.829002079002079, |
|
"grad_norm": 0.5568668245488615, |
|
"learning_rate": 1.4556467503911376e-05, |
|
"loss": 0.6852, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.8316008316008316, |
|
"grad_norm": 0.5656786288010156, |
|
"learning_rate": 1.4516043606087712e-05, |
|
"loss": 0.6988, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8341995841995842, |
|
"grad_norm": 0.5127153017778545, |
|
"learning_rate": 1.4475526791911516e-05, |
|
"loss": 0.6993, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.8367983367983368, |
|
"grad_norm": 0.5775499987771998, |
|
"learning_rate": 1.4434917895005071e-05, |
|
"loss": 0.695, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.8393970893970893, |
|
"grad_norm": 0.5330354393061656, |
|
"learning_rate": 1.4394217750885233e-05, |
|
"loss": 0.701, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.841995841995842, |
|
"grad_norm": 0.5419503755472291, |
|
"learning_rate": 1.4353427196946257e-05, |
|
"loss": 0.6913, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8445945945945946, |
|
"grad_norm": 0.5311784231584057, |
|
"learning_rate": 1.4312547072442544e-05, |
|
"loss": 0.674, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.8471933471933472, |
|
"grad_norm": 0.5563211583759982, |
|
"learning_rate": 1.4271578218471392e-05, |
|
"loss": 0.6907, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.8497920997920998, |
|
"grad_norm": 0.543414093186935, |
|
"learning_rate": 1.4230521477955677e-05, |
|
"loss": 0.6933, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.8523908523908524, |
|
"grad_norm": 0.5240436124612864, |
|
"learning_rate": 1.418937769562652e-05, |
|
"loss": 0.686, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.854989604989605, |
|
"grad_norm": 0.5430088145666387, |
|
"learning_rate": 1.4148147718005907e-05, |
|
"loss": 0.6896, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.8575883575883576, |
|
"grad_norm": 0.5458623547814142, |
|
"learning_rate": 1.4106832393389258e-05, |
|
"loss": 0.6908, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8601871101871101, |
|
"grad_norm": 0.5634088926920675, |
|
"learning_rate": 1.4065432571827992e-05, |
|
"loss": 0.7065, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.8627858627858628, |
|
"grad_norm": 0.5501420194013241, |
|
"learning_rate": 1.4023949105112034e-05, |
|
"loss": 0.6988, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 0.545143784026079, |
|
"learning_rate": 1.3982382846752265e-05, |
|
"loss": 0.7001, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.867983367983368, |
|
"grad_norm": 0.5514351435035997, |
|
"learning_rate": 1.3940734651963008e-05, |
|
"loss": 0.6864, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8705821205821206, |
|
"grad_norm": 0.578025130173629, |
|
"learning_rate": 1.3899005377644389e-05, |
|
"loss": 0.6951, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.8731808731808732, |
|
"grad_norm": 0.5486337422568206, |
|
"learning_rate": 1.3857195882364717e-05, |
|
"loss": 0.6875, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8757796257796258, |
|
"grad_norm": 0.5987234793423925, |
|
"learning_rate": 1.3815307026342847e-05, |
|
"loss": 0.6895, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.8783783783783784, |
|
"grad_norm": 0.5723737998517776, |
|
"learning_rate": 1.3773339671430441e-05, |
|
"loss": 0.6934, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8809771309771309, |
|
"grad_norm": 0.5157754043967531, |
|
"learning_rate": 1.3731294681094257e-05, |
|
"loss": 0.7008, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.8835758835758836, |
|
"grad_norm": 0.5484329818025037, |
|
"learning_rate": 1.3689172920398385e-05, |
|
"loss": 0.6588, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8861746361746362, |
|
"grad_norm": 0.5313559372585769, |
|
"learning_rate": 1.3646975255986448e-05, |
|
"loss": 0.682, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.8887733887733887, |
|
"grad_norm": 0.5576768736741219, |
|
"learning_rate": 1.3604702556063751e-05, |
|
"loss": 0.6964, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.8913721413721414, |
|
"grad_norm": 0.5719657285080421, |
|
"learning_rate": 1.3562355690379455e-05, |
|
"loss": 0.7023, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.893970893970894, |
|
"grad_norm": 0.5445374932912297, |
|
"learning_rate": 1.3519935530208642e-05, |
|
"loss": 0.6969, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.8965696465696466, |
|
"grad_norm": 0.5332385586498977, |
|
"learning_rate": 1.347744294833442e-05, |
|
"loss": 0.7009, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.8991683991683992, |
|
"grad_norm": 0.5518972270290637, |
|
"learning_rate": 1.3434878819029949e-05, |
|
"loss": 0.6783, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9017671517671517, |
|
"grad_norm": 0.5111471501880547, |
|
"learning_rate": 1.3392244018040459e-05, |
|
"loss": 0.6733, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.9043659043659044, |
|
"grad_norm": 0.5204670594020995, |
|
"learning_rate": 1.3349539422565227e-05, |
|
"loss": 0.6768, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.906964656964657, |
|
"grad_norm": 0.5185707128359598, |
|
"learning_rate": 1.3306765911239536e-05, |
|
"loss": 0.6836, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.9095634095634095, |
|
"grad_norm": 0.5371823929518809, |
|
"learning_rate": 1.3263924364116598e-05, |
|
"loss": 0.6757, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9121621621621622, |
|
"grad_norm": 0.5512113239584037, |
|
"learning_rate": 1.3221015662649435e-05, |
|
"loss": 0.6874, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.9147609147609148, |
|
"grad_norm": 0.5413143469260978, |
|
"learning_rate": 1.3178040689672746e-05, |
|
"loss": 0.6903, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9173596673596673, |
|
"grad_norm": 0.5448713291982036, |
|
"learning_rate": 1.3135000329384768e-05, |
|
"loss": 0.6749, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.91995841995842, |
|
"grad_norm": 0.5694338929447293, |
|
"learning_rate": 1.3091895467329043e-05, |
|
"loss": 0.6992, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9225571725571725, |
|
"grad_norm": 0.5680751056661771, |
|
"learning_rate": 1.304872699037623e-05, |
|
"loss": 0.6778, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.9251559251559252, |
|
"grad_norm": 0.5827527502015905, |
|
"learning_rate": 1.3005495786705847e-05, |
|
"loss": 0.6801, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9277546777546778, |
|
"grad_norm": 0.5414753069080744, |
|
"learning_rate": 1.2962202745787993e-05, |
|
"loss": 0.6828, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.9303534303534303, |
|
"grad_norm": 0.5954708948041362, |
|
"learning_rate": 1.2918848758365047e-05, |
|
"loss": 0.6804, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.932952182952183, |
|
"grad_norm": 0.4986624974413991, |
|
"learning_rate": 1.2875434716433364e-05, |
|
"loss": 0.6807, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.9355509355509356, |
|
"grad_norm": 0.512872956001043, |
|
"learning_rate": 1.2831961513224883e-05, |
|
"loss": 0.6758, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9381496881496881, |
|
"grad_norm": 0.5424188290664714, |
|
"learning_rate": 1.278843004318878e-05, |
|
"loss": 0.6684, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.9407484407484408, |
|
"grad_norm": 0.5279446908433959, |
|
"learning_rate": 1.274484120197305e-05, |
|
"loss": 0.6732, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9433471933471933, |
|
"grad_norm": 0.5019944493922829, |
|
"learning_rate": 1.2701195886406094e-05, |
|
"loss": 0.6736, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.9459459459459459, |
|
"grad_norm": 0.5441846652026447, |
|
"learning_rate": 1.2657494994478248e-05, |
|
"loss": 0.6803, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.9485446985446986, |
|
"grad_norm": 0.5027297117206215, |
|
"learning_rate": 1.2613739425323318e-05, |
|
"loss": 0.6916, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.9511434511434511, |
|
"grad_norm": 0.5208646951194689, |
|
"learning_rate": 1.2569930079200083e-05, |
|
"loss": 0.6674, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9537422037422038, |
|
"grad_norm": 0.5229959541344918, |
|
"learning_rate": 1.2526067857473763e-05, |
|
"loss": 0.6808, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.9563409563409564, |
|
"grad_norm": 0.5436749548564572, |
|
"learning_rate": 1.2482153662597478e-05, |
|
"loss": 0.682, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.9589397089397089, |
|
"grad_norm": 0.525934850158874, |
|
"learning_rate": 1.2438188398093689e-05, |
|
"loss": 0.6684, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 0.5241344769977555, |
|
"learning_rate": 1.2394172968535596e-05, |
|
"loss": 0.6845, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9641372141372141, |
|
"grad_norm": 0.5334714104019, |
|
"learning_rate": 1.235010827952853e-05, |
|
"loss": 0.6752, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.9667359667359667, |
|
"grad_norm": 0.4981678215882384, |
|
"learning_rate": 1.230599523769132e-05, |
|
"loss": 0.6824, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.9693347193347194, |
|
"grad_norm": 0.529971709936305, |
|
"learning_rate": 1.2261834750637648e-05, |
|
"loss": 0.695, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.9719334719334719, |
|
"grad_norm": 0.5288630054972472, |
|
"learning_rate": 1.2217627726957362e-05, |
|
"loss": 0.6833, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9745322245322245, |
|
"grad_norm": 0.5109511701940944, |
|
"learning_rate": 1.217337507619779e-05, |
|
"loss": 0.6688, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.9771309771309772, |
|
"grad_norm": 0.5251862301897194, |
|
"learning_rate": 1.212907770884503e-05, |
|
"loss": 0.6752, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.9797297297297297, |
|
"grad_norm": 0.5081841213925302, |
|
"learning_rate": 1.2084736536305199e-05, |
|
"loss": 0.6802, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.9823284823284824, |
|
"grad_norm": 0.5199130862705122, |
|
"learning_rate": 1.2040352470885705e-05, |
|
"loss": 0.6585, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9849272349272349, |
|
"grad_norm": 0.5127627640317641, |
|
"learning_rate": 1.1995926425776463e-05, |
|
"loss": 0.6753, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.9875259875259875, |
|
"grad_norm": 0.5057020713500769, |
|
"learning_rate": 1.1951459315031103e-05, |
|
"loss": 0.6823, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9901247401247402, |
|
"grad_norm": 0.5368825129232229, |
|
"learning_rate": 1.1906952053548173e-05, |
|
"loss": 0.6775, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.9927234927234927, |
|
"grad_norm": 0.48766377489485674, |
|
"learning_rate": 1.1862405557052316e-05, |
|
"loss": 0.6731, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.9953222453222453, |
|
"grad_norm": 0.5077162603005374, |
|
"learning_rate": 1.1817820742075418e-05, |
|
"loss": 0.659, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.997920997920998, |
|
"grad_norm": 0.5273871660362291, |
|
"learning_rate": 1.1773198525937758e-05, |
|
"loss": 0.679, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.7630624771118164, |
|
"eval_runtime": 104.7865, |
|
"eval_samples_per_second": 78.35, |
|
"eval_steps_per_second": 1.231, |
|
"step": 1924 |
|
}, |
|
{ |
|
"epoch": 1.0005197505197505, |
|
"grad_norm": 0.7167016181446053, |
|
"learning_rate": 1.1728539826729135e-05, |
|
"loss": 0.6587, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 1.003118503118503, |
|
"grad_norm": 0.6041178102787718, |
|
"learning_rate": 1.168384556328998e-05, |
|
"loss": 0.6121, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 1.0057172557172558, |
|
"grad_norm": 0.5921694491638905, |
|
"learning_rate": 1.163911665519244e-05, |
|
"loss": 0.6028, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 1.0083160083160083, |
|
"grad_norm": 0.6013653429933059, |
|
"learning_rate": 1.1594354022721475e-05, |
|
"loss": 0.6156, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 1.0109147609147608, |
|
"grad_norm": 0.6105316954631931, |
|
"learning_rate": 1.1549558586855909e-05, |
|
"loss": 0.5945, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 1.0135135135135136, |
|
"grad_norm": 0.535707927809498, |
|
"learning_rate": 1.150473126924949e-05, |
|
"loss": 0.5922, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 1.0161122661122661, |
|
"grad_norm": 0.5556825438783977, |
|
"learning_rate": 1.1459872992211923e-05, |
|
"loss": 0.5953, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 1.0187110187110187, |
|
"grad_norm": 0.5691489620123985, |
|
"learning_rate": 1.1414984678689895e-05, |
|
"loss": 0.599, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 1.0213097713097714, |
|
"grad_norm": 0.5936266445205919, |
|
"learning_rate": 1.1370067252248085e-05, |
|
"loss": 0.5949, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 1.023908523908524, |
|
"grad_norm": 0.6066096499835257, |
|
"learning_rate": 1.1325121637050161e-05, |
|
"loss": 0.5971, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 1.0265072765072765, |
|
"grad_norm": 0.5681394506425539, |
|
"learning_rate": 1.128014875783977e-05, |
|
"loss": 0.5864, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 1.0291060291060292, |
|
"grad_norm": 0.5628786880019524, |
|
"learning_rate": 1.1235149539921509e-05, |
|
"loss": 0.592, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 1.0317047817047817, |
|
"grad_norm": 0.5467263882406993, |
|
"learning_rate": 1.1190124909141877e-05, |
|
"loss": 0.6075, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 1.0343035343035343, |
|
"grad_norm": 0.5440093360987802, |
|
"learning_rate": 1.1145075791870253e-05, |
|
"loss": 0.5988, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 1.0369022869022868, |
|
"grad_norm": 0.5752544838637329, |
|
"learning_rate": 1.1100003114979803e-05, |
|
"loss": 0.615, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 1.0395010395010396, |
|
"grad_norm": 0.5310516110561704, |
|
"learning_rate": 1.1054907805828427e-05, |
|
"loss": 0.6005, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.042099792099792, |
|
"grad_norm": 0.5432466126208855, |
|
"learning_rate": 1.1009790792239692e-05, |
|
"loss": 0.5866, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 1.0446985446985446, |
|
"grad_norm": 0.5644600605835878, |
|
"learning_rate": 1.0964653002483714e-05, |
|
"loss": 0.5936, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 1.0472972972972974, |
|
"grad_norm": 0.5272489276362843, |
|
"learning_rate": 1.0919495365258077e-05, |
|
"loss": 0.597, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 1.04989604989605, |
|
"grad_norm": 0.5617695811131541, |
|
"learning_rate": 1.0874318809668717e-05, |
|
"loss": 0.6065, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 1.0524948024948024, |
|
"grad_norm": 0.5192198208489455, |
|
"learning_rate": 1.0829124265210822e-05, |
|
"loss": 0.6075, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.0550935550935552, |
|
"grad_norm": 0.5440321607158946, |
|
"learning_rate": 1.0783912661749682e-05, |
|
"loss": 0.5993, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 1.0576923076923077, |
|
"grad_norm": 0.5380219431230713, |
|
"learning_rate": 1.0738684929501577e-05, |
|
"loss": 0.6047, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 1.0602910602910602, |
|
"grad_norm": 0.5309774372945627, |
|
"learning_rate": 1.069344199901464e-05, |
|
"loss": 0.6057, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 1.062889812889813, |
|
"grad_norm": 0.5317011424510519, |
|
"learning_rate": 1.064818480114969e-05, |
|
"loss": 0.6087, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 1.0654885654885655, |
|
"grad_norm": 0.5636006054084751, |
|
"learning_rate": 1.0602914267061102e-05, |
|
"loss": 0.6016, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.068087318087318, |
|
"grad_norm": 0.5385118881739976, |
|
"learning_rate": 1.0557631328177635e-05, |
|
"loss": 0.6009, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 1.0706860706860706, |
|
"grad_norm": 0.5387163558386009, |
|
"learning_rate": 1.0512336916183282e-05, |
|
"loss": 0.5919, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 1.0732848232848233, |
|
"grad_norm": 0.5550215843050625, |
|
"learning_rate": 1.046703196299808e-05, |
|
"loss": 0.6032, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 1.0758835758835759, |
|
"grad_norm": 0.5317572302029103, |
|
"learning_rate": 1.042171740075896e-05, |
|
"loss": 0.6061, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 1.0784823284823284, |
|
"grad_norm": 0.5953262222453396, |
|
"learning_rate": 1.037639416180055e-05, |
|
"loss": 0.5968, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.0810810810810811, |
|
"grad_norm": 0.5468172409024149, |
|
"learning_rate": 1.0331063178635991e-05, |
|
"loss": 0.5942, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 1.0836798336798337, |
|
"grad_norm": 0.5337399744353215, |
|
"learning_rate": 1.028572538393778e-05, |
|
"loss": 0.5784, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 1.0862785862785862, |
|
"grad_norm": 0.5918184869466311, |
|
"learning_rate": 1.0240381710518544e-05, |
|
"loss": 0.5825, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 1.088877338877339, |
|
"grad_norm": 0.5415874642045884, |
|
"learning_rate": 1.0195033091311866e-05, |
|
"loss": 0.6081, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 1.0914760914760915, |
|
"grad_norm": 0.5414796664701005, |
|
"learning_rate": 1.0149680459353085e-05, |
|
"loss": 0.5916, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.094074844074844, |
|
"grad_norm": 0.5523534552531586, |
|
"learning_rate": 1.0104324747760103e-05, |
|
"loss": 0.6108, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 1.0966735966735968, |
|
"grad_norm": 0.691647511565078, |
|
"learning_rate": 1.0058966889714192e-05, |
|
"loss": 0.6, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 1.0992723492723493, |
|
"grad_norm": 0.5912743887434098, |
|
"learning_rate": 1.0013607818440775e-05, |
|
"loss": 0.6006, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 1.1018711018711018, |
|
"grad_norm": 0.5460338682601912, |
|
"learning_rate": 9.968248467190246e-06, |
|
"loss": 0.6017, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 1.1044698544698546, |
|
"grad_norm": 0.5664759203624757, |
|
"learning_rate": 9.922889769218754e-06, |
|
"loss": 0.6093, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.107068607068607, |
|
"grad_norm": 0.5666035631954701, |
|
"learning_rate": 9.877532657769006e-06, |
|
"loss": 0.5971, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 1.1096673596673596, |
|
"grad_norm": 0.5200286477603056, |
|
"learning_rate": 9.832178066051074e-06, |
|
"loss": 0.5934, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 1.1122661122661124, |
|
"grad_norm": 0.6074145785434165, |
|
"learning_rate": 9.78682692722318e-06, |
|
"loss": 0.5886, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 1.114864864864865, |
|
"grad_norm": 0.5541608986138071, |
|
"learning_rate": 9.741480174372505e-06, |
|
"loss": 0.6028, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 1.1174636174636174, |
|
"grad_norm": 0.5324738113711789, |
|
"learning_rate": 9.696138740495992e-06, |
|
"loss": 0.5963, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.12006237006237, |
|
"grad_norm": 0.5848651297296711, |
|
"learning_rate": 9.650803558481147e-06, |
|
"loss": 0.5807, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 1.1226611226611227, |
|
"grad_norm": 0.5602080935436595, |
|
"learning_rate": 9.605475561086842e-06, |
|
"loss": 0.5888, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 1.1252598752598753, |
|
"grad_norm": 0.5653703789523455, |
|
"learning_rate": 9.560155680924137e-06, |
|
"loss": 0.5916, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 1.1278586278586278, |
|
"grad_norm": 0.5356042555591374, |
|
"learning_rate": 9.514844850437067e-06, |
|
"loss": 0.5877, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 1.1304573804573805, |
|
"grad_norm": 0.5682089324184239, |
|
"learning_rate": 9.469544001883491e-06, |
|
"loss": 0.5867, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.133056133056133, |
|
"grad_norm": 0.5334569975639222, |
|
"learning_rate": 9.424254067315875e-06, |
|
"loss": 0.6082, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 1.1356548856548856, |
|
"grad_norm": 0.6011244426639291, |
|
"learning_rate": 9.378975978562147e-06, |
|
"loss": 0.5934, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 1.1382536382536383, |
|
"grad_norm": 0.578376500402116, |
|
"learning_rate": 9.3337106672065e-06, |
|
"loss": 0.5836, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 1.1408523908523909, |
|
"grad_norm": 0.5588463035978978, |
|
"learning_rate": 9.288459064570239e-06, |
|
"loss": 0.6047, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 1.1434511434511434, |
|
"grad_norm": 0.5657302559519196, |
|
"learning_rate": 9.243222101692617e-06, |
|
"loss": 0.5858, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.1460498960498962, |
|
"grad_norm": 0.583919168737973, |
|
"learning_rate": 9.19800070931168e-06, |
|
"loss": 0.607, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 1.1486486486486487, |
|
"grad_norm": 0.5817136037512669, |
|
"learning_rate": 9.15279581784511e-06, |
|
"loss": 0.589, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 1.1512474012474012, |
|
"grad_norm": 0.538036225434835, |
|
"learning_rate": 9.107608357371092e-06, |
|
"loss": 0.6052, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 1.1538461538461537, |
|
"grad_norm": 0.5265174041614111, |
|
"learning_rate": 9.062439257609165e-06, |
|
"loss": 0.5953, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 1.1564449064449065, |
|
"grad_norm": 0.548975305771916, |
|
"learning_rate": 9.01728944790112e-06, |
|
"loss": 0.5931, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.159043659043659, |
|
"grad_norm": 0.5481727349763981, |
|
"learning_rate": 8.972159857191838e-06, |
|
"loss": 0.5951, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 1.1616424116424116, |
|
"grad_norm": 0.5808749569289294, |
|
"learning_rate": 8.927051414010213e-06, |
|
"loss": 0.5925, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 1.1642411642411643, |
|
"grad_norm": 0.5597977821004193, |
|
"learning_rate": 8.881965046450033e-06, |
|
"loss": 0.5983, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 1.1668399168399168, |
|
"grad_norm": 0.5732606987628547, |
|
"learning_rate": 8.83690168215088e-06, |
|
"loss": 0.6003, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 1.1694386694386694, |
|
"grad_norm": 0.5317615005846271, |
|
"learning_rate": 8.791862248279059e-06, |
|
"loss": 0.6011, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.1720374220374221, |
|
"grad_norm": 0.5175378118538798, |
|
"learning_rate": 8.746847671508506e-06, |
|
"loss": 0.5906, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 1.1746361746361746, |
|
"grad_norm": 0.5827570884628767, |
|
"learning_rate": 8.701858878001731e-06, |
|
"loss": 0.5947, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 1.1772349272349272, |
|
"grad_norm": 0.5832582553430286, |
|
"learning_rate": 8.656896793390763e-06, |
|
"loss": 0.6021, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 1.17983367983368, |
|
"grad_norm": 0.5252465600060544, |
|
"learning_rate": 8.6119623427581e-06, |
|
"loss": 0.5946, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 1.1824324324324325, |
|
"grad_norm": 0.5535883003187351, |
|
"learning_rate": 8.567056450617678e-06, |
|
"loss": 0.5997, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.185031185031185, |
|
"grad_norm": 0.5265834205804355, |
|
"learning_rate": 8.522180040895855e-06, |
|
"loss": 0.5919, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 1.1876299376299375, |
|
"grad_norm": 0.5588080207098279, |
|
"learning_rate": 8.477334036912392e-06, |
|
"loss": 0.599, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 1.1902286902286903, |
|
"grad_norm": 0.5254040822916115, |
|
"learning_rate": 8.432519361361458e-06, |
|
"loss": 0.5947, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 1.1928274428274428, |
|
"grad_norm": 0.5455269093593619, |
|
"learning_rate": 8.387736936292654e-06, |
|
"loss": 0.5939, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 1.1954261954261955, |
|
"grad_norm": 0.5639163666347963, |
|
"learning_rate": 8.342987683092033e-06, |
|
"loss": 0.5949, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.198024948024948, |
|
"grad_norm": 0.6084945001801156, |
|
"learning_rate": 8.298272522463144e-06, |
|
"loss": 0.596, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 1.2006237006237006, |
|
"grad_norm": 0.5836727328380208, |
|
"learning_rate": 8.253592374408095e-06, |
|
"loss": 0.5963, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 1.2032224532224531, |
|
"grad_norm": 0.5454765026633895, |
|
"learning_rate": 8.20894815820862e-06, |
|
"loss": 0.6003, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 1.2058212058212059, |
|
"grad_norm": 0.5490153680134165, |
|
"learning_rate": 8.164340792407159e-06, |
|
"loss": 0.594, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 1.2084199584199584, |
|
"grad_norm": 0.5442982828513857, |
|
"learning_rate": 8.119771194787966e-06, |
|
"loss": 0.5939, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.211018711018711, |
|
"grad_norm": 0.5224447370874735, |
|
"learning_rate": 8.07524028235824e-06, |
|
"loss": 0.5895, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 1.2136174636174637, |
|
"grad_norm": 0.5648657256694566, |
|
"learning_rate": 8.030748971329224e-06, |
|
"loss": 0.6101, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 1.2162162162162162, |
|
"grad_norm": 0.5616352744995109, |
|
"learning_rate": 7.986298177097377e-06, |
|
"loss": 0.6042, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 1.2188149688149688, |
|
"grad_norm": 0.5771472821711124, |
|
"learning_rate": 7.941888814225545e-06, |
|
"loss": 0.5987, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 1.2214137214137215, |
|
"grad_norm": 0.5512099431582866, |
|
"learning_rate": 7.897521796424129e-06, |
|
"loss": 0.6056, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.224012474012474, |
|
"grad_norm": 0.5098551231289585, |
|
"learning_rate": 7.853198036532288e-06, |
|
"loss": 0.6031, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 1.2266112266112266, |
|
"grad_norm": 0.5717033338675165, |
|
"learning_rate": 7.80891844649917e-06, |
|
"loss": 0.592, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.2292099792099793, |
|
"grad_norm": 0.5550621676748785, |
|
"learning_rate": 7.764683937365134e-06, |
|
"loss": 0.5877, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 1.2318087318087318, |
|
"grad_norm": 0.5823877655186512, |
|
"learning_rate": 7.720495419243014e-06, |
|
"loss": 0.6226, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 1.2344074844074844, |
|
"grad_norm": 0.547937317341142, |
|
"learning_rate": 7.676353801299388e-06, |
|
"loss": 0.5936, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.237006237006237, |
|
"grad_norm": 0.5341568281577489, |
|
"learning_rate": 7.63225999173589e-06, |
|
"loss": 0.5899, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 1.2396049896049897, |
|
"grad_norm": 0.5432593899885211, |
|
"learning_rate": 7.5882148977704914e-06, |
|
"loss": 0.602, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 1.2422037422037422, |
|
"grad_norm": 0.5421541547325941, |
|
"learning_rate": 7.544219425618862e-06, |
|
"loss": 0.5858, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 1.2448024948024947, |
|
"grad_norm": 0.5370495755166194, |
|
"learning_rate": 7.500274480475716e-06, |
|
"loss": 0.6071, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 1.2474012474012475, |
|
"grad_norm": 0.5449432404989222, |
|
"learning_rate": 7.45638096649619e-06, |
|
"loss": 0.5972, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.5660658221047755, |
|
"learning_rate": 7.412539786777234e-06, |
|
"loss": 0.5943, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 1.2525987525987525, |
|
"grad_norm": 0.5247570244603685, |
|
"learning_rate": 7.3687518433390414e-06, |
|
"loss": 0.5924, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 1.255197505197505, |
|
"grad_norm": 0.5207188548015057, |
|
"learning_rate": 7.325018037106474e-06, |
|
"loss": 0.5917, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 1.2577962577962578, |
|
"grad_norm": 0.550416355369781, |
|
"learning_rate": 7.281339267890548e-06, |
|
"loss": 0.5857, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 1.2603950103950103, |
|
"grad_norm": 0.5155652437752087, |
|
"learning_rate": 7.237716434369899e-06, |
|
"loss": 0.5946, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.262993762993763, |
|
"grad_norm": 0.5446737196259831, |
|
"learning_rate": 7.1941504340723e-06, |
|
"loss": 0.602, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 1.2655925155925156, |
|
"grad_norm": 0.5241476632554306, |
|
"learning_rate": 7.150642163356207e-06, |
|
"loss": 0.5848, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 1.2681912681912682, |
|
"grad_norm": 0.5368234251185683, |
|
"learning_rate": 7.107192517392296e-06, |
|
"loss": 0.593, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 1.2707900207900207, |
|
"grad_norm": 0.5609013153184366, |
|
"learning_rate": 7.063802390145058e-06, |
|
"loss": 0.5926, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 1.2733887733887734, |
|
"grad_norm": 0.5601427654877619, |
|
"learning_rate": 7.020472674354399e-06, |
|
"loss": 0.5955, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.275987525987526, |
|
"grad_norm": 0.5573446244898519, |
|
"learning_rate": 6.9772042615172855e-06, |
|
"loss": 0.596, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 1.2785862785862787, |
|
"grad_norm": 0.5409848408904747, |
|
"learning_rate": 6.933998041869381e-06, |
|
"loss": 0.5935, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 1.2811850311850312, |
|
"grad_norm": 0.535048843399521, |
|
"learning_rate": 6.890854904366749e-06, |
|
"loss": 0.5887, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 1.2837837837837838, |
|
"grad_norm": 0.5178312289683921, |
|
"learning_rate": 6.8477757366675526e-06, |
|
"loss": 0.5915, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 1.2863825363825363, |
|
"grad_norm": 0.5746689510719801, |
|
"learning_rate": 6.804761425113796e-06, |
|
"loss": 0.5952, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.288981288981289, |
|
"grad_norm": 0.5494461776895628, |
|
"learning_rate": 6.761812854713079e-06, |
|
"loss": 0.5905, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 1.2915800415800416, |
|
"grad_norm": 0.5437436470618007, |
|
"learning_rate": 6.718930909120404e-06, |
|
"loss": 0.5881, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 1.2941787941787941, |
|
"grad_norm": 0.5670019639541221, |
|
"learning_rate": 6.676116470619988e-06, |
|
"loss": 0.5945, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 1.2967775467775469, |
|
"grad_norm": 0.5754824812127866, |
|
"learning_rate": 6.633370420107097e-06, |
|
"loss": 0.5975, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 1.2993762993762994, |
|
"grad_norm": 0.548088254326578, |
|
"learning_rate": 6.590693637069938e-06, |
|
"loss": 0.5966, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.301975051975052, |
|
"grad_norm": 0.5243431037903644, |
|
"learning_rate": 6.548086999571566e-06, |
|
"loss": 0.5938, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 1.3045738045738045, |
|
"grad_norm": 0.565611456496591, |
|
"learning_rate": 6.505551384231801e-06, |
|
"loss": 0.592, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 1.3071725571725572, |
|
"grad_norm": 0.561890682944344, |
|
"learning_rate": 6.463087666209203e-06, |
|
"loss": 0.5818, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 1.3097713097713097, |
|
"grad_norm": 0.5872998314400075, |
|
"learning_rate": 6.420696719183072e-06, |
|
"loss": 0.5849, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 1.3123700623700625, |
|
"grad_norm": 0.5165776292315633, |
|
"learning_rate": 6.378379415335456e-06, |
|
"loss": 0.5913, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 1.314968814968815, |
|
"grad_norm": 0.5271295585343699, |
|
"learning_rate": 6.336136625333218e-06, |
|
"loss": 0.6001, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 1.3175675675675675, |
|
"grad_norm": 0.5541093558682746, |
|
"learning_rate": 6.293969218310122e-06, |
|
"loss": 0.5986, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 1.32016632016632, |
|
"grad_norm": 0.533798380054359, |
|
"learning_rate": 6.251878061848948e-06, |
|
"loss": 0.5759, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 1.3227650727650728, |
|
"grad_norm": 0.5573470744869463, |
|
"learning_rate": 6.209864021963638e-06, |
|
"loss": 0.5879, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 1.3253638253638254, |
|
"grad_norm": 0.5493774653074921, |
|
"learning_rate": 6.16792796308148e-06, |
|
"loss": 0.6017, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 1.3279625779625779, |
|
"grad_norm": 0.582071520963931, |
|
"learning_rate": 6.126070748025332e-06, |
|
"loss": 0.6025, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 1.3305613305613306, |
|
"grad_norm": 0.5144065189940694, |
|
"learning_rate": 6.084293237995855e-06, |
|
"loss": 0.5827, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 1.3331600831600832, |
|
"grad_norm": 0.5139551250609696, |
|
"learning_rate": 6.042596292553803e-06, |
|
"loss": 0.585, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 1.3357588357588357, |
|
"grad_norm": 0.512609402226577, |
|
"learning_rate": 6.0009807696023385e-06, |
|
"loss": 0.5865, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 1.3383575883575882, |
|
"grad_norm": 0.5648181415304806, |
|
"learning_rate": 5.959447525369375e-06, |
|
"loss": 0.585, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 1.340956340956341, |
|
"grad_norm": 0.4961717842990021, |
|
"learning_rate": 5.917997414389966e-06, |
|
"loss": 0.6065, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 1.3435550935550935, |
|
"grad_norm": 0.5369467703030896, |
|
"learning_rate": 5.87663128948872e-06, |
|
"loss": 0.586, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 1.3461538461538463, |
|
"grad_norm": 0.5805263679243358, |
|
"learning_rate": 5.835350001762258e-06, |
|
"loss": 0.5793, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 1.3487525987525988, |
|
"grad_norm": 0.5472679842232231, |
|
"learning_rate": 5.794154400561698e-06, |
|
"loss": 0.5936, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 1.3513513513513513, |
|
"grad_norm": 0.543074701262151, |
|
"learning_rate": 5.753045333475181e-06, |
|
"loss": 0.6068, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.3539501039501038, |
|
"grad_norm": 0.501759332855818, |
|
"learning_rate": 5.712023646310429e-06, |
|
"loss": 0.5923, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 1.3565488565488566, |
|
"grad_norm": 0.5627676287799811, |
|
"learning_rate": 5.671090183077355e-06, |
|
"loss": 0.5908, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 1.3591476091476091, |
|
"grad_norm": 0.5396141346304916, |
|
"learning_rate": 5.630245785970676e-06, |
|
"loss": 0.5835, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 1.3617463617463619, |
|
"grad_norm": 0.5237498968336539, |
|
"learning_rate": 5.589491295352607e-06, |
|
"loss": 0.6023, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 1.3643451143451144, |
|
"grad_norm": 0.5714671269121867, |
|
"learning_rate": 5.548827549735565e-06, |
|
"loss": 0.5875, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 1.366943866943867, |
|
"grad_norm": 0.5420021477246512, |
|
"learning_rate": 5.508255385764908e-06, |
|
"loss": 0.5962, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 1.3695426195426195, |
|
"grad_norm": 0.5213053662389652, |
|
"learning_rate": 5.46777563820172e-06, |
|
"loss": 0.5775, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 1.3721413721413722, |
|
"grad_norm": 0.5940088174557574, |
|
"learning_rate": 5.427389139905661e-06, |
|
"loss": 0.5925, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 1.3747401247401247, |
|
"grad_norm": 0.5222712044920537, |
|
"learning_rate": 5.387096721817799e-06, |
|
"loss": 0.5939, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 1.3773388773388773, |
|
"grad_norm": 0.533759193353919, |
|
"learning_rate": 5.346899212943529e-06, |
|
"loss": 0.5891, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 1.37993762993763, |
|
"grad_norm": 0.5684539829097972, |
|
"learning_rate": 5.306797440335516e-06, |
|
"loss": 0.598, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 1.3825363825363826, |
|
"grad_norm": 0.577296227438705, |
|
"learning_rate": 5.266792229076683e-06, |
|
"loss": 0.5908, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 1.385135135135135, |
|
"grad_norm": 0.5367239875824128, |
|
"learning_rate": 5.2268844022632255e-06, |
|
"loss": 0.5933, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 1.3877338877338876, |
|
"grad_norm": 0.5512610722369685, |
|
"learning_rate": 5.187074780987682e-06, |
|
"loss": 0.5952, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 1.3903326403326404, |
|
"grad_norm": 0.5045101270772827, |
|
"learning_rate": 5.147364184322035e-06, |
|
"loss": 0.5793, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 1.392931392931393, |
|
"grad_norm": 0.5505398932307536, |
|
"learning_rate": 5.1077534293008635e-06, |
|
"loss": 0.5973, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 1.3955301455301456, |
|
"grad_norm": 0.5618823440106911, |
|
"learning_rate": 5.068243330904543e-06, |
|
"loss": 0.5954, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 1.3981288981288982, |
|
"grad_norm": 0.547767493340866, |
|
"learning_rate": 5.028834702042451e-06, |
|
"loss": 0.5965, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 1.4007276507276507, |
|
"grad_norm": 0.5311004214157612, |
|
"learning_rate": 4.9895283535362744e-06, |
|
"loss": 0.5761, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 1.4033264033264032, |
|
"grad_norm": 0.5522752500932042, |
|
"learning_rate": 4.9503250941032944e-06, |
|
"loss": 0.5858, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.405925155925156, |
|
"grad_norm": 0.5236156885167432, |
|
"learning_rate": 4.911225730339769e-06, |
|
"loss": 0.5949, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 1.4085239085239085, |
|
"grad_norm": 0.5167097373944585, |
|
"learning_rate": 4.872231066704334e-06, |
|
"loss": 0.5961, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 1.411122661122661, |
|
"grad_norm": 0.563080292650056, |
|
"learning_rate": 4.83334190550144e-06, |
|
"loss": 0.5749, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 1.4137214137214138, |
|
"grad_norm": 0.5327245199013042, |
|
"learning_rate": 4.794559046864869e-06, |
|
"loss": 0.5862, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 1.4163201663201663, |
|
"grad_norm": 0.5289775705397598, |
|
"learning_rate": 4.7558832887412445e-06, |
|
"loss": 0.5877, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 1.4189189189189189, |
|
"grad_norm": 0.5162142489701083, |
|
"learning_rate": 4.717315426873631e-06, |
|
"loss": 0.601, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 1.4215176715176714, |
|
"grad_norm": 0.5405562294471293, |
|
"learning_rate": 4.678856254785158e-06, |
|
"loss": 0.6065, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 1.4241164241164241, |
|
"grad_norm": 0.534465583683509, |
|
"learning_rate": 4.6405065637626855e-06, |
|
"loss": 0.5871, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 1.4267151767151767, |
|
"grad_norm": 0.5179677159166826, |
|
"learning_rate": 4.602267142840545e-06, |
|
"loss": 0.5873, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 1.4293139293139294, |
|
"grad_norm": 0.5480531904822096, |
|
"learning_rate": 4.564138778784276e-06, |
|
"loss": 0.5895, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 1.431912681912682, |
|
"grad_norm": 0.5288015351179746, |
|
"learning_rate": 4.526122256074468e-06, |
|
"loss": 0.588, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 1.4345114345114345, |
|
"grad_norm": 0.5232295069807825, |
|
"learning_rate": 4.488218356890594e-06, |
|
"loss": 0.5802, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 1.437110187110187, |
|
"grad_norm": 0.5460332455155361, |
|
"learning_rate": 4.450427861094933e-06, |
|
"loss": 0.5809, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 1.4397089397089398, |
|
"grad_norm": 0.5451685990728384, |
|
"learning_rate": 4.412751546216521e-06, |
|
"loss": 0.5879, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 1.4423076923076923, |
|
"grad_norm": 0.5068743803171689, |
|
"learning_rate": 4.375190187435151e-06, |
|
"loss": 0.5819, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 1.444906444906445, |
|
"grad_norm": 0.602224317860505, |
|
"learning_rate": 4.3377445575654256e-06, |
|
"loss": 0.5808, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 1.4475051975051976, |
|
"grad_norm": 0.5584038619724856, |
|
"learning_rate": 4.300415427040867e-06, |
|
"loss": 0.586, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 1.45010395010395, |
|
"grad_norm": 0.5402693196977059, |
|
"learning_rate": 4.263203563898038e-06, |
|
"loss": 0.5828, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 1.4527027027027026, |
|
"grad_norm": 0.5393224079014185, |
|
"learning_rate": 4.226109733760777e-06, |
|
"loss": 0.5933, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 1.4553014553014554, |
|
"grad_norm": 0.519264378128423, |
|
"learning_rate": 4.1891346998244096e-06, |
|
"loss": 0.5902, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.457900207900208, |
|
"grad_norm": 0.5061001580587425, |
|
"learning_rate": 4.15227922284007e-06, |
|
"loss": 0.5965, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 1.4604989604989604, |
|
"grad_norm": 0.5250324465767509, |
|
"learning_rate": 4.11554406109904e-06, |
|
"loss": 0.5805, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 1.4630977130977132, |
|
"grad_norm": 0.5215691344217085, |
|
"learning_rate": 4.07892997041714e-06, |
|
"loss": 0.5847, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 1.4656964656964657, |
|
"grad_norm": 0.5100852244380407, |
|
"learning_rate": 4.042437704119207e-06, |
|
"loss": 0.5863, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 1.4682952182952183, |
|
"grad_norm": 0.5683610331681553, |
|
"learning_rate": 4.006068013023556e-06, |
|
"loss": 0.5916, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 1.4708939708939708, |
|
"grad_norm": 0.5388381909277385, |
|
"learning_rate": 3.969821645426559e-06, |
|
"loss": 0.5879, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 1.4734927234927235, |
|
"grad_norm": 0.5470869871655806, |
|
"learning_rate": 3.933699347087239e-06, |
|
"loss": 0.579, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 1.476091476091476, |
|
"grad_norm": 0.5410808556361425, |
|
"learning_rate": 3.897701861211924e-06, |
|
"loss": 0.5882, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 1.4786902286902288, |
|
"grad_norm": 0.5445488241193585, |
|
"learning_rate": 3.861829928438975e-06, |
|
"loss": 0.5745, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 1.4812889812889813, |
|
"grad_norm": 0.5601094797596367, |
|
"learning_rate": 3.8260842868235105e-06, |
|
"loss": 0.5845, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 1.4838877338877339, |
|
"grad_norm": 0.5108174266653619, |
|
"learning_rate": 3.7904656718222653e-06, |
|
"loss": 0.5817, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 1.4864864864864864, |
|
"grad_norm": 0.5380258071504634, |
|
"learning_rate": 3.7549748162784216e-06, |
|
"loss": 0.5734, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 1.4890852390852392, |
|
"grad_norm": 0.5056247324728291, |
|
"learning_rate": 3.7196124504065512e-06, |
|
"loss": 0.5837, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 1.4916839916839917, |
|
"grad_norm": 0.5164676599197356, |
|
"learning_rate": 3.684379301777585e-06, |
|
"loss": 0.5737, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 1.4942827442827442, |
|
"grad_norm": 0.5588163356512155, |
|
"learning_rate": 3.649276095303843e-06, |
|
"loss": 0.5896, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 1.496881496881497, |
|
"grad_norm": 0.4785867733453047, |
|
"learning_rate": 3.61430355322413e-06, |
|
"loss": 0.5829, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 1.4994802494802495, |
|
"grad_norm": 0.5236230514577889, |
|
"learning_rate": 3.579462395088856e-06, |
|
"loss": 0.5943, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 1.502079002079002, |
|
"grad_norm": 0.5243126400259197, |
|
"learning_rate": 3.544753337745249e-06, |
|
"loss": 0.583, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 1.5046777546777546, |
|
"grad_norm": 0.5400667626881714, |
|
"learning_rate": 3.5101770953225932e-06, |
|
"loss": 0.5868, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 1.5072765072765073, |
|
"grad_norm": 0.513023426174068, |
|
"learning_rate": 3.475734379217555e-06, |
|
"loss": 0.5833, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.5098752598752598, |
|
"grad_norm": 0.5157665498993713, |
|
"learning_rate": 3.4414258980795202e-06, |
|
"loss": 0.589, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 1.5124740124740126, |
|
"grad_norm": 0.5357953624143235, |
|
"learning_rate": 3.4072523577960304e-06, |
|
"loss": 0.5964, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 1.5150727650727651, |
|
"grad_norm": 0.5527070418499459, |
|
"learning_rate": 3.3732144614782657e-06, |
|
"loss": 0.5837, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 1.5176715176715176, |
|
"grad_norm": 0.5458675642969252, |
|
"learning_rate": 3.339312909446557e-06, |
|
"loss": 0.5796, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 1.5202702702702702, |
|
"grad_norm": 0.5152283307254787, |
|
"learning_rate": 3.305548399215994e-06, |
|
"loss": 0.5788, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 1.5228690228690227, |
|
"grad_norm": 0.530799104763713, |
|
"learning_rate": 3.2719216254820697e-06, |
|
"loss": 0.5838, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 1.5254677754677755, |
|
"grad_norm": 0.5403421099817592, |
|
"learning_rate": 3.2384332801063846e-06, |
|
"loss": 0.5656, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 1.5280665280665282, |
|
"grad_norm": 0.558670507000079, |
|
"learning_rate": 3.205084052102414e-06, |
|
"loss": 0.588, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 1.5306652806652807, |
|
"grad_norm": 0.5087922370395622, |
|
"learning_rate": 3.1718746276213343e-06, |
|
"loss": 0.5852, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 1.5332640332640333, |
|
"grad_norm": 0.5482047516259143, |
|
"learning_rate": 3.1388056899379036e-06, |
|
"loss": 0.5946, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 1.5358627858627858, |
|
"grad_norm": 0.5239979050184048, |
|
"learning_rate": 3.105877919436401e-06, |
|
"loss": 0.5961, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 0.6264561692888647, |
|
"learning_rate": 3.0730919935966262e-06, |
|
"loss": 0.5896, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 1.541060291060291, |
|
"grad_norm": 0.5079701430841874, |
|
"learning_rate": 3.0404485869799684e-06, |
|
"loss": 0.5804, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 1.5436590436590436, |
|
"grad_norm": 0.5227318565543926, |
|
"learning_rate": 3.007948371215521e-06, |
|
"loss": 0.5881, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 1.5462577962577964, |
|
"grad_norm": 0.4870518164479936, |
|
"learning_rate": 2.975592014986265e-06, |
|
"loss": 0.5952, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 1.5488565488565489, |
|
"grad_norm": 0.5650630749989504, |
|
"learning_rate": 2.9433801840153166e-06, |
|
"loss": 0.5836, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 1.5514553014553014, |
|
"grad_norm": 0.5358798465667378, |
|
"learning_rate": 2.9113135410522173e-06, |
|
"loss": 0.5962, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 1.554054054054054, |
|
"grad_norm": 0.5158817153128504, |
|
"learning_rate": 2.8793927458593096e-06, |
|
"loss": 0.57, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 1.5566528066528067, |
|
"grad_norm": 0.5442534042057305, |
|
"learning_rate": 2.8476184551981547e-06, |
|
"loss": 0.5821, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 1.5592515592515592, |
|
"grad_norm": 0.5778592324539092, |
|
"learning_rate": 2.8159913228160242e-06, |
|
"loss": 0.5862, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.561850311850312, |
|
"grad_norm": 0.545713870580316, |
|
"learning_rate": 2.7845119994324567e-06, |
|
"loss": 0.5837, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 1.5644490644490645, |
|
"grad_norm": 0.5027605038010509, |
|
"learning_rate": 2.753181132725846e-06, |
|
"loss": 0.5908, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 1.567047817047817, |
|
"grad_norm": 0.5296784509548438, |
|
"learning_rate": 2.7219993673201483e-06, |
|
"loss": 0.5845, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 1.5696465696465696, |
|
"grad_norm": 0.5434115009675748, |
|
"learning_rate": 2.69096734477159e-06, |
|
"loss": 0.5761, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 1.572245322245322, |
|
"grad_norm": 0.5490780089111318, |
|
"learning_rate": 2.6600857035554805e-06, |
|
"loss": 0.5759, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 1.5748440748440748, |
|
"grad_norm": 0.5434069633378591, |
|
"learning_rate": 2.6293550790530776e-06, |
|
"loss": 0.5856, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 1.5774428274428276, |
|
"grad_norm": 0.5295051711364253, |
|
"learning_rate": 2.5987761035385074e-06, |
|
"loss": 0.5776, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 1.5800415800415801, |
|
"grad_norm": 0.5294382951858779, |
|
"learning_rate": 2.5683494061657698e-06, |
|
"loss": 0.6003, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 1.5826403326403327, |
|
"grad_norm": 0.5487264444738784, |
|
"learning_rate": 2.538075612955775e-06, |
|
"loss": 0.5905, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 1.5852390852390852, |
|
"grad_norm": 0.5151850538678816, |
|
"learning_rate": 2.5079553467834694e-06, |
|
"loss": 0.5945, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 1.5878378378378377, |
|
"grad_norm": 0.5241891607063889, |
|
"learning_rate": 2.4779892273650363e-06, |
|
"loss": 0.5809, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 1.5904365904365905, |
|
"grad_norm": 0.5490608540434708, |
|
"learning_rate": 2.44817787124512e-06, |
|
"loss": 0.5864, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 1.593035343035343, |
|
"grad_norm": 0.5085552474004695, |
|
"learning_rate": 2.4185218917841557e-06, |
|
"loss": 0.5839, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 1.5956340956340958, |
|
"grad_norm": 0.5237874429368263, |
|
"learning_rate": 2.389021899145745e-06, |
|
"loss": 0.583, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 1.5982328482328483, |
|
"grad_norm": 0.5462699535427128, |
|
"learning_rate": 2.3596785002841126e-06, |
|
"loss": 0.582, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 1.6008316008316008, |
|
"grad_norm": 0.5305897465130409, |
|
"learning_rate": 2.330492298931599e-06, |
|
"loss": 0.588, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 1.6034303534303533, |
|
"grad_norm": 0.5283228272742915, |
|
"learning_rate": 2.3014638955862534e-06, |
|
"loss": 0.5895, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 1.6060291060291059, |
|
"grad_norm": 0.5375248398279345, |
|
"learning_rate": 2.2725938874994733e-06, |
|
"loss": 0.5712, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 1.6086278586278586, |
|
"grad_norm": 0.5387607107517319, |
|
"learning_rate": 2.2438828686637193e-06, |
|
"loss": 0.5717, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 1.6112266112266114, |
|
"grad_norm": 0.5333070714415671, |
|
"learning_rate": 2.2153314298002883e-06, |
|
"loss": 0.5869, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.613825363825364, |
|
"grad_norm": 0.5130040335910431, |
|
"learning_rate": 2.1869401583471674e-06, |
|
"loss": 0.6017, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 1.6164241164241164, |
|
"grad_norm": 0.5340427602820074, |
|
"learning_rate": 2.1587096384469465e-06, |
|
"loss": 0.5792, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 1.619022869022869, |
|
"grad_norm": 0.5241334616500023, |
|
"learning_rate": 2.130640450934787e-06, |
|
"loss": 0.5766, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 1.6216216216216215, |
|
"grad_norm": 0.5307440437206696, |
|
"learning_rate": 2.1027331733264877e-06, |
|
"loss": 0.5841, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 1.6242203742203742, |
|
"grad_norm": 0.5111869577979938, |
|
"learning_rate": 2.074988379806593e-06, |
|
"loss": 0.5772, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 1.6268191268191268, |
|
"grad_norm": 0.5232894395546558, |
|
"learning_rate": 2.047406641216583e-06, |
|
"loss": 0.5798, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 1.6294178794178795, |
|
"grad_norm": 0.5427662431229768, |
|
"learning_rate": 2.019988525043125e-06, |
|
"loss": 0.5852, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 1.632016632016632, |
|
"grad_norm": 0.50753581880278, |
|
"learning_rate": 1.992734595406408e-06, |
|
"loss": 0.5756, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 1.6346153846153846, |
|
"grad_norm": 0.5026065449648726, |
|
"learning_rate": 1.965645413048519e-06, |
|
"loss": 0.5756, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 1.637214137214137, |
|
"grad_norm": 0.5135278822828021, |
|
"learning_rate": 1.9387215353219213e-06, |
|
"loss": 0.6112, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 1.6398128898128899, |
|
"grad_norm": 0.5576890383464939, |
|
"learning_rate": 1.9119635161779738e-06, |
|
"loss": 0.5933, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 1.6424116424116424, |
|
"grad_norm": 0.5005852274685088, |
|
"learning_rate": 1.885371906155552e-06, |
|
"loss": 0.575, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 1.6450103950103951, |
|
"grad_norm": 0.552519914509362, |
|
"learning_rate": 1.8589472523697017e-06, |
|
"loss": 0.5832, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 1.6476091476091477, |
|
"grad_norm": 0.5313038853576302, |
|
"learning_rate": 1.832690098500386e-06, |
|
"loss": 0.5799, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 1.6502079002079002, |
|
"grad_norm": 0.5307430737754275, |
|
"learning_rate": 1.8066009847813171e-06, |
|
"loss": 0.5933, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 1.6528066528066527, |
|
"grad_norm": 0.5092540679964042, |
|
"learning_rate": 1.7806804479888151e-06, |
|
"loss": 0.5857, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 1.6554054054054053, |
|
"grad_norm": 0.527226302557591, |
|
"learning_rate": 1.7549290214307824e-06, |
|
"loss": 0.5826, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 1.658004158004158, |
|
"grad_norm": 0.5117048033630656, |
|
"learning_rate": 1.729347234935721e-06, |
|
"loss": 0.5866, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 1.6606029106029108, |
|
"grad_norm": 0.5338263684846878, |
|
"learning_rate": 1.7039356148418363e-06, |
|
"loss": 0.5815, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 1.6632016632016633, |
|
"grad_norm": 0.514319228309042, |
|
"learning_rate": 1.6786946839862127e-06, |
|
"loss": 0.5704, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.6658004158004158, |
|
"grad_norm": 0.5093476576764645, |
|
"learning_rate": 1.6536249616940414e-06, |
|
"loss": 0.5861, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 1.6683991683991684, |
|
"grad_norm": 0.507273641369432, |
|
"learning_rate": 1.6287269637679538e-06, |
|
"loss": 0.5827, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 1.6709979209979209, |
|
"grad_norm": 0.5145115612127696, |
|
"learning_rate": 1.604001202477391e-06, |
|
"loss": 0.5768, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 1.6735966735966736, |
|
"grad_norm": 0.5149567719955702, |
|
"learning_rate": 1.5794481865480805e-06, |
|
"loss": 0.581, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 1.6761954261954262, |
|
"grad_norm": 0.5471640458477693, |
|
"learning_rate": 1.5550684211515544e-06, |
|
"loss": 0.6, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 1.678794178794179, |
|
"grad_norm": 0.5394179199712156, |
|
"learning_rate": 1.5308624078947666e-06, |
|
"loss": 0.5801, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 1.6813929313929314, |
|
"grad_norm": 0.506276316904012, |
|
"learning_rate": 1.5068306448097714e-06, |
|
"loss": 0.5689, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 1.683991683991684, |
|
"grad_norm": 0.5038655785707594, |
|
"learning_rate": 1.4829736263434702e-06, |
|
"loss": 0.5907, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 1.6865904365904365, |
|
"grad_norm": 0.5272673427698651, |
|
"learning_rate": 1.4592918433474424e-06, |
|
"loss": 0.586, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 1.689189189189189, |
|
"grad_norm": 0.5298471417100465, |
|
"learning_rate": 1.4357857830678445e-06, |
|
"loss": 0.5937, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 1.6917879417879418, |
|
"grad_norm": 0.5181589625133596, |
|
"learning_rate": 1.4124559291353878e-06, |
|
"loss": 0.5679, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 1.6943866943866945, |
|
"grad_norm": 0.5012265675078679, |
|
"learning_rate": 1.3893027615553922e-06, |
|
"loss": 0.5657, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 1.696985446985447, |
|
"grad_norm": 0.5568040197261597, |
|
"learning_rate": 1.366326756697892e-06, |
|
"loss": 0.5961, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 1.6995841995841996, |
|
"grad_norm": 0.5718957039548288, |
|
"learning_rate": 1.3435283872878623e-06, |
|
"loss": 0.5787, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 1.7021829521829521, |
|
"grad_norm": 0.5432446627166708, |
|
"learning_rate": 1.3209081223954678e-06, |
|
"loss": 0.5808, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 1.7047817047817047, |
|
"grad_norm": 0.5392794940489283, |
|
"learning_rate": 1.2984664274264214e-06, |
|
"loss": 0.571, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 1.7073804573804574, |
|
"grad_norm": 0.5619472295000788, |
|
"learning_rate": 1.2762037641124147e-06, |
|
"loss": 0.5752, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 1.70997920997921, |
|
"grad_norm": 0.5144706465312519, |
|
"learning_rate": 1.2541205905016097e-06, |
|
"loss": 0.5831, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 1.7125779625779627, |
|
"grad_norm": 0.5359125394441557, |
|
"learning_rate": 1.2322173609492138e-06, |
|
"loss": 0.571, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 1.7151767151767152, |
|
"grad_norm": 0.5219188144811212, |
|
"learning_rate": 1.2104945261081425e-06, |
|
"loss": 0.5866, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.7177754677754677, |
|
"grad_norm": 0.48682253952056287, |
|
"learning_rate": 1.1889525329197337e-06, |
|
"loss": 0.5659, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 1.7203742203742203, |
|
"grad_norm": 0.552850063663083, |
|
"learning_rate": 1.1675918246045592e-06, |
|
"loss": 0.5892, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 1.722972972972973, |
|
"grad_norm": 0.5129345549943813, |
|
"learning_rate": 1.1464128406533082e-06, |
|
"loss": 0.5726, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 1.7255717255717256, |
|
"grad_norm": 0.5215339840888276, |
|
"learning_rate": 1.1254160168177363e-06, |
|
"loss": 0.5755, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 1.7281704781704783, |
|
"grad_norm": 0.5346007889564445, |
|
"learning_rate": 1.1046017851017077e-06, |
|
"loss": 0.5934, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 1.7307692307692308, |
|
"grad_norm": 0.5241493511931715, |
|
"learning_rate": 1.083970573752301e-06, |
|
"loss": 0.5687, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 1.7333679833679834, |
|
"grad_norm": 0.5144495715767252, |
|
"learning_rate": 1.0635228072510084e-06, |
|
"loss": 0.5717, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 1.735966735966736, |
|
"grad_norm": 0.555636699696863, |
|
"learning_rate": 1.043258906304987e-06, |
|
"loss": 0.5717, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 1.7385654885654884, |
|
"grad_norm": 0.5398880461538901, |
|
"learning_rate": 1.023179287838414e-06, |
|
"loss": 0.5904, |
|
"step": 3345 |
|
    },
    {
      "epoch": 1.7411642411642412,
      "grad_norm": 0.5443352220533247,
      "learning_rate": 1.0032843649839052e-06,
      "loss": 0.5633,
      "step": 3350
    },
    {
      "epoch": 1.743762993762994,
      "grad_norm": 0.5176425518248007,
      "learning_rate": 9.835745470740143e-07,
      "loss": 0.5659,
      "step": 3355
    },
    {
      "epoch": 1.7463617463617465,
      "grad_norm": 0.5093779378294409,
      "learning_rate": 9.640502396328133e-07,
      "loss": 0.5727,
      "step": 3360
    },
    {
      "epoch": 1.748960498960499,
      "grad_norm": 0.5447675578056951,
      "learning_rate": 9.447118443675496e-07,
      "loss": 0.57,
      "step": 3365
    },
    {
      "epoch": 1.7515592515592515,
      "grad_norm": 0.5213010548532966,
      "learning_rate": 9.255597591603727e-07,
      "loss": 0.5714,
      "step": 3370
    },
    {
      "epoch": 1.754158004158004,
      "grad_norm": 0.5398854243774627,
      "learning_rate": 9.065943780601583e-07,
      "loss": 0.5811,
      "step": 3375
    },
    {
      "epoch": 1.7567567567567568,
      "grad_norm": 0.5056718137516323,
      "learning_rate": 8.87816091274396e-07,
      "loss": 0.5674,
      "step": 3380
    },
    {
      "epoch": 1.7593555093555093,
      "grad_norm": 0.4990869376715757,
      "learning_rate": 8.692252851611594e-07,
      "loss": 0.5813,
      "step": 3385
    },
    {
      "epoch": 1.761954261954262,
      "grad_norm": 0.53552516213898,
      "learning_rate": 8.508223422211593e-07,
      "loss": 0.5814,
      "step": 3390
    },
    {
      "epoch": 1.7645530145530146,
      "grad_norm": 0.5060595322478018,
      "learning_rate": 8.326076410898798e-07,
      "loss": 0.5653,
      "step": 3395
    },
    {
      "epoch": 1.7671517671517671,
      "grad_norm": 0.5333772606550752,
      "learning_rate": 8.145815565297721e-07,
      "loss": 0.575,
      "step": 3400
    },
    {
      "epoch": 1.7697505197505197,
      "grad_norm": 0.5193976811229921,
      "learning_rate": 7.967444594225604e-07,
      "loss": 0.5821,
      "step": 3405
    },
    {
      "epoch": 1.7723492723492722,
      "grad_norm": 0.4986756303541811,
      "learning_rate": 7.790967167615982e-07,
      "loss": 0.5878,
      "step": 3410
    },
    {
      "epoch": 1.774948024948025,
      "grad_norm": 0.5443836974981098,
      "learning_rate": 7.616386916443308e-07,
      "loss": 0.5788,
      "step": 3415
    },
    {
      "epoch": 1.7775467775467777,
      "grad_norm": 0.5018572354435884,
      "learning_rate": 7.443707432648117e-07,
      "loss": 0.5757,
      "step": 3420
    },
    {
      "epoch": 1.7801455301455302,
      "grad_norm": 0.5221278089522104,
      "learning_rate": 7.272932269063182e-07,
      "loss": 0.5804,
      "step": 3425
    },
    {
      "epoch": 1.7827442827442828,
      "grad_norm": 0.5309066311243344,
      "learning_rate": 7.10406493934046e-07,
      "loss": 0.5725,
      "step": 3430
    },
    {
      "epoch": 1.7853430353430353,
      "grad_norm": 0.523703068520167,
      "learning_rate": 6.937108917878699e-07,
      "loss": 0.5851,
      "step": 3435
    },
    {
      "epoch": 1.7879417879417878,
      "grad_norm": 0.4990677586423772,
      "learning_rate": 6.772067639752023e-07,
      "loss": 0.5738,
      "step": 3440
    },
    {
      "epoch": 1.7905405405405406,
      "grad_norm": 0.5118619723842658,
      "learning_rate": 6.608944500639247e-07,
      "loss": 0.5759,
      "step": 3445
    },
    {
      "epoch": 1.793139293139293,
      "grad_norm": 0.5335785839494404,
      "learning_rate": 6.447742856754014e-07,
      "loss": 0.5765,
      "step": 3450
    },
    {
      "epoch": 1.7957380457380459,
      "grad_norm": 0.5153869448178949,
      "learning_rate": 6.288466024775674e-07,
      "loss": 0.5696,
      "step": 3455
    },
    {
      "epoch": 1.7983367983367984,
      "grad_norm": 0.48901624634329477,
      "learning_rate": 6.131117281781196e-07,
      "loss": 0.5674,
      "step": 3460
    },
    {
      "epoch": 1.800935550935551,
      "grad_norm": 0.5102458462355101,
      "learning_rate": 5.975699865177575e-07,
      "loss": 0.5567,
      "step": 3465
    },
    {
      "epoch": 1.8035343035343034,
      "grad_norm": 0.5723265636044084,
      "learning_rate": 5.822216972635331e-07,
      "loss": 0.5926,
      "step": 3470
    },
    {
      "epoch": 1.806133056133056,
      "grad_norm": 0.5094713754422882,
      "learning_rate": 5.670671762022684e-07,
      "loss": 0.5655,
      "step": 3475
    },
    {
      "epoch": 1.8087318087318087,
      "grad_norm": 0.5039283824986392,
      "learning_rate": 5.52106735134057e-07,
      "loss": 0.5804,
      "step": 3480
    },
    {
      "epoch": 1.8113305613305615,
      "grad_norm": 0.5144912329273745,
      "learning_rate": 5.3734068186585e-07,
      "loss": 0.5836,
      "step": 3485
    },
    {
      "epoch": 1.813929313929314,
      "grad_norm": 0.5153264587268782,
      "learning_rate": 5.227693202051232e-07,
      "loss": 0.5847,
      "step": 3490
    },
    {
      "epoch": 1.8165280665280665,
      "grad_norm": 0.5415875159805758,
      "learning_rate": 5.083929499536289e-07,
      "loss": 0.577,
      "step": 3495
    },
    {
      "epoch": 1.819126819126819,
      "grad_norm": 0.5049450382278391,
      "learning_rate": 4.942118669012208e-07,
      "loss": 0.5822,
      "step": 3500
    },
    {
      "epoch": 1.8217255717255716,
      "grad_norm": 0.5051461545256577,
      "learning_rate": 4.802263628197756e-07,
      "loss": 0.5644,
      "step": 3505
    },
    {
      "epoch": 1.8243243243243243,
      "grad_norm": 0.49660831452275783,
      "learning_rate": 4.664367254571855e-07,
      "loss": 0.5709,
      "step": 3510
    },
    {
      "epoch": 1.8269230769230769,
      "grad_norm": 0.49492888592400675,
      "learning_rate": 4.528432385314352e-07,
      "loss": 0.5813,
      "step": 3515
    },
    {
      "epoch": 1.8295218295218296,
      "grad_norm": 0.4865514561195398,
      "learning_rate": 4.394461817247753e-07,
      "loss": 0.5686,
      "step": 3520
    },
    {
      "epoch": 1.8321205821205822,
      "grad_norm": 0.5225610714631679,
      "learning_rate": 4.262458306779571e-07,
      "loss": 0.5806,
      "step": 3525
    },
    {
      "epoch": 1.8347193347193347,
      "grad_norm": 0.5340600886285485,
      "learning_rate": 4.13242456984565e-07,
      "loss": 0.566,
      "step": 3530
    },
    {
      "epoch": 1.8373180873180872,
      "grad_norm": 0.5237864452094557,
      "learning_rate": 4.0043632818542845e-07,
      "loss": 0.576,
      "step": 3535
    },
    {
      "epoch": 1.83991683991684,
      "grad_norm": 0.5512703745424207,
      "learning_rate": 3.878277077631176e-07,
      "loss": 0.5786,
      "step": 3540
    },
    {
      "epoch": 1.8425155925155925,
      "grad_norm": 0.47905215589036726,
      "learning_rate": 3.754168551365234e-07,
      "loss": 0.5772,
      "step": 3545
    },
    {
      "epoch": 1.8451143451143452,
      "grad_norm": 0.549871032924406,
      "learning_rate": 3.6320402565551585e-07,
      "loss": 0.5779,
      "step": 3550
    },
    {
      "epoch": 1.8477130977130978,
      "grad_norm": 0.5305355260984184,
      "learning_rate": 3.5118947059569974e-07,
      "loss": 0.5814,
      "step": 3555
    },
    {
      "epoch": 1.8503118503118503,
      "grad_norm": 0.5361560006783601,
      "learning_rate": 3.393734371532309e-07,
      "loss": 0.5668,
      "step": 3560
    },
    {
      "epoch": 1.8529106029106028,
      "grad_norm": 0.5205186123699476,
      "learning_rate": 3.277561684397412e-07,
      "loss": 0.5921,
      "step": 3565
    },
    {
      "epoch": 1.8555093555093554,
      "grad_norm": 0.523345976738322,
      "learning_rate": 3.163379034773328e-07,
      "loss": 0.5889,
      "step": 3570
    },
    {
      "epoch": 1.8581081081081081,
      "grad_norm": 0.5080494795310655,
      "learning_rate": 3.0511887719366193e-07,
      "loss": 0.5707,
      "step": 3575
    },
    {
      "epoch": 1.8607068607068609,
      "grad_norm": 0.5483892874723801,
      "learning_rate": 2.9409932041710056e-07,
      "loss": 0.5867,
      "step": 3580
    },
    {
      "epoch": 1.8633056133056134,
      "grad_norm": 0.5028157875697813,
      "learning_rate": 2.8327945987199234e-07,
      "loss": 0.5735,
      "step": 3585
    },
    {
      "epoch": 1.865904365904366,
      "grad_norm": 0.542730797020021,
      "learning_rate": 2.726595181739866e-07,
      "loss": 0.5607,
      "step": 3590
    },
    {
      "epoch": 1.8685031185031185,
      "grad_norm": 0.5229545949985743,
      "learning_rate": 2.6223971382545623e-07,
      "loss": 0.5805,
      "step": 3595
    },
    {
      "epoch": 1.871101871101871,
      "grad_norm": 0.528588646817702,
      "learning_rate": 2.5202026121100453e-07,
      "loss": 0.5809,
      "step": 3600
    },
    {
      "epoch": 1.8737006237006237,
      "grad_norm": 0.5053433572695779,
      "learning_rate": 2.420013705930524e-07,
      "loss": 0.5695,
      "step": 3605
    },
    {
      "epoch": 1.8762993762993763,
      "grad_norm": 0.6061825606893263,
      "learning_rate": 2.321832481075137e-07,
      "loss": 0.5821,
      "step": 3610
    },
    {
      "epoch": 1.878898128898129,
      "grad_norm": 0.49099322033756715,
      "learning_rate": 2.225660957595499e-07,
      "loss": 0.5721,
      "step": 3615
    },
    {
      "epoch": 1.8814968814968815,
      "grad_norm": 0.5193378188817532,
      "learning_rate": 2.131501114194212e-07,
      "loss": 0.568,
      "step": 3620
    },
    {
      "epoch": 1.884095634095634,
      "grad_norm": 0.5088241393434693,
      "learning_rate": 2.0393548881841308e-07,
      "loss": 0.5937,
      "step": 3625
    },
    {
      "epoch": 1.8866943866943866,
      "grad_norm": 0.5249257025900219,
      "learning_rate": 1.9492241754484275e-07,
      "loss": 0.5696,
      "step": 3630
    },
    {
      "epoch": 1.8892931392931391,
      "grad_norm": 0.5063117403822439,
      "learning_rate": 1.861110830401691e-07,
      "loss": 0.5776,
      "step": 3635
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 0.5247823235703282,
      "learning_rate": 1.7750166659517009e-07,
      "loss": 0.5805,
      "step": 3640
    },
    {
      "epoch": 1.8944906444906446,
      "grad_norm": 0.530597215782001,
      "learning_rate": 1.6909434534621683e-07,
      "loss": 0.5833,
      "step": 3645
    },
    {
      "epoch": 1.8970893970893972,
      "grad_norm": 0.535514568090153,
      "learning_rate": 1.6088929227162542e-07,
      "loss": 0.5722,
      "step": 3650
    },
    {
      "epoch": 1.8996881496881497,
      "grad_norm": 0.5247060245361456,
      "learning_rate": 1.5288667618810426e-07,
      "loss": 0.5761,
      "step": 3655
    },
    {
      "epoch": 1.9022869022869022,
      "grad_norm": 0.5199277346646484,
      "learning_rate": 1.4508666174727237e-07,
      "loss": 0.5625,
      "step": 3660
    },
    {
      "epoch": 1.9048856548856548,
      "grad_norm": 0.5016083095240303,
      "learning_rate": 1.374894094322765e-07,
      "loss": 0.5754,
      "step": 3665
    },
    {
      "epoch": 1.9074844074844075,
      "grad_norm": 0.7501294618373303,
      "learning_rate": 1.300950755544894e-07,
      "loss": 0.5914,
      "step": 3670
    },
    {
      "epoch": 1.91008316008316,
      "grad_norm": 0.5599461348657118,
      "learning_rate": 1.2290381225029345e-07,
      "loss": 0.5661,
      "step": 3675
    },
    {
      "epoch": 1.9126819126819128,
      "grad_norm": 0.5324363544661017,
      "learning_rate": 1.1591576747794875e-07,
      "loss": 0.5734,
      "step": 3680
    },
    {
      "epoch": 1.9152806652806653,
      "grad_norm": 0.4949940507745294,
      "learning_rate": 1.0913108501454994e-07,
      "loss": 0.5691,
      "step": 3685
    },
    {
      "epoch": 1.9178794178794178,
      "grad_norm": 0.5201949950592929,
      "learning_rate": 1.0254990445306978e-07,
      "loss": 0.5702,
      "step": 3690
    },
    {
      "epoch": 1.9204781704781704,
      "grad_norm": 0.5435052866885572,
      "learning_rate": 9.617236119948359e-08,
      "loss": 0.5737,
      "step": 3695
    },
    {
      "epoch": 1.9230769230769231,
      "grad_norm": 0.5284636555155561,
      "learning_rate": 8.999858646998705e-08,
      "loss": 0.5691,
      "step": 3700
    },
    {
      "epoch": 1.9256756756756757,
      "grad_norm": 0.48133491617434554,
      "learning_rate": 8.402870728829283e-08,
      "loss": 0.5679,
      "step": 3705
    },
    {
      "epoch": 1.9282744282744284,
      "grad_norm": 0.525724615350644,
      "learning_rate": 7.82628464830193e-08,
      "loss": 0.5753,
      "step": 3710
    },
    {
      "epoch": 1.930873180873181,
      "grad_norm": 0.5316420453721764,
      "learning_rate": 7.270112268516482e-08,
      "loss": 0.5769,
      "step": 3715
    },
    {
      "epoch": 1.9334719334719335,
      "grad_norm": 0.5256504385548955,
      "learning_rate": 6.734365032566192e-08,
      "loss": 0.5818,
      "step": 3720
    },
    {
      "epoch": 1.936070686070686,
      "grad_norm": 0.5309069869526528,
      "learning_rate": 6.219053963303023e-08,
      "loss": 0.5734,
      "step": 3725
    },
    {
      "epoch": 1.9386694386694385,
      "grad_norm": 0.5354454111890438,
      "learning_rate": 5.724189663110058e-08,
      "loss": 0.5809,
      "step": 3730
    },
    {
      "epoch": 1.9412681912681913,
      "grad_norm": 0.5053818186885027,
      "learning_rate": 5.249782313683893e-08,
      "loss": 0.5815,
      "step": 3735
    },
    {
      "epoch": 1.943866943866944,
      "grad_norm": 0.5087952292369419,
      "learning_rate": 4.7958416758249196e-08,
      "loss": 0.572,
      "step": 3740
    },
    {
      "epoch": 1.9464656964656966,
      "grad_norm": 0.5399877020308321,
      "learning_rate": 4.3623770892368136e-08,
      "loss": 0.5745,
      "step": 3745
    },
    {
      "epoch": 1.949064449064449,
      "grad_norm": 0.4970333597526285,
      "learning_rate": 3.949397472333805e-08,
      "loss": 0.5677,
      "step": 3750
    },
    {
      "epoch": 1.9516632016632016,
      "grad_norm": 0.51125198825636,
      "learning_rate": 3.556911322057821e-08,
      "loss": 0.5807,
      "step": 3755
    },
    {
      "epoch": 1.9542619542619541,
      "grad_norm": 0.5453706817478942,
      "learning_rate": 3.184926713703185e-08,
      "loss": 0.5696,
      "step": 3760
    },
    {
      "epoch": 1.956860706860707,
      "grad_norm": 0.5259685279196025,
      "learning_rate": 2.8334513007507447e-08,
      "loss": 0.5874,
      "step": 3765
    },
    {
      "epoch": 1.9594594594594594,
      "grad_norm": 0.5299460024442244,
      "learning_rate": 2.5024923147101144e-08,
      "loss": 0.5802,
      "step": 3770
    },
    {
      "epoch": 1.9620582120582122,
      "grad_norm": 0.5085531168100673,
      "learning_rate": 2.1920565649713454e-08,
      "loss": 0.5633,
      "step": 3775
    },
    {
      "epoch": 1.9646569646569647,
      "grad_norm": 0.5066062776158768,
      "learning_rate": 1.902150438664374e-08,
      "loss": 0.5815,
      "step": 3780
    },
    {
      "epoch": 1.9672557172557172,
      "grad_norm": 0.5304292915843402,
      "learning_rate": 1.6327799005277923e-08,
      "loss": 0.5818,
      "step": 3785
    },
    {
      "epoch": 1.9698544698544698,
      "grad_norm": 0.5429547592576485,
      "learning_rate": 1.3839504927863901e-08,
      "loss": 0.5866,
      "step": 3790
    },
    {
      "epoch": 1.9724532224532223,
      "grad_norm": 0.5025697826189373,
      "learning_rate": 1.15566733503647e-08,
      "loss": 0.5805,
      "step": 3795
    },
    {
      "epoch": 1.975051975051975,
      "grad_norm": 0.5279650041637034,
      "learning_rate": 9.479351241410418e-09,
      "loss": 0.5664,
      "step": 3800
    },
    {
      "epoch": 1.9776507276507278,
      "grad_norm": 0.5231820617917791,
      "learning_rate": 7.607581341332326e-09,
      "loss": 0.5712,
      "step": 3805
    },
    {
      "epoch": 1.9802494802494803,
      "grad_norm": 0.5278123420036012,
      "learning_rate": 5.941402161279142e-09,
      "loss": 0.5737,
      "step": 3810
    },
    {
      "epoch": 1.9828482328482329,
      "grad_norm": 0.5309246932396601,
      "learning_rate": 4.48084798242987e-09,
      "loss": 0.5817,
      "step": 3815
    },
    {
      "epoch": 1.9854469854469854,
      "grad_norm": 0.5575208848453578,
      "learning_rate": 3.2259488552832676e-09,
      "loss": 0.5787,
      "step": 3820
    },
    {
      "epoch": 1.988045738045738,
      "grad_norm": 0.512653822002805,
      "learning_rate": 2.1767305990416653e-09,
      "loss": 0.5848,
      "step": 3825
    },
    {
      "epoch": 1.9906444906444907,
      "grad_norm": 0.5291714453451785,
      "learning_rate": 1.3332148010836154e-09,
      "loss": 0.5831,
      "step": 3830
    },
    {
      "epoch": 1.9932432432432432,
      "grad_norm": 0.5268775795002444,
      "learning_rate": 6.954188165131382e-10,
      "loss": 0.5895,
      "step": 3835
    },
    {
      "epoch": 1.995841995841996,
      "grad_norm": 0.5255058213556102,
      "learning_rate": 2.6335576780667227e-10,
      "loss": 0.5759,
      "step": 3840
    },
    {
      "epoch": 1.9984407484407485,
      "grad_norm": 0.5086937679965747,
      "learning_rate": 3.7034544543290694e-11,
      "loss": 0.5727,
      "step": 3845
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.7488190531730652,
      "eval_runtime": 104.7896,
      "eval_samples_per_second": 78.347,
      "eval_steps_per_second": 1.231,
      "step": 3848
    },
    {
      "epoch": 2.0,
      "step": 3848,
      "total_flos": 805692915056640.0,
      "train_loss": 0.6651840950991657,
      "train_runtime": 11913.1492,
      "train_samples_per_second": 20.671,
      "train_steps_per_second": 0.323
    }
  ],
  "logging_steps": 5,
  "max_steps": 3848,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 805692915056640.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}