|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9992688276870583, |
|
"eval_steps": 500, |
|
"global_step": 1025, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0009748964172556666, |
|
"grad_norm": 22.86059565076889, |
|
"learning_rate": 9.70873786407767e-08, |
|
"loss": 1.3065, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004874482086278333, |
|
"grad_norm": 22.08339033526192, |
|
"learning_rate": 4.854368932038835e-07, |
|
"loss": 1.316, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009748964172556666, |
|
"grad_norm": 8.710899949749317, |
|
"learning_rate": 9.70873786407767e-07, |
|
"loss": 1.2194, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014623446258834999, |
|
"grad_norm": 8.431065000460007, |
|
"learning_rate": 1.4563106796116506e-06, |
|
"loss": 1.0655, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01949792834511333, |
|
"grad_norm": 3.0129778026962346, |
|
"learning_rate": 1.941747572815534e-06, |
|
"loss": 0.9273, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024372410431391666, |
|
"grad_norm": 2.4586920682855142, |
|
"learning_rate": 2.427184466019418e-06, |
|
"loss": 0.8841, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.029246892517669997, |
|
"grad_norm": 2.1763561124357684, |
|
"learning_rate": 2.912621359223301e-06, |
|
"loss": 0.853, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03412137460394833, |
|
"grad_norm": 2.2029840594866466, |
|
"learning_rate": 3.398058252427185e-06, |
|
"loss": 0.8341, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03899585669022666, |
|
"grad_norm": 2.446000778730272, |
|
"learning_rate": 3.883495145631068e-06, |
|
"loss": 0.8118, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.043870338776504994, |
|
"grad_norm": 2.451352004117067, |
|
"learning_rate": 4.368932038834952e-06, |
|
"loss": 0.7943, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.04874482086278333, |
|
"grad_norm": 2.4129113465126957, |
|
"learning_rate": 4.854368932038836e-06, |
|
"loss": 0.7768, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05361930294906166, |
|
"grad_norm": 2.3127065430589298, |
|
"learning_rate": 5.3398058252427185e-06, |
|
"loss": 0.7778, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.058493785035339994, |
|
"grad_norm": 2.405876637403882, |
|
"learning_rate": 5.825242718446602e-06, |
|
"loss": 0.7534, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06336826712161833, |
|
"grad_norm": 2.2639828354846387, |
|
"learning_rate": 6.310679611650487e-06, |
|
"loss": 0.7398, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.06824274920789666, |
|
"grad_norm": 2.3243442460154657, |
|
"learning_rate": 6.79611650485437e-06, |
|
"loss": 0.7261, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.073117231294175, |
|
"grad_norm": 2.4227745775819836, |
|
"learning_rate": 7.2815533980582534e-06, |
|
"loss": 0.7168, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.07799171338045333, |
|
"grad_norm": 4.25071801960126, |
|
"learning_rate": 7.766990291262136e-06, |
|
"loss": 0.71, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08286619546673166, |
|
"grad_norm": 2.148825747496477, |
|
"learning_rate": 8.25242718446602e-06, |
|
"loss": 0.7074, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.08774067755300999, |
|
"grad_norm": 2.380339632272055, |
|
"learning_rate": 8.737864077669904e-06, |
|
"loss": 0.7032, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09261515963928832, |
|
"grad_norm": 2.3544801176511916, |
|
"learning_rate": 9.223300970873788e-06, |
|
"loss": 0.7, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.09748964172556666, |
|
"grad_norm": 2.1648774285785013, |
|
"learning_rate": 9.708737864077671e-06, |
|
"loss": 0.6812, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.102364123811845, |
|
"grad_norm": 2.103759384635341, |
|
"learning_rate": 9.999883898929927e-06, |
|
"loss": 0.6822, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.10723860589812333, |
|
"grad_norm": 2.1317773929603576, |
|
"learning_rate": 9.998577823812066e-06, |
|
"loss": 0.6845, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.11211308798440166, |
|
"grad_norm": 2.5038388167151924, |
|
"learning_rate": 9.995820927586548e-06, |
|
"loss": 0.6799, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.11698757007067999, |
|
"grad_norm": 2.2890913021002772, |
|
"learning_rate": 9.99161401043362e-06, |
|
"loss": 0.6893, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.12186205215695832, |
|
"grad_norm": 2.287115199507479, |
|
"learning_rate": 9.985958293397433e-06, |
|
"loss": 0.6801, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.12673653424323666, |
|
"grad_norm": 2.111605184938528, |
|
"learning_rate": 9.978855418031633e-06, |
|
"loss": 0.6761, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.131611016329515, |
|
"grad_norm": 2.3968266021989573, |
|
"learning_rate": 9.970307445922905e-06, |
|
"loss": 0.6659, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.13648549841579333, |
|
"grad_norm": 2.0562856783183383, |
|
"learning_rate": 9.960316858092613e-06, |
|
"loss": 0.6735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.14135998050207166, |
|
"grad_norm": 2.2596310641814634, |
|
"learning_rate": 9.948886554276689e-06, |
|
"loss": 0.6658, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.14623446258835, |
|
"grad_norm": 2.3851159107252022, |
|
"learning_rate": 9.936019852083982e-06, |
|
"loss": 0.6592, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.15110894467462832, |
|
"grad_norm": 2.0161901435788083, |
|
"learning_rate": 9.921720486033348e-06, |
|
"loss": 0.657, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.15598342676090665, |
|
"grad_norm": 2.127638747482369, |
|
"learning_rate": 9.905992606469708e-06, |
|
"loss": 0.6595, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.16085790884718498, |
|
"grad_norm": 2.0531639641862447, |
|
"learning_rate": 9.888840778359431e-06, |
|
"loss": 0.6515, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.1657323909334633, |
|
"grad_norm": 1.8950315321987827, |
|
"learning_rate": 9.870269979965364e-06, |
|
"loss": 0.6492, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.17060687301974164, |
|
"grad_norm": 1.9060347213321087, |
|
"learning_rate": 9.850285601401899e-06, |
|
"loss": 0.6458, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.17548135510601998, |
|
"grad_norm": 2.074587641369688, |
|
"learning_rate": 9.828893443070527e-06, |
|
"loss": 0.6515, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.1803558371922983, |
|
"grad_norm": 2.021646896648869, |
|
"learning_rate": 9.806099713976277e-06, |
|
"loss": 0.6306, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.18523031927857664, |
|
"grad_norm": 2.183178620435626, |
|
"learning_rate": 9.781911029925573e-06, |
|
"loss": 0.6317, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.190104801364855, |
|
"grad_norm": 1.9176655344163256, |
|
"learning_rate": 9.756334411606028e-06, |
|
"loss": 0.6304, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.19497928345113333, |
|
"grad_norm": 2.0283053056639226, |
|
"learning_rate": 9.729377282548696e-06, |
|
"loss": 0.6273, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.19985376553741166, |
|
"grad_norm": 2.2614639215711794, |
|
"learning_rate": 9.701047466973429e-06, |
|
"loss": 0.6227, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.20472824762369, |
|
"grad_norm": 2.048850431237601, |
|
"learning_rate": 9.67135318751792e-06, |
|
"loss": 0.6289, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.20960272970996832, |
|
"grad_norm": 2.0716723168430224, |
|
"learning_rate": 9.640303062851101e-06, |
|
"loss": 0.6235, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.21447721179624665, |
|
"grad_norm": 2.165745603614713, |
|
"learning_rate": 9.607906105171613e-06, |
|
"loss": 0.6254, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.21935169388252498, |
|
"grad_norm": 2.121524753608251, |
|
"learning_rate": 9.574171717592038e-06, |
|
"loss": 0.6215, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.22422617596880332, |
|
"grad_norm": 2.0155505697588207, |
|
"learning_rate": 9.539109691409677e-06, |
|
"loss": 0.6125, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.22910065805508165, |
|
"grad_norm": 2.5497104730670457, |
|
"learning_rate": 9.502730203264656e-06, |
|
"loss": 0.6116, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.23397514014135998, |
|
"grad_norm": 2.0859151782380163, |
|
"learning_rate": 9.465043812186194e-06, |
|
"loss": 0.6026, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.2388496222276383, |
|
"grad_norm": 2.0192086858289673, |
|
"learning_rate": 9.426061456527871e-06, |
|
"loss": 0.601, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.24372410431391664, |
|
"grad_norm": 2.08123508728777, |
|
"learning_rate": 9.385794450792818e-06, |
|
"loss": 0.593, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.24859858640019497, |
|
"grad_norm": 1.970692344326779, |
|
"learning_rate": 9.344254482349702e-06, |
|
"loss": 0.5879, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.25347306848647333, |
|
"grad_norm": 2.0360574888538037, |
|
"learning_rate": 9.301453608040523e-06, |
|
"loss": 0.5884, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.25834755057275166, |
|
"grad_norm": 1.9510711367676767, |
|
"learning_rate": 9.25740425068114e-06, |
|
"loss": 0.5937, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.26322203265903, |
|
"grad_norm": 2.0679042358578057, |
|
"learning_rate": 9.2121191954556e-06, |
|
"loss": 0.5838, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2680965147453083, |
|
"grad_norm": 2.0443212477081008, |
|
"learning_rate": 9.165611586205268e-06, |
|
"loss": 0.5719, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.27297099683158665, |
|
"grad_norm": 2.0408583983165234, |
|
"learning_rate": 9.11789492161388e-06, |
|
"loss": 0.5839, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.277845478917865, |
|
"grad_norm": 1.9767895517261123, |
|
"learning_rate": 9.068983051289589e-06, |
|
"loss": 0.5865, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2827199610041433, |
|
"grad_norm": 1.881981157978116, |
|
"learning_rate": 9.018890171745156e-06, |
|
"loss": 0.5793, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.28759444309042165, |
|
"grad_norm": 1.9932110390018152, |
|
"learning_rate": 8.967630822277472e-06, |
|
"loss": 0.5808, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2924689251767, |
|
"grad_norm": 1.9915669044094475, |
|
"learning_rate": 8.915219880747555e-06, |
|
"loss": 0.569, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2973434072629783, |
|
"grad_norm": 2.046268209029942, |
|
"learning_rate": 8.861672559262316e-06, |
|
"loss": 0.5759, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.30221788934925664, |
|
"grad_norm": 1.9535790327085676, |
|
"learning_rate": 8.80700439975928e-06, |
|
"loss": 0.5717, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.30709237143553497, |
|
"grad_norm": 2.280776169449102, |
|
"learning_rate": 8.751231269495604e-06, |
|
"loss": 0.5703, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.3119668535218133, |
|
"grad_norm": 2.0042263363709996, |
|
"learning_rate": 8.694369356442638e-06, |
|
"loss": 0.5668, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.31684133560809163, |
|
"grad_norm": 2.098934601526615, |
|
"learning_rate": 8.636435164587436e-06, |
|
"loss": 0.5532, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.32171581769436997, |
|
"grad_norm": 2.0675738967767496, |
|
"learning_rate": 8.577445509142514e-06, |
|
"loss": 0.5585, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3265902997806483, |
|
"grad_norm": 2.3766255529170692, |
|
"learning_rate": 8.517417511665299e-06, |
|
"loss": 0.5658, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.3314647818669266, |
|
"grad_norm": 2.1910659775597248, |
|
"learning_rate": 8.456368595088647e-06, |
|
"loss": 0.5507, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.33633926395320496, |
|
"grad_norm": 1.8703655986029497, |
|
"learning_rate": 8.394316478663886e-06, |
|
"loss": 0.5406, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.3412137460394833, |
|
"grad_norm": 1.9244217565682906, |
|
"learning_rate": 8.331279172817876e-06, |
|
"loss": 0.542, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3460882281257616, |
|
"grad_norm": 2.0014619556553335, |
|
"learning_rate": 8.26727497392553e-06, |
|
"loss": 0.5392, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.35096271021203995, |
|
"grad_norm": 1.8442031091685414, |
|
"learning_rate": 8.20232245899935e-06, |
|
"loss": 0.5318, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.3558371922983183, |
|
"grad_norm": 1.9799423972998886, |
|
"learning_rate": 8.136440480297514e-06, |
|
"loss": 0.5414, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.3607116743845966, |
|
"grad_norm": 2.0577170932676596, |
|
"learning_rate": 8.069648159852059e-06, |
|
"loss": 0.5296, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.36558615647087495, |
|
"grad_norm": 2.037446340676939, |
|
"learning_rate": 8.001964883918793e-06, |
|
"loss": 0.5348, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.3704606385571533, |
|
"grad_norm": 2.0638439360397665, |
|
"learning_rate": 7.933410297350472e-06, |
|
"loss": 0.5229, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3753351206434316, |
|
"grad_norm": 1.9569671735850687, |
|
"learning_rate": 7.864004297894963e-06, |
|
"loss": 0.5275, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.38020960272971, |
|
"grad_norm": 1.9870139157026745, |
|
"learning_rate": 7.793767030419975e-06, |
|
"loss": 0.533, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.3850840848159883, |
|
"grad_norm": 1.9631002389899173, |
|
"learning_rate": 7.722718881066086e-06, |
|
"loss": 0.5245, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.38995856690226666, |
|
"grad_norm": 1.9193362092353006, |
|
"learning_rate": 7.650880471329725e-06, |
|
"loss": 0.5203, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.394833048988545, |
|
"grad_norm": 1.9397872066961752, |
|
"learning_rate": 7.578272652077849e-06, |
|
"loss": 0.5144, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.3997075310748233, |
|
"grad_norm": 1.952394556800401, |
|
"learning_rate": 7.504916497496051e-06, |
|
"loss": 0.5168, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.40458201316110165, |
|
"grad_norm": 1.9410715991990146, |
|
"learning_rate": 7.43083329897184e-06, |
|
"loss": 0.4964, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.40945649524738, |
|
"grad_norm": 1.9371602423339187, |
|
"learning_rate": 7.3560445589148875e-06, |
|
"loss": 0.5136, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4143309773336583, |
|
"grad_norm": 1.9398436533180357, |
|
"learning_rate": 7.2805719845160195e-06, |
|
"loss": 0.5012, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.41920545941993664, |
|
"grad_norm": 1.8404891949351312, |
|
"learning_rate": 7.20443748144678e-06, |
|
"loss": 0.4985, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.424079941506215, |
|
"grad_norm": 1.9219013215637166, |
|
"learning_rate": 7.127663147501377e-06, |
|
"loss": 0.497, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.4289544235924933, |
|
"grad_norm": 2.087372537148497, |
|
"learning_rate": 7.050271266182862e-06, |
|
"loss": 0.4954, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.43382890567877164, |
|
"grad_norm": 1.9732921364375713, |
|
"learning_rate": 6.97228430023543e-06, |
|
"loss": 0.4914, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.43870338776504997, |
|
"grad_norm": 1.8509876619225545, |
|
"learning_rate": 6.893724885124668e-06, |
|
"loss": 0.4816, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.4435778698513283, |
|
"grad_norm": 1.876089892137146, |
|
"learning_rate": 6.814615822467691e-06, |
|
"loss": 0.4863, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.44845235193760663, |
|
"grad_norm": 1.956376416630627, |
|
"learning_rate": 6.734980073415038e-06, |
|
"loss": 0.4914, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.45332683402388496, |
|
"grad_norm": 1.831312024979652, |
|
"learning_rate": 6.654840751986282e-06, |
|
"loss": 0.4773, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.4582013161101633, |
|
"grad_norm": 1.9745029935310336, |
|
"learning_rate": 6.574221118361254e-06, |
|
"loss": 0.4843, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4630757981964416, |
|
"grad_norm": 2.090848022257445, |
|
"learning_rate": 6.493144572128852e-06, |
|
"loss": 0.4891, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.46795028028271995, |
|
"grad_norm": 2.0539193178381345, |
|
"learning_rate": 6.411634645495388e-06, |
|
"loss": 0.465, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.4728247623689983, |
|
"grad_norm": 1.8380774504527437, |
|
"learning_rate": 6.329714996454436e-06, |
|
"loss": 0.4717, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.4776992444552766, |
|
"grad_norm": 1.9501438562254991, |
|
"learning_rate": 6.247409401920184e-06, |
|
"loss": 0.47, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.48257372654155495, |
|
"grad_norm": 1.9399214168055752, |
|
"learning_rate": 6.164741750826246e-06, |
|
"loss": 0.4696, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.4874482086278333, |
|
"grad_norm": 1.9438999474698049, |
|
"learning_rate": 6.081736037191998e-06, |
|
"loss": 0.4761, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4923226907141116, |
|
"grad_norm": 2.109991763270123, |
|
"learning_rate": 5.998416353158369e-06, |
|
"loss": 0.467, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.49719717280038994, |
|
"grad_norm": 1.9305942803443825, |
|
"learning_rate": 5.914806881995192e-06, |
|
"loss": 0.4519, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5020716548866683, |
|
"grad_norm": 1.8894659709352353, |
|
"learning_rate": 5.830931891082077e-06, |
|
"loss": 0.4625, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.5069461369729467, |
|
"grad_norm": 1.943600690573164, |
|
"learning_rate": 5.746815724864884e-06, |
|
"loss": 0.4486, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5118206190592249, |
|
"grad_norm": 1.8604126506438579, |
|
"learning_rate": 5.662482797789833e-06, |
|
"loss": 0.4501, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.5166951011455033, |
|
"grad_norm": 1.8945174085661423, |
|
"learning_rate": 5.577957587217281e-06, |
|
"loss": 0.4576, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5215695832317816, |
|
"grad_norm": 1.9454371455405457, |
|
"learning_rate": 5.493264626317252e-06, |
|
"loss": 0.4546, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.52644406531806, |
|
"grad_norm": 1.9257763152448473, |
|
"learning_rate": 5.408428496948761e-06, |
|
"loss": 0.4476, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5313185474043383, |
|
"grad_norm": 1.942601355689532, |
|
"learning_rate": 5.323473822525011e-06, |
|
"loss": 0.4419, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.5361930294906166, |
|
"grad_norm": 1.8380797740924733, |
|
"learning_rate": 5.238425260866524e-06, |
|
"loss": 0.4339, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5410675115768949, |
|
"grad_norm": 1.9856061522664756, |
|
"learning_rate": 5.153307497044291e-06, |
|
"loss": 0.4486, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.5459419936631733, |
|
"grad_norm": 2.0152615373656446, |
|
"learning_rate": 5.068145236215007e-06, |
|
"loss": 0.4361, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5508164757494516, |
|
"grad_norm": 1.8713611793445957, |
|
"learning_rate": 4.982963196450478e-06, |
|
"loss": 0.4388, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.55569095783573, |
|
"grad_norm": 1.9591094110255243, |
|
"learning_rate": 4.8977861015632865e-06, |
|
"loss": 0.4382, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5605654399220082, |
|
"grad_norm": 1.8237556125663812, |
|
"learning_rate": 4.812638673930777e-06, |
|
"loss": 0.4289, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.5654399220082866, |
|
"grad_norm": 1.8090312867754712, |
|
"learning_rate": 4.72754562731947e-06, |
|
"loss": 0.4258, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.5703144040945649, |
|
"grad_norm": 1.8257902996618338, |
|
"learning_rate": 4.64253165971197e-06, |
|
"loss": 0.4303, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.5751888861808433, |
|
"grad_norm": 2.0254032476042223, |
|
"learning_rate": 4.557621446138455e-06, |
|
"loss": 0.4202, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.5800633682671216, |
|
"grad_norm": 1.9947879579802112, |
|
"learning_rate": 4.47283963151483e-06, |
|
"loss": 0.424, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.5849378503534, |
|
"grad_norm": 1.8982751664240438, |
|
"learning_rate": 4.388210823489616e-06, |
|
"loss": 0.4221, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5898123324396782, |
|
"grad_norm": 1.8999729869459319, |
|
"learning_rate": 4.3037595853016645e-06, |
|
"loss": 0.4162, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.5946868145259566, |
|
"grad_norm": 1.9051454199437885, |
|
"learning_rate": 4.219510428650752e-06, |
|
"loss": 0.4154, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.5995612966122349, |
|
"grad_norm": 1.8892938847030196, |
|
"learning_rate": 4.135487806583141e-06, |
|
"loss": 0.4183, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.6044357786985133, |
|
"grad_norm": 1.8391212463048745, |
|
"learning_rate": 4.051716106394162e-06, |
|
"loss": 0.4169, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6093102607847917, |
|
"grad_norm": 1.9164931200596216, |
|
"learning_rate": 3.968219642549876e-06, |
|
"loss": 0.4096, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.6141847428710699, |
|
"grad_norm": 1.8882587070789514, |
|
"learning_rate": 3.885022649629887e-06, |
|
"loss": 0.4089, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6190592249573483, |
|
"grad_norm": 2.0414351606801677, |
|
"learning_rate": 3.8021492752933196e-06, |
|
"loss": 0.4118, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.6239337070436266, |
|
"grad_norm": 1.88948983848305, |
|
"learning_rate": 3.7196235732700546e-06, |
|
"loss": 0.4155, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.628808189129905, |
|
"grad_norm": 1.9185526120999834, |
|
"learning_rate": 3.637469496379201e-06, |
|
"loss": 0.3988, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.6336826712161833, |
|
"grad_norm": 1.8441572398843216, |
|
"learning_rate": 3.5557108895768723e-06, |
|
"loss": 0.4099, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.6385571533024617, |
|
"grad_norm": 2.0485958888309796, |
|
"learning_rate": 3.4743714830352604e-06, |
|
"loss": 0.405, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.6434316353887399, |
|
"grad_norm": 1.806992701143073, |
|
"learning_rate": 3.3934748852550353e-06, |
|
"loss": 0.4037, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6483061174750183, |
|
"grad_norm": 1.943526238424205, |
|
"learning_rate": 3.3130445762130485e-06, |
|
"loss": 0.3967, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.6531805995612966, |
|
"grad_norm": 1.8848557764463123, |
|
"learning_rate": 3.2331039005473495e-06, |
|
"loss": 0.3924, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.658055081647575, |
|
"grad_norm": 1.892841917353598, |
|
"learning_rate": 3.1536760607814747e-06, |
|
"loss": 0.3961, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.6629295637338533, |
|
"grad_norm": 1.7833910365050067, |
|
"learning_rate": 3.0747841105899965e-06, |
|
"loss": 0.3973, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.6678040458201316, |
|
"grad_norm": 1.8266463006432494, |
|
"learning_rate": 2.9964509481072627e-06, |
|
"loss": 0.3829, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.6726785279064099, |
|
"grad_norm": 1.9794830429310166, |
|
"learning_rate": 2.918699309281292e-06, |
|
"loss": 0.3886, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.6775530099926883, |
|
"grad_norm": 1.8605018457416864, |
|
"learning_rate": 2.84155176127473e-06, |
|
"loss": 0.3889, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.6824274920789666, |
|
"grad_norm": 1.8677944161418016, |
|
"learning_rate": 2.765030695914815e-06, |
|
"loss": 0.3878, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.687301974165245, |
|
"grad_norm": 1.7809933219767806, |
|
"learning_rate": 2.689158323194212e-06, |
|
"loss": 0.389, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.6921764562515232, |
|
"grad_norm": 1.9534924091406372, |
|
"learning_rate": 2.6139566648246355e-06, |
|
"loss": 0.38, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.6970509383378016, |
|
"grad_norm": 1.8154536663255623, |
|
"learning_rate": 2.5394475478451246e-06, |
|
"loss": 0.3819, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.7019254204240799, |
|
"grad_norm": 1.910488831165307, |
|
"learning_rate": 2.4656525982868106e-06, |
|
"loss": 0.3805, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7067999025103583, |
|
"grad_norm": 1.8313054725277078, |
|
"learning_rate": 2.39259323489603e-06, |
|
"loss": 0.3742, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.7116743845966366, |
|
"grad_norm": 1.8851239942706999, |
|
"learning_rate": 2.320290662917607e-06, |
|
"loss": 0.3726, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.716548866682915, |
|
"grad_norm": 1.8525938618294533, |
|
"learning_rate": 2.2487658679400943e-06, |
|
"loss": 0.3812, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.7214233487691932, |
|
"grad_norm": 1.952969026800498, |
|
"learning_rate": 2.178039609804777e-06, |
|
"loss": 0.3757, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7262978308554716, |
|
"grad_norm": 1.8641758450652162, |
|
"learning_rate": 2.108132416580198e-06, |
|
"loss": 0.3794, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.7311723129417499, |
|
"grad_norm": 1.8518338990761232, |
|
"learning_rate": 2.0390645786039406e-06, |
|
"loss": 0.3713, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7360467950280283, |
|
"grad_norm": 1.8015066231257286, |
|
"learning_rate": 1.9708561425934393e-06, |
|
"loss": 0.3784, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.7409212771143066, |
|
"grad_norm": 1.8409794783180748, |
|
"learning_rate": 1.903526905827474e-06, |
|
"loss": 0.3751, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7457957592005849, |
|
"grad_norm": 1.8641076874515972, |
|
"learning_rate": 1.8370964104000783e-06, |
|
"loss": 0.3746, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.7506702412868632, |
|
"grad_norm": 1.786737876865444, |
|
"learning_rate": 1.7715839375485067e-06, |
|
"loss": 0.3628, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7555447233731416, |
|
"grad_norm": 1.8446799495571504, |
|
"learning_rate": 1.7070085020569194e-06, |
|
"loss": 0.3644, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.76041920545942, |
|
"grad_norm": 1.751658260263151, |
|
"learning_rate": 1.6433888467374015e-06, |
|
"loss": 0.37, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.7652936875456983, |
|
"grad_norm": 1.8477843728803192, |
|
"learning_rate": 1.5807434369899248e-06, |
|
"loss": 0.3628, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.7701681696319767, |
|
"grad_norm": 1.8593690412561963, |
|
"learning_rate": 1.51909045544282e-06, |
|
"loss": 0.3708, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.7750426517182549, |
|
"grad_norm": 1.747782138054366, |
|
"learning_rate": 1.4584477966753324e-06, |
|
"loss": 0.3652, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.7799171338045333, |
|
"grad_norm": 1.755667670884649, |
|
"learning_rate": 1.398833062023775e-06, |
|
"loss": 0.3691, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.7847916158908116, |
|
"grad_norm": 1.8425636184668681, |
|
"learning_rate": 1.3402635544727992e-06, |
|
"loss": 0.366, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.78966609797709, |
|
"grad_norm": 1.9026395556203364, |
|
"learning_rate": 1.2827562736332555e-06, |
|
"loss": 0.3589, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.7945405800633683, |
|
"grad_norm": 1.7133796113318556, |
|
"learning_rate": 1.226327910808116e-06, |
|
"loss": 0.3597, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.7994150621496466, |
|
"grad_norm": 1.8616850952101744, |
|
"learning_rate": 1.1709948441478763e-06, |
|
"loss": 0.3583, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8042895442359249, |
|
"grad_norm": 1.7675149524570417, |
|
"learning_rate": 1.116773133896848e-06, |
|
"loss": 0.3605, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.8091640263222033, |
|
"grad_norm": 1.900735872689697, |
|
"learning_rate": 1.0636785177317255e-06, |
|
"loss": 0.3547, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8140385084084816, |
|
"grad_norm": 1.7353000724393655, |
|
"learning_rate": 1.0117264061937777e-06, |
|
"loss": 0.3543, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.81891299049476, |
|
"grad_norm": 1.8226701532638399, |
|
"learning_rate": 9.60931878215985e-07, |
|
"loss": 0.3523, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8237874725810382, |
|
"grad_norm": 1.7199982449094653, |
|
"learning_rate": 9.113096767464302e-07, |
|
"loss": 0.3572, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.8286619546673166, |
|
"grad_norm": 1.7416045554651556, |
|
"learning_rate": 8.62874204469204e-07, |
|
"loss": 0.3546, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8335364367535949, |
|
"grad_norm": 1.7748467098472374, |
|
"learning_rate": 8.156395196240752e-07, |
|
"loss": 0.3488, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.8384109188398733, |
|
"grad_norm": 1.8256676668006175, |
|
"learning_rate": 7.696193319261242e-07, |
|
"loss": 0.3467, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8432854009261516, |
|
"grad_norm": 1.8827284517179357, |
|
"learning_rate": 7.248269985865514e-07, |
|
"loss": 0.3521, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.84815988301243, |
|
"grad_norm": 1.6781224427131647, |
|
"learning_rate": 6.812755204357857e-07, |
|
"loss": 0.3535, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8530343650987082, |
|
"grad_norm": 1.6995357059191782, |
|
"learning_rate": 6.389775381500351e-07, |
|
"loss": 0.3435, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.8579088471849866, |
|
"grad_norm": 1.7402333449103136, |
|
"learning_rate": 5.979453285823711e-07, |
|
"loss": 0.3443, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.8627833292712649, |
|
"grad_norm": 1.7615298544617708, |
|
"learning_rate": 5.58190801199413e-07, |
|
"loss": 0.3528, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.8676578113575433, |
|
"grad_norm": 1.7343491942185678, |
|
"learning_rate": 5.197254946246416e-07, |
|
"loss": 0.3495, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.8725322934438215, |
|
"grad_norm": 1.657555480865734, |
|
"learning_rate": 4.825605732893546e-07, |
|
"loss": 0.3468, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.8774067755300999, |
|
"grad_norm": 1.6499922538828395, |
|
"learning_rate": 4.4670682419221954e-07, |
|
"loss": 0.3396, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.8822812576163782, |
|
"grad_norm": 1.7518714860746227, |
|
"learning_rate": 4.121746537683907e-07, |
|
"loss": 0.3504, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.8871557397026566, |
|
"grad_norm": 1.770832100980816, |
|
"learning_rate": 3.789740848690682e-07, |
|
"loss": 0.3518, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.8920302217889349, |
|
"grad_norm": 1.7567597669594073, |
|
"learning_rate": 3.4711475385240057e-07, |
|
"loss": 0.3492, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.8969047038752133, |
|
"grad_norm": 1.7161657861744943, |
|
"learning_rate": 3.1660590778656406e-07, |
|
"loss": 0.3428, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9017791859614915, |
|
"grad_norm": 1.728233513619595, |
|
"learning_rate": 2.8745640176582766e-07, |
|
"loss": 0.3396, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.9066536680477699, |
|
"grad_norm": 1.8381612957565303, |
|
"learning_rate": 2.5967469634039177e-07, |
|
"loss": 0.345, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9115281501340483, |
|
"grad_norm": 1.7243006426458327, |
|
"learning_rate": 2.3326885506074314e-07, |
|
"loss": 0.3465, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.9164026322203266, |
|
"grad_norm": 1.5970738101828363, |
|
"learning_rate": 2.0824654213723038e-07, |
|
"loss": 0.3429, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.921277114306605, |
|
"grad_norm": 1.6533959425871336, |
|
"learning_rate": 1.8461502021555721e-07, |
|
"loss": 0.3389, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.9261515963928832, |
|
"grad_norm": 1.7738912948147607, |
|
"learning_rate": 1.6238114826881868e-07, |
|
"loss": 0.3439, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9310260784791616, |
|
"grad_norm": 1.7429674496591296, |
|
"learning_rate": 1.4155137960670974e-07, |
|
"loss": 0.3389, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.9359005605654399, |
|
"grad_norm": 1.7343774559673155, |
|
"learning_rate": 1.2213176000246852e-07, |
|
"loss": 0.345, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9407750426517183, |
|
"grad_norm": 1.7876244818652967, |
|
"learning_rate": 1.0412792593811505e-07, |
|
"loss": 0.3426, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.9456495247379966, |
|
"grad_norm": 1.6810981829120015, |
|
"learning_rate": 8.754510296847651e-08, |
|
"loss": 0.3444, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.950524006824275, |
|
"grad_norm": 1.7314637174387817, |
|
"learning_rate": 7.238810420448883e-08, |
|
"loss": 0.3339, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.9553984889105532, |
|
"grad_norm": 1.6057497995450185, |
|
"learning_rate": 5.866132891620746e-08, |
|
"loss": 0.346, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.9602729709968316, |
|
"grad_norm": 1.7119707817106466, |
|
"learning_rate": 4.6368761255930485e-08, |
|
"loss": 0.3379, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.9651474530831099, |
|
"grad_norm": 1.7236174016491694, |
|
"learning_rate": 3.551396910181415e-08, |
|
"loss": 0.3412, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.9700219351693883, |
|
"grad_norm": 1.6591399917697027, |
|
"learning_rate": 2.6100103022306257e-08, |
|
"loss": 0.3433, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.9748964172556666, |
|
"grad_norm": 1.7276142814853213, |
|
"learning_rate": 1.812989536170484e-08, |
|
"loss": 0.3414, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.979770899341945, |
|
"grad_norm": 1.6558936839338774, |
|
"learning_rate": 1.1605659447102568e-08, |
|
"loss": 0.3366, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.9846453814282232, |
|
"grad_norm": 1.6766541339225858, |
|
"learning_rate": 6.529288916952703e-09, |
|
"loss": 0.3331, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.9895198635145016, |
|
"grad_norm": 1.7452767005894518, |
|
"learning_rate": 2.9022571714448776e-09, |
|
"loss": 0.3429, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.9943943456007799, |
|
"grad_norm": 1.8097610241635727, |
|
"learning_rate": 7.256169448560668e-10, |
|
"loss": 0.3415, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.9992688276870583, |
|
"grad_norm": 1.758337391190151, |
|
"learning_rate": 0.0, |
|
"loss": 0.3467, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9992688276870583, |
|
"eval_loss": 0.3359443247318268, |
|
"eval_runtime": 96.9406, |
|
"eval_samples_per_second": 3.115, |
|
"eval_steps_per_second": 0.784, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.9992688276870583, |
|
"step": 1025, |
|
"total_flos": 214561802158080.0, |
|
"train_loss": 0.5001517156275307, |
|
"train_runtime": 26987.6611, |
|
"train_samples_per_second": 1.216, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1025, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 214561802158080.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|