|
{ |
|
"best_metric": 0.12016157805919647, |
|
"best_model_checkpoint": "autotrain-ledky-lrgbn/checkpoint-13926", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 13926, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005385609651012494, |
|
"grad_norm": 1.214734673500061, |
|
"learning_rate": 8.973438621679828e-07, |
|
"loss": 0.696, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.010771219302024989, |
|
"grad_norm": 0.9307985901832581, |
|
"learning_rate": 1.7946877243359655e-06, |
|
"loss": 0.6901, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016156828953037484, |
|
"grad_norm": 1.196394443511963, |
|
"learning_rate": 2.6920315865039484e-06, |
|
"loss": 0.6818, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.021542438604049977, |
|
"grad_norm": 1.0026447772979736, |
|
"learning_rate": 3.589375448671931e-06, |
|
"loss": 0.6614, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.026928048255062474, |
|
"grad_norm": 1.2615079879760742, |
|
"learning_rate": 4.486719310839914e-06, |
|
"loss": 0.6209, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03231365790607497, |
|
"grad_norm": 2.165177583694458, |
|
"learning_rate": 5.384063173007897e-06, |
|
"loss": 0.5555, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03769926755708746, |
|
"grad_norm": 1.6065446138381958, |
|
"learning_rate": 6.2814070351758795e-06, |
|
"loss": 0.4827, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.043084877208099955, |
|
"grad_norm": 1.00612473487854, |
|
"learning_rate": 7.178750897343862e-06, |
|
"loss": 0.415, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.048470486859112455, |
|
"grad_norm": 1.8842555284500122, |
|
"learning_rate": 8.076094759511846e-06, |
|
"loss": 0.3724, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.05385609651012495, |
|
"grad_norm": 1.875260353088379, |
|
"learning_rate": 8.973438621679828e-06, |
|
"loss": 0.2979, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05924170616113744, |
|
"grad_norm": 2.970935106277466, |
|
"learning_rate": 9.870782483847811e-06, |
|
"loss": 0.2478, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.06462731581214994, |
|
"grad_norm": 0.5585727691650391, |
|
"learning_rate": 1.0768126346015794e-05, |
|
"loss": 0.2456, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07001292546316243, |
|
"grad_norm": 0.5557870864868164, |
|
"learning_rate": 1.1665470208183776e-05, |
|
"loss": 0.2346, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07539853511417492, |
|
"grad_norm": 5.3282365798950195, |
|
"learning_rate": 1.2562814070351759e-05, |
|
"loss": 0.2192, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08078414476518742, |
|
"grad_norm": 0.8856891989707947, |
|
"learning_rate": 1.3460157932519743e-05, |
|
"loss": 0.2935, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08616975441619991, |
|
"grad_norm": 0.9343020915985107, |
|
"learning_rate": 1.4357501794687724e-05, |
|
"loss": 0.2117, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0915553640672124, |
|
"grad_norm": 8.641404151916504, |
|
"learning_rate": 1.5254845656855707e-05, |
|
"loss": 0.2256, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.09694097371822491, |
|
"grad_norm": 0.31070542335510254, |
|
"learning_rate": 1.615218951902369e-05, |
|
"loss": 0.2155, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.1023265833692374, |
|
"grad_norm": 2.8748903274536133, |
|
"learning_rate": 1.7049533381191674e-05, |
|
"loss": 0.2272, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.1077121930202499, |
|
"grad_norm": 0.5525055527687073, |
|
"learning_rate": 1.7946877243359657e-05, |
|
"loss": 0.3538, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11309780267126239, |
|
"grad_norm": 23.755407333374023, |
|
"learning_rate": 1.884422110552764e-05, |
|
"loss": 0.1872, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.11848341232227488, |
|
"grad_norm": 34.48551559448242, |
|
"learning_rate": 1.9741564967695622e-05, |
|
"loss": 0.1676, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12386902197328738, |
|
"grad_norm": 1.0720394849777222, |
|
"learning_rate": 2.0638908829863605e-05, |
|
"loss": 0.258, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.12925463162429987, |
|
"grad_norm": 0.2333178073167801, |
|
"learning_rate": 2.1536252692031587e-05, |
|
"loss": 0.2356, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.13464024127531238, |
|
"grad_norm": 28.830148696899414, |
|
"learning_rate": 2.243359655419957e-05, |
|
"loss": 0.2483, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.14002585092632486, |
|
"grad_norm": 0.38484323024749756, |
|
"learning_rate": 2.3330940416367553e-05, |
|
"loss": 0.2576, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.14541146057733736, |
|
"grad_norm": 9.458002090454102, |
|
"learning_rate": 2.4228284278535535e-05, |
|
"loss": 0.2542, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.15079707022834984, |
|
"grad_norm": 0.28468650579452515, |
|
"learning_rate": 2.5125628140703518e-05, |
|
"loss": 0.2451, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.15618267987936235, |
|
"grad_norm": 4.210727691650391, |
|
"learning_rate": 2.6022972002871504e-05, |
|
"loss": 0.2512, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.16156828953037483, |
|
"grad_norm": 0.2801736295223236, |
|
"learning_rate": 2.6920315865039487e-05, |
|
"loss": 0.2245, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.16695389918138734, |
|
"grad_norm": 0.18515126407146454, |
|
"learning_rate": 2.781765972720747e-05, |
|
"loss": 0.2455, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.17233950883239982, |
|
"grad_norm": 24.687742233276367, |
|
"learning_rate": 2.871500358937545e-05, |
|
"loss": 0.1717, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.17772511848341233, |
|
"grad_norm": 0.31296807527542114, |
|
"learning_rate": 2.961234745154343e-05, |
|
"loss": 0.3381, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.1831107281344248, |
|
"grad_norm": 0.2845638692378998, |
|
"learning_rate": 3.0509691313711414e-05, |
|
"loss": 0.2039, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1884963377854373, |
|
"grad_norm": 9.822659492492676, |
|
"learning_rate": 3.14070351758794e-05, |
|
"loss": 0.2695, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.19388194743644982, |
|
"grad_norm": 11.07955265045166, |
|
"learning_rate": 3.230437903804738e-05, |
|
"loss": 0.1941, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1992675570874623, |
|
"grad_norm": 0.13381847739219666, |
|
"learning_rate": 3.3201722900215365e-05, |
|
"loss": 0.1602, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.2046531667384748, |
|
"grad_norm": 0.3931732177734375, |
|
"learning_rate": 3.409906676238335e-05, |
|
"loss": 0.1306, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2100387763894873, |
|
"grad_norm": 59.89822006225586, |
|
"learning_rate": 3.499641062455133e-05, |
|
"loss": 0.218, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.2154243860404998, |
|
"grad_norm": 2.6214139461517334, |
|
"learning_rate": 3.5893754486719313e-05, |
|
"loss": 0.1886, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22080999569151227, |
|
"grad_norm": 1.7539465427398682, |
|
"learning_rate": 3.6791098348887296e-05, |
|
"loss": 0.1999, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.22619560534252478, |
|
"grad_norm": 0.13422812521457672, |
|
"learning_rate": 3.768844221105528e-05, |
|
"loss": 0.1967, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.23158121499353726, |
|
"grad_norm": 17.16714859008789, |
|
"learning_rate": 3.858578607322326e-05, |
|
"loss": 0.2197, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.23696682464454977, |
|
"grad_norm": 4.463374614715576, |
|
"learning_rate": 3.9483129935391244e-05, |
|
"loss": 0.1428, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.24235243429556225, |
|
"grad_norm": 14.286873817443848, |
|
"learning_rate": 4.038047379755923e-05, |
|
"loss": 0.154, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.24773804394657475, |
|
"grad_norm": 1.0160866975784302, |
|
"learning_rate": 4.127781765972721e-05, |
|
"loss": 0.1008, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.25312365359758726, |
|
"grad_norm": 0.08231418579816818, |
|
"learning_rate": 4.217516152189519e-05, |
|
"loss": 0.1716, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.25850926324859974, |
|
"grad_norm": 0.4267788231372833, |
|
"learning_rate": 4.3072505384063175e-05, |
|
"loss": 0.2282, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2638948728996122, |
|
"grad_norm": 8.705976486206055, |
|
"learning_rate": 4.396984924623116e-05, |
|
"loss": 0.1687, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.26928048255062476, |
|
"grad_norm": 0.08920421451330185, |
|
"learning_rate": 4.486719310839914e-05, |
|
"loss": 0.1566, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.27466609220163724, |
|
"grad_norm": 136.6876220703125, |
|
"learning_rate": 4.576453697056712e-05, |
|
"loss": 0.3009, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2800517018526497, |
|
"grad_norm": 97.63652038574219, |
|
"learning_rate": 4.6661880832735106e-05, |
|
"loss": 0.1277, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2854373115036622, |
|
"grad_norm": 0.1028209999203682, |
|
"learning_rate": 4.755922469490309e-05, |
|
"loss": 0.1079, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.29082292115467473, |
|
"grad_norm": 0.1444273144006729, |
|
"learning_rate": 4.845656855707107e-05, |
|
"loss": 0.2049, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.2962085308056872, |
|
"grad_norm": 0.07809091359376907, |
|
"learning_rate": 4.9353912419239054e-05, |
|
"loss": 0.1565, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.3015941404566997, |
|
"grad_norm": 27.804719924926758, |
|
"learning_rate": 4.9972073725365035e-05, |
|
"loss": 0.1187, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.30697975010771217, |
|
"grad_norm": 2.5913608074188232, |
|
"learning_rate": 4.987233703024017e-05, |
|
"loss": 0.2541, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.3123653597587247, |
|
"grad_norm": 0.08927720040082932, |
|
"learning_rate": 4.97726003351153e-05, |
|
"loss": 0.1258, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3177509694097372, |
|
"grad_norm": 11.248839378356934, |
|
"learning_rate": 4.967286363999043e-05, |
|
"loss": 0.2244, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.32313657906074966, |
|
"grad_norm": 4.807493686676025, |
|
"learning_rate": 4.957312694486556e-05, |
|
"loss": 0.1561, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3285221887117622, |
|
"grad_norm": 2.7352848052978516, |
|
"learning_rate": 4.9473390249740684e-05, |
|
"loss": 0.2706, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.3339077983627747, |
|
"grad_norm": 2.721325159072876, |
|
"learning_rate": 4.937365355461582e-05, |
|
"loss": 0.1181, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.33929340801378716, |
|
"grad_norm": 0.08835005760192871, |
|
"learning_rate": 4.927391685949095e-05, |
|
"loss": 0.0849, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.34467901766479964, |
|
"grad_norm": 0.06279947608709335, |
|
"learning_rate": 4.917418016436607e-05, |
|
"loss": 0.0603, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.35006462731581217, |
|
"grad_norm": 5.033148765563965, |
|
"learning_rate": 4.9074443469241206e-05, |
|
"loss": 0.1982, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.35545023696682465, |
|
"grad_norm": 19.948888778686523, |
|
"learning_rate": 4.897470677411633e-05, |
|
"loss": 0.1951, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.36083584661783713, |
|
"grad_norm": 0.15091180801391602, |
|
"learning_rate": 4.887497007899147e-05, |
|
"loss": 0.1967, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.3662214562688496, |
|
"grad_norm": 11.530139923095703, |
|
"learning_rate": 4.8775233383866594e-05, |
|
"loss": 0.2055, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.37160706591986215, |
|
"grad_norm": 1.3459250926971436, |
|
"learning_rate": 4.867549668874172e-05, |
|
"loss": 0.165, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.3769926755708746, |
|
"grad_norm": 0.1419544219970703, |
|
"learning_rate": 4.8575759993616855e-05, |
|
"loss": 0.2761, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.3823782852218871, |
|
"grad_norm": 0.0771302655339241, |
|
"learning_rate": 4.847602329849198e-05, |
|
"loss": 0.1745, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.38776389487289964, |
|
"grad_norm": 7.464870452880859, |
|
"learning_rate": 4.8376286603367116e-05, |
|
"loss": 0.1824, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.3931495045239121, |
|
"grad_norm": 8.839171409606934, |
|
"learning_rate": 4.827654990824224e-05, |
|
"loss": 0.2336, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.3985351141749246, |
|
"grad_norm": 2.038132667541504, |
|
"learning_rate": 4.817681321311737e-05, |
|
"loss": 0.3063, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.4039207238259371, |
|
"grad_norm": 1.8243613243103027, |
|
"learning_rate": 4.8077076517992504e-05, |
|
"loss": 0.1202, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.4093063334769496, |
|
"grad_norm": 0.09693591296672821, |
|
"learning_rate": 4.797733982286763e-05, |
|
"loss": 0.1442, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4146919431279621, |
|
"grad_norm": 4.965816020965576, |
|
"learning_rate": 4.787760312774276e-05, |
|
"loss": 0.2029, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.4200775527789746, |
|
"grad_norm": 0.06871023029088974, |
|
"learning_rate": 4.777786643261789e-05, |
|
"loss": 0.0826, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.42546316242998705, |
|
"grad_norm": 0.0709209218621254, |
|
"learning_rate": 4.767812973749302e-05, |
|
"loss": 0.1575, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.4308487720809996, |
|
"grad_norm": 1.0321829319000244, |
|
"learning_rate": 4.7578393042368154e-05, |
|
"loss": 0.1481, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43623438173201207, |
|
"grad_norm": 0.05471009761095047, |
|
"learning_rate": 4.747865634724328e-05, |
|
"loss": 0.0653, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.44161999138302455, |
|
"grad_norm": 0.5579153299331665, |
|
"learning_rate": 4.737891965211841e-05, |
|
"loss": 0.1818, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.447005601034037, |
|
"grad_norm": 7.742904186248779, |
|
"learning_rate": 4.727918295699354e-05, |
|
"loss": 0.1663, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.45239121068504956, |
|
"grad_norm": 0.26534390449523926, |
|
"learning_rate": 4.717944626186867e-05, |
|
"loss": 0.2548, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.45777682033606204, |
|
"grad_norm": 0.060189589858055115, |
|
"learning_rate": 4.7079709566743796e-05, |
|
"loss": 0.1214, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.4631624299870745, |
|
"grad_norm": 2.2301554679870605, |
|
"learning_rate": 4.697997287161893e-05, |
|
"loss": 0.1254, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.46854803963808705, |
|
"grad_norm": 31.325176239013672, |
|
"learning_rate": 4.688023617649406e-05, |
|
"loss": 0.2186, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 0.8820040822029114, |
|
"learning_rate": 4.678049948136919e-05, |
|
"loss": 0.1814, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.479319258940112, |
|
"grad_norm": 2.4866137504577637, |
|
"learning_rate": 4.668076278624432e-05, |
|
"loss": 0.1301, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.4847048685911245, |
|
"grad_norm": 0.0820331797003746, |
|
"learning_rate": 4.6581026091119445e-05, |
|
"loss": 0.1845, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.49009047824213703, |
|
"grad_norm": 36.052513122558594, |
|
"learning_rate": 4.648128939599458e-05, |
|
"loss": 0.1782, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.4954760878931495, |
|
"grad_norm": 0.08009187877178192, |
|
"learning_rate": 4.6381552700869706e-05, |
|
"loss": 0.1326, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.500861697544162, |
|
"grad_norm": 1.324997067451477, |
|
"learning_rate": 4.628181600574484e-05, |
|
"loss": 0.2862, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5062473071951745, |
|
"grad_norm": 0.17933067679405212, |
|
"learning_rate": 4.618207931061996e-05, |
|
"loss": 0.1677, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.511632916846187, |
|
"grad_norm": 0.22597669064998627, |
|
"learning_rate": 4.6082342615495094e-05, |
|
"loss": 0.2077, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5170185264971995, |
|
"grad_norm": 1.9283539056777954, |
|
"learning_rate": 4.598260592037023e-05, |
|
"loss": 0.227, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.522404136148212, |
|
"grad_norm": 0.21102119982242584, |
|
"learning_rate": 4.5882869225245355e-05, |
|
"loss": 0.2177, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5277897457992244, |
|
"grad_norm": 16.233251571655273, |
|
"learning_rate": 4.578313253012048e-05, |
|
"loss": 0.1349, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.533175355450237, |
|
"grad_norm": 12.861350059509277, |
|
"learning_rate": 4.568339583499561e-05, |
|
"loss": 0.2012, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5385609651012495, |
|
"grad_norm": 29.975788116455078, |
|
"learning_rate": 4.5583659139870744e-05, |
|
"loss": 0.1332, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5439465747522619, |
|
"grad_norm": 0.08081357926130295, |
|
"learning_rate": 4.548392244474588e-05, |
|
"loss": 0.1514, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.5493321844032745, |
|
"grad_norm": 72.80885314941406, |
|
"learning_rate": 4.5384185749621005e-05, |
|
"loss": 0.2106, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5547177940542869, |
|
"grad_norm": 0.0727340430021286, |
|
"learning_rate": 4.528444905449613e-05, |
|
"loss": 0.1395, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5601034037052994, |
|
"grad_norm": 0.10599514842033386, |
|
"learning_rate": 4.518471235937126e-05, |
|
"loss": 0.1318, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.565489013356312, |
|
"grad_norm": 0.09759501367807388, |
|
"learning_rate": 4.508497566424639e-05, |
|
"loss": 0.0796, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.5708746230073244, |
|
"grad_norm": 0.15663489699363708, |
|
"learning_rate": 4.4985238969121527e-05, |
|
"loss": 0.1019, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5762602326583369, |
|
"grad_norm": 0.13662685453891754, |
|
"learning_rate": 4.488550227399665e-05, |
|
"loss": 0.1396, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.5816458423093495, |
|
"grad_norm": 0.10818319767713547, |
|
"learning_rate": 4.478576557887178e-05, |
|
"loss": 0.1455, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5870314519603619, |
|
"grad_norm": 0.11137444525957108, |
|
"learning_rate": 4.468602888374691e-05, |
|
"loss": 0.1616, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.5924170616113744, |
|
"grad_norm": 0.1452583223581314, |
|
"learning_rate": 4.458629218862204e-05, |
|
"loss": 0.1445, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.597802671262387, |
|
"grad_norm": 10.563451766967773, |
|
"learning_rate": 4.448655549349717e-05, |
|
"loss": 0.1514, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6031882809133994, |
|
"grad_norm": 14.584248542785645, |
|
"learning_rate": 4.4386818798372296e-05, |
|
"loss": 0.1717, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6085738905644119, |
|
"grad_norm": 12.885149955749512, |
|
"learning_rate": 4.428708210324743e-05, |
|
"loss": 0.2395, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6139595002154243, |
|
"grad_norm": 2.0140767097473145, |
|
"learning_rate": 4.418734540812256e-05, |
|
"loss": 0.2966, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6193451098664369, |
|
"grad_norm": 0.11748749762773514, |
|
"learning_rate": 4.4087608712997684e-05, |
|
"loss": 0.083, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6247307195174494, |
|
"grad_norm": 0.058700062334537506, |
|
"learning_rate": 4.398787201787282e-05, |
|
"loss": 0.1189, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6301163291684618, |
|
"grad_norm": 1.4539530277252197, |
|
"learning_rate": 4.3888135322747945e-05, |
|
"loss": 0.1992, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6355019388194744, |
|
"grad_norm": 0.36291152238845825, |
|
"learning_rate": 4.378839862762308e-05, |
|
"loss": 0.0857, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6408875484704869, |
|
"grad_norm": 1.1221033334732056, |
|
"learning_rate": 4.3688661932498206e-05, |
|
"loss": 0.2207, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6462731581214993, |
|
"grad_norm": 1.3202532529830933, |
|
"learning_rate": 4.3588925237373333e-05, |
|
"loss": 0.1268, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6516587677725119, |
|
"grad_norm": 0.23690970242023468, |
|
"learning_rate": 4.348918854224847e-05, |
|
"loss": 0.2074, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6570443774235244, |
|
"grad_norm": 0.39078834652900696, |
|
"learning_rate": 4.3389451847123595e-05, |
|
"loss": 0.0893, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6624299870745368, |
|
"grad_norm": 2.198420286178589, |
|
"learning_rate": 4.328971515199873e-05, |
|
"loss": 0.1987, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.6678155967255494, |
|
"grad_norm": 0.14714717864990234, |
|
"learning_rate": 4.3189978456873856e-05, |
|
"loss": 0.1911, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6732012063765618, |
|
"grad_norm": 0.07894087582826614, |
|
"learning_rate": 4.309024176174898e-05, |
|
"loss": 0.1334, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.6785868160275743, |
|
"grad_norm": 0.11621283739805222, |
|
"learning_rate": 4.2990505066624117e-05, |
|
"loss": 0.0956, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6839724256785868, |
|
"grad_norm": 0.15330371260643005, |
|
"learning_rate": 4.2890768371499244e-05, |
|
"loss": 0.1856, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.6893580353295993, |
|
"grad_norm": 100.52692413330078, |
|
"learning_rate": 4.279103167637437e-05, |
|
"loss": 0.1754, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6947436449806118, |
|
"grad_norm": 1.5307726860046387, |
|
"learning_rate": 4.2691294981249505e-05, |
|
"loss": 0.1644, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7001292546316243, |
|
"grad_norm": 0.1815415918827057, |
|
"learning_rate": 4.259155828612463e-05, |
|
"loss": 0.1715, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7055148642826368, |
|
"grad_norm": 0.06501147896051407, |
|
"learning_rate": 4.2491821590999766e-05, |
|
"loss": 0.0786, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7109004739336493, |
|
"grad_norm": 13.024656295776367, |
|
"learning_rate": 4.239208489587489e-05, |
|
"loss": 0.1299, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7162860835846618, |
|
"grad_norm": 0.061265505850315094, |
|
"learning_rate": 4.229234820075002e-05, |
|
"loss": 0.1553, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7216716932356743, |
|
"grad_norm": 0.15151949226856232, |
|
"learning_rate": 4.2192611505625154e-05, |
|
"loss": 0.1563, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7270573028866868, |
|
"grad_norm": 0.37678295373916626, |
|
"learning_rate": 4.209287481050028e-05, |
|
"loss": 0.1824, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7324429125376992, |
|
"grad_norm": 0.22883647680282593, |
|
"learning_rate": 4.1993138115375415e-05, |
|
"loss": 0.2303, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7378285221887118, |
|
"grad_norm": 0.123573899269104, |
|
"learning_rate": 4.189340142025054e-05, |
|
"loss": 0.1554, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7432141318397243, |
|
"grad_norm": 7.3934431076049805, |
|
"learning_rate": 4.179366472512567e-05, |
|
"loss": 0.1949, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.7485997414907367, |
|
"grad_norm": 0.07116091251373291, |
|
"learning_rate": 4.16939280300008e-05, |
|
"loss": 0.1022, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.7539853511417492, |
|
"grad_norm": 0.6062744855880737, |
|
"learning_rate": 4.159419133487593e-05, |
|
"loss": 0.1311, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7593709607927618, |
|
"grad_norm": 0.19308426976203918, |
|
"learning_rate": 4.149445463975106e-05, |
|
"loss": 0.1942, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.7647565704437742, |
|
"grad_norm": 0.08045301586389542, |
|
"learning_rate": 4.139471794462619e-05, |
|
"loss": 0.1055, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.7701421800947867, |
|
"grad_norm": 47.74348449707031, |
|
"learning_rate": 4.129498124950132e-05, |
|
"loss": 0.0844, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.7755277897457993, |
|
"grad_norm": 14.923918724060059, |
|
"learning_rate": 4.119524455437645e-05, |
|
"loss": 0.0943, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.7809133993968117, |
|
"grad_norm": 0.3901768624782562, |
|
"learning_rate": 4.109550785925157e-05, |
|
"loss": 0.2203, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.7862990090478242, |
|
"grad_norm": 0.05978202819824219, |
|
"learning_rate": 4.0995771164126707e-05, |
|
"loss": 0.0804, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.7916846186988367, |
|
"grad_norm": 8.785200119018555, |
|
"learning_rate": 4.089603446900184e-05, |
|
"loss": 0.1765, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.7970702283498492, |
|
"grad_norm": 1.523232102394104, |
|
"learning_rate": 4.079629777387697e-05, |
|
"loss": 0.1302, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8024558380008617, |
|
"grad_norm": 0.05809938907623291, |
|
"learning_rate": 4.06965610787521e-05, |
|
"loss": 0.0481, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.8078414476518742, |
|
"grad_norm": 0.05040917173027992, |
|
"learning_rate": 4.059682438362722e-05, |
|
"loss": 0.0768, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8132270573028867, |
|
"grad_norm": 0.07563222199678421, |
|
"learning_rate": 4.0497087688502356e-05, |
|
"loss": 0.1397, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.8186126669538992, |
|
"grad_norm": 0.4528738260269165, |
|
"learning_rate": 4.039735099337749e-05, |
|
"loss": 0.3308, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8239982766049117, |
|
"grad_norm": 1.0809814929962158, |
|
"learning_rate": 4.029761429825262e-05, |
|
"loss": 0.1919, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.8293838862559242, |
|
"grad_norm": 0.2343398779630661, |
|
"learning_rate": 4.0197877603127744e-05, |
|
"loss": 0.1351, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8347694959069367, |
|
"grad_norm": 1.747331976890564, |
|
"learning_rate": 4.009814090800287e-05, |
|
"loss": 0.1744, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8401551055579491, |
|
"grad_norm": 0.06216764450073242, |
|
"learning_rate": 3.9998404212878005e-05, |
|
"loss": 0.0587, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8455407152089617, |
|
"grad_norm": 0.33079928159713745, |
|
"learning_rate": 3.989866751775314e-05, |
|
"loss": 0.1353, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.8509263248599741, |
|
"grad_norm": 1.1662975549697876, |
|
"learning_rate": 3.979893082262826e-05, |
|
"loss": 0.2162, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.8563119345109866, |
|
"grad_norm": 0.2112116664648056, |
|
"learning_rate": 3.969919412750339e-05, |
|
"loss": 0.1332, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.8616975441619992, |
|
"grad_norm": 0.2449023723602295, |
|
"learning_rate": 3.959945743237852e-05, |
|
"loss": 0.1461, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8670831538130116, |
|
"grad_norm": 0.17366133630275726, |
|
"learning_rate": 3.9499720737253654e-05, |
|
"loss": 0.0463, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.8724687634640241, |
|
"grad_norm": 5.110925674438477, |
|
"learning_rate": 3.939998404212878e-05, |
|
"loss": 0.2415, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.8778543731150367, |
|
"grad_norm": 0.30593565106391907, |
|
"learning_rate": 3.930024734700391e-05, |
|
"loss": 0.1456, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.8832399827660491, |
|
"grad_norm": 0.19217516481876373, |
|
"learning_rate": 3.920051065187904e-05, |
|
"loss": 0.161, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.8886255924170616, |
|
"grad_norm": 1.0149356126785278, |
|
"learning_rate": 3.910077395675417e-05, |
|
"loss": 0.1787, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.894011202068074, |
|
"grad_norm": 5.444819450378418, |
|
"learning_rate": 3.90010372616293e-05, |
|
"loss": 0.1539, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.8993968117190866, |
|
"grad_norm": 1.2180671691894531, |
|
"learning_rate": 3.890130056650443e-05, |
|
"loss": 0.115, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.9047824213700991, |
|
"grad_norm": 0.16885504126548767, |
|
"learning_rate": 3.880156387137956e-05, |
|
"loss": 0.2378, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9101680310211115, |
|
"grad_norm": 0.19795845448970795, |
|
"learning_rate": 3.870182717625469e-05, |
|
"loss": 0.1481, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.9155536406721241, |
|
"grad_norm": 0.2421010136604309, |
|
"learning_rate": 3.860209048112982e-05, |
|
"loss": 0.1496, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9209392503231366, |
|
"grad_norm": 6.898841857910156, |
|
"learning_rate": 3.8502353786004946e-05, |
|
"loss": 0.1823, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.926324859974149, |
|
"grad_norm": 0.2529371678829193, |
|
"learning_rate": 3.840261709088008e-05, |
|
"loss": 0.2487, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9317104696251616, |
|
"grad_norm": 0.39406079053878784, |
|
"learning_rate": 3.830288039575521e-05, |
|
"loss": 0.2116, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.9370960792761741, |
|
"grad_norm": 0.07333202660083771, |
|
"learning_rate": 3.820314370063034e-05, |
|
"loss": 0.0279, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9424816889271865, |
|
"grad_norm": 1.0374135971069336, |
|
"learning_rate": 3.810340700550547e-05, |
|
"loss": 0.1611, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 0.12242020666599274, |
|
"learning_rate": 3.8003670310380595e-05, |
|
"loss": 0.1268, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9532529082292115, |
|
"grad_norm": 0.7784824371337891, |
|
"learning_rate": 3.790393361525573e-05, |
|
"loss": 0.2008, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.958638517880224, |
|
"grad_norm": 0.3692557215690613, |
|
"learning_rate": 3.7804196920130856e-05, |
|
"loss": 0.1814, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.9640241275312366, |
|
"grad_norm": 0.30838918685913086, |
|
"learning_rate": 3.770446022500599e-05, |
|
"loss": 0.0937, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.969409737182249, |
|
"grad_norm": 8.506460189819336, |
|
"learning_rate": 3.760472352988112e-05, |
|
"loss": 0.2372, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9747953468332615, |
|
"grad_norm": 0.11787492781877518, |
|
"learning_rate": 3.7504986834756244e-05, |
|
"loss": 0.0906, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.9801809564842741, |
|
"grad_norm": 0.18946515023708344, |
|
"learning_rate": 3.740525013963138e-05, |
|
"loss": 0.2561, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.9855665661352865, |
|
"grad_norm": 0.10240988433361053, |
|
"learning_rate": 3.7305513444506505e-05, |
|
"loss": 0.087, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.990952175786299, |
|
"grad_norm": 2.1701135635375977, |
|
"learning_rate": 3.720577674938163e-05, |
|
"loss": 0.1067, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9963377854373116, |
|
"grad_norm": 0.10697437822818756, |
|
"learning_rate": 3.7106040054256766e-05, |
|
"loss": 0.1298, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9611117095766455, |
|
"eval_auc": 0.9901169654651506, |
|
"eval_f1": 0.961078167115903, |
|
"eval_loss": 0.13669981062412262, |
|
"eval_precision": 0.9587007958700796, |
|
"eval_recall": 0.9634673584089927, |
|
"eval_runtime": 1681.5869, |
|
"eval_samples_per_second": 5.52, |
|
"eval_steps_per_second": 0.346, |
|
"step": 4642 |
|
}, |
|
{ |
|
"epoch": 1.001723395088324, |
|
"grad_norm": 2.8891570568084717, |
|
"learning_rate": 3.700630335913189e-05, |
|
"loss": 0.2125, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.0071090047393365, |
|
"grad_norm": 10.658247947692871, |
|
"learning_rate": 3.690656666400703e-05, |
|
"loss": 0.1635, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.012494614390349, |
|
"grad_norm": 0.06771805137395859, |
|
"learning_rate": 3.680682996888215e-05, |
|
"loss": 0.0609, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.0178802240413616, |
|
"grad_norm": 0.10092346370220184, |
|
"learning_rate": 3.670709327375728e-05, |
|
"loss": 0.1142, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.023265833692374, |
|
"grad_norm": 0.09358137845993042, |
|
"learning_rate": 3.6607356578632415e-05, |
|
"loss": 0.134, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.0286514433433864, |
|
"grad_norm": 0.10109369456768036, |
|
"learning_rate": 3.650761988350754e-05, |
|
"loss": 0.0835, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.034037052994399, |
|
"grad_norm": 0.06669428944587708, |
|
"learning_rate": 3.640788318838267e-05, |
|
"loss": 0.0729, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.0394226626454115, |
|
"grad_norm": 0.26679906249046326, |
|
"learning_rate": 3.63081464932578e-05, |
|
"loss": 0.1923, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.044808272296424, |
|
"grad_norm": 0.1384529024362564, |
|
"learning_rate": 3.620840979813293e-05, |
|
"loss": 0.0301, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.0501938819474363, |
|
"grad_norm": 0.04985777288675308, |
|
"learning_rate": 3.6108673103008065e-05, |
|
"loss": 0.0582, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.0555794915984489, |
|
"grad_norm": 0.17264176905155182, |
|
"learning_rate": 3.600893640788319e-05, |
|
"loss": 0.1751, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.0609651012494614, |
|
"grad_norm": 0.41739097237586975, |
|
"learning_rate": 3.590919971275832e-05, |
|
"loss": 0.0556, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.066350710900474, |
|
"grad_norm": 0.049522146582603455, |
|
"learning_rate": 3.5809463017633446e-05, |
|
"loss": 0.0233, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.0717363205514865, |
|
"grad_norm": 18.618080139160156, |
|
"learning_rate": 3.570972632250858e-05, |
|
"loss": 0.0875, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.077121930202499, |
|
"grad_norm": 0.055007074028253555, |
|
"learning_rate": 3.5609989627383714e-05, |
|
"loss": 0.0397, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0825075398535113, |
|
"grad_norm": 0.05218919366598129, |
|
"learning_rate": 3.5510252932258834e-05, |
|
"loss": 0.1636, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.0878931495045239, |
|
"grad_norm": 0.13161726295948029, |
|
"learning_rate": 3.541051623713397e-05, |
|
"loss": 0.1354, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.0932787591555364, |
|
"grad_norm": 0.2851119041442871, |
|
"learning_rate": 3.5310779542009095e-05, |
|
"loss": 0.0301, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.098664368806549, |
|
"grad_norm": 0.03602718561887741, |
|
"learning_rate": 3.521104284688423e-05, |
|
"loss": 0.0037, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.1040499784575615, |
|
"grad_norm": 20.054950714111328, |
|
"learning_rate": 3.5111306151759356e-05, |
|
"loss": 0.1972, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.1094355881085738, |
|
"grad_norm": 100.69612121582031, |
|
"learning_rate": 3.501156945663448e-05, |
|
"loss": 0.0889, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.1148211977595863, |
|
"grad_norm": 11.003225326538086, |
|
"learning_rate": 3.491183276150962e-05, |
|
"loss": 0.1155, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.1202068074105989, |
|
"grad_norm": 8.291934967041016, |
|
"learning_rate": 3.4812096066384744e-05, |
|
"loss": 0.2039, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.1255924170616114, |
|
"grad_norm": 0.06259515136480331, |
|
"learning_rate": 3.471235937125988e-05, |
|
"loss": 0.1264, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.130978026712624, |
|
"grad_norm": 1.9804569482803345, |
|
"learning_rate": 3.4612622676135005e-05, |
|
"loss": 0.1603, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 13.11237621307373, |
|
"learning_rate": 3.451288598101013e-05, |
|
"loss": 0.0895, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.1417492460146488, |
|
"grad_norm": 0.14050540328025818, |
|
"learning_rate": 3.4413149285885266e-05, |
|
"loss": 0.1879, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.1471348556656613, |
|
"grad_norm": 0.08870512247085571, |
|
"learning_rate": 3.4313412590760393e-05, |
|
"loss": 0.0668, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.1525204653166738, |
|
"grad_norm": 1.545466661453247, |
|
"learning_rate": 3.421367589563552e-05, |
|
"loss": 0.0765, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.1579060749676864, |
|
"grad_norm": 0.2768205404281616, |
|
"learning_rate": 3.4113939200510654e-05, |
|
"loss": 0.1389, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.163291684618699, |
|
"grad_norm": 15.847858428955078, |
|
"learning_rate": 3.401420250538578e-05, |
|
"loss": 0.1629, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.1686772942697115, |
|
"grad_norm": 0.10181305557489395, |
|
"learning_rate": 3.3914465810260916e-05, |
|
"loss": 0.2408, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.1740629039207238, |
|
"grad_norm": 1.0575032234191895, |
|
"learning_rate": 3.381472911513604e-05, |
|
"loss": 0.1404, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.1794485135717363, |
|
"grad_norm": 0.14517611265182495, |
|
"learning_rate": 3.371499242001117e-05, |
|
"loss": 0.1526, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.1848341232227488, |
|
"grad_norm": 3.418147087097168, |
|
"learning_rate": 3.3615255724886304e-05, |
|
"loss": 0.204, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.1902197328737614, |
|
"grad_norm": 0.12968988716602325, |
|
"learning_rate": 3.351551902976143e-05, |
|
"loss": 0.1302, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.195605342524774, |
|
"grad_norm": 0.49698755145072937, |
|
"learning_rate": 3.341578233463656e-05, |
|
"loss": 0.1656, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.2009909521757862, |
|
"grad_norm": 0.4227307140827179, |
|
"learning_rate": 3.331604563951169e-05, |
|
"loss": 0.1857, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.2063765618267988, |
|
"grad_norm": 1.2486361265182495, |
|
"learning_rate": 3.321630894438682e-05, |
|
"loss": 0.0902, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.2117621714778113, |
|
"grad_norm": 2.5114333629608154, |
|
"learning_rate": 3.311657224926195e-05, |
|
"loss": 0.1306, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.2171477811288238, |
|
"grad_norm": 0.07333961874246597, |
|
"learning_rate": 3.301683555413708e-05, |
|
"loss": 0.0746, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.2225333907798364, |
|
"grad_norm": 0.26353907585144043, |
|
"learning_rate": 3.291709885901221e-05, |
|
"loss": 0.0902, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.2279190004308487, |
|
"grad_norm": 4.065846920013428, |
|
"learning_rate": 3.281736216388734e-05, |
|
"loss": 0.0805, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.2333046100818612, |
|
"grad_norm": 0.06470991671085358, |
|
"learning_rate": 3.271762546876247e-05, |
|
"loss": 0.1123, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.2386902197328737, |
|
"grad_norm": 0.8340930938720703, |
|
"learning_rate": 3.26178887736376e-05, |
|
"loss": 0.1006, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.2440758293838863, |
|
"grad_norm": 1.6223039627075195, |
|
"learning_rate": 3.251815207851273e-05, |
|
"loss": 0.1494, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.2494614390348988, |
|
"grad_norm": 5.608697891235352, |
|
"learning_rate": 3.2418415383387856e-05, |
|
"loss": 0.1074, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.2548470486859111, |
|
"grad_norm": 0.0557849146425724, |
|
"learning_rate": 3.231867868826299e-05, |
|
"loss": 0.084, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.2602326583369237, |
|
"grad_norm": 27.92970085144043, |
|
"learning_rate": 3.221894199313812e-05, |
|
"loss": 0.0981, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.2656182679879362, |
|
"grad_norm": 0.06425247341394424, |
|
"learning_rate": 3.2119205298013244e-05, |
|
"loss": 0.0708, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.2710038776389487, |
|
"grad_norm": 0.17037852108478546, |
|
"learning_rate": 3.201946860288838e-05, |
|
"loss": 0.0508, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.2763894872899613, |
|
"grad_norm": 71.36393737792969, |
|
"learning_rate": 3.1919731907763505e-05, |
|
"loss": 0.0935, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.2817750969409736, |
|
"grad_norm": 0.062485672533512115, |
|
"learning_rate": 3.181999521263864e-05, |
|
"loss": 0.0126, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.2871607065919863, |
|
"grad_norm": 2.009347438812256, |
|
"learning_rate": 3.1720258517513767e-05, |
|
"loss": 0.1319, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.2925463162429986, |
|
"grad_norm": 0.06675919890403748, |
|
"learning_rate": 3.1620521822388894e-05, |
|
"loss": 0.0746, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.2979319258940112, |
|
"grad_norm": 8.066916465759277, |
|
"learning_rate": 3.152078512726403e-05, |
|
"loss": 0.1249, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.3033175355450237, |
|
"grad_norm": 0.03880688175559044, |
|
"learning_rate": 3.1421048432139155e-05, |
|
"loss": 0.1348, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.3087031451960363, |
|
"grad_norm": 0.30525916814804077, |
|
"learning_rate": 3.132131173701429e-05, |
|
"loss": 0.0379, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.3140887548470488, |
|
"grad_norm": 19.745574951171875, |
|
"learning_rate": 3.122157504188941e-05, |
|
"loss": 0.2353, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.319474364498061, |
|
"grad_norm": 0.17969800531864166, |
|
"learning_rate": 3.112183834676454e-05, |
|
"loss": 0.0887, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.3248599741490736, |
|
"grad_norm": 0.11662442237138748, |
|
"learning_rate": 3.102210165163968e-05, |
|
"loss": 0.0777, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.3302455838000862, |
|
"grad_norm": 29.151994705200195, |
|
"learning_rate": 3.0922364956514804e-05, |
|
"loss": 0.1288, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.3356311934510987, |
|
"grad_norm": 0.1103227511048317, |
|
"learning_rate": 3.082262826138993e-05, |
|
"loss": 0.1441, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.3410168031021112, |
|
"grad_norm": 0.055622648447752, |
|
"learning_rate": 3.072289156626506e-05, |
|
"loss": 0.0355, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.3464024127531236, |
|
"grad_norm": 0.05069245770573616, |
|
"learning_rate": 3.062315487114019e-05, |
|
"loss": 0.1014, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.351788022404136, |
|
"grad_norm": 2.9234323501586914, |
|
"learning_rate": 3.0523418176015326e-05, |
|
"loss": 0.1902, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.3571736320551486, |
|
"grad_norm": 0.10473991930484772, |
|
"learning_rate": 3.042368148089045e-05, |
|
"loss": 0.1063, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.3625592417061612, |
|
"grad_norm": 0.2278774380683899, |
|
"learning_rate": 3.032394478576558e-05, |
|
"loss": 0.1619, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.3679448513571737, |
|
"grad_norm": 0.31000664830207825, |
|
"learning_rate": 3.022420809064071e-05, |
|
"loss": 0.1219, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.373330461008186, |
|
"grad_norm": 0.3597787022590637, |
|
"learning_rate": 3.012447139551584e-05, |
|
"loss": 0.1056, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.3787160706591985, |
|
"grad_norm": 0.11010152846574783, |
|
"learning_rate": 3.0024734700390972e-05, |
|
"loss": 0.1596, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.384101680310211, |
|
"grad_norm": 1.2052332162857056, |
|
"learning_rate": 2.99249980052661e-05, |
|
"loss": 0.1476, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.3894872899612236, |
|
"grad_norm": 3.561495542526245, |
|
"learning_rate": 2.982526131014123e-05, |
|
"loss": 0.0991, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.3948728996122362, |
|
"grad_norm": 0.16460971534252167, |
|
"learning_rate": 2.972552461501636e-05, |
|
"loss": 0.0774, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.4002585092632485, |
|
"grad_norm": 0.08335640281438828, |
|
"learning_rate": 2.962578791989149e-05, |
|
"loss": 0.1055, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.4056441189142612, |
|
"grad_norm": 24.70379638671875, |
|
"learning_rate": 2.9526051224766614e-05, |
|
"loss": 0.0675, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.4110297285652735, |
|
"grad_norm": 0.06125853583216667, |
|
"learning_rate": 2.9426314529641748e-05, |
|
"loss": 0.0687, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.416415338216286, |
|
"grad_norm": 0.05415304750204086, |
|
"learning_rate": 2.932657783451688e-05, |
|
"loss": 0.0439, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"grad_norm": 0.06519858539104462, |
|
"learning_rate": 2.922684113939201e-05, |
|
"loss": 0.052, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.4271865575183111, |
|
"grad_norm": 0.05177925154566765, |
|
"learning_rate": 2.9127104444267133e-05, |
|
"loss": 0.068, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.4325721671693237, |
|
"grad_norm": 0.0696578249335289, |
|
"learning_rate": 2.9027367749142263e-05, |
|
"loss": 0.1637, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.437957776820336, |
|
"grad_norm": 0.07040733844041824, |
|
"learning_rate": 2.8927631054017397e-05, |
|
"loss": 0.0799, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.4433433864713485, |
|
"grad_norm": 0.09942576289176941, |
|
"learning_rate": 2.8827894358892528e-05, |
|
"loss": 0.1363, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.448728996122361, |
|
"grad_norm": 0.07192389667034149, |
|
"learning_rate": 2.8728157663767658e-05, |
|
"loss": 0.1003, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.4541146057733736, |
|
"grad_norm": 0.0946580320596695, |
|
"learning_rate": 2.8628420968642782e-05, |
|
"loss": 0.1488, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.4595002154243861, |
|
"grad_norm": 0.07671581953763962, |
|
"learning_rate": 2.8528684273517913e-05, |
|
"loss": 0.0701, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.4648858250753984, |
|
"grad_norm": 0.06100593879818916, |
|
"learning_rate": 2.8428947578393046e-05, |
|
"loss": 0.0232, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.470271434726411, |
|
"grad_norm": 0.04395158588886261, |
|
"learning_rate": 2.8329210883268177e-05, |
|
"loss": 0.0763, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.4756570443774235, |
|
"grad_norm": 0.03843090683221817, |
|
"learning_rate": 2.82294741881433e-05, |
|
"loss": 0.0832, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.481042654028436, |
|
"grad_norm": 32.23625564575195, |
|
"learning_rate": 2.812973749301843e-05, |
|
"loss": 0.117, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.4864282636794486, |
|
"grad_norm": 0.042882196605205536, |
|
"learning_rate": 2.8030000797893562e-05, |
|
"loss": 0.1001, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.491813873330461, |
|
"grad_norm": 0.1393657624721527, |
|
"learning_rate": 2.7930264102768696e-05, |
|
"loss": 0.1046, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.4971994829814734, |
|
"grad_norm": 30.964231491088867, |
|
"learning_rate": 2.783052740764382e-05, |
|
"loss": 0.1901, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.502585092632486, |
|
"grad_norm": 1.4588963985443115, |
|
"learning_rate": 2.773079071251895e-05, |
|
"loss": 0.0741, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.5079707022834985, |
|
"grad_norm": 0.06478077918291092, |
|
"learning_rate": 2.763105401739408e-05, |
|
"loss": 0.08, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.513356311934511, |
|
"grad_norm": 0.05906078219413757, |
|
"learning_rate": 2.753131732226921e-05, |
|
"loss": 0.0685, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.5187419215855233, |
|
"grad_norm": 4.263314247131348, |
|
"learning_rate": 2.7431580627144338e-05, |
|
"loss": 0.0689, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.524127531236536, |
|
"grad_norm": 0.07187110185623169, |
|
"learning_rate": 2.733184393201947e-05, |
|
"loss": 0.1388, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.5295131408875484, |
|
"grad_norm": 0.06428200751543045, |
|
"learning_rate": 2.72321072368946e-05, |
|
"loss": 0.0718, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.534898750538561, |
|
"grad_norm": 0.05657276138663292, |
|
"learning_rate": 2.713237054176973e-05, |
|
"loss": 0.0777, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.5402843601895735, |
|
"grad_norm": 29.23991584777832, |
|
"learning_rate": 2.703263384664486e-05, |
|
"loss": 0.1151, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.5456699698405858, |
|
"grad_norm": 13.988266944885254, |
|
"learning_rate": 2.6932897151519987e-05, |
|
"loss": 0.1451, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.5510555794915986, |
|
"grad_norm": 1.5213409662246704, |
|
"learning_rate": 2.6833160456395118e-05, |
|
"loss": 0.1063, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.5564411891426109, |
|
"grad_norm": 0.10175667703151703, |
|
"learning_rate": 2.6733423761270248e-05, |
|
"loss": 0.0863, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.5618267987936234, |
|
"grad_norm": 0.12581761181354523, |
|
"learning_rate": 2.663368706614538e-05, |
|
"loss": 0.2038, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.567212408444636, |
|
"grad_norm": 0.11780127137899399, |
|
"learning_rate": 2.6533950371020506e-05, |
|
"loss": 0.1042, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.5725980180956485, |
|
"grad_norm": 0.2194374054670334, |
|
"learning_rate": 2.6434213675895636e-05, |
|
"loss": 0.1805, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.577983627746661, |
|
"grad_norm": 0.18749874830245972, |
|
"learning_rate": 2.6334476980770767e-05, |
|
"loss": 0.1292, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.5833692373976733, |
|
"grad_norm": 0.12621381878852844, |
|
"learning_rate": 2.6234740285645897e-05, |
|
"loss": 0.0807, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.5887548470486859, |
|
"grad_norm": 0.09865190833806992, |
|
"learning_rate": 2.6135003590521025e-05, |
|
"loss": 0.0985, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.5941404566996984, |
|
"grad_norm": 0.11946694552898407, |
|
"learning_rate": 2.6035266895396155e-05, |
|
"loss": 0.0895, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.599526066350711, |
|
"grad_norm": 1.2805886268615723, |
|
"learning_rate": 2.5935530200271286e-05, |
|
"loss": 0.1058, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.6049116760017235, |
|
"grad_norm": 0.14271412789821625, |
|
"learning_rate": 2.5835793505146416e-05, |
|
"loss": 0.1345, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.6102972856527358, |
|
"grad_norm": 32.52022933959961, |
|
"learning_rate": 2.5736056810021547e-05, |
|
"loss": 0.1266, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.6156828953037485, |
|
"grad_norm": 0.12038301676511765, |
|
"learning_rate": 2.5636320114896674e-05, |
|
"loss": 0.1492, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.6210685049547608, |
|
"grad_norm": 1.8868168592453003, |
|
"learning_rate": 2.5536583419771804e-05, |
|
"loss": 0.0776, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.6264541146057734, |
|
"grad_norm": 1.943164348602295, |
|
"learning_rate": 2.5436846724646935e-05, |
|
"loss": 0.0348, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.631839724256786, |
|
"grad_norm": 0.07310531288385391, |
|
"learning_rate": 2.5337110029522065e-05, |
|
"loss": 0.0375, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.6372253339077982, |
|
"grad_norm": 0.05736231431365013, |
|
"learning_rate": 2.5237373334397192e-05, |
|
"loss": 0.0588, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.642610943558811, |
|
"grad_norm": 0.05780019611120224, |
|
"learning_rate": 2.5137636639272323e-05, |
|
"loss": 0.0524, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.6479965532098233, |
|
"grad_norm": 0.5779060125350952, |
|
"learning_rate": 2.5037899944147453e-05, |
|
"loss": 0.1221, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.6533821628608358, |
|
"grad_norm": 0.05291671305894852, |
|
"learning_rate": 2.493816324902258e-05, |
|
"loss": 0.0978, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.6587677725118484, |
|
"grad_norm": 0.04571978747844696, |
|
"learning_rate": 2.483842655389771e-05, |
|
"loss": 0.0798, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.6641533821628607, |
|
"grad_norm": 0.0946052223443985, |
|
"learning_rate": 2.473868985877284e-05, |
|
"loss": 0.0712, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.6695389918138734, |
|
"grad_norm": 0.04669623076915741, |
|
"learning_rate": 2.4638953163647972e-05, |
|
"loss": 0.1253, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.6749246014648858, |
|
"grad_norm": 0.03837985917925835, |
|
"learning_rate": 2.45392164685231e-05, |
|
"loss": 0.0281, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.6803102111158983, |
|
"grad_norm": 0.1306626796722412, |
|
"learning_rate": 2.443947977339823e-05, |
|
"loss": 0.0298, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.6856958207669108, |
|
"grad_norm": 0.04202219098806381, |
|
"learning_rate": 2.4339743078273357e-05, |
|
"loss": 0.1771, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.6910814304179234, |
|
"grad_norm": 9.636494636535645, |
|
"learning_rate": 2.424000638314849e-05, |
|
"loss": 0.1739, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.696467040068936, |
|
"grad_norm": 0.05588022246956825, |
|
"learning_rate": 2.414026968802362e-05, |
|
"loss": 0.1273, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.7018526497199482, |
|
"grad_norm": 0.30812951922416687, |
|
"learning_rate": 2.404053299289875e-05, |
|
"loss": 0.0813, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.7072382593709607, |
|
"grad_norm": 1.565081000328064, |
|
"learning_rate": 2.394079629777388e-05, |
|
"loss": 0.0759, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.7126238690219733, |
|
"grad_norm": 0.06271021068096161, |
|
"learning_rate": 2.3841059602649006e-05, |
|
"loss": 0.1473, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.7180094786729858, |
|
"grad_norm": 0.06820254772901535, |
|
"learning_rate": 2.374132290752414e-05, |
|
"loss": 0.0978, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.7233950883239983, |
|
"grad_norm": 0.0788588598370552, |
|
"learning_rate": 2.3641586212399267e-05, |
|
"loss": 0.1251, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7287806979750107, |
|
"grad_norm": 0.08439130336046219, |
|
"learning_rate": 2.3541849517274398e-05, |
|
"loss": 0.0489, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.7341663076260234, |
|
"grad_norm": 0.08412740379571915, |
|
"learning_rate": 2.3442112822149525e-05, |
|
"loss": 0.0972, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.7395519172770357, |
|
"grad_norm": 0.05004223436117172, |
|
"learning_rate": 2.3342376127024655e-05, |
|
"loss": 0.0867, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.7449375269280483, |
|
"grad_norm": 0.08401107043027878, |
|
"learning_rate": 2.3242639431899786e-05, |
|
"loss": 0.1558, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.7503231365790608, |
|
"grad_norm": 0.1881813257932663, |
|
"learning_rate": 2.3142902736774916e-05, |
|
"loss": 0.0759, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.7557087462300731, |
|
"grad_norm": 1.4269578456878662, |
|
"learning_rate": 2.3043166041650043e-05, |
|
"loss": 0.0702, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.7610943558810859, |
|
"grad_norm": 43.845985412597656, |
|
"learning_rate": 2.2943429346525174e-05, |
|
"loss": 0.0901, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.7664799655320982, |
|
"grad_norm": 0.10664209723472595, |
|
"learning_rate": 2.2843692651400304e-05, |
|
"loss": 0.0651, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.7718655751831107, |
|
"grad_norm": 3.7163357734680176, |
|
"learning_rate": 2.2743955956275435e-05, |
|
"loss": 0.0471, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.7772511848341233, |
|
"grad_norm": 0.05055355280637741, |
|
"learning_rate": 2.2644219261150565e-05, |
|
"loss": 0.1338, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.7826367944851356, |
|
"grad_norm": 0.23772941529750824, |
|
"learning_rate": 2.2544482566025693e-05, |
|
"loss": 0.2019, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.7880224041361483, |
|
"grad_norm": 0.03852352499961853, |
|
"learning_rate": 2.2444745870900823e-05, |
|
"loss": 0.0408, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.7934080137871606, |
|
"grad_norm": 2.454216480255127, |
|
"learning_rate": 2.2345009175775954e-05, |
|
"loss": 0.1086, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.7987936234381732, |
|
"grad_norm": 0.2967223525047302, |
|
"learning_rate": 2.2245272480651084e-05, |
|
"loss": 0.0961, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.8041792330891857, |
|
"grad_norm": 0.5866406559944153, |
|
"learning_rate": 2.214553578552621e-05, |
|
"loss": 0.1074, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.8095648427401982, |
|
"grad_norm": 0.07801081985235214, |
|
"learning_rate": 2.2045799090401342e-05, |
|
"loss": 0.0628, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.8149504523912108, |
|
"grad_norm": 18.57970428466797, |
|
"learning_rate": 2.194606239527647e-05, |
|
"loss": 0.1468, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.820336062042223, |
|
"grad_norm": 0.5681573152542114, |
|
"learning_rate": 2.1846325700151603e-05, |
|
"loss": 0.0894, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.8257216716932356, |
|
"grad_norm": 0.07245413213968277, |
|
"learning_rate": 2.174658900502673e-05, |
|
"loss": 0.1625, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.8311072813442482, |
|
"grad_norm": 0.058218203485012054, |
|
"learning_rate": 2.164685230990186e-05, |
|
"loss": 0.0966, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.8364928909952607, |
|
"grad_norm": 0.7239183187484741, |
|
"learning_rate": 2.1547115614776988e-05, |
|
"loss": 0.1296, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.8418785006462732, |
|
"grad_norm": 0.0625491514801979, |
|
"learning_rate": 2.1447378919652118e-05, |
|
"loss": 0.1106, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.8472641102972855, |
|
"grad_norm": 0.11988529562950134, |
|
"learning_rate": 2.134764222452725e-05, |
|
"loss": 0.129, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.8526497199482983, |
|
"grad_norm": 23.358821868896484, |
|
"learning_rate": 2.124790552940238e-05, |
|
"loss": 0.1312, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.8580353295993106, |
|
"grad_norm": 1.5045102834701538, |
|
"learning_rate": 2.114816883427751e-05, |
|
"loss": 0.107, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.8634209392503231, |
|
"grad_norm": 0.06715774536132812, |
|
"learning_rate": 2.1048432139152637e-05, |
|
"loss": 0.0365, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.8688065489013357, |
|
"grad_norm": 1.9744484424591064, |
|
"learning_rate": 2.0948695444027767e-05, |
|
"loss": 0.0944, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.874192158552348, |
|
"grad_norm": 0.28752002120018005, |
|
"learning_rate": 2.0848958748902898e-05, |
|
"loss": 0.1517, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.8795777682033608, |
|
"grad_norm": 0.06683491915464401, |
|
"learning_rate": 2.074922205377803e-05, |
|
"loss": 0.0558, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.884963377854373, |
|
"grad_norm": 2.5277748107910156, |
|
"learning_rate": 2.0649485358653155e-05, |
|
"loss": 0.124, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.8903489875053856, |
|
"grad_norm": 1.8914086818695068, |
|
"learning_rate": 2.0549748663528286e-05, |
|
"loss": 0.0586, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 0.06591325998306274, |
|
"learning_rate": 2.0450011968403416e-05, |
|
"loss": 0.0511, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9011202068074105, |
|
"grad_norm": 1.5716817378997803, |
|
"learning_rate": 2.0350275273278547e-05, |
|
"loss": 0.1128, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.9065058164584232, |
|
"grad_norm": 0.05281971022486687, |
|
"learning_rate": 2.0250538578153674e-05, |
|
"loss": 0.1034, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.9118914261094355, |
|
"grad_norm": 0.04660295695066452, |
|
"learning_rate": 2.0150801883028805e-05, |
|
"loss": 0.216, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.917277035760448, |
|
"grad_norm": 0.2716023027896881, |
|
"learning_rate": 2.0051065187903935e-05, |
|
"loss": 0.189, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.9226626454114606, |
|
"grad_norm": 0.06641989201307297, |
|
"learning_rate": 1.9951328492779066e-05, |
|
"loss": 0.0583, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.9280482550624731, |
|
"grad_norm": 0.053841717541217804, |
|
"learning_rate": 1.9851591797654193e-05, |
|
"loss": 0.0942, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.9334338647134857, |
|
"grad_norm": 0.05274002254009247, |
|
"learning_rate": 1.9751855102529323e-05, |
|
"loss": 0.1183, |
|
"step": 8975 |
|
}, |
|
{ |
|
"epoch": 1.938819474364498, |
|
"grad_norm": 0.08617954701185226, |
|
"learning_rate": 1.9652118407404454e-05, |
|
"loss": 0.0855, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.9442050840155105, |
|
"grad_norm": 0.06696368008852005, |
|
"learning_rate": 1.9552381712279584e-05, |
|
"loss": 0.117, |
|
"step": 9025 |
|
}, |
|
{ |
|
"epoch": 1.949590693666523, |
|
"grad_norm": 7.755177974700928, |
|
"learning_rate": 1.9452645017154715e-05, |
|
"loss": 0.1239, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 1.9549763033175356, |
|
"grad_norm": 0.1449265480041504, |
|
"learning_rate": 1.9352908322029842e-05, |
|
"loss": 0.0605, |
|
"step": 9075 |
|
}, |
|
{ |
|
"epoch": 1.9603619129685481, |
|
"grad_norm": 0.04622127115726471, |
|
"learning_rate": 1.9253171626904972e-05, |
|
"loss": 0.1222, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.9657475226195604, |
|
"grad_norm": 0.051821399480104446, |
|
"learning_rate": 1.91534349317801e-05, |
|
"loss": 0.0562, |
|
"step": 9125 |
|
}, |
|
{ |
|
"epoch": 1.9711331322705732, |
|
"grad_norm": 0.04180682450532913, |
|
"learning_rate": 1.9053698236655234e-05, |
|
"loss": 0.0473, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 1.9765187419215855, |
|
"grad_norm": 0.0317608080804348, |
|
"learning_rate": 1.895396154153036e-05, |
|
"loss": 0.0576, |
|
"step": 9175 |
|
}, |
|
{ |
|
"epoch": 1.981904351572598, |
|
"grad_norm": 0.026255348697304726, |
|
"learning_rate": 1.885422484640549e-05, |
|
"loss": 0.0315, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.9872899612236106, |
|
"grad_norm": 1.6421452760696411, |
|
"learning_rate": 1.8754488151280618e-05, |
|
"loss": 0.1244, |
|
"step": 9225 |
|
}, |
|
{ |
|
"epoch": 1.9926755708746229, |
|
"grad_norm": 0.0805448517203331, |
|
"learning_rate": 1.865475145615575e-05, |
|
"loss": 0.0834, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 1.9980611805256356, |
|
"grad_norm": 0.03920817747712135, |
|
"learning_rate": 1.855501476103088e-05, |
|
"loss": 0.0997, |
|
"step": 9275 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9691909942906388, |
|
"eval_auc": 0.9931809600784133, |
|
"eval_f1": 0.9694248449861022, |
|
"eval_loss": 0.1328408569097519, |
|
"eval_precision": 0.9589678510998308, |
|
"eval_recall": 0.9801124081279723, |
|
"eval_runtime": 1746.5378, |
|
"eval_samples_per_second": 5.315, |
|
"eval_steps_per_second": 0.333, |
|
"step": 9284 |
|
}, |
|
{ |
|
"epoch": 2.003446790176648, |
|
"grad_norm": 0.672393262386322, |
|
"learning_rate": 1.845527806590601e-05, |
|
"loss": 0.0478, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.0088323998276607, |
|
"grad_norm": 0.0429680272936821, |
|
"learning_rate": 1.8355541370781137e-05, |
|
"loss": 0.0864, |
|
"step": 9325 |
|
}, |
|
{ |
|
"epoch": 2.014218009478673, |
|
"grad_norm": 0.036236416548490524, |
|
"learning_rate": 1.8255804675656267e-05, |
|
"loss": 0.0491, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 2.0196036191296853, |
|
"grad_norm": 0.036336880177259445, |
|
"learning_rate": 1.8156067980531398e-05, |
|
"loss": 0.0581, |
|
"step": 9375 |
|
}, |
|
{ |
|
"epoch": 2.024989228780698, |
|
"grad_norm": 0.032438818365335464, |
|
"learning_rate": 1.805633128540653e-05, |
|
"loss": 0.0419, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.0303748384317104, |
|
"grad_norm": 1.6185767650604248, |
|
"learning_rate": 1.795659459028166e-05, |
|
"loss": 0.0767, |
|
"step": 9425 |
|
}, |
|
{ |
|
"epoch": 2.035760448082723, |
|
"grad_norm": 2.298982858657837, |
|
"learning_rate": 1.7856857895156786e-05, |
|
"loss": 0.0983, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 2.0411460577337355, |
|
"grad_norm": 8.202011108398438, |
|
"learning_rate": 1.7757121200031917e-05, |
|
"loss": 0.0845, |
|
"step": 9475 |
|
}, |
|
{ |
|
"epoch": 2.046531667384748, |
|
"grad_norm": 0.029248099774122238, |
|
"learning_rate": 1.7657384504907047e-05, |
|
"loss": 0.053, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0519172770357605, |
|
"grad_norm": 0.02675529569387436, |
|
"learning_rate": 1.7557647809782178e-05, |
|
"loss": 0.0053, |
|
"step": 9525 |
|
}, |
|
{ |
|
"epoch": 2.057302886686773, |
|
"grad_norm": 17.323036193847656, |
|
"learning_rate": 1.7457911114657305e-05, |
|
"loss": 0.127, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 2.0626884963377856, |
|
"grad_norm": 0.03516780957579613, |
|
"learning_rate": 1.7358174419532435e-05, |
|
"loss": 0.1891, |
|
"step": 9575 |
|
}, |
|
{ |
|
"epoch": 2.068074105988798, |
|
"grad_norm": 3.910149574279785, |
|
"learning_rate": 1.7258437724407562e-05, |
|
"loss": 0.1083, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.0734597156398102, |
|
"grad_norm": 0.18281258642673492, |
|
"learning_rate": 1.7158701029282696e-05, |
|
"loss": 0.0678, |
|
"step": 9625 |
|
}, |
|
{ |
|
"epoch": 2.078845325290823, |
|
"grad_norm": 0.04458484426140785, |
|
"learning_rate": 1.7058964334157823e-05, |
|
"loss": 0.0904, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 2.0842309349418353, |
|
"grad_norm": 0.20179250836372375, |
|
"learning_rate": 1.6959227639032954e-05, |
|
"loss": 0.0388, |
|
"step": 9675 |
|
}, |
|
{ |
|
"epoch": 2.089616544592848, |
|
"grad_norm": 0.02497880347073078, |
|
"learning_rate": 1.685949094390808e-05, |
|
"loss": 0.0031, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.0950021542438604, |
|
"grad_norm": 0.02411968819797039, |
|
"learning_rate": 1.675975424878321e-05, |
|
"loss": 0.0691, |
|
"step": 9725 |
|
}, |
|
{ |
|
"epoch": 2.1003877638948727, |
|
"grad_norm": 0.035218965262174606, |
|
"learning_rate": 1.6660017553658346e-05, |
|
"loss": 0.0511, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 2.1057733735458855, |
|
"grad_norm": 0.037588488310575485, |
|
"learning_rate": 1.6560280858533473e-05, |
|
"loss": 0.0335, |
|
"step": 9775 |
|
}, |
|
{ |
|
"epoch": 2.1111589831968978, |
|
"grad_norm": 0.03383982926607132, |
|
"learning_rate": 1.6460544163408603e-05, |
|
"loss": 0.0516, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.1165445928479105, |
|
"grad_norm": 0.6187468767166138, |
|
"learning_rate": 1.636080746828373e-05, |
|
"loss": 0.0983, |
|
"step": 9825 |
|
}, |
|
{ |
|
"epoch": 2.121930202498923, |
|
"grad_norm": 0.23063376545906067, |
|
"learning_rate": 1.626107077315886e-05, |
|
"loss": 0.1008, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 2.127315812149935, |
|
"grad_norm": 0.04398832470178604, |
|
"learning_rate": 1.616133407803399e-05, |
|
"loss": 0.027, |
|
"step": 9875 |
|
}, |
|
{ |
|
"epoch": 2.132701421800948, |
|
"grad_norm": 0.0390457846224308, |
|
"learning_rate": 1.6061597382909122e-05, |
|
"loss": 0.0588, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.13808703145196, |
|
"grad_norm": 0.8915455937385559, |
|
"learning_rate": 1.596186068778425e-05, |
|
"loss": 0.1456, |
|
"step": 9925 |
|
}, |
|
{ |
|
"epoch": 2.143472641102973, |
|
"grad_norm": 68.75797271728516, |
|
"learning_rate": 1.586212399265938e-05, |
|
"loss": 0.0791, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 2.1488582507539853, |
|
"grad_norm": 15.131389617919922, |
|
"learning_rate": 1.576238729753451e-05, |
|
"loss": 0.1169, |
|
"step": 9975 |
|
}, |
|
{ |
|
"epoch": 2.154243860404998, |
|
"grad_norm": 0.05127192661166191, |
|
"learning_rate": 1.566265060240964e-05, |
|
"loss": 0.0582, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.1596294700560104, |
|
"grad_norm": 0.03388744965195656, |
|
"learning_rate": 1.5562913907284768e-05, |
|
"loss": 0.0609, |
|
"step": 10025 |
|
}, |
|
{ |
|
"epoch": 2.1650150797070227, |
|
"grad_norm": 0.0341382697224617, |
|
"learning_rate": 1.5463177212159898e-05, |
|
"loss": 0.1072, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 2.1704006893580354, |
|
"grad_norm": 0.032444145530462265, |
|
"learning_rate": 1.536344051703503e-05, |
|
"loss": 0.0447, |
|
"step": 10075 |
|
}, |
|
{ |
|
"epoch": 2.1757862990090477, |
|
"grad_norm": 0.024341439828276634, |
|
"learning_rate": 1.526370382191016e-05, |
|
"loss": 0.0139, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 2.1811719086600605, |
|
"grad_norm": 1.856371521949768, |
|
"learning_rate": 1.5163967126785286e-05, |
|
"loss": 0.0965, |
|
"step": 10125 |
|
}, |
|
{ |
|
"epoch": 2.186557518311073, |
|
"grad_norm": 3.588845729827881, |
|
"learning_rate": 1.5064230431660417e-05, |
|
"loss": 0.0047, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 2.191943127962085, |
|
"grad_norm": 0.025228984653949738, |
|
"learning_rate": 1.4964493736535547e-05, |
|
"loss": 0.0525, |
|
"step": 10175 |
|
}, |
|
{ |
|
"epoch": 2.197328737613098, |
|
"grad_norm": 0.023060867562890053, |
|
"learning_rate": 1.4864757041410676e-05, |
|
"loss": 0.0817, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.20271434726411, |
|
"grad_norm": 0.026297984644770622, |
|
"learning_rate": 1.4765020346285807e-05, |
|
"loss": 0.0313, |
|
"step": 10225 |
|
}, |
|
{ |
|
"epoch": 2.208099956915123, |
|
"grad_norm": 0.020485829561948776, |
|
"learning_rate": 1.4665283651160936e-05, |
|
"loss": 0.0022, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 2.2134855665661353, |
|
"grad_norm": 0.023789284750819206, |
|
"learning_rate": 1.4565546956036066e-05, |
|
"loss": 0.069, |
|
"step": 10275 |
|
}, |
|
{ |
|
"epoch": 2.2188711762171476, |
|
"grad_norm": 0.02372126840054989, |
|
"learning_rate": 1.4465810260911195e-05, |
|
"loss": 0.0181, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 2.2242567858681603, |
|
"grad_norm": 0.02078492008149624, |
|
"learning_rate": 1.4366073565786325e-05, |
|
"loss": 0.0312, |
|
"step": 10325 |
|
}, |
|
{ |
|
"epoch": 2.2296423955191726, |
|
"grad_norm": 0.09992769360542297, |
|
"learning_rate": 1.4266336870661454e-05, |
|
"loss": 0.1303, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 2.2350280051701854, |
|
"grad_norm": 0.14363807439804077, |
|
"learning_rate": 1.4166600175536585e-05, |
|
"loss": 0.1266, |
|
"step": 10375 |
|
}, |
|
{ |
|
"epoch": 2.2404136148211977, |
|
"grad_norm": 0.047046031802892685, |
|
"learning_rate": 1.4066863480411714e-05, |
|
"loss": 0.0385, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.2457992244722105, |
|
"grad_norm": 0.045923490077257156, |
|
"learning_rate": 1.3967126785286844e-05, |
|
"loss": 0.0046, |
|
"step": 10425 |
|
}, |
|
{ |
|
"epoch": 2.251184834123223, |
|
"grad_norm": 21.856792449951172, |
|
"learning_rate": 1.3867390090161971e-05, |
|
"loss": 0.1563, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 2.256570443774235, |
|
"grad_norm": 0.07415134459733963, |
|
"learning_rate": 1.3767653395037103e-05, |
|
"loss": 0.1118, |
|
"step": 10475 |
|
}, |
|
{ |
|
"epoch": 2.261956053425248, |
|
"grad_norm": 0.06203525885939598, |
|
"learning_rate": 1.366791669991223e-05, |
|
"loss": 0.0484, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.26734166307626, |
|
"grad_norm": 0.03413529694080353, |
|
"learning_rate": 1.3568180004787363e-05, |
|
"loss": 0.0395, |
|
"step": 10525 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 2.930551528930664, |
|
"learning_rate": 1.3468443309662493e-05, |
|
"loss": 0.0553, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 2.2781128823782852, |
|
"grad_norm": 0.027626806870102882, |
|
"learning_rate": 1.336870661453762e-05, |
|
"loss": 0.0611, |
|
"step": 10575 |
|
}, |
|
{ |
|
"epoch": 2.2834984920292976, |
|
"grad_norm": 100.386962890625, |
|
"learning_rate": 1.3268969919412753e-05, |
|
"loss": 0.0417, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.2888841016803103, |
|
"grad_norm": 0.02713247761130333, |
|
"learning_rate": 1.316923322428788e-05, |
|
"loss": 0.0763, |
|
"step": 10625 |
|
}, |
|
{ |
|
"epoch": 2.2942697113313226, |
|
"grad_norm": 1.6741771697998047, |
|
"learning_rate": 1.3069496529163012e-05, |
|
"loss": 0.098, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 2.2996553209823354, |
|
"grad_norm": 6.76040506362915, |
|
"learning_rate": 1.2969759834038139e-05, |
|
"loss": 0.0836, |
|
"step": 10675 |
|
}, |
|
{ |
|
"epoch": 2.3050409306333477, |
|
"grad_norm": 1.2940654754638672, |
|
"learning_rate": 1.287002313891327e-05, |
|
"loss": 0.0466, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 2.31042654028436, |
|
"grad_norm": 0.4345569610595703, |
|
"learning_rate": 1.2770286443788398e-05, |
|
"loss": 0.0564, |
|
"step": 10725 |
|
}, |
|
{ |
|
"epoch": 2.3158121499353728, |
|
"grad_norm": 2.130342721939087, |
|
"learning_rate": 1.2670549748663529e-05, |
|
"loss": 0.0852, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 2.321197759586385, |
|
"grad_norm": 0.11710493266582489, |
|
"learning_rate": 1.2570813053538658e-05, |
|
"loss": 0.0509, |
|
"step": 10775 |
|
}, |
|
{ |
|
"epoch": 2.326583369237398, |
|
"grad_norm": 0.026548050343990326, |
|
"learning_rate": 1.2471076358413788e-05, |
|
"loss": 0.004, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.33196897888841, |
|
"grad_norm": 0.03190811350941658, |
|
"learning_rate": 1.2371339663288919e-05, |
|
"loss": 0.0522, |
|
"step": 10825 |
|
}, |
|
{ |
|
"epoch": 2.337354588539423, |
|
"grad_norm": 81.03248596191406, |
|
"learning_rate": 1.2271602968164048e-05, |
|
"loss": 0.0468, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 2.342740198190435, |
|
"grad_norm": 0.02296995371580124, |
|
"learning_rate": 1.2171866273039176e-05, |
|
"loss": 0.0255, |
|
"step": 10875 |
|
}, |
|
{ |
|
"epoch": 2.3481258078414475, |
|
"grad_norm": 0.020512187853455544, |
|
"learning_rate": 1.2072129577914307e-05, |
|
"loss": 0.0499, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.3535114174924603, |
|
"grad_norm": 0.2346896231174469, |
|
"learning_rate": 1.1972392882789436e-05, |
|
"loss": 0.0904, |
|
"step": 10925 |
|
}, |
|
{ |
|
"epoch": 2.3588970271434726, |
|
"grad_norm": 0.03325672820210457, |
|
"learning_rate": 1.1872656187664566e-05, |
|
"loss": 0.1062, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 2.364282636794485, |
|
"grad_norm": 0.0365658663213253, |
|
"learning_rate": 1.1772919492539695e-05, |
|
"loss": 0.0542, |
|
"step": 10975 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"grad_norm": 0.04546808823943138, |
|
"learning_rate": 1.1673182797414826e-05, |
|
"loss": 0.0828, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.37505385609651, |
|
"grad_norm": 0.037818703800439835, |
|
"learning_rate": 1.1573446102289954e-05, |
|
"loss": 0.0034, |
|
"step": 11025 |
|
}, |
|
{ |
|
"epoch": 2.3804394657475227, |
|
"grad_norm": 0.025843236595392227, |
|
"learning_rate": 1.1473709407165085e-05, |
|
"loss": 0.0419, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 2.385825075398535, |
|
"grad_norm": 0.030073661357164383, |
|
"learning_rate": 1.1373972712040214e-05, |
|
"loss": 0.1035, |
|
"step": 11075 |
|
}, |
|
{ |
|
"epoch": 2.391210685049548, |
|
"grad_norm": 1.6878631114959717, |
|
"learning_rate": 1.1274236016915344e-05, |
|
"loss": 0.0982, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.39659629470056, |
|
"grad_norm": 0.03798234090209007, |
|
"learning_rate": 1.1174499321790475e-05, |
|
"loss": 0.057, |
|
"step": 11125 |
|
}, |
|
{ |
|
"epoch": 2.4019819043515724, |
|
"grad_norm": 0.030130743980407715, |
|
"learning_rate": 1.1074762626665604e-05, |
|
"loss": 0.0424, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 2.407367514002585, |
|
"grad_norm": 0.029741058126091957, |
|
"learning_rate": 1.0975025931540734e-05, |
|
"loss": 0.0183, |
|
"step": 11175 |
|
}, |
|
{ |
|
"epoch": 2.4127531236535975, |
|
"grad_norm": 0.025296270847320557, |
|
"learning_rate": 1.0875289236415863e-05, |
|
"loss": 0.0099, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.4181387333046103, |
|
"grad_norm": 0.020655043423175812, |
|
"learning_rate": 1.0775552541290992e-05, |
|
"loss": 0.0521, |
|
"step": 11225 |
|
}, |
|
{ |
|
"epoch": 2.4235243429556226, |
|
"grad_norm": 0.26498308777809143, |
|
"learning_rate": 1.0675815846166122e-05, |
|
"loss": 0.1073, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 2.428909952606635, |
|
"grad_norm": 0.023832648992538452, |
|
"learning_rate": 1.0576079151041251e-05, |
|
"loss": 0.042, |
|
"step": 11275 |
|
}, |
|
{ |
|
"epoch": 2.4342955622576476, |
|
"grad_norm": 0.025238890200853348, |
|
"learning_rate": 1.0476342455916382e-05, |
|
"loss": 0.0765, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.43968117190866, |
|
"grad_norm": 0.029484113678336143, |
|
"learning_rate": 1.037660576079151e-05, |
|
"loss": 0.1135, |
|
"step": 11325 |
|
}, |
|
{ |
|
"epoch": 2.4450667815596727, |
|
"grad_norm": 4.394412994384766, |
|
"learning_rate": 1.0276869065666641e-05, |
|
"loss": 0.033, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 2.450452391210685, |
|
"grad_norm": 0.02717486396431923, |
|
"learning_rate": 1.017713237054177e-05, |
|
"loss": 0.0756, |
|
"step": 11375 |
|
}, |
|
{ |
|
"epoch": 2.4558380008616973, |
|
"grad_norm": 0.03741078078746796, |
|
"learning_rate": 1.0077395675416899e-05, |
|
"loss": 0.0959, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.46122361051271, |
|
"grad_norm": 0.025637760758399963, |
|
"learning_rate": 9.977658980292029e-06, |
|
"loss": 0.0421, |
|
"step": 11425 |
|
}, |
|
{ |
|
"epoch": 2.4666092201637224, |
|
"grad_norm": 0.04782960191369057, |
|
"learning_rate": 9.877922285167158e-06, |
|
"loss": 0.1026, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 2.471994829814735, |
|
"grad_norm": 0.04921013489365578, |
|
"learning_rate": 9.77818559004229e-06, |
|
"loss": 0.0067, |
|
"step": 11475 |
|
}, |
|
{ |
|
"epoch": 2.4773804394657475, |
|
"grad_norm": 0.04560457542538643, |
|
"learning_rate": 9.678448894917419e-06, |
|
"loss": 0.089, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.4827660491167602, |
|
"grad_norm": 0.02379678376019001, |
|
"learning_rate": 9.578712199792548e-06, |
|
"loss": 0.0625, |
|
"step": 11525 |
|
}, |
|
{ |
|
"epoch": 2.4881516587677726, |
|
"grad_norm": 0.02355758473277092, |
|
"learning_rate": 9.478975504667678e-06, |
|
"loss": 0.0375, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 2.493537268418785, |
|
"grad_norm": 0.025686856359243393, |
|
"learning_rate": 9.379238809542807e-06, |
|
"loss": 0.0407, |
|
"step": 11575 |
|
}, |
|
{ |
|
"epoch": 2.4989228780697976, |
|
"grad_norm": 0.028306877240538597, |
|
"learning_rate": 9.279502114417938e-06, |
|
"loss": 0.0277, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.50430848772081, |
|
"grad_norm": 0.02451007254421711, |
|
"learning_rate": 9.179765419293066e-06, |
|
"loss": 0.0431, |
|
"step": 11625 |
|
}, |
|
{ |
|
"epoch": 2.5096940973718223, |
|
"grad_norm": 3.4917030334472656, |
|
"learning_rate": 9.080028724168197e-06, |
|
"loss": 0.0778, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 2.515079707022835, |
|
"grad_norm": 0.03555026650428772, |
|
"learning_rate": 8.980292029043326e-06, |
|
"loss": 0.0597, |
|
"step": 11675 |
|
}, |
|
{ |
|
"epoch": 2.5204653166738473, |
|
"grad_norm": 0.05606693774461746, |
|
"learning_rate": 8.880555333918456e-06, |
|
"loss": 0.0789, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 2.52585092632486, |
|
"grad_norm": 0.03267287090420723, |
|
"learning_rate": 8.780818638793585e-06, |
|
"loss": 0.0212, |
|
"step": 11725 |
|
}, |
|
{ |
|
"epoch": 2.5312365359758724, |
|
"grad_norm": 0.028950348496437073, |
|
"learning_rate": 8.681081943668714e-06, |
|
"loss": 0.0142, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 2.536622145626885, |
|
"grad_norm": 0.02338344044983387, |
|
"learning_rate": 8.581345248543844e-06, |
|
"loss": 0.0022, |
|
"step": 11775 |
|
}, |
|
{ |
|
"epoch": 2.5420077552778975, |
|
"grad_norm": 0.042633187025785446, |
|
"learning_rate": 8.481608553418973e-06, |
|
"loss": 0.1394, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.5473933649289098, |
|
"grad_norm": 0.1146039292216301, |
|
"learning_rate": 8.381871858294104e-06, |
|
"loss": 0.1077, |
|
"step": 11825 |
|
}, |
|
{ |
|
"epoch": 2.5527789745799225, |
|
"grad_norm": 0.03126833215355873, |
|
"learning_rate": 8.282135163169234e-06, |
|
"loss": 0.0714, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 2.558164584230935, |
|
"grad_norm": 0.032733842730522156, |
|
"learning_rate": 8.182398468044363e-06, |
|
"loss": 0.1485, |
|
"step": 11875 |
|
}, |
|
{ |
|
"epoch": 2.563550193881947, |
|
"grad_norm": 0.5381880402565002, |
|
"learning_rate": 8.082661772919494e-06, |
|
"loss": 0.061, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 2.56893580353296, |
|
"grad_norm": 0.031703583896160126, |
|
"learning_rate": 7.982925077794622e-06, |
|
"loss": 0.0555, |
|
"step": 11925 |
|
}, |
|
{ |
|
"epoch": 2.5743214131839727, |
|
"grad_norm": 0.10838180035352707, |
|
"learning_rate": 7.883188382669753e-06, |
|
"loss": 0.0063, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 2.579707022834985, |
|
"grad_norm": 0.05079588666558266, |
|
"learning_rate": 7.783451687544882e-06, |
|
"loss": 0.0251, |
|
"step": 11975 |
|
}, |
|
{ |
|
"epoch": 2.5850926324859973, |
|
"grad_norm": 0.023224713280797005, |
|
"learning_rate": 7.683714992420012e-06, |
|
"loss": 0.0351, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.59047824213701, |
|
"grad_norm": 0.02427135780453682, |
|
"learning_rate": 7.583978297295141e-06, |
|
"loss": 0.0435, |
|
"step": 12025 |
|
}, |
|
{ |
|
"epoch": 2.5958638517880224, |
|
"grad_norm": 10.110872268676758, |
|
"learning_rate": 7.484241602170271e-06, |
|
"loss": 0.0877, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 2.6012494614390347, |
|
"grad_norm": 0.018899202346801758, |
|
"learning_rate": 7.3845049070454004e-06, |
|
"loss": 0.0266, |
|
"step": 12075 |
|
}, |
|
{ |
|
"epoch": 2.6066350710900474, |
|
"grad_norm": 2.7654731273651123, |
|
"learning_rate": 7.28476821192053e-06, |
|
"loss": 0.0766, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 2.6120206807410598, |
|
"grad_norm": 0.02472410537302494, |
|
"learning_rate": 7.18503151679566e-06, |
|
"loss": 0.0775, |
|
"step": 12125 |
|
}, |
|
{ |
|
"epoch": 2.6174062903920725, |
|
"grad_norm": 0.08125100284814835, |
|
"learning_rate": 7.085294821670789e-06, |
|
"loss": 0.033, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 2.622791900043085, |
|
"grad_norm": 0.01764783076941967, |
|
"learning_rate": 6.985558126545918e-06, |
|
"loss": 0.0032, |
|
"step": 12175 |
|
}, |
|
{ |
|
"epoch": 2.6281775096940976, |
|
"grad_norm": 0.019586993381381035, |
|
"learning_rate": 6.885821431421048e-06, |
|
"loss": 0.0463, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 2.63356311934511, |
|
"grad_norm": 1.3871238231658936, |
|
"learning_rate": 6.786084736296179e-06, |
|
"loss": 0.0267, |
|
"step": 12225 |
|
}, |
|
{ |
|
"epoch": 2.638948728996122, |
|
"grad_norm": 0.018140500411391258, |
|
"learning_rate": 6.686348041171309e-06, |
|
"loss": 0.0696, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 2.644334338647135, |
|
"grad_norm": 0.0250703152269125, |
|
"learning_rate": 6.586611346046439e-06, |
|
"loss": 0.0742, |
|
"step": 12275 |
|
}, |
|
{ |
|
"epoch": 2.6497199482981473, |
|
"grad_norm": 0.020942581817507744, |
|
"learning_rate": 6.4868746509215674e-06, |
|
"loss": 0.0302, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 2.6551055579491596, |
|
"grad_norm": 0.018071915954351425, |
|
"learning_rate": 6.387137955796697e-06, |
|
"loss": 0.0367, |
|
"step": 12325 |
|
}, |
|
{ |
|
"epoch": 2.6604911676001723, |
|
"grad_norm": 19.54036521911621, |
|
"learning_rate": 6.287401260671827e-06, |
|
"loss": 0.0825, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 2.665876777251185, |
|
"grad_norm": 0.016232356429100037, |
|
"learning_rate": 6.1876645655469564e-06, |
|
"loss": 0.0359, |
|
"step": 12375 |
|
}, |
|
{ |
|
"epoch": 2.6712623869021974, |
|
"grad_norm": 0.024337278679013252, |
|
"learning_rate": 6.087927870422086e-06, |
|
"loss": 0.0273, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 2.6766479965532097, |
|
"grad_norm": 0.28807416558265686, |
|
"learning_rate": 5.988191175297216e-06, |
|
"loss": 0.0266, |
|
"step": 12425 |
|
}, |
|
{ |
|
"epoch": 2.6820336062042225, |
|
"grad_norm": 0.01607998088002205, |
|
"learning_rate": 5.8884544801723455e-06, |
|
"loss": 0.0666, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 2.687419215855235, |
|
"grad_norm": 0.021535271778702736, |
|
"learning_rate": 5.788717785047474e-06, |
|
"loss": 0.0611, |
|
"step": 12475 |
|
}, |
|
{ |
|
"epoch": 2.692804825506247, |
|
"grad_norm": 12.746896743774414, |
|
"learning_rate": 5.688981089922604e-06, |
|
"loss": 0.101, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.69819043515726, |
|
"grad_norm": 1.019801378250122, |
|
"learning_rate": 5.5892443947977345e-06, |
|
"loss": 0.0415, |
|
"step": 12525 |
|
}, |
|
{ |
|
"epoch": 2.703576044808272, |
|
"grad_norm": 0.092778280377388, |
|
"learning_rate": 5.489507699672864e-06, |
|
"loss": 0.0365, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 2.708961654459285, |
|
"grad_norm": 0.5808451771736145, |
|
"learning_rate": 5.389771004547994e-06, |
|
"loss": 0.0983, |
|
"step": 12575 |
|
}, |
|
{ |
|
"epoch": 2.7143472641102973, |
|
"grad_norm": 0.17807550728321075, |
|
"learning_rate": 5.2900343094231235e-06, |
|
"loss": 0.0214, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 2.71973287376131, |
|
"grad_norm": 0.031068023294210434, |
|
"learning_rate": 5.190297614298253e-06, |
|
"loss": 0.0623, |
|
"step": 12625 |
|
}, |
|
{ |
|
"epoch": 2.7251184834123223, |
|
"grad_norm": 0.023084493353962898, |
|
"learning_rate": 5.090560919173382e-06, |
|
"loss": 0.0154, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 2.7305040930633346, |
|
"grad_norm": 0.0403926819562912, |
|
"learning_rate": 4.990824224048512e-06, |
|
"loss": 0.0358, |
|
"step": 12675 |
|
}, |
|
{ |
|
"epoch": 2.7358897027143474, |
|
"grad_norm": 0.43667861819267273, |
|
"learning_rate": 4.891087528923642e-06, |
|
"loss": 0.0016, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 2.7412753123653597, |
|
"grad_norm": 0.07555987685918808, |
|
"learning_rate": 4.791350833798772e-06, |
|
"loss": 0.0721, |
|
"step": 12725 |
|
}, |
|
{ |
|
"epoch": 2.746660922016372, |
|
"grad_norm": 0.022048979997634888, |
|
"learning_rate": 4.6916141386739015e-06, |
|
"loss": 0.0565, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 2.7520465316673848, |
|
"grad_norm": 0.020737633109092712, |
|
"learning_rate": 4.591877443549031e-06, |
|
"loss": 0.038, |
|
"step": 12775 |
|
}, |
|
{ |
|
"epoch": 2.757432141318397, |
|
"grad_norm": 2.780998945236206, |
|
"learning_rate": 4.492140748424161e-06, |
|
"loss": 0.1345, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 2.76281775096941, |
|
"grad_norm": 0.0388825386762619, |
|
"learning_rate": 4.39240405329929e-06, |
|
"loss": 0.0194, |
|
"step": 12825 |
|
}, |
|
{ |
|
"epoch": 2.768203360620422, |
|
"grad_norm": 7.417761325836182, |
|
"learning_rate": 4.292667358174419e-06, |
|
"loss": 0.0343, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 2.773588970271435, |
|
"grad_norm": 0.02559277042746544, |
|
"learning_rate": 4.192930663049549e-06, |
|
"loss": 0.002, |
|
"step": 12875 |
|
}, |
|
{ |
|
"epoch": 2.7789745799224472, |
|
"grad_norm": 0.019186072051525116, |
|
"learning_rate": 4.0931939679246795e-06, |
|
"loss": 0.0732, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 2.7843601895734595, |
|
"grad_norm": 0.034625016152858734, |
|
"learning_rate": 3.993457272799809e-06, |
|
"loss": 0.0631, |
|
"step": 12925 |
|
}, |
|
{ |
|
"epoch": 2.7897457992244723, |
|
"grad_norm": 0.07090263068675995, |
|
"learning_rate": 3.893720577674939e-06, |
|
"loss": 0.0705, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 2.7951314088754846, |
|
"grad_norm": 0.01826942153275013, |
|
"learning_rate": 3.793983882550068e-06, |
|
"loss": 0.0212, |
|
"step": 12975 |
|
}, |
|
{ |
|
"epoch": 2.800517018526497, |
|
"grad_norm": 7.512089729309082, |
|
"learning_rate": 3.6942471874251977e-06, |
|
"loss": 0.0609, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.8059026281775097, |
|
"grad_norm": 92.8465805053711, |
|
"learning_rate": 3.594510492300327e-06, |
|
"loss": 0.0397, |
|
"step": 13025 |
|
}, |
|
{ |
|
"epoch": 2.8112882378285224, |
|
"grad_norm": 2.2542502880096436, |
|
"learning_rate": 3.4947737971754566e-06, |
|
"loss": 0.0542, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 2.8166738474795348, |
|
"grad_norm": 0.026438845321536064, |
|
"learning_rate": 3.395037102050587e-06, |
|
"loss": 0.0257, |
|
"step": 13075 |
|
}, |
|
{ |
|
"epoch": 2.822059457130547, |
|
"grad_norm": 0.027188275009393692, |
|
"learning_rate": 3.2953004069257164e-06, |
|
"loss": 0.0357, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 2.82744506678156, |
|
"grad_norm": 0.023801114410161972, |
|
"learning_rate": 3.195563711800846e-06, |
|
"loss": 0.0453, |
|
"step": 13125 |
|
}, |
|
{ |
|
"epoch": 2.832830676432572, |
|
"grad_norm": 0.10319499671459198, |
|
"learning_rate": 3.0958270166759757e-06, |
|
"loss": 0.0796, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 2.8382162860835844, |
|
"grad_norm": 0.031992778182029724, |
|
"learning_rate": 2.9960903215511054e-06, |
|
"loss": 0.0505, |
|
"step": 13175 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"grad_norm": 0.12323871999979019, |
|
"learning_rate": 2.8963536264262346e-06, |
|
"loss": 0.0366, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 2.8489875053856095, |
|
"grad_norm": 0.019458066672086716, |
|
"learning_rate": 2.7966169313013647e-06, |
|
"loss": 0.0232, |
|
"step": 13225 |
|
}, |
|
{ |
|
"epoch": 2.8543731150366223, |
|
"grad_norm": 0.024557072669267654, |
|
"learning_rate": 2.6968802361764944e-06, |
|
"loss": 0.0745, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 2.8597587246876346, |
|
"grad_norm": 5.838003635406494, |
|
"learning_rate": 2.5971435410516236e-06, |
|
"loss": 0.0553, |
|
"step": 13275 |
|
}, |
|
{ |
|
"epoch": 2.8651443343386473, |
|
"grad_norm": 1.81170654296875, |
|
"learning_rate": 2.4974068459267533e-06, |
|
"loss": 0.0341, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 2.8705299439896597, |
|
"grad_norm": 4.6604180335998535, |
|
"learning_rate": 2.3976701508018834e-06, |
|
"loss": 0.0166, |
|
"step": 13325 |
|
}, |
|
{ |
|
"epoch": 2.875915553640672, |
|
"grad_norm": 0.9680020809173584, |
|
"learning_rate": 2.2979334556770127e-06, |
|
"loss": 0.0575, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 2.8813011632916847, |
|
"grad_norm": 0.044519729912281036, |
|
"learning_rate": 2.1981967605521423e-06, |
|
"loss": 0.0795, |
|
"step": 13375 |
|
}, |
|
{ |
|
"epoch": 2.886686772942697, |
|
"grad_norm": 0.019457995891571045, |
|
"learning_rate": 2.098460065427272e-06, |
|
"loss": 0.0278, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 2.8920723825937094, |
|
"grad_norm": 0.017885301262140274, |
|
"learning_rate": 1.9987233703024017e-06, |
|
"loss": 0.0149, |
|
"step": 13425 |
|
}, |
|
{ |
|
"epoch": 2.897457992244722, |
|
"grad_norm": 37.20232009887695, |
|
"learning_rate": 1.8989866751775313e-06, |
|
"loss": 0.0252, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 2.902843601895735, |
|
"grad_norm": 0.03464394435286522, |
|
"learning_rate": 1.799249980052661e-06, |
|
"loss": 0.0956, |
|
"step": 13475 |
|
}, |
|
{ |
|
"epoch": 2.908229211546747, |
|
"grad_norm": 0.08744242042303085, |
|
"learning_rate": 1.6995132849277909e-06, |
|
"loss": 0.0632, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.9136148211977595, |
|
"grad_norm": 0.014135221019387245, |
|
"learning_rate": 1.5997765898029205e-06, |
|
"loss": 0.0276, |
|
"step": 13525 |
|
}, |
|
{ |
|
"epoch": 2.9190004308487723, |
|
"grad_norm": 0.06255125254392624, |
|
"learning_rate": 1.50003989467805e-06, |
|
"loss": 0.0825, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 2.9243860404997846, |
|
"grad_norm": 10.135443687438965, |
|
"learning_rate": 1.4003031995531797e-06, |
|
"loss": 0.0442, |
|
"step": 13575 |
|
}, |
|
{ |
|
"epoch": 2.929771650150797, |
|
"grad_norm": 0.01875634863972664, |
|
"learning_rate": 1.3005665044283093e-06, |
|
"loss": 0.0304, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 2.9351572598018096, |
|
"grad_norm": 0.13877597451210022, |
|
"learning_rate": 1.200829809303439e-06, |
|
"loss": 0.0367, |
|
"step": 13625 |
|
}, |
|
{ |
|
"epoch": 2.940542869452822, |
|
"grad_norm": 0.016782578080892563, |
|
"learning_rate": 1.1010931141785687e-06, |
|
"loss": 0.1222, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 2.9459284791038347, |
|
"grad_norm": 0.03902614489197731, |
|
"learning_rate": 1.0013564190536983e-06, |
|
"loss": 0.1027, |
|
"step": 13675 |
|
}, |
|
{ |
|
"epoch": 2.951314088754847, |
|
"grad_norm": 0.017981288954615593, |
|
"learning_rate": 9.016197239288279e-07, |
|
"loss": 0.014, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 2.9566996984058598, |
|
"grad_norm": 0.016431229189038277, |
|
"learning_rate": 8.018830288039577e-07, |
|
"loss": 0.016, |
|
"step": 13725 |
|
}, |
|
{ |
|
"epoch": 2.962085308056872, |
|
"grad_norm": 0.031093724071979523, |
|
"learning_rate": 7.021463336790872e-07, |
|
"loss": 0.0563, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 2.9674709177078844, |
|
"grad_norm": 0.06026161089539528, |
|
"learning_rate": 6.024096385542169e-07, |
|
"loss": 0.0381, |
|
"step": 13775 |
|
}, |
|
{ |
|
"epoch": 2.972856527358897, |
|
"grad_norm": 7.449411869049072, |
|
"learning_rate": 5.026729434293466e-07, |
|
"loss": 0.0633, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 2.9782421370099095, |
|
"grad_norm": 0.022474773228168488, |
|
"learning_rate": 4.0293624830447624e-07, |
|
"loss": 0.0601, |
|
"step": 13825 |
|
}, |
|
{ |
|
"epoch": 2.983627746660922, |
|
"grad_norm": 0.020873403176665306, |
|
"learning_rate": 3.0319955317960585e-07, |
|
"loss": 0.0036, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 2.9890133563119345, |
|
"grad_norm": 0.0238470621407032, |
|
"learning_rate": 2.0346285805473552e-07, |
|
"loss": 0.0519, |
|
"step": 13875 |
|
}, |
|
{ |
|
"epoch": 2.994398965962947, |
|
"grad_norm": 0.03307591751217842, |
|
"learning_rate": 1.0372616292986516e-07, |
|
"loss": 0.002, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 2.9997845756139596, |
|
"grad_norm": 0.9745203852653503, |
|
"learning_rate": 3.989467804994814e-09, |
|
"loss": 0.058, |
|
"step": 13925 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9739308413228482, |
|
"eval_auc": 0.9959614324316973, |
|
"eval_f1": 0.9739448751076658, |
|
"eval_loss": 0.12016157805919647, |
|
"eval_precision": 0.9701844701844702, |
|
"eval_recall": 0.9777345438824038, |
|
"eval_runtime": 1664.4298, |
|
"eval_samples_per_second": 5.577, |
|
"eval_steps_per_second": 0.349, |
|
"step": 13926 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 13926, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7327379780720640.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|