Alejo760's picture
Upload folder using huggingface_hub
3acd455 verified
{
"best_metric": 0.12016157805919647,
"best_model_checkpoint": "autotrain-ledky-lrgbn/checkpoint-13926",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 13926,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005385609651012494,
"grad_norm": 1.214734673500061,
"learning_rate": 8.973438621679828e-07,
"loss": 0.696,
"step": 25
},
{
"epoch": 0.010771219302024989,
"grad_norm": 0.9307985901832581,
"learning_rate": 1.7946877243359655e-06,
"loss": 0.6901,
"step": 50
},
{
"epoch": 0.016156828953037484,
"grad_norm": 1.196394443511963,
"learning_rate": 2.6920315865039484e-06,
"loss": 0.6818,
"step": 75
},
{
"epoch": 0.021542438604049977,
"grad_norm": 1.0026447772979736,
"learning_rate": 3.589375448671931e-06,
"loss": 0.6614,
"step": 100
},
{
"epoch": 0.026928048255062474,
"grad_norm": 1.2615079879760742,
"learning_rate": 4.486719310839914e-06,
"loss": 0.6209,
"step": 125
},
{
"epoch": 0.03231365790607497,
"grad_norm": 2.165177583694458,
"learning_rate": 5.384063173007897e-06,
"loss": 0.5555,
"step": 150
},
{
"epoch": 0.03769926755708746,
"grad_norm": 1.6065446138381958,
"learning_rate": 6.2814070351758795e-06,
"loss": 0.4827,
"step": 175
},
{
"epoch": 0.043084877208099955,
"grad_norm": 1.00612473487854,
"learning_rate": 7.178750897343862e-06,
"loss": 0.415,
"step": 200
},
{
"epoch": 0.048470486859112455,
"grad_norm": 1.8842555284500122,
"learning_rate": 8.076094759511846e-06,
"loss": 0.3724,
"step": 225
},
{
"epoch": 0.05385609651012495,
"grad_norm": 1.875260353088379,
"learning_rate": 8.973438621679828e-06,
"loss": 0.2979,
"step": 250
},
{
"epoch": 0.05924170616113744,
"grad_norm": 2.970935106277466,
"learning_rate": 9.870782483847811e-06,
"loss": 0.2478,
"step": 275
},
{
"epoch": 0.06462731581214994,
"grad_norm": 0.5585727691650391,
"learning_rate": 1.0768126346015794e-05,
"loss": 0.2456,
"step": 300
},
{
"epoch": 0.07001292546316243,
"grad_norm": 0.5557870864868164,
"learning_rate": 1.1665470208183776e-05,
"loss": 0.2346,
"step": 325
},
{
"epoch": 0.07539853511417492,
"grad_norm": 5.3282365798950195,
"learning_rate": 1.2562814070351759e-05,
"loss": 0.2192,
"step": 350
},
{
"epoch": 0.08078414476518742,
"grad_norm": 0.8856891989707947,
"learning_rate": 1.3460157932519743e-05,
"loss": 0.2935,
"step": 375
},
{
"epoch": 0.08616975441619991,
"grad_norm": 0.9343020915985107,
"learning_rate": 1.4357501794687724e-05,
"loss": 0.2117,
"step": 400
},
{
"epoch": 0.0915553640672124,
"grad_norm": 8.641404151916504,
"learning_rate": 1.5254845656855707e-05,
"loss": 0.2256,
"step": 425
},
{
"epoch": 0.09694097371822491,
"grad_norm": 0.31070542335510254,
"learning_rate": 1.615218951902369e-05,
"loss": 0.2155,
"step": 450
},
{
"epoch": 0.1023265833692374,
"grad_norm": 2.8748903274536133,
"learning_rate": 1.7049533381191674e-05,
"loss": 0.2272,
"step": 475
},
{
"epoch": 0.1077121930202499,
"grad_norm": 0.5525055527687073,
"learning_rate": 1.7946877243359657e-05,
"loss": 0.3538,
"step": 500
},
{
"epoch": 0.11309780267126239,
"grad_norm": 23.755407333374023,
"learning_rate": 1.884422110552764e-05,
"loss": 0.1872,
"step": 525
},
{
"epoch": 0.11848341232227488,
"grad_norm": 34.48551559448242,
"learning_rate": 1.9741564967695622e-05,
"loss": 0.1676,
"step": 550
},
{
"epoch": 0.12386902197328738,
"grad_norm": 1.0720394849777222,
"learning_rate": 2.0638908829863605e-05,
"loss": 0.258,
"step": 575
},
{
"epoch": 0.12925463162429987,
"grad_norm": 0.2333178073167801,
"learning_rate": 2.1536252692031587e-05,
"loss": 0.2356,
"step": 600
},
{
"epoch": 0.13464024127531238,
"grad_norm": 28.830148696899414,
"learning_rate": 2.243359655419957e-05,
"loss": 0.2483,
"step": 625
},
{
"epoch": 0.14002585092632486,
"grad_norm": 0.38484323024749756,
"learning_rate": 2.3330940416367553e-05,
"loss": 0.2576,
"step": 650
},
{
"epoch": 0.14541146057733736,
"grad_norm": 9.458002090454102,
"learning_rate": 2.4228284278535535e-05,
"loss": 0.2542,
"step": 675
},
{
"epoch": 0.15079707022834984,
"grad_norm": 0.28468650579452515,
"learning_rate": 2.5125628140703518e-05,
"loss": 0.2451,
"step": 700
},
{
"epoch": 0.15618267987936235,
"grad_norm": 4.210727691650391,
"learning_rate": 2.6022972002871504e-05,
"loss": 0.2512,
"step": 725
},
{
"epoch": 0.16156828953037483,
"grad_norm": 0.2801736295223236,
"learning_rate": 2.6920315865039487e-05,
"loss": 0.2245,
"step": 750
},
{
"epoch": 0.16695389918138734,
"grad_norm": 0.18515126407146454,
"learning_rate": 2.781765972720747e-05,
"loss": 0.2455,
"step": 775
},
{
"epoch": 0.17233950883239982,
"grad_norm": 24.687742233276367,
"learning_rate": 2.871500358937545e-05,
"loss": 0.1717,
"step": 800
},
{
"epoch": 0.17772511848341233,
"grad_norm": 0.31296807527542114,
"learning_rate": 2.961234745154343e-05,
"loss": 0.3381,
"step": 825
},
{
"epoch": 0.1831107281344248,
"grad_norm": 0.2845638692378998,
"learning_rate": 3.0509691313711414e-05,
"loss": 0.2039,
"step": 850
},
{
"epoch": 0.1884963377854373,
"grad_norm": 9.822659492492676,
"learning_rate": 3.14070351758794e-05,
"loss": 0.2695,
"step": 875
},
{
"epoch": 0.19388194743644982,
"grad_norm": 11.07955265045166,
"learning_rate": 3.230437903804738e-05,
"loss": 0.1941,
"step": 900
},
{
"epoch": 0.1992675570874623,
"grad_norm": 0.13381847739219666,
"learning_rate": 3.3201722900215365e-05,
"loss": 0.1602,
"step": 925
},
{
"epoch": 0.2046531667384748,
"grad_norm": 0.3931732177734375,
"learning_rate": 3.409906676238335e-05,
"loss": 0.1306,
"step": 950
},
{
"epoch": 0.2100387763894873,
"grad_norm": 59.89822006225586,
"learning_rate": 3.499641062455133e-05,
"loss": 0.218,
"step": 975
},
{
"epoch": 0.2154243860404998,
"grad_norm": 2.6214139461517334,
"learning_rate": 3.5893754486719313e-05,
"loss": 0.1886,
"step": 1000
},
{
"epoch": 0.22080999569151227,
"grad_norm": 1.7539465427398682,
"learning_rate": 3.6791098348887296e-05,
"loss": 0.1999,
"step": 1025
},
{
"epoch": 0.22619560534252478,
"grad_norm": 0.13422812521457672,
"learning_rate": 3.768844221105528e-05,
"loss": 0.1967,
"step": 1050
},
{
"epoch": 0.23158121499353726,
"grad_norm": 17.16714859008789,
"learning_rate": 3.858578607322326e-05,
"loss": 0.2197,
"step": 1075
},
{
"epoch": 0.23696682464454977,
"grad_norm": 4.463374614715576,
"learning_rate": 3.9483129935391244e-05,
"loss": 0.1428,
"step": 1100
},
{
"epoch": 0.24235243429556225,
"grad_norm": 14.286873817443848,
"learning_rate": 4.038047379755923e-05,
"loss": 0.154,
"step": 1125
},
{
"epoch": 0.24773804394657475,
"grad_norm": 1.0160866975784302,
"learning_rate": 4.127781765972721e-05,
"loss": 0.1008,
"step": 1150
},
{
"epoch": 0.25312365359758726,
"grad_norm": 0.08231418579816818,
"learning_rate": 4.217516152189519e-05,
"loss": 0.1716,
"step": 1175
},
{
"epoch": 0.25850926324859974,
"grad_norm": 0.4267788231372833,
"learning_rate": 4.3072505384063175e-05,
"loss": 0.2282,
"step": 1200
},
{
"epoch": 0.2638948728996122,
"grad_norm": 8.705976486206055,
"learning_rate": 4.396984924623116e-05,
"loss": 0.1687,
"step": 1225
},
{
"epoch": 0.26928048255062476,
"grad_norm": 0.08920421451330185,
"learning_rate": 4.486719310839914e-05,
"loss": 0.1566,
"step": 1250
},
{
"epoch": 0.27466609220163724,
"grad_norm": 136.6876220703125,
"learning_rate": 4.576453697056712e-05,
"loss": 0.3009,
"step": 1275
},
{
"epoch": 0.2800517018526497,
"grad_norm": 97.63652038574219,
"learning_rate": 4.6661880832735106e-05,
"loss": 0.1277,
"step": 1300
},
{
"epoch": 0.2854373115036622,
"grad_norm": 0.1028209999203682,
"learning_rate": 4.755922469490309e-05,
"loss": 0.1079,
"step": 1325
},
{
"epoch": 0.29082292115467473,
"grad_norm": 0.1444273144006729,
"learning_rate": 4.845656855707107e-05,
"loss": 0.2049,
"step": 1350
},
{
"epoch": 0.2962085308056872,
"grad_norm": 0.07809091359376907,
"learning_rate": 4.9353912419239054e-05,
"loss": 0.1565,
"step": 1375
},
{
"epoch": 0.3015941404566997,
"grad_norm": 27.804719924926758,
"learning_rate": 4.9972073725365035e-05,
"loss": 0.1187,
"step": 1400
},
{
"epoch": 0.30697975010771217,
"grad_norm": 2.5913608074188232,
"learning_rate": 4.987233703024017e-05,
"loss": 0.2541,
"step": 1425
},
{
"epoch": 0.3123653597587247,
"grad_norm": 0.08927720040082932,
"learning_rate": 4.97726003351153e-05,
"loss": 0.1258,
"step": 1450
},
{
"epoch": 0.3177509694097372,
"grad_norm": 11.248839378356934,
"learning_rate": 4.967286363999043e-05,
"loss": 0.2244,
"step": 1475
},
{
"epoch": 0.32313657906074966,
"grad_norm": 4.807493686676025,
"learning_rate": 4.957312694486556e-05,
"loss": 0.1561,
"step": 1500
},
{
"epoch": 0.3285221887117622,
"grad_norm": 2.7352848052978516,
"learning_rate": 4.9473390249740684e-05,
"loss": 0.2706,
"step": 1525
},
{
"epoch": 0.3339077983627747,
"grad_norm": 2.721325159072876,
"learning_rate": 4.937365355461582e-05,
"loss": 0.1181,
"step": 1550
},
{
"epoch": 0.33929340801378716,
"grad_norm": 0.08835005760192871,
"learning_rate": 4.927391685949095e-05,
"loss": 0.0849,
"step": 1575
},
{
"epoch": 0.34467901766479964,
"grad_norm": 0.06279947608709335,
"learning_rate": 4.917418016436607e-05,
"loss": 0.0603,
"step": 1600
},
{
"epoch": 0.35006462731581217,
"grad_norm": 5.033148765563965,
"learning_rate": 4.9074443469241206e-05,
"loss": 0.1982,
"step": 1625
},
{
"epoch": 0.35545023696682465,
"grad_norm": 19.948888778686523,
"learning_rate": 4.897470677411633e-05,
"loss": 0.1951,
"step": 1650
},
{
"epoch": 0.36083584661783713,
"grad_norm": 0.15091180801391602,
"learning_rate": 4.887497007899147e-05,
"loss": 0.1967,
"step": 1675
},
{
"epoch": 0.3662214562688496,
"grad_norm": 11.530139923095703,
"learning_rate": 4.8775233383866594e-05,
"loss": 0.2055,
"step": 1700
},
{
"epoch": 0.37160706591986215,
"grad_norm": 1.3459250926971436,
"learning_rate": 4.867549668874172e-05,
"loss": 0.165,
"step": 1725
},
{
"epoch": 0.3769926755708746,
"grad_norm": 0.1419544219970703,
"learning_rate": 4.8575759993616855e-05,
"loss": 0.2761,
"step": 1750
},
{
"epoch": 0.3823782852218871,
"grad_norm": 0.0771302655339241,
"learning_rate": 4.847602329849198e-05,
"loss": 0.1745,
"step": 1775
},
{
"epoch": 0.38776389487289964,
"grad_norm": 7.464870452880859,
"learning_rate": 4.8376286603367116e-05,
"loss": 0.1824,
"step": 1800
},
{
"epoch": 0.3931495045239121,
"grad_norm": 8.839171409606934,
"learning_rate": 4.827654990824224e-05,
"loss": 0.2336,
"step": 1825
},
{
"epoch": 0.3985351141749246,
"grad_norm": 2.038132667541504,
"learning_rate": 4.817681321311737e-05,
"loss": 0.3063,
"step": 1850
},
{
"epoch": 0.4039207238259371,
"grad_norm": 1.8243613243103027,
"learning_rate": 4.8077076517992504e-05,
"loss": 0.1202,
"step": 1875
},
{
"epoch": 0.4093063334769496,
"grad_norm": 0.09693591296672821,
"learning_rate": 4.797733982286763e-05,
"loss": 0.1442,
"step": 1900
},
{
"epoch": 0.4146919431279621,
"grad_norm": 4.965816020965576,
"learning_rate": 4.787760312774276e-05,
"loss": 0.2029,
"step": 1925
},
{
"epoch": 0.4200775527789746,
"grad_norm": 0.06871023029088974,
"learning_rate": 4.777786643261789e-05,
"loss": 0.0826,
"step": 1950
},
{
"epoch": 0.42546316242998705,
"grad_norm": 0.0709209218621254,
"learning_rate": 4.767812973749302e-05,
"loss": 0.1575,
"step": 1975
},
{
"epoch": 0.4308487720809996,
"grad_norm": 1.0321829319000244,
"learning_rate": 4.7578393042368154e-05,
"loss": 0.1481,
"step": 2000
},
{
"epoch": 0.43623438173201207,
"grad_norm": 0.05471009761095047,
"learning_rate": 4.747865634724328e-05,
"loss": 0.0653,
"step": 2025
},
{
"epoch": 0.44161999138302455,
"grad_norm": 0.5579153299331665,
"learning_rate": 4.737891965211841e-05,
"loss": 0.1818,
"step": 2050
},
{
"epoch": 0.447005601034037,
"grad_norm": 7.742904186248779,
"learning_rate": 4.727918295699354e-05,
"loss": 0.1663,
"step": 2075
},
{
"epoch": 0.45239121068504956,
"grad_norm": 0.26534390449523926,
"learning_rate": 4.717944626186867e-05,
"loss": 0.2548,
"step": 2100
},
{
"epoch": 0.45777682033606204,
"grad_norm": 0.060189589858055115,
"learning_rate": 4.7079709566743796e-05,
"loss": 0.1214,
"step": 2125
},
{
"epoch": 0.4631624299870745,
"grad_norm": 2.2301554679870605,
"learning_rate": 4.697997287161893e-05,
"loss": 0.1254,
"step": 2150
},
{
"epoch": 0.46854803963808705,
"grad_norm": 31.325176239013672,
"learning_rate": 4.688023617649406e-05,
"loss": 0.2186,
"step": 2175
},
{
"epoch": 0.47393364928909953,
"grad_norm": 0.8820040822029114,
"learning_rate": 4.678049948136919e-05,
"loss": 0.1814,
"step": 2200
},
{
"epoch": 0.479319258940112,
"grad_norm": 2.4866137504577637,
"learning_rate": 4.668076278624432e-05,
"loss": 0.1301,
"step": 2225
},
{
"epoch": 0.4847048685911245,
"grad_norm": 0.0820331797003746,
"learning_rate": 4.6581026091119445e-05,
"loss": 0.1845,
"step": 2250
},
{
"epoch": 0.49009047824213703,
"grad_norm": 36.052513122558594,
"learning_rate": 4.648128939599458e-05,
"loss": 0.1782,
"step": 2275
},
{
"epoch": 0.4954760878931495,
"grad_norm": 0.08009187877178192,
"learning_rate": 4.6381552700869706e-05,
"loss": 0.1326,
"step": 2300
},
{
"epoch": 0.500861697544162,
"grad_norm": 1.324997067451477,
"learning_rate": 4.628181600574484e-05,
"loss": 0.2862,
"step": 2325
},
{
"epoch": 0.5062473071951745,
"grad_norm": 0.17933067679405212,
"learning_rate": 4.618207931061996e-05,
"loss": 0.1677,
"step": 2350
},
{
"epoch": 0.511632916846187,
"grad_norm": 0.22597669064998627,
"learning_rate": 4.6082342615495094e-05,
"loss": 0.2077,
"step": 2375
},
{
"epoch": 0.5170185264971995,
"grad_norm": 1.9283539056777954,
"learning_rate": 4.598260592037023e-05,
"loss": 0.227,
"step": 2400
},
{
"epoch": 0.522404136148212,
"grad_norm": 0.21102119982242584,
"learning_rate": 4.5882869225245355e-05,
"loss": 0.2177,
"step": 2425
},
{
"epoch": 0.5277897457992244,
"grad_norm": 16.233251571655273,
"learning_rate": 4.578313253012048e-05,
"loss": 0.1349,
"step": 2450
},
{
"epoch": 0.533175355450237,
"grad_norm": 12.861350059509277,
"learning_rate": 4.568339583499561e-05,
"loss": 0.2012,
"step": 2475
},
{
"epoch": 0.5385609651012495,
"grad_norm": 29.975788116455078,
"learning_rate": 4.5583659139870744e-05,
"loss": 0.1332,
"step": 2500
},
{
"epoch": 0.5439465747522619,
"grad_norm": 0.08081357926130295,
"learning_rate": 4.548392244474588e-05,
"loss": 0.1514,
"step": 2525
},
{
"epoch": 0.5493321844032745,
"grad_norm": 72.80885314941406,
"learning_rate": 4.5384185749621005e-05,
"loss": 0.2106,
"step": 2550
},
{
"epoch": 0.5547177940542869,
"grad_norm": 0.0727340430021286,
"learning_rate": 4.528444905449613e-05,
"loss": 0.1395,
"step": 2575
},
{
"epoch": 0.5601034037052994,
"grad_norm": 0.10599514842033386,
"learning_rate": 4.518471235937126e-05,
"loss": 0.1318,
"step": 2600
},
{
"epoch": 0.565489013356312,
"grad_norm": 0.09759501367807388,
"learning_rate": 4.508497566424639e-05,
"loss": 0.0796,
"step": 2625
},
{
"epoch": 0.5708746230073244,
"grad_norm": 0.15663489699363708,
"learning_rate": 4.4985238969121527e-05,
"loss": 0.1019,
"step": 2650
},
{
"epoch": 0.5762602326583369,
"grad_norm": 0.13662685453891754,
"learning_rate": 4.488550227399665e-05,
"loss": 0.1396,
"step": 2675
},
{
"epoch": 0.5816458423093495,
"grad_norm": 0.10818319767713547,
"learning_rate": 4.478576557887178e-05,
"loss": 0.1455,
"step": 2700
},
{
"epoch": 0.5870314519603619,
"grad_norm": 0.11137444525957108,
"learning_rate": 4.468602888374691e-05,
"loss": 0.1616,
"step": 2725
},
{
"epoch": 0.5924170616113744,
"grad_norm": 0.1452583223581314,
"learning_rate": 4.458629218862204e-05,
"loss": 0.1445,
"step": 2750
},
{
"epoch": 0.597802671262387,
"grad_norm": 10.563451766967773,
"learning_rate": 4.448655549349717e-05,
"loss": 0.1514,
"step": 2775
},
{
"epoch": 0.6031882809133994,
"grad_norm": 14.584248542785645,
"learning_rate": 4.4386818798372296e-05,
"loss": 0.1717,
"step": 2800
},
{
"epoch": 0.6085738905644119,
"grad_norm": 12.885149955749512,
"learning_rate": 4.428708210324743e-05,
"loss": 0.2395,
"step": 2825
},
{
"epoch": 0.6139595002154243,
"grad_norm": 2.0140767097473145,
"learning_rate": 4.418734540812256e-05,
"loss": 0.2966,
"step": 2850
},
{
"epoch": 0.6193451098664369,
"grad_norm": 0.11748749762773514,
"learning_rate": 4.4087608712997684e-05,
"loss": 0.083,
"step": 2875
},
{
"epoch": 0.6247307195174494,
"grad_norm": 0.058700062334537506,
"learning_rate": 4.398787201787282e-05,
"loss": 0.1189,
"step": 2900
},
{
"epoch": 0.6301163291684618,
"grad_norm": 1.4539530277252197,
"learning_rate": 4.3888135322747945e-05,
"loss": 0.1992,
"step": 2925
},
{
"epoch": 0.6355019388194744,
"grad_norm": 0.36291152238845825,
"learning_rate": 4.378839862762308e-05,
"loss": 0.0857,
"step": 2950
},
{
"epoch": 0.6408875484704869,
"grad_norm": 1.1221033334732056,
"learning_rate": 4.3688661932498206e-05,
"loss": 0.2207,
"step": 2975
},
{
"epoch": 0.6462731581214993,
"grad_norm": 1.3202532529830933,
"learning_rate": 4.3588925237373333e-05,
"loss": 0.1268,
"step": 3000
},
{
"epoch": 0.6516587677725119,
"grad_norm": 0.23690970242023468,
"learning_rate": 4.348918854224847e-05,
"loss": 0.2074,
"step": 3025
},
{
"epoch": 0.6570443774235244,
"grad_norm": 0.39078834652900696,
"learning_rate": 4.3389451847123595e-05,
"loss": 0.0893,
"step": 3050
},
{
"epoch": 0.6624299870745368,
"grad_norm": 2.198420286178589,
"learning_rate": 4.328971515199873e-05,
"loss": 0.1987,
"step": 3075
},
{
"epoch": 0.6678155967255494,
"grad_norm": 0.14714717864990234,
"learning_rate": 4.3189978456873856e-05,
"loss": 0.1911,
"step": 3100
},
{
"epoch": 0.6732012063765618,
"grad_norm": 0.07894087582826614,
"learning_rate": 4.309024176174898e-05,
"loss": 0.1334,
"step": 3125
},
{
"epoch": 0.6785868160275743,
"grad_norm": 0.11621283739805222,
"learning_rate": 4.2990505066624117e-05,
"loss": 0.0956,
"step": 3150
},
{
"epoch": 0.6839724256785868,
"grad_norm": 0.15330371260643005,
"learning_rate": 4.2890768371499244e-05,
"loss": 0.1856,
"step": 3175
},
{
"epoch": 0.6893580353295993,
"grad_norm": 100.52692413330078,
"learning_rate": 4.279103167637437e-05,
"loss": 0.1754,
"step": 3200
},
{
"epoch": 0.6947436449806118,
"grad_norm": 1.5307726860046387,
"learning_rate": 4.2691294981249505e-05,
"loss": 0.1644,
"step": 3225
},
{
"epoch": 0.7001292546316243,
"grad_norm": 0.1815415918827057,
"learning_rate": 4.259155828612463e-05,
"loss": 0.1715,
"step": 3250
},
{
"epoch": 0.7055148642826368,
"grad_norm": 0.06501147896051407,
"learning_rate": 4.2491821590999766e-05,
"loss": 0.0786,
"step": 3275
},
{
"epoch": 0.7109004739336493,
"grad_norm": 13.024656295776367,
"learning_rate": 4.239208489587489e-05,
"loss": 0.1299,
"step": 3300
},
{
"epoch": 0.7162860835846618,
"grad_norm": 0.061265505850315094,
"learning_rate": 4.229234820075002e-05,
"loss": 0.1553,
"step": 3325
},
{
"epoch": 0.7216716932356743,
"grad_norm": 0.15151949226856232,
"learning_rate": 4.2192611505625154e-05,
"loss": 0.1563,
"step": 3350
},
{
"epoch": 0.7270573028866868,
"grad_norm": 0.37678295373916626,
"learning_rate": 4.209287481050028e-05,
"loss": 0.1824,
"step": 3375
},
{
"epoch": 0.7324429125376992,
"grad_norm": 0.22883647680282593,
"learning_rate": 4.1993138115375415e-05,
"loss": 0.2303,
"step": 3400
},
{
"epoch": 0.7378285221887118,
"grad_norm": 0.123573899269104,
"learning_rate": 4.189340142025054e-05,
"loss": 0.1554,
"step": 3425
},
{
"epoch": 0.7432141318397243,
"grad_norm": 7.3934431076049805,
"learning_rate": 4.179366472512567e-05,
"loss": 0.1949,
"step": 3450
},
{
"epoch": 0.7485997414907367,
"grad_norm": 0.07116091251373291,
"learning_rate": 4.16939280300008e-05,
"loss": 0.1022,
"step": 3475
},
{
"epoch": 0.7539853511417492,
"grad_norm": 0.6062744855880737,
"learning_rate": 4.159419133487593e-05,
"loss": 0.1311,
"step": 3500
},
{
"epoch": 0.7593709607927618,
"grad_norm": 0.19308426976203918,
"learning_rate": 4.149445463975106e-05,
"loss": 0.1942,
"step": 3525
},
{
"epoch": 0.7647565704437742,
"grad_norm": 0.08045301586389542,
"learning_rate": 4.139471794462619e-05,
"loss": 0.1055,
"step": 3550
},
{
"epoch": 0.7701421800947867,
"grad_norm": 47.74348449707031,
"learning_rate": 4.129498124950132e-05,
"loss": 0.0844,
"step": 3575
},
{
"epoch": 0.7755277897457993,
"grad_norm": 14.923918724060059,
"learning_rate": 4.119524455437645e-05,
"loss": 0.0943,
"step": 3600
},
{
"epoch": 0.7809133993968117,
"grad_norm": 0.3901768624782562,
"learning_rate": 4.109550785925157e-05,
"loss": 0.2203,
"step": 3625
},
{
"epoch": 0.7862990090478242,
"grad_norm": 0.05978202819824219,
"learning_rate": 4.0995771164126707e-05,
"loss": 0.0804,
"step": 3650
},
{
"epoch": 0.7916846186988367,
"grad_norm": 8.785200119018555,
"learning_rate": 4.089603446900184e-05,
"loss": 0.1765,
"step": 3675
},
{
"epoch": 0.7970702283498492,
"grad_norm": 1.523232102394104,
"learning_rate": 4.079629777387697e-05,
"loss": 0.1302,
"step": 3700
},
{
"epoch": 0.8024558380008617,
"grad_norm": 0.05809938907623291,
"learning_rate": 4.06965610787521e-05,
"loss": 0.0481,
"step": 3725
},
{
"epoch": 0.8078414476518742,
"grad_norm": 0.05040917173027992,
"learning_rate": 4.059682438362722e-05,
"loss": 0.0768,
"step": 3750
},
{
"epoch": 0.8132270573028867,
"grad_norm": 0.07563222199678421,
"learning_rate": 4.0497087688502356e-05,
"loss": 0.1397,
"step": 3775
},
{
"epoch": 0.8186126669538992,
"grad_norm": 0.4528738260269165,
"learning_rate": 4.039735099337749e-05,
"loss": 0.3308,
"step": 3800
},
{
"epoch": 0.8239982766049117,
"grad_norm": 1.0809814929962158,
"learning_rate": 4.029761429825262e-05,
"loss": 0.1919,
"step": 3825
},
{
"epoch": 0.8293838862559242,
"grad_norm": 0.2343398779630661,
"learning_rate": 4.0197877603127744e-05,
"loss": 0.1351,
"step": 3850
},
{
"epoch": 0.8347694959069367,
"grad_norm": 1.747331976890564,
"learning_rate": 4.009814090800287e-05,
"loss": 0.1744,
"step": 3875
},
{
"epoch": 0.8401551055579491,
"grad_norm": 0.06216764450073242,
"learning_rate": 3.9998404212878005e-05,
"loss": 0.0587,
"step": 3900
},
{
"epoch": 0.8455407152089617,
"grad_norm": 0.33079928159713745,
"learning_rate": 3.989866751775314e-05,
"loss": 0.1353,
"step": 3925
},
{
"epoch": 0.8509263248599741,
"grad_norm": 1.1662975549697876,
"learning_rate": 3.979893082262826e-05,
"loss": 0.2162,
"step": 3950
},
{
"epoch": 0.8563119345109866,
"grad_norm": 0.2112116664648056,
"learning_rate": 3.969919412750339e-05,
"loss": 0.1332,
"step": 3975
},
{
"epoch": 0.8616975441619992,
"grad_norm": 0.2449023723602295,
"learning_rate": 3.959945743237852e-05,
"loss": 0.1461,
"step": 4000
},
{
"epoch": 0.8670831538130116,
"grad_norm": 0.17366133630275726,
"learning_rate": 3.9499720737253654e-05,
"loss": 0.0463,
"step": 4025
},
{
"epoch": 0.8724687634640241,
"grad_norm": 5.110925674438477,
"learning_rate": 3.939998404212878e-05,
"loss": 0.2415,
"step": 4050
},
{
"epoch": 0.8778543731150367,
"grad_norm": 0.30593565106391907,
"learning_rate": 3.930024734700391e-05,
"loss": 0.1456,
"step": 4075
},
{
"epoch": 0.8832399827660491,
"grad_norm": 0.19217516481876373,
"learning_rate": 3.920051065187904e-05,
"loss": 0.161,
"step": 4100
},
{
"epoch": 0.8886255924170616,
"grad_norm": 1.0149356126785278,
"learning_rate": 3.910077395675417e-05,
"loss": 0.1787,
"step": 4125
},
{
"epoch": 0.894011202068074,
"grad_norm": 5.444819450378418,
"learning_rate": 3.90010372616293e-05,
"loss": 0.1539,
"step": 4150
},
{
"epoch": 0.8993968117190866,
"grad_norm": 1.2180671691894531,
"learning_rate": 3.890130056650443e-05,
"loss": 0.115,
"step": 4175
},
{
"epoch": 0.9047824213700991,
"grad_norm": 0.16885504126548767,
"learning_rate": 3.880156387137956e-05,
"loss": 0.2378,
"step": 4200
},
{
"epoch": 0.9101680310211115,
"grad_norm": 0.19795845448970795,
"learning_rate": 3.870182717625469e-05,
"loss": 0.1481,
"step": 4225
},
{
"epoch": 0.9155536406721241,
"grad_norm": 0.2421010136604309,
"learning_rate": 3.860209048112982e-05,
"loss": 0.1496,
"step": 4250
},
{
"epoch": 0.9209392503231366,
"grad_norm": 6.898841857910156,
"learning_rate": 3.8502353786004946e-05,
"loss": 0.1823,
"step": 4275
},
{
"epoch": 0.926324859974149,
"grad_norm": 0.2529371678829193,
"learning_rate": 3.840261709088008e-05,
"loss": 0.2487,
"step": 4300
},
{
"epoch": 0.9317104696251616,
"grad_norm": 0.39406079053878784,
"learning_rate": 3.830288039575521e-05,
"loss": 0.2116,
"step": 4325
},
{
"epoch": 0.9370960792761741,
"grad_norm": 0.07333202660083771,
"learning_rate": 3.820314370063034e-05,
"loss": 0.0279,
"step": 4350
},
{
"epoch": 0.9424816889271865,
"grad_norm": 1.0374135971069336,
"learning_rate": 3.810340700550547e-05,
"loss": 0.1611,
"step": 4375
},
{
"epoch": 0.9478672985781991,
"grad_norm": 0.12242020666599274,
"learning_rate": 3.8003670310380595e-05,
"loss": 0.1268,
"step": 4400
},
{
"epoch": 0.9532529082292115,
"grad_norm": 0.7784824371337891,
"learning_rate": 3.790393361525573e-05,
"loss": 0.2008,
"step": 4425
},
{
"epoch": 0.958638517880224,
"grad_norm": 0.3692557215690613,
"learning_rate": 3.7804196920130856e-05,
"loss": 0.1814,
"step": 4450
},
{
"epoch": 0.9640241275312366,
"grad_norm": 0.30838918685913086,
"learning_rate": 3.770446022500599e-05,
"loss": 0.0937,
"step": 4475
},
{
"epoch": 0.969409737182249,
"grad_norm": 8.506460189819336,
"learning_rate": 3.760472352988112e-05,
"loss": 0.2372,
"step": 4500
},
{
"epoch": 0.9747953468332615,
"grad_norm": 0.11787492781877518,
"learning_rate": 3.7504986834756244e-05,
"loss": 0.0906,
"step": 4525
},
{
"epoch": 0.9801809564842741,
"grad_norm": 0.18946515023708344,
"learning_rate": 3.740525013963138e-05,
"loss": 0.2561,
"step": 4550
},
{
"epoch": 0.9855665661352865,
"grad_norm": 0.10240988433361053,
"learning_rate": 3.7305513444506505e-05,
"loss": 0.087,
"step": 4575
},
{
"epoch": 0.990952175786299,
"grad_norm": 2.1701135635375977,
"learning_rate": 3.720577674938163e-05,
"loss": 0.1067,
"step": 4600
},
{
"epoch": 0.9963377854373116,
"grad_norm": 0.10697437822818756,
"learning_rate": 3.7106040054256766e-05,
"loss": 0.1298,
"step": 4625
},
{
"epoch": 1.0,
"eval_accuracy": 0.9611117095766455,
"eval_auc": 0.9901169654651506,
"eval_f1": 0.961078167115903,
"eval_loss": 0.13669981062412262,
"eval_precision": 0.9587007958700796,
"eval_recall": 0.9634673584089927,
"eval_runtime": 1681.5869,
"eval_samples_per_second": 5.52,
"eval_steps_per_second": 0.346,
"step": 4642
},
{
"epoch": 1.001723395088324,
"grad_norm": 2.8891570568084717,
"learning_rate": 3.700630335913189e-05,
"loss": 0.2125,
"step": 4650
},
{
"epoch": 1.0071090047393365,
"grad_norm": 10.658247947692871,
"learning_rate": 3.690656666400703e-05,
"loss": 0.1635,
"step": 4675
},
{
"epoch": 1.012494614390349,
"grad_norm": 0.06771805137395859,
"learning_rate": 3.680682996888215e-05,
"loss": 0.0609,
"step": 4700
},
{
"epoch": 1.0178802240413616,
"grad_norm": 0.10092346370220184,
"learning_rate": 3.670709327375728e-05,
"loss": 0.1142,
"step": 4725
},
{
"epoch": 1.023265833692374,
"grad_norm": 0.09358137845993042,
"learning_rate": 3.6607356578632415e-05,
"loss": 0.134,
"step": 4750
},
{
"epoch": 1.0286514433433864,
"grad_norm": 0.10109369456768036,
"learning_rate": 3.650761988350754e-05,
"loss": 0.0835,
"step": 4775
},
{
"epoch": 1.034037052994399,
"grad_norm": 0.06669428944587708,
"learning_rate": 3.640788318838267e-05,
"loss": 0.0729,
"step": 4800
},
{
"epoch": 1.0394226626454115,
"grad_norm": 0.26679906249046326,
"learning_rate": 3.63081464932578e-05,
"loss": 0.1923,
"step": 4825
},
{
"epoch": 1.044808272296424,
"grad_norm": 0.1384529024362564,
"learning_rate": 3.620840979813293e-05,
"loss": 0.0301,
"step": 4850
},
{
"epoch": 1.0501938819474363,
"grad_norm": 0.04985777288675308,
"learning_rate": 3.6108673103008065e-05,
"loss": 0.0582,
"step": 4875
},
{
"epoch": 1.0555794915984489,
"grad_norm": 0.17264176905155182,
"learning_rate": 3.600893640788319e-05,
"loss": 0.1751,
"step": 4900
},
{
"epoch": 1.0609651012494614,
"grad_norm": 0.41739097237586975,
"learning_rate": 3.590919971275832e-05,
"loss": 0.0556,
"step": 4925
},
{
"epoch": 1.066350710900474,
"grad_norm": 0.049522146582603455,
"learning_rate": 3.5809463017633446e-05,
"loss": 0.0233,
"step": 4950
},
{
"epoch": 1.0717363205514865,
"grad_norm": 18.618080139160156,
"learning_rate": 3.570972632250858e-05,
"loss": 0.0875,
"step": 4975
},
{
"epoch": 1.077121930202499,
"grad_norm": 0.055007074028253555,
"learning_rate": 3.5609989627383714e-05,
"loss": 0.0397,
"step": 5000
},
{
"epoch": 1.0825075398535113,
"grad_norm": 0.05218919366598129,
"learning_rate": 3.5510252932258834e-05,
"loss": 0.1636,
"step": 5025
},
{
"epoch": 1.0878931495045239,
"grad_norm": 0.13161726295948029,
"learning_rate": 3.541051623713397e-05,
"loss": 0.1354,
"step": 5050
},
{
"epoch": 1.0932787591555364,
"grad_norm": 0.2851119041442871,
"learning_rate": 3.5310779542009095e-05,
"loss": 0.0301,
"step": 5075
},
{
"epoch": 1.098664368806549,
"grad_norm": 0.03602718561887741,
"learning_rate": 3.521104284688423e-05,
"loss": 0.0037,
"step": 5100
},
{
"epoch": 1.1040499784575615,
"grad_norm": 20.054950714111328,
"learning_rate": 3.5111306151759356e-05,
"loss": 0.1972,
"step": 5125
},
{
"epoch": 1.1094355881085738,
"grad_norm": 100.69612121582031,
"learning_rate": 3.501156945663448e-05,
"loss": 0.0889,
"step": 5150
},
{
"epoch": 1.1148211977595863,
"grad_norm": 11.003225326538086,
"learning_rate": 3.491183276150962e-05,
"loss": 0.1155,
"step": 5175
},
{
"epoch": 1.1202068074105989,
"grad_norm": 8.291934967041016,
"learning_rate": 3.4812096066384744e-05,
"loss": 0.2039,
"step": 5200
},
{
"epoch": 1.1255924170616114,
"grad_norm": 0.06259515136480331,
"learning_rate": 3.471235937125988e-05,
"loss": 0.1264,
"step": 5225
},
{
"epoch": 1.130978026712624,
"grad_norm": 1.9804569482803345,
"learning_rate": 3.4612622676135005e-05,
"loss": 0.1603,
"step": 5250
},
{
"epoch": 1.1363636363636362,
"grad_norm": 13.11237621307373,
"learning_rate": 3.451288598101013e-05,
"loss": 0.0895,
"step": 5275
},
{
"epoch": 1.1417492460146488,
"grad_norm": 0.14050540328025818,
"learning_rate": 3.4413149285885266e-05,
"loss": 0.1879,
"step": 5300
},
{
"epoch": 1.1471348556656613,
"grad_norm": 0.08870512247085571,
"learning_rate": 3.4313412590760393e-05,
"loss": 0.0668,
"step": 5325
},
{
"epoch": 1.1525204653166738,
"grad_norm": 1.545466661453247,
"learning_rate": 3.421367589563552e-05,
"loss": 0.0765,
"step": 5350
},
{
"epoch": 1.1579060749676864,
"grad_norm": 0.2768205404281616,
"learning_rate": 3.4113939200510654e-05,
"loss": 0.1389,
"step": 5375
},
{
"epoch": 1.163291684618699,
"grad_norm": 15.847858428955078,
"learning_rate": 3.401420250538578e-05,
"loss": 0.1629,
"step": 5400
},
{
"epoch": 1.1686772942697115,
"grad_norm": 0.10181305557489395,
"learning_rate": 3.3914465810260916e-05,
"loss": 0.2408,
"step": 5425
},
{
"epoch": 1.1740629039207238,
"grad_norm": 1.0575032234191895,
"learning_rate": 3.381472911513604e-05,
"loss": 0.1404,
"step": 5450
},
{
"epoch": 1.1794485135717363,
"grad_norm": 0.14517611265182495,
"learning_rate": 3.371499242001117e-05,
"loss": 0.1526,
"step": 5475
},
{
"epoch": 1.1848341232227488,
"grad_norm": 3.418147087097168,
"learning_rate": 3.3615255724886304e-05,
"loss": 0.204,
"step": 5500
},
{
"epoch": 1.1902197328737614,
"grad_norm": 0.12968988716602325,
"learning_rate": 3.351551902976143e-05,
"loss": 0.1302,
"step": 5525
},
{
"epoch": 1.195605342524774,
"grad_norm": 0.49698755145072937,
"learning_rate": 3.341578233463656e-05,
"loss": 0.1656,
"step": 5550
},
{
"epoch": 1.2009909521757862,
"grad_norm": 0.4227307140827179,
"learning_rate": 3.331604563951169e-05,
"loss": 0.1857,
"step": 5575
},
{
"epoch": 1.2063765618267988,
"grad_norm": 1.2486361265182495,
"learning_rate": 3.321630894438682e-05,
"loss": 0.0902,
"step": 5600
},
{
"epoch": 1.2117621714778113,
"grad_norm": 2.5114333629608154,
"learning_rate": 3.311657224926195e-05,
"loss": 0.1306,
"step": 5625
},
{
"epoch": 1.2171477811288238,
"grad_norm": 0.07333961874246597,
"learning_rate": 3.301683555413708e-05,
"loss": 0.0746,
"step": 5650
},
{
"epoch": 1.2225333907798364,
"grad_norm": 0.26353907585144043,
"learning_rate": 3.291709885901221e-05,
"loss": 0.0902,
"step": 5675
},
{
"epoch": 1.2279190004308487,
"grad_norm": 4.065846920013428,
"learning_rate": 3.281736216388734e-05,
"loss": 0.0805,
"step": 5700
},
{
"epoch": 1.2333046100818612,
"grad_norm": 0.06470991671085358,
"learning_rate": 3.271762546876247e-05,
"loss": 0.1123,
"step": 5725
},
{
"epoch": 1.2386902197328737,
"grad_norm": 0.8340930938720703,
"learning_rate": 3.26178887736376e-05,
"loss": 0.1006,
"step": 5750
},
{
"epoch": 1.2440758293838863,
"grad_norm": 1.6223039627075195,
"learning_rate": 3.251815207851273e-05,
"loss": 0.1494,
"step": 5775
},
{
"epoch": 1.2494614390348988,
"grad_norm": 5.608697891235352,
"learning_rate": 3.2418415383387856e-05,
"loss": 0.1074,
"step": 5800
},
{
"epoch": 1.2548470486859111,
"grad_norm": 0.0557849146425724,
"learning_rate": 3.231867868826299e-05,
"loss": 0.084,
"step": 5825
},
{
"epoch": 1.2602326583369237,
"grad_norm": 27.92970085144043,
"learning_rate": 3.221894199313812e-05,
"loss": 0.0981,
"step": 5850
},
{
"epoch": 1.2656182679879362,
"grad_norm": 0.06425247341394424,
"learning_rate": 3.2119205298013244e-05,
"loss": 0.0708,
"step": 5875
},
{
"epoch": 1.2710038776389487,
"grad_norm": 0.17037852108478546,
"learning_rate": 3.201946860288838e-05,
"loss": 0.0508,
"step": 5900
},
{
"epoch": 1.2763894872899613,
"grad_norm": 71.36393737792969,
"learning_rate": 3.1919731907763505e-05,
"loss": 0.0935,
"step": 5925
},
{
"epoch": 1.2817750969409736,
"grad_norm": 0.062485672533512115,
"learning_rate": 3.181999521263864e-05,
"loss": 0.0126,
"step": 5950
},
{
"epoch": 1.2871607065919863,
"grad_norm": 2.009347438812256,
"learning_rate": 3.1720258517513767e-05,
"loss": 0.1319,
"step": 5975
},
{
"epoch": 1.2925463162429986,
"grad_norm": 0.06675919890403748,
"learning_rate": 3.1620521822388894e-05,
"loss": 0.0746,
"step": 6000
},
{
"epoch": 1.2979319258940112,
"grad_norm": 8.066916465759277,
"learning_rate": 3.152078512726403e-05,
"loss": 0.1249,
"step": 6025
},
{
"epoch": 1.3033175355450237,
"grad_norm": 0.03880688175559044,
"learning_rate": 3.1421048432139155e-05,
"loss": 0.1348,
"step": 6050
},
{
"epoch": 1.3087031451960363,
"grad_norm": 0.30525916814804077,
"learning_rate": 3.132131173701429e-05,
"loss": 0.0379,
"step": 6075
},
{
"epoch": 1.3140887548470488,
"grad_norm": 19.745574951171875,
"learning_rate": 3.122157504188941e-05,
"loss": 0.2353,
"step": 6100
},
{
"epoch": 1.319474364498061,
"grad_norm": 0.17969800531864166,
"learning_rate": 3.112183834676454e-05,
"loss": 0.0887,
"step": 6125
},
{
"epoch": 1.3248599741490736,
"grad_norm": 0.11662442237138748,
"learning_rate": 3.102210165163968e-05,
"loss": 0.0777,
"step": 6150
},
{
"epoch": 1.3302455838000862,
"grad_norm": 29.151994705200195,
"learning_rate": 3.0922364956514804e-05,
"loss": 0.1288,
"step": 6175
},
{
"epoch": 1.3356311934510987,
"grad_norm": 0.1103227511048317,
"learning_rate": 3.082262826138993e-05,
"loss": 0.1441,
"step": 6200
},
{
"epoch": 1.3410168031021112,
"grad_norm": 0.055622648447752,
"learning_rate": 3.072289156626506e-05,
"loss": 0.0355,
"step": 6225
},
{
"epoch": 1.3464024127531236,
"grad_norm": 0.05069245770573616,
"learning_rate": 3.062315487114019e-05,
"loss": 0.1014,
"step": 6250
},
{
"epoch": 1.351788022404136,
"grad_norm": 2.9234323501586914,
"learning_rate": 3.0523418176015326e-05,
"loss": 0.1902,
"step": 6275
},
{
"epoch": 1.3571736320551486,
"grad_norm": 0.10473991930484772,
"learning_rate": 3.042368148089045e-05,
"loss": 0.1063,
"step": 6300
},
{
"epoch": 1.3625592417061612,
"grad_norm": 0.2278774380683899,
"learning_rate": 3.032394478576558e-05,
"loss": 0.1619,
"step": 6325
},
{
"epoch": 1.3679448513571737,
"grad_norm": 0.31000664830207825,
"learning_rate": 3.022420809064071e-05,
"loss": 0.1219,
"step": 6350
},
{
"epoch": 1.373330461008186,
"grad_norm": 0.3597787022590637,
"learning_rate": 3.012447139551584e-05,
"loss": 0.1056,
"step": 6375
},
{
"epoch": 1.3787160706591985,
"grad_norm": 0.11010152846574783,
"learning_rate": 3.0024734700390972e-05,
"loss": 0.1596,
"step": 6400
},
{
"epoch": 1.384101680310211,
"grad_norm": 1.2052332162857056,
"learning_rate": 2.99249980052661e-05,
"loss": 0.1476,
"step": 6425
},
{
"epoch": 1.3894872899612236,
"grad_norm": 3.561495542526245,
"learning_rate": 2.982526131014123e-05,
"loss": 0.0991,
"step": 6450
},
{
"epoch": 1.3948728996122362,
"grad_norm": 0.16460971534252167,
"learning_rate": 2.972552461501636e-05,
"loss": 0.0774,
"step": 6475
},
{
"epoch": 1.4002585092632485,
"grad_norm": 0.08335640281438828,
"learning_rate": 2.962578791989149e-05,
"loss": 0.1055,
"step": 6500
},
{
"epoch": 1.4056441189142612,
"grad_norm": 24.70379638671875,
"learning_rate": 2.9526051224766614e-05,
"loss": 0.0675,
"step": 6525
},
{
"epoch": 1.4110297285652735,
"grad_norm": 0.06125853583216667,
"learning_rate": 2.9426314529641748e-05,
"loss": 0.0687,
"step": 6550
},
{
"epoch": 1.416415338216286,
"grad_norm": 0.05415304750204086,
"learning_rate": 2.932657783451688e-05,
"loss": 0.0439,
"step": 6575
},
{
"epoch": 1.4218009478672986,
"grad_norm": 0.06519858539104462,
"learning_rate": 2.922684113939201e-05,
"loss": 0.052,
"step": 6600
},
{
"epoch": 1.4271865575183111,
"grad_norm": 0.05177925154566765,
"learning_rate": 2.9127104444267133e-05,
"loss": 0.068,
"step": 6625
},
{
"epoch": 1.4325721671693237,
"grad_norm": 0.0696578249335289,
"learning_rate": 2.9027367749142263e-05,
"loss": 0.1637,
"step": 6650
},
{
"epoch": 1.437957776820336,
"grad_norm": 0.07040733844041824,
"learning_rate": 2.8927631054017397e-05,
"loss": 0.0799,
"step": 6675
},
{
"epoch": 1.4433433864713485,
"grad_norm": 0.09942576289176941,
"learning_rate": 2.8827894358892528e-05,
"loss": 0.1363,
"step": 6700
},
{
"epoch": 1.448728996122361,
"grad_norm": 0.07192389667034149,
"learning_rate": 2.8728157663767658e-05,
"loss": 0.1003,
"step": 6725
},
{
"epoch": 1.4541146057733736,
"grad_norm": 0.0946580320596695,
"learning_rate": 2.8628420968642782e-05,
"loss": 0.1488,
"step": 6750
},
{
"epoch": 1.4595002154243861,
"grad_norm": 0.07671581953763962,
"learning_rate": 2.8528684273517913e-05,
"loss": 0.0701,
"step": 6775
},
{
"epoch": 1.4648858250753984,
"grad_norm": 0.06100593879818916,
"learning_rate": 2.8428947578393046e-05,
"loss": 0.0232,
"step": 6800
},
{
"epoch": 1.470271434726411,
"grad_norm": 0.04395158588886261,
"learning_rate": 2.8329210883268177e-05,
"loss": 0.0763,
"step": 6825
},
{
"epoch": 1.4756570443774235,
"grad_norm": 0.03843090683221817,
"learning_rate": 2.82294741881433e-05,
"loss": 0.0832,
"step": 6850
},
{
"epoch": 1.481042654028436,
"grad_norm": 32.23625564575195,
"learning_rate": 2.812973749301843e-05,
"loss": 0.117,
"step": 6875
},
{
"epoch": 1.4864282636794486,
"grad_norm": 0.042882196605205536,
"learning_rate": 2.8030000797893562e-05,
"loss": 0.1001,
"step": 6900
},
{
"epoch": 1.491813873330461,
"grad_norm": 0.1393657624721527,
"learning_rate": 2.7930264102768696e-05,
"loss": 0.1046,
"step": 6925
},
{
"epoch": 1.4971994829814734,
"grad_norm": 30.964231491088867,
"learning_rate": 2.783052740764382e-05,
"loss": 0.1901,
"step": 6950
},
{
"epoch": 1.502585092632486,
"grad_norm": 1.4588963985443115,
"learning_rate": 2.773079071251895e-05,
"loss": 0.0741,
"step": 6975
},
{
"epoch": 1.5079707022834985,
"grad_norm": 0.06478077918291092,
"learning_rate": 2.763105401739408e-05,
"loss": 0.08,
"step": 7000
},
{
"epoch": 1.513356311934511,
"grad_norm": 0.05906078219413757,
"learning_rate": 2.753131732226921e-05,
"loss": 0.0685,
"step": 7025
},
{
"epoch": 1.5187419215855233,
"grad_norm": 4.263314247131348,
"learning_rate": 2.7431580627144338e-05,
"loss": 0.0689,
"step": 7050
},
{
"epoch": 1.524127531236536,
"grad_norm": 0.07187110185623169,
"learning_rate": 2.733184393201947e-05,
"loss": 0.1388,
"step": 7075
},
{
"epoch": 1.5295131408875484,
"grad_norm": 0.06428200751543045,
"learning_rate": 2.72321072368946e-05,
"loss": 0.0718,
"step": 7100
},
{
"epoch": 1.534898750538561,
"grad_norm": 0.05657276138663292,
"learning_rate": 2.713237054176973e-05,
"loss": 0.0777,
"step": 7125
},
{
"epoch": 1.5402843601895735,
"grad_norm": 29.23991584777832,
"learning_rate": 2.703263384664486e-05,
"loss": 0.1151,
"step": 7150
},
{
"epoch": 1.5456699698405858,
"grad_norm": 13.988266944885254,
"learning_rate": 2.6932897151519987e-05,
"loss": 0.1451,
"step": 7175
},
{
"epoch": 1.5510555794915986,
"grad_norm": 1.5213409662246704,
"learning_rate": 2.6833160456395118e-05,
"loss": 0.1063,
"step": 7200
},
{
"epoch": 1.5564411891426109,
"grad_norm": 0.10175667703151703,
"learning_rate": 2.6733423761270248e-05,
"loss": 0.0863,
"step": 7225
},
{
"epoch": 1.5618267987936234,
"grad_norm": 0.12581761181354523,
"learning_rate": 2.663368706614538e-05,
"loss": 0.2038,
"step": 7250
},
{
"epoch": 1.567212408444636,
"grad_norm": 0.11780127137899399,
"learning_rate": 2.6533950371020506e-05,
"loss": 0.1042,
"step": 7275
},
{
"epoch": 1.5725980180956485,
"grad_norm": 0.2194374054670334,
"learning_rate": 2.6434213675895636e-05,
"loss": 0.1805,
"step": 7300
},
{
"epoch": 1.577983627746661,
"grad_norm": 0.18749874830245972,
"learning_rate": 2.6334476980770767e-05,
"loss": 0.1292,
"step": 7325
},
{
"epoch": 1.5833692373976733,
"grad_norm": 0.12621381878852844,
"learning_rate": 2.6234740285645897e-05,
"loss": 0.0807,
"step": 7350
},
{
"epoch": 1.5887548470486859,
"grad_norm": 0.09865190833806992,
"learning_rate": 2.6135003590521025e-05,
"loss": 0.0985,
"step": 7375
},
{
"epoch": 1.5941404566996984,
"grad_norm": 0.11946694552898407,
"learning_rate": 2.6035266895396155e-05,
"loss": 0.0895,
"step": 7400
},
{
"epoch": 1.599526066350711,
"grad_norm": 1.2805886268615723,
"learning_rate": 2.5935530200271286e-05,
"loss": 0.1058,
"step": 7425
},
{
"epoch": 1.6049116760017235,
"grad_norm": 0.14271412789821625,
"learning_rate": 2.5835793505146416e-05,
"loss": 0.1345,
"step": 7450
},
{
"epoch": 1.6102972856527358,
"grad_norm": 32.52022933959961,
"learning_rate": 2.5736056810021547e-05,
"loss": 0.1266,
"step": 7475
},
{
"epoch": 1.6156828953037485,
"grad_norm": 0.12038301676511765,
"learning_rate": 2.5636320114896674e-05,
"loss": 0.1492,
"step": 7500
},
{
"epoch": 1.6210685049547608,
"grad_norm": 1.8868168592453003,
"learning_rate": 2.5536583419771804e-05,
"loss": 0.0776,
"step": 7525
},
{
"epoch": 1.6264541146057734,
"grad_norm": 1.943164348602295,
"learning_rate": 2.5436846724646935e-05,
"loss": 0.0348,
"step": 7550
},
{
"epoch": 1.631839724256786,
"grad_norm": 0.07310531288385391,
"learning_rate": 2.5337110029522065e-05,
"loss": 0.0375,
"step": 7575
},
{
"epoch": 1.6372253339077982,
"grad_norm": 0.05736231431365013,
"learning_rate": 2.5237373334397192e-05,
"loss": 0.0588,
"step": 7600
},
{
"epoch": 1.642610943558811,
"grad_norm": 0.05780019611120224,
"learning_rate": 2.5137636639272323e-05,
"loss": 0.0524,
"step": 7625
},
{
"epoch": 1.6479965532098233,
"grad_norm": 0.5779060125350952,
"learning_rate": 2.5037899944147453e-05,
"loss": 0.1221,
"step": 7650
},
{
"epoch": 1.6533821628608358,
"grad_norm": 0.05291671305894852,
"learning_rate": 2.493816324902258e-05,
"loss": 0.0978,
"step": 7675
},
{
"epoch": 1.6587677725118484,
"grad_norm": 0.04571978747844696,
"learning_rate": 2.483842655389771e-05,
"loss": 0.0798,
"step": 7700
},
{
"epoch": 1.6641533821628607,
"grad_norm": 0.0946052223443985,
"learning_rate": 2.473868985877284e-05,
"loss": 0.0712,
"step": 7725
},
{
"epoch": 1.6695389918138734,
"grad_norm": 0.04669623076915741,
"learning_rate": 2.4638953163647972e-05,
"loss": 0.1253,
"step": 7750
},
{
"epoch": 1.6749246014648858,
"grad_norm": 0.03837985917925835,
"learning_rate": 2.45392164685231e-05,
"loss": 0.0281,
"step": 7775
},
{
"epoch": 1.6803102111158983,
"grad_norm": 0.1306626796722412,
"learning_rate": 2.443947977339823e-05,
"loss": 0.0298,
"step": 7800
},
{
"epoch": 1.6856958207669108,
"grad_norm": 0.04202219098806381,
"learning_rate": 2.4339743078273357e-05,
"loss": 0.1771,
"step": 7825
},
{
"epoch": 1.6910814304179234,
"grad_norm": 9.636494636535645,
"learning_rate": 2.424000638314849e-05,
"loss": 0.1739,
"step": 7850
},
{
"epoch": 1.696467040068936,
"grad_norm": 0.05588022246956825,
"learning_rate": 2.414026968802362e-05,
"loss": 0.1273,
"step": 7875
},
{
"epoch": 1.7018526497199482,
"grad_norm": 0.30812951922416687,
"learning_rate": 2.404053299289875e-05,
"loss": 0.0813,
"step": 7900
},
{
"epoch": 1.7072382593709607,
"grad_norm": 1.565081000328064,
"learning_rate": 2.394079629777388e-05,
"loss": 0.0759,
"step": 7925
},
{
"epoch": 1.7126238690219733,
"grad_norm": 0.06271021068096161,
"learning_rate": 2.3841059602649006e-05,
"loss": 0.1473,
"step": 7950
},
{
"epoch": 1.7180094786729858,
"grad_norm": 0.06820254772901535,
"learning_rate": 2.374132290752414e-05,
"loss": 0.0978,
"step": 7975
},
{
"epoch": 1.7233950883239983,
"grad_norm": 0.0788588598370552,
"learning_rate": 2.3641586212399267e-05,
"loss": 0.1251,
"step": 8000
},
{
"epoch": 1.7287806979750107,
"grad_norm": 0.08439130336046219,
"learning_rate": 2.3541849517274398e-05,
"loss": 0.0489,
"step": 8025
},
{
"epoch": 1.7341663076260234,
"grad_norm": 0.08412740379571915,
"learning_rate": 2.3442112822149525e-05,
"loss": 0.0972,
"step": 8050
},
{
"epoch": 1.7395519172770357,
"grad_norm": 0.05004223436117172,
"learning_rate": 2.3342376127024655e-05,
"loss": 0.0867,
"step": 8075
},
{
"epoch": 1.7449375269280483,
"grad_norm": 0.08401107043027878,
"learning_rate": 2.3242639431899786e-05,
"loss": 0.1558,
"step": 8100
},
{
"epoch": 1.7503231365790608,
"grad_norm": 0.1881813257932663,
"learning_rate": 2.3142902736774916e-05,
"loss": 0.0759,
"step": 8125
},
{
"epoch": 1.7557087462300731,
"grad_norm": 1.4269578456878662,
"learning_rate": 2.3043166041650043e-05,
"loss": 0.0702,
"step": 8150
},
{
"epoch": 1.7610943558810859,
"grad_norm": 43.845985412597656,
"learning_rate": 2.2943429346525174e-05,
"loss": 0.0901,
"step": 8175
},
{
"epoch": 1.7664799655320982,
"grad_norm": 0.10664209723472595,
"learning_rate": 2.2843692651400304e-05,
"loss": 0.0651,
"step": 8200
},
{
"epoch": 1.7718655751831107,
"grad_norm": 3.7163357734680176,
"learning_rate": 2.2743955956275435e-05,
"loss": 0.0471,
"step": 8225
},
{
"epoch": 1.7772511848341233,
"grad_norm": 0.05055355280637741,
"learning_rate": 2.2644219261150565e-05,
"loss": 0.1338,
"step": 8250
},
{
"epoch": 1.7826367944851356,
"grad_norm": 0.23772941529750824,
"learning_rate": 2.2544482566025693e-05,
"loss": 0.2019,
"step": 8275
},
{
"epoch": 1.7880224041361483,
"grad_norm": 0.03852352499961853,
"learning_rate": 2.2444745870900823e-05,
"loss": 0.0408,
"step": 8300
},
{
"epoch": 1.7934080137871606,
"grad_norm": 2.454216480255127,
"learning_rate": 2.2345009175775954e-05,
"loss": 0.1086,
"step": 8325
},
{
"epoch": 1.7987936234381732,
"grad_norm": 0.2967223525047302,
"learning_rate": 2.2245272480651084e-05,
"loss": 0.0961,
"step": 8350
},
{
"epoch": 1.8041792330891857,
"grad_norm": 0.5866406559944153,
"learning_rate": 2.214553578552621e-05,
"loss": 0.1074,
"step": 8375
},
{
"epoch": 1.8095648427401982,
"grad_norm": 0.07801081985235214,
"learning_rate": 2.2045799090401342e-05,
"loss": 0.0628,
"step": 8400
},
{
"epoch": 1.8149504523912108,
"grad_norm": 18.57970428466797,
"learning_rate": 2.194606239527647e-05,
"loss": 0.1468,
"step": 8425
},
{
"epoch": 1.820336062042223,
"grad_norm": 0.5681573152542114,
"learning_rate": 2.1846325700151603e-05,
"loss": 0.0894,
"step": 8450
},
{
"epoch": 1.8257216716932356,
"grad_norm": 0.07245413213968277,
"learning_rate": 2.174658900502673e-05,
"loss": 0.1625,
"step": 8475
},
{
"epoch": 1.8311072813442482,
"grad_norm": 0.058218203485012054,
"learning_rate": 2.164685230990186e-05,
"loss": 0.0966,
"step": 8500
},
{
"epoch": 1.8364928909952607,
"grad_norm": 0.7239183187484741,
"learning_rate": 2.1547115614776988e-05,
"loss": 0.1296,
"step": 8525
},
{
"epoch": 1.8418785006462732,
"grad_norm": 0.0625491514801979,
"learning_rate": 2.1447378919652118e-05,
"loss": 0.1106,
"step": 8550
},
{
"epoch": 1.8472641102972855,
"grad_norm": 0.11988529562950134,
"learning_rate": 2.134764222452725e-05,
"loss": 0.129,
"step": 8575
},
{
"epoch": 1.8526497199482983,
"grad_norm": 23.358821868896484,
"learning_rate": 2.124790552940238e-05,
"loss": 0.1312,
"step": 8600
},
{
"epoch": 1.8580353295993106,
"grad_norm": 1.5045102834701538,
"learning_rate": 2.114816883427751e-05,
"loss": 0.107,
"step": 8625
},
{
"epoch": 1.8634209392503231,
"grad_norm": 0.06715774536132812,
"learning_rate": 2.1048432139152637e-05,
"loss": 0.0365,
"step": 8650
},
{
"epoch": 1.8688065489013357,
"grad_norm": 1.9744484424591064,
"learning_rate": 2.0948695444027767e-05,
"loss": 0.0944,
"step": 8675
},
{
"epoch": 1.874192158552348,
"grad_norm": 0.28752002120018005,
"learning_rate": 2.0848958748902898e-05,
"loss": 0.1517,
"step": 8700
},
{
"epoch": 1.8795777682033608,
"grad_norm": 0.06683491915464401,
"learning_rate": 2.074922205377803e-05,
"loss": 0.0558,
"step": 8725
},
{
"epoch": 1.884963377854373,
"grad_norm": 2.5277748107910156,
"learning_rate": 2.0649485358653155e-05,
"loss": 0.124,
"step": 8750
},
{
"epoch": 1.8903489875053856,
"grad_norm": 1.8914086818695068,
"learning_rate": 2.0549748663528286e-05,
"loss": 0.0586,
"step": 8775
},
{
"epoch": 1.8957345971563981,
"grad_norm": 0.06591325998306274,
"learning_rate": 2.0450011968403416e-05,
"loss": 0.0511,
"step": 8800
},
{
"epoch": 1.9011202068074105,
"grad_norm": 1.5716817378997803,
"learning_rate": 2.0350275273278547e-05,
"loss": 0.1128,
"step": 8825
},
{
"epoch": 1.9065058164584232,
"grad_norm": 0.05281971022486687,
"learning_rate": 2.0250538578153674e-05,
"loss": 0.1034,
"step": 8850
},
{
"epoch": 1.9118914261094355,
"grad_norm": 0.04660295695066452,
"learning_rate": 2.0150801883028805e-05,
"loss": 0.216,
"step": 8875
},
{
"epoch": 1.917277035760448,
"grad_norm": 0.2716023027896881,
"learning_rate": 2.0051065187903935e-05,
"loss": 0.189,
"step": 8900
},
{
"epoch": 1.9226626454114606,
"grad_norm": 0.06641989201307297,
"learning_rate": 1.9951328492779066e-05,
"loss": 0.0583,
"step": 8925
},
{
"epoch": 1.9280482550624731,
"grad_norm": 0.053841717541217804,
"learning_rate": 1.9851591797654193e-05,
"loss": 0.0942,
"step": 8950
},
{
"epoch": 1.9334338647134857,
"grad_norm": 0.05274002254009247,
"learning_rate": 1.9751855102529323e-05,
"loss": 0.1183,
"step": 8975
},
{
"epoch": 1.938819474364498,
"grad_norm": 0.08617954701185226,
"learning_rate": 1.9652118407404454e-05,
"loss": 0.0855,
"step": 9000
},
{
"epoch": 1.9442050840155105,
"grad_norm": 0.06696368008852005,
"learning_rate": 1.9552381712279584e-05,
"loss": 0.117,
"step": 9025
},
{
"epoch": 1.949590693666523,
"grad_norm": 7.755177974700928,
"learning_rate": 1.9452645017154715e-05,
"loss": 0.1239,
"step": 9050
},
{
"epoch": 1.9549763033175356,
"grad_norm": 0.1449265480041504,
"learning_rate": 1.9352908322029842e-05,
"loss": 0.0605,
"step": 9075
},
{
"epoch": 1.9603619129685481,
"grad_norm": 0.04622127115726471,
"learning_rate": 1.9253171626904972e-05,
"loss": 0.1222,
"step": 9100
},
{
"epoch": 1.9657475226195604,
"grad_norm": 0.051821399480104446,
"learning_rate": 1.91534349317801e-05,
"loss": 0.0562,
"step": 9125
},
{
"epoch": 1.9711331322705732,
"grad_norm": 0.04180682450532913,
"learning_rate": 1.9053698236655234e-05,
"loss": 0.0473,
"step": 9150
},
{
"epoch": 1.9765187419215855,
"grad_norm": 0.0317608080804348,
"learning_rate": 1.895396154153036e-05,
"loss": 0.0576,
"step": 9175
},
{
"epoch": 1.981904351572598,
"grad_norm": 0.026255348697304726,
"learning_rate": 1.885422484640549e-05,
"loss": 0.0315,
"step": 9200
},
{
"epoch": 1.9872899612236106,
"grad_norm": 1.6421452760696411,
"learning_rate": 1.8754488151280618e-05,
"loss": 0.1244,
"step": 9225
},
{
"epoch": 1.9926755708746229,
"grad_norm": 0.0805448517203331,
"learning_rate": 1.865475145615575e-05,
"loss": 0.0834,
"step": 9250
},
{
"epoch": 1.9980611805256356,
"grad_norm": 0.03920817747712135,
"learning_rate": 1.855501476103088e-05,
"loss": 0.0997,
"step": 9275
},
{
"epoch": 2.0,
"eval_accuracy": 0.9691909942906388,
"eval_auc": 0.9931809600784133,
"eval_f1": 0.9694248449861022,
"eval_loss": 0.1328408569097519,
"eval_precision": 0.9589678510998308,
"eval_recall": 0.9801124081279723,
"eval_runtime": 1746.5378,
"eval_samples_per_second": 5.315,
"eval_steps_per_second": 0.333,
"step": 9284
},
{
"epoch": 2.003446790176648,
"grad_norm": 0.672393262386322,
"learning_rate": 1.845527806590601e-05,
"loss": 0.0478,
"step": 9300
},
{
"epoch": 2.0088323998276607,
"grad_norm": 0.0429680272936821,
"learning_rate": 1.8355541370781137e-05,
"loss": 0.0864,
"step": 9325
},
{
"epoch": 2.014218009478673,
"grad_norm": 0.036236416548490524,
"learning_rate": 1.8255804675656267e-05,
"loss": 0.0491,
"step": 9350
},
{
"epoch": 2.0196036191296853,
"grad_norm": 0.036336880177259445,
"learning_rate": 1.8156067980531398e-05,
"loss": 0.0581,
"step": 9375
},
{
"epoch": 2.024989228780698,
"grad_norm": 0.032438818365335464,
"learning_rate": 1.805633128540653e-05,
"loss": 0.0419,
"step": 9400
},
{
"epoch": 2.0303748384317104,
"grad_norm": 1.6185767650604248,
"learning_rate": 1.795659459028166e-05,
"loss": 0.0767,
"step": 9425
},
{
"epoch": 2.035760448082723,
"grad_norm": 2.298982858657837,
"learning_rate": 1.7856857895156786e-05,
"loss": 0.0983,
"step": 9450
},
{
"epoch": 2.0411460577337355,
"grad_norm": 8.202011108398438,
"learning_rate": 1.7757121200031917e-05,
"loss": 0.0845,
"step": 9475
},
{
"epoch": 2.046531667384748,
"grad_norm": 0.029248099774122238,
"learning_rate": 1.7657384504907047e-05,
"loss": 0.053,
"step": 9500
},
{
"epoch": 2.0519172770357605,
"grad_norm": 0.02675529569387436,
"learning_rate": 1.7557647809782178e-05,
"loss": 0.0053,
"step": 9525
},
{
"epoch": 2.057302886686773,
"grad_norm": 17.323036193847656,
"learning_rate": 1.7457911114657305e-05,
"loss": 0.127,
"step": 9550
},
{
"epoch": 2.0626884963377856,
"grad_norm": 0.03516780957579613,
"learning_rate": 1.7358174419532435e-05,
"loss": 0.1891,
"step": 9575
},
{
"epoch": 2.068074105988798,
"grad_norm": 3.910149574279785,
"learning_rate": 1.7258437724407562e-05,
"loss": 0.1083,
"step": 9600
},
{
"epoch": 2.0734597156398102,
"grad_norm": 0.18281258642673492,
"learning_rate": 1.7158701029282696e-05,
"loss": 0.0678,
"step": 9625
},
{
"epoch": 2.078845325290823,
"grad_norm": 0.04458484426140785,
"learning_rate": 1.7058964334157823e-05,
"loss": 0.0904,
"step": 9650
},
{
"epoch": 2.0842309349418353,
"grad_norm": 0.20179250836372375,
"learning_rate": 1.6959227639032954e-05,
"loss": 0.0388,
"step": 9675
},
{
"epoch": 2.089616544592848,
"grad_norm": 0.02497880347073078,
"learning_rate": 1.685949094390808e-05,
"loss": 0.0031,
"step": 9700
},
{
"epoch": 2.0950021542438604,
"grad_norm": 0.02411968819797039,
"learning_rate": 1.675975424878321e-05,
"loss": 0.0691,
"step": 9725
},
{
"epoch": 2.1003877638948727,
"grad_norm": 0.035218965262174606,
"learning_rate": 1.6660017553658346e-05,
"loss": 0.0511,
"step": 9750
},
{
"epoch": 2.1057733735458855,
"grad_norm": 0.037588488310575485,
"learning_rate": 1.6560280858533473e-05,
"loss": 0.0335,
"step": 9775
},
{
"epoch": 2.1111589831968978,
"grad_norm": 0.03383982926607132,
"learning_rate": 1.6460544163408603e-05,
"loss": 0.0516,
"step": 9800
},
{
"epoch": 2.1165445928479105,
"grad_norm": 0.6187468767166138,
"learning_rate": 1.636080746828373e-05,
"loss": 0.0983,
"step": 9825
},
{
"epoch": 2.121930202498923,
"grad_norm": 0.23063376545906067,
"learning_rate": 1.626107077315886e-05,
"loss": 0.1008,
"step": 9850
},
{
"epoch": 2.127315812149935,
"grad_norm": 0.04398832470178604,
"learning_rate": 1.616133407803399e-05,
"loss": 0.027,
"step": 9875
},
{
"epoch": 2.132701421800948,
"grad_norm": 0.0390457846224308,
"learning_rate": 1.6061597382909122e-05,
"loss": 0.0588,
"step": 9900
},
{
"epoch": 2.13808703145196,
"grad_norm": 0.8915455937385559,
"learning_rate": 1.596186068778425e-05,
"loss": 0.1456,
"step": 9925
},
{
"epoch": 2.143472641102973,
"grad_norm": 68.75797271728516,
"learning_rate": 1.586212399265938e-05,
"loss": 0.0791,
"step": 9950
},
{
"epoch": 2.1488582507539853,
"grad_norm": 15.131389617919922,
"learning_rate": 1.576238729753451e-05,
"loss": 0.1169,
"step": 9975
},
{
"epoch": 2.154243860404998,
"grad_norm": 0.05127192661166191,
"learning_rate": 1.566265060240964e-05,
"loss": 0.0582,
"step": 10000
},
{
"epoch": 2.1596294700560104,
"grad_norm": 0.03388744965195656,
"learning_rate": 1.5562913907284768e-05,
"loss": 0.0609,
"step": 10025
},
{
"epoch": 2.1650150797070227,
"grad_norm": 0.0341382697224617,
"learning_rate": 1.5463177212159898e-05,
"loss": 0.1072,
"step": 10050
},
{
"epoch": 2.1704006893580354,
"grad_norm": 0.032444145530462265,
"learning_rate": 1.536344051703503e-05,
"loss": 0.0447,
"step": 10075
},
{
"epoch": 2.1757862990090477,
"grad_norm": 0.024341439828276634,
"learning_rate": 1.526370382191016e-05,
"loss": 0.0139,
"step": 10100
},
{
"epoch": 2.1811719086600605,
"grad_norm": 1.856371521949768,
"learning_rate": 1.5163967126785286e-05,
"loss": 0.0965,
"step": 10125
},
{
"epoch": 2.186557518311073,
"grad_norm": 3.588845729827881,
"learning_rate": 1.5064230431660417e-05,
"loss": 0.0047,
"step": 10150
},
{
"epoch": 2.191943127962085,
"grad_norm": 0.025228984653949738,
"learning_rate": 1.4964493736535547e-05,
"loss": 0.0525,
"step": 10175
},
{
"epoch": 2.197328737613098,
"grad_norm": 0.023060867562890053,
"learning_rate": 1.4864757041410676e-05,
"loss": 0.0817,
"step": 10200
},
{
"epoch": 2.20271434726411,
"grad_norm": 0.026297984644770622,
"learning_rate": 1.4765020346285807e-05,
"loss": 0.0313,
"step": 10225
},
{
"epoch": 2.208099956915123,
"grad_norm": 0.020485829561948776,
"learning_rate": 1.4665283651160936e-05,
"loss": 0.0022,
"step": 10250
},
{
"epoch": 2.2134855665661353,
"grad_norm": 0.023789284750819206,
"learning_rate": 1.4565546956036066e-05,
"loss": 0.069,
"step": 10275
},
{
"epoch": 2.2188711762171476,
"grad_norm": 0.02372126840054989,
"learning_rate": 1.4465810260911195e-05,
"loss": 0.0181,
"step": 10300
},
{
"epoch": 2.2242567858681603,
"grad_norm": 0.02078492008149624,
"learning_rate": 1.4366073565786325e-05,
"loss": 0.0312,
"step": 10325
},
{
"epoch": 2.2296423955191726,
"grad_norm": 0.09992769360542297,
"learning_rate": 1.4266336870661454e-05,
"loss": 0.1303,
"step": 10350
},
{
"epoch": 2.2350280051701854,
"grad_norm": 0.14363807439804077,
"learning_rate": 1.4166600175536585e-05,
"loss": 0.1266,
"step": 10375
},
{
"epoch": 2.2404136148211977,
"grad_norm": 0.047046031802892685,
"learning_rate": 1.4066863480411714e-05,
"loss": 0.0385,
"step": 10400
},
{
"epoch": 2.2457992244722105,
"grad_norm": 0.045923490077257156,
"learning_rate": 1.3967126785286844e-05,
"loss": 0.0046,
"step": 10425
},
{
"epoch": 2.251184834123223,
"grad_norm": 21.856792449951172,
"learning_rate": 1.3867390090161971e-05,
"loss": 0.1563,
"step": 10450
},
{
"epoch": 2.256570443774235,
"grad_norm": 0.07415134459733963,
"learning_rate": 1.3767653395037103e-05,
"loss": 0.1118,
"step": 10475
},
{
"epoch": 2.261956053425248,
"grad_norm": 0.06203525885939598,
"learning_rate": 1.366791669991223e-05,
"loss": 0.0484,
"step": 10500
},
{
"epoch": 2.26734166307626,
"grad_norm": 0.03413529694080353,
"learning_rate": 1.3568180004787363e-05,
"loss": 0.0395,
"step": 10525
},
{
"epoch": 2.2727272727272725,
"grad_norm": 2.930551528930664,
"learning_rate": 1.3468443309662493e-05,
"loss": 0.0553,
"step": 10550
},
{
"epoch": 2.2781128823782852,
"grad_norm": 0.027626806870102882,
"learning_rate": 1.336870661453762e-05,
"loss": 0.0611,
"step": 10575
},
{
"epoch": 2.2834984920292976,
"grad_norm": 100.386962890625,
"learning_rate": 1.3268969919412753e-05,
"loss": 0.0417,
"step": 10600
},
{
"epoch": 2.2888841016803103,
"grad_norm": 0.02713247761130333,
"learning_rate": 1.316923322428788e-05,
"loss": 0.0763,
"step": 10625
},
{
"epoch": 2.2942697113313226,
"grad_norm": 1.6741771697998047,
"learning_rate": 1.3069496529163012e-05,
"loss": 0.098,
"step": 10650
},
{
"epoch": 2.2996553209823354,
"grad_norm": 6.76040506362915,
"learning_rate": 1.2969759834038139e-05,
"loss": 0.0836,
"step": 10675
},
{
"epoch": 2.3050409306333477,
"grad_norm": 1.2940654754638672,
"learning_rate": 1.287002313891327e-05,
"loss": 0.0466,
"step": 10700
},
{
"epoch": 2.31042654028436,
"grad_norm": 0.4345569610595703,
"learning_rate": 1.2770286443788398e-05,
"loss": 0.0564,
"step": 10725
},
{
"epoch": 2.3158121499353728,
"grad_norm": 2.130342721939087,
"learning_rate": 1.2670549748663529e-05,
"loss": 0.0852,
"step": 10750
},
{
"epoch": 2.321197759586385,
"grad_norm": 0.11710493266582489,
"learning_rate": 1.2570813053538658e-05,
"loss": 0.0509,
"step": 10775
},
{
"epoch": 2.326583369237398,
"grad_norm": 0.026548050343990326,
"learning_rate": 1.2471076358413788e-05,
"loss": 0.004,
"step": 10800
},
{
"epoch": 2.33196897888841,
"grad_norm": 0.03190811350941658,
"learning_rate": 1.2371339663288919e-05,
"loss": 0.0522,
"step": 10825
},
{
"epoch": 2.337354588539423,
"grad_norm": 81.03248596191406,
"learning_rate": 1.2271602968164048e-05,
"loss": 0.0468,
"step": 10850
},
{
"epoch": 2.342740198190435,
"grad_norm": 0.02296995371580124,
"learning_rate": 1.2171866273039176e-05,
"loss": 0.0255,
"step": 10875
},
{
"epoch": 2.3481258078414475,
"grad_norm": 0.020512187853455544,
"learning_rate": 1.2072129577914307e-05,
"loss": 0.0499,
"step": 10900
},
{
"epoch": 2.3535114174924603,
"grad_norm": 0.2346896231174469,
"learning_rate": 1.1972392882789436e-05,
"loss": 0.0904,
"step": 10925
},
{
"epoch": 2.3588970271434726,
"grad_norm": 0.03325672820210457,
"learning_rate": 1.1872656187664566e-05,
"loss": 0.1062,
"step": 10950
},
{
"epoch": 2.364282636794485,
"grad_norm": 0.0365658663213253,
"learning_rate": 1.1772919492539695e-05,
"loss": 0.0542,
"step": 10975
},
{
"epoch": 2.3696682464454977,
"grad_norm": 0.04546808823943138,
"learning_rate": 1.1673182797414826e-05,
"loss": 0.0828,
"step": 11000
},
{
"epoch": 2.37505385609651,
"grad_norm": 0.037818703800439835,
"learning_rate": 1.1573446102289954e-05,
"loss": 0.0034,
"step": 11025
},
{
"epoch": 2.3804394657475227,
"grad_norm": 0.025843236595392227,
"learning_rate": 1.1473709407165085e-05,
"loss": 0.0419,
"step": 11050
},
{
"epoch": 2.385825075398535,
"grad_norm": 0.030073661357164383,
"learning_rate": 1.1373972712040214e-05,
"loss": 0.1035,
"step": 11075
},
{
"epoch": 2.391210685049548,
"grad_norm": 1.6878631114959717,
"learning_rate": 1.1274236016915344e-05,
"loss": 0.0982,
"step": 11100
},
{
"epoch": 2.39659629470056,
"grad_norm": 0.03798234090209007,
"learning_rate": 1.1174499321790475e-05,
"loss": 0.057,
"step": 11125
},
{
"epoch": 2.4019819043515724,
"grad_norm": 0.030130743980407715,
"learning_rate": 1.1074762626665604e-05,
"loss": 0.0424,
"step": 11150
},
{
"epoch": 2.407367514002585,
"grad_norm": 0.029741058126091957,
"learning_rate": 1.0975025931540734e-05,
"loss": 0.0183,
"step": 11175
},
{
"epoch": 2.4127531236535975,
"grad_norm": 0.025296270847320557,
"learning_rate": 1.0875289236415863e-05,
"loss": 0.0099,
"step": 11200
},
{
"epoch": 2.4181387333046103,
"grad_norm": 0.020655043423175812,
"learning_rate": 1.0775552541290992e-05,
"loss": 0.0521,
"step": 11225
},
{
"epoch": 2.4235243429556226,
"grad_norm": 0.26498308777809143,
"learning_rate": 1.0675815846166122e-05,
"loss": 0.1073,
"step": 11250
},
{
"epoch": 2.428909952606635,
"grad_norm": 0.023832648992538452,
"learning_rate": 1.0576079151041251e-05,
"loss": 0.042,
"step": 11275
},
{
"epoch": 2.4342955622576476,
"grad_norm": 0.025238890200853348,
"learning_rate": 1.0476342455916382e-05,
"loss": 0.0765,
"step": 11300
},
{
"epoch": 2.43968117190866,
"grad_norm": 0.029484113678336143,
"learning_rate": 1.037660576079151e-05,
"loss": 0.1135,
"step": 11325
},
{
"epoch": 2.4450667815596727,
"grad_norm": 4.394412994384766,
"learning_rate": 1.0276869065666641e-05,
"loss": 0.033,
"step": 11350
},
{
"epoch": 2.450452391210685,
"grad_norm": 0.02717486396431923,
"learning_rate": 1.017713237054177e-05,
"loss": 0.0756,
"step": 11375
},
{
"epoch": 2.4558380008616973,
"grad_norm": 0.03741078078746796,
"learning_rate": 1.0077395675416899e-05,
"loss": 0.0959,
"step": 11400
},
{
"epoch": 2.46122361051271,
"grad_norm": 0.025637760758399963,
"learning_rate": 9.977658980292029e-06,
"loss": 0.0421,
"step": 11425
},
{
"epoch": 2.4666092201637224,
"grad_norm": 0.04782960191369057,
"learning_rate": 9.877922285167158e-06,
"loss": 0.1026,
"step": 11450
},
{
"epoch": 2.471994829814735,
"grad_norm": 0.04921013489365578,
"learning_rate": 9.77818559004229e-06,
"loss": 0.0067,
"step": 11475
},
{
"epoch": 2.4773804394657475,
"grad_norm": 0.04560457542538643,
"learning_rate": 9.678448894917419e-06,
"loss": 0.089,
"step": 11500
},
{
"epoch": 2.4827660491167602,
"grad_norm": 0.02379678376019001,
"learning_rate": 9.578712199792548e-06,
"loss": 0.0625,
"step": 11525
},
{
"epoch": 2.4881516587677726,
"grad_norm": 0.02355758473277092,
"learning_rate": 9.478975504667678e-06,
"loss": 0.0375,
"step": 11550
},
{
"epoch": 2.493537268418785,
"grad_norm": 0.025686856359243393,
"learning_rate": 9.379238809542807e-06,
"loss": 0.0407,
"step": 11575
},
{
"epoch": 2.4989228780697976,
"grad_norm": 0.028306877240538597,
"learning_rate": 9.279502114417938e-06,
"loss": 0.0277,
"step": 11600
},
{
"epoch": 2.50430848772081,
"grad_norm": 0.02451007254421711,
"learning_rate": 9.179765419293066e-06,
"loss": 0.0431,
"step": 11625
},
{
"epoch": 2.5096940973718223,
"grad_norm": 3.4917030334472656,
"learning_rate": 9.080028724168197e-06,
"loss": 0.0778,
"step": 11650
},
{
"epoch": 2.515079707022835,
"grad_norm": 0.03555026650428772,
"learning_rate": 8.980292029043326e-06,
"loss": 0.0597,
"step": 11675
},
{
"epoch": 2.5204653166738473,
"grad_norm": 0.05606693774461746,
"learning_rate": 8.880555333918456e-06,
"loss": 0.0789,
"step": 11700
},
{
"epoch": 2.52585092632486,
"grad_norm": 0.03267287090420723,
"learning_rate": 8.780818638793585e-06,
"loss": 0.0212,
"step": 11725
},
{
"epoch": 2.5312365359758724,
"grad_norm": 0.028950348496437073,
"learning_rate": 8.681081943668714e-06,
"loss": 0.0142,
"step": 11750
},
{
"epoch": 2.536622145626885,
"grad_norm": 0.02338344044983387,
"learning_rate": 8.581345248543844e-06,
"loss": 0.0022,
"step": 11775
},
{
"epoch": 2.5420077552778975,
"grad_norm": 0.042633187025785446,
"learning_rate": 8.481608553418973e-06,
"loss": 0.1394,
"step": 11800
},
{
"epoch": 2.5473933649289098,
"grad_norm": 0.1146039292216301,
"learning_rate": 8.381871858294104e-06,
"loss": 0.1077,
"step": 11825
},
{
"epoch": 2.5527789745799225,
"grad_norm": 0.03126833215355873,
"learning_rate": 8.282135163169234e-06,
"loss": 0.0714,
"step": 11850
},
{
"epoch": 2.558164584230935,
"grad_norm": 0.032733842730522156,
"learning_rate": 8.182398468044363e-06,
"loss": 0.1485,
"step": 11875
},
{
"epoch": 2.563550193881947,
"grad_norm": 0.5381880402565002,
"learning_rate": 8.082661772919494e-06,
"loss": 0.061,
"step": 11900
},
{
"epoch": 2.56893580353296,
"grad_norm": 0.031703583896160126,
"learning_rate": 7.982925077794622e-06,
"loss": 0.0555,
"step": 11925
},
{
"epoch": 2.5743214131839727,
"grad_norm": 0.10838180035352707,
"learning_rate": 7.883188382669753e-06,
"loss": 0.0063,
"step": 11950
},
{
"epoch": 2.579707022834985,
"grad_norm": 0.05079588666558266,
"learning_rate": 7.783451687544882e-06,
"loss": 0.0251,
"step": 11975
},
{
"epoch": 2.5850926324859973,
"grad_norm": 0.023224713280797005,
"learning_rate": 7.683714992420012e-06,
"loss": 0.0351,
"step": 12000
},
{
"epoch": 2.59047824213701,
"grad_norm": 0.02427135780453682,
"learning_rate": 7.583978297295141e-06,
"loss": 0.0435,
"step": 12025
},
{
"epoch": 2.5958638517880224,
"grad_norm": 10.110872268676758,
"learning_rate": 7.484241602170271e-06,
"loss": 0.0877,
"step": 12050
},
{
"epoch": 2.6012494614390347,
"grad_norm": 0.018899202346801758,
"learning_rate": 7.3845049070454004e-06,
"loss": 0.0266,
"step": 12075
},
{
"epoch": 2.6066350710900474,
"grad_norm": 2.7654731273651123,
"learning_rate": 7.28476821192053e-06,
"loss": 0.0766,
"step": 12100
},
{
"epoch": 2.6120206807410598,
"grad_norm": 0.02472410537302494,
"learning_rate": 7.18503151679566e-06,
"loss": 0.0775,
"step": 12125
},
{
"epoch": 2.6174062903920725,
"grad_norm": 0.08125100284814835,
"learning_rate": 7.085294821670789e-06,
"loss": 0.033,
"step": 12150
},
{
"epoch": 2.622791900043085,
"grad_norm": 0.01764783076941967,
"learning_rate": 6.985558126545918e-06,
"loss": 0.0032,
"step": 12175
},
{
"epoch": 2.6281775096940976,
"grad_norm": 0.019586993381381035,
"learning_rate": 6.885821431421048e-06,
"loss": 0.0463,
"step": 12200
},
{
"epoch": 2.63356311934511,
"grad_norm": 1.3871238231658936,
"learning_rate": 6.786084736296179e-06,
"loss": 0.0267,
"step": 12225
},
{
"epoch": 2.638948728996122,
"grad_norm": 0.018140500411391258,
"learning_rate": 6.686348041171309e-06,
"loss": 0.0696,
"step": 12250
},
{
"epoch": 2.644334338647135,
"grad_norm": 0.0250703152269125,
"learning_rate": 6.586611346046439e-06,
"loss": 0.0742,
"step": 12275
},
{
"epoch": 2.6497199482981473,
"grad_norm": 0.020942581817507744,
"learning_rate": 6.4868746509215674e-06,
"loss": 0.0302,
"step": 12300
},
{
"epoch": 2.6551055579491596,
"grad_norm": 0.018071915954351425,
"learning_rate": 6.387137955796697e-06,
"loss": 0.0367,
"step": 12325
},
{
"epoch": 2.6604911676001723,
"grad_norm": 19.54036521911621,
"learning_rate": 6.287401260671827e-06,
"loss": 0.0825,
"step": 12350
},
{
"epoch": 2.665876777251185,
"grad_norm": 0.016232356429100037,
"learning_rate": 6.1876645655469564e-06,
"loss": 0.0359,
"step": 12375
},
{
"epoch": 2.6712623869021974,
"grad_norm": 0.024337278679013252,
"learning_rate": 6.087927870422086e-06,
"loss": 0.0273,
"step": 12400
},
{
"epoch": 2.6766479965532097,
"grad_norm": 0.28807416558265686,
"learning_rate": 5.988191175297216e-06,
"loss": 0.0266,
"step": 12425
},
{
"epoch": 2.6820336062042225,
"grad_norm": 0.01607998088002205,
"learning_rate": 5.8884544801723455e-06,
"loss": 0.0666,
"step": 12450
},
{
"epoch": 2.687419215855235,
"grad_norm": 0.021535271778702736,
"learning_rate": 5.788717785047474e-06,
"loss": 0.0611,
"step": 12475
},
{
"epoch": 2.692804825506247,
"grad_norm": 12.746896743774414,
"learning_rate": 5.688981089922604e-06,
"loss": 0.101,
"step": 12500
},
{
"epoch": 2.69819043515726,
"grad_norm": 1.019801378250122,
"learning_rate": 5.5892443947977345e-06,
"loss": 0.0415,
"step": 12525
},
{
"epoch": 2.703576044808272,
"grad_norm": 0.092778280377388,
"learning_rate": 5.489507699672864e-06,
"loss": 0.0365,
"step": 12550
},
{
"epoch": 2.708961654459285,
"grad_norm": 0.5808451771736145,
"learning_rate": 5.389771004547994e-06,
"loss": 0.0983,
"step": 12575
},
{
"epoch": 2.7143472641102973,
"grad_norm": 0.17807550728321075,
"learning_rate": 5.2900343094231235e-06,
"loss": 0.0214,
"step": 12600
},
{
"epoch": 2.71973287376131,
"grad_norm": 0.031068023294210434,
"learning_rate": 5.190297614298253e-06,
"loss": 0.0623,
"step": 12625
},
{
"epoch": 2.7251184834123223,
"grad_norm": 0.023084493353962898,
"learning_rate": 5.090560919173382e-06,
"loss": 0.0154,
"step": 12650
},
{
"epoch": 2.7305040930633346,
"grad_norm": 0.0403926819562912,
"learning_rate": 4.990824224048512e-06,
"loss": 0.0358,
"step": 12675
},
{
"epoch": 2.7358897027143474,
"grad_norm": 0.43667861819267273,
"learning_rate": 4.891087528923642e-06,
"loss": 0.0016,
"step": 12700
},
{
"epoch": 2.7412753123653597,
"grad_norm": 0.07555987685918808,
"learning_rate": 4.791350833798772e-06,
"loss": 0.0721,
"step": 12725
},
{
"epoch": 2.746660922016372,
"grad_norm": 0.022048979997634888,
"learning_rate": 4.6916141386739015e-06,
"loss": 0.0565,
"step": 12750
},
{
"epoch": 2.7520465316673848,
"grad_norm": 0.020737633109092712,
"learning_rate": 4.591877443549031e-06,
"loss": 0.038,
"step": 12775
},
{
"epoch": 2.757432141318397,
"grad_norm": 2.780998945236206,
"learning_rate": 4.492140748424161e-06,
"loss": 0.1345,
"step": 12800
},
{
"epoch": 2.76281775096941,
"grad_norm": 0.0388825386762619,
"learning_rate": 4.39240405329929e-06,
"loss": 0.0194,
"step": 12825
},
{
"epoch": 2.768203360620422,
"grad_norm": 7.417761325836182,
"learning_rate": 4.292667358174419e-06,
"loss": 0.0343,
"step": 12850
},
{
"epoch": 2.773588970271435,
"grad_norm": 0.02559277042746544,
"learning_rate": 4.192930663049549e-06,
"loss": 0.002,
"step": 12875
},
{
"epoch": 2.7789745799224472,
"grad_norm": 0.019186072051525116,
"learning_rate": 4.0931939679246795e-06,
"loss": 0.0732,
"step": 12900
},
{
"epoch": 2.7843601895734595,
"grad_norm": 0.034625016152858734,
"learning_rate": 3.993457272799809e-06,
"loss": 0.0631,
"step": 12925
},
{
"epoch": 2.7897457992244723,
"grad_norm": 0.07090263068675995,
"learning_rate": 3.893720577674939e-06,
"loss": 0.0705,
"step": 12950
},
{
"epoch": 2.7951314088754846,
"grad_norm": 0.01826942153275013,
"learning_rate": 3.793983882550068e-06,
"loss": 0.0212,
"step": 12975
},
{
"epoch": 2.800517018526497,
"grad_norm": 7.512089729309082,
"learning_rate": 3.6942471874251977e-06,
"loss": 0.0609,
"step": 13000
},
{
"epoch": 2.8059026281775097,
"grad_norm": 92.8465805053711,
"learning_rate": 3.594510492300327e-06,
"loss": 0.0397,
"step": 13025
},
{
"epoch": 2.8112882378285224,
"grad_norm": 2.2542502880096436,
"learning_rate": 3.4947737971754566e-06,
"loss": 0.0542,
"step": 13050
},
{
"epoch": 2.8166738474795348,
"grad_norm": 0.026438845321536064,
"learning_rate": 3.395037102050587e-06,
"loss": 0.0257,
"step": 13075
},
{
"epoch": 2.822059457130547,
"grad_norm": 0.027188275009393692,
"learning_rate": 3.2953004069257164e-06,
"loss": 0.0357,
"step": 13100
},
{
"epoch": 2.82744506678156,
"grad_norm": 0.023801114410161972,
"learning_rate": 3.195563711800846e-06,
"loss": 0.0453,
"step": 13125
},
{
"epoch": 2.832830676432572,
"grad_norm": 0.10319499671459198,
"learning_rate": 3.0958270166759757e-06,
"loss": 0.0796,
"step": 13150
},
{
"epoch": 2.8382162860835844,
"grad_norm": 0.031992778182029724,
"learning_rate": 2.9960903215511054e-06,
"loss": 0.0505,
"step": 13175
},
{
"epoch": 2.843601895734597,
"grad_norm": 0.12323871999979019,
"learning_rate": 2.8963536264262346e-06,
"loss": 0.0366,
"step": 13200
},
{
"epoch": 2.8489875053856095,
"grad_norm": 0.019458066672086716,
"learning_rate": 2.7966169313013647e-06,
"loss": 0.0232,
"step": 13225
},
{
"epoch": 2.8543731150366223,
"grad_norm": 0.024557072669267654,
"learning_rate": 2.6968802361764944e-06,
"loss": 0.0745,
"step": 13250
},
{
"epoch": 2.8597587246876346,
"grad_norm": 5.838003635406494,
"learning_rate": 2.5971435410516236e-06,
"loss": 0.0553,
"step": 13275
},
{
"epoch": 2.8651443343386473,
"grad_norm": 1.81170654296875,
"learning_rate": 2.4974068459267533e-06,
"loss": 0.0341,
"step": 13300
},
{
"epoch": 2.8705299439896597,
"grad_norm": 4.6604180335998535,
"learning_rate": 2.3976701508018834e-06,
"loss": 0.0166,
"step": 13325
},
{
"epoch": 2.875915553640672,
"grad_norm": 0.9680020809173584,
"learning_rate": 2.2979334556770127e-06,
"loss": 0.0575,
"step": 13350
},
{
"epoch": 2.8813011632916847,
"grad_norm": 0.044519729912281036,
"learning_rate": 2.1981967605521423e-06,
"loss": 0.0795,
"step": 13375
},
{
"epoch": 2.886686772942697,
"grad_norm": 0.019457995891571045,
"learning_rate": 2.098460065427272e-06,
"loss": 0.0278,
"step": 13400
},
{
"epoch": 2.8920723825937094,
"grad_norm": 0.017885301262140274,
"learning_rate": 1.9987233703024017e-06,
"loss": 0.0149,
"step": 13425
},
{
"epoch": 2.897457992244722,
"grad_norm": 37.20232009887695,
"learning_rate": 1.8989866751775313e-06,
"loss": 0.0252,
"step": 13450
},
{
"epoch": 2.902843601895735,
"grad_norm": 0.03464394435286522,
"learning_rate": 1.799249980052661e-06,
"loss": 0.0956,
"step": 13475
},
{
"epoch": 2.908229211546747,
"grad_norm": 0.08744242042303085,
"learning_rate": 1.6995132849277909e-06,
"loss": 0.0632,
"step": 13500
},
{
"epoch": 2.9136148211977595,
"grad_norm": 0.014135221019387245,
"learning_rate": 1.5997765898029205e-06,
"loss": 0.0276,
"step": 13525
},
{
"epoch": 2.9190004308487723,
"grad_norm": 0.06255125254392624,
"learning_rate": 1.50003989467805e-06,
"loss": 0.0825,
"step": 13550
},
{
"epoch": 2.9243860404997846,
"grad_norm": 10.135443687438965,
"learning_rate": 1.4003031995531797e-06,
"loss": 0.0442,
"step": 13575
},
{
"epoch": 2.929771650150797,
"grad_norm": 0.01875634863972664,
"learning_rate": 1.3005665044283093e-06,
"loss": 0.0304,
"step": 13600
},
{
"epoch": 2.9351572598018096,
"grad_norm": 0.13877597451210022,
"learning_rate": 1.200829809303439e-06,
"loss": 0.0367,
"step": 13625
},
{
"epoch": 2.940542869452822,
"grad_norm": 0.016782578080892563,
"learning_rate": 1.1010931141785687e-06,
"loss": 0.1222,
"step": 13650
},
{
"epoch": 2.9459284791038347,
"grad_norm": 0.03902614489197731,
"learning_rate": 1.0013564190536983e-06,
"loss": 0.1027,
"step": 13675
},
{
"epoch": 2.951314088754847,
"grad_norm": 0.017981288954615593,
"learning_rate": 9.016197239288279e-07,
"loss": 0.014,
"step": 13700
},
{
"epoch": 2.9566996984058598,
"grad_norm": 0.016431229189038277,
"learning_rate": 8.018830288039577e-07,
"loss": 0.016,
"step": 13725
},
{
"epoch": 2.962085308056872,
"grad_norm": 0.031093724071979523,
"learning_rate": 7.021463336790872e-07,
"loss": 0.0563,
"step": 13750
},
{
"epoch": 2.9674709177078844,
"grad_norm": 0.06026161089539528,
"learning_rate": 6.024096385542169e-07,
"loss": 0.0381,
"step": 13775
},
{
"epoch": 2.972856527358897,
"grad_norm": 7.449411869049072,
"learning_rate": 5.026729434293466e-07,
"loss": 0.0633,
"step": 13800
},
{
"epoch": 2.9782421370099095,
"grad_norm": 0.022474773228168488,
"learning_rate": 4.0293624830447624e-07,
"loss": 0.0601,
"step": 13825
},
{
"epoch": 2.983627746660922,
"grad_norm": 0.020873403176665306,
"learning_rate": 3.0319955317960585e-07,
"loss": 0.0036,
"step": 13850
},
{
"epoch": 2.9890133563119345,
"grad_norm": 0.0238470621407032,
"learning_rate": 2.0346285805473552e-07,
"loss": 0.0519,
"step": 13875
},
{
"epoch": 2.994398965962947,
"grad_norm": 0.03307591751217842,
"learning_rate": 1.0372616292986516e-07,
"loss": 0.002,
"step": 13900
},
{
"epoch": 2.9997845756139596,
"grad_norm": 0.9745203852653503,
"learning_rate": 3.989467804994814e-09,
"loss": 0.058,
"step": 13925
},
{
"epoch": 3.0,
"eval_accuracy": 0.9739308413228482,
"eval_auc": 0.9959614324316973,
"eval_f1": 0.9739448751076658,
"eval_loss": 0.12016157805919647,
"eval_precision": 0.9701844701844702,
"eval_recall": 0.9777345438824038,
"eval_runtime": 1664.4298,
"eval_samples_per_second": 5.577,
"eval_steps_per_second": 0.349,
"step": 13926
}
],
"logging_steps": 25,
"max_steps": 13926,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7327379780720640.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}