trash_vit_trail / trainer_state.json
{
"best_metric": 0.0710952952504158,
"best_model_checkpoint": "./vit-base-trash-demo-v5/checkpoint-4000",
"epoch": 4.0,
"eval_steps": 1000,
"global_step": 4476,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.008936550491510277,
"grad_norm": 2.2266733646392822,
"learning_rate": 0.00019955317247542448,
"loss": 1.7283,
"step": 10
},
{
"epoch": 0.017873100983020553,
"grad_norm": 1.7498841285705566,
"learning_rate": 0.000199106344950849,
"loss": 1.2864,
"step": 20
},
{
"epoch": 0.02680965147453083,
"grad_norm": 2.307732582092285,
"learning_rate": 0.00019865951742627347,
"loss": 0.9102,
"step": 30
},
{
"epoch": 0.035746201966041107,
"grad_norm": 4.274341106414795,
"learning_rate": 0.00019821268990169794,
"loss": 0.7496,
"step": 40
},
{
"epoch": 0.044682752457551385,
"grad_norm": 2.586291551589966,
"learning_rate": 0.00019776586237712246,
"loss": 0.7103,
"step": 50
},
{
"epoch": 0.05361930294906166,
"grad_norm": 1.1641569137573242,
"learning_rate": 0.00019731903485254693,
"loss": 0.5292,
"step": 60
},
{
"epoch": 0.06255585344057193,
"grad_norm": 5.71007776260376,
"learning_rate": 0.0001968722073279714,
"loss": 0.5169,
"step": 70
},
{
"epoch": 0.07149240393208221,
"grad_norm": 4.907001495361328,
"learning_rate": 0.0001964253798033959,
"loss": 0.5468,
"step": 80
},
{
"epoch": 0.08042895442359249,
"grad_norm": 4.788897514343262,
"learning_rate": 0.00019597855227882039,
"loss": 0.5174,
"step": 90
},
{
"epoch": 0.08936550491510277,
"grad_norm": 3.1065032482147217,
"learning_rate": 0.00019553172475424485,
"loss": 0.4598,
"step": 100
},
{
"epoch": 0.09830205540661305,
"grad_norm": 2.299734115600586,
"learning_rate": 0.00019508489722966935,
"loss": 0.3711,
"step": 110
},
{
"epoch": 0.10723860589812333,
"grad_norm": 1.3453820943832397,
"learning_rate": 0.00019463806970509384,
"loss": 0.4316,
"step": 120
},
{
"epoch": 0.1161751563896336,
"grad_norm": 2.797497034072876,
"learning_rate": 0.00019419124218051834,
"loss": 0.5008,
"step": 130
},
{
"epoch": 0.12511170688114387,
"grad_norm": 3.164219856262207,
"learning_rate": 0.0001937444146559428,
"loss": 0.371,
"step": 140
},
{
"epoch": 0.13404825737265416,
"grad_norm": 5.14756965637207,
"learning_rate": 0.0001932975871313673,
"loss": 0.5617,
"step": 150
},
{
"epoch": 0.14298480786416443,
"grad_norm": 1.478502869606018,
"learning_rate": 0.0001928507596067918,
"loss": 0.3317,
"step": 160
},
{
"epoch": 0.15192135835567472,
"grad_norm": 1.3914997577667236,
"learning_rate": 0.00019240393208221627,
"loss": 0.3637,
"step": 170
},
{
"epoch": 0.16085790884718498,
"grad_norm": 2.963843822479248,
"learning_rate": 0.00019195710455764076,
"loss": 0.3628,
"step": 180
},
{
"epoch": 0.16979445933869527,
"grad_norm": 4.369801044464111,
"learning_rate": 0.00019151027703306526,
"loss": 0.4182,
"step": 190
},
{
"epoch": 0.17873100983020554,
"grad_norm": 2.109142541885376,
"learning_rate": 0.00019106344950848973,
"loss": 0.279,
"step": 200
},
{
"epoch": 0.1876675603217158,
"grad_norm": 5.093620777130127,
"learning_rate": 0.00019061662198391422,
"loss": 0.4936,
"step": 210
},
{
"epoch": 0.1966041108132261,
"grad_norm": 2.2940657138824463,
"learning_rate": 0.00019016979445933872,
"loss": 0.532,
"step": 220
},
{
"epoch": 0.20554066130473636,
"grad_norm": 3.415463924407959,
"learning_rate": 0.00018972296693476319,
"loss": 0.3475,
"step": 230
},
{
"epoch": 0.21447721179624665,
"grad_norm": 1.158109426498413,
"learning_rate": 0.00018927613941018768,
"loss": 0.386,
"step": 240
},
{
"epoch": 0.22341376228775692,
"grad_norm": 0.44666406512260437,
"learning_rate": 0.00018882931188561218,
"loss": 0.4165,
"step": 250
},
{
"epoch": 0.2323503127792672,
"grad_norm": 2.626112461090088,
"learning_rate": 0.00018838248436103664,
"loss": 0.3478,
"step": 260
},
{
"epoch": 0.24128686327077747,
"grad_norm": 3.9105656147003174,
"learning_rate": 0.00018793565683646114,
"loss": 0.293,
"step": 270
},
{
"epoch": 0.25022341376228774,
"grad_norm": 6.075140953063965,
"learning_rate": 0.00018748882931188563,
"loss": 0.4013,
"step": 280
},
{
"epoch": 0.25915996425379806,
"grad_norm": 1.2540974617004395,
"learning_rate": 0.0001870420017873101,
"loss": 0.5419,
"step": 290
},
{
"epoch": 0.2680965147453083,
"grad_norm": 2.7805850505828857,
"learning_rate": 0.00018659517426273457,
"loss": 0.373,
"step": 300
},
{
"epoch": 0.2770330652368186,
"grad_norm": 1.9444429874420166,
"learning_rate": 0.0001861483467381591,
"loss": 0.2447,
"step": 310
},
{
"epoch": 0.28596961572832885,
"grad_norm": 2.164452075958252,
"learning_rate": 0.00018570151921358356,
"loss": 0.4141,
"step": 320
},
{
"epoch": 0.2949061662198391,
"grad_norm": 4.439435958862305,
"learning_rate": 0.00018525469168900803,
"loss": 0.3183,
"step": 330
},
{
"epoch": 0.30384271671134944,
"grad_norm": 3.332730770111084,
"learning_rate": 0.00018480786416443255,
"loss": 0.3255,
"step": 340
},
{
"epoch": 0.3127792672028597,
"grad_norm": 4.461299419403076,
"learning_rate": 0.00018436103663985702,
"loss": 0.2733,
"step": 350
},
{
"epoch": 0.32171581769436997,
"grad_norm": 4.637039661407471,
"learning_rate": 0.0001839142091152815,
"loss": 0.2486,
"step": 360
},
{
"epoch": 0.33065236818588023,
"grad_norm": 1.985630989074707,
"learning_rate": 0.000183467381590706,
"loss": 0.2978,
"step": 370
},
{
"epoch": 0.33958891867739055,
"grad_norm": 2.7530932426452637,
"learning_rate": 0.00018302055406613048,
"loss": 0.3722,
"step": 380
},
{
"epoch": 0.3485254691689008,
"grad_norm": 0.20281237363815308,
"learning_rate": 0.00018257372654155497,
"loss": 0.3897,
"step": 390
},
{
"epoch": 0.3574620196604111,
"grad_norm": 1.9220471382141113,
"learning_rate": 0.00018212689901697947,
"loss": 0.2564,
"step": 400
},
{
"epoch": 0.36639857015192134,
"grad_norm": 4.841084957122803,
"learning_rate": 0.00018168007149240394,
"loss": 0.3892,
"step": 410
},
{
"epoch": 0.3753351206434316,
"grad_norm": 5.499583721160889,
"learning_rate": 0.00018123324396782843,
"loss": 0.3462,
"step": 420
},
{
"epoch": 0.38427167113494193,
"grad_norm": 1.4320725202560425,
"learning_rate": 0.0001807864164432529,
"loss": 0.2658,
"step": 430
},
{
"epoch": 0.3932082216264522,
"grad_norm": 2.9739625453948975,
"learning_rate": 0.0001803395889186774,
"loss": 0.269,
"step": 440
},
{
"epoch": 0.40214477211796246,
"grad_norm": 2.0247411727905273,
"learning_rate": 0.0001798927613941019,
"loss": 0.3355,
"step": 450
},
{
"epoch": 0.4110813226094727,
"grad_norm": 0.25784507393836975,
"learning_rate": 0.00017944593386952636,
"loss": 0.2589,
"step": 460
},
{
"epoch": 0.42001787310098304,
"grad_norm": 8.2475004196167,
"learning_rate": 0.00017899910634495086,
"loss": 0.3034,
"step": 470
},
{
"epoch": 0.4289544235924933,
"grad_norm": 2.1054959297180176,
"learning_rate": 0.00017855227882037535,
"loss": 0.3784,
"step": 480
},
{
"epoch": 0.43789097408400357,
"grad_norm": 2.6620118618011475,
"learning_rate": 0.00017810545129579982,
"loss": 0.3303,
"step": 490
},
{
"epoch": 0.44682752457551383,
"grad_norm": 2.4308674335479736,
"learning_rate": 0.00017765862377122431,
"loss": 0.4428,
"step": 500
},
{
"epoch": 0.45576407506702415,
"grad_norm": 4.620336532592773,
"learning_rate": 0.0001772117962466488,
"loss": 0.4007,
"step": 510
},
{
"epoch": 0.4647006255585344,
"grad_norm": 0.6208035945892334,
"learning_rate": 0.00017676496872207328,
"loss": 0.4641,
"step": 520
},
{
"epoch": 0.4736371760500447,
"grad_norm": 3.478276252746582,
"learning_rate": 0.00017631814119749777,
"loss": 0.3107,
"step": 530
},
{
"epoch": 0.48257372654155495,
"grad_norm": 2.8934295177459717,
"learning_rate": 0.00017587131367292227,
"loss": 0.2283,
"step": 540
},
{
"epoch": 0.4915102770330652,
"grad_norm": 2.323265552520752,
"learning_rate": 0.00017542448614834674,
"loss": 0.253,
"step": 550
},
{
"epoch": 0.5004468275245755,
"grad_norm": 0.8551294207572937,
"learning_rate": 0.00017497765862377123,
"loss": 0.2903,
"step": 560
},
{
"epoch": 0.5093833780160858,
"grad_norm": 3.454586982727051,
"learning_rate": 0.00017453083109919573,
"loss": 0.2434,
"step": 570
},
{
"epoch": 0.5183199285075961,
"grad_norm": 4.937902927398682,
"learning_rate": 0.0001740840035746202,
"loss": 0.254,
"step": 580
},
{
"epoch": 0.5272564789991063,
"grad_norm": 7.016292095184326,
"learning_rate": 0.0001736371760500447,
"loss": 0.3365,
"step": 590
},
{
"epoch": 0.5361930294906166,
"grad_norm": 4.799339294433594,
"learning_rate": 0.0001731903485254692,
"loss": 0.2639,
"step": 600
},
{
"epoch": 0.5451295799821269,
"grad_norm": 3.4884583950042725,
"learning_rate": 0.00017274352100089365,
"loss": 0.2255,
"step": 610
},
{
"epoch": 0.5540661304736372,
"grad_norm": 1.6748002767562866,
"learning_rate": 0.00017229669347631815,
"loss": 0.2678,
"step": 620
},
{
"epoch": 0.5630026809651475,
"grad_norm": 4.145753383636475,
"learning_rate": 0.00017184986595174265,
"loss": 0.3435,
"step": 630
},
{
"epoch": 0.5719392314566577,
"grad_norm": 3.8941946029663086,
"learning_rate": 0.00017140303842716711,
"loss": 0.3448,
"step": 640
},
{
"epoch": 0.580875781948168,
"grad_norm": 2.7980730533599854,
"learning_rate": 0.0001709562109025916,
"loss": 0.1776,
"step": 650
},
{
"epoch": 0.5898123324396782,
"grad_norm": 2.6846330165863037,
"learning_rate": 0.0001705093833780161,
"loss": 0.1698,
"step": 660
},
{
"epoch": 0.5987488829311886,
"grad_norm": 0.6026754379272461,
"learning_rate": 0.00017006255585344057,
"loss": 0.2683,
"step": 670
},
{
"epoch": 0.6076854334226989,
"grad_norm": 1.7795771360397339,
"learning_rate": 0.00016961572832886507,
"loss": 0.1734,
"step": 680
},
{
"epoch": 0.6166219839142091,
"grad_norm": 2.9793999195098877,
"learning_rate": 0.00016916890080428956,
"loss": 0.2341,
"step": 690
},
{
"epoch": 0.6255585344057194,
"grad_norm": 2.885993480682373,
"learning_rate": 0.00016872207327971403,
"loss": 0.1787,
"step": 700
},
{
"epoch": 0.6344950848972297,
"grad_norm": 0.12324349582195282,
"learning_rate": 0.00016827524575513853,
"loss": 0.1716,
"step": 710
},
{
"epoch": 0.6434316353887399,
"grad_norm": 3.5133144855499268,
"learning_rate": 0.00016782841823056302,
"loss": 0.248,
"step": 720
},
{
"epoch": 0.6523681858802503,
"grad_norm": 0.16779197752475739,
"learning_rate": 0.0001673815907059875,
"loss": 0.1969,
"step": 730
},
{
"epoch": 0.6613047363717605,
"grad_norm": 6.696399688720703,
"learning_rate": 0.00016693476318141199,
"loss": 0.2659,
"step": 740
},
{
"epoch": 0.6702412868632708,
"grad_norm": 3.4363462924957275,
"learning_rate": 0.00016648793565683648,
"loss": 0.1933,
"step": 750
},
{
"epoch": 0.6791778373547811,
"grad_norm": 2.9766454696655273,
"learning_rate": 0.00016604110813226095,
"loss": 0.3646,
"step": 760
},
{
"epoch": 0.6881143878462913,
"grad_norm": 3.3751492500305176,
"learning_rate": 0.00016559428060768544,
"loss": 0.2365,
"step": 770
},
{
"epoch": 0.6970509383378016,
"grad_norm": 1.6829200983047485,
"learning_rate": 0.00016514745308310994,
"loss": 0.3954,
"step": 780
},
{
"epoch": 0.7059874888293118,
"grad_norm": 3.473019599914551,
"learning_rate": 0.0001647006255585344,
"loss": 0.2257,
"step": 790
},
{
"epoch": 0.7149240393208222,
"grad_norm": 2.1625287532806396,
"learning_rate": 0.0001642537980339589,
"loss": 0.2984,
"step": 800
},
{
"epoch": 0.7238605898123325,
"grad_norm": 1.2086807489395142,
"learning_rate": 0.00016380697050938337,
"loss": 0.2888,
"step": 810
},
{
"epoch": 0.7327971403038427,
"grad_norm": 0.19183319807052612,
"learning_rate": 0.00016336014298480787,
"loss": 0.2884,
"step": 820
},
{
"epoch": 0.741733690795353,
"grad_norm": 4.687781810760498,
"learning_rate": 0.00016291331546023236,
"loss": 0.2456,
"step": 830
},
{
"epoch": 0.7506702412868632,
"grad_norm": 1.6150999069213867,
"learning_rate": 0.00016246648793565683,
"loss": 0.2231,
"step": 840
},
{
"epoch": 0.7596067917783735,
"grad_norm": 2.592801809310913,
"learning_rate": 0.00016201966041108133,
"loss": 0.2425,
"step": 850
},
{
"epoch": 0.7685433422698839,
"grad_norm": 5.782228469848633,
"learning_rate": 0.00016157283288650582,
"loss": 0.2184,
"step": 860
},
{
"epoch": 0.7774798927613941,
"grad_norm": 4.794034957885742,
"learning_rate": 0.0001611260053619303,
"loss": 0.1862,
"step": 870
},
{
"epoch": 0.7864164432529044,
"grad_norm": 6.517756462097168,
"learning_rate": 0.0001606791778373548,
"loss": 0.3502,
"step": 880
},
{
"epoch": 0.7953529937444147,
"grad_norm": 6.479066848754883,
"learning_rate": 0.00016023235031277928,
"loss": 0.1433,
"step": 890
},
{
"epoch": 0.8042895442359249,
"grad_norm": 1.539117455482483,
"learning_rate": 0.00015978552278820375,
"loss": 0.3306,
"step": 900
},
{
"epoch": 0.8132260947274352,
"grad_norm": 2.0679945945739746,
"learning_rate": 0.00015933869526362827,
"loss": 0.2325,
"step": 910
},
{
"epoch": 0.8221626452189454,
"grad_norm": 4.1405558586120605,
"learning_rate": 0.00015889186773905274,
"loss": 0.2126,
"step": 920
},
{
"epoch": 0.8310991957104558,
"grad_norm": 3.7805371284484863,
"learning_rate": 0.0001584450402144772,
"loss": 0.2418,
"step": 930
},
{
"epoch": 0.8400357462019661,
"grad_norm": 4.6036248207092285,
"learning_rate": 0.0001579982126899017,
"loss": 0.3191,
"step": 940
},
{
"epoch": 0.8489722966934763,
"grad_norm": 0.8650698661804199,
"learning_rate": 0.0001575513851653262,
"loss": 0.0849,
"step": 950
},
{
"epoch": 0.8579088471849866,
"grad_norm": 0.4226575791835785,
"learning_rate": 0.00015710455764075067,
"loss": 0.1689,
"step": 960
},
{
"epoch": 0.8668453976764968,
"grad_norm": 4.508443355560303,
"learning_rate": 0.00015665773011617516,
"loss": 0.1381,
"step": 970
},
{
"epoch": 0.8757819481680071,
"grad_norm": 5.323261260986328,
"learning_rate": 0.00015621090259159966,
"loss": 0.1799,
"step": 980
},
{
"epoch": 0.8847184986595175,
"grad_norm": 4.80311393737793,
"learning_rate": 0.00015576407506702412,
"loss": 0.1616,
"step": 990
},
{
"epoch": 0.8936550491510277,
"grad_norm": 2.0073227882385254,
"learning_rate": 0.00015531724754244862,
"loss": 0.1814,
"step": 1000
},
{
"epoch": 0.8936550491510277,
"eval_accuracy": 0.9487437185929648,
"eval_loss": 0.17145079374313354,
"eval_runtime": 56.2937,
"eval_samples_per_second": 35.35,
"eval_steps_per_second": 4.423,
"step": 1000
},
{
"epoch": 0.902591599642538,
"grad_norm": 3.7729084491729736,
"learning_rate": 0.00015487042001787312,
"loss": 0.1352,
"step": 1010
},
{
"epoch": 0.9115281501340483,
"grad_norm": 6.341058254241943,
"learning_rate": 0.00015442359249329758,
"loss": 0.2095,
"step": 1020
},
{
"epoch": 0.9204647006255585,
"grad_norm": 0.42624279856681824,
"learning_rate": 0.00015397676496872208,
"loss": 0.2741,
"step": 1030
},
{
"epoch": 0.9294012511170688,
"grad_norm": 4.386059761047363,
"learning_rate": 0.00015352993744414657,
"loss": 0.1688,
"step": 1040
},
{
"epoch": 0.938337801608579,
"grad_norm": 0.07118342816829681,
"learning_rate": 0.00015308310991957104,
"loss": 0.2251,
"step": 1050
},
{
"epoch": 0.9472743521000894,
"grad_norm": 1.6763445138931274,
"learning_rate": 0.00015263628239499554,
"loss": 0.2107,
"step": 1060
},
{
"epoch": 0.9562109025915997,
"grad_norm": 5.161765098571777,
"learning_rate": 0.00015218945487042003,
"loss": 0.1552,
"step": 1070
},
{
"epoch": 0.9651474530831099,
"grad_norm": 1.8887020349502563,
"learning_rate": 0.0001517426273458445,
"loss": 0.259,
"step": 1080
},
{
"epoch": 0.9740840035746202,
"grad_norm": 0.29733049869537354,
"learning_rate": 0.000151295799821269,
"loss": 0.1136,
"step": 1090
},
{
"epoch": 0.9830205540661304,
"grad_norm": 3.253506660461426,
"learning_rate": 0.0001508489722966935,
"loss": 0.1211,
"step": 1100
},
{
"epoch": 0.9919571045576407,
"grad_norm": 2.1613495349884033,
"learning_rate": 0.00015040214477211796,
"loss": 0.1925,
"step": 1110
},
{
"epoch": 1.000893655049151,
"grad_norm": 0.23403897881507874,
"learning_rate": 0.00014995531724754246,
"loss": 0.1099,
"step": 1120
},
{
"epoch": 1.0098302055406614,
"grad_norm": 0.7746050357818604,
"learning_rate": 0.00014950848972296695,
"loss": 0.0627,
"step": 1130
},
{
"epoch": 1.0187667560321716,
"grad_norm": 0.3406558930873871,
"learning_rate": 0.00014906166219839145,
"loss": 0.063,
"step": 1140
},
{
"epoch": 1.0277033065236818,
"grad_norm": 0.12071269750595093,
"learning_rate": 0.00014861483467381591,
"loss": 0.1432,
"step": 1150
},
{
"epoch": 1.0366398570151922,
"grad_norm": 0.4978802800178528,
"learning_rate": 0.00014816800714924038,
"loss": 0.0746,
"step": 1160
},
{
"epoch": 1.0455764075067024,
"grad_norm": 1.3803187608718872,
"learning_rate": 0.0001477211796246649,
"loss": 0.0929,
"step": 1170
},
{
"epoch": 1.0545129579982127,
"grad_norm": 0.8612852692604065,
"learning_rate": 0.00014727435210008937,
"loss": 0.0949,
"step": 1180
},
{
"epoch": 1.063449508489723,
"grad_norm": 0.3493196666240692,
"learning_rate": 0.00014682752457551384,
"loss": 0.168,
"step": 1190
},
{
"epoch": 1.0723860589812333,
"grad_norm": 0.18564535677433014,
"learning_rate": 0.00014638069705093836,
"loss": 0.1344,
"step": 1200
},
{
"epoch": 1.0813226094727435,
"grad_norm": 5.853936672210693,
"learning_rate": 0.00014593386952636283,
"loss": 0.0874,
"step": 1210
},
{
"epoch": 1.0902591599642537,
"grad_norm": 5.366926670074463,
"learning_rate": 0.0001454870420017873,
"loss": 0.0847,
"step": 1220
},
{
"epoch": 1.0991957104557641,
"grad_norm": 0.04646310582756996,
"learning_rate": 0.00014504021447721182,
"loss": 0.0947,
"step": 1230
},
{
"epoch": 1.1081322609472744,
"grad_norm": 0.7593271732330322,
"learning_rate": 0.0001445933869526363,
"loss": 0.1208,
"step": 1240
},
{
"epoch": 1.1170688114387846,
"grad_norm": 6.3901472091674805,
"learning_rate": 0.00014414655942806076,
"loss": 0.0977,
"step": 1250
},
{
"epoch": 1.126005361930295,
"grad_norm": 1.7897100448608398,
"learning_rate": 0.00014369973190348528,
"loss": 0.1107,
"step": 1260
},
{
"epoch": 1.1349419124218052,
"grad_norm": 3.0502521991729736,
"learning_rate": 0.00014325290437890975,
"loss": 0.1854,
"step": 1270
},
{
"epoch": 1.1438784629133154,
"grad_norm": 0.18816685676574707,
"learning_rate": 0.00014280607685433422,
"loss": 0.0863,
"step": 1280
},
{
"epoch": 1.1528150134048256,
"grad_norm": 0.05062058940529823,
"learning_rate": 0.0001423592493297587,
"loss": 0.1339,
"step": 1290
},
{
"epoch": 1.161751563896336,
"grad_norm": 0.23230992257595062,
"learning_rate": 0.0001419124218051832,
"loss": 0.187,
"step": 1300
},
{
"epoch": 1.1706881143878463,
"grad_norm": 3.0492892265319824,
"learning_rate": 0.00014146559428060768,
"loss": 0.0701,
"step": 1310
},
{
"epoch": 1.1796246648793565,
"grad_norm": 0.03424559161067009,
"learning_rate": 0.00014101876675603217,
"loss": 0.0428,
"step": 1320
},
{
"epoch": 1.188561215370867,
"grad_norm": 3.6026527881622314,
"learning_rate": 0.00014057193923145667,
"loss": 0.1337,
"step": 1330
},
{
"epoch": 1.197497765862377,
"grad_norm": 0.09644579142332077,
"learning_rate": 0.00014012511170688114,
"loss": 0.1271,
"step": 1340
},
{
"epoch": 1.2064343163538873,
"grad_norm": 0.22322706878185272,
"learning_rate": 0.00013967828418230563,
"loss": 0.055,
"step": 1350
},
{
"epoch": 1.2153708668453977,
"grad_norm": 0.05372155085206032,
"learning_rate": 0.00013923145665773013,
"loss": 0.1251,
"step": 1360
},
{
"epoch": 1.224307417336908,
"grad_norm": 0.4156355857849121,
"learning_rate": 0.00013878462913315462,
"loss": 0.0405,
"step": 1370
},
{
"epoch": 1.2332439678284182,
"grad_norm": 0.030704261735081673,
"learning_rate": 0.0001383378016085791,
"loss": 0.1225,
"step": 1380
},
{
"epoch": 1.2421805183199286,
"grad_norm": 0.09552694112062454,
"learning_rate": 0.00013789097408400359,
"loss": 0.0495,
"step": 1390
},
{
"epoch": 1.2511170688114388,
"grad_norm": 2.1124463081359863,
"learning_rate": 0.00013744414655942808,
"loss": 0.0651,
"step": 1400
},
{
"epoch": 1.260053619302949,
"grad_norm": 4.6221232414245605,
"learning_rate": 0.00013699731903485255,
"loss": 0.2366,
"step": 1410
},
{
"epoch": 1.2689901697944594,
"grad_norm": 0.054540861397981644,
"learning_rate": 0.00013655049151027704,
"loss": 0.1915,
"step": 1420
},
{
"epoch": 1.2779267202859697,
"grad_norm": 0.6603236198425293,
"learning_rate": 0.00013610366398570154,
"loss": 0.0386,
"step": 1430
},
{
"epoch": 1.2868632707774799,
"grad_norm": 4.419101715087891,
"learning_rate": 0.000135656836461126,
"loss": 0.1288,
"step": 1440
},
{
"epoch": 1.2957998212689903,
"grad_norm": 1.6491079330444336,
"learning_rate": 0.0001352100089365505,
"loss": 0.077,
"step": 1450
},
{
"epoch": 1.3047363717605005,
"grad_norm": 0.904062807559967,
"learning_rate": 0.000134763181411975,
"loss": 0.2083,
"step": 1460
},
{
"epoch": 1.3136729222520107,
"grad_norm": 3.4404361248016357,
"learning_rate": 0.00013431635388739947,
"loss": 0.1846,
"step": 1470
},
{
"epoch": 1.322609472743521,
"grad_norm": 0.2096666842699051,
"learning_rate": 0.00013386952636282396,
"loss": 0.0354,
"step": 1480
},
{
"epoch": 1.3315460232350314,
"grad_norm": 4.2826128005981445,
"learning_rate": 0.00013342269883824846,
"loss": 0.1438,
"step": 1490
},
{
"epoch": 1.3404825737265416,
"grad_norm": 4.742111682891846,
"learning_rate": 0.00013297587131367293,
"loss": 0.0994,
"step": 1500
},
{
"epoch": 1.3494191242180518,
"grad_norm": 6.2931952476501465,
"learning_rate": 0.0001325290437890974,
"loss": 0.0754,
"step": 1510
},
{
"epoch": 1.358355674709562,
"grad_norm": 1.523571491241455,
"learning_rate": 0.00013208221626452192,
"loss": 0.1283,
"step": 1520
},
{
"epoch": 1.3672922252010724,
"grad_norm": 8.253166198730469,
"learning_rate": 0.00013163538873994638,
"loss": 0.1718,
"step": 1530
},
{
"epoch": 1.3762287756925826,
"grad_norm": 2.4168646335601807,
"learning_rate": 0.00013118856121537085,
"loss": 0.1285,
"step": 1540
},
{
"epoch": 1.3851653261840928,
"grad_norm": 4.069122314453125,
"learning_rate": 0.00013074173369079537,
"loss": 0.1165,
"step": 1550
},
{
"epoch": 1.3941018766756033,
"grad_norm": 0.2789513170719147,
"learning_rate": 0.00013029490616621984,
"loss": 0.0795,
"step": 1560
},
{
"epoch": 1.4030384271671135,
"grad_norm": 0.5609318017959595,
"learning_rate": 0.0001298480786416443,
"loss": 0.1187,
"step": 1570
},
{
"epoch": 1.4119749776586237,
"grad_norm": 0.34373611211776733,
"learning_rate": 0.00012940125111706883,
"loss": 0.0872,
"step": 1580
},
{
"epoch": 1.420911528150134,
"grad_norm": 4.596048355102539,
"learning_rate": 0.0001289544235924933,
"loss": 0.1354,
"step": 1590
},
{
"epoch": 1.4298480786416443,
"grad_norm": 0.06107456609606743,
"learning_rate": 0.00012850759606791777,
"loss": 0.119,
"step": 1600
},
{
"epoch": 1.4387846291331545,
"grad_norm": 0.08292512595653534,
"learning_rate": 0.0001280607685433423,
"loss": 0.1075,
"step": 1610
},
{
"epoch": 1.447721179624665,
"grad_norm": 0.04113980755209923,
"learning_rate": 0.00012761394101876676,
"loss": 0.099,
"step": 1620
},
{
"epoch": 1.4566577301161752,
"grad_norm": 3.1171679496765137,
"learning_rate": 0.00012716711349419126,
"loss": 0.0476,
"step": 1630
},
{
"epoch": 1.4655942806076854,
"grad_norm": 0.03248828276991844,
"learning_rate": 0.00012672028596961572,
"loss": 0.1217,
"step": 1640
},
{
"epoch": 1.4745308310991958,
"grad_norm": 0.14615251123905182,
"learning_rate": 0.00012627345844504022,
"loss": 0.0845,
"step": 1650
},
{
"epoch": 1.483467381590706,
"grad_norm": 0.8569982647895813,
"learning_rate": 0.00012582663092046471,
"loss": 0.0933,
"step": 1660
},
{
"epoch": 1.4924039320822162,
"grad_norm": 0.030800212174654007,
"learning_rate": 0.00012537980339588918,
"loss": 0.0555,
"step": 1670
},
{
"epoch": 1.5013404825737267,
"grad_norm": 0.9634251594543457,
"learning_rate": 0.00012493297587131368,
"loss": 0.1249,
"step": 1680
},
{
"epoch": 1.5102770330652369,
"grad_norm": 0.06999039649963379,
"learning_rate": 0.00012448614834673817,
"loss": 0.0727,
"step": 1690
},
{
"epoch": 1.519213583556747,
"grad_norm": 0.0438673160970211,
"learning_rate": 0.00012403932082216264,
"loss": 0.0595,
"step": 1700
},
{
"epoch": 1.5281501340482575,
"grad_norm": 0.030631419271230698,
"learning_rate": 0.00012359249329758714,
"loss": 0.0641,
"step": 1710
},
{
"epoch": 1.5370866845397675,
"grad_norm": 0.09066120535135269,
"learning_rate": 0.00012314566577301163,
"loss": 0.0689,
"step": 1720
},
{
"epoch": 1.546023235031278,
"grad_norm": 1.1478157043457031,
"learning_rate": 0.0001226988382484361,
"loss": 0.0427,
"step": 1730
},
{
"epoch": 1.5549597855227884,
"grad_norm": 0.5382466912269592,
"learning_rate": 0.0001222520107238606,
"loss": 0.1211,
"step": 1740
},
{
"epoch": 1.5638963360142983,
"grad_norm": 0.15291939675807953,
"learning_rate": 0.00012180518319928509,
"loss": 0.1934,
"step": 1750
},
{
"epoch": 1.5728328865058088,
"grad_norm": 0.07158921658992767,
"learning_rate": 0.00012135835567470957,
"loss": 0.045,
"step": 1760
},
{
"epoch": 1.5817694369973192,
"grad_norm": 1.416129469871521,
"learning_rate": 0.00012091152815013404,
"loss": 0.0822,
"step": 1770
},
{
"epoch": 1.5907059874888292,
"grad_norm": 3.2841928005218506,
"learning_rate": 0.00012046470062555855,
"loss": 0.0685,
"step": 1780
},
{
"epoch": 1.5996425379803396,
"grad_norm": 5.683614730834961,
"learning_rate": 0.00012001787310098302,
"loss": 0.1512,
"step": 1790
},
{
"epoch": 1.6085790884718498,
"grad_norm": 0.054330743849277496,
"learning_rate": 0.0001195710455764075,
"loss": 0.1381,
"step": 1800
},
{
"epoch": 1.61751563896336,
"grad_norm": 0.05368073284626007,
"learning_rate": 0.00011912421805183201,
"loss": 0.1118,
"step": 1810
},
{
"epoch": 1.6264521894548705,
"grad_norm": 17.735898971557617,
"learning_rate": 0.00011867739052725648,
"loss": 0.1704,
"step": 1820
},
{
"epoch": 1.6353887399463807,
"grad_norm": 3.4387574195861816,
"learning_rate": 0.00011823056300268096,
"loss": 0.1498,
"step": 1830
},
{
"epoch": 1.6443252904378909,
"grad_norm": 3.4959723949432373,
"learning_rate": 0.00011778373547810547,
"loss": 0.0667,
"step": 1840
},
{
"epoch": 1.6532618409294013,
"grad_norm": 1.4753037691116333,
"learning_rate": 0.00011733690795352994,
"loss": 0.0445,
"step": 1850
},
{
"epoch": 1.6621983914209115,
"grad_norm": 0.24579989910125732,
"learning_rate": 0.00011689008042895442,
"loss": 0.0377,
"step": 1860
},
{
"epoch": 1.6711349419124217,
"grad_norm": 3.813619375228882,
"learning_rate": 0.00011644325290437891,
"loss": 0.1004,
"step": 1870
},
{
"epoch": 1.6800714924039322,
"grad_norm": 0.808028519153595,
"learning_rate": 0.0001159964253798034,
"loss": 0.0679,
"step": 1880
},
{
"epoch": 1.6890080428954424,
"grad_norm": 0.277228444814682,
"learning_rate": 0.0001155495978552279,
"loss": 0.1096,
"step": 1890
},
{
"epoch": 1.6979445933869526,
"grad_norm": 2.485595703125,
"learning_rate": 0.00011510277033065237,
"loss": 0.0738,
"step": 1900
},
{
"epoch": 1.706881143878463,
"grad_norm": 0.35362759232521057,
"learning_rate": 0.00011465594280607685,
"loss": 0.0807,
"step": 1910
},
{
"epoch": 1.7158176943699732,
"grad_norm": 1.7707135677337646,
"learning_rate": 0.00011420911528150135,
"loss": 0.0603,
"step": 1920
},
{
"epoch": 1.7247542448614834,
"grad_norm": 0.010053984820842743,
"learning_rate": 0.00011376228775692583,
"loss": 0.0142,
"step": 1930
},
{
"epoch": 1.7336907953529939,
"grad_norm": 11.442891120910645,
"learning_rate": 0.00011331546023235031,
"loss": 0.0509,
"step": 1940
},
{
"epoch": 1.742627345844504,
"grad_norm": 2.5633316040039062,
"learning_rate": 0.00011286863270777481,
"loss": 0.0204,
"step": 1950
},
{
"epoch": 1.7515638963360143,
"grad_norm": 0.9002701044082642,
"learning_rate": 0.00011242180518319929,
"loss": 0.0822,
"step": 1960
},
{
"epoch": 1.7605004468275247,
"grad_norm": 0.03169967234134674,
"learning_rate": 0.00011197497765862377,
"loss": 0.0951,
"step": 1970
},
{
"epoch": 1.7694369973190347,
"grad_norm": 0.07693292945623398,
"learning_rate": 0.00011152815013404827,
"loss": 0.11,
"step": 1980
},
{
"epoch": 1.7783735478105451,
"grad_norm": 0.06315601617097855,
"learning_rate": 0.00011108132260947275,
"loss": 0.1217,
"step": 1990
},
{
"epoch": 1.7873100983020556,
"grad_norm": 0.26389381289482117,
"learning_rate": 0.00011063449508489723,
"loss": 0.1077,
"step": 2000
},
{
"epoch": 1.7873100983020556,
"eval_accuracy": 0.9668341708542714,
"eval_loss": 0.12829196453094482,
"eval_runtime": 56.3081,
"eval_samples_per_second": 35.341,
"eval_steps_per_second": 4.422,
"step": 2000
},
{
"epoch": 1.7962466487935655,
"grad_norm": 0.14058926701545715,
"learning_rate": 0.00011018766756032173,
"loss": 0.051,
"step": 2010
},
{
"epoch": 1.805183199285076,
"grad_norm": 0.8464193940162659,
"learning_rate": 0.00010974084003574621,
"loss": 0.0557,
"step": 2020
},
{
"epoch": 1.8141197497765862,
"grad_norm": 0.5524567365646362,
"learning_rate": 0.00010929401251117069,
"loss": 0.0327,
"step": 2030
},
{
"epoch": 1.8230563002680964,
"grad_norm": 4.706042289733887,
"learning_rate": 0.00010884718498659518,
"loss": 0.0815,
"step": 2040
},
{
"epoch": 1.8319928507596068,
"grad_norm": 5.365744113922119,
"learning_rate": 0.00010840035746201967,
"loss": 0.0617,
"step": 2050
},
{
"epoch": 1.840929401251117,
"grad_norm": 1.1039865016937256,
"learning_rate": 0.00010795352993744415,
"loss": 0.0528,
"step": 2060
},
{
"epoch": 1.8498659517426272,
"grad_norm": 2.8230929374694824,
"learning_rate": 0.00010750670241286864,
"loss": 0.0534,
"step": 2070
},
{
"epoch": 1.8588025022341377,
"grad_norm": 0.02104310691356659,
"learning_rate": 0.00010705987488829313,
"loss": 0.1058,
"step": 2080
},
{
"epoch": 1.8677390527256479,
"grad_norm": 0.030116664245724678,
"learning_rate": 0.0001066130473637176,
"loss": 0.0971,
"step": 2090
},
{
"epoch": 1.876675603217158,
"grad_norm": 0.5036576986312866,
"learning_rate": 0.0001061662198391421,
"loss": 0.0693,
"step": 2100
},
{
"epoch": 1.8856121537086685,
"grad_norm": 4.131002426147461,
"learning_rate": 0.00010571939231456658,
"loss": 0.0933,
"step": 2110
},
{
"epoch": 1.8945487042001787,
"grad_norm": 5.004481792449951,
"learning_rate": 0.00010527256478999108,
"loss": 0.0698,
"step": 2120
},
{
"epoch": 1.903485254691689,
"grad_norm": 0.014153541065752506,
"learning_rate": 0.00010482573726541556,
"loss": 0.0598,
"step": 2130
},
{
"epoch": 1.9124218051831994,
"grad_norm": 0.39952540397644043,
"learning_rate": 0.00010437890974084004,
"loss": 0.1169,
"step": 2140
},
{
"epoch": 1.9213583556747096,
"grad_norm": 5.047325611114502,
"learning_rate": 0.00010393208221626454,
"loss": 0.1492,
"step": 2150
},
{
"epoch": 1.9302949061662198,
"grad_norm": 0.045367881655693054,
"learning_rate": 0.00010348525469168902,
"loss": 0.081,
"step": 2160
},
{
"epoch": 1.9392314566577302,
"grad_norm": 0.02820589952170849,
"learning_rate": 0.00010303842716711349,
"loss": 0.1456,
"step": 2170
},
{
"epoch": 1.9481680071492404,
"grad_norm": 0.15606756508350372,
"learning_rate": 0.000102591599642538,
"loss": 0.0484,
"step": 2180
},
{
"epoch": 1.9571045576407506,
"grad_norm": 4.374292850494385,
"learning_rate": 0.00010214477211796248,
"loss": 0.1133,
"step": 2190
},
{
"epoch": 1.966041108132261,
"grad_norm": 0.6300436854362488,
"learning_rate": 0.00010169794459338695,
"loss": 0.0159,
"step": 2200
},
{
"epoch": 1.974977658623771,
"grad_norm": 0.011597417294979095,
"learning_rate": 0.00010125111706881146,
"loss": 0.019,
"step": 2210
},
{
"epoch": 1.9839142091152815,
"grad_norm": 0.013629280962049961,
"learning_rate": 0.00010080428954423592,
"loss": 0.0953,
"step": 2220
},
{
"epoch": 1.992850759606792,
"grad_norm": 4.461750030517578,
"learning_rate": 0.0001003574620196604,
"loss": 0.1169,
"step": 2230
},
{
"epoch": 2.001787310098302,
"grad_norm": 0.2028690129518509,
"learning_rate": 9.99106344950849e-05,
"loss": 0.0515,
"step": 2240
},
{
"epoch": 2.0107238605898123,
"grad_norm": 0.683179497718811,
"learning_rate": 9.946380697050938e-05,
"loss": 0.0414,
"step": 2250
},
{
"epoch": 2.0196604110813228,
"grad_norm": 0.3097274601459503,
"learning_rate": 9.901697944593388e-05,
"loss": 0.013,
"step": 2260
},
{
"epoch": 2.0285969615728328,
"grad_norm": 0.02391964942216873,
"learning_rate": 9.857015192135836e-05,
"loss": 0.0143,
"step": 2270
},
{
"epoch": 2.037533512064343,
"grad_norm": 0.02549424022436142,
"learning_rate": 9.812332439678284e-05,
"loss": 0.0321,
"step": 2280
},
{
"epoch": 2.0464700625558536,
"grad_norm": 0.015907390043139458,
"learning_rate": 9.767649687220734e-05,
"loss": 0.0905,
"step": 2290
},
{
"epoch": 2.0554066130473636,
"grad_norm": 0.04600854963064194,
"learning_rate": 9.722966934763182e-05,
"loss": 0.0055,
"step": 2300
},
{
"epoch": 2.064343163538874,
"grad_norm": 0.17837274074554443,
"learning_rate": 9.67828418230563e-05,
"loss": 0.0792,
"step": 2310
},
{
"epoch": 2.0732797140303845,
"grad_norm": 0.678176760673523,
"learning_rate": 9.63360142984808e-05,
"loss": 0.1025,
"step": 2320
},
{
"epoch": 2.0822162645218945,
"grad_norm": 0.047438375651836395,
"learning_rate": 9.588918677390528e-05,
"loss": 0.0037,
"step": 2330
},
{
"epoch": 2.091152815013405,
"grad_norm": 0.3825267553329468,
"learning_rate": 9.544235924932976e-05,
"loss": 0.0271,
"step": 2340
},
{
"epoch": 2.1000893655049153,
"grad_norm": 0.022976990789175034,
"learning_rate": 9.499553172475425e-05,
"loss": 0.0055,
"step": 2350
},
{
"epoch": 2.1090259159964253,
"grad_norm": 0.21945427358150482,
"learning_rate": 9.454870420017874e-05,
"loss": 0.0072,
"step": 2360
},
{
"epoch": 2.1179624664879357,
"grad_norm": 0.020401885733008385,
"learning_rate": 9.410187667560322e-05,
"loss": 0.0045,
"step": 2370
},
{
"epoch": 2.126899016979446,
"grad_norm": 0.3614647388458252,
"learning_rate": 9.365504915102771e-05,
"loss": 0.0292,
"step": 2380
},
{
"epoch": 2.135835567470956,
"grad_norm": 0.01699133589863777,
"learning_rate": 9.32082216264522e-05,
"loss": 0.0728,
"step": 2390
},
{
"epoch": 2.1447721179624666,
"grad_norm": 0.012751326896250248,
"learning_rate": 9.276139410187668e-05,
"loss": 0.0358,
"step": 2400
},
{
"epoch": 2.1537086684539766,
"grad_norm": 0.009738125838339329,
"learning_rate": 9.231456657730116e-05,
"loss": 0.0415,
"step": 2410
},
{
"epoch": 2.162645218945487,
"grad_norm": 0.012577983550727367,
"learning_rate": 9.186773905272565e-05,
"loss": 0.0204,
"step": 2420
},
{
"epoch": 2.1715817694369974,
"grad_norm": 0.022706875577569008,
"learning_rate": 9.142091152815015e-05,
"loss": 0.0391,
"step": 2430
},
{
"epoch": 2.1805183199285074,
"grad_norm": 1.2650375366210938,
"learning_rate": 9.097408400357462e-05,
"loss": 0.005,
"step": 2440
},
{
"epoch": 2.189454870420018,
"grad_norm": 0.012098530307412148,
"learning_rate": 9.052725647899911e-05,
"loss": 0.0631,
"step": 2450
},
{
"epoch": 2.1983914209115283,
"grad_norm": 0.014217260293662548,
"learning_rate": 9.00804289544236e-05,
"loss": 0.0158,
"step": 2460
},
{
"epoch": 2.2073279714030383,
"grad_norm": 9.968586921691895,
"learning_rate": 8.963360142984808e-05,
"loss": 0.0338,
"step": 2470
},
{
"epoch": 2.2162645218945487,
"grad_norm": 0.008608737029135227,
"learning_rate": 8.918677390527257e-05,
"loss": 0.0344,
"step": 2480
},
{
"epoch": 2.225201072386059,
"grad_norm": 0.0957435816526413,
"learning_rate": 8.873994638069705e-05,
"loss": 0.0346,
"step": 2490
},
{
"epoch": 2.234137622877569,
"grad_norm": 0.009171651676297188,
"learning_rate": 8.829311885612154e-05,
"loss": 0.0534,
"step": 2500
},
{
"epoch": 2.2430741733690795,
"grad_norm": 0.025571748614311218,
"learning_rate": 8.784629133154603e-05,
"loss": 0.0046,
"step": 2510
},
{
"epoch": 2.25201072386059,
"grad_norm": 0.008803543634712696,
"learning_rate": 8.739946380697051e-05,
"loss": 0.0104,
"step": 2520
},
{
"epoch": 2.2609472743521,
"grad_norm": 0.009746580384671688,
"learning_rate": 8.6952636282395e-05,
"loss": 0.0194,
"step": 2530
},
{
"epoch": 2.2698838248436104,
"grad_norm": 4.104613780975342,
"learning_rate": 8.650580875781949e-05,
"loss": 0.0155,
"step": 2540
},
{
"epoch": 2.278820375335121,
"grad_norm": 0.01826513558626175,
"learning_rate": 8.605898123324397e-05,
"loss": 0.0072,
"step": 2550
},
{
"epoch": 2.287756925826631,
"grad_norm": 0.03380773961544037,
"learning_rate": 8.561215370866847e-05,
"loss": 0.0515,
"step": 2560
},
{
"epoch": 2.2966934763181412,
"grad_norm": 0.13917675614356995,
"learning_rate": 8.516532618409293e-05,
"loss": 0.0553,
"step": 2570
},
{
"epoch": 2.3056300268096512,
"grad_norm": 3.9170970916748047,
"learning_rate": 8.471849865951743e-05,
"loss": 0.0252,
"step": 2580
},
{
"epoch": 2.3145665773011617,
"grad_norm": 0.02010478265583515,
"learning_rate": 8.427167113494193e-05,
"loss": 0.0212,
"step": 2590
},
{
"epoch": 2.323503127792672,
"grad_norm": 0.008358313702046871,
"learning_rate": 8.38248436103664e-05,
"loss": 0.1032,
"step": 2600
},
{
"epoch": 2.3324396782841825,
"grad_norm": 0.08038530498743057,
"learning_rate": 8.337801608579089e-05,
"loss": 0.0445,
"step": 2610
},
{
"epoch": 2.3413762287756925,
"grad_norm": 0.03653928264975548,
"learning_rate": 8.293118856121538e-05,
"loss": 0.0396,
"step": 2620
},
{
"epoch": 2.350312779267203,
"grad_norm": 0.027160342782735825,
"learning_rate": 8.248436103663985e-05,
"loss": 0.0305,
"step": 2630
},
{
"epoch": 2.359249329758713,
"grad_norm": 0.015198041684925556,
"learning_rate": 8.203753351206435e-05,
"loss": 0.0377,
"step": 2640
},
{
"epoch": 2.3681858802502234,
"grad_norm": 0.03799434006214142,
"learning_rate": 8.159070598748883e-05,
"loss": 0.0057,
"step": 2650
},
{
"epoch": 2.377122430741734,
"grad_norm": 0.008046945556998253,
"learning_rate": 8.114387846291331e-05,
"loss": 0.0249,
"step": 2660
},
{
"epoch": 2.386058981233244,
"grad_norm": 8.727446556091309,
"learning_rate": 8.069705093833781e-05,
"loss": 0.0466,
"step": 2670
},
{
"epoch": 2.394995531724754,
"grad_norm": 0.01986142434179783,
"learning_rate": 8.025022341376229e-05,
"loss": 0.0357,
"step": 2680
},
{
"epoch": 2.4039320822162646,
"grad_norm": 7.71134614944458,
"learning_rate": 7.980339588918678e-05,
"loss": 0.015,
"step": 2690
},
{
"epoch": 2.4128686327077746,
"grad_norm": 0.04247535765171051,
"learning_rate": 7.935656836461127e-05,
"loss": 0.0165,
"step": 2700
},
{
"epoch": 2.421805183199285,
"grad_norm": 0.008588094264268875,
"learning_rate": 7.890974084003575e-05,
"loss": 0.0039,
"step": 2710
},
{
"epoch": 2.4307417336907955,
"grad_norm": 0.11789193749427795,
"learning_rate": 7.846291331546024e-05,
"loss": 0.0344,
"step": 2720
},
{
"epoch": 2.4396782841823055,
"grad_norm": 0.02231294848024845,
"learning_rate": 7.801608579088472e-05,
"loss": 0.0248,
"step": 2730
},
{
"epoch": 2.448614834673816,
"grad_norm": 0.017268147319555283,
"learning_rate": 7.75692582663092e-05,
"loss": 0.0716,
"step": 2740
},
{
"epoch": 2.4575513851653263,
"grad_norm": 8.963982582092285,
"learning_rate": 7.71224307417337e-05,
"loss": 0.0282,
"step": 2750
},
{
"epoch": 2.4664879356568363,
"grad_norm": 0.799085259437561,
"learning_rate": 7.667560321715817e-05,
"loss": 0.0416,
"step": 2760
},
{
"epoch": 2.4754244861483468,
"grad_norm": 0.15468931198120117,
"learning_rate": 7.622877569258267e-05,
"loss": 0.0669,
"step": 2770
},
{
"epoch": 2.484361036639857,
"grad_norm": 3.4924068450927734,
"learning_rate": 7.578194816800716e-05,
"loss": 0.0477,
"step": 2780
},
{
"epoch": 2.493297587131367,
"grad_norm": 0.012834394350647926,
"learning_rate": 7.533512064343163e-05,
"loss": 0.0174,
"step": 2790
},
{
"epoch": 2.5022341376228776,
"grad_norm": 0.039204515516757965,
"learning_rate": 7.488829311885612e-05,
"loss": 0.0699,
"step": 2800
},
{
"epoch": 2.5111706881143876,
"grad_norm": 0.08284445852041245,
"learning_rate": 7.444146559428062e-05,
"loss": 0.0445,
"step": 2810
},
{
"epoch": 2.520107238605898,
"grad_norm": 0.010827134363353252,
"learning_rate": 7.39946380697051e-05,
"loss": 0.043,
"step": 2820
},
{
"epoch": 2.5290437890974085,
"grad_norm": 3.5454938411712646,
"learning_rate": 7.354781054512958e-05,
"loss": 0.0339,
"step": 2830
},
{
"epoch": 2.537980339588919,
"grad_norm": 0.006842234171926975,
"learning_rate": 7.310098302055406e-05,
"loss": 0.0029,
"step": 2840
},
{
"epoch": 2.546916890080429,
"grad_norm": 0.7790193557739258,
"learning_rate": 7.265415549597856e-05,
"loss": 0.0055,
"step": 2850
},
{
"epoch": 2.5558534405719393,
"grad_norm": 0.022239111363887787,
"learning_rate": 7.220732797140304e-05,
"loss": 0.008,
"step": 2860
},
{
"epoch": 2.5647899910634493,
"grad_norm": 0.05403418838977814,
"learning_rate": 7.176050044682752e-05,
"loss": 0.057,
"step": 2870
},
{
"epoch": 2.5737265415549597,
"grad_norm": 0.008923870511353016,
"learning_rate": 7.131367292225202e-05,
"loss": 0.0045,
"step": 2880
},
{
"epoch": 2.58266309204647,
"grad_norm": 0.02668040059506893,
"learning_rate": 7.08668453976765e-05,
"loss": 0.0551,
"step": 2890
},
{
"epoch": 2.5915996425379806,
"grad_norm": 0.049835577607154846,
"learning_rate": 7.042001787310098e-05,
"loss": 0.0255,
"step": 2900
},
{
"epoch": 2.6005361930294906,
"grad_norm": 0.19334334135055542,
"learning_rate": 6.997319034852548e-05,
"loss": 0.0434,
"step": 2910
},
{
"epoch": 2.609472743521001,
"grad_norm": 2.9139554500579834,
"learning_rate": 6.952636282394996e-05,
"loss": 0.0069,
"step": 2920
},
{
"epoch": 2.618409294012511,
"grad_norm": 0.006679228041321039,
"learning_rate": 6.907953529937444e-05,
"loss": 0.0021,
"step": 2930
},
{
"epoch": 2.6273458445040214,
"grad_norm": 0.1680416613817215,
"learning_rate": 6.863270777479894e-05,
"loss": 0.0249,
"step": 2940
},
{
"epoch": 2.636282394995532,
"grad_norm": 0.08290654420852661,
"learning_rate": 6.818588025022342e-05,
"loss": 0.029,
"step": 2950
},
{
"epoch": 2.645218945487042,
"grad_norm": 0.013707391917705536,
"learning_rate": 6.77390527256479e-05,
"loss": 0.0124,
"step": 2960
},
{
"epoch": 2.6541554959785523,
"grad_norm": 0.2275378704071045,
"learning_rate": 6.72922252010724e-05,
"loss": 0.035,
"step": 2970
},
{
"epoch": 2.6630920464700627,
"grad_norm": 0.5669155716896057,
"learning_rate": 6.684539767649688e-05,
"loss": 0.0288,
"step": 2980
},
{
"epoch": 2.6720285969615727,
"grad_norm": 0.01488091703504324,
"learning_rate": 6.639857015192136e-05,
"loss": 0.0438,
"step": 2990
},
{
"epoch": 2.680965147453083,
"grad_norm": 3.9659953117370605,
"learning_rate": 6.595174262734584e-05,
"loss": 0.0652,
"step": 3000
},
{
"epoch": 2.680965147453083,
"eval_accuracy": 0.9793969849246231,
"eval_loss": 0.08239442110061646,
"eval_runtime": 56.1213,
"eval_samples_per_second": 35.459,
"eval_steps_per_second": 4.437,
"step": 3000
},
{
"epoch": 2.6899016979445936,
"grad_norm": 8.31395149230957,
"learning_rate": 6.550491510277034e-05,
"loss": 0.0098,
"step": 3010
},
{
"epoch": 2.6988382484361035,
"grad_norm": 0.008468572981655598,
"learning_rate": 6.505808757819482e-05,
"loss": 0.1056,
"step": 3020
},
{
"epoch": 2.707774798927614,
"grad_norm": 0.9328808188438416,
"learning_rate": 6.46112600536193e-05,
"loss": 0.0769,
"step": 3030
},
{
"epoch": 2.716711349419124,
"grad_norm": 0.6114912629127502,
"learning_rate": 6.41644325290438e-05,
"loss": 0.0434,
"step": 3040
},
{
"epoch": 2.7256478999106344,
"grad_norm": 0.03709472343325615,
"learning_rate": 6.371760500446829e-05,
"loss": 0.0166,
"step": 3050
},
{
"epoch": 2.734584450402145,
"grad_norm": 0.1086587980389595,
"learning_rate": 6.327077747989276e-05,
"loss": 0.0047,
"step": 3060
},
{
"epoch": 2.7435210008936552,
"grad_norm": 0.12008140981197357,
"learning_rate": 6.282394995531725e-05,
"loss": 0.0069,
"step": 3070
},
{
"epoch": 2.7524575513851652,
"grad_norm": 0.017355024814605713,
"learning_rate": 6.237712243074174e-05,
"loss": 0.0033,
"step": 3080
},
{
"epoch": 2.7613941018766757,
"grad_norm": 0.15070508420467377,
"learning_rate": 6.193029490616622e-05,
"loss": 0.0476,
"step": 3090
},
{
"epoch": 2.7703306523681857,
"grad_norm": 0.022527649998664856,
"learning_rate": 6.148346738159071e-05,
"loss": 0.0243,
"step": 3100
},
{
"epoch": 2.779267202859696,
"grad_norm": 0.37779930233955383,
"learning_rate": 6.10366398570152e-05,
"loss": 0.0058,
"step": 3110
},
{
"epoch": 2.7882037533512065,
"grad_norm": 0.029893942177295685,
"learning_rate": 6.0589812332439676e-05,
"loss": 0.0208,
"step": 3120
},
{
"epoch": 2.797140303842717,
"grad_norm": 0.01635076478123665,
"learning_rate": 6.0142984807864165e-05,
"loss": 0.0026,
"step": 3130
},
{
"epoch": 2.806076854334227,
"grad_norm": 0.011868173256516457,
"learning_rate": 5.969615728328865e-05,
"loss": 0.0257,
"step": 3140
},
{
"epoch": 2.8150134048257374,
"grad_norm": 0.02559722028672695,
"learning_rate": 5.9249329758713135e-05,
"loss": 0.0666,
"step": 3150
},
{
"epoch": 2.8239499553172474,
"grad_norm": 0.01763424649834633,
"learning_rate": 5.8802502234137623e-05,
"loss": 0.0611,
"step": 3160
},
{
"epoch": 2.832886505808758,
"grad_norm": 0.02686423808336258,
"learning_rate": 5.835567470956211e-05,
"loss": 0.0039,
"step": 3170
},
{
"epoch": 2.841823056300268,
"grad_norm": 0.04632404074072838,
"learning_rate": 5.79088471849866e-05,
"loss": 0.0122,
"step": 3180
},
{
"epoch": 2.8507596067917786,
"grad_norm": 0.1586790531873703,
"learning_rate": 5.746201966041108e-05,
"loss": 0.0026,
"step": 3190
},
{
"epoch": 2.8596961572832886,
"grad_norm": 5.425605297088623,
"learning_rate": 5.701519213583557e-05,
"loss": 0.0622,
"step": 3200
},
{
"epoch": 2.868632707774799,
"grad_norm": 0.006181008648127317,
"learning_rate": 5.656836461126006e-05,
"loss": 0.0028,
"step": 3210
},
{
"epoch": 2.877569258266309,
"grad_norm": 0.09517185389995575,
"learning_rate": 5.612153708668454e-05,
"loss": 0.0035,
"step": 3220
},
{
"epoch": 2.8865058087578195,
"grad_norm": 0.015022194012999535,
"learning_rate": 5.567470956210903e-05,
"loss": 0.0285,
"step": 3230
},
{
"epoch": 2.89544235924933,
"grad_norm": 4.772485256195068,
"learning_rate": 5.522788203753352e-05,
"loss": 0.0279,
"step": 3240
},
{
"epoch": 2.90437890974084,
"grad_norm": 3.1032145023345947,
"learning_rate": 5.478105451295799e-05,
"loss": 0.0049,
"step": 3250
},
{
"epoch": 2.9133154602323503,
"grad_norm": 0.05868244543671608,
"learning_rate": 5.433422698838249e-05,
"loss": 0.0029,
"step": 3260
},
{
"epoch": 2.9222520107238603,
"grad_norm": 0.008307090029120445,
"learning_rate": 5.388739946380698e-05,
"loss": 0.0099,
"step": 3270
},
{
"epoch": 2.9311885612153707,
"grad_norm": 0.010392882861196995,
"learning_rate": 5.344057193923145e-05,
"loss": 0.002,
"step": 3280
},
{
"epoch": 2.940125111706881,
"grad_norm": 0.005523020401597023,
"learning_rate": 5.299374441465594e-05,
"loss": 0.0035,
"step": 3290
},
{
"epoch": 2.9490616621983916,
"grad_norm": 0.06098335236310959,
"learning_rate": 5.2546916890080436e-05,
"loss": 0.0056,
"step": 3300
},
{
"epoch": 2.9579982126899016,
"grad_norm": 0.013083376921713352,
"learning_rate": 5.2100089365504925e-05,
"loss": 0.023,
"step": 3310
},
{
"epoch": 2.966934763181412,
"grad_norm": 0.01605415530502796,
"learning_rate": 5.16532618409294e-05,
"loss": 0.0396,
"step": 3320
},
{
"epoch": 2.975871313672922,
"grad_norm": 0.013243346475064754,
"learning_rate": 5.120643431635389e-05,
"loss": 0.0083,
"step": 3330
},
{
"epoch": 2.9848078641644324,
"grad_norm": 1.4108890295028687,
"learning_rate": 5.0759606791778383e-05,
"loss": 0.0468,
"step": 3340
},
{
"epoch": 2.993744414655943,
"grad_norm": 0.5704414248466492,
"learning_rate": 5.031277926720286e-05,
"loss": 0.0209,
"step": 3350
},
{
"epoch": 3.002680965147453,
"grad_norm": 0.03908452019095421,
"learning_rate": 4.986595174262735e-05,
"loss": 0.0779,
"step": 3360
},
{
"epoch": 3.0116175156389633,
"grad_norm": 0.010959290899336338,
"learning_rate": 4.9419124218051835e-05,
"loss": 0.0109,
"step": 3370
},
{
"epoch": 3.0205540661304737,
"grad_norm": 0.028490547090768814,
"learning_rate": 4.8972296693476324e-05,
"loss": 0.0025,
"step": 3380
},
{
"epoch": 3.0294906166219837,
"grad_norm": 0.00491972966119647,
"learning_rate": 4.8525469168900806e-05,
"loss": 0.0214,
"step": 3390
},
{
"epoch": 3.038427167113494,
"grad_norm": 0.014270992018282413,
"learning_rate": 4.8078641644325294e-05,
"loss": 0.0048,
"step": 3400
},
{
"epoch": 3.0473637176050046,
"grad_norm": 0.00458119623363018,
"learning_rate": 4.7631814119749776e-05,
"loss": 0.0308,
"step": 3410
},
{
"epoch": 3.0563002680965146,
"grad_norm": 0.00890402402728796,
"learning_rate": 4.7184986595174265e-05,
"loss": 0.0019,
"step": 3420
},
{
"epoch": 3.065236818588025,
"grad_norm": 0.004751246422529221,
"learning_rate": 4.673815907059875e-05,
"loss": 0.0028,
"step": 3430
},
{
"epoch": 3.0741733690795354,
"grad_norm": 0.008143426850438118,
"learning_rate": 4.6291331546023235e-05,
"loss": 0.0015,
"step": 3440
},
{
"epoch": 3.0831099195710454,
"grad_norm": 0.035306982696056366,
"learning_rate": 4.5844504021447723e-05,
"loss": 0.0561,
"step": 3450
},
{
"epoch": 3.092046470062556,
"grad_norm": 0.006312028504908085,
"learning_rate": 4.539767649687221e-05,
"loss": 0.0407,
"step": 3460
},
{
"epoch": 3.1009830205540663,
"grad_norm": 0.012918233871459961,
"learning_rate": 4.4950848972296694e-05,
"loss": 0.0204,
"step": 3470
},
{
"epoch": 3.1099195710455763,
"grad_norm": 0.03429726883769035,
"learning_rate": 4.450402144772118e-05,
"loss": 0.0138,
"step": 3480
},
{
"epoch": 3.1188561215370867,
"grad_norm": 0.032142043113708496,
"learning_rate": 4.405719392314567e-05,
"loss": 0.0074,
"step": 3490
},
{
"epoch": 3.127792672028597,
"grad_norm": 0.11621160060167313,
"learning_rate": 4.361036639857015e-05,
"loss": 0.007,
"step": 3500
},
{
"epoch": 3.136729222520107,
"grad_norm": 0.010225760750472546,
"learning_rate": 4.316353887399464e-05,
"loss": 0.0371,
"step": 3510
},
{
"epoch": 3.1456657730116175,
"grad_norm": 0.0270242840051651,
"learning_rate": 4.271671134941912e-05,
"loss": 0.0024,
"step": 3520
},
{
"epoch": 3.154602323503128,
"grad_norm": 0.561730146408081,
"learning_rate": 4.226988382484361e-05,
"loss": 0.0322,
"step": 3530
},
{
"epoch": 3.163538873994638,
"grad_norm": 3.7698066234588623,
"learning_rate": 4.18230563002681e-05,
"loss": 0.0061,
"step": 3540
},
{
"epoch": 3.1724754244861484,
"grad_norm": 0.08852257579565048,
"learning_rate": 4.137622877569258e-05,
"loss": 0.002,
"step": 3550
},
{
"epoch": 3.181411974977659,
"grad_norm": 0.010241570882499218,
"learning_rate": 4.092940125111707e-05,
"loss": 0.0032,
"step": 3560
},
{
"epoch": 3.190348525469169,
"grad_norm": 0.02900160290300846,
"learning_rate": 4.048257372654156e-05,
"loss": 0.0021,
"step": 3570
},
{
"epoch": 3.1992850759606792,
"grad_norm": 0.012413430958986282,
"learning_rate": 4.003574620196605e-05,
"loss": 0.0016,
"step": 3580
},
{
"epoch": 3.2082216264521897,
"grad_norm": 0.011820780113339424,
"learning_rate": 3.958891867739053e-05,
"loss": 0.0156,
"step": 3590
},
{
"epoch": 3.2171581769436997,
"grad_norm": 0.0063424003310501575,
"learning_rate": 3.914209115281501e-05,
"loss": 0.0066,
"step": 3600
},
{
"epoch": 3.22609472743521,
"grad_norm": 0.014534726738929749,
"learning_rate": 3.8695263628239506e-05,
"loss": 0.0023,
"step": 3610
},
{
"epoch": 3.23503127792672,
"grad_norm": 0.0037305313162505627,
"learning_rate": 3.824843610366399e-05,
"loss": 0.0014,
"step": 3620
},
{
"epoch": 3.2439678284182305,
"grad_norm": 0.004174220375716686,
"learning_rate": 3.780160857908847e-05,
"loss": 0.0022,
"step": 3630
},
{
"epoch": 3.252904378909741,
"grad_norm": 0.02620732970535755,
"learning_rate": 3.735478105451296e-05,
"loss": 0.0033,
"step": 3640
},
{
"epoch": 3.2618409294012514,
"grad_norm": 0.008887135423719883,
"learning_rate": 3.690795352993745e-05,
"loss": 0.0137,
"step": 3650
},
{
"epoch": 3.2707774798927614,
"grad_norm": 0.0036694956943392754,
"learning_rate": 3.6461126005361935e-05,
"loss": 0.0016,
"step": 3660
},
{
"epoch": 3.279714030384272,
"grad_norm": 0.005121259950101376,
"learning_rate": 3.601429848078642e-05,
"loss": 0.0023,
"step": 3670
},
{
"epoch": 3.2886505808757818,
"grad_norm": 0.005332967732101679,
"learning_rate": 3.55674709562109e-05,
"loss": 0.0508,
"step": 3680
},
{
"epoch": 3.297587131367292,
"grad_norm": 0.008636276237666607,
"learning_rate": 3.5120643431635394e-05,
"loss": 0.0015,
"step": 3690
},
{
"epoch": 3.3065236818588026,
"grad_norm": 0.004048788454383612,
"learning_rate": 3.4673815907059876e-05,
"loss": 0.0015,
"step": 3700
},
{
"epoch": 3.3154602323503126,
"grad_norm": 0.013148046098649502,
"learning_rate": 3.4226988382484365e-05,
"loss": 0.0021,
"step": 3710
},
{
"epoch": 3.324396782841823,
"grad_norm": 0.003611048450693488,
"learning_rate": 3.3780160857908846e-05,
"loss": 0.0018,
"step": 3720
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.0047615463845431805,
"learning_rate": 3.3333333333333335e-05,
"loss": 0.002,
"step": 3730
},
{
"epoch": 3.3422698838248435,
"grad_norm": 0.052058279514312744,
"learning_rate": 3.2886505808757823e-05,
"loss": 0.0017,
"step": 3740
},
{
"epoch": 3.351206434316354,
"grad_norm": 0.004867528565227985,
"learning_rate": 3.2439678284182305e-05,
"loss": 0.0015,
"step": 3750
},
{
"epoch": 3.3601429848078643,
"grad_norm": 0.005437952931970358,
"learning_rate": 3.1992850759606794e-05,
"loss": 0.0027,
"step": 3760
},
{
"epoch": 3.3690795352993743,
"grad_norm": 0.08657950907945633,
"learning_rate": 3.154602323503128e-05,
"loss": 0.0012,
"step": 3770
},
{
"epoch": 3.3780160857908847,
"grad_norm": 0.003917807713150978,
"learning_rate": 3.1099195710455764e-05,
"loss": 0.0016,
"step": 3780
},
{
"epoch": 3.386952636282395,
"grad_norm": 0.03561088442802429,
"learning_rate": 3.065236818588025e-05,
"loss": 0.0063,
"step": 3790
},
{
"epoch": 3.395889186773905,
"grad_norm": 0.021922320127487183,
"learning_rate": 3.0205540661304738e-05,
"loss": 0.0195,
"step": 3800
},
{
"epoch": 3.4048257372654156,
"grad_norm": 0.009989120066165924,
"learning_rate": 2.9758713136729223e-05,
"loss": 0.0027,
"step": 3810
},
{
"epoch": 3.413762287756926,
"grad_norm": 0.004757929127663374,
"learning_rate": 2.931188561215371e-05,
"loss": 0.0213,
"step": 3820
},
{
"epoch": 3.422698838248436,
"grad_norm": 0.005087022669613361,
"learning_rate": 2.8865058087578197e-05,
"loss": 0.012,
"step": 3830
},
{
"epoch": 3.4316353887399464,
"grad_norm": 0.04014687240123749,
"learning_rate": 2.8418230563002685e-05,
"loss": 0.0016,
"step": 3840
},
{
"epoch": 3.4405719392314564,
"grad_norm": 0.008556324057281017,
"learning_rate": 2.7971403038427167e-05,
"loss": 0.0466,
"step": 3850
},
{
"epoch": 3.449508489722967,
"grad_norm": 0.0066629331558942795,
"learning_rate": 2.7524575513851652e-05,
"loss": 0.0013,
"step": 3860
},
{
"epoch": 3.4584450402144773,
"grad_norm": 0.007047568913549185,
"learning_rate": 2.707774798927614e-05,
"loss": 0.0015,
"step": 3870
},
{
"epoch": 3.4673815907059877,
"grad_norm": 0.0033304065000265837,
"learning_rate": 2.6630920464700626e-05,
"loss": 0.0186,
"step": 3880
},
{
"epoch": 3.4763181411974977,
"grad_norm": 0.043915342539548874,
"learning_rate": 2.6184092940125114e-05,
"loss": 0.0013,
"step": 3890
},
{
"epoch": 3.485254691689008,
"grad_norm": 0.005252317525446415,
"learning_rate": 2.57372654155496e-05,
"loss": 0.0036,
"step": 3900
},
{
"epoch": 3.494191242180518,
"grad_norm": 0.005055012181401253,
"learning_rate": 2.5290437890974085e-05,
"loss": 0.0012,
"step": 3910
},
{
"epoch": 3.5031277926720286,
"grad_norm": 0.0049805790185928345,
"learning_rate": 2.484361036639857e-05,
"loss": 0.0157,
"step": 3920
},
{
"epoch": 3.512064343163539,
"grad_norm": 0.009514909237623215,
"learning_rate": 2.439678284182306e-05,
"loss": 0.0109,
"step": 3930
},
{
"epoch": 3.5210008936550494,
"grad_norm": 0.03643026947975159,
"learning_rate": 2.3949955317247544e-05,
"loss": 0.0019,
"step": 3940
},
{
"epoch": 3.5299374441465594,
"grad_norm": 0.056902140378952026,
"learning_rate": 2.3503127792672032e-05,
"loss": 0.0016,
"step": 3950
},
{
"epoch": 3.53887399463807,
"grad_norm": 0.10358071327209473,
"learning_rate": 2.3056300268096514e-05,
"loss": 0.005,
"step": 3960
},
{
"epoch": 3.54781054512958,
"grad_norm": 0.005386151373386383,
"learning_rate": 2.2609472743521002e-05,
"loss": 0.0021,
"step": 3970
},
{
"epoch": 3.5567470956210903,
"grad_norm": 0.007350238971412182,
"learning_rate": 2.2162645218945488e-05,
"loss": 0.0016,
"step": 3980
},
{
"epoch": 3.5656836461126007,
"grad_norm": 0.07326429337263107,
"learning_rate": 2.1715817694369976e-05,
"loss": 0.0084,
"step": 3990
},
{
"epoch": 3.5746201966041107,
"grad_norm": 0.005603461060672998,
"learning_rate": 2.126899016979446e-05,
"loss": 0.0011,
"step": 4000
},
{
"epoch": 3.5746201966041107,
"eval_accuracy": 0.9814070351758793,
"eval_loss": 0.0710952952504158,
"eval_runtime": 56.3405,
"eval_samples_per_second": 35.321,
"eval_steps_per_second": 4.42,
"step": 4000
},
{
"epoch": 3.583556747095621,
"grad_norm": 0.010818341746926308,
"learning_rate": 2.0822162645218946e-05,
"loss": 0.0038,
"step": 4010
},
{
"epoch": 3.592493297587131,
"grad_norm": 0.003599151037633419,
"learning_rate": 2.037533512064343e-05,
"loss": 0.0065,
"step": 4020
},
{
"epoch": 3.6014298480786415,
"grad_norm": 4.198567867279053,
"learning_rate": 1.992850759606792e-05,
"loss": 0.0083,
"step": 4030
},
{
"epoch": 3.610366398570152,
"grad_norm": 0.013494855724275112,
"learning_rate": 1.9481680071492405e-05,
"loss": 0.0045,
"step": 4040
},
{
"epoch": 3.6193029490616624,
"grad_norm": 0.0036234534345567226,
"learning_rate": 1.903485254691689e-05,
"loss": 0.0016,
"step": 4050
},
{
"epoch": 3.6282394995531724,
"grad_norm": 0.021920403465628624,
"learning_rate": 1.8588025022341376e-05,
"loss": 0.0012,
"step": 4060
},
{
"epoch": 3.637176050044683,
"grad_norm": 0.004384295083582401,
"learning_rate": 1.8141197497765864e-05,
"loss": 0.001,
"step": 4070
},
{
"epoch": 3.646112600536193,
"grad_norm": 0.03161391615867615,
"learning_rate": 1.769436997319035e-05,
"loss": 0.0026,
"step": 4080
},
{
"epoch": 3.6550491510277032,
"grad_norm": 0.0033394452184438705,
"learning_rate": 1.7247542448614838e-05,
"loss": 0.0267,
"step": 4090
},
{
"epoch": 3.6639857015192137,
"grad_norm": 0.01090541947633028,
"learning_rate": 1.680071492403932e-05,
"loss": 0.0014,
"step": 4100
},
{
"epoch": 3.672922252010724,
"grad_norm": 0.0053653959184885025,
"learning_rate": 1.6353887399463808e-05,
"loss": 0.0095,
"step": 4110
},
{
"epoch": 3.681858802502234,
"grad_norm": 0.032379720360040665,
"learning_rate": 1.5907059874888293e-05,
"loss": 0.0011,
"step": 4120
},
{
"epoch": 3.6907953529937445,
"grad_norm": 0.05944305285811424,
"learning_rate": 1.5460232350312782e-05,
"loss": 0.0015,
"step": 4130
},
{
"epoch": 3.6997319034852545,
"grad_norm": 0.0054045203141868114,
"learning_rate": 1.5013404825737265e-05,
"loss": 0.0012,
"step": 4140
},
{
"epoch": 3.708668453976765,
"grad_norm": 0.003021675394847989,
"learning_rate": 1.4566577301161752e-05,
"loss": 0.0023,
"step": 4150
},
{
"epoch": 3.7176050044682754,
"grad_norm": 0.007955508306622505,
"learning_rate": 1.4119749776586239e-05,
"loss": 0.0031,
"step": 4160
},
{
"epoch": 3.726541554959786,
"grad_norm": 0.005485454574227333,
"learning_rate": 1.3672922252010726e-05,
"loss": 0.0014,
"step": 4170
},
{
"epoch": 3.7354781054512958,
"grad_norm": 0.007910342887043953,
"learning_rate": 1.322609472743521e-05,
"loss": 0.0014,
"step": 4180
},
{
"epoch": 3.744414655942806,
"grad_norm": 0.011793126352131367,
"learning_rate": 1.2779267202859696e-05,
"loss": 0.001,
"step": 4190
},
{
"epoch": 3.753351206434316,
"grad_norm": 0.005442539695650339,
"learning_rate": 1.2332439678284183e-05,
"loss": 0.0015,
"step": 4200
},
{
"epoch": 3.7622877569258266,
"grad_norm": 1.1986395120620728,
"learning_rate": 1.188561215370867e-05,
"loss": 0.002,
"step": 4210
},
{
"epoch": 3.771224307417337,
"grad_norm": 0.006608502473682165,
"learning_rate": 1.1438784629133155e-05,
"loss": 0.0009,
"step": 4220
},
{
"epoch": 3.780160857908847,
"grad_norm": 0.0039040117990225554,
"learning_rate": 1.0991957104557642e-05,
"loss": 0.0013,
"step": 4230
},
{
"epoch": 3.7890974084003575,
"grad_norm": 0.0041880221106112,
"learning_rate": 1.0545129579982127e-05,
"loss": 0.0012,
"step": 4240
},
{
"epoch": 3.798033958891868,
"grad_norm": 0.003776776837185025,
"learning_rate": 1.0098302055406614e-05,
"loss": 0.0013,
"step": 4250
},
{
"epoch": 3.806970509383378,
"grad_norm": 0.2970888614654541,
"learning_rate": 9.651474530831099e-06,
"loss": 0.0014,
"step": 4260
},
{
"epoch": 3.8159070598748883,
"grad_norm": 0.003879937343299389,
"learning_rate": 9.204647006255586e-06,
"loss": 0.0013,
"step": 4270
},
{
"epoch": 3.8248436103663987,
"grad_norm": 3.5169312953948975,
"learning_rate": 8.757819481680071e-06,
"loss": 0.0035,
"step": 4280
},
{
"epoch": 3.8337801608579087,
"grad_norm": 0.004920534789562225,
"learning_rate": 8.310991957104558e-06,
"loss": 0.001,
"step": 4290
},
{
"epoch": 3.842716711349419,
"grad_norm": 0.0035059740766882896,
"learning_rate": 7.864164432529045e-06,
"loss": 0.0094,
"step": 4300
},
{
"epoch": 3.851653261840929,
"grad_norm": 0.004144645761698484,
"learning_rate": 7.41733690795353e-06,
"loss": 0.001,
"step": 4310
},
{
"epoch": 3.8605898123324396,
"grad_norm": 0.006385812535881996,
"learning_rate": 6.970509383378017e-06,
"loss": 0.001,
"step": 4320
},
{
"epoch": 3.86952636282395,
"grad_norm": 0.003677819389849901,
"learning_rate": 6.523681858802503e-06,
"loss": 0.0009,
"step": 4330
},
{
"epoch": 3.8784629133154604,
"grad_norm": 0.003563833888620138,
"learning_rate": 6.076854334226989e-06,
"loss": 0.0251,
"step": 4340
},
{
"epoch": 3.8873994638069704,
"grad_norm": 0.012182756327092648,
"learning_rate": 5.630026809651475e-06,
"loss": 0.0422,
"step": 4350
},
{
"epoch": 3.896336014298481,
"grad_norm": 0.004781792871654034,
"learning_rate": 5.1831992850759615e-06,
"loss": 0.0021,
"step": 4360
},
{
"epoch": 3.905272564789991,
"grad_norm": 0.003455075901001692,
"learning_rate": 4.7363717605004475e-06,
"loss": 0.0182,
"step": 4370
},
{
"epoch": 3.9142091152815013,
"grad_norm": 0.00627366965636611,
"learning_rate": 4.2895442359249335e-06,
"loss": 0.0148,
"step": 4380
},
{
"epoch": 3.9231456657730117,
"grad_norm": 0.015941530466079712,
"learning_rate": 3.8427167113494195e-06,
"loss": 0.0015,
"step": 4390
},
{
"epoch": 3.932082216264522,
"grad_norm": 0.004724125377833843,
"learning_rate": 3.3958891867739055e-06,
"loss": 0.0093,
"step": 4400
},
{
"epoch": 3.941018766756032,
"grad_norm": 0.0062377783469855785,
"learning_rate": 2.9490616621983915e-06,
"loss": 0.0011,
"step": 4410
},
{
"epoch": 3.9499553172475426,
"grad_norm": 0.0042613474652171135,
"learning_rate": 2.502234137622878e-06,
"loss": 0.0141,
"step": 4420
},
{
"epoch": 3.9588918677390526,
"grad_norm": 0.005040575284510851,
"learning_rate": 2.055406613047364e-06,
"loss": 0.0061,
"step": 4430
},
{
"epoch": 3.967828418230563,
"grad_norm": 0.004678263328969479,
"learning_rate": 1.60857908847185e-06,
"loss": 0.0011,
"step": 4440
},
{
"epoch": 3.9767649687220734,
"grad_norm": 0.0033705062232911587,
"learning_rate": 1.161751563896336e-06,
"loss": 0.0065,
"step": 4450
},
{
"epoch": 3.9857015192135834,
"grad_norm": 0.004543005023151636,
"learning_rate": 7.149240393208222e-07,
"loss": 0.0011,
"step": 4460
},
{
"epoch": 3.994638069705094,
"grad_norm": 0.01603855937719345,
"learning_rate": 2.6809651474530835e-07,
"loss": 0.001,
"step": 4470
},
{
"epoch": 4.0,
"step": 4476,
"total_flos": 5.549295064059888e+18,
"train_loss": 0.1165861947240576,
"train_runtime": 2488.5837,
"train_samples_per_second": 28.775,
"train_steps_per_second": 1.799
}
],
"logging_steps": 10,
"max_steps": 4476,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.549295064059888e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}