Bert_base_hidden_unit_HuBERT50C / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.515237104206927,
"global_step": 32000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 5e-05,
"loss": 3.5407,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 0.0001,
"loss": 3.2075,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 0.00015,
"loss": 3.0286,
"step": 300
},
{
"epoch": 0.12,
"learning_rate": 0.0002,
"loss": 2.8212,
"step": 400
},
{
"epoch": 0.15,
"learning_rate": 0.00025,
"loss": 2.3586,
"step": 500
},
{
"epoch": 0.18,
"learning_rate": 0.0003,
"loss": 1.676,
"step": 600
},
{
"epoch": 0.21,
"learning_rate": 0.00035,
"loss": 1.3696,
"step": 700
},
{
"epoch": 0.24,
"learning_rate": 0.0004,
"loss": 1.2677,
"step": 800
},
{
"epoch": 0.27,
"learning_rate": 0.00045000000000000004,
"loss": 1.2271,
"step": 900
},
{
"epoch": 0.3,
"learning_rate": 0.0005,
"loss": 1.2006,
"step": 1000
},
{
"epoch": 0.33,
"learning_rate": 0.000498467667790377,
"loss": 1.1846,
"step": 1100
},
{
"epoch": 0.36,
"learning_rate": 0.0004969353355807539,
"loss": 1.1663,
"step": 1200
},
{
"epoch": 0.39,
"learning_rate": 0.0004954030033711309,
"loss": 1.1429,
"step": 1300
},
{
"epoch": 0.42,
"learning_rate": 0.0004938706711615078,
"loss": 1.1384,
"step": 1400
},
{
"epoch": 0.45,
"learning_rate": 0.0004923383389518848,
"loss": 1.1353,
"step": 1500
},
{
"epoch": 0.48,
"learning_rate": 0.0004908060067422617,
"loss": 1.1384,
"step": 1600
},
{
"epoch": 0.51,
"learning_rate": 0.0004892736745326388,
"loss": 1.1461,
"step": 1700
},
{
"epoch": 0.54,
"learning_rate": 0.00048774134232301567,
"loss": 1.1333,
"step": 1800
},
{
"epoch": 0.56,
"learning_rate": 0.0004862090101133926,
"loss": 1.1205,
"step": 1900
},
{
"epoch": 0.59,
"learning_rate": 0.00048467667790376954,
"loss": 1.1141,
"step": 2000
},
{
"epoch": 0.62,
"learning_rate": 0.0004831443456941465,
"loss": 1.1078,
"step": 2100
},
{
"epoch": 0.65,
"learning_rate": 0.0004816120134845234,
"loss": 1.1006,
"step": 2200
},
{
"epoch": 0.68,
"learning_rate": 0.00048007968127490044,
"loss": 1.0978,
"step": 2300
},
{
"epoch": 0.71,
"learning_rate": 0.00047854734906527735,
"loss": 1.0894,
"step": 2400
},
{
"epoch": 0.74,
"learning_rate": 0.0004770150168556543,
"loss": 1.0861,
"step": 2500
},
{
"epoch": 0.77,
"learning_rate": 0.0004754826846460313,
"loss": 1.083,
"step": 2600
},
{
"epoch": 0.8,
"learning_rate": 0.00047395035243640824,
"loss": 1.0758,
"step": 2700
},
{
"epoch": 0.83,
"learning_rate": 0.00047241802022678515,
"loss": 1.076,
"step": 2800
},
{
"epoch": 0.86,
"learning_rate": 0.00047088568801716217,
"loss": 1.0794,
"step": 2900
},
{
"epoch": 0.89,
"learning_rate": 0.0004693533558075391,
"loss": 1.0706,
"step": 3000
},
{
"epoch": 0.92,
"learning_rate": 0.00046782102359791604,
"loss": 1.0725,
"step": 3100
},
{
"epoch": 0.95,
"learning_rate": 0.000466288691388293,
"loss": 1.069,
"step": 3200
},
{
"epoch": 0.98,
"learning_rate": 0.0004647563591786699,
"loss": 1.0674,
"step": 3300
},
{
"epoch": 1.01,
"learning_rate": 0.00046322402696904693,
"loss": 1.066,
"step": 3400
},
{
"epoch": 1.04,
"learning_rate": 0.00046169169475942384,
"loss": 1.0569,
"step": 3500
},
{
"epoch": 1.07,
"learning_rate": 0.0004601593625498008,
"loss": 1.0579,
"step": 3600
},
{
"epoch": 1.1,
"learning_rate": 0.00045862703034017777,
"loss": 1.0615,
"step": 3700
},
{
"epoch": 1.13,
"learning_rate": 0.00045709469813055473,
"loss": 1.055,
"step": 3800
},
{
"epoch": 1.16,
"learning_rate": 0.00045556236592093164,
"loss": 1.0583,
"step": 3900
},
{
"epoch": 1.19,
"learning_rate": 0.0004540300337113086,
"loss": 1.0537,
"step": 4000
},
{
"epoch": 1.22,
"learning_rate": 0.00045249770150168557,
"loss": 1.0531,
"step": 4100
},
{
"epoch": 1.25,
"learning_rate": 0.00045096536929206254,
"loss": 1.0507,
"step": 4200
},
{
"epoch": 1.28,
"learning_rate": 0.0004494330370824395,
"loss": 1.0449,
"step": 4300
},
{
"epoch": 1.31,
"learning_rate": 0.0004479007048728164,
"loss": 1.0463,
"step": 4400
},
{
"epoch": 1.34,
"learning_rate": 0.00044636837266319343,
"loss": 1.0495,
"step": 4500
},
{
"epoch": 1.37,
"learning_rate": 0.00044483604045357034,
"loss": 1.0489,
"step": 4600
},
{
"epoch": 1.4,
"learning_rate": 0.0004433037082439473,
"loss": 1.043,
"step": 4700
},
{
"epoch": 1.43,
"learning_rate": 0.00044177137603432427,
"loss": 1.0404,
"step": 4800
},
{
"epoch": 1.46,
"learning_rate": 0.0004402390438247012,
"loss": 1.0448,
"step": 4900
},
{
"epoch": 1.49,
"learning_rate": 0.00043870671161507814,
"loss": 1.0378,
"step": 5000
},
{
"epoch": 1.52,
"learning_rate": 0.0004371743794054551,
"loss": 1.0359,
"step": 5100
},
{
"epoch": 1.55,
"learning_rate": 0.00043564204719583207,
"loss": 1.0419,
"step": 5200
},
{
"epoch": 1.58,
"learning_rate": 0.000434109714986209,
"loss": 1.0332,
"step": 5300
},
{
"epoch": 1.61,
"learning_rate": 0.000432577382776586,
"loss": 1.0382,
"step": 5400
},
{
"epoch": 1.64,
"learning_rate": 0.0004310450505669629,
"loss": 1.0312,
"step": 5500
},
{
"epoch": 1.67,
"learning_rate": 0.0004295127183573399,
"loss": 1.0377,
"step": 5600
},
{
"epoch": 1.69,
"learning_rate": 0.00042798038614771683,
"loss": 1.0296,
"step": 5700
},
{
"epoch": 1.72,
"learning_rate": 0.00042644805393809374,
"loss": 1.0316,
"step": 5800
},
{
"epoch": 1.75,
"learning_rate": 0.00042491572172847076,
"loss": 1.0322,
"step": 5900
},
{
"epoch": 1.78,
"learning_rate": 0.0004233833895188477,
"loss": 1.0325,
"step": 6000
},
{
"epoch": 1.81,
"learning_rate": 0.00042185105730922464,
"loss": 1.0307,
"step": 6100
},
{
"epoch": 1.84,
"learning_rate": 0.0004203187250996016,
"loss": 1.0297,
"step": 6200
},
{
"epoch": 1.87,
"learning_rate": 0.00041878639288997857,
"loss": 1.031,
"step": 6300
},
{
"epoch": 1.9,
"learning_rate": 0.0004172540606803555,
"loss": 1.0304,
"step": 6400
},
{
"epoch": 1.93,
"learning_rate": 0.0004157217284707325,
"loss": 1.0278,
"step": 6500
},
{
"epoch": 1.96,
"learning_rate": 0.0004141893962611094,
"loss": 1.0211,
"step": 6600
},
{
"epoch": 1.99,
"learning_rate": 0.0004126570640514864,
"loss": 1.0248,
"step": 6700
},
{
"epoch": 2.02,
"learning_rate": 0.00041112473184186333,
"loss": 1.0319,
"step": 6800
},
{
"epoch": 2.05,
"learning_rate": 0.00040959239963224024,
"loss": 1.0301,
"step": 6900
},
{
"epoch": 2.08,
"learning_rate": 0.00040806006742261726,
"loss": 1.0295,
"step": 7000
},
{
"epoch": 2.11,
"learning_rate": 0.00040652773521299417,
"loss": 1.0247,
"step": 7100
},
{
"epoch": 2.14,
"learning_rate": 0.00040499540300337113,
"loss": 1.0205,
"step": 7200
},
{
"epoch": 2.17,
"learning_rate": 0.0004034630707937481,
"loss": 1.0221,
"step": 7300
},
{
"epoch": 2.2,
"learning_rate": 0.00040193073858412506,
"loss": 1.0251,
"step": 7400
},
{
"epoch": 2.23,
"learning_rate": 0.00040039840637450197,
"loss": 1.0164,
"step": 7500
},
{
"epoch": 2.26,
"learning_rate": 0.000398866074164879,
"loss": 1.019,
"step": 7600
},
{
"epoch": 2.29,
"learning_rate": 0.0003973337419552559,
"loss": 1.0167,
"step": 7700
},
{
"epoch": 2.32,
"learning_rate": 0.0003958014097456328,
"loss": 1.0202,
"step": 7800
},
{
"epoch": 2.35,
"learning_rate": 0.00039426907753600983,
"loss": 1.0183,
"step": 7900
},
{
"epoch": 2.38,
"learning_rate": 0.00039273674532638674,
"loss": 1.0234,
"step": 8000
},
{
"epoch": 2.41,
"learning_rate": 0.00039120441311676376,
"loss": 1.0103,
"step": 8100
},
{
"epoch": 2.44,
"learning_rate": 0.00038967208090714067,
"loss": 1.0196,
"step": 8200
},
{
"epoch": 2.47,
"learning_rate": 0.00038813974869751763,
"loss": 1.0147,
"step": 8300
},
{
"epoch": 2.5,
"learning_rate": 0.0003866074164878946,
"loss": 1.0138,
"step": 8400
},
{
"epoch": 2.53,
"learning_rate": 0.00038507508427827156,
"loss": 1.0151,
"step": 8500
},
{
"epoch": 2.56,
"learning_rate": 0.00038354275206864847,
"loss": 1.0118,
"step": 8600
},
{
"epoch": 2.59,
"learning_rate": 0.0003820104198590255,
"loss": 1.014,
"step": 8700
},
{
"epoch": 2.62,
"learning_rate": 0.0003804780876494024,
"loss": 1.0096,
"step": 8800
},
{
"epoch": 2.65,
"learning_rate": 0.0003789457554397793,
"loss": 1.0092,
"step": 8900
},
{
"epoch": 2.68,
"learning_rate": 0.0003774134232301563,
"loss": 1.0096,
"step": 9000
},
{
"epoch": 2.71,
"learning_rate": 0.00037588109102053323,
"loss": 1.0148,
"step": 9100
},
{
"epoch": 2.74,
"learning_rate": 0.00037434875881091025,
"loss": 1.0102,
"step": 9200
},
{
"epoch": 2.77,
"learning_rate": 0.00037281642660128716,
"loss": 1.0095,
"step": 9300
},
{
"epoch": 2.8,
"learning_rate": 0.0003712840943916641,
"loss": 1.0099,
"step": 9400
},
{
"epoch": 2.82,
"learning_rate": 0.0003697517621820411,
"loss": 1.0083,
"step": 9500
},
{
"epoch": 2.85,
"learning_rate": 0.00036821942997241805,
"loss": 1.0093,
"step": 9600
},
{
"epoch": 2.88,
"learning_rate": 0.00036668709776279496,
"loss": 1.0023,
"step": 9700
},
{
"epoch": 2.91,
"learning_rate": 0.00036515476555317193,
"loss": 1.0058,
"step": 9800
},
{
"epoch": 2.94,
"learning_rate": 0.0003636224333435489,
"loss": 1.0088,
"step": 9900
},
{
"epoch": 2.97,
"learning_rate": 0.0003620901011339258,
"loss": 1.0046,
"step": 10000
},
{
"epoch": 3.0,
"learning_rate": 0.0003605577689243028,
"loss": 1.0142,
"step": 10100
},
{
"epoch": 3.03,
"learning_rate": 0.00035902543671467973,
"loss": 1.0031,
"step": 10200
},
{
"epoch": 3.06,
"learning_rate": 0.0003574931045050567,
"loss": 1.006,
"step": 10300
},
{
"epoch": 3.09,
"learning_rate": 0.00035596077229543366,
"loss": 1.0019,
"step": 10400
},
{
"epoch": 3.12,
"learning_rate": 0.0003544284400858106,
"loss": 1.0023,
"step": 10500
},
{
"epoch": 3.15,
"learning_rate": 0.0003528961078761876,
"loss": 0.9993,
"step": 10600
},
{
"epoch": 3.18,
"learning_rate": 0.0003513637756665645,
"loss": 0.9987,
"step": 10700
},
{
"epoch": 3.21,
"learning_rate": 0.00034983144345694146,
"loss": 0.9987,
"step": 10800
},
{
"epoch": 3.24,
"learning_rate": 0.0003482991112473184,
"loss": 1.005,
"step": 10900
},
{
"epoch": 3.27,
"learning_rate": 0.0003467667790376954,
"loss": 0.9966,
"step": 11000
},
{
"epoch": 3.3,
"learning_rate": 0.0003452344468280723,
"loss": 0.9986,
"step": 11100
},
{
"epoch": 3.33,
"learning_rate": 0.0003437021146184493,
"loss": 0.9973,
"step": 11200
},
{
"epoch": 3.36,
"learning_rate": 0.00034216978240882623,
"loss": 1.0011,
"step": 11300
},
{
"epoch": 3.39,
"learning_rate": 0.0003406374501992032,
"loss": 0.9944,
"step": 11400
},
{
"epoch": 3.42,
"learning_rate": 0.00033910511798958016,
"loss": 0.996,
"step": 11500
},
{
"epoch": 3.45,
"learning_rate": 0.00033757278577995707,
"loss": 0.9976,
"step": 11600
},
{
"epoch": 3.48,
"learning_rate": 0.0003360404535703341,
"loss": 0.9931,
"step": 11700
},
{
"epoch": 3.51,
"learning_rate": 0.000334508121360711,
"loss": 0.9921,
"step": 11800
},
{
"epoch": 3.54,
"learning_rate": 0.00033297578915108796,
"loss": 0.9911,
"step": 11900
},
{
"epoch": 3.57,
"learning_rate": 0.0003314434569414649,
"loss": 0.9916,
"step": 12000
},
{
"epoch": 3.6,
"learning_rate": 0.0003299111247318419,
"loss": 0.9921,
"step": 12100
},
{
"epoch": 3.63,
"learning_rate": 0.0003283787925222188,
"loss": 0.991,
"step": 12200
},
{
"epoch": 3.66,
"learning_rate": 0.0003268464603125958,
"loss": 0.9971,
"step": 12300
},
{
"epoch": 3.69,
"learning_rate": 0.0003253141281029727,
"loss": 0.995,
"step": 12400
},
{
"epoch": 3.72,
"learning_rate": 0.0003237817958933497,
"loss": 0.9891,
"step": 12500
},
{
"epoch": 3.75,
"learning_rate": 0.00032224946368372665,
"loss": 0.9907,
"step": 12600
},
{
"epoch": 3.78,
"learning_rate": 0.00032071713147410356,
"loss": 0.9912,
"step": 12700
},
{
"epoch": 3.81,
"learning_rate": 0.0003191847992644805,
"loss": 0.9873,
"step": 12800
},
{
"epoch": 3.84,
"learning_rate": 0.0003176524670548575,
"loss": 0.9868,
"step": 12900
},
{
"epoch": 3.87,
"learning_rate": 0.00031612013484523445,
"loss": 0.9845,
"step": 13000
},
{
"epoch": 3.9,
"learning_rate": 0.0003145878026356114,
"loss": 0.9836,
"step": 13100
},
{
"epoch": 3.92,
"learning_rate": 0.0003130554704259884,
"loss": 0.986,
"step": 13200
},
{
"epoch": 3.95,
"learning_rate": 0.0003115231382163653,
"loss": 0.9902,
"step": 13300
},
{
"epoch": 3.98,
"learning_rate": 0.0003099908060067423,
"loss": 0.983,
"step": 13400
},
{
"epoch": 4.01,
"learning_rate": 0.0003084584737971192,
"loss": 0.9872,
"step": 13500
},
{
"epoch": 4.04,
"learning_rate": 0.00030692614158749613,
"loss": 0.9844,
"step": 13600
},
{
"epoch": 4.07,
"learning_rate": 0.00030539380937787315,
"loss": 0.9867,
"step": 13700
},
{
"epoch": 4.1,
"learning_rate": 0.00030386147716825006,
"loss": 0.9821,
"step": 13800
},
{
"epoch": 4.13,
"learning_rate": 0.000302329144958627,
"loss": 0.9809,
"step": 13900
},
{
"epoch": 4.16,
"learning_rate": 0.000300796812749004,
"loss": 0.984,
"step": 14000
},
{
"epoch": 4.19,
"learning_rate": 0.00029926448053938095,
"loss": 0.9767,
"step": 14100
},
{
"epoch": 4.22,
"learning_rate": 0.0002977321483297579,
"loss": 0.9819,
"step": 14200
},
{
"epoch": 4.25,
"learning_rate": 0.0002961998161201349,
"loss": 0.9811,
"step": 14300
},
{
"epoch": 4.28,
"learning_rate": 0.0002946674839105118,
"loss": 0.9791,
"step": 14400
},
{
"epoch": 4.31,
"learning_rate": 0.00029313515170088875,
"loss": 0.9783,
"step": 14500
},
{
"epoch": 4.34,
"learning_rate": 0.0002916028194912657,
"loss": 0.9878,
"step": 14600
},
{
"epoch": 4.37,
"learning_rate": 0.00029007048728164263,
"loss": 0.975,
"step": 14700
},
{
"epoch": 4.4,
"learning_rate": 0.00028853815507201965,
"loss": 0.9775,
"step": 14800
},
{
"epoch": 4.43,
"learning_rate": 0.00028700582286239656,
"loss": 0.9775,
"step": 14900
},
{
"epoch": 4.46,
"learning_rate": 0.0002854734906527735,
"loss": 0.9786,
"step": 15000
},
{
"epoch": 4.49,
"learning_rate": 0.0002839411584431505,
"loss": 0.9753,
"step": 15100
},
{
"epoch": 4.52,
"learning_rate": 0.00028240882623352745,
"loss": 0.9841,
"step": 15200
},
{
"epoch": 4.55,
"learning_rate": 0.00028087649402390436,
"loss": 0.9716,
"step": 15300
},
{
"epoch": 4.58,
"learning_rate": 0.0002793441618142814,
"loss": 0.9774,
"step": 15400
},
{
"epoch": 4.61,
"learning_rate": 0.0002778118296046583,
"loss": 0.9723,
"step": 15500
},
{
"epoch": 4.64,
"learning_rate": 0.00027627949739503525,
"loss": 0.9702,
"step": 15600
},
{
"epoch": 4.67,
"learning_rate": 0.0002747471651854122,
"loss": 0.9766,
"step": 15700
},
{
"epoch": 4.7,
"learning_rate": 0.0002732148329757891,
"loss": 0.9843,
"step": 15800
},
{
"epoch": 4.73,
"learning_rate": 0.00027168250076616614,
"loss": 0.9701,
"step": 15900
},
{
"epoch": 4.76,
"learning_rate": 0.00027015016855654305,
"loss": 0.9715,
"step": 16000
},
{
"epoch": 4.79,
"learning_rate": 0.00026861783634692,
"loss": 0.9695,
"step": 16100
},
{
"epoch": 4.82,
"learning_rate": 0.000267085504137297,
"loss": 0.9699,
"step": 16200
},
{
"epoch": 4.85,
"learning_rate": 0.00026555317192767394,
"loss": 0.9665,
"step": 16300
},
{
"epoch": 4.88,
"learning_rate": 0.00026402083971805085,
"loss": 0.9681,
"step": 16400
},
{
"epoch": 4.91,
"learning_rate": 0.0002624885075084278,
"loss": 0.9697,
"step": 16500
},
{
"epoch": 4.94,
"learning_rate": 0.0002609561752988048,
"loss": 0.9662,
"step": 16600
},
{
"epoch": 4.97,
"learning_rate": 0.00025942384308918175,
"loss": 0.965,
"step": 16700
},
{
"epoch": 5.0,
"learning_rate": 0.0002578915108795587,
"loss": 0.9655,
"step": 16800
},
{
"epoch": 5.03,
"learning_rate": 0.0002563591786699356,
"loss": 0.9689,
"step": 16900
},
{
"epoch": 5.06,
"learning_rate": 0.00025482684646031264,
"loss": 0.9641,
"step": 17000
},
{
"epoch": 5.08,
"learning_rate": 0.00025329451425068955,
"loss": 0.9612,
"step": 17100
},
{
"epoch": 5.11,
"learning_rate": 0.0002517621820410665,
"loss": 0.9667,
"step": 17200
},
{
"epoch": 5.14,
"learning_rate": 0.0002502298498314435,
"loss": 0.9623,
"step": 17300
},
{
"epoch": 5.17,
"learning_rate": 0.0002486975176218204,
"loss": 0.9611,
"step": 17400
},
{
"epoch": 5.2,
"learning_rate": 0.00024716518541219735,
"loss": 0.956,
"step": 17500
},
{
"epoch": 5.23,
"learning_rate": 0.0002456328532025743,
"loss": 0.9623,
"step": 17600
},
{
"epoch": 5.26,
"learning_rate": 0.00024410052099295128,
"loss": 0.9577,
"step": 17700
},
{
"epoch": 5.29,
"learning_rate": 0.00024256818878332824,
"loss": 0.9584,
"step": 17800
},
{
"epoch": 5.32,
"learning_rate": 0.00024103585657370518,
"loss": 0.9595,
"step": 17900
},
{
"epoch": 5.35,
"learning_rate": 0.00023950352436408212,
"loss": 0.954,
"step": 18000
},
{
"epoch": 5.38,
"learning_rate": 0.00023797119215445908,
"loss": 0.958,
"step": 18100
},
{
"epoch": 5.41,
"learning_rate": 0.00023643885994483605,
"loss": 0.9575,
"step": 18200
},
{
"epoch": 5.44,
"learning_rate": 0.000234906527735213,
"loss": 0.9499,
"step": 18300
},
{
"epoch": 5.47,
"learning_rate": 0.00023337419552558995,
"loss": 0.9583,
"step": 18400
},
{
"epoch": 5.5,
"learning_rate": 0.0002318418633159669,
"loss": 0.9547,
"step": 18500
},
{
"epoch": 5.53,
"learning_rate": 0.00023030953110634387,
"loss": 0.9531,
"step": 18600
},
{
"epoch": 5.56,
"learning_rate": 0.0002287771988967208,
"loss": 0.9566,
"step": 18700
},
{
"epoch": 5.59,
"learning_rate": 0.00022724486668709778,
"loss": 0.9519,
"step": 18800
},
{
"epoch": 5.62,
"learning_rate": 0.0002257125344774747,
"loss": 0.9473,
"step": 18900
},
{
"epoch": 5.65,
"learning_rate": 0.00022418020226785168,
"loss": 0.9496,
"step": 19000
},
{
"epoch": 5.68,
"learning_rate": 0.00022264787005822861,
"loss": 0.9469,
"step": 19100
},
{
"epoch": 5.71,
"learning_rate": 0.00022111553784860558,
"loss": 0.9509,
"step": 19200
},
{
"epoch": 5.74,
"learning_rate": 0.00021958320563898254,
"loss": 0.9466,
"step": 19300
},
{
"epoch": 5.77,
"learning_rate": 0.0002180508734293595,
"loss": 0.9499,
"step": 19400
},
{
"epoch": 5.8,
"learning_rate": 0.00021651854121973644,
"loss": 0.9498,
"step": 19500
},
{
"epoch": 5.83,
"learning_rate": 0.0002149862090101134,
"loss": 0.9483,
"step": 19600
},
{
"epoch": 5.86,
"learning_rate": 0.00021345387680049037,
"loss": 0.9522,
"step": 19700
},
{
"epoch": 5.89,
"learning_rate": 0.00021192154459086728,
"loss": 0.9441,
"step": 19800
},
{
"epoch": 5.92,
"learning_rate": 0.00021038921238124425,
"loss": 0.9492,
"step": 19900
},
{
"epoch": 5.95,
"learning_rate": 0.0002088568801716212,
"loss": 0.9421,
"step": 20000
},
{
"epoch": 5.98,
"learning_rate": 0.00020732454796199817,
"loss": 0.9432,
"step": 20100
},
{
"epoch": 6.01,
"learning_rate": 0.0002057922157523751,
"loss": 0.9542,
"step": 20200
},
{
"epoch": 6.04,
"learning_rate": 0.00020425988354275207,
"loss": 0.9426,
"step": 20300
},
{
"epoch": 6.07,
"learning_rate": 0.00020272755133312904,
"loss": 0.9484,
"step": 20400
},
{
"epoch": 6.1,
"learning_rate": 0.00020119521912350598,
"loss": 0.9473,
"step": 20500
},
{
"epoch": 6.13,
"learning_rate": 0.00019966288691388294,
"loss": 0.9413,
"step": 20600
},
{
"epoch": 6.16,
"learning_rate": 0.0001981305547042599,
"loss": 0.9438,
"step": 20700
},
{
"epoch": 6.18,
"learning_rate": 0.00019659822249463684,
"loss": 0.9421,
"step": 20800
},
{
"epoch": 6.21,
"learning_rate": 0.00019506589028501378,
"loss": 0.9406,
"step": 20900
},
{
"epoch": 6.24,
"learning_rate": 0.00019353355807539074,
"loss": 0.9384,
"step": 21000
},
{
"epoch": 6.27,
"learning_rate": 0.0001920012258657677,
"loss": 0.9397,
"step": 21100
},
{
"epoch": 6.3,
"learning_rate": 0.00019046889365614464,
"loss": 0.9367,
"step": 21200
},
{
"epoch": 6.33,
"learning_rate": 0.0001889365614465216,
"loss": 0.9402,
"step": 21300
},
{
"epoch": 6.36,
"learning_rate": 0.00018740422923689857,
"loss": 0.9319,
"step": 21400
},
{
"epoch": 6.39,
"learning_rate": 0.00018587189702727554,
"loss": 0.9385,
"step": 21500
},
{
"epoch": 6.42,
"learning_rate": 0.00018433956481765247,
"loss": 0.939,
"step": 21600
},
{
"epoch": 6.45,
"learning_rate": 0.0001828072326080294,
"loss": 0.9399,
"step": 21700
},
{
"epoch": 6.48,
"learning_rate": 0.00018127490039840637,
"loss": 0.9407,
"step": 21800
},
{
"epoch": 6.51,
"learning_rate": 0.00017974256818878334,
"loss": 0.94,
"step": 21900
},
{
"epoch": 6.54,
"learning_rate": 0.00017821023597916027,
"loss": 0.9407,
"step": 22000
},
{
"epoch": 6.57,
"learning_rate": 0.00017667790376953724,
"loss": 0.9353,
"step": 22100
},
{
"epoch": 6.6,
"learning_rate": 0.0001751455715599142,
"loss": 0.9405,
"step": 22200
},
{
"epoch": 6.63,
"learning_rate": 0.00017361323935029114,
"loss": 0.9305,
"step": 22300
},
{
"epoch": 6.66,
"learning_rate": 0.0001720809071406681,
"loss": 0.938,
"step": 22400
},
{
"epoch": 6.69,
"learning_rate": 0.00017054857493104507,
"loss": 0.9311,
"step": 22500
},
{
"epoch": 6.72,
"learning_rate": 0.00016901624272142203,
"loss": 0.9343,
"step": 22600
},
{
"epoch": 6.75,
"learning_rate": 0.00016748391051179894,
"loss": 0.9312,
"step": 22700
},
{
"epoch": 6.78,
"learning_rate": 0.0001659515783021759,
"loss": 0.9353,
"step": 22800
},
{
"epoch": 6.81,
"learning_rate": 0.00016441924609255287,
"loss": 0.9341,
"step": 22900
},
{
"epoch": 6.84,
"learning_rate": 0.0001628869138829298,
"loss": 0.9338,
"step": 23000
},
{
"epoch": 6.87,
"learning_rate": 0.00016135458167330677,
"loss": 0.9318,
"step": 23100
},
{
"epoch": 6.9,
"learning_rate": 0.00015982224946368373,
"loss": 0.9309,
"step": 23200
},
{
"epoch": 6.93,
"learning_rate": 0.0001582899172540607,
"loss": 0.9291,
"step": 23300
},
{
"epoch": 6.96,
"learning_rate": 0.00015675758504443764,
"loss": 0.9307,
"step": 23400
},
{
"epoch": 6.99,
"learning_rate": 0.0001552252528348146,
"loss": 0.9325,
"step": 23500
},
{
"epoch": 7.02,
"learning_rate": 0.00015369292062519156,
"loss": 0.9363,
"step": 23600
},
{
"epoch": 7.05,
"learning_rate": 0.00015216058841556847,
"loss": 0.9325,
"step": 23700
},
{
"epoch": 7.08,
"learning_rate": 0.00015062825620594544,
"loss": 0.9276,
"step": 23800
},
{
"epoch": 7.11,
"learning_rate": 0.0001490959239963224,
"loss": 0.9328,
"step": 23900
},
{
"epoch": 7.14,
"learning_rate": 0.00014756359178669937,
"loss": 0.9304,
"step": 24000
},
{
"epoch": 7.17,
"learning_rate": 0.0001460312595770763,
"loss": 0.9274,
"step": 24100
},
{
"epoch": 7.2,
"learning_rate": 0.00014449892736745327,
"loss": 0.9261,
"step": 24200
},
{
"epoch": 7.23,
"learning_rate": 0.00014296659515783023,
"loss": 0.9245,
"step": 24300
},
{
"epoch": 7.26,
"learning_rate": 0.0001414342629482072,
"loss": 0.9233,
"step": 24400
},
{
"epoch": 7.29,
"learning_rate": 0.00013990193073858413,
"loss": 0.9275,
"step": 24500
},
{
"epoch": 7.31,
"learning_rate": 0.00013836959852896107,
"loss": 0.9265,
"step": 24600
},
{
"epoch": 7.34,
"learning_rate": 0.00013683726631933803,
"loss": 0.9276,
"step": 24700
},
{
"epoch": 7.37,
"learning_rate": 0.00013530493410971497,
"loss": 0.9252,
"step": 24800
},
{
"epoch": 7.4,
"learning_rate": 0.00013377260190009193,
"loss": 0.9224,
"step": 24900
},
{
"epoch": 7.43,
"learning_rate": 0.0001322402696904689,
"loss": 0.9216,
"step": 25000
},
{
"epoch": 7.46,
"learning_rate": 0.00013070793748084586,
"loss": 0.9233,
"step": 25100
},
{
"epoch": 7.49,
"learning_rate": 0.0001291756052712228,
"loss": 0.9275,
"step": 25200
},
{
"epoch": 7.52,
"learning_rate": 0.00012764327306159976,
"loss": 0.9229,
"step": 25300
},
{
"epoch": 7.55,
"learning_rate": 0.00012611094085197673,
"loss": 0.922,
"step": 25400
},
{
"epoch": 7.58,
"learning_rate": 0.00012457860864235367,
"loss": 0.9255,
"step": 25500
},
{
"epoch": 7.61,
"learning_rate": 0.0001230462764327306,
"loss": 0.9196,
"step": 25600
},
{
"epoch": 7.64,
"learning_rate": 0.00012151394422310758,
"loss": 0.9198,
"step": 25700
},
{
"epoch": 7.67,
"learning_rate": 0.00011998161201348452,
"loss": 0.9226,
"step": 25800
},
{
"epoch": 7.7,
"learning_rate": 0.00011844927980386148,
"loss": 0.9174,
"step": 25900
},
{
"epoch": 7.73,
"learning_rate": 0.00011691694759423843,
"loss": 0.9191,
"step": 26000
},
{
"epoch": 7.76,
"learning_rate": 0.0001153846153846154,
"loss": 0.9207,
"step": 26100
},
{
"epoch": 7.79,
"learning_rate": 0.00011385228317499235,
"loss": 0.9225,
"step": 26200
},
{
"epoch": 7.82,
"learning_rate": 0.00011231995096536928,
"loss": 0.9198,
"step": 26300
},
{
"epoch": 7.85,
"learning_rate": 0.00011078761875574625,
"loss": 0.9183,
"step": 26400
},
{
"epoch": 7.88,
"learning_rate": 0.0001092552865461232,
"loss": 0.919,
"step": 26500
},
{
"epoch": 7.91,
"learning_rate": 0.00010772295433650016,
"loss": 0.9193,
"step": 26600
},
{
"epoch": 7.94,
"learning_rate": 0.00010619062212687711,
"loss": 0.9205,
"step": 26700
},
{
"epoch": 7.97,
"learning_rate": 0.00010465828991725406,
"loss": 0.92,
"step": 26800
},
{
"epoch": 8.0,
"learning_rate": 0.00010312595770763101,
"loss": 0.9192,
"step": 26900
},
{
"epoch": 8.03,
"learning_rate": 0.00010159362549800798,
"loss": 0.9186,
"step": 27000
},
{
"epoch": 8.06,
"learning_rate": 0.00010006129328838493,
"loss": 0.9176,
"step": 27100
},
{
"epoch": 8.09,
"learning_rate": 9.852896107876188e-05,
"loss": 0.9136,
"step": 27200
},
{
"epoch": 8.12,
"learning_rate": 9.699662886913883e-05,
"loss": 0.9116,
"step": 27300
},
{
"epoch": 8.15,
"learning_rate": 9.546429665951578e-05,
"loss": 0.9174,
"step": 27400
},
{
"epoch": 8.18,
"learning_rate": 9.393196444989274e-05,
"loss": 0.9156,
"step": 27500
},
{
"epoch": 8.21,
"learning_rate": 9.23996322402697e-05,
"loss": 0.912,
"step": 27600
},
{
"epoch": 8.24,
"learning_rate": 9.086730003064666e-05,
"loss": 0.9142,
"step": 27700
},
{
"epoch": 8.27,
"learning_rate": 8.93349678210236e-05,
"loss": 0.9099,
"step": 27800
},
{
"epoch": 8.3,
"learning_rate": 8.780263561140055e-05,
"loss": 0.9129,
"step": 27900
},
{
"epoch": 8.33,
"learning_rate": 8.627030340177751e-05,
"loss": 0.9145,
"step": 28000
},
{
"epoch": 8.36,
"learning_rate": 8.473797119215446e-05,
"loss": 0.9117,
"step": 28100
},
{
"epoch": 8.39,
"learning_rate": 8.320563898253141e-05,
"loss": 0.9112,
"step": 28200
},
{
"epoch": 8.42,
"learning_rate": 8.167330677290836e-05,
"loss": 0.9128,
"step": 28300
},
{
"epoch": 8.44,
"learning_rate": 8.014097456328533e-05,
"loss": 0.9122,
"step": 28400
},
{
"epoch": 8.47,
"learning_rate": 7.860864235366228e-05,
"loss": 0.9113,
"step": 28500
},
{
"epoch": 8.5,
"learning_rate": 7.707631014403924e-05,
"loss": 0.9115,
"step": 28600
},
{
"epoch": 8.53,
"learning_rate": 7.554397793441618e-05,
"loss": 0.9098,
"step": 28700
},
{
"epoch": 8.56,
"learning_rate": 7.401164572479313e-05,
"loss": 0.9101,
"step": 28800
},
{
"epoch": 8.59,
"learning_rate": 7.247931351517009e-05,
"loss": 0.9063,
"step": 28900
},
{
"epoch": 8.62,
"learning_rate": 7.094698130554704e-05,
"loss": 0.913,
"step": 29000
},
{
"epoch": 8.65,
"learning_rate": 6.9414649095924e-05,
"loss": 0.9092,
"step": 29100
},
{
"epoch": 8.68,
"learning_rate": 6.788231688630094e-05,
"loss": 0.9101,
"step": 29200
},
{
"epoch": 8.71,
"learning_rate": 6.634998467667791e-05,
"loss": 0.9089,
"step": 29300
},
{
"epoch": 8.74,
"learning_rate": 6.481765246705486e-05,
"loss": 0.9108,
"step": 29400
},
{
"epoch": 8.77,
"learning_rate": 6.328532025743182e-05,
"loss": 0.9065,
"step": 29500
},
{
"epoch": 8.8,
"learning_rate": 6.175298804780877e-05,
"loss": 0.9129,
"step": 29600
},
{
"epoch": 8.83,
"learning_rate": 6.022065583818572e-05,
"loss": 0.9097,
"step": 29700
},
{
"epoch": 8.86,
"learning_rate": 5.8688323628562674e-05,
"loss": 0.9115,
"step": 29800
},
{
"epoch": 8.89,
"learning_rate": 5.715599141893963e-05,
"loss": 0.9088,
"step": 29900
},
{
"epoch": 8.92,
"learning_rate": 5.5623659209316575e-05,
"loss": 0.9112,
"step": 30000
},
{
"epoch": 8.95,
"learning_rate": 5.409132699969353e-05,
"loss": 0.9086,
"step": 30100
},
{
"epoch": 8.98,
"learning_rate": 5.255899479007049e-05,
"loss": 0.9106,
"step": 30200
},
{
"epoch": 9.01,
"learning_rate": 5.102666258044744e-05,
"loss": 0.9104,
"step": 30300
},
{
"epoch": 9.04,
"learning_rate": 4.94943303708244e-05,
"loss": 0.9037,
"step": 30400
},
{
"epoch": 9.07,
"learning_rate": 4.796199816120135e-05,
"loss": 0.9082,
"step": 30500
},
{
"epoch": 9.1,
"learning_rate": 4.6429665951578305e-05,
"loss": 0.9041,
"step": 30600
},
{
"epoch": 9.13,
"learning_rate": 4.489733374195526e-05,
"loss": 0.9025,
"step": 30700
},
{
"epoch": 9.16,
"learning_rate": 4.3365001532332206e-05,
"loss": 0.9006,
"step": 30800
},
{
"epoch": 9.19,
"learning_rate": 4.1832669322709164e-05,
"loss": 0.9072,
"step": 30900
},
{
"epoch": 9.22,
"learning_rate": 4.0300337113086114e-05,
"loss": 0.9038,
"step": 31000
},
{
"epoch": 9.25,
"learning_rate": 3.876800490346307e-05,
"loss": 0.9072,
"step": 31100
},
{
"epoch": 9.28,
"learning_rate": 3.723567269384002e-05,
"loss": 0.9017,
"step": 31200
},
{
"epoch": 9.31,
"learning_rate": 3.570334048421698e-05,
"loss": 0.9032,
"step": 31300
},
{
"epoch": 9.34,
"learning_rate": 3.4171008274593937e-05,
"loss": 0.9026,
"step": 31400
},
{
"epoch": 9.37,
"learning_rate": 3.263867606497089e-05,
"loss": 0.9008,
"step": 31500
},
{
"epoch": 9.4,
"learning_rate": 3.110634385534784e-05,
"loss": 0.9065,
"step": 31600
},
{
"epoch": 9.43,
"learning_rate": 2.9574011645724795e-05,
"loss": 0.9026,
"step": 31700
},
{
"epoch": 9.46,
"learning_rate": 2.804167943610175e-05,
"loss": 0.9011,
"step": 31800
},
{
"epoch": 9.49,
"learning_rate": 2.65093472264787e-05,
"loss": 0.9023,
"step": 31900
},
{
"epoch": 9.52,
"learning_rate": 2.4977015016855653e-05,
"loss": 0.9007,
"step": 32000
}
],
"max_steps": 33630,
"num_train_epochs": 10,
"total_flos": 8.418135066885916e+17,
"trial_name": null,
"trial_params": null
}