|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.515237104206927, |
|
"global_step": 32000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 5e-05, |
|
"loss": 3.5407, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001, |
|
"loss": 3.2075, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00015, |
|
"loss": 3.0286, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002, |
|
"loss": 2.8212, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025, |
|
"loss": 2.3586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0003, |
|
"loss": 1.676, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00035, |
|
"loss": 1.3696, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0004, |
|
"loss": 1.2677, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 1.2271, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0005, |
|
"loss": 1.2006, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000498467667790377, |
|
"loss": 1.1846, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0004969353355807539, |
|
"loss": 1.1663, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0004954030033711309, |
|
"loss": 1.1429, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0004938706711615078, |
|
"loss": 1.1384, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0004923383389518848, |
|
"loss": 1.1353, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0004908060067422617, |
|
"loss": 1.1384, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0004892736745326388, |
|
"loss": 1.1461, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00048774134232301567, |
|
"loss": 1.1333, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0004862090101133926, |
|
"loss": 1.1205, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00048467667790376954, |
|
"loss": 1.1141, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0004831443456941465, |
|
"loss": 1.1078, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0004816120134845234, |
|
"loss": 1.1006, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00048007968127490044, |
|
"loss": 1.0978, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00047854734906527735, |
|
"loss": 1.0894, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0004770150168556543, |
|
"loss": 1.0861, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0004754826846460313, |
|
"loss": 1.083, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00047395035243640824, |
|
"loss": 1.0758, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00047241802022678515, |
|
"loss": 1.076, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00047088568801716217, |
|
"loss": 1.0794, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0004693533558075391, |
|
"loss": 1.0706, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00046782102359791604, |
|
"loss": 1.0725, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.000466288691388293, |
|
"loss": 1.069, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0004647563591786699, |
|
"loss": 1.0674, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00046322402696904693, |
|
"loss": 1.066, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00046169169475942384, |
|
"loss": 1.0569, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0004601593625498008, |
|
"loss": 1.0579, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00045862703034017777, |
|
"loss": 1.0615, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00045709469813055473, |
|
"loss": 1.055, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00045556236592093164, |
|
"loss": 1.0583, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0004540300337113086, |
|
"loss": 1.0537, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00045249770150168557, |
|
"loss": 1.0531, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00045096536929206254, |
|
"loss": 1.0507, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0004494330370824395, |
|
"loss": 1.0449, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0004479007048728164, |
|
"loss": 1.0463, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00044636837266319343, |
|
"loss": 1.0495, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00044483604045357034, |
|
"loss": 1.0489, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0004433037082439473, |
|
"loss": 1.043, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00044177137603432427, |
|
"loss": 1.0404, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0004402390438247012, |
|
"loss": 1.0448, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00043870671161507814, |
|
"loss": 1.0378, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0004371743794054551, |
|
"loss": 1.0359, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00043564204719583207, |
|
"loss": 1.0419, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.000434109714986209, |
|
"loss": 1.0332, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.000432577382776586, |
|
"loss": 1.0382, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0004310450505669629, |
|
"loss": 1.0312, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0004295127183573399, |
|
"loss": 1.0377, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00042798038614771683, |
|
"loss": 1.0296, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00042644805393809374, |
|
"loss": 1.0316, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00042491572172847076, |
|
"loss": 1.0322, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0004233833895188477, |
|
"loss": 1.0325, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00042185105730922464, |
|
"loss": 1.0307, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0004203187250996016, |
|
"loss": 1.0297, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00041878639288997857, |
|
"loss": 1.031, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0004172540606803555, |
|
"loss": 1.0304, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0004157217284707325, |
|
"loss": 1.0278, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0004141893962611094, |
|
"loss": 1.0211, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0004126570640514864, |
|
"loss": 1.0248, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00041112473184186333, |
|
"loss": 1.0319, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00040959239963224024, |
|
"loss": 1.0301, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00040806006742261726, |
|
"loss": 1.0295, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00040652773521299417, |
|
"loss": 1.0247, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00040499540300337113, |
|
"loss": 1.0205, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0004034630707937481, |
|
"loss": 1.0221, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00040193073858412506, |
|
"loss": 1.0251, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00040039840637450197, |
|
"loss": 1.0164, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.000398866074164879, |
|
"loss": 1.019, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.0003973337419552559, |
|
"loss": 1.0167, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0003958014097456328, |
|
"loss": 1.0202, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00039426907753600983, |
|
"loss": 1.0183, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00039273674532638674, |
|
"loss": 1.0234, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00039120441311676376, |
|
"loss": 1.0103, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00038967208090714067, |
|
"loss": 1.0196, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00038813974869751763, |
|
"loss": 1.0147, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.0003866074164878946, |
|
"loss": 1.0138, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00038507508427827156, |
|
"loss": 1.0151, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00038354275206864847, |
|
"loss": 1.0118, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0003820104198590255, |
|
"loss": 1.014, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0003804780876494024, |
|
"loss": 1.0096, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0003789457554397793, |
|
"loss": 1.0092, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0003774134232301563, |
|
"loss": 1.0096, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00037588109102053323, |
|
"loss": 1.0148, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00037434875881091025, |
|
"loss": 1.0102, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00037281642660128716, |
|
"loss": 1.0095, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0003712840943916641, |
|
"loss": 1.0099, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.0003697517621820411, |
|
"loss": 1.0083, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.00036821942997241805, |
|
"loss": 1.0093, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00036668709776279496, |
|
"loss": 1.0023, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00036515476555317193, |
|
"loss": 1.0058, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.0003636224333435489, |
|
"loss": 1.0088, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0003620901011339258, |
|
"loss": 1.0046, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0003605577689243028, |
|
"loss": 1.0142, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 0.00035902543671467973, |
|
"loss": 1.0031, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0003574931045050567, |
|
"loss": 1.006, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 0.00035596077229543366, |
|
"loss": 1.0019, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 0.0003544284400858106, |
|
"loss": 1.0023, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 0.0003528961078761876, |
|
"loss": 0.9993, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 0.0003513637756665645, |
|
"loss": 0.9987, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 0.00034983144345694146, |
|
"loss": 0.9987, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 0.0003482991112473184, |
|
"loss": 1.005, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 0.0003467667790376954, |
|
"loss": 0.9966, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0003452344468280723, |
|
"loss": 0.9986, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 0.0003437021146184493, |
|
"loss": 0.9973, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 0.00034216978240882623, |
|
"loss": 1.0011, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 0.0003406374501992032, |
|
"loss": 0.9944, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 0.00033910511798958016, |
|
"loss": 0.996, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 0.00033757278577995707, |
|
"loss": 0.9976, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 0.0003360404535703341, |
|
"loss": 0.9931, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 0.000334508121360711, |
|
"loss": 0.9921, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 0.00033297578915108796, |
|
"loss": 0.9911, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 0.0003314434569414649, |
|
"loss": 0.9916, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0003299111247318419, |
|
"loss": 0.9921, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 0.0003283787925222188, |
|
"loss": 0.991, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 0.0003268464603125958, |
|
"loss": 0.9971, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0003253141281029727, |
|
"loss": 0.995, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.0003237817958933497, |
|
"loss": 0.9891, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.00032224946368372665, |
|
"loss": 0.9907, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00032071713147410356, |
|
"loss": 0.9912, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.0003191847992644805, |
|
"loss": 0.9873, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 0.0003176524670548575, |
|
"loss": 0.9868, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.00031612013484523445, |
|
"loss": 0.9845, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0003145878026356114, |
|
"loss": 0.9836, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 0.0003130554704259884, |
|
"loss": 0.986, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0003115231382163653, |
|
"loss": 0.9902, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 0.0003099908060067423, |
|
"loss": 0.983, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 0.0003084584737971192, |
|
"loss": 0.9872, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.00030692614158749613, |
|
"loss": 0.9844, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.00030539380937787315, |
|
"loss": 0.9867, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.00030386147716825006, |
|
"loss": 0.9821, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 0.000302329144958627, |
|
"loss": 0.9809, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 0.000300796812749004, |
|
"loss": 0.984, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 0.00029926448053938095, |
|
"loss": 0.9767, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0002977321483297579, |
|
"loss": 0.9819, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0002961998161201349, |
|
"loss": 0.9811, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 0.0002946674839105118, |
|
"loss": 0.9791, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 0.00029313515170088875, |
|
"loss": 0.9783, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 0.0002916028194912657, |
|
"loss": 0.9878, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 0.00029007048728164263, |
|
"loss": 0.975, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00028853815507201965, |
|
"loss": 0.9775, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 0.00028700582286239656, |
|
"loss": 0.9775, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 0.0002854734906527735, |
|
"loss": 0.9786, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.0002839411584431505, |
|
"loss": 0.9753, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 0.00028240882623352745, |
|
"loss": 0.9841, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.00028087649402390436, |
|
"loss": 0.9716, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.0002793441618142814, |
|
"loss": 0.9774, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 0.0002778118296046583, |
|
"loss": 0.9723, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 0.00027627949739503525, |
|
"loss": 0.9702, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 0.0002747471651854122, |
|
"loss": 0.9766, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0002732148329757891, |
|
"loss": 0.9843, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 0.00027168250076616614, |
|
"loss": 0.9701, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 0.00027015016855654305, |
|
"loss": 0.9715, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 0.00026861783634692, |
|
"loss": 0.9695, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 0.000267085504137297, |
|
"loss": 0.9699, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.00026555317192767394, |
|
"loss": 0.9665, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.00026402083971805085, |
|
"loss": 0.9681, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 0.0002624885075084278, |
|
"loss": 0.9697, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 0.0002609561752988048, |
|
"loss": 0.9662, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 0.00025942384308918175, |
|
"loss": 0.965, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0002578915108795587, |
|
"loss": 0.9655, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 0.0002563591786699356, |
|
"loss": 0.9689, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 0.00025482684646031264, |
|
"loss": 0.9641, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.00025329451425068955, |
|
"loss": 0.9612, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 0.0002517621820410665, |
|
"loss": 0.9667, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 0.0002502298498314435, |
|
"loss": 0.9623, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 0.0002486975176218204, |
|
"loss": 0.9611, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.00024716518541219735, |
|
"loss": 0.956, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 0.0002456328532025743, |
|
"loss": 0.9623, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 0.00024410052099295128, |
|
"loss": 0.9577, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 0.00024256818878332824, |
|
"loss": 0.9584, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 0.00024103585657370518, |
|
"loss": 0.9595, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.00023950352436408212, |
|
"loss": 0.954, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00023797119215445908, |
|
"loss": 0.958, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 0.00023643885994483605, |
|
"loss": 0.9575, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 0.000234906527735213, |
|
"loss": 0.9499, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 0.00023337419552558995, |
|
"loss": 0.9583, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.0002318418633159669, |
|
"loss": 0.9547, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00023030953110634387, |
|
"loss": 0.9531, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 0.0002287771988967208, |
|
"loss": 0.9566, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.00022724486668709778, |
|
"loss": 0.9519, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 0.0002257125344774747, |
|
"loss": 0.9473, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.00022418020226785168, |
|
"loss": 0.9496, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 0.00022264787005822861, |
|
"loss": 0.9469, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 0.00022111553784860558, |
|
"loss": 0.9509, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.00021958320563898254, |
|
"loss": 0.9466, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 0.0002180508734293595, |
|
"loss": 0.9499, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.00021651854121973644, |
|
"loss": 0.9498, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 0.0002149862090101134, |
|
"loss": 0.9483, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 0.00021345387680049037, |
|
"loss": 0.9522, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 0.00021192154459086728, |
|
"loss": 0.9441, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 0.00021038921238124425, |
|
"loss": 0.9492, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0002088568801716212, |
|
"loss": 0.9421, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 0.00020732454796199817, |
|
"loss": 0.9432, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 0.0002057922157523751, |
|
"loss": 0.9542, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 0.00020425988354275207, |
|
"loss": 0.9426, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 0.00020272755133312904, |
|
"loss": 0.9484, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.00020119521912350598, |
|
"loss": 0.9473, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 0.00019966288691388294, |
|
"loss": 0.9413, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 0.0001981305547042599, |
|
"loss": 0.9438, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 0.00019659822249463684, |
|
"loss": 0.9421, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 0.00019506589028501378, |
|
"loss": 0.9406, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 0.00019353355807539074, |
|
"loss": 0.9384, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 0.0001920012258657677, |
|
"loss": 0.9397, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.00019046889365614464, |
|
"loss": 0.9367, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 0.0001889365614465216, |
|
"loss": 0.9402, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 0.00018740422923689857, |
|
"loss": 0.9319, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 0.00018587189702727554, |
|
"loss": 0.9385, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"learning_rate": 0.00018433956481765247, |
|
"loss": 0.939, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0001828072326080294, |
|
"loss": 0.9399, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 0.00018127490039840637, |
|
"loss": 0.9407, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 0.00017974256818878334, |
|
"loss": 0.94, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 0.00017821023597916027, |
|
"loss": 0.9407, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 0.00017667790376953724, |
|
"loss": 0.9353, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.0001751455715599142, |
|
"loss": 0.9405, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00017361323935029114, |
|
"loss": 0.9305, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 0.0001720809071406681, |
|
"loss": 0.938, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.00017054857493104507, |
|
"loss": 0.9311, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.72, |
|
"learning_rate": 0.00016901624272142203, |
|
"loss": 0.9343, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00016748391051179894, |
|
"loss": 0.9312, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 0.0001659515783021759, |
|
"loss": 0.9353, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 0.00016441924609255287, |
|
"loss": 0.9341, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.0001628869138829298, |
|
"loss": 0.9338, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00016135458167330677, |
|
"loss": 0.9318, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.00015982224946368373, |
|
"loss": 0.9309, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 0.0001582899172540607, |
|
"loss": 0.9291, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 0.00015675758504443764, |
|
"loss": 0.9307, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 0.0001552252528348146, |
|
"loss": 0.9325, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 0.00015369292062519156, |
|
"loss": 0.9363, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.00015216058841556847, |
|
"loss": 0.9325, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 0.00015062825620594544, |
|
"loss": 0.9276, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 0.0001490959239963224, |
|
"loss": 0.9328, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 0.00014756359178669937, |
|
"loss": 0.9304, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 0.0001460312595770763, |
|
"loss": 0.9274, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.00014449892736745327, |
|
"loss": 0.9261, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 0.00014296659515783023, |
|
"loss": 0.9245, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 0.0001414342629482072, |
|
"loss": 0.9233, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 0.00013990193073858413, |
|
"loss": 0.9275, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"learning_rate": 0.00013836959852896107, |
|
"loss": 0.9265, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 0.00013683726631933803, |
|
"loss": 0.9276, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.00013530493410971497, |
|
"loss": 0.9252, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.00013377260190009193, |
|
"loss": 0.9224, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 0.0001322402696904689, |
|
"loss": 0.9216, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 0.00013070793748084586, |
|
"loss": 0.9233, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 0.0001291756052712228, |
|
"loss": 0.9275, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 0.00012764327306159976, |
|
"loss": 0.9229, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.00012611094085197673, |
|
"loss": 0.922, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 0.00012457860864235367, |
|
"loss": 0.9255, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 0.0001230462764327306, |
|
"loss": 0.9196, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 0.00012151394422310758, |
|
"loss": 0.9198, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"learning_rate": 0.00011998161201348452, |
|
"loss": 0.9226, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.00011844927980386148, |
|
"loss": 0.9174, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 7.73, |
|
"learning_rate": 0.00011691694759423843, |
|
"loss": 0.9191, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 0.0001153846153846154, |
|
"loss": 0.9207, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 0.00011385228317499235, |
|
"loss": 0.9225, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 0.00011231995096536928, |
|
"loss": 0.9198, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.00011078761875574625, |
|
"loss": 0.9183, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0001092552865461232, |
|
"loss": 0.919, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 0.00010772295433650016, |
|
"loss": 0.9193, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"learning_rate": 0.00010619062212687711, |
|
"loss": 0.9205, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 0.00010465828991725406, |
|
"loss": 0.92, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00010312595770763101, |
|
"loss": 0.9192, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 0.00010159362549800798, |
|
"loss": 0.9186, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 0.00010006129328838493, |
|
"loss": 0.9176, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.852896107876188e-05, |
|
"loss": 0.9136, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.699662886913883e-05, |
|
"loss": 0.9116, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.546429665951578e-05, |
|
"loss": 0.9174, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.393196444989274e-05, |
|
"loss": 0.9156, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 9.23996322402697e-05, |
|
"loss": 0.912, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 9.086730003064666e-05, |
|
"loss": 0.9142, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"learning_rate": 8.93349678210236e-05, |
|
"loss": 0.9099, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 8.780263561140055e-05, |
|
"loss": 0.9129, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 8.627030340177751e-05, |
|
"loss": 0.9145, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 8.473797119215446e-05, |
|
"loss": 0.9117, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.320563898253141e-05, |
|
"loss": 0.9112, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 8.167330677290836e-05, |
|
"loss": 0.9128, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 8.014097456328533e-05, |
|
"loss": 0.9122, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 7.860864235366228e-05, |
|
"loss": 0.9113, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 7.707631014403924e-05, |
|
"loss": 0.9115, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 7.554397793441618e-05, |
|
"loss": 0.9098, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.401164572479313e-05, |
|
"loss": 0.9101, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.247931351517009e-05, |
|
"loss": 0.9063, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 7.094698130554704e-05, |
|
"loss": 0.913, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 6.9414649095924e-05, |
|
"loss": 0.9092, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"learning_rate": 6.788231688630094e-05, |
|
"loss": 0.9101, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 6.634998467667791e-05, |
|
"loss": 0.9089, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"learning_rate": 6.481765246705486e-05, |
|
"loss": 0.9108, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.328532025743182e-05, |
|
"loss": 0.9065, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 6.175298804780877e-05, |
|
"loss": 0.9129, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 6.022065583818572e-05, |
|
"loss": 0.9097, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.8688323628562674e-05, |
|
"loss": 0.9115, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.715599141893963e-05, |
|
"loss": 0.9088, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 5.5623659209316575e-05, |
|
"loss": 0.9112, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 5.409132699969353e-05, |
|
"loss": 0.9086, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 8.98, |
|
"learning_rate": 5.255899479007049e-05, |
|
"loss": 0.9106, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 5.102666258044744e-05, |
|
"loss": 0.9104, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 9.04, |
|
"learning_rate": 4.94943303708244e-05, |
|
"loss": 0.9037, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 9.07, |
|
"learning_rate": 4.796199816120135e-05, |
|
"loss": 0.9082, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.6429665951578305e-05, |
|
"loss": 0.9041, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"learning_rate": 4.489733374195526e-05, |
|
"loss": 0.9025, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.3365001532332206e-05, |
|
"loss": 0.9006, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.1832669322709164e-05, |
|
"loss": 0.9072, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 4.0300337113086114e-05, |
|
"loss": 0.9038, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.876800490346307e-05, |
|
"loss": 0.9072, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 3.723567269384002e-05, |
|
"loss": 0.9017, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 3.570334048421698e-05, |
|
"loss": 0.9032, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 3.4171008274593937e-05, |
|
"loss": 0.9026, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.263867606497089e-05, |
|
"loss": 0.9008, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 3.110634385534784e-05, |
|
"loss": 0.9065, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.9574011645724795e-05, |
|
"loss": 0.9026, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.804167943610175e-05, |
|
"loss": 0.9011, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.65093472264787e-05, |
|
"loss": 0.9023, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.4977015016855653e-05, |
|
"loss": 0.9007, |
|
"step": 32000 |
|
} |
|
], |
|
"max_steps": 33630, |
|
"num_train_epochs": 10, |
|
"total_flos": 8.418135066885916e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|