|
{ |
|
"best_metric": 1.3685582876205444, |
|
"best_model_checkpoint": "clinic_albert_v3/checkpoint-104500", |
|
"epoch": 3.0, |
|
"global_step": 112329, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.4500000000000003e-06, |
|
"loss": 6.2872, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 3.8396031856536865, |
|
"eval_runtime": 92.2647, |
|
"eval_samples_per_second": 161.806, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.950000000000001e-06, |
|
"loss": 3.6553, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 3.5483450889587402, |
|
"eval_runtime": 91.9838, |
|
"eval_samples_per_second": 162.3, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 7.45e-06, |
|
"loss": 3.4131, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 3.3662946224212646, |
|
"eval_runtime": 91.5787, |
|
"eval_samples_per_second": 163.018, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 3.2051, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 3.199434995651245, |
|
"eval_runtime": 91.5904, |
|
"eval_samples_per_second": 162.997, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.2450000000000001e-05, |
|
"loss": 3.1553, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 3.091717004776001, |
|
"eval_runtime": 91.2768, |
|
"eval_samples_per_second": 163.557, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.4950000000000001e-05, |
|
"loss": 3.0703, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 2.9730803966522217, |
|
"eval_runtime": 91.2688, |
|
"eval_samples_per_second": 163.572, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.745e-05, |
|
"loss": 2.9258, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 2.930089235305786, |
|
"eval_runtime": 91.4358, |
|
"eval_samples_per_second": 163.273, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.995e-05, |
|
"loss": 2.8627, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 2.842494249343872, |
|
"eval_runtime": 91.5055, |
|
"eval_samples_per_second": 163.149, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.245e-05, |
|
"loss": 2.797, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 2.8484582901000977, |
|
"eval_runtime": 91.4864, |
|
"eval_samples_per_second": 163.183, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.495e-05, |
|
"loss": 2.7408, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.784637928009033, |
|
"eval_runtime": 91.3143, |
|
"eval_samples_per_second": 163.49, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.7450000000000003e-05, |
|
"loss": 2.7552, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.731156349182129, |
|
"eval_runtime": 91.2004, |
|
"eval_samples_per_second": 163.694, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.995e-05, |
|
"loss": 2.7098, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 2.677649736404419, |
|
"eval_runtime": 91.2243, |
|
"eval_samples_per_second": 163.652, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.245e-05, |
|
"loss": 2.7015, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.7468419075012207, |
|
"eval_runtime": 91.3379, |
|
"eval_samples_per_second": 163.448, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.495e-05, |
|
"loss": 2.6226, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 2.6828126907348633, |
|
"eval_runtime": 91.2062, |
|
"eval_samples_per_second": 163.684, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.745e-05, |
|
"loss": 2.6738, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.622964382171631, |
|
"eval_runtime": 91.2339, |
|
"eval_samples_per_second": 163.634, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.995e-05, |
|
"loss": 2.6507, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 2.6091177463531494, |
|
"eval_runtime": 91.1569, |
|
"eval_samples_per_second": 163.773, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.245e-05, |
|
"loss": 2.6198, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 2.598926067352295, |
|
"eval_runtime": 91.2361, |
|
"eval_samples_per_second": 163.63, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.495e-05, |
|
"loss": 2.5921, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.59818959236145, |
|
"eval_runtime": 91.1835, |
|
"eval_samples_per_second": 163.725, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.745e-05, |
|
"loss": 2.5966, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 2.5808210372924805, |
|
"eval_runtime": 91.3208, |
|
"eval_samples_per_second": 163.479, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.995e-05, |
|
"loss": 2.5369, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.5975215435028076, |
|
"eval_runtime": 91.1933, |
|
"eval_samples_per_second": 163.707, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9760576180750326e-05, |
|
"loss": 2.5351, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 2.507310152053833, |
|
"eval_runtime": 91.2579, |
|
"eval_samples_per_second": 163.591, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9516754781147087e-05, |
|
"loss": 2.469, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 2.5691452026367188, |
|
"eval_runtime": 91.4338, |
|
"eval_samples_per_second": 163.277, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.927244476150456e-05, |
|
"loss": 2.5135, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 2.534702777862549, |
|
"eval_runtime": 91.54, |
|
"eval_samples_per_second": 163.087, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.9028134741862035e-05, |
|
"loss": 2.4175, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 2.506258726119995, |
|
"eval_runtime": 91.2029, |
|
"eval_samples_per_second": 163.69, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.878382472221951e-05, |
|
"loss": 2.4601, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 2.417022228240967, |
|
"eval_runtime": 91.3123, |
|
"eval_samples_per_second": 163.494, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.8539514702576984e-05, |
|
"loss": 2.3867, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 2.41973614692688, |
|
"eval_runtime": 91.3867, |
|
"eval_samples_per_second": 163.361, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.829520468293446e-05, |
|
"loss": 2.3704, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 2.3954126834869385, |
|
"eval_runtime": 91.334, |
|
"eval_samples_per_second": 163.455, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.805089466329193e-05, |
|
"loss": 2.3152, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.3748397827148438, |
|
"eval_runtime": 91.307, |
|
"eval_samples_per_second": 163.503, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.7806584643649414e-05, |
|
"loss": 2.3504, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 2.3295907974243164, |
|
"eval_runtime": 91.2388, |
|
"eval_samples_per_second": 163.626, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.756227462400688e-05, |
|
"loss": 2.325, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.315340518951416, |
|
"eval_runtime": 91.4211, |
|
"eval_samples_per_second": 163.299, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7317964604364355e-05, |
|
"loss": 2.2768, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 2.323646306991577, |
|
"eval_runtime": 91.2326, |
|
"eval_samples_per_second": 163.637, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.707365458472183e-05, |
|
"loss": 2.2752, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 2.286576747894287, |
|
"eval_runtime": 91.2898, |
|
"eval_samples_per_second": 163.534, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.6829344565079304e-05, |
|
"loss": 2.2439, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.2795145511627197, |
|
"eval_runtime": 91.3133, |
|
"eval_samples_per_second": 163.492, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.6585523165476065e-05, |
|
"loss": 2.2255, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 2.226459503173828, |
|
"eval_runtime": 91.4081, |
|
"eval_samples_per_second": 163.323, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.634121314583354e-05, |
|
"loss": 2.177, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.252764940261841, |
|
"eval_runtime": 91.6167, |
|
"eval_samples_per_second": 162.951, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.60973917462303e-05, |
|
"loss": 2.2602, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 2.2427968978881836, |
|
"eval_runtime": 91.3698, |
|
"eval_samples_per_second": 163.391, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.585405896666634e-05, |
|
"loss": 2.2382, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 2.175690174102783, |
|
"eval_runtime": 91.3659, |
|
"eval_samples_per_second": 163.398, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.5609748947023814e-05, |
|
"loss": 2.1481, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.1878654956817627, |
|
"eval_runtime": 91.3436, |
|
"eval_samples_per_second": 163.438, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.5365438927381295e-05, |
|
"loss": 2.0507, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 2.210096597671509, |
|
"eval_runtime": 91.3869, |
|
"eval_samples_per_second": 163.36, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.512112890773877e-05, |
|
"loss": 2.1267, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 2.1687960624694824, |
|
"eval_runtime": 91.1987, |
|
"eval_samples_per_second": 163.697, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.487681888809624e-05, |
|
"loss": 2.1605, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 2.1389825344085693, |
|
"eval_runtime": 91.269, |
|
"eval_samples_per_second": 163.571, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.463250886845372e-05, |
|
"loss": 2.0474, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 2.107516288757324, |
|
"eval_runtime": 91.2438, |
|
"eval_samples_per_second": 163.617, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.438819884881119e-05, |
|
"loss": 2.1403, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.117351531982422, |
|
"eval_runtime": 91.3836, |
|
"eval_samples_per_second": 163.366, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.414388882916866e-05, |
|
"loss": 2.1505, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 2.103529214859009, |
|
"eval_runtime": 91.397, |
|
"eval_samples_per_second": 163.342, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.3899578809526134e-05, |
|
"loss": 2.0463, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 2.076082229614258, |
|
"eval_runtime": 91.3284, |
|
"eval_samples_per_second": 163.465, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.3655268789883615e-05, |
|
"loss": 2.0312, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.098130702972412, |
|
"eval_runtime": 91.4521, |
|
"eval_samples_per_second": 163.244, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.341095877024109e-05, |
|
"loss": 2.0562, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 2.0372567176818848, |
|
"eval_runtime": 91.2845, |
|
"eval_samples_per_second": 163.544, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.316664875059856e-05, |
|
"loss": 2.0185, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 2.0775225162506104, |
|
"eval_runtime": 91.1456, |
|
"eval_samples_per_second": 163.793, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.292233873095604e-05, |
|
"loss": 2.012, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 2.040658712387085, |
|
"eval_runtime": 91.1946, |
|
"eval_samples_per_second": 163.705, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.267802871131351e-05, |
|
"loss": 2.0448, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.0319766998291016, |
|
"eval_runtime": 91.2535, |
|
"eval_samples_per_second": 163.599, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.2433718691670987e-05, |
|
"loss": 1.9511, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 2.034172534942627, |
|
"eval_runtime": 91.3903, |
|
"eval_samples_per_second": 163.354, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.218940867202846e-05, |
|
"loss": 1.9424, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 2.0328168869018555, |
|
"eval_runtime": 91.2635, |
|
"eval_samples_per_second": 163.581, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.1945098652385935e-05, |
|
"loss": 1.9533, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.9927563667297363, |
|
"eval_runtime": 91.4616, |
|
"eval_samples_per_second": 163.227, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.170078863274341e-05, |
|
"loss": 1.9538, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 2.0190131664276123, |
|
"eval_runtime": 91.2642, |
|
"eval_samples_per_second": 163.58, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.145647861310088e-05, |
|
"loss": 1.9778, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 2.007988929748535, |
|
"eval_runtime": 91.4801, |
|
"eval_samples_per_second": 163.194, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.121216859345836e-05, |
|
"loss": 1.9567, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 2.0140700340270996, |
|
"eval_runtime": 91.3767, |
|
"eval_samples_per_second": 163.379, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.096834719385512e-05, |
|
"loss": 1.9589, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.9930866956710815, |
|
"eval_runtime": 91.1168, |
|
"eval_samples_per_second": 163.845, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.072403717421259e-05, |
|
"loss": 1.9605, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.9426169395446777, |
|
"eval_runtime": 91.226, |
|
"eval_samples_per_second": 163.648, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.047972715457006e-05, |
|
"loss": 1.9552, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 1.9635326862335205, |
|
"eval_runtime": 91.3172, |
|
"eval_samples_per_second": 163.485, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.023590575496682e-05, |
|
"loss": 1.9112, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.9517323970794678, |
|
"eval_runtime": 91.4036, |
|
"eval_samples_per_second": 163.331, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.99915957353243e-05, |
|
"loss": 1.8632, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 1.945887565612793, |
|
"eval_runtime": 91.1301, |
|
"eval_samples_per_second": 163.821, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.9747285715681777e-05, |
|
"loss": 1.8776, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.9599124193191528, |
|
"eval_runtime": 91.3256, |
|
"eval_samples_per_second": 163.47, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.9502975696039244e-05, |
|
"loss": 1.8564, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 1.93245530128479, |
|
"eval_runtime": 91.4657, |
|
"eval_samples_per_second": 163.22, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.9259154296436005e-05, |
|
"loss": 1.8988, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 1.9134759902954102, |
|
"eval_runtime": 91.4228, |
|
"eval_samples_per_second": 163.296, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9014844276793486e-05, |
|
"loss": 1.8924, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.9036946296691895, |
|
"eval_runtime": 91.2342, |
|
"eval_samples_per_second": 163.634, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.8771022877190246e-05, |
|
"loss": 1.8543, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.8894397020339966, |
|
"eval_runtime": 91.3605, |
|
"eval_samples_per_second": 163.408, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.8526712857547714e-05, |
|
"loss": 1.8546, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 1.8986772298812866, |
|
"eval_runtime": 91.4907, |
|
"eval_samples_per_second": 163.175, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.828240283790519e-05, |
|
"loss": 1.7711, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 1.9133729934692383, |
|
"eval_runtime": 91.4658, |
|
"eval_samples_per_second": 163.219, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.803809281826267e-05, |
|
"loss": 1.8173, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 1.8876162767410278, |
|
"eval_runtime": 91.3085, |
|
"eval_samples_per_second": 163.501, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.779378279862014e-05, |
|
"loss": 1.8667, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 1.8843164443969727, |
|
"eval_runtime": 91.1747, |
|
"eval_samples_per_second": 163.741, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.75499613990169e-05, |
|
"loss": 1.7971, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.8736356496810913, |
|
"eval_runtime": 91.3988, |
|
"eval_samples_per_second": 163.339, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.730565137937437e-05, |
|
"loss": 1.8363, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.8734278678894043, |
|
"eval_runtime": 91.4136, |
|
"eval_samples_per_second": 163.313, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.7061341359731846e-05, |
|
"loss": 1.9108, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 1.8428773880004883, |
|
"eval_runtime": 91.3304, |
|
"eval_samples_per_second": 163.461, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.681703134008932e-05, |
|
"loss": 1.7966, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 1.863236427307129, |
|
"eval_runtime": 91.3214, |
|
"eval_samples_per_second": 163.478, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.6572721320446795e-05, |
|
"loss": 1.8191, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.826414704322815, |
|
"eval_runtime": 91.3739, |
|
"eval_samples_per_second": 163.384, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.632841130080427e-05, |
|
"loss": 1.7991, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 1.831282615661621, |
|
"eval_runtime": 91.5324, |
|
"eval_samples_per_second": 163.101, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.608410128116175e-05, |
|
"loss": 1.7797, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 1.8110054731369019, |
|
"eval_runtime": 91.6401, |
|
"eval_samples_per_second": 162.909, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.583979126151922e-05, |
|
"loss": 1.7492, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 1.827107548713684, |
|
"eval_runtime": 91.1498, |
|
"eval_samples_per_second": 163.785, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.559596986191598e-05, |
|
"loss": 1.7541, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 1.8055846691131592, |
|
"eval_runtime": 91.4418, |
|
"eval_samples_per_second": 163.262, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.535165984227345e-05, |
|
"loss": 1.7882, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 1.8122904300689697, |
|
"eval_runtime": 91.6628, |
|
"eval_samples_per_second": 162.869, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.510734982263093e-05, |
|
"loss": 1.7544, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 1.8098782300949097, |
|
"eval_runtime": 91.4346, |
|
"eval_samples_per_second": 163.275, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.48630398029884e-05, |
|
"loss": 1.7338, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.7867393493652344, |
|
"eval_runtime": 91.3421, |
|
"eval_samples_per_second": 163.44, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.4618729783345875e-05, |
|
"loss": 1.7529, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 1.7708940505981445, |
|
"eval_runtime": 91.2573, |
|
"eval_samples_per_second": 163.592, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.437441976370335e-05, |
|
"loss": 1.7782, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.7782670259475708, |
|
"eval_runtime": 91.5943, |
|
"eval_samples_per_second": 162.99, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.4130109744060824e-05, |
|
"loss": 1.7018, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 1.8046059608459473, |
|
"eval_runtime": 91.4265, |
|
"eval_samples_per_second": 163.29, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.38857997244183e-05, |
|
"loss": 1.6726, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 1.7743386030197144, |
|
"eval_runtime": 91.4777, |
|
"eval_samples_per_second": 163.198, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.364148970477577e-05, |
|
"loss": 1.6964, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 1.781007170677185, |
|
"eval_runtime": 91.4397, |
|
"eval_samples_per_second": 163.266, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.3397179685133254e-05, |
|
"loss": 1.7196, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 1.7817639112472534, |
|
"eval_runtime": 91.6102, |
|
"eval_samples_per_second": 162.962, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.315286966549072e-05, |
|
"loss": 1.7085, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 1.7810941934585571, |
|
"eval_runtime": 91.3943, |
|
"eval_samples_per_second": 163.347, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.2908559645848195e-05, |
|
"loss": 1.729, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.7492631673812866, |
|
"eval_runtime": 91.524, |
|
"eval_samples_per_second": 163.116, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.2664738246244956e-05, |
|
"loss": 1.6901, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 1.7426705360412598, |
|
"eval_runtime": 91.3863, |
|
"eval_samples_per_second": 163.362, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.242042822660243e-05, |
|
"loss": 1.6651, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 1.7624448537826538, |
|
"eval_runtime": 91.3919, |
|
"eval_samples_per_second": 163.352, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 3.2176118206959905e-05, |
|
"loss": 1.6679, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.7611286640167236, |
|
"eval_runtime": 91.3302, |
|
"eval_samples_per_second": 163.462, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.193180818731738e-05, |
|
"loss": 1.6635, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 1.7588094472885132, |
|
"eval_runtime": 91.421, |
|
"eval_samples_per_second": 163.299, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.168798678771414e-05, |
|
"loss": 1.6785, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 1.7499796152114868, |
|
"eval_runtime": 91.5104, |
|
"eval_samples_per_second": 163.14, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.1443676768071614e-05, |
|
"loss": 1.6805, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.7386727333068848, |
|
"eval_runtime": 91.2579, |
|
"eval_samples_per_second": 163.591, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.119936674842909e-05, |
|
"loss": 1.6603, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_loss": 1.7336368560791016, |
|
"eval_runtime": 91.5612, |
|
"eval_samples_per_second": 163.049, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.095505672878656e-05, |
|
"loss": 1.6551, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.7313791513442993, |
|
"eval_runtime": 91.2301, |
|
"eval_samples_per_second": 163.641, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.071123532918332e-05, |
|
"loss": 1.6739, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 1.7289615869522095, |
|
"eval_runtime": 91.2197, |
|
"eval_samples_per_second": 163.66, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.0466925309540794e-05, |
|
"loss": 1.677, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 1.7310763597488403, |
|
"eval_runtime": 91.2875, |
|
"eval_samples_per_second": 163.538, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.022261528989827e-05, |
|
"loss": 1.6545, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 1.7149819135665894, |
|
"eval_runtime": 91.59, |
|
"eval_samples_per_second": 162.998, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.9978305270255746e-05, |
|
"loss": 1.6666, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 1.7039389610290527, |
|
"eval_runtime": 91.4495, |
|
"eval_samples_per_second": 163.249, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.9733995250613217e-05, |
|
"loss": 1.6394, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.6981836557388306, |
|
"eval_runtime": 91.1406, |
|
"eval_samples_per_second": 163.802, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.948968523097069e-05, |
|
"loss": 1.6064, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 1.7015522718429565, |
|
"eval_runtime": 91.2276, |
|
"eval_samples_per_second": 163.646, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.9245863831367455e-05, |
|
"loss": 1.676, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 1.6849902868270874, |
|
"eval_runtime": 91.3094, |
|
"eval_samples_per_second": 163.499, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.900155381172493e-05, |
|
"loss": 1.6561, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 1.6929577589035034, |
|
"eval_runtime": 91.6129, |
|
"eval_samples_per_second": 162.957, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.87572437920824e-05, |
|
"loss": 1.6949, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 1.6772891283035278, |
|
"eval_runtime": 91.2759, |
|
"eval_samples_per_second": 163.559, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.8512933772439875e-05, |
|
"loss": 1.6279, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.6779346466064453, |
|
"eval_runtime": 91.1313, |
|
"eval_samples_per_second": 163.819, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.8268623752797352e-05, |
|
"loss": 1.6379, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.6741278171539307, |
|
"eval_runtime": 91.5237, |
|
"eval_samples_per_second": 163.116, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.8024313733154827e-05, |
|
"loss": 1.599, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 1.6729629039764404, |
|
"eval_runtime": 91.5904, |
|
"eval_samples_per_second": 162.997, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.7780003713512298e-05, |
|
"loss": 1.5784, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 1.6555140018463135, |
|
"eval_runtime": 91.3689, |
|
"eval_samples_per_second": 163.393, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.7535693693869775e-05, |
|
"loss": 1.577, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 1.665055751800537, |
|
"eval_runtime": 91.3871, |
|
"eval_samples_per_second": 163.36, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.7291872294266536e-05, |
|
"loss": 1.6282, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 1.6812142133712769, |
|
"eval_runtime": 91.4955, |
|
"eval_samples_per_second": 163.167, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.704756227462401e-05, |
|
"loss": 1.5585, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 1.6711357831954956, |
|
"eval_runtime": 91.5418, |
|
"eval_samples_per_second": 163.084, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.680325225498148e-05, |
|
"loss": 1.5593, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_loss": 1.6432082653045654, |
|
"eval_runtime": 91.3937, |
|
"eval_samples_per_second": 163.348, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.655894223533896e-05, |
|
"loss": 1.5348, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 1.6400490999221802, |
|
"eval_runtime": 91.215, |
|
"eval_samples_per_second": 163.668, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.6314632215696433e-05, |
|
"loss": 1.6154, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 1.647261381149292, |
|
"eval_runtime": 91.4792, |
|
"eval_samples_per_second": 163.196, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.6070810816093187e-05, |
|
"loss": 1.5831, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 1.6438063383102417, |
|
"eval_runtime": 91.2696, |
|
"eval_samples_per_second": 163.57, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.5826500796450665e-05, |
|
"loss": 1.6009, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 1.646418571472168, |
|
"eval_runtime": 91.2662, |
|
"eval_samples_per_second": 163.576, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.558219077680814e-05, |
|
"loss": 1.5858, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.645626425743103, |
|
"eval_runtime": 91.2497, |
|
"eval_samples_per_second": 163.606, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.5337880757165617e-05, |
|
"loss": 1.5251, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 1.6241323947906494, |
|
"eval_runtime": 91.2854, |
|
"eval_samples_per_second": 163.542, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.509405935756237e-05, |
|
"loss": 1.5767, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 1.6338564157485962, |
|
"eval_runtime": 91.3908, |
|
"eval_samples_per_second": 163.353, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.4849749337919848e-05, |
|
"loss": 1.5394, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 1.602984070777893, |
|
"eval_runtime": 91.3335, |
|
"eval_samples_per_second": 163.456, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.4605439318277322e-05, |
|
"loss": 1.5951, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 1.6044830083847046, |
|
"eval_runtime": 91.4386, |
|
"eval_samples_per_second": 163.268, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.4361129298634797e-05, |
|
"loss": 1.5563, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 1.6318944692611694, |
|
"eval_runtime": 91.2817, |
|
"eval_samples_per_second": 163.549, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.4117307899031554e-05, |
|
"loss": 1.5319, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.5991877317428589, |
|
"eval_runtime": 91.533, |
|
"eval_samples_per_second": 163.1, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.387299787938903e-05, |
|
"loss": 1.595, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 1.583760142326355, |
|
"eval_runtime": 91.246, |
|
"eval_samples_per_second": 163.613, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.362917647978579e-05, |
|
"loss": 1.5797, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 1.6295143365859985, |
|
"eval_runtime": 90.9781, |
|
"eval_samples_per_second": 164.094, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.3384866460143267e-05, |
|
"loss": 1.5298, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 1.599826455116272, |
|
"eval_runtime": 91.406, |
|
"eval_samples_per_second": 163.326, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.3140556440500737e-05, |
|
"loss": 1.5384, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 1.6063343286514282, |
|
"eval_runtime": 91.4223, |
|
"eval_samples_per_second": 163.297, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.2896246420858215e-05, |
|
"loss": 1.4986, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.5949381589889526, |
|
"eval_runtime": 91.3186, |
|
"eval_samples_per_second": 163.483, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.2651936401215686e-05, |
|
"loss": 1.5075, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 1.5914294719696045, |
|
"eval_runtime": 91.3212, |
|
"eval_samples_per_second": 163.478, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.2407626381573164e-05, |
|
"loss": 1.5303, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_loss": 1.5806745290756226, |
|
"eval_runtime": 91.0984, |
|
"eval_samples_per_second": 163.878, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.2163316361930635e-05, |
|
"loss": 1.5015, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 1.5750070810317993, |
|
"eval_runtime": 91.4316, |
|
"eval_samples_per_second": 163.281, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.1919006342288112e-05, |
|
"loss": 1.5542, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 1.6114143133163452, |
|
"eval_runtime": 91.4407, |
|
"eval_samples_per_second": 163.264, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 2.167518494268487e-05, |
|
"loss": 1.525, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.5729782581329346, |
|
"eval_runtime": 91.2691, |
|
"eval_samples_per_second": 163.571, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.1430874923042347e-05, |
|
"loss": 1.4573, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 1.598924994468689, |
|
"eval_runtime": 91.2081, |
|
"eval_samples_per_second": 163.681, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 2.1186564903399818e-05, |
|
"loss": 1.5125, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 1.5879831314086914, |
|
"eval_runtime": 91.4783, |
|
"eval_samples_per_second": 163.197, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 2.0942254883757292e-05, |
|
"loss": 1.5089, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 1.5672917366027832, |
|
"eval_runtime": 91.4841, |
|
"eval_samples_per_second": 163.187, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0697944864114767e-05, |
|
"loss": 1.5231, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 1.5848079919815063, |
|
"eval_runtime": 91.2446, |
|
"eval_samples_per_second": 163.615, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.045363484447224e-05, |
|
"loss": 1.5283, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 1.5898315906524658, |
|
"eval_runtime": 91.1948, |
|
"eval_samples_per_second": 163.705, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.0209813444869e-05, |
|
"loss": 1.4879, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 1.554874300956726, |
|
"eval_runtime": 91.3983, |
|
"eval_samples_per_second": 163.34, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.9965503425226476e-05, |
|
"loss": 1.5034, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 1.5682209730148315, |
|
"eval_runtime": 91.3854, |
|
"eval_samples_per_second": 163.363, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.972119340558395e-05, |
|
"loss": 1.5024, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.5595961809158325, |
|
"eval_runtime": 91.4495, |
|
"eval_samples_per_second": 163.249, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.9476883385941425e-05, |
|
"loss": 1.4673, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 1.5504151582717896, |
|
"eval_runtime": 91.2318, |
|
"eval_samples_per_second": 163.638, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.92325733662989e-05, |
|
"loss": 1.4758, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 1.5395164489746094, |
|
"eval_runtime": 91.1184, |
|
"eval_samples_per_second": 163.842, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.898875196669566e-05, |
|
"loss": 1.4674, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"eval_loss": 1.5537846088409424, |
|
"eval_runtime": 91.4847, |
|
"eval_samples_per_second": 163.186, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.8744441947053134e-05, |
|
"loss": 1.4528, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 1.552320957183838, |
|
"eval_runtime": 91.3075, |
|
"eval_samples_per_second": 163.502, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.8500131927410608e-05, |
|
"loss": 1.4829, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 1.5265352725982666, |
|
"eval_runtime": 91.4875, |
|
"eval_samples_per_second": 163.181, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.8255821907768082e-05, |
|
"loss": 1.4611, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.5295826196670532, |
|
"eval_runtime": 91.5744, |
|
"eval_samples_per_second": 163.026, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.8012000508164843e-05, |
|
"loss": 1.4442, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 1.5388405323028564, |
|
"eval_runtime": 91.8129, |
|
"eval_samples_per_second": 162.602, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.7767690488522314e-05, |
|
"loss": 1.4465, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 1.5279512405395508, |
|
"eval_runtime": 91.5443, |
|
"eval_samples_per_second": 163.08, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.752338046887979e-05, |
|
"loss": 1.4253, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.5250307321548462, |
|
"eval_runtime": 91.2655, |
|
"eval_samples_per_second": 163.578, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.7279070449237263e-05, |
|
"loss": 1.4236, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 1.5277702808380127, |
|
"eval_runtime": 91.3984, |
|
"eval_samples_per_second": 163.34, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.703476042959474e-05, |
|
"loss": 1.4749, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 1.5159269571304321, |
|
"eval_runtime": 91.6205, |
|
"eval_samples_per_second": 162.944, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.6790450409952215e-05, |
|
"loss": 1.4373, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 1.4971988201141357, |
|
"eval_runtime": 91.5971, |
|
"eval_samples_per_second": 162.985, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.654614039030969e-05, |
|
"loss": 1.4307, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 1.5140836238861084, |
|
"eval_runtime": 91.3406, |
|
"eval_samples_per_second": 163.443, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.6301830370667163e-05, |
|
"loss": 1.4237, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.5364242792129517, |
|
"eval_runtime": 91.2725, |
|
"eval_samples_per_second": 163.565, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.6058008971063924e-05, |
|
"loss": 1.429, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 1.5153831243515015, |
|
"eval_runtime": 91.5247, |
|
"eval_samples_per_second": 163.114, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.581418757146068e-05, |
|
"loss": 1.4359, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 1.5109397172927856, |
|
"eval_runtime": 91.5239, |
|
"eval_samples_per_second": 163.116, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.556987755181816e-05, |
|
"loss": 1.3969, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 1.5034611225128174, |
|
"eval_runtime": 91.2642, |
|
"eval_samples_per_second": 163.58, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.532556753217563e-05, |
|
"loss": 1.4011, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.5004990100860596, |
|
"eval_runtime": 91.4331, |
|
"eval_samples_per_second": 163.278, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.5081257512533104e-05, |
|
"loss": 1.4231, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 1.4998698234558105, |
|
"eval_runtime": 91.3964, |
|
"eval_samples_per_second": 163.343, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.483694749289058e-05, |
|
"loss": 1.4476, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.5173091888427734, |
|
"eval_runtime": 91.4077, |
|
"eval_samples_per_second": 163.323, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.4592637473248052e-05, |
|
"loss": 1.4455, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_loss": 1.4715697765350342, |
|
"eval_runtime": 91.3644, |
|
"eval_samples_per_second": 163.401, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.4348327453605528e-05, |
|
"loss": 1.402, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 1.4942283630371094, |
|
"eval_runtime": 91.1381, |
|
"eval_samples_per_second": 163.806, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.4104506054002287e-05, |
|
"loss": 1.4352, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_loss": 1.4948195219039917, |
|
"eval_runtime": 91.3982, |
|
"eval_samples_per_second": 163.34, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.3860196034359763e-05, |
|
"loss": 1.4316, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 1.4814602136611938, |
|
"eval_runtime": 91.536, |
|
"eval_samples_per_second": 163.094, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.3615886014717236e-05, |
|
"loss": 1.413, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.492964267730713, |
|
"eval_runtime": 91.5767, |
|
"eval_samples_per_second": 163.022, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.3371575995074712e-05, |
|
"loss": 1.3647, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 1.5028858184814453, |
|
"eval_runtime": 91.453, |
|
"eval_samples_per_second": 163.242, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.3127265975432185e-05, |
|
"loss": 1.3549, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 1.4676295518875122, |
|
"eval_runtime": 91.2539, |
|
"eval_samples_per_second": 163.598, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.288295595578966e-05, |
|
"loss": 1.4649, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 1.4584786891937256, |
|
"eval_runtime": 91.7498, |
|
"eval_samples_per_second": 162.714, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.2638645936147133e-05, |
|
"loss": 1.401, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 1.478485107421875, |
|
"eval_runtime": 91.4585, |
|
"eval_samples_per_second": 163.232, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.2394335916504608e-05, |
|
"loss": 1.4047, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 1.4770060777664185, |
|
"eval_runtime": 91.2763, |
|
"eval_samples_per_second": 163.558, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.2150025896862084e-05, |
|
"loss": 1.3718, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_loss": 1.4772363901138306, |
|
"eval_runtime": 91.3427, |
|
"eval_samples_per_second": 163.439, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.1906693117298127e-05, |
|
"loss": 1.4123, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 1.4651228189468384, |
|
"eval_runtime": 91.4724, |
|
"eval_samples_per_second": 163.208, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.1662383097655601e-05, |
|
"loss": 1.3914, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 1.439155101776123, |
|
"eval_runtime": 91.387, |
|
"eval_samples_per_second": 163.36, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.1418073078013077e-05, |
|
"loss": 1.3936, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 1.4595708847045898, |
|
"eval_runtime": 91.5308, |
|
"eval_samples_per_second": 163.104, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.1173763058370552e-05, |
|
"loss": 1.3448, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 1.4761011600494385, |
|
"eval_runtime": 91.6432, |
|
"eval_samples_per_second": 162.903, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0929453038728024e-05, |
|
"loss": 1.352, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.468656063079834, |
|
"eval_runtime": 91.6903, |
|
"eval_samples_per_second": 162.82, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.0685143019085499e-05, |
|
"loss": 1.3459, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 1.4411649703979492, |
|
"eval_runtime": 91.7674, |
|
"eval_samples_per_second": 162.683, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.0440832999442973e-05, |
|
"loss": 1.3732, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 1.4545894861221313, |
|
"eval_runtime": 91.5374, |
|
"eval_samples_per_second": 163.092, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.0196522979800447e-05, |
|
"loss": 1.3586, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 1.4413018226623535, |
|
"eval_runtime": 91.6716, |
|
"eval_samples_per_second": 162.853, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.952212960157921e-06, |
|
"loss": 1.3578, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 1.4500373601913452, |
|
"eval_runtime": 91.7404, |
|
"eval_samples_per_second": 162.731, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 9.708391560554682e-06, |
|
"loss": 1.3847, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 1.4499620199203491, |
|
"eval_runtime": 91.826, |
|
"eval_samples_per_second": 162.579, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 9.464081540912156e-06, |
|
"loss": 1.4119, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.4416851997375488, |
|
"eval_runtime": 91.623, |
|
"eval_samples_per_second": 162.939, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.21977152126963e-06, |
|
"loss": 1.3674, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 1.4545212984085083, |
|
"eval_runtime": 91.714, |
|
"eval_samples_per_second": 162.778, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.975461501627105e-06, |
|
"loss": 1.3805, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 1.4539984464645386, |
|
"eval_runtime": 91.7771, |
|
"eval_samples_per_second": 162.666, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.731640102023866e-06, |
|
"loss": 1.341, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"eval_loss": 1.4235692024230957, |
|
"eval_runtime": 91.8087, |
|
"eval_samples_per_second": 162.61, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 8.48733008238134e-06, |
|
"loss": 1.3635, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 1.427968144416809, |
|
"eval_runtime": 91.7853, |
|
"eval_samples_per_second": 162.651, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 8.243020062738814e-06, |
|
"loss": 1.3217, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.4452221393585205, |
|
"eval_runtime": 91.4571, |
|
"eval_samples_per_second": 163.235, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.998710043096287e-06, |
|
"loss": 1.3753, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.4172078371047974, |
|
"eval_runtime": 91.6806, |
|
"eval_samples_per_second": 162.837, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.754400023453761e-06, |
|
"loss": 1.3509, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 1.426705002784729, |
|
"eval_runtime": 91.7131, |
|
"eval_samples_per_second": 162.779, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 7.510578623850522e-06, |
|
"loss": 1.35, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 1.4170763492584229, |
|
"eval_runtime": 91.523, |
|
"eval_samples_per_second": 163.118, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.266268604207996e-06, |
|
"loss": 1.3277, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 1.4099535942077637, |
|
"eval_runtime": 91.6084, |
|
"eval_samples_per_second": 162.965, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.02195858456547e-06, |
|
"loss": 1.3574, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.429254174232483, |
|
"eval_runtime": 91.6665, |
|
"eval_samples_per_second": 162.862, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.7776485649229446e-06, |
|
"loss": 1.3347, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 1.3997344970703125, |
|
"eval_runtime": 91.8372, |
|
"eval_samples_per_second": 162.559, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.533827165319704e-06, |
|
"loss": 1.3655, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 1.414544701576233, |
|
"eval_runtime": 91.8567, |
|
"eval_samples_per_second": 162.525, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.289517145677179e-06, |
|
"loss": 1.3037, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 1.4175053834915161, |
|
"eval_runtime": 91.5715, |
|
"eval_samples_per_second": 163.031, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.045207126034654e-06, |
|
"loss": 1.3632, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"eval_loss": 1.411368489265442, |
|
"eval_runtime": 91.9948, |
|
"eval_samples_per_second": 162.281, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.800897106392127e-06, |
|
"loss": 1.3346, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 1.4163134098052979, |
|
"eval_runtime": 91.6974, |
|
"eval_samples_per_second": 162.807, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.5565870867496015e-06, |
|
"loss": 1.3371, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.4231865406036377, |
|
"eval_runtime": 91.7114, |
|
"eval_samples_per_second": 162.782, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 5.312765687146361e-06, |
|
"loss": 1.3077, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 1.4206013679504395, |
|
"eval_runtime": 91.6392, |
|
"eval_samples_per_second": 162.911, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 5.068455667503836e-06, |
|
"loss": 1.3352, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 1.4135525226593018, |
|
"eval_runtime": 91.6091, |
|
"eval_samples_per_second": 162.964, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.824145647861311e-06, |
|
"loss": 1.3442, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 1.413482904434204, |
|
"eval_runtime": 91.795, |
|
"eval_samples_per_second": 162.634, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.579835628218785e-06, |
|
"loss": 1.3061, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 1.4136689901351929, |
|
"eval_runtime": 92.0252, |
|
"eval_samples_per_second": 162.227, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.336014228615544e-06, |
|
"loss": 1.3456, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 1.40234375, |
|
"eval_runtime": 92.0442, |
|
"eval_samples_per_second": 162.194, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.091704208973018e-06, |
|
"loss": 1.2899, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 1.3845499753952026, |
|
"eval_runtime": 91.7463, |
|
"eval_samples_per_second": 162.72, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.8473941893304926e-06, |
|
"loss": 1.313, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 1.3685582876205444, |
|
"eval_runtime": 91.9455, |
|
"eval_samples_per_second": 162.368, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.6030841696879673e-06, |
|
"loss": 1.3106, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.3997061252593994, |
|
"eval_runtime": 91.5804, |
|
"eval_samples_per_second": 163.015, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.358774150045442e-06, |
|
"loss": 1.2682, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 1.3713297843933105, |
|
"eval_runtime": 91.77, |
|
"eval_samples_per_second": 162.678, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.1149527504422013e-06, |
|
"loss": 1.3131, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 1.381724238395691, |
|
"eval_runtime": 91.533, |
|
"eval_samples_per_second": 163.1, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.8706427307996756e-06, |
|
"loss": 1.3206, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.3894230127334595, |
|
"eval_runtime": 91.9557, |
|
"eval_samples_per_second": 162.35, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.62633271115715e-06, |
|
"loss": 1.3516, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 1.3866136074066162, |
|
"eval_runtime": 91.8786, |
|
"eval_samples_per_second": 162.486, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.3820226915146242e-06, |
|
"loss": 1.271, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 1.3788957595825195, |
|
"eval_runtime": 91.4774, |
|
"eval_samples_per_second": 163.199, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.138201291911384e-06, |
|
"loss": 1.3023, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 1.3791213035583496, |
|
"eval_runtime": 91.8238, |
|
"eval_samples_per_second": 162.583, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.8938912722688583e-06, |
|
"loss": 1.3217, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 1.393075704574585, |
|
"eval_runtime": 91.8262, |
|
"eval_samples_per_second": 162.579, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.6495812526263328e-06, |
|
"loss": 1.3333, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 1.3756201267242432, |
|
"eval_runtime": 91.6059, |
|
"eval_samples_per_second": 162.97, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.4052712329838073e-06, |
|
"loss": 1.2818, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 1.3732881546020508, |
|
"eval_runtime": 91.7604, |
|
"eval_samples_per_second": 162.696, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.1614498333805667e-06, |
|
"loss": 1.2944, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 1.3781006336212158, |
|
"eval_runtime": 91.6872, |
|
"eval_samples_per_second": 162.825, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 9.171398137380411e-07, |
|
"loss": 1.312, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 1.3845880031585693, |
|
"eval_runtime": 92.0248, |
|
"eval_samples_per_second": 162.228, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.728297940955155e-07, |
|
"loss": 1.3098, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 1.383412480354309, |
|
"eval_runtime": 92.0563, |
|
"eval_samples_per_second": 162.172, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.285197744529899e-07, |
|
"loss": 1.2995, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 1.381857991218567, |
|
"eval_runtime": 91.7462, |
|
"eval_samples_per_second": 162.721, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.842097548104643e-07, |
|
"loss": 1.2507, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.3710229396820068, |
|
"eval_runtime": 92.2003, |
|
"eval_samples_per_second": 161.919, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 112329, |
|
"total_flos": 15705683781120.0, |
|
"train_runtime": 32291.4207, |
|
"train_samples_per_second": 3.479 |
|
} |
|
], |
|
"max_steps": 112329, |
|
"num_train_epochs": 3, |
|
"total_flos": 15705683781120.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|