|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9845207382417147, |
|
"eval_steps": 200, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.24929340183734894, |
|
"eval_runtime": 152.0524, |
|
"eval_samples_per_second": 942.675, |
|
"eval_steps_per_second": 3.683, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 0.397050142288208, |
|
"eval_runtime": 151.7752, |
|
"eval_samples_per_second": 944.397, |
|
"eval_steps_per_second": 3.69, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.751934907719786e-05, |
|
"loss": 0.4919, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 0.619749903678894, |
|
"eval_runtime": 153.0348, |
|
"eval_samples_per_second": 936.624, |
|
"eval_steps_per_second": 3.659, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 0.5482326149940491, |
|
"eval_runtime": 151.3209, |
|
"eval_samples_per_second": 947.232, |
|
"eval_steps_per_second": 3.701, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.5038698154395716e-05, |
|
"loss": 0.9307, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 0.861940324306488, |
|
"eval_runtime": 150.9686, |
|
"eval_samples_per_second": 949.442, |
|
"eval_steps_per_second": 3.709, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.5618771910667419, |
|
"eval_runtime": 151.1536, |
|
"eval_samples_per_second": 948.28, |
|
"eval_steps_per_second": 3.705, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.7757473587989807, |
|
"eval_runtime": 151.2249, |
|
"eval_samples_per_second": 947.834, |
|
"eval_steps_per_second": 3.703, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.255804723159357e-05, |
|
"loss": 1.6552, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.5049824714660645, |
|
"eval_runtime": 151.8547, |
|
"eval_samples_per_second": 943.902, |
|
"eval_steps_per_second": 3.688, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.1517618894577026, |
|
"eval_runtime": 151.1455, |
|
"eval_samples_per_second": 948.331, |
|
"eval_steps_per_second": 3.705, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.0077396308791423e-05, |
|
"loss": 1.1387, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.0938903093338013, |
|
"eval_runtime": 151.2372, |
|
"eval_samples_per_second": 947.756, |
|
"eval_steps_per_second": 3.703, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 9.282928466796875, |
|
"eval_runtime": 150.978, |
|
"eval_samples_per_second": 949.383, |
|
"eval_steps_per_second": 3.709, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.2713712155818939, |
|
"eval_runtime": 151.4204, |
|
"eval_samples_per_second": 946.61, |
|
"eval_steps_per_second": 3.698, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 3.7596745385989284e-05, |
|
"loss": 8.5966, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.12629659473896027, |
|
"eval_runtime": 151.6925, |
|
"eval_samples_per_second": 944.911, |
|
"eval_steps_per_second": 3.692, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.11906945705413818, |
|
"eval_runtime": 151.1292, |
|
"eval_samples_per_second": 948.434, |
|
"eval_steps_per_second": 3.705, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.5116094463187144e-05, |
|
"loss": 0.1233, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.11606267094612122, |
|
"eval_runtime": 151.1195, |
|
"eval_samples_per_second": 948.494, |
|
"eval_steps_per_second": 3.706, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.11504378169775009, |
|
"eval_runtime": 151.2469, |
|
"eval_samples_per_second": 947.695, |
|
"eval_steps_per_second": 3.703, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 0.11453282833099365, |
|
"eval_runtime": 151.278, |
|
"eval_samples_per_second": 947.501, |
|
"eval_steps_per_second": 3.702, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.2635443540385e-05, |
|
"loss": 0.1166, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.1138230413198471, |
|
"eval_runtime": 151.2516, |
|
"eval_samples_per_second": 947.666, |
|
"eval_steps_per_second": 3.702, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.11347956955432892, |
|
"eval_runtime": 151.7267, |
|
"eval_samples_per_second": 944.699, |
|
"eval_steps_per_second": 3.691, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.0154792617582855e-05, |
|
"loss": 0.1151, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.11324501782655716, |
|
"eval_runtime": 151.6688, |
|
"eval_samples_per_second": 945.059, |
|
"eval_steps_per_second": 3.692, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.11296597123146057, |
|
"eval_runtime": 151.2257, |
|
"eval_samples_per_second": 947.829, |
|
"eval_steps_per_second": 3.703, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.11248484253883362, |
|
"eval_runtime": 151.3062, |
|
"eval_samples_per_second": 947.324, |
|
"eval_steps_per_second": 3.701, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.7674141694780715e-05, |
|
"loss": 0.1131, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.11222843080759048, |
|
"eval_runtime": 151.1995, |
|
"eval_samples_per_second": 947.992, |
|
"eval_steps_per_second": 3.704, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.1118897795677185, |
|
"eval_runtime": 151.3101, |
|
"eval_samples_per_second": 947.3, |
|
"eval_steps_per_second": 3.701, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.519349077197857e-05, |
|
"loss": 0.1132, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.11164118349552155, |
|
"eval_runtime": 151.2544, |
|
"eval_samples_per_second": 947.648, |
|
"eval_steps_per_second": 3.702, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.11147473752498627, |
|
"eval_runtime": 151.2525, |
|
"eval_samples_per_second": 947.661, |
|
"eval_steps_per_second": 3.702, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.1114504486322403, |
|
"eval_runtime": 151.3584, |
|
"eval_samples_per_second": 946.997, |
|
"eval_steps_per_second": 3.7, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 2.2712839849176426e-05, |
|
"loss": 0.1123, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.1111702173948288, |
|
"eval_runtime": 151.4231, |
|
"eval_samples_per_second": 946.592, |
|
"eval_steps_per_second": 3.698, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.11109592020511627, |
|
"eval_runtime": 151.3306, |
|
"eval_samples_per_second": 947.171, |
|
"eval_steps_per_second": 3.701, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0232188926374283e-05, |
|
"loss": 0.1116, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.11104920506477356, |
|
"eval_runtime": 151.4541, |
|
"eval_samples_per_second": 946.399, |
|
"eval_steps_per_second": 3.697, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.11098004877567291, |
|
"eval_runtime": 151.5152, |
|
"eval_samples_per_second": 946.017, |
|
"eval_steps_per_second": 3.696, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 0.11082496494054794, |
|
"eval_runtime": 151.4327, |
|
"eval_samples_per_second": 946.533, |
|
"eval_steps_per_second": 3.698, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.775153800357214e-05, |
|
"loss": 0.1132, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.11074619740247726, |
|
"eval_runtime": 151.2307, |
|
"eval_samples_per_second": 947.797, |
|
"eval_steps_per_second": 3.703, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.11220254749059677, |
|
"eval_runtime": 151.2796, |
|
"eval_samples_per_second": 947.491, |
|
"eval_steps_per_second": 3.702, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.5270887080769993e-05, |
|
"loss": 0.2039, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.11101505160331726, |
|
"eval_runtime": 151.22, |
|
"eval_samples_per_second": 947.864, |
|
"eval_steps_per_second": 3.703, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.11076509952545166, |
|
"eval_runtime": 151.1874, |
|
"eval_samples_per_second": 948.068, |
|
"eval_steps_per_second": 3.704, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.11064371466636658, |
|
"eval_runtime": 151.2298, |
|
"eval_samples_per_second": 947.803, |
|
"eval_steps_per_second": 3.703, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.2790236157967852e-05, |
|
"loss": 0.1107, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.1105843037366867, |
|
"eval_runtime": 151.246, |
|
"eval_samples_per_second": 947.701, |
|
"eval_steps_per_second": 3.703, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.11054880172014236, |
|
"eval_runtime": 151.2949, |
|
"eval_samples_per_second": 947.395, |
|
"eval_steps_per_second": 3.701, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.0309585235165709e-05, |
|
"loss": 0.1115, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.11044388264417648, |
|
"eval_runtime": 151.2161, |
|
"eval_samples_per_second": 947.888, |
|
"eval_steps_per_second": 3.703, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_loss": 0.11042939871549606, |
|
"eval_runtime": 151.2469, |
|
"eval_samples_per_second": 947.695, |
|
"eval_steps_per_second": 3.703, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.11039307713508606, |
|
"eval_runtime": 151.2801, |
|
"eval_samples_per_second": 947.487, |
|
"eval_steps_per_second": 3.702, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 7.828934312363564e-06, |
|
"loss": 0.1106, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.1104336529970169, |
|
"eval_runtime": 151.2744, |
|
"eval_samples_per_second": 947.523, |
|
"eval_steps_per_second": 3.702, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.11029984056949615, |
|
"eval_runtime": 151.1978, |
|
"eval_samples_per_second": 948.003, |
|
"eval_steps_per_second": 3.704, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.348283389561421e-06, |
|
"loss": 0.1092, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.11028754711151123, |
|
"eval_runtime": 151.3426, |
|
"eval_samples_per_second": 947.096, |
|
"eval_steps_per_second": 3.7, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.11026974767446518, |
|
"eval_runtime": 151.187, |
|
"eval_samples_per_second": 948.071, |
|
"eval_steps_per_second": 3.704, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_loss": 0.11021895706653595, |
|
"eval_runtime": 151.1765, |
|
"eval_samples_per_second": 948.137, |
|
"eval_steps_per_second": 3.704, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.867632466759278e-06, |
|
"loss": 0.111, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.11018586158752441, |
|
"eval_runtime": 151.3213, |
|
"eval_samples_per_second": 947.229, |
|
"eval_steps_per_second": 3.701, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.11020087450742722, |
|
"eval_runtime": 151.3699, |
|
"eval_samples_per_second": 946.925, |
|
"eval_steps_per_second": 3.7, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.869815439571344e-07, |
|
"loss": 0.1109, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.11018609255552292, |
|
"eval_runtime": 151.328, |
|
"eval_samples_per_second": 947.187, |
|
"eval_steps_per_second": 3.701, |
|
"step": 10000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10078, |
|
"num_train_epochs": 2, |
|
"save_steps": 1000, |
|
"total_flos": 1.6722690048e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|