|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 436.28808864265926, |
|
"global_step": 630000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.999307479224377e-05, |
|
"loss": 5.6258, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9986149584487535e-05, |
|
"loss": 5.4807, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 5.224731922149658, |
|
"eval_runtime": 75.7111, |
|
"eval_samples_per_second": 6503.99, |
|
"eval_steps_per_second": 2.127, |
|
"step": 1444 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.9979224376731302e-05, |
|
"loss": 5.3786, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.9972299168975073e-05, |
|
"loss": 5.2907, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 1.996537396121884e-05, |
|
"loss": 5.2208, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 5.035708427429199, |
|
"eval_runtime": 74.9448, |
|
"eval_samples_per_second": 6570.491, |
|
"eval_steps_per_second": 2.148, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.9958448753462603e-05, |
|
"loss": 5.163, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 1.9951523545706373e-05, |
|
"loss": 5.1119, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.994459833795014e-05, |
|
"loss": 5.0685, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 4.928272724151611, |
|
"eval_runtime": 74.5205, |
|
"eval_samples_per_second": 6607.9, |
|
"eval_steps_per_second": 2.16, |
|
"step": 4332 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.9937673130193907e-05, |
|
"loss": 5.034, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 1.9930747922437674e-05, |
|
"loss": 5.0038, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.992382271468144e-05, |
|
"loss": 4.9748, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 4.848881721496582, |
|
"eval_runtime": 73.1546, |
|
"eval_samples_per_second": 6731.277, |
|
"eval_steps_per_second": 2.201, |
|
"step": 5776 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.991689750692521e-05, |
|
"loss": 4.9513, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.9909972299168975e-05, |
|
"loss": 4.9265, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.9903047091412745e-05, |
|
"loss": 4.9046, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 4.779133319854736, |
|
"eval_runtime": 74.0757, |
|
"eval_samples_per_second": 6647.578, |
|
"eval_steps_per_second": 2.173, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 1.9896121883656512e-05, |
|
"loss": 4.8871, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 1.988919667590028e-05, |
|
"loss": 4.8665, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 1.9882271468144046e-05, |
|
"loss": 4.8483, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 4.739624977111816, |
|
"eval_runtime": 74.1752, |
|
"eval_samples_per_second": 6638.657, |
|
"eval_steps_per_second": 2.171, |
|
"step": 8664 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 1.9875346260387813e-05, |
|
"loss": 4.8328, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 1.986842105263158e-05, |
|
"loss": 4.8205, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 1.9861495844875347e-05, |
|
"loss": 4.8032, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 4.699394702911377, |
|
"eval_runtime": 74.6581, |
|
"eval_samples_per_second": 6595.725, |
|
"eval_steps_per_second": 2.156, |
|
"step": 10108 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 1.9854570637119114e-05, |
|
"loss": 4.7918, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 1.9847645429362884e-05, |
|
"loss": 4.7758, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.984072022160665e-05, |
|
"loss": 4.7675, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 4.659648895263672, |
|
"eval_runtime": 77.484, |
|
"eval_samples_per_second": 6355.173, |
|
"eval_steps_per_second": 2.078, |
|
"step": 11552 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 1.9833795013850418e-05, |
|
"loss": 4.7551, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 1.9826869806094185e-05, |
|
"loss": 4.7441, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 4.621845722198486, |
|
"eval_runtime": 74.377, |
|
"eval_samples_per_second": 6620.653, |
|
"eval_steps_per_second": 2.165, |
|
"step": 12996 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 1.9819944598337952e-05, |
|
"loss": 4.7288, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 1.981301939058172e-05, |
|
"loss": 4.7199, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1.9806094182825486e-05, |
|
"loss": 4.7093, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 4.591496467590332, |
|
"eval_runtime": 74.6789, |
|
"eval_samples_per_second": 6593.887, |
|
"eval_steps_per_second": 2.156, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 1.9799168975069252e-05, |
|
"loss": 4.7023, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 10.39, |
|
"learning_rate": 1.9792243767313023e-05, |
|
"loss": 4.6882, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 1.9785318559556786e-05, |
|
"loss": 4.6787, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 4.566598415374756, |
|
"eval_runtime": 74.19, |
|
"eval_samples_per_second": 6637.336, |
|
"eval_steps_per_second": 2.17, |
|
"step": 15884 |
|
}, |
|
{ |
|
"epoch": 11.08, |
|
"learning_rate": 1.9778393351800557e-05, |
|
"loss": 4.668, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 11.43, |
|
"learning_rate": 1.9771468144044324e-05, |
|
"loss": 4.6627, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 11.77, |
|
"learning_rate": 1.976454293628809e-05, |
|
"loss": 4.6523, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 4.539627552032471, |
|
"eval_runtime": 74.3111, |
|
"eval_samples_per_second": 6626.517, |
|
"eval_steps_per_second": 2.167, |
|
"step": 17328 |
|
}, |
|
{ |
|
"epoch": 12.12, |
|
"learning_rate": 1.9757617728531857e-05, |
|
"loss": 4.6439, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 1.9750692520775624e-05, |
|
"loss": 4.6371, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 1.9743767313019395e-05, |
|
"loss": 4.6281, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 4.52562141418457, |
|
"eval_runtime": 75.4167, |
|
"eval_samples_per_second": 6529.372, |
|
"eval_steps_per_second": 2.135, |
|
"step": 18772 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 4.6176, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 13.5, |
|
"learning_rate": 1.9729916897506925e-05, |
|
"loss": 4.6111, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 1.9722991689750695e-05, |
|
"loss": 4.6068, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 4.500814437866211, |
|
"eval_runtime": 74.0488, |
|
"eval_samples_per_second": 6649.993, |
|
"eval_steps_per_second": 2.174, |
|
"step": 20216 |
|
}, |
|
{ |
|
"epoch": 14.2, |
|
"learning_rate": 1.9716066481994462e-05, |
|
"loss": 4.5989, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 14.54, |
|
"learning_rate": 1.970914127423823e-05, |
|
"loss": 4.5929, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 1.9702216066481996e-05, |
|
"loss": 4.5839, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 4.482580661773682, |
|
"eval_runtime": 74.9815, |
|
"eval_samples_per_second": 6567.273, |
|
"eval_steps_per_second": 2.147, |
|
"step": 21660 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 1.9695290858725763e-05, |
|
"loss": 4.5769, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 1.968836565096953e-05, |
|
"loss": 4.5733, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 15.93, |
|
"learning_rate": 1.9681440443213297e-05, |
|
"loss": 4.5638, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 4.467035293579102, |
|
"eval_runtime": 74.2416, |
|
"eval_samples_per_second": 6632.726, |
|
"eval_steps_per_second": 2.169, |
|
"step": 23104 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 1.9674515235457067e-05, |
|
"loss": 4.5584, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 1.9667590027700834e-05, |
|
"loss": 4.5497, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 16.97, |
|
"learning_rate": 1.9660664819944598e-05, |
|
"loss": 4.5469, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 4.447102069854736, |
|
"eval_runtime": 74.6073, |
|
"eval_samples_per_second": 6600.208, |
|
"eval_steps_per_second": 2.158, |
|
"step": 24548 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 1.9653739612188368e-05, |
|
"loss": 4.5406, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 17.66, |
|
"learning_rate": 1.9646814404432135e-05, |
|
"loss": 4.5333, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 4.4333930015563965, |
|
"eval_runtime": 74.4269, |
|
"eval_samples_per_second": 6616.211, |
|
"eval_steps_per_second": 2.163, |
|
"step": 25992 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.9639889196675902e-05, |
|
"loss": 4.5288, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 18.35, |
|
"learning_rate": 1.963296398891967e-05, |
|
"loss": 4.5174, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 18.7, |
|
"learning_rate": 1.9626038781163436e-05, |
|
"loss": 4.5178, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 4.415244102478027, |
|
"eval_runtime": 74.8709, |
|
"eval_samples_per_second": 6576.976, |
|
"eval_steps_per_second": 2.15, |
|
"step": 27436 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 1.9619113573407206e-05, |
|
"loss": 4.5116, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 19.39, |
|
"learning_rate": 1.961218836565097e-05, |
|
"loss": 4.5031, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 19.74, |
|
"learning_rate": 1.960526315789474e-05, |
|
"loss": 4.501, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 4.401387691497803, |
|
"eval_runtime": 74.9583, |
|
"eval_samples_per_second": 6569.303, |
|
"eval_steps_per_second": 2.148, |
|
"step": 28880 |
|
}, |
|
{ |
|
"epoch": 20.08, |
|
"learning_rate": 1.9598337950138507e-05, |
|
"loss": 4.4967, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 20.43, |
|
"learning_rate": 1.9591412742382274e-05, |
|
"loss": 4.4896, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 20.78, |
|
"learning_rate": 1.958448753462604e-05, |
|
"loss": 4.4853, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 4.387256622314453, |
|
"eval_runtime": 75.2035, |
|
"eval_samples_per_second": 6547.89, |
|
"eval_steps_per_second": 2.141, |
|
"step": 30324 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 1.9577562326869807e-05, |
|
"loss": 4.4815, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"learning_rate": 1.9570637119113574e-05, |
|
"loss": 4.4741, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 21.81, |
|
"learning_rate": 1.956371191135734e-05, |
|
"loss": 4.4705, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 4.372128963470459, |
|
"eval_runtime": 75.7644, |
|
"eval_samples_per_second": 6499.411, |
|
"eval_steps_per_second": 2.125, |
|
"step": 31768 |
|
}, |
|
{ |
|
"epoch": 22.16, |
|
"learning_rate": 1.9556786703601108e-05, |
|
"loss": 4.465, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 22.51, |
|
"learning_rate": 1.954986149584488e-05, |
|
"loss": 4.4612, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 22.85, |
|
"learning_rate": 1.9542936288088645e-05, |
|
"loss": 4.4548, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 4.359036445617676, |
|
"eval_runtime": 75.0614, |
|
"eval_samples_per_second": 6560.284, |
|
"eval_steps_per_second": 2.145, |
|
"step": 33212 |
|
}, |
|
{ |
|
"epoch": 23.2, |
|
"learning_rate": 1.9536011080332412e-05, |
|
"loss": 4.452, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 23.55, |
|
"learning_rate": 1.952908587257618e-05, |
|
"loss": 4.4467, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 23.89, |
|
"learning_rate": 1.9522160664819946e-05, |
|
"loss": 4.4435, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 4.351931571960449, |
|
"eval_runtime": 73.9885, |
|
"eval_samples_per_second": 6655.409, |
|
"eval_steps_per_second": 2.176, |
|
"step": 34656 |
|
}, |
|
{ |
|
"epoch": 24.24, |
|
"learning_rate": 1.9515235457063713e-05, |
|
"loss": 4.4384, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 1.950831024930748e-05, |
|
"loss": 4.4346, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 24.93, |
|
"learning_rate": 1.9501385041551247e-05, |
|
"loss": 4.4276, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 4.33967924118042, |
|
"eval_runtime": 73.6152, |
|
"eval_samples_per_second": 6689.162, |
|
"eval_steps_per_second": 2.187, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 25.28, |
|
"learning_rate": 1.9494459833795017e-05, |
|
"loss": 4.4249, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 1.948753462603878e-05, |
|
"loss": 4.4245, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"learning_rate": 1.948060941828255e-05, |
|
"loss": 4.4166, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 4.3328375816345215, |
|
"eval_runtime": 74.8225, |
|
"eval_samples_per_second": 6581.225, |
|
"eval_steps_per_second": 2.152, |
|
"step": 37544 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 1.9473684210526318e-05, |
|
"loss": 4.4133, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 26.66, |
|
"learning_rate": 1.9466759002770085e-05, |
|
"loss": 4.41, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 4.31511926651001, |
|
"eval_runtime": 75.2713, |
|
"eval_samples_per_second": 6541.986, |
|
"eval_steps_per_second": 2.139, |
|
"step": 38988 |
|
}, |
|
{ |
|
"epoch": 27.01, |
|
"learning_rate": 1.9459833795013852e-05, |
|
"loss": 4.4064, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 27.35, |
|
"learning_rate": 1.945290858725762e-05, |
|
"loss": 4.403, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"learning_rate": 1.944598337950139e-05, |
|
"loss": 4.3996, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 4.304774761199951, |
|
"eval_runtime": 73.8005, |
|
"eval_samples_per_second": 6672.362, |
|
"eval_steps_per_second": 2.182, |
|
"step": 40432 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 1.9439058171745153e-05, |
|
"loss": 4.3957, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.943213296398892e-05, |
|
"loss": 4.3921, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 28.74, |
|
"learning_rate": 1.942520775623269e-05, |
|
"loss": 4.3886, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 4.294938564300537, |
|
"eval_runtime": 76.8852, |
|
"eval_samples_per_second": 6404.662, |
|
"eval_steps_per_second": 2.094, |
|
"step": 41876 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"learning_rate": 1.9418282548476453e-05, |
|
"loss": 4.3851, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 29.43, |
|
"learning_rate": 1.9411357340720224e-05, |
|
"loss": 4.3808, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 29.78, |
|
"learning_rate": 1.940443213296399e-05, |
|
"loss": 4.3776, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 4.287764549255371, |
|
"eval_runtime": 74.0431, |
|
"eval_samples_per_second": 6650.502, |
|
"eval_steps_per_second": 2.174, |
|
"step": 43320 |
|
}, |
|
{ |
|
"epoch": 30.12, |
|
"learning_rate": 1.9397506925207758e-05, |
|
"loss": 4.3721, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 30.47, |
|
"learning_rate": 1.9390581717451524e-05, |
|
"loss": 4.3705, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 30.82, |
|
"learning_rate": 1.938365650969529e-05, |
|
"loss": 4.3686, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 4.277154922485352, |
|
"eval_runtime": 74.74, |
|
"eval_samples_per_second": 6588.493, |
|
"eval_steps_per_second": 2.154, |
|
"step": 44764 |
|
}, |
|
{ |
|
"epoch": 31.16, |
|
"learning_rate": 1.937673130193906e-05, |
|
"loss": 4.3631, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 31.51, |
|
"learning_rate": 1.936980609418283e-05, |
|
"loss": 4.363, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 31.86, |
|
"learning_rate": 1.9362880886426595e-05, |
|
"loss": 4.3572, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 4.265904903411865, |
|
"eval_runtime": 74.1213, |
|
"eval_samples_per_second": 6643.49, |
|
"eval_steps_per_second": 2.172, |
|
"step": 46208 |
|
}, |
|
{ |
|
"epoch": 32.2, |
|
"learning_rate": 1.9355955678670362e-05, |
|
"loss": 4.3549, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 32.55, |
|
"learning_rate": 1.934903047091413e-05, |
|
"loss": 4.352, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 32.89, |
|
"learning_rate": 1.9342105263157896e-05, |
|
"loss": 4.3518, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 4.2521071434021, |
|
"eval_runtime": 73.7943, |
|
"eval_samples_per_second": 6672.929, |
|
"eval_steps_per_second": 2.182, |
|
"step": 47652 |
|
}, |
|
{ |
|
"epoch": 33.24, |
|
"learning_rate": 1.9335180055401663e-05, |
|
"loss": 4.345, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 33.59, |
|
"learning_rate": 1.932825484764543e-05, |
|
"loss": 4.344, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 33.93, |
|
"learning_rate": 1.93213296398892e-05, |
|
"loss": 4.3377, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 4.250110626220703, |
|
"eval_runtime": 76.5664, |
|
"eval_samples_per_second": 6431.332, |
|
"eval_steps_per_second": 2.103, |
|
"step": 49096 |
|
}, |
|
{ |
|
"epoch": 34.28, |
|
"learning_rate": 1.9314404432132964e-05, |
|
"loss": 4.333, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 34.63, |
|
"learning_rate": 1.9307479224376734e-05, |
|
"loss": 4.3307, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 34.97, |
|
"learning_rate": 1.93005540166205e-05, |
|
"loss": 4.3292, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 4.243873596191406, |
|
"eval_runtime": 75.8252, |
|
"eval_samples_per_second": 6494.199, |
|
"eval_steps_per_second": 2.123, |
|
"step": 50540 |
|
}, |
|
{ |
|
"epoch": 35.32, |
|
"learning_rate": 1.9293628808864268e-05, |
|
"loss": 4.3287, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 35.66, |
|
"learning_rate": 1.9286703601108035e-05, |
|
"loss": 4.3236, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 4.227120399475098, |
|
"eval_runtime": 75.2969, |
|
"eval_samples_per_second": 6539.764, |
|
"eval_steps_per_second": 2.138, |
|
"step": 51984 |
|
}, |
|
{ |
|
"epoch": 36.01, |
|
"learning_rate": 1.9279778393351802e-05, |
|
"loss": 4.3221, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 36.36, |
|
"learning_rate": 1.927285318559557e-05, |
|
"loss": 4.3191, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 36.7, |
|
"learning_rate": 1.9265927977839336e-05, |
|
"loss": 4.3147, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 4.228574752807617, |
|
"eval_runtime": 73.7741, |
|
"eval_samples_per_second": 6674.754, |
|
"eval_steps_per_second": 2.182, |
|
"step": 53428 |
|
}, |
|
{ |
|
"epoch": 37.05, |
|
"learning_rate": 1.9259002770083103e-05, |
|
"loss": 4.3139, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 37.4, |
|
"learning_rate": 1.9252077562326873e-05, |
|
"loss": 4.3107, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 1.924515235457064e-05, |
|
"loss": 4.3054, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 4.218676567077637, |
|
"eval_runtime": 73.9641, |
|
"eval_samples_per_second": 6657.61, |
|
"eval_steps_per_second": 2.177, |
|
"step": 54872 |
|
}, |
|
{ |
|
"epoch": 38.09, |
|
"learning_rate": 1.9238227146814407e-05, |
|
"loss": 4.303, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 1.9231301939058174e-05, |
|
"loss": 4.3031, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 38.78, |
|
"learning_rate": 1.922437673130194e-05, |
|
"loss": 4.3004, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 4.211672306060791, |
|
"eval_runtime": 73.1433, |
|
"eval_samples_per_second": 6732.321, |
|
"eval_steps_per_second": 2.201, |
|
"step": 56316 |
|
}, |
|
{ |
|
"epoch": 39.13, |
|
"learning_rate": 1.9217451523545708e-05, |
|
"loss": 4.2952, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 1.9210526315789474e-05, |
|
"loss": 4.291, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 39.82, |
|
"learning_rate": 1.920360110803324e-05, |
|
"loss": 4.2922, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 4.2032246589660645, |
|
"eval_runtime": 74.4105, |
|
"eval_samples_per_second": 6617.665, |
|
"eval_steps_per_second": 2.164, |
|
"step": 57760 |
|
}, |
|
{ |
|
"epoch": 40.17, |
|
"learning_rate": 1.9196675900277012e-05, |
|
"loss": 4.2891, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 40.51, |
|
"learning_rate": 1.9189750692520775e-05, |
|
"loss": 4.2848, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 40.86, |
|
"learning_rate": 1.9182825484764546e-05, |
|
"loss": 4.2841, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 4.192102432250977, |
|
"eval_runtime": 75.6355, |
|
"eval_samples_per_second": 6510.485, |
|
"eval_steps_per_second": 2.129, |
|
"step": 59204 |
|
}, |
|
{ |
|
"epoch": 41.2, |
|
"learning_rate": 1.9175900277008312e-05, |
|
"loss": 4.2807, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 41.55, |
|
"learning_rate": 1.916897506925208e-05, |
|
"loss": 4.2781, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 41.9, |
|
"learning_rate": 1.9162049861495846e-05, |
|
"loss": 4.2765, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 4.186478614807129, |
|
"eval_runtime": 74.8014, |
|
"eval_samples_per_second": 6583.085, |
|
"eval_steps_per_second": 2.152, |
|
"step": 60648 |
|
}, |
|
{ |
|
"epoch": 42.24, |
|
"learning_rate": 1.9155124653739613e-05, |
|
"loss": 4.2787, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 42.59, |
|
"learning_rate": 1.9148199445983384e-05, |
|
"loss": 4.2747, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 42.94, |
|
"learning_rate": 1.9141274238227147e-05, |
|
"loss": 4.2699, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 4.183748245239258, |
|
"eval_runtime": 74.0325, |
|
"eval_samples_per_second": 6651.456, |
|
"eval_steps_per_second": 2.175, |
|
"step": 62092 |
|
}, |
|
{ |
|
"epoch": 43.28, |
|
"learning_rate": 1.9134349030470914e-05, |
|
"loss": 4.2688, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 43.63, |
|
"learning_rate": 1.9127423822714684e-05, |
|
"loss": 4.2662, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 43.98, |
|
"learning_rate": 1.9120498614958448e-05, |
|
"loss": 4.2634, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 4.1764092445373535, |
|
"eval_runtime": 74.4165, |
|
"eval_samples_per_second": 6617.138, |
|
"eval_steps_per_second": 2.163, |
|
"step": 63536 |
|
}, |
|
{ |
|
"epoch": 44.32, |
|
"learning_rate": 1.9113573407202218e-05, |
|
"loss": 4.2594, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 44.67, |
|
"learning_rate": 1.9106648199445985e-05, |
|
"loss": 4.2598, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 4.171951770782471, |
|
"eval_runtime": 73.3834, |
|
"eval_samples_per_second": 6710.29, |
|
"eval_steps_per_second": 2.194, |
|
"step": 64980 |
|
}, |
|
{ |
|
"epoch": 45.01, |
|
"learning_rate": 1.9099722991689752e-05, |
|
"loss": 4.256, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 45.36, |
|
"learning_rate": 1.909279778393352e-05, |
|
"loss": 4.2541, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 45.71, |
|
"learning_rate": 1.9085872576177286e-05, |
|
"loss": 4.2499, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 4.162701606750488, |
|
"eval_runtime": 73.3897, |
|
"eval_samples_per_second": 6709.716, |
|
"eval_steps_per_second": 2.194, |
|
"step": 66424 |
|
}, |
|
{ |
|
"epoch": 46.05, |
|
"learning_rate": 1.9078947368421056e-05, |
|
"loss": 4.2489, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 46.4, |
|
"learning_rate": 1.9072022160664823e-05, |
|
"loss": 4.2488, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 46.75, |
|
"learning_rate": 1.906509695290859e-05, |
|
"loss": 4.2437, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 4.157140254974365, |
|
"eval_runtime": 73.7335, |
|
"eval_samples_per_second": 6678.427, |
|
"eval_steps_per_second": 2.184, |
|
"step": 67868 |
|
}, |
|
{ |
|
"epoch": 47.09, |
|
"learning_rate": 1.9058171745152357e-05, |
|
"loss": 4.2438, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 47.44, |
|
"learning_rate": 1.9051246537396124e-05, |
|
"loss": 4.2436, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 47.78, |
|
"learning_rate": 1.904432132963989e-05, |
|
"loss": 4.24, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 4.155510902404785, |
|
"eval_runtime": 73.7665, |
|
"eval_samples_per_second": 6675.438, |
|
"eval_steps_per_second": 2.183, |
|
"step": 69312 |
|
}, |
|
{ |
|
"epoch": 48.13, |
|
"learning_rate": 1.9037396121883658e-05, |
|
"loss": 4.2359, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 48.48, |
|
"learning_rate": 1.9030470914127425e-05, |
|
"loss": 4.2356, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 48.82, |
|
"learning_rate": 1.9023545706371195e-05, |
|
"loss": 4.2334, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 4.148888111114502, |
|
"eval_runtime": 76.009, |
|
"eval_samples_per_second": 6478.494, |
|
"eval_steps_per_second": 2.118, |
|
"step": 70756 |
|
}, |
|
{ |
|
"epoch": 49.17, |
|
"learning_rate": 1.901662049861496e-05, |
|
"loss": 4.2319, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 49.52, |
|
"learning_rate": 1.900969529085873e-05, |
|
"loss": 4.2288, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 49.86, |
|
"learning_rate": 1.9002770083102496e-05, |
|
"loss": 4.2286, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 4.136005401611328, |
|
"eval_runtime": 74.0583, |
|
"eval_samples_per_second": 6649.141, |
|
"eval_steps_per_second": 2.174, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 50.21, |
|
"learning_rate": 1.8995844875346263e-05, |
|
"loss": 4.2234, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 50.55, |
|
"learning_rate": 1.898891966759003e-05, |
|
"loss": 4.2226, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 50.9, |
|
"learning_rate": 1.8981994459833796e-05, |
|
"loss": 4.2213, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 4.137174606323242, |
|
"eval_runtime": 74.5156, |
|
"eval_samples_per_second": 6608.338, |
|
"eval_steps_per_second": 2.161, |
|
"step": 73644 |
|
}, |
|
{ |
|
"epoch": 51.25, |
|
"learning_rate": 1.8975069252077563e-05, |
|
"loss": 4.2186, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 51.59, |
|
"learning_rate": 1.896814404432133e-05, |
|
"loss": 4.2182, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 51.94, |
|
"learning_rate": 1.8961218836565097e-05, |
|
"loss": 4.2152, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 4.127338886260986, |
|
"eval_runtime": 76.201, |
|
"eval_samples_per_second": 6462.173, |
|
"eval_steps_per_second": 2.113, |
|
"step": 75088 |
|
}, |
|
{ |
|
"epoch": 52.29, |
|
"learning_rate": 1.8954293628808867e-05, |
|
"loss": 4.2122, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 1.894736842105263e-05, |
|
"loss": 4.2135, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 52.98, |
|
"learning_rate": 1.89404432132964e-05, |
|
"loss": 4.211, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 4.125185489654541, |
|
"eval_runtime": 74.874, |
|
"eval_samples_per_second": 6576.7, |
|
"eval_steps_per_second": 2.15, |
|
"step": 76532 |
|
}, |
|
{ |
|
"epoch": 53.32, |
|
"learning_rate": 1.8933518005540168e-05, |
|
"loss": 4.2079, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 53.67, |
|
"learning_rate": 1.8926592797783935e-05, |
|
"loss": 4.2036, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 4.116921901702881, |
|
"eval_runtime": 74.7043, |
|
"eval_samples_per_second": 6591.639, |
|
"eval_steps_per_second": 2.155, |
|
"step": 77976 |
|
}, |
|
{ |
|
"epoch": 54.02, |
|
"learning_rate": 1.8919667590027702e-05, |
|
"loss": 4.2046, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 54.36, |
|
"learning_rate": 1.891274238227147e-05, |
|
"loss": 4.201, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 54.71, |
|
"learning_rate": 1.8905817174515236e-05, |
|
"loss": 4.1969, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 4.117580890655518, |
|
"eval_runtime": 74.4844, |
|
"eval_samples_per_second": 6611.1, |
|
"eval_steps_per_second": 2.162, |
|
"step": 79420 |
|
}, |
|
{ |
|
"epoch": 55.06, |
|
"learning_rate": 1.8898891966759006e-05, |
|
"loss": 4.1987, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 55.4, |
|
"learning_rate": 1.889196675900277e-05, |
|
"loss": 4.1951, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 55.75, |
|
"learning_rate": 1.888504155124654e-05, |
|
"loss": 4.1944, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 4.110030651092529, |
|
"eval_runtime": 73.4183, |
|
"eval_samples_per_second": 6707.104, |
|
"eval_steps_per_second": 2.193, |
|
"step": 80864 |
|
}, |
|
{ |
|
"epoch": 56.09, |
|
"learning_rate": 1.8878116343490307e-05, |
|
"loss": 4.1924, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 56.44, |
|
"learning_rate": 1.8871191135734074e-05, |
|
"loss": 4.1913, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 56.79, |
|
"learning_rate": 1.886426592797784e-05, |
|
"loss": 4.1923, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 4.108407974243164, |
|
"eval_runtime": 74.3146, |
|
"eval_samples_per_second": 6626.209, |
|
"eval_steps_per_second": 2.166, |
|
"step": 82308 |
|
}, |
|
{ |
|
"epoch": 57.13, |
|
"learning_rate": 1.8857340720221608e-05, |
|
"loss": 4.1889, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 57.48, |
|
"learning_rate": 1.8850415512465378e-05, |
|
"loss": 4.1856, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 57.83, |
|
"learning_rate": 1.884349030470914e-05, |
|
"loss": 4.1869, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 4.103403568267822, |
|
"eval_runtime": 75.7605, |
|
"eval_samples_per_second": 6499.745, |
|
"eval_steps_per_second": 2.125, |
|
"step": 83752 |
|
}, |
|
{ |
|
"epoch": 58.17, |
|
"learning_rate": 1.883656509695291e-05, |
|
"loss": 4.181, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 58.52, |
|
"learning_rate": 1.882963988919668e-05, |
|
"loss": 4.1801, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 58.86, |
|
"learning_rate": 1.8822714681440442e-05, |
|
"loss": 4.1802, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 4.101166248321533, |
|
"eval_runtime": 73.6678, |
|
"eval_samples_per_second": 6684.382, |
|
"eval_steps_per_second": 2.185, |
|
"step": 85196 |
|
}, |
|
{ |
|
"epoch": 59.21, |
|
"learning_rate": 1.8815789473684213e-05, |
|
"loss": 4.1776, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 59.56, |
|
"learning_rate": 1.880886426592798e-05, |
|
"loss": 4.1765, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 59.9, |
|
"learning_rate": 1.8801939058171746e-05, |
|
"loss": 4.1764, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 4.089655876159668, |
|
"eval_runtime": 74.6724, |
|
"eval_samples_per_second": 6594.462, |
|
"eval_steps_per_second": 2.156, |
|
"step": 86640 |
|
}, |
|
{ |
|
"epoch": 60.25, |
|
"learning_rate": 1.8795013850415513e-05, |
|
"loss": 4.1752, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 60.6, |
|
"learning_rate": 1.878808864265928e-05, |
|
"loss": 4.1747, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 60.94, |
|
"learning_rate": 1.878116343490305e-05, |
|
"loss": 4.1668, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 4.0857367515563965, |
|
"eval_runtime": 73.6262, |
|
"eval_samples_per_second": 6688.167, |
|
"eval_steps_per_second": 2.187, |
|
"step": 88084 |
|
}, |
|
{ |
|
"epoch": 61.29, |
|
"learning_rate": 1.8774238227146814e-05, |
|
"loss": 4.1695, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 61.63, |
|
"learning_rate": 1.8767313019390584e-05, |
|
"loss": 4.1645, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 61.98, |
|
"learning_rate": 1.876038781163435e-05, |
|
"loss": 4.1633, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 4.080641269683838, |
|
"eval_runtime": 74.2817, |
|
"eval_samples_per_second": 6629.141, |
|
"eval_steps_per_second": 2.167, |
|
"step": 89528 |
|
}, |
|
{ |
|
"epoch": 62.33, |
|
"learning_rate": 1.8753462603878118e-05, |
|
"loss": 4.1629, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 62.67, |
|
"learning_rate": 1.8746537396121885e-05, |
|
"loss": 4.1631, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 4.07755708694458, |
|
"eval_runtime": 73.9609, |
|
"eval_samples_per_second": 6657.9, |
|
"eval_steps_per_second": 2.177, |
|
"step": 90972 |
|
}, |
|
{ |
|
"epoch": 63.02, |
|
"learning_rate": 1.8739612188365652e-05, |
|
"loss": 4.1607, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 63.37, |
|
"learning_rate": 1.873268698060942e-05, |
|
"loss": 4.1596, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 63.71, |
|
"learning_rate": 1.872576177285319e-05, |
|
"loss": 4.1559, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 4.075015068054199, |
|
"eval_runtime": 73.801, |
|
"eval_samples_per_second": 6672.317, |
|
"eval_steps_per_second": 2.182, |
|
"step": 92416 |
|
}, |
|
{ |
|
"epoch": 64.06, |
|
"learning_rate": 1.8718836565096953e-05, |
|
"loss": 4.155, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 64.4, |
|
"learning_rate": 1.8711911357340723e-05, |
|
"loss": 4.1567, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 64.75, |
|
"learning_rate": 1.870498614958449e-05, |
|
"loss": 4.1529, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 4.0660247802734375, |
|
"eval_runtime": 73.35, |
|
"eval_samples_per_second": 6713.346, |
|
"eval_steps_per_second": 2.195, |
|
"step": 93860 |
|
}, |
|
{ |
|
"epoch": 65.1, |
|
"learning_rate": 1.8698060941828257e-05, |
|
"loss": 4.1521, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 65.44, |
|
"learning_rate": 1.8691135734072024e-05, |
|
"loss": 4.1485, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 1.868421052631579e-05, |
|
"loss": 4.1485, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 4.064459800720215, |
|
"eval_runtime": 73.345, |
|
"eval_samples_per_second": 6713.802, |
|
"eval_steps_per_second": 2.195, |
|
"step": 95304 |
|
}, |
|
{ |
|
"epoch": 66.14, |
|
"learning_rate": 1.8677285318559558e-05, |
|
"loss": 4.1469, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 66.48, |
|
"learning_rate": 1.8670360110803325e-05, |
|
"loss": 4.1459, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 66.83, |
|
"learning_rate": 1.866343490304709e-05, |
|
"loss": 4.1431, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 4.058371067047119, |
|
"eval_runtime": 72.3552, |
|
"eval_samples_per_second": 6805.646, |
|
"eval_steps_per_second": 2.225, |
|
"step": 96748 |
|
}, |
|
{ |
|
"epoch": 67.17, |
|
"learning_rate": 1.8656509695290862e-05, |
|
"loss": 4.143, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 67.52, |
|
"learning_rate": 1.8649584487534625e-05, |
|
"loss": 4.1405, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 67.87, |
|
"learning_rate": 1.8642659279778396e-05, |
|
"loss": 4.1404, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 4.054144382476807, |
|
"eval_runtime": 73.8095, |
|
"eval_samples_per_second": 6671.555, |
|
"eval_steps_per_second": 2.181, |
|
"step": 98192 |
|
}, |
|
{ |
|
"epoch": 68.21, |
|
"learning_rate": 1.8635734072022163e-05, |
|
"loss": 4.1381, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 68.56, |
|
"learning_rate": 1.862880886426593e-05, |
|
"loss": 4.1343, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 68.91, |
|
"learning_rate": 1.8621883656509697e-05, |
|
"loss": 4.1338, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 4.050791263580322, |
|
"eval_runtime": 75.7075, |
|
"eval_samples_per_second": 6504.296, |
|
"eval_steps_per_second": 2.127, |
|
"step": 99636 |
|
}, |
|
{ |
|
"epoch": 69.25, |
|
"learning_rate": 1.8614958448753463e-05, |
|
"loss": 4.1313, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 69.6, |
|
"learning_rate": 1.860803324099723e-05, |
|
"loss": 4.132, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 69.94, |
|
"learning_rate": 1.8601108033240997e-05, |
|
"loss": 4.1352, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 4.043735980987549, |
|
"eval_runtime": 74.0516, |
|
"eval_samples_per_second": 6649.741, |
|
"eval_steps_per_second": 2.174, |
|
"step": 101080 |
|
}, |
|
{ |
|
"epoch": 70.29, |
|
"learning_rate": 1.8594182825484764e-05, |
|
"loss": 4.1307, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 70.64, |
|
"learning_rate": 1.8587257617728535e-05, |
|
"loss": 4.1268, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 70.98, |
|
"learning_rate": 1.85803324099723e-05, |
|
"loss": 4.1307, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 4.04426908493042, |
|
"eval_runtime": 72.7883, |
|
"eval_samples_per_second": 6765.157, |
|
"eval_steps_per_second": 2.212, |
|
"step": 102524 |
|
}, |
|
{ |
|
"epoch": 71.33, |
|
"learning_rate": 1.857340720221607e-05, |
|
"loss": 4.1277, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 71.68, |
|
"learning_rate": 1.8566481994459835e-05, |
|
"loss": 4.1241, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 4.041337966918945, |
|
"eval_runtime": 75.6211, |
|
"eval_samples_per_second": 6511.731, |
|
"eval_steps_per_second": 2.129, |
|
"step": 103968 |
|
}, |
|
{ |
|
"epoch": 72.02, |
|
"learning_rate": 1.8559556786703602e-05, |
|
"loss": 4.1256, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 72.37, |
|
"learning_rate": 1.8552631578947373e-05, |
|
"loss": 4.1215, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 72.71, |
|
"learning_rate": 1.8545706371191136e-05, |
|
"loss": 4.1227, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 4.042394161224365, |
|
"eval_runtime": 75.0493, |
|
"eval_samples_per_second": 6561.341, |
|
"eval_steps_per_second": 2.145, |
|
"step": 105412 |
|
}, |
|
{ |
|
"epoch": 73.06, |
|
"learning_rate": 1.8538781163434903e-05, |
|
"loss": 4.118, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 73.41, |
|
"learning_rate": 1.8531855955678673e-05, |
|
"loss": 4.1166, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 73.75, |
|
"learning_rate": 1.8524930747922437e-05, |
|
"loss": 4.1186, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 4.030467510223389, |
|
"eval_runtime": 73.3813, |
|
"eval_samples_per_second": 6710.483, |
|
"eval_steps_per_second": 2.194, |
|
"step": 106856 |
|
}, |
|
{ |
|
"epoch": 74.1, |
|
"learning_rate": 1.8518005540166207e-05, |
|
"loss": 4.1165, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 74.45, |
|
"learning_rate": 1.8511080332409974e-05, |
|
"loss": 4.113, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 74.79, |
|
"learning_rate": 1.850415512465374e-05, |
|
"loss": 4.1118, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 4.033474922180176, |
|
"eval_runtime": 72.6909, |
|
"eval_samples_per_second": 6774.219, |
|
"eval_steps_per_second": 2.215, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 75.14, |
|
"learning_rate": 1.8497229916897508e-05, |
|
"loss": 4.1129, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 75.48, |
|
"learning_rate": 1.8490304709141275e-05, |
|
"loss": 4.1103, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 75.83, |
|
"learning_rate": 1.8483379501385045e-05, |
|
"loss": 4.1123, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 4.0271382331848145, |
|
"eval_runtime": 73.3585, |
|
"eval_samples_per_second": 6712.566, |
|
"eval_steps_per_second": 2.195, |
|
"step": 109744 |
|
}, |
|
{ |
|
"epoch": 76.18, |
|
"learning_rate": 1.847645429362881e-05, |
|
"loss": 4.1101, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 76.52, |
|
"learning_rate": 1.846952908587258e-05, |
|
"loss": 4.1042, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 76.87, |
|
"learning_rate": 1.8462603878116346e-05, |
|
"loss": 4.1071, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 4.022892951965332, |
|
"eval_runtime": 74.3109, |
|
"eval_samples_per_second": 6626.539, |
|
"eval_steps_per_second": 2.167, |
|
"step": 111188 |
|
}, |
|
{ |
|
"epoch": 77.22, |
|
"learning_rate": 1.8455678670360113e-05, |
|
"loss": 4.1012, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 77.56, |
|
"learning_rate": 1.844875346260388e-05, |
|
"loss": 4.101, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 77.91, |
|
"learning_rate": 1.8441828254847647e-05, |
|
"loss": 4.1028, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 4.022953987121582, |
|
"eval_runtime": 75.1521, |
|
"eval_samples_per_second": 6552.363, |
|
"eval_steps_per_second": 2.142, |
|
"step": 112632 |
|
}, |
|
{ |
|
"epoch": 78.25, |
|
"learning_rate": 1.8434903047091414e-05, |
|
"loss": 4.1002, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 78.6, |
|
"learning_rate": 1.842797783933518e-05, |
|
"loss": 4.1001, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 1.8421052631578947e-05, |
|
"loss": 4.1002, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 4.016762733459473, |
|
"eval_runtime": 74.5194, |
|
"eval_samples_per_second": 6607.994, |
|
"eval_steps_per_second": 2.161, |
|
"step": 114076 |
|
}, |
|
{ |
|
"epoch": 79.29, |
|
"learning_rate": 1.8414127423822718e-05, |
|
"loss": 4.0983, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 79.64, |
|
"learning_rate": 1.8407202216066485e-05, |
|
"loss": 4.0972, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 79.99, |
|
"learning_rate": 1.840027700831025e-05, |
|
"loss": 4.0958, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 4.014660835266113, |
|
"eval_runtime": 74.3599, |
|
"eval_samples_per_second": 6622.175, |
|
"eval_steps_per_second": 2.165, |
|
"step": 115520 |
|
}, |
|
{ |
|
"epoch": 80.33, |
|
"learning_rate": 1.839335180055402e-05, |
|
"loss": 4.0944, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 80.68, |
|
"learning_rate": 1.8386426592797785e-05, |
|
"loss": 4.0919, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 4.004740238189697, |
|
"eval_runtime": 73.8401, |
|
"eval_samples_per_second": 6668.784, |
|
"eval_steps_per_second": 2.18, |
|
"step": 116964 |
|
}, |
|
{ |
|
"epoch": 81.02, |
|
"learning_rate": 1.8379501385041552e-05, |
|
"loss": 4.091, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 81.37, |
|
"learning_rate": 1.837257617728532e-05, |
|
"loss": 4.0913, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 81.72, |
|
"learning_rate": 1.8365650969529086e-05, |
|
"loss": 4.0891, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 4.007106781005859, |
|
"eval_runtime": 73.8477, |
|
"eval_samples_per_second": 6668.102, |
|
"eval_steps_per_second": 2.18, |
|
"step": 118408 |
|
}, |
|
{ |
|
"epoch": 82.06, |
|
"learning_rate": 1.8358725761772856e-05, |
|
"loss": 4.0869, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 82.41, |
|
"learning_rate": 1.835180055401662e-05, |
|
"loss": 4.0896, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 82.76, |
|
"learning_rate": 1.834487534626039e-05, |
|
"loss": 4.0902, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 4.002437591552734, |
|
"eval_runtime": 73.8311, |
|
"eval_samples_per_second": 6669.604, |
|
"eval_steps_per_second": 2.181, |
|
"step": 119852 |
|
}, |
|
{ |
|
"epoch": 83.1, |
|
"learning_rate": 1.8337950138504157e-05, |
|
"loss": 4.0872, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 83.45, |
|
"learning_rate": 1.8331024930747924e-05, |
|
"loss": 4.084, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 83.8, |
|
"learning_rate": 1.832409972299169e-05, |
|
"loss": 4.0846, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 3.9989962577819824, |
|
"eval_runtime": 74.1606, |
|
"eval_samples_per_second": 6639.967, |
|
"eval_steps_per_second": 2.171, |
|
"step": 121296 |
|
}, |
|
{ |
|
"epoch": 84.14, |
|
"learning_rate": 1.8317174515235458e-05, |
|
"loss": 4.0813, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 84.49, |
|
"learning_rate": 1.8310249307479225e-05, |
|
"loss": 4.0794, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 84.83, |
|
"learning_rate": 1.8303324099722992e-05, |
|
"loss": 4.0785, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 3.9976210594177246, |
|
"eval_runtime": 74.1372, |
|
"eval_samples_per_second": 6642.06, |
|
"eval_steps_per_second": 2.172, |
|
"step": 122740 |
|
}, |
|
{ |
|
"epoch": 85.18, |
|
"learning_rate": 1.829639889196676e-05, |
|
"loss": 4.0776, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 85.53, |
|
"learning_rate": 1.828947368421053e-05, |
|
"loss": 4.079, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 85.87, |
|
"learning_rate": 1.8282548476454296e-05, |
|
"loss": 4.0788, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 3.9947969913482666, |
|
"eval_runtime": 74.7164, |
|
"eval_samples_per_second": 6590.579, |
|
"eval_steps_per_second": 2.155, |
|
"step": 124184 |
|
}, |
|
{ |
|
"epoch": 86.22, |
|
"learning_rate": 1.8275623268698063e-05, |
|
"loss": 4.0777, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 86.57, |
|
"learning_rate": 1.826869806094183e-05, |
|
"loss": 4.0768, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 86.91, |
|
"learning_rate": 1.8261772853185597e-05, |
|
"loss": 4.0759, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 3.9896225929260254, |
|
"eval_runtime": 73.8648, |
|
"eval_samples_per_second": 6666.556, |
|
"eval_steps_per_second": 2.18, |
|
"step": 125628 |
|
}, |
|
{ |
|
"epoch": 87.26, |
|
"learning_rate": 1.8254847645429364e-05, |
|
"loss": 4.0709, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 87.6, |
|
"learning_rate": 1.824792243767313e-05, |
|
"loss": 4.0711, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 87.95, |
|
"learning_rate": 1.82409972299169e-05, |
|
"loss": 4.0728, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 3.988676071166992, |
|
"eval_runtime": 73.3696, |
|
"eval_samples_per_second": 6711.549, |
|
"eval_steps_per_second": 2.194, |
|
"step": 127072 |
|
}, |
|
{ |
|
"epoch": 88.3, |
|
"learning_rate": 1.8234072022160668e-05, |
|
"loss": 4.0693, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 88.64, |
|
"learning_rate": 1.822714681440443e-05, |
|
"loss": 4.0658, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 88.99, |
|
"learning_rate": 1.82202216066482e-05, |
|
"loss": 4.0668, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 3.9832513332366943, |
|
"eval_runtime": 73.9518, |
|
"eval_samples_per_second": 6658.718, |
|
"eval_steps_per_second": 2.177, |
|
"step": 128516 |
|
}, |
|
{ |
|
"epoch": 89.34, |
|
"learning_rate": 1.821329639889197e-05, |
|
"loss": 4.0669, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 89.68, |
|
"learning_rate": 1.8206371191135735e-05, |
|
"loss": 4.0669, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 3.9873828887939453, |
|
"eval_runtime": 75.3146, |
|
"eval_samples_per_second": 6538.224, |
|
"eval_steps_per_second": 2.138, |
|
"step": 129960 |
|
}, |
|
{ |
|
"epoch": 90.03, |
|
"learning_rate": 1.8199445983379502e-05, |
|
"loss": 4.0619, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 90.37, |
|
"learning_rate": 1.819252077562327e-05, |
|
"loss": 4.0611, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 90.72, |
|
"learning_rate": 1.818559556786704e-05, |
|
"loss": 4.0625, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 3.9783737659454346, |
|
"eval_runtime": 75.884, |
|
"eval_samples_per_second": 6489.166, |
|
"eval_steps_per_second": 2.122, |
|
"step": 131404 |
|
}, |
|
{ |
|
"epoch": 91.07, |
|
"learning_rate": 1.8178670360110803e-05, |
|
"loss": 4.0609, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 91.41, |
|
"learning_rate": 1.8171745152354573e-05, |
|
"loss": 4.0605, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 91.76, |
|
"learning_rate": 1.816481994459834e-05, |
|
"loss": 4.0571, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 3.976148843765259, |
|
"eval_runtime": 74.7074, |
|
"eval_samples_per_second": 6591.369, |
|
"eval_steps_per_second": 2.155, |
|
"step": 132848 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 1.8157894736842107e-05, |
|
"loss": 4.0565, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"learning_rate": 1.8150969529085874e-05, |
|
"loss": 4.057, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 92.8, |
|
"learning_rate": 1.814404432132964e-05, |
|
"loss": 4.0572, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 3.9746015071868896, |
|
"eval_runtime": 73.6659, |
|
"eval_samples_per_second": 6684.561, |
|
"eval_steps_per_second": 2.186, |
|
"step": 134292 |
|
}, |
|
{ |
|
"epoch": 93.14, |
|
"learning_rate": 1.8137119113573408e-05, |
|
"loss": 4.0551, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 93.49, |
|
"learning_rate": 1.8130193905817175e-05, |
|
"loss": 4.0562, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 93.84, |
|
"learning_rate": 1.8123268698060942e-05, |
|
"loss": 4.0533, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 3.971306562423706, |
|
"eval_runtime": 74.5978, |
|
"eval_samples_per_second": 6601.056, |
|
"eval_steps_per_second": 2.158, |
|
"step": 135736 |
|
}, |
|
{ |
|
"epoch": 94.18, |
|
"learning_rate": 1.8116343490304712e-05, |
|
"loss": 4.0537, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 94.53, |
|
"learning_rate": 1.810941828254848e-05, |
|
"loss": 4.0532, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 94.88, |
|
"learning_rate": 1.8102493074792246e-05, |
|
"loss": 4.0508, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 3.971055507659912, |
|
"eval_runtime": 75.83, |
|
"eval_samples_per_second": 6493.789, |
|
"eval_steps_per_second": 2.123, |
|
"step": 137180 |
|
}, |
|
{ |
|
"epoch": 95.22, |
|
"learning_rate": 1.8095567867036013e-05, |
|
"loss": 4.0504, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 95.57, |
|
"learning_rate": 1.808864265927978e-05, |
|
"loss": 4.0466, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 95.91, |
|
"learning_rate": 1.8081717451523547e-05, |
|
"loss": 4.0487, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 3.964989185333252, |
|
"eval_runtime": 75.3545, |
|
"eval_samples_per_second": 6534.761, |
|
"eval_steps_per_second": 2.137, |
|
"step": 138624 |
|
}, |
|
{ |
|
"epoch": 96.26, |
|
"learning_rate": 1.8074792243767314e-05, |
|
"loss": 4.0443, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 96.61, |
|
"learning_rate": 1.806786703601108e-05, |
|
"loss": 4.044, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 96.95, |
|
"learning_rate": 1.806094182825485e-05, |
|
"loss": 4.0446, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 3.9668915271759033, |
|
"eval_runtime": 74.5714, |
|
"eval_samples_per_second": 6603.39, |
|
"eval_steps_per_second": 2.159, |
|
"step": 140068 |
|
}, |
|
{ |
|
"epoch": 97.3, |
|
"learning_rate": 1.8054016620498614e-05, |
|
"loss": 4.046, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 97.65, |
|
"learning_rate": 1.8047091412742385e-05, |
|
"loss": 4.0446, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 97.99, |
|
"learning_rate": 1.804016620498615e-05, |
|
"loss": 4.0432, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 3.958078145980835, |
|
"eval_runtime": 76.6585, |
|
"eval_samples_per_second": 6423.607, |
|
"eval_steps_per_second": 2.1, |
|
"step": 141512 |
|
}, |
|
{ |
|
"epoch": 98.34, |
|
"learning_rate": 1.803324099722992e-05, |
|
"loss": 4.043, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 98.68, |
|
"learning_rate": 1.8026315789473685e-05, |
|
"loss": 4.0413, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 3.9584155082702637, |
|
"eval_runtime": 74.9791, |
|
"eval_samples_per_second": 6567.486, |
|
"eval_steps_per_second": 2.147, |
|
"step": 142956 |
|
}, |
|
{ |
|
"epoch": 99.03, |
|
"learning_rate": 1.8019390581717452e-05, |
|
"loss": 4.0386, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 99.38, |
|
"learning_rate": 1.801246537396122e-05, |
|
"loss": 4.0369, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 99.72, |
|
"learning_rate": 1.8005540166204986e-05, |
|
"loss": 4.0354, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 3.9502811431884766, |
|
"eval_runtime": 75.7506, |
|
"eval_samples_per_second": 6500.598, |
|
"eval_steps_per_second": 2.125, |
|
"step": 144400 |
|
}, |
|
{ |
|
"epoch": 100.07, |
|
"learning_rate": 1.7998614958448753e-05, |
|
"loss": 4.0367, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 100.42, |
|
"learning_rate": 1.7991689750692523e-05, |
|
"loss": 4.0353, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 100.76, |
|
"learning_rate": 1.798476454293629e-05, |
|
"loss": 4.0357, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_loss": 3.948287010192871, |
|
"eval_runtime": 75.3101, |
|
"eval_samples_per_second": 6538.622, |
|
"eval_steps_per_second": 2.138, |
|
"step": 145844 |
|
}, |
|
{ |
|
"epoch": 101.11, |
|
"learning_rate": 1.7977839335180057e-05, |
|
"loss": 4.0344, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 101.45, |
|
"learning_rate": 1.7970914127423824e-05, |
|
"loss": 4.0308, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 101.8, |
|
"learning_rate": 1.796398891966759e-05, |
|
"loss": 4.0321, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_loss": 3.9529218673706055, |
|
"eval_runtime": 74.5513, |
|
"eval_samples_per_second": 6605.174, |
|
"eval_steps_per_second": 2.16, |
|
"step": 147288 |
|
}, |
|
{ |
|
"epoch": 102.15, |
|
"learning_rate": 1.7957063711911358e-05, |
|
"loss": 4.0305, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 102.49, |
|
"learning_rate": 1.7950138504155125e-05, |
|
"loss": 4.0291, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 102.84, |
|
"learning_rate": 1.7943213296398895e-05, |
|
"loss": 4.0292, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_loss": 3.9466750621795654, |
|
"eval_runtime": 76.3415, |
|
"eval_samples_per_second": 6450.282, |
|
"eval_steps_per_second": 2.109, |
|
"step": 148732 |
|
}, |
|
{ |
|
"epoch": 103.19, |
|
"learning_rate": 1.7936288088642662e-05, |
|
"loss": 4.0281, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 103.53, |
|
"learning_rate": 1.7929362880886426e-05, |
|
"loss": 4.0281, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 103.88, |
|
"learning_rate": 1.7922437673130196e-05, |
|
"loss": 4.0271, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_loss": 3.9457340240478516, |
|
"eval_runtime": 77.413, |
|
"eval_samples_per_second": 6361.001, |
|
"eval_steps_per_second": 2.08, |
|
"step": 150176 |
|
}, |
|
{ |
|
"epoch": 104.22, |
|
"learning_rate": 1.7915512465373963e-05, |
|
"loss": 4.0259, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 104.57, |
|
"learning_rate": 1.790858725761773e-05, |
|
"loss": 4.0247, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 104.92, |
|
"learning_rate": 1.7901662049861497e-05, |
|
"loss": 4.0245, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_loss": 3.944822311401367, |
|
"eval_runtime": 79.2928, |
|
"eval_samples_per_second": 6210.2, |
|
"eval_steps_per_second": 2.03, |
|
"step": 151620 |
|
}, |
|
{ |
|
"epoch": 105.26, |
|
"learning_rate": 1.7894736842105264e-05, |
|
"loss": 4.0233, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 105.61, |
|
"learning_rate": 1.7887811634349034e-05, |
|
"loss": 4.0232, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 105.96, |
|
"learning_rate": 1.7880886426592798e-05, |
|
"loss": 4.0204, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_loss": 3.9429657459259033, |
|
"eval_runtime": 76.6917, |
|
"eval_samples_per_second": 6420.824, |
|
"eval_steps_per_second": 2.099, |
|
"step": 153064 |
|
}, |
|
{ |
|
"epoch": 106.3, |
|
"learning_rate": 1.7873961218836568e-05, |
|
"loss": 4.0201, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 106.65, |
|
"learning_rate": 1.7867036011080335e-05, |
|
"loss": 4.0191, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 106.99, |
|
"learning_rate": 1.7860110803324102e-05, |
|
"loss": 4.0218, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_loss": 3.9408297538757324, |
|
"eval_runtime": 75.1357, |
|
"eval_samples_per_second": 6553.796, |
|
"eval_steps_per_second": 2.143, |
|
"step": 154508 |
|
}, |
|
{ |
|
"epoch": 107.34, |
|
"learning_rate": 1.785318559556787e-05, |
|
"loss": 4.0232, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 107.69, |
|
"learning_rate": 1.7846260387811636e-05, |
|
"loss": 4.018, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_loss": 3.9399232864379883, |
|
"eval_runtime": 74.43, |
|
"eval_samples_per_second": 6615.938, |
|
"eval_steps_per_second": 2.163, |
|
"step": 155952 |
|
}, |
|
{ |
|
"epoch": 108.03, |
|
"learning_rate": 1.7839335180055402e-05, |
|
"loss": 4.0175, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 108.38, |
|
"learning_rate": 1.783240997229917e-05, |
|
"loss": 4.0165, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 108.73, |
|
"learning_rate": 1.7825484764542936e-05, |
|
"loss": 4.0137, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_loss": 3.935981035232544, |
|
"eval_runtime": 75.1558, |
|
"eval_samples_per_second": 6552.043, |
|
"eval_steps_per_second": 2.142, |
|
"step": 157396 |
|
}, |
|
{ |
|
"epoch": 109.07, |
|
"learning_rate": 1.7818559556786707e-05, |
|
"loss": 4.0159, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 109.42, |
|
"learning_rate": 1.7811634349030474e-05, |
|
"loss": 4.0177, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 109.76, |
|
"learning_rate": 1.780470914127424e-05, |
|
"loss": 4.0114, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_loss": 3.934030771255493, |
|
"eval_runtime": 75.3274, |
|
"eval_samples_per_second": 6537.118, |
|
"eval_steps_per_second": 2.137, |
|
"step": 158840 |
|
}, |
|
{ |
|
"epoch": 110.11, |
|
"learning_rate": 1.7797783933518007e-05, |
|
"loss": 4.011, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 110.46, |
|
"learning_rate": 1.7790858725761774e-05, |
|
"loss": 4.0116, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 110.8, |
|
"learning_rate": 1.778393351800554e-05, |
|
"loss": 4.0122, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_loss": 3.9282586574554443, |
|
"eval_runtime": 75.7551, |
|
"eval_samples_per_second": 6500.211, |
|
"eval_steps_per_second": 2.125, |
|
"step": 160284 |
|
}, |
|
{ |
|
"epoch": 111.15, |
|
"learning_rate": 1.7777008310249308e-05, |
|
"loss": 4.0088, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 111.5, |
|
"learning_rate": 1.7770083102493075e-05, |
|
"loss": 4.0097, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 111.84, |
|
"learning_rate": 1.7763157894736845e-05, |
|
"loss": 4.0093, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_loss": 3.9263861179351807, |
|
"eval_runtime": 77.7083, |
|
"eval_samples_per_second": 6336.824, |
|
"eval_steps_per_second": 2.072, |
|
"step": 161728 |
|
}, |
|
{ |
|
"epoch": 112.19, |
|
"learning_rate": 1.775623268698061e-05, |
|
"loss": 4.0096, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 112.53, |
|
"learning_rate": 1.774930747922438e-05, |
|
"loss": 4.0042, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 112.88, |
|
"learning_rate": 1.7742382271468146e-05, |
|
"loss": 4.0068, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_loss": 3.9213218688964844, |
|
"eval_runtime": 76.9094, |
|
"eval_samples_per_second": 6402.654, |
|
"eval_steps_per_second": 2.093, |
|
"step": 163172 |
|
}, |
|
{ |
|
"epoch": 113.23, |
|
"learning_rate": 1.7735457063711913e-05, |
|
"loss": 4.005, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 113.57, |
|
"learning_rate": 1.772853185595568e-05, |
|
"loss": 4.0043, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 113.92, |
|
"learning_rate": 1.7721606648199447e-05, |
|
"loss": 4.002, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_loss": 3.9210433959960938, |
|
"eval_runtime": 73.884, |
|
"eval_samples_per_second": 6664.824, |
|
"eval_steps_per_second": 2.179, |
|
"step": 164616 |
|
}, |
|
{ |
|
"epoch": 114.27, |
|
"learning_rate": 1.7714681440443214e-05, |
|
"loss": 4.0039, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 114.61, |
|
"learning_rate": 1.770775623268698e-05, |
|
"loss": 4.0012, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 114.96, |
|
"learning_rate": 1.7700831024930748e-05, |
|
"loss": 4.0013, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_loss": 3.9188640117645264, |
|
"eval_runtime": 74.0122, |
|
"eval_samples_per_second": 6653.285, |
|
"eval_steps_per_second": 2.175, |
|
"step": 166060 |
|
}, |
|
{ |
|
"epoch": 115.3, |
|
"learning_rate": 1.7693905817174518e-05, |
|
"loss": 4.0011, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 115.65, |
|
"learning_rate": 1.7686980609418285e-05, |
|
"loss": 4.0031, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 1.7680055401662052e-05, |
|
"loss": 3.9978, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_loss": 3.920518398284912, |
|
"eval_runtime": 74.6861, |
|
"eval_samples_per_second": 6593.245, |
|
"eval_steps_per_second": 2.156, |
|
"step": 167504 |
|
}, |
|
{ |
|
"epoch": 116.34, |
|
"learning_rate": 1.767313019390582e-05, |
|
"loss": 3.9936, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 116.69, |
|
"learning_rate": 1.7666204986149586e-05, |
|
"loss": 3.9978, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_loss": 3.918830394744873, |
|
"eval_runtime": 74.6329, |
|
"eval_samples_per_second": 6597.945, |
|
"eval_steps_per_second": 2.157, |
|
"step": 168948 |
|
}, |
|
{ |
|
"epoch": 117.04, |
|
"learning_rate": 1.7659279778393353e-05, |
|
"loss": 3.9979, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 117.38, |
|
"learning_rate": 1.765235457063712e-05, |
|
"loss": 3.9972, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 117.73, |
|
"learning_rate": 1.764542936288089e-05, |
|
"loss": 3.9966, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_loss": 3.914811611175537, |
|
"eval_runtime": 74.0729, |
|
"eval_samples_per_second": 6647.825, |
|
"eval_steps_per_second": 2.174, |
|
"step": 170392 |
|
}, |
|
{ |
|
"epoch": 118.07, |
|
"learning_rate": 1.7638504155124657e-05, |
|
"loss": 3.9952, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 118.42, |
|
"learning_rate": 1.763157894736842e-05, |
|
"loss": 3.9942, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 118.77, |
|
"learning_rate": 1.762465373961219e-05, |
|
"loss": 3.9923, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_loss": 3.915539503097534, |
|
"eval_runtime": 77.3659, |
|
"eval_samples_per_second": 6364.87, |
|
"eval_steps_per_second": 2.081, |
|
"step": 171836 |
|
}, |
|
{ |
|
"epoch": 119.11, |
|
"learning_rate": 1.7617728531855957e-05, |
|
"loss": 3.9941, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 119.46, |
|
"learning_rate": 1.7610803324099724e-05, |
|
"loss": 3.9925, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 119.81, |
|
"learning_rate": 1.760387811634349e-05, |
|
"loss": 3.9901, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_loss": 3.9078481197357178, |
|
"eval_runtime": 73.0002, |
|
"eval_samples_per_second": 6745.514, |
|
"eval_steps_per_second": 2.205, |
|
"step": 173280 |
|
}, |
|
{ |
|
"epoch": 120.15, |
|
"learning_rate": 1.7596952908587258e-05, |
|
"loss": 3.9908, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 120.5, |
|
"learning_rate": 1.759002770083103e-05, |
|
"loss": 3.9875, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 120.84, |
|
"learning_rate": 1.7583102493074792e-05, |
|
"loss": 3.989, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_loss": 3.9121310710906982, |
|
"eval_runtime": 73.6203, |
|
"eval_samples_per_second": 6688.695, |
|
"eval_steps_per_second": 2.187, |
|
"step": 174724 |
|
}, |
|
{ |
|
"epoch": 121.19, |
|
"learning_rate": 1.7576177285318562e-05, |
|
"loss": 3.987, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 121.54, |
|
"learning_rate": 1.756925207756233e-05, |
|
"loss": 3.991, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 121.88, |
|
"learning_rate": 1.7562326869806096e-05, |
|
"loss": 3.9876, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_loss": 3.9086544513702393, |
|
"eval_runtime": 74.7783, |
|
"eval_samples_per_second": 6585.118, |
|
"eval_steps_per_second": 2.153, |
|
"step": 176168 |
|
}, |
|
{ |
|
"epoch": 122.23, |
|
"learning_rate": 1.7555401662049863e-05, |
|
"loss": 3.9863, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 122.58, |
|
"learning_rate": 1.754847645429363e-05, |
|
"loss": 3.9866, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 122.92, |
|
"learning_rate": 1.7541551246537397e-05, |
|
"loss": 3.9856, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_loss": 3.9068329334259033, |
|
"eval_runtime": 74.5275, |
|
"eval_samples_per_second": 6607.276, |
|
"eval_steps_per_second": 2.16, |
|
"step": 177612 |
|
}, |
|
{ |
|
"epoch": 123.27, |
|
"learning_rate": 1.7534626038781164e-05, |
|
"loss": 3.9857, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 123.61, |
|
"learning_rate": 1.752770083102493e-05, |
|
"loss": 3.9853, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 123.96, |
|
"learning_rate": 1.75207756232687e-05, |
|
"loss": 3.9834, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_loss": 3.90472412109375, |
|
"eval_runtime": 74.8567, |
|
"eval_samples_per_second": 6578.224, |
|
"eval_steps_per_second": 2.151, |
|
"step": 179056 |
|
}, |
|
{ |
|
"epoch": 124.31, |
|
"learning_rate": 1.7513850415512468e-05, |
|
"loss": 3.9833, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 124.65, |
|
"learning_rate": 1.7506925207756235e-05, |
|
"loss": 3.9812, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 3.9786, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_loss": 3.89973783493042, |
|
"eval_runtime": 73.8136, |
|
"eval_samples_per_second": 6671.181, |
|
"eval_steps_per_second": 2.181, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 125.35, |
|
"learning_rate": 1.749307479224377e-05, |
|
"loss": 3.9793, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 125.69, |
|
"learning_rate": 1.7486149584487536e-05, |
|
"loss": 3.9788, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_loss": 3.900991201400757, |
|
"eval_runtime": 76.4593, |
|
"eval_samples_per_second": 6440.341, |
|
"eval_steps_per_second": 2.106, |
|
"step": 181944 |
|
}, |
|
{ |
|
"epoch": 126.04, |
|
"learning_rate": 1.7479224376731303e-05, |
|
"loss": 3.98, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 126.39, |
|
"learning_rate": 1.747229916897507e-05, |
|
"loss": 3.979, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 126.73, |
|
"learning_rate": 1.746537396121884e-05, |
|
"loss": 3.9782, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_loss": 3.898531436920166, |
|
"eval_runtime": 73.137, |
|
"eval_samples_per_second": 6732.899, |
|
"eval_steps_per_second": 2.201, |
|
"step": 183388 |
|
}, |
|
{ |
|
"epoch": 127.08, |
|
"learning_rate": 1.7458448753462603e-05, |
|
"loss": 3.9772, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 127.42, |
|
"learning_rate": 1.7451523545706374e-05, |
|
"loss": 3.9759, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 127.77, |
|
"learning_rate": 1.744459833795014e-05, |
|
"loss": 3.9733, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_loss": 3.892979145050049, |
|
"eval_runtime": 74.6554, |
|
"eval_samples_per_second": 6595.957, |
|
"eval_steps_per_second": 2.157, |
|
"step": 184832 |
|
}, |
|
{ |
|
"epoch": 128.12, |
|
"learning_rate": 1.7437673130193908e-05, |
|
"loss": 3.9761, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 128.46, |
|
"learning_rate": 1.7430747922437674e-05, |
|
"loss": 3.9733, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 128.81, |
|
"learning_rate": 1.742382271468144e-05, |
|
"loss": 3.9765, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_loss": 3.8959548473358154, |
|
"eval_runtime": 72.9991, |
|
"eval_samples_per_second": 6745.614, |
|
"eval_steps_per_second": 2.206, |
|
"step": 186276 |
|
}, |
|
{ |
|
"epoch": 129.16, |
|
"learning_rate": 1.741689750692521e-05, |
|
"loss": 3.9713, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 129.5, |
|
"learning_rate": 1.7409972299168975e-05, |
|
"loss": 3.9714, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 129.85, |
|
"learning_rate": 1.7403047091412742e-05, |
|
"loss": 3.9733, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_loss": 3.8933827877044678, |
|
"eval_runtime": 75.3723, |
|
"eval_samples_per_second": 6533.221, |
|
"eval_steps_per_second": 2.136, |
|
"step": 187720 |
|
}, |
|
{ |
|
"epoch": 130.19, |
|
"learning_rate": 1.7396121883656512e-05, |
|
"loss": 3.9725, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 130.54, |
|
"learning_rate": 1.738919667590028e-05, |
|
"loss": 3.9698, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 130.89, |
|
"learning_rate": 1.7382271468144046e-05, |
|
"loss": 3.9695, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_loss": 3.888564348220825, |
|
"eval_runtime": 76.8687, |
|
"eval_samples_per_second": 6406.041, |
|
"eval_steps_per_second": 2.094, |
|
"step": 189164 |
|
}, |
|
{ |
|
"epoch": 131.23, |
|
"learning_rate": 1.7375346260387813e-05, |
|
"loss": 3.9677, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 131.58, |
|
"learning_rate": 1.736842105263158e-05, |
|
"loss": 3.9709, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 131.93, |
|
"learning_rate": 1.7361495844875347e-05, |
|
"loss": 3.9708, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_loss": 3.889085054397583, |
|
"eval_runtime": 73.3155, |
|
"eval_samples_per_second": 6716.506, |
|
"eval_steps_per_second": 2.196, |
|
"step": 190608 |
|
}, |
|
{ |
|
"epoch": 132.27, |
|
"learning_rate": 1.7354570637119114e-05, |
|
"loss": 3.9635, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 132.62, |
|
"learning_rate": 1.7347645429362884e-05, |
|
"loss": 3.9658, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 132.96, |
|
"learning_rate": 1.734072022160665e-05, |
|
"loss": 3.9673, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_loss": 3.885761022567749, |
|
"eval_runtime": 72.7481, |
|
"eval_samples_per_second": 6768.893, |
|
"eval_steps_per_second": 2.213, |
|
"step": 192052 |
|
}, |
|
{ |
|
"epoch": 133.31, |
|
"learning_rate": 1.7333795013850415e-05, |
|
"loss": 3.9664, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 133.66, |
|
"learning_rate": 1.7326869806094185e-05, |
|
"loss": 3.9635, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_loss": 3.8835644721984863, |
|
"eval_runtime": 72.9182, |
|
"eval_samples_per_second": 6753.106, |
|
"eval_steps_per_second": 2.208, |
|
"step": 193496 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 1.7319944598337952e-05, |
|
"loss": 3.9667, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 134.35, |
|
"learning_rate": 1.731301939058172e-05, |
|
"loss": 3.9627, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 134.7, |
|
"learning_rate": 1.7306094182825486e-05, |
|
"loss": 3.962, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_loss": 3.8817522525787354, |
|
"eval_runtime": 74.832, |
|
"eval_samples_per_second": 6580.394, |
|
"eval_steps_per_second": 2.151, |
|
"step": 194940 |
|
}, |
|
{ |
|
"epoch": 135.04, |
|
"learning_rate": 1.7299168975069253e-05, |
|
"loss": 3.9648, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 135.39, |
|
"learning_rate": 1.7292243767313023e-05, |
|
"loss": 3.9619, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 135.73, |
|
"learning_rate": 1.7285318559556787e-05, |
|
"loss": 3.9635, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_loss": 3.8839354515075684, |
|
"eval_runtime": 75.8115, |
|
"eval_samples_per_second": 6495.378, |
|
"eval_steps_per_second": 2.124, |
|
"step": 196384 |
|
}, |
|
{ |
|
"epoch": 136.08, |
|
"learning_rate": 1.7278393351800557e-05, |
|
"loss": 3.9585, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 136.43, |
|
"learning_rate": 1.7271468144044324e-05, |
|
"loss": 3.9597, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 136.77, |
|
"learning_rate": 1.7264542936288087e-05, |
|
"loss": 3.9601, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_loss": 3.8804266452789307, |
|
"eval_runtime": 75.6839, |
|
"eval_samples_per_second": 6506.321, |
|
"eval_steps_per_second": 2.127, |
|
"step": 197828 |
|
}, |
|
{ |
|
"epoch": 137.12, |
|
"learning_rate": 1.7257617728531858e-05, |
|
"loss": 3.9609, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 137.47, |
|
"learning_rate": 1.7250692520775625e-05, |
|
"loss": 3.956, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 137.81, |
|
"learning_rate": 1.724376731301939e-05, |
|
"loss": 3.9593, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_loss": 3.879089117050171, |
|
"eval_runtime": 74.4504, |
|
"eval_samples_per_second": 6614.122, |
|
"eval_steps_per_second": 2.163, |
|
"step": 199272 |
|
}, |
|
{ |
|
"epoch": 138.16, |
|
"learning_rate": 1.723684210526316e-05, |
|
"loss": 3.9569, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 138.5, |
|
"learning_rate": 1.7229916897506925e-05, |
|
"loss": 3.9541, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 138.85, |
|
"learning_rate": 1.7222991689750696e-05, |
|
"loss": 3.9566, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_loss": 3.8803153038024902, |
|
"eval_runtime": 77.8029, |
|
"eval_samples_per_second": 6329.123, |
|
"eval_steps_per_second": 2.069, |
|
"step": 200716 |
|
}, |
|
{ |
|
"epoch": 139.2, |
|
"learning_rate": 1.7216066481994462e-05, |
|
"loss": 3.956, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 139.54, |
|
"learning_rate": 1.720914127423823e-05, |
|
"loss": 3.9517, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 139.89, |
|
"learning_rate": 1.7202216066481996e-05, |
|
"loss": 3.9535, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_loss": 3.872063159942627, |
|
"eval_runtime": 75.6675, |
|
"eval_samples_per_second": 6507.738, |
|
"eval_steps_per_second": 2.128, |
|
"step": 202160 |
|
}, |
|
{ |
|
"epoch": 140.24, |
|
"learning_rate": 1.7195290858725763e-05, |
|
"loss": 3.9534, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 140.58, |
|
"learning_rate": 1.718836565096953e-05, |
|
"loss": 3.9534, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 140.93, |
|
"learning_rate": 1.7181440443213297e-05, |
|
"loss": 3.9525, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_loss": 3.8730010986328125, |
|
"eval_runtime": 73.7321, |
|
"eval_samples_per_second": 6678.556, |
|
"eval_steps_per_second": 2.184, |
|
"step": 203604 |
|
}, |
|
{ |
|
"epoch": 141.27, |
|
"learning_rate": 1.7174515235457064e-05, |
|
"loss": 3.9519, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 141.62, |
|
"learning_rate": 1.7167590027700834e-05, |
|
"loss": 3.9488, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 141.97, |
|
"learning_rate": 1.7160664819944598e-05, |
|
"loss": 3.9515, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_loss": 3.870518207550049, |
|
"eval_runtime": 74.6993, |
|
"eval_samples_per_second": 6592.087, |
|
"eval_steps_per_second": 2.155, |
|
"step": 205048 |
|
}, |
|
{ |
|
"epoch": 142.31, |
|
"learning_rate": 1.7153739612188368e-05, |
|
"loss": 3.9499, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 142.66, |
|
"learning_rate": 1.7146814404432135e-05, |
|
"loss": 3.9488, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_loss": 3.868440628051758, |
|
"eval_runtime": 73.8028, |
|
"eval_samples_per_second": 6672.162, |
|
"eval_steps_per_second": 2.181, |
|
"step": 206492 |
|
}, |
|
{ |
|
"epoch": 143.01, |
|
"learning_rate": 1.7139889196675902e-05, |
|
"loss": 3.9525, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 143.35, |
|
"learning_rate": 1.713296398891967e-05, |
|
"loss": 3.9484, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 143.7, |
|
"learning_rate": 1.7126038781163436e-05, |
|
"loss": 3.944, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_loss": 3.8690621852874756, |
|
"eval_runtime": 74.7082, |
|
"eval_samples_per_second": 6591.3, |
|
"eval_steps_per_second": 2.155, |
|
"step": 207936 |
|
}, |
|
{ |
|
"epoch": 144.04, |
|
"learning_rate": 1.7119113573407206e-05, |
|
"loss": 3.9512, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 144.39, |
|
"learning_rate": 1.711218836565097e-05, |
|
"loss": 3.9497, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 144.74, |
|
"learning_rate": 1.7105263157894737e-05, |
|
"loss": 3.9455, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_loss": 3.8670220375061035, |
|
"eval_runtime": 74.3832, |
|
"eval_samples_per_second": 6620.095, |
|
"eval_steps_per_second": 2.164, |
|
"step": 209380 |
|
}, |
|
{ |
|
"epoch": 145.08, |
|
"learning_rate": 1.7098337950138507e-05, |
|
"loss": 3.9445, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 145.43, |
|
"learning_rate": 1.709141274238227e-05, |
|
"loss": 3.945, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 145.78, |
|
"learning_rate": 1.708448753462604e-05, |
|
"loss": 3.9439, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_loss": 3.865227699279785, |
|
"eval_runtime": 76.1017, |
|
"eval_samples_per_second": 6470.6, |
|
"eval_steps_per_second": 2.116, |
|
"step": 210824 |
|
}, |
|
{ |
|
"epoch": 146.12, |
|
"learning_rate": 1.7077562326869808e-05, |
|
"loss": 3.9444, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 146.47, |
|
"learning_rate": 1.7070637119113575e-05, |
|
"loss": 3.9402, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 146.81, |
|
"learning_rate": 1.706371191135734e-05, |
|
"loss": 3.9431, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_loss": 3.8667104244232178, |
|
"eval_runtime": 74.0634, |
|
"eval_samples_per_second": 6648.686, |
|
"eval_steps_per_second": 2.174, |
|
"step": 212268 |
|
}, |
|
{ |
|
"epoch": 147.16, |
|
"learning_rate": 1.705678670360111e-05, |
|
"loss": 3.9421, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 147.51, |
|
"learning_rate": 1.704986149584488e-05, |
|
"loss": 3.9406, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 147.85, |
|
"learning_rate": 1.7042936288088646e-05, |
|
"loss": 3.9422, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_loss": 3.8651583194732666, |
|
"eval_runtime": 74.2586, |
|
"eval_samples_per_second": 6631.208, |
|
"eval_steps_per_second": 2.168, |
|
"step": 213712 |
|
}, |
|
{ |
|
"epoch": 148.2, |
|
"learning_rate": 1.703601108033241e-05, |
|
"loss": 3.9379, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 148.55, |
|
"learning_rate": 1.702908587257618e-05, |
|
"loss": 3.9402, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 148.89, |
|
"learning_rate": 1.7022160664819946e-05, |
|
"loss": 3.9389, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_loss": 3.8652150630950928, |
|
"eval_runtime": 74.5415, |
|
"eval_samples_per_second": 6606.034, |
|
"eval_steps_per_second": 2.16, |
|
"step": 215156 |
|
}, |
|
{ |
|
"epoch": 149.24, |
|
"learning_rate": 1.7015235457063713e-05, |
|
"loss": 3.9387, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 149.58, |
|
"learning_rate": 1.700831024930748e-05, |
|
"loss": 3.9394, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 149.93, |
|
"learning_rate": 1.7001385041551247e-05, |
|
"loss": 3.9387, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_loss": 3.8585476875305176, |
|
"eval_runtime": 75.2231, |
|
"eval_samples_per_second": 6546.184, |
|
"eval_steps_per_second": 2.14, |
|
"step": 216600 |
|
}, |
|
{ |
|
"epoch": 150.28, |
|
"learning_rate": 1.6994459833795017e-05, |
|
"loss": 3.9363, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 150.62, |
|
"learning_rate": 1.698753462603878e-05, |
|
"loss": 3.9406, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 150.97, |
|
"learning_rate": 1.698060941828255e-05, |
|
"loss": 3.9375, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_loss": 3.8584671020507812, |
|
"eval_runtime": 75.4049, |
|
"eval_samples_per_second": 6530.4, |
|
"eval_steps_per_second": 2.135, |
|
"step": 218044 |
|
}, |
|
{ |
|
"epoch": 151.32, |
|
"learning_rate": 1.6973684210526318e-05, |
|
"loss": 3.9365, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 151.66, |
|
"learning_rate": 1.6966759002770085e-05, |
|
"loss": 3.9365, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_loss": 3.8540356159210205, |
|
"eval_runtime": 72.8254, |
|
"eval_samples_per_second": 6761.71, |
|
"eval_steps_per_second": 2.211, |
|
"step": 219488 |
|
}, |
|
{ |
|
"epoch": 152.01, |
|
"learning_rate": 1.6959833795013852e-05, |
|
"loss": 3.9354, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 152.35, |
|
"learning_rate": 1.695290858725762e-05, |
|
"loss": 3.9347, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 152.7, |
|
"learning_rate": 1.6945983379501386e-05, |
|
"loss": 3.9313, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_loss": 3.853323459625244, |
|
"eval_runtime": 74.3001, |
|
"eval_samples_per_second": 6627.5, |
|
"eval_steps_per_second": 2.167, |
|
"step": 220932 |
|
}, |
|
{ |
|
"epoch": 153.05, |
|
"learning_rate": 1.6939058171745153e-05, |
|
"loss": 3.9335, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 153.39, |
|
"learning_rate": 1.693213296398892e-05, |
|
"loss": 3.932, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 153.74, |
|
"learning_rate": 1.692520775623269e-05, |
|
"loss": 3.9287, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_loss": 3.853574275970459, |
|
"eval_runtime": 74.3871, |
|
"eval_samples_per_second": 6619.754, |
|
"eval_steps_per_second": 2.164, |
|
"step": 222376 |
|
}, |
|
{ |
|
"epoch": 154.09, |
|
"learning_rate": 1.6918282548476454e-05, |
|
"loss": 3.935, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 154.43, |
|
"learning_rate": 1.6911357340720224e-05, |
|
"loss": 3.9323, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 154.78, |
|
"learning_rate": 1.690443213296399e-05, |
|
"loss": 3.9304, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_loss": 3.854647636413574, |
|
"eval_runtime": 75.0693, |
|
"eval_samples_per_second": 6559.592, |
|
"eval_steps_per_second": 2.145, |
|
"step": 223820 |
|
}, |
|
{ |
|
"epoch": 155.12, |
|
"learning_rate": 1.6897506925207758e-05, |
|
"loss": 3.9285, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 155.47, |
|
"learning_rate": 1.6890581717451525e-05, |
|
"loss": 3.927, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 155.82, |
|
"learning_rate": 1.688365650969529e-05, |
|
"loss": 3.9304, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_loss": 3.8501617908477783, |
|
"eval_runtime": 75.8197, |
|
"eval_samples_per_second": 6494.673, |
|
"eval_steps_per_second": 2.123, |
|
"step": 225264 |
|
}, |
|
{ |
|
"epoch": 156.16, |
|
"learning_rate": 1.687673130193906e-05, |
|
"loss": 3.9299, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 156.51, |
|
"learning_rate": 1.686980609418283e-05, |
|
"loss": 3.9282, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 156.86, |
|
"learning_rate": 1.6862880886426592e-05, |
|
"loss": 3.9257, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_loss": 3.8476805686950684, |
|
"eval_runtime": 73.4445, |
|
"eval_samples_per_second": 6704.713, |
|
"eval_steps_per_second": 2.192, |
|
"step": 226708 |
|
}, |
|
{ |
|
"epoch": 157.2, |
|
"learning_rate": 1.6855955678670363e-05, |
|
"loss": 3.9263, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 157.55, |
|
"learning_rate": 1.684903047091413e-05, |
|
"loss": 3.9247, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 157.89, |
|
"learning_rate": 1.6842105263157896e-05, |
|
"loss": 3.9242, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_loss": 3.8495914936065674, |
|
"eval_runtime": 74.4187, |
|
"eval_samples_per_second": 6616.942, |
|
"eval_steps_per_second": 2.163, |
|
"step": 228152 |
|
}, |
|
{ |
|
"epoch": 158.24, |
|
"learning_rate": 1.6835180055401663e-05, |
|
"loss": 3.9235, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 158.59, |
|
"learning_rate": 1.682825484764543e-05, |
|
"loss": 3.9275, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 158.93, |
|
"learning_rate": 1.68213296398892e-05, |
|
"loss": 3.9219, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_loss": 3.850391387939453, |
|
"eval_runtime": 74.2125, |
|
"eval_samples_per_second": 6635.322, |
|
"eval_steps_per_second": 2.169, |
|
"step": 229596 |
|
}, |
|
{ |
|
"epoch": 159.28, |
|
"learning_rate": 1.6814404432132964e-05, |
|
"loss": 3.9203, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 159.63, |
|
"learning_rate": 1.680747922437673e-05, |
|
"loss": 3.9239, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 159.97, |
|
"learning_rate": 1.68005540166205e-05, |
|
"loss": 3.9236, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_loss": 3.8478267192840576, |
|
"eval_runtime": 74.9917, |
|
"eval_samples_per_second": 6566.379, |
|
"eval_steps_per_second": 2.147, |
|
"step": 231040 |
|
}, |
|
{ |
|
"epoch": 160.32, |
|
"learning_rate": 1.6793628808864265e-05, |
|
"loss": 3.9238, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 160.66, |
|
"learning_rate": 1.6786703601108035e-05, |
|
"loss": 3.9222, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_loss": 3.8385064601898193, |
|
"eval_runtime": 74.3809, |
|
"eval_samples_per_second": 6620.298, |
|
"eval_steps_per_second": 2.165, |
|
"step": 232484 |
|
}, |
|
{ |
|
"epoch": 161.01, |
|
"learning_rate": 1.6779778393351802e-05, |
|
"loss": 3.92, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 161.36, |
|
"learning_rate": 1.677285318559557e-05, |
|
"loss": 3.9184, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 161.7, |
|
"learning_rate": 1.6765927977839336e-05, |
|
"loss": 3.9231, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_loss": 3.844747543334961, |
|
"eval_runtime": 73.4727, |
|
"eval_samples_per_second": 6702.138, |
|
"eval_steps_per_second": 2.191, |
|
"step": 233928 |
|
}, |
|
{ |
|
"epoch": 162.05, |
|
"learning_rate": 1.6759002770083103e-05, |
|
"loss": 3.9186, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 162.4, |
|
"learning_rate": 1.6752077562326873e-05, |
|
"loss": 3.9207, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 162.74, |
|
"learning_rate": 1.674515235457064e-05, |
|
"loss": 3.9161, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_loss": 3.839341640472412, |
|
"eval_runtime": 74.6904, |
|
"eval_samples_per_second": 6592.87, |
|
"eval_steps_per_second": 2.156, |
|
"step": 235372 |
|
}, |
|
{ |
|
"epoch": 163.09, |
|
"learning_rate": 1.6738227146814404e-05, |
|
"loss": 3.9223, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 163.43, |
|
"learning_rate": 1.6731301939058174e-05, |
|
"loss": 3.9151, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 163.78, |
|
"learning_rate": 1.672437673130194e-05, |
|
"loss": 3.918, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_loss": 3.8450632095336914, |
|
"eval_runtime": 74.8435, |
|
"eval_samples_per_second": 6579.385, |
|
"eval_steps_per_second": 2.151, |
|
"step": 236816 |
|
}, |
|
{ |
|
"epoch": 164.13, |
|
"learning_rate": 1.6717451523545708e-05, |
|
"loss": 3.9184, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 164.47, |
|
"learning_rate": 1.6710526315789475e-05, |
|
"loss": 3.9175, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 164.82, |
|
"learning_rate": 1.670360110803324e-05, |
|
"loss": 3.9153, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_loss": 3.8364455699920654, |
|
"eval_runtime": 75.2179, |
|
"eval_samples_per_second": 6546.636, |
|
"eval_steps_per_second": 2.14, |
|
"step": 238260 |
|
}, |
|
{ |
|
"epoch": 165.17, |
|
"learning_rate": 1.6696675900277012e-05, |
|
"loss": 3.9151, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 165.51, |
|
"learning_rate": 1.6689750692520775e-05, |
|
"loss": 3.9139, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 165.86, |
|
"learning_rate": 1.6682825484764546e-05, |
|
"loss": 3.9158, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_loss": 3.8352112770080566, |
|
"eval_runtime": 74.8807, |
|
"eval_samples_per_second": 6576.117, |
|
"eval_steps_per_second": 2.15, |
|
"step": 239704 |
|
}, |
|
{ |
|
"epoch": 166.2, |
|
"learning_rate": 1.6675900277008313e-05, |
|
"loss": 3.9151, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 166.55, |
|
"learning_rate": 1.666897506925208e-05, |
|
"loss": 3.9146, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 166.9, |
|
"learning_rate": 1.6662049861495847e-05, |
|
"loss": 3.9138, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_loss": 3.835671901702881, |
|
"eval_runtime": 74.7206, |
|
"eval_samples_per_second": 6590.203, |
|
"eval_steps_per_second": 2.155, |
|
"step": 241148 |
|
}, |
|
{ |
|
"epoch": 167.24, |
|
"learning_rate": 1.6655124653739613e-05, |
|
"loss": 3.9147, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 167.59, |
|
"learning_rate": 1.664819944598338e-05, |
|
"loss": 3.9128, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 167.94, |
|
"learning_rate": 1.6641274238227147e-05, |
|
"loss": 3.9135, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_loss": 3.829854726791382, |
|
"eval_runtime": 72.8907, |
|
"eval_samples_per_second": 6755.652, |
|
"eval_steps_per_second": 2.209, |
|
"step": 242592 |
|
}, |
|
{ |
|
"epoch": 168.28, |
|
"learning_rate": 1.6634349030470914e-05, |
|
"loss": 3.9116, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 168.63, |
|
"learning_rate": 1.6627423822714685e-05, |
|
"loss": 3.9104, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 168.98, |
|
"learning_rate": 1.6620498614958448e-05, |
|
"loss": 3.9086, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_loss": 3.83186411857605, |
|
"eval_runtime": 73.8836, |
|
"eval_samples_per_second": 6664.86, |
|
"eval_steps_per_second": 2.179, |
|
"step": 244036 |
|
}, |
|
{ |
|
"epoch": 169.32, |
|
"learning_rate": 1.661357340720222e-05, |
|
"loss": 3.9102, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 169.67, |
|
"learning_rate": 1.6606648199445985e-05, |
|
"loss": 3.9093, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_loss": 3.831186532974243, |
|
"eval_runtime": 75.6023, |
|
"eval_samples_per_second": 6513.346, |
|
"eval_steps_per_second": 2.13, |
|
"step": 245480 |
|
}, |
|
{ |
|
"epoch": 170.01, |
|
"learning_rate": 1.6599722991689752e-05, |
|
"loss": 3.9092, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 170.36, |
|
"learning_rate": 1.659279778393352e-05, |
|
"loss": 3.9083, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 170.71, |
|
"learning_rate": 1.6585872576177286e-05, |
|
"loss": 3.9087, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_loss": 3.8296992778778076, |
|
"eval_runtime": 74.0581, |
|
"eval_samples_per_second": 6649.158, |
|
"eval_steps_per_second": 2.174, |
|
"step": 246924 |
|
}, |
|
{ |
|
"epoch": 171.05, |
|
"learning_rate": 1.6578947368421053e-05, |
|
"loss": 3.9097, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 171.4, |
|
"learning_rate": 1.6572022160664823e-05, |
|
"loss": 3.9103, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 171.75, |
|
"learning_rate": 1.6565096952908587e-05, |
|
"loss": 3.9078, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_loss": 3.8280537128448486, |
|
"eval_runtime": 74.6178, |
|
"eval_samples_per_second": 6599.284, |
|
"eval_steps_per_second": 2.158, |
|
"step": 248368 |
|
}, |
|
{ |
|
"epoch": 172.09, |
|
"learning_rate": 1.6558171745152357e-05, |
|
"loss": 3.9065, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 172.44, |
|
"learning_rate": 1.6551246537396124e-05, |
|
"loss": 3.9052, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 172.78, |
|
"learning_rate": 1.654432132963989e-05, |
|
"loss": 3.9052, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_loss": 3.828386068344116, |
|
"eval_runtime": 74.5947, |
|
"eval_samples_per_second": 6601.323, |
|
"eval_steps_per_second": 2.158, |
|
"step": 249812 |
|
}, |
|
{ |
|
"epoch": 173.13, |
|
"learning_rate": 1.6537396121883658e-05, |
|
"loss": 3.9037, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 173.48, |
|
"learning_rate": 1.6530470914127425e-05, |
|
"loss": 3.9046, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 173.82, |
|
"learning_rate": 1.6523545706371195e-05, |
|
"loss": 3.9059, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_loss": 3.82259464263916, |
|
"eval_runtime": 75.1329, |
|
"eval_samples_per_second": 6554.041, |
|
"eval_steps_per_second": 2.143, |
|
"step": 251256 |
|
}, |
|
{ |
|
"epoch": 174.17, |
|
"learning_rate": 1.651662049861496e-05, |
|
"loss": 3.9044, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 174.52, |
|
"learning_rate": 1.6509695290858726e-05, |
|
"loss": 3.9041, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 174.86, |
|
"learning_rate": 1.6502770083102496e-05, |
|
"loss": 3.9006, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_loss": 3.8257510662078857, |
|
"eval_runtime": 75.0788, |
|
"eval_samples_per_second": 6558.759, |
|
"eval_steps_per_second": 2.144, |
|
"step": 252700 |
|
}, |
|
{ |
|
"epoch": 175.21, |
|
"learning_rate": 1.649584487534626e-05, |
|
"loss": 3.9025, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 175.55, |
|
"learning_rate": 1.648891966759003e-05, |
|
"loss": 3.9015, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 175.9, |
|
"learning_rate": 1.6481994459833797e-05, |
|
"loss": 3.9046, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_loss": 3.8246407508850098, |
|
"eval_runtime": 76.1781, |
|
"eval_samples_per_second": 6464.119, |
|
"eval_steps_per_second": 2.113, |
|
"step": 254144 |
|
}, |
|
{ |
|
"epoch": 176.25, |
|
"learning_rate": 1.6475069252077564e-05, |
|
"loss": 3.9034, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 176.59, |
|
"learning_rate": 1.646814404432133e-05, |
|
"loss": 3.8996, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 176.94, |
|
"learning_rate": 1.6461218836565097e-05, |
|
"loss": 3.9027, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_loss": 3.81931471824646, |
|
"eval_runtime": 74.6152, |
|
"eval_samples_per_second": 6599.512, |
|
"eval_steps_per_second": 2.158, |
|
"step": 255588 |
|
}, |
|
{ |
|
"epoch": 177.29, |
|
"learning_rate": 1.6454293628808868e-05, |
|
"loss": 3.9021, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 177.63, |
|
"learning_rate": 1.644736842105263e-05, |
|
"loss": 3.8989, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 177.98, |
|
"learning_rate": 1.6440443213296398e-05, |
|
"loss": 3.9024, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_loss": 3.819705009460449, |
|
"eval_runtime": 75.4024, |
|
"eval_samples_per_second": 6530.618, |
|
"eval_steps_per_second": 2.135, |
|
"step": 257032 |
|
}, |
|
{ |
|
"epoch": 178.32, |
|
"learning_rate": 1.643351800554017e-05, |
|
"loss": 3.9012, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 178.67, |
|
"learning_rate": 1.6426592797783935e-05, |
|
"loss": 3.9007, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_loss": 3.817988634109497, |
|
"eval_runtime": 74.2631, |
|
"eval_samples_per_second": 6630.806, |
|
"eval_steps_per_second": 2.168, |
|
"step": 258476 |
|
}, |
|
{ |
|
"epoch": 179.02, |
|
"learning_rate": 1.6419667590027702e-05, |
|
"loss": 3.8962, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 179.36, |
|
"learning_rate": 1.641274238227147e-05, |
|
"loss": 3.9005, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 179.71, |
|
"learning_rate": 1.6405817174515236e-05, |
|
"loss": 3.897, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_loss": 3.8208701610565186, |
|
"eval_runtime": 75.7163, |
|
"eval_samples_per_second": 6503.537, |
|
"eval_steps_per_second": 2.126, |
|
"step": 259920 |
|
}, |
|
{ |
|
"epoch": 180.06, |
|
"learning_rate": 1.6398891966759006e-05, |
|
"loss": 3.9019, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 180.4, |
|
"learning_rate": 1.639196675900277e-05, |
|
"loss": 3.8984, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 180.75, |
|
"learning_rate": 1.638504155124654e-05, |
|
"loss": 3.8967, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_loss": 3.814295768737793, |
|
"eval_runtime": 74.2326, |
|
"eval_samples_per_second": 6633.527, |
|
"eval_steps_per_second": 2.169, |
|
"step": 261364 |
|
}, |
|
{ |
|
"epoch": 181.09, |
|
"learning_rate": 1.6378116343490307e-05, |
|
"loss": 3.8996, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 181.44, |
|
"learning_rate": 1.6371191135734074e-05, |
|
"loss": 3.8951, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 181.79, |
|
"learning_rate": 1.636426592797784e-05, |
|
"loss": 3.8978, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_loss": 3.816028118133545, |
|
"eval_runtime": 74.7425, |
|
"eval_samples_per_second": 6588.271, |
|
"eval_steps_per_second": 2.154, |
|
"step": 262808 |
|
}, |
|
{ |
|
"epoch": 182.13, |
|
"learning_rate": 1.6357340720221608e-05, |
|
"loss": 3.896, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 182.48, |
|
"learning_rate": 1.6350415512465375e-05, |
|
"loss": 3.8943, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 182.83, |
|
"learning_rate": 1.6343490304709142e-05, |
|
"loss": 3.894, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_loss": 3.8133652210235596, |
|
"eval_runtime": 76.198, |
|
"eval_samples_per_second": 6462.426, |
|
"eval_steps_per_second": 2.113, |
|
"step": 264252 |
|
}, |
|
{ |
|
"epoch": 183.17, |
|
"learning_rate": 1.633656509695291e-05, |
|
"loss": 3.8947, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 183.52, |
|
"learning_rate": 1.632963988919668e-05, |
|
"loss": 3.8949, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 183.86, |
|
"learning_rate": 1.6322714681440443e-05, |
|
"loss": 3.8919, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_loss": 3.8209118843078613, |
|
"eval_runtime": 74.4007, |
|
"eval_samples_per_second": 6618.538, |
|
"eval_steps_per_second": 2.164, |
|
"step": 265696 |
|
}, |
|
{ |
|
"epoch": 184.21, |
|
"learning_rate": 1.6315789473684213e-05, |
|
"loss": 3.8918, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 184.56, |
|
"learning_rate": 1.630886426592798e-05, |
|
"loss": 3.893, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 184.9, |
|
"learning_rate": 1.6301939058171747e-05, |
|
"loss": 3.8915, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_loss": 3.812711477279663, |
|
"eval_runtime": 74.8088, |
|
"eval_samples_per_second": 6582.437, |
|
"eval_steps_per_second": 2.152, |
|
"step": 267140 |
|
}, |
|
{ |
|
"epoch": 185.25, |
|
"learning_rate": 1.6295013850415514e-05, |
|
"loss": 3.8911, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 185.6, |
|
"learning_rate": 1.628808864265928e-05, |
|
"loss": 3.8878, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 185.94, |
|
"learning_rate": 1.6281163434903047e-05, |
|
"loss": 3.8909, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_loss": 3.8139772415161133, |
|
"eval_runtime": 73.8157, |
|
"eval_samples_per_second": 6670.991, |
|
"eval_steps_per_second": 2.181, |
|
"step": 268584 |
|
}, |
|
{ |
|
"epoch": 186.29, |
|
"learning_rate": 1.6274238227146814e-05, |
|
"loss": 3.8906, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 186.63, |
|
"learning_rate": 1.626731301939058e-05, |
|
"loss": 3.8891, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 186.98, |
|
"learning_rate": 1.626038781163435e-05, |
|
"loss": 3.8894, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_loss": 3.811793327331543, |
|
"eval_runtime": 76.5559, |
|
"eval_samples_per_second": 6432.215, |
|
"eval_steps_per_second": 2.103, |
|
"step": 270028 |
|
}, |
|
{ |
|
"epoch": 187.33, |
|
"learning_rate": 1.625346260387812e-05, |
|
"loss": 3.8857, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 187.67, |
|
"learning_rate": 1.6246537396121885e-05, |
|
"loss": 3.886, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_loss": 3.812798500061035, |
|
"eval_runtime": 73.9286, |
|
"eval_samples_per_second": 6660.807, |
|
"eval_steps_per_second": 2.178, |
|
"step": 271472 |
|
}, |
|
{ |
|
"epoch": 188.02, |
|
"learning_rate": 1.6239612188365652e-05, |
|
"loss": 3.8875, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 188.37, |
|
"learning_rate": 1.623268698060942e-05, |
|
"loss": 3.8906, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 188.71, |
|
"learning_rate": 1.622576177285319e-05, |
|
"loss": 3.8868, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_loss": 3.808310031890869, |
|
"eval_runtime": 74.6, |
|
"eval_samples_per_second": 6600.861, |
|
"eval_steps_per_second": 2.158, |
|
"step": 272916 |
|
}, |
|
{ |
|
"epoch": 189.06, |
|
"learning_rate": 1.6218836565096953e-05, |
|
"loss": 3.8846, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 189.4, |
|
"learning_rate": 1.621191135734072e-05, |
|
"loss": 3.8873, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 189.75, |
|
"learning_rate": 1.620498614958449e-05, |
|
"loss": 3.8827, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_loss": 3.8040313720703125, |
|
"eval_runtime": 73.574, |
|
"eval_samples_per_second": 6692.91, |
|
"eval_steps_per_second": 2.188, |
|
"step": 274360 |
|
}, |
|
{ |
|
"epoch": 190.1, |
|
"learning_rate": 1.6198060941828254e-05, |
|
"loss": 3.888, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 190.44, |
|
"learning_rate": 1.6191135734072024e-05, |
|
"loss": 3.8879, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 190.79, |
|
"learning_rate": 1.618421052631579e-05, |
|
"loss": 3.8854, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_loss": 3.8076326847076416, |
|
"eval_runtime": 73.5075, |
|
"eval_samples_per_second": 6698.963, |
|
"eval_steps_per_second": 2.19, |
|
"step": 275804 |
|
}, |
|
{ |
|
"epoch": 191.14, |
|
"learning_rate": 1.6177285318559558e-05, |
|
"loss": 3.8866, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 191.48, |
|
"learning_rate": 1.6170360110803325e-05, |
|
"loss": 3.8838, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 191.83, |
|
"learning_rate": 1.6163434903047092e-05, |
|
"loss": 3.8814, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_loss": 3.8077051639556885, |
|
"eval_runtime": 74.0447, |
|
"eval_samples_per_second": 6650.364, |
|
"eval_steps_per_second": 2.174, |
|
"step": 277248 |
|
}, |
|
{ |
|
"epoch": 192.17, |
|
"learning_rate": 1.6156509695290862e-05, |
|
"loss": 3.8826, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 192.52, |
|
"learning_rate": 1.6149584487534626e-05, |
|
"loss": 3.8837, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 192.87, |
|
"learning_rate": 1.6142659279778396e-05, |
|
"loss": 3.8825, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_loss": 3.8101003170013428, |
|
"eval_runtime": 74.4578, |
|
"eval_samples_per_second": 6613.463, |
|
"eval_steps_per_second": 2.162, |
|
"step": 278692 |
|
}, |
|
{ |
|
"epoch": 193.21, |
|
"learning_rate": 1.6135734072022163e-05, |
|
"loss": 3.8808, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 193.56, |
|
"learning_rate": 1.612880886426593e-05, |
|
"loss": 3.8832, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 193.91, |
|
"learning_rate": 1.6121883656509697e-05, |
|
"loss": 3.8824, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_loss": 3.8035974502563477, |
|
"eval_runtime": 73.2562, |
|
"eval_samples_per_second": 6721.947, |
|
"eval_steps_per_second": 2.198, |
|
"step": 280136 |
|
}, |
|
{ |
|
"epoch": 194.25, |
|
"learning_rate": 1.6114958448753464e-05, |
|
"loss": 3.8806, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 194.6, |
|
"learning_rate": 1.610803324099723e-05, |
|
"loss": 3.8783, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 194.94, |
|
"learning_rate": 1.6101108033240998e-05, |
|
"loss": 3.8784, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_loss": 3.8016698360443115, |
|
"eval_runtime": 75.1162, |
|
"eval_samples_per_second": 6555.495, |
|
"eval_steps_per_second": 2.143, |
|
"step": 281580 |
|
}, |
|
{ |
|
"epoch": 195.29, |
|
"learning_rate": 1.6094182825484764e-05, |
|
"loss": 3.8792, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 195.64, |
|
"learning_rate": 1.6087257617728535e-05, |
|
"loss": 3.8828, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 195.98, |
|
"learning_rate": 1.60803324099723e-05, |
|
"loss": 3.8796, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_loss": 3.8056745529174805, |
|
"eval_runtime": 74.4209, |
|
"eval_samples_per_second": 6616.747, |
|
"eval_steps_per_second": 2.163, |
|
"step": 283024 |
|
}, |
|
{ |
|
"epoch": 196.33, |
|
"learning_rate": 1.607340720221607e-05, |
|
"loss": 3.8769, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 196.68, |
|
"learning_rate": 1.6066481994459835e-05, |
|
"loss": 3.8801, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_loss": 3.8007116317749023, |
|
"eval_runtime": 77.2912, |
|
"eval_samples_per_second": 6371.024, |
|
"eval_steps_per_second": 2.083, |
|
"step": 284468 |
|
}, |
|
{ |
|
"epoch": 197.02, |
|
"learning_rate": 1.6059556786703602e-05, |
|
"loss": 3.8765, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 197.37, |
|
"learning_rate": 1.605263157894737e-05, |
|
"loss": 3.8789, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 197.71, |
|
"learning_rate": 1.6045706371191136e-05, |
|
"loss": 3.8787, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_loss": 3.7981278896331787, |
|
"eval_runtime": 74.6743, |
|
"eval_samples_per_second": 6594.29, |
|
"eval_steps_per_second": 2.156, |
|
"step": 285912 |
|
}, |
|
{ |
|
"epoch": 198.06, |
|
"learning_rate": 1.6038781163434903e-05, |
|
"loss": 3.8782, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 198.41, |
|
"learning_rate": 1.6031855955678673e-05, |
|
"loss": 3.8791, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 198.75, |
|
"learning_rate": 1.6024930747922437e-05, |
|
"loss": 3.876, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_loss": 3.800157070159912, |
|
"eval_runtime": 74.4204, |
|
"eval_samples_per_second": 6616.785, |
|
"eval_steps_per_second": 2.163, |
|
"step": 287356 |
|
}, |
|
{ |
|
"epoch": 199.1, |
|
"learning_rate": 1.6018005540166207e-05, |
|
"loss": 3.8771, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 199.45, |
|
"learning_rate": 1.6011080332409974e-05, |
|
"loss": 3.8753, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 199.79, |
|
"learning_rate": 1.600415512465374e-05, |
|
"loss": 3.8739, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 3.801537036895752, |
|
"eval_runtime": 73.7536, |
|
"eval_samples_per_second": 6676.611, |
|
"eval_steps_per_second": 2.183, |
|
"step": 288800 |
|
}, |
|
{ |
|
"epoch": 200.14, |
|
"learning_rate": 1.5997229916897508e-05, |
|
"loss": 3.8727, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 200.48, |
|
"learning_rate": 1.5990304709141275e-05, |
|
"loss": 3.8757, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 200.83, |
|
"learning_rate": 1.5983379501385042e-05, |
|
"loss": 3.8718, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 201.0, |
|
"eval_loss": 3.7952911853790283, |
|
"eval_runtime": 79.0161, |
|
"eval_samples_per_second": 6231.949, |
|
"eval_steps_per_second": 2.038, |
|
"step": 290244 |
|
}, |
|
{ |
|
"epoch": 201.18, |
|
"learning_rate": 1.597645429362881e-05, |
|
"loss": 3.8703, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 201.52, |
|
"learning_rate": 1.5969529085872576e-05, |
|
"loss": 3.8742, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 201.87, |
|
"learning_rate": 1.5962603878116346e-05, |
|
"loss": 3.8728, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 202.0, |
|
"eval_loss": 3.794914484024048, |
|
"eval_runtime": 73.3296, |
|
"eval_samples_per_second": 6715.218, |
|
"eval_steps_per_second": 2.196, |
|
"step": 291688 |
|
}, |
|
{ |
|
"epoch": 202.22, |
|
"learning_rate": 1.5955678670360113e-05, |
|
"loss": 3.8716, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 202.56, |
|
"learning_rate": 1.594875346260388e-05, |
|
"loss": 3.8721, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 202.91, |
|
"learning_rate": 1.5941828254847647e-05, |
|
"loss": 3.871, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 203.0, |
|
"eval_loss": 3.791077136993408, |
|
"eval_runtime": 75.7759, |
|
"eval_samples_per_second": 6498.426, |
|
"eval_steps_per_second": 2.125, |
|
"step": 293132 |
|
}, |
|
{ |
|
"epoch": 203.25, |
|
"learning_rate": 1.5934903047091414e-05, |
|
"loss": 3.8718, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 203.6, |
|
"learning_rate": 1.592797783933518e-05, |
|
"loss": 3.8714, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 203.95, |
|
"learning_rate": 1.5921052631578948e-05, |
|
"loss": 3.8686, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 204.0, |
|
"eval_loss": 3.791466474533081, |
|
"eval_runtime": 76.174, |
|
"eval_samples_per_second": 6464.462, |
|
"eval_steps_per_second": 2.114, |
|
"step": 294576 |
|
}, |
|
{ |
|
"epoch": 204.29, |
|
"learning_rate": 1.5914127423822714e-05, |
|
"loss": 3.8699, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 204.64, |
|
"learning_rate": 1.5907202216066485e-05, |
|
"loss": 3.8704, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 204.99, |
|
"learning_rate": 1.590027700831025e-05, |
|
"loss": 3.8727, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 205.0, |
|
"eval_loss": 3.7933382987976074, |
|
"eval_runtime": 75.2857, |
|
"eval_samples_per_second": 6540.737, |
|
"eval_steps_per_second": 2.139, |
|
"step": 296020 |
|
}, |
|
{ |
|
"epoch": 205.33, |
|
"learning_rate": 1.589335180055402e-05, |
|
"loss": 3.8698, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 205.68, |
|
"learning_rate": 1.5886426592797786e-05, |
|
"loss": 3.8697, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 206.0, |
|
"eval_loss": 3.792696952819824, |
|
"eval_runtime": 77.0496, |
|
"eval_samples_per_second": 6390.998, |
|
"eval_steps_per_second": 2.09, |
|
"step": 297464 |
|
}, |
|
{ |
|
"epoch": 206.02, |
|
"learning_rate": 1.5879501385041552e-05, |
|
"loss": 3.8676, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 206.37, |
|
"learning_rate": 1.587257617728532e-05, |
|
"loss": 3.8685, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 206.72, |
|
"learning_rate": 1.5865650969529086e-05, |
|
"loss": 3.8684, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 207.0, |
|
"eval_loss": 3.792511224746704, |
|
"eval_runtime": 72.6673, |
|
"eval_samples_per_second": 6776.414, |
|
"eval_steps_per_second": 2.216, |
|
"step": 298908 |
|
}, |
|
{ |
|
"epoch": 207.06, |
|
"learning_rate": 1.5858725761772857e-05, |
|
"loss": 3.8679, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 207.41, |
|
"learning_rate": 1.585180055401662e-05, |
|
"loss": 3.8687, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 207.76, |
|
"learning_rate": 1.584487534626039e-05, |
|
"loss": 3.8681, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_loss": 3.790055990219116, |
|
"eval_runtime": 74.2758, |
|
"eval_samples_per_second": 6629.67, |
|
"eval_steps_per_second": 2.168, |
|
"step": 300352 |
|
}, |
|
{ |
|
"epoch": 208.1, |
|
"learning_rate": 1.5837950138504157e-05, |
|
"loss": 3.8691, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 208.45, |
|
"learning_rate": 1.5831024930747924e-05, |
|
"loss": 3.8646, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 208.8, |
|
"learning_rate": 1.582409972299169e-05, |
|
"loss": 3.8665, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 209.0, |
|
"eval_loss": 3.7894084453582764, |
|
"eval_runtime": 74.4843, |
|
"eval_samples_per_second": 6611.112, |
|
"eval_steps_per_second": 2.162, |
|
"step": 301796 |
|
}, |
|
{ |
|
"epoch": 209.14, |
|
"learning_rate": 1.5817174515235458e-05, |
|
"loss": 3.8649, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 209.49, |
|
"learning_rate": 1.5810249307479225e-05, |
|
"loss": 3.8624, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 209.83, |
|
"learning_rate": 1.5803324099722992e-05, |
|
"loss": 3.868, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 210.0, |
|
"eval_loss": 3.7948224544525146, |
|
"eval_runtime": 77.5312, |
|
"eval_samples_per_second": 6351.3, |
|
"eval_steps_per_second": 2.077, |
|
"step": 303240 |
|
}, |
|
{ |
|
"epoch": 210.18, |
|
"learning_rate": 1.579639889196676e-05, |
|
"loss": 3.8675, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 210.53, |
|
"learning_rate": 1.578947368421053e-05, |
|
"loss": 3.8625, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 210.87, |
|
"learning_rate": 1.5782548476454296e-05, |
|
"loss": 3.8626, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 211.0, |
|
"eval_loss": 3.7883081436157227, |
|
"eval_runtime": 74.8996, |
|
"eval_samples_per_second": 6574.459, |
|
"eval_steps_per_second": 2.15, |
|
"step": 304684 |
|
}, |
|
{ |
|
"epoch": 211.22, |
|
"learning_rate": 1.5775623268698063e-05, |
|
"loss": 3.8637, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 211.57, |
|
"learning_rate": 1.576869806094183e-05, |
|
"loss": 3.864, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 211.91, |
|
"learning_rate": 1.5761772853185597e-05, |
|
"loss": 3.8627, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 212.0, |
|
"eval_loss": 3.7848432064056396, |
|
"eval_runtime": 74.92, |
|
"eval_samples_per_second": 6572.667, |
|
"eval_steps_per_second": 2.149, |
|
"step": 306128 |
|
}, |
|
{ |
|
"epoch": 212.26, |
|
"learning_rate": 1.5754847645429364e-05, |
|
"loss": 3.8658, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 212.6, |
|
"learning_rate": 1.574792243767313e-05, |
|
"loss": 3.8599, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 212.95, |
|
"learning_rate": 1.5740997229916898e-05, |
|
"loss": 3.865, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 213.0, |
|
"eval_loss": 3.787092924118042, |
|
"eval_runtime": 75.2395, |
|
"eval_samples_per_second": 6544.757, |
|
"eval_steps_per_second": 2.14, |
|
"step": 307572 |
|
}, |
|
{ |
|
"epoch": 213.3, |
|
"learning_rate": 1.5734072022160668e-05, |
|
"loss": 3.861, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 213.64, |
|
"learning_rate": 1.572714681440443e-05, |
|
"loss": 3.8631, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 213.99, |
|
"learning_rate": 1.5720221606648202e-05, |
|
"loss": 3.8607, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 214.0, |
|
"eval_loss": 3.785153865814209, |
|
"eval_runtime": 74.7564, |
|
"eval_samples_per_second": 6587.052, |
|
"eval_steps_per_second": 2.154, |
|
"step": 309016 |
|
}, |
|
{ |
|
"epoch": 214.34, |
|
"learning_rate": 1.571329639889197e-05, |
|
"loss": 3.8591, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 214.68, |
|
"learning_rate": 1.5706371191135736e-05, |
|
"loss": 3.8609, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 215.0, |
|
"eval_loss": 3.788590431213379, |
|
"eval_runtime": 76.5904, |
|
"eval_samples_per_second": 6429.315, |
|
"eval_steps_per_second": 2.102, |
|
"step": 310460 |
|
}, |
|
{ |
|
"epoch": 215.03, |
|
"learning_rate": 1.5699445983379503e-05, |
|
"loss": 3.8604, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 215.37, |
|
"learning_rate": 1.569252077562327e-05, |
|
"loss": 3.8577, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 215.72, |
|
"learning_rate": 1.5685595567867036e-05, |
|
"loss": 3.8599, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 216.0, |
|
"eval_loss": 3.7840237617492676, |
|
"eval_runtime": 74.4828, |
|
"eval_samples_per_second": 6611.243, |
|
"eval_steps_per_second": 2.162, |
|
"step": 311904 |
|
}, |
|
{ |
|
"epoch": 216.07, |
|
"learning_rate": 1.5678670360110803e-05, |
|
"loss": 3.8602, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 216.41, |
|
"learning_rate": 1.567174515235457e-05, |
|
"loss": 3.8573, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 216.76, |
|
"learning_rate": 1.566481994459834e-05, |
|
"loss": 3.8599, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 217.0, |
|
"eval_loss": 3.785320281982422, |
|
"eval_runtime": 74.9359, |
|
"eval_samples_per_second": 6571.268, |
|
"eval_steps_per_second": 2.149, |
|
"step": 313348 |
|
}, |
|
{ |
|
"epoch": 217.11, |
|
"learning_rate": 1.5657894736842107e-05, |
|
"loss": 3.859, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 217.45, |
|
"learning_rate": 1.5650969529085874e-05, |
|
"loss": 3.8593, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 217.8, |
|
"learning_rate": 1.564404432132964e-05, |
|
"loss": 3.8573, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 218.0, |
|
"eval_loss": 3.7822437286376953, |
|
"eval_runtime": 74.2352, |
|
"eval_samples_per_second": 6633.293, |
|
"eval_steps_per_second": 2.169, |
|
"step": 314792 |
|
}, |
|
{ |
|
"epoch": 218.14, |
|
"learning_rate": 1.5637119113573408e-05, |
|
"loss": 3.8571, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 218.49, |
|
"learning_rate": 1.5630193905817175e-05, |
|
"loss": 3.8573, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 218.84, |
|
"learning_rate": 1.5623268698060942e-05, |
|
"loss": 3.856, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 219.0, |
|
"eval_loss": 3.7809722423553467, |
|
"eval_runtime": 75.1168, |
|
"eval_samples_per_second": 6555.443, |
|
"eval_steps_per_second": 2.143, |
|
"step": 316236 |
|
}, |
|
{ |
|
"epoch": 219.18, |
|
"learning_rate": 1.561634349030471e-05, |
|
"loss": 3.857, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 219.53, |
|
"learning_rate": 1.560941828254848e-05, |
|
"loss": 3.8534, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 219.88, |
|
"learning_rate": 1.5602493074792243e-05, |
|
"loss": 3.8546, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 220.0, |
|
"eval_loss": 3.775569200515747, |
|
"eval_runtime": 74.0328, |
|
"eval_samples_per_second": 6651.431, |
|
"eval_steps_per_second": 2.175, |
|
"step": 317680 |
|
}, |
|
{ |
|
"epoch": 220.22, |
|
"learning_rate": 1.5595567867036013e-05, |
|
"loss": 3.8552, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 220.57, |
|
"learning_rate": 1.558864265927978e-05, |
|
"loss": 3.8551, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 220.91, |
|
"learning_rate": 1.5581717451523547e-05, |
|
"loss": 3.856, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 221.0, |
|
"eval_loss": 3.7812447547912598, |
|
"eval_runtime": 77.0452, |
|
"eval_samples_per_second": 6391.363, |
|
"eval_steps_per_second": 2.09, |
|
"step": 319124 |
|
}, |
|
{ |
|
"epoch": 221.26, |
|
"learning_rate": 1.5574792243767314e-05, |
|
"loss": 3.8573, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 221.61, |
|
"learning_rate": 1.556786703601108e-05, |
|
"loss": 3.8548, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 221.95, |
|
"learning_rate": 1.556094182825485e-05, |
|
"loss": 3.8558, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 222.0, |
|
"eval_loss": 3.781160593032837, |
|
"eval_runtime": 77.0908, |
|
"eval_samples_per_second": 6387.583, |
|
"eval_steps_per_second": 2.088, |
|
"step": 320568 |
|
}, |
|
{ |
|
"epoch": 222.3, |
|
"learning_rate": 1.5554016620498615e-05, |
|
"loss": 3.852, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 222.65, |
|
"learning_rate": 1.5547091412742385e-05, |
|
"loss": 3.8537, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 222.99, |
|
"learning_rate": 1.5540166204986152e-05, |
|
"loss": 3.8515, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 223.0, |
|
"eval_loss": 3.7702577114105225, |
|
"eval_runtime": 75.5669, |
|
"eval_samples_per_second": 6516.401, |
|
"eval_steps_per_second": 2.131, |
|
"step": 322012 |
|
}, |
|
{ |
|
"epoch": 223.34, |
|
"learning_rate": 1.553324099722992e-05, |
|
"loss": 3.852, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 223.68, |
|
"learning_rate": 1.5526315789473686e-05, |
|
"loss": 3.8495, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_loss": 3.772334575653076, |
|
"eval_runtime": 75.2529, |
|
"eval_samples_per_second": 6543.584, |
|
"eval_steps_per_second": 2.139, |
|
"step": 323456 |
|
}, |
|
{ |
|
"epoch": 224.03, |
|
"learning_rate": 1.5519390581717453e-05, |
|
"loss": 3.854, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 224.38, |
|
"learning_rate": 1.551246537396122e-05, |
|
"loss": 3.851, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 224.72, |
|
"learning_rate": 1.5505540166204986e-05, |
|
"loss": 3.8516, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 225.0, |
|
"eval_loss": 3.774179458618164, |
|
"eval_runtime": 75.9135, |
|
"eval_samples_per_second": 6486.645, |
|
"eval_steps_per_second": 2.121, |
|
"step": 324900 |
|
}, |
|
{ |
|
"epoch": 225.07, |
|
"learning_rate": 1.5498614958448753e-05, |
|
"loss": 3.8517, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 225.42, |
|
"learning_rate": 1.5491689750692524e-05, |
|
"loss": 3.8512, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 225.76, |
|
"learning_rate": 1.548476454293629e-05, |
|
"loss": 3.85, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 226.0, |
|
"eval_loss": 3.775554656982422, |
|
"eval_runtime": 74.3917, |
|
"eval_samples_per_second": 6619.337, |
|
"eval_steps_per_second": 2.164, |
|
"step": 326344 |
|
}, |
|
{ |
|
"epoch": 226.11, |
|
"learning_rate": 1.5477839335180058e-05, |
|
"loss": 3.85, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 226.45, |
|
"learning_rate": 1.5470914127423824e-05, |
|
"loss": 3.8516, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 226.8, |
|
"learning_rate": 1.546398891966759e-05, |
|
"loss": 3.8494, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 227.0, |
|
"eval_loss": 3.7784576416015625, |
|
"eval_runtime": 74.4719, |
|
"eval_samples_per_second": 6612.215, |
|
"eval_steps_per_second": 2.162, |
|
"step": 327788 |
|
}, |
|
{ |
|
"epoch": 227.15, |
|
"learning_rate": 1.5457063711911358e-05, |
|
"loss": 3.8482, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 227.49, |
|
"learning_rate": 1.5450138504155125e-05, |
|
"loss": 3.8501, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 227.84, |
|
"learning_rate": 1.5443213296398892e-05, |
|
"loss": 3.8519, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 228.0, |
|
"eval_loss": 3.771754741668701, |
|
"eval_runtime": 74.1616, |
|
"eval_samples_per_second": 6639.877, |
|
"eval_steps_per_second": 2.171, |
|
"step": 329232 |
|
}, |
|
{ |
|
"epoch": 228.19, |
|
"learning_rate": 1.5436288088642662e-05, |
|
"loss": 3.8472, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 228.53, |
|
"learning_rate": 1.5429362880886426e-05, |
|
"loss": 3.8508, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 228.88, |
|
"learning_rate": 1.5422437673130196e-05, |
|
"loss": 3.8471, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 229.0, |
|
"eval_loss": 3.7720766067504883, |
|
"eval_runtime": 74.897, |
|
"eval_samples_per_second": 6574.686, |
|
"eval_steps_per_second": 2.15, |
|
"step": 330676 |
|
}, |
|
{ |
|
"epoch": 229.22, |
|
"learning_rate": 1.5415512465373963e-05, |
|
"loss": 3.8494, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 229.57, |
|
"learning_rate": 1.540858725761773e-05, |
|
"loss": 3.8497, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 229.92, |
|
"learning_rate": 1.5401662049861497e-05, |
|
"loss": 3.8468, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 230.0, |
|
"eval_loss": 3.7739343643188477, |
|
"eval_runtime": 76.8844, |
|
"eval_samples_per_second": 6404.731, |
|
"eval_steps_per_second": 2.094, |
|
"step": 332120 |
|
}, |
|
{ |
|
"epoch": 230.26, |
|
"learning_rate": 1.5394736842105264e-05, |
|
"loss": 3.8449, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 230.61, |
|
"learning_rate": 1.538781163434903e-05, |
|
"loss": 3.8472, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 230.96, |
|
"learning_rate": 1.5380886426592798e-05, |
|
"loss": 3.8425, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 231.0, |
|
"eval_loss": 3.7709102630615234, |
|
"eval_runtime": 74.7014, |
|
"eval_samples_per_second": 6591.895, |
|
"eval_steps_per_second": 2.155, |
|
"step": 333564 |
|
}, |
|
{ |
|
"epoch": 231.3, |
|
"learning_rate": 1.5373961218836565e-05, |
|
"loss": 3.8435, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 231.65, |
|
"learning_rate": 1.5367036011080335e-05, |
|
"loss": 3.8466, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 231.99, |
|
"learning_rate": 1.5360110803324102e-05, |
|
"loss": 3.8475, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 232.0, |
|
"eval_loss": 3.770082950592041, |
|
"eval_runtime": 75.48, |
|
"eval_samples_per_second": 6523.901, |
|
"eval_steps_per_second": 2.133, |
|
"step": 335008 |
|
}, |
|
{ |
|
"epoch": 232.34, |
|
"learning_rate": 1.535318559556787e-05, |
|
"loss": 3.8447, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 232.69, |
|
"learning_rate": 1.5346260387811636e-05, |
|
"loss": 3.8462, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 233.0, |
|
"eval_loss": 3.764716148376465, |
|
"eval_runtime": 74.6162, |
|
"eval_samples_per_second": 6599.422, |
|
"eval_steps_per_second": 2.158, |
|
"step": 336452 |
|
}, |
|
{ |
|
"epoch": 233.03, |
|
"learning_rate": 1.5339335180055403e-05, |
|
"loss": 3.843, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 233.38, |
|
"learning_rate": 1.533240997229917e-05, |
|
"loss": 3.8405, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 233.73, |
|
"learning_rate": 1.5325484764542937e-05, |
|
"loss": 3.8431, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 234.0, |
|
"eval_loss": 3.7664144039154053, |
|
"eval_runtime": 74.5316, |
|
"eval_samples_per_second": 6606.916, |
|
"eval_steps_per_second": 2.16, |
|
"step": 337896 |
|
}, |
|
{ |
|
"epoch": 234.07, |
|
"learning_rate": 1.5318559556786703e-05, |
|
"loss": 3.842, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 234.42, |
|
"learning_rate": 1.5311634349030474e-05, |
|
"loss": 3.8452, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 234.76, |
|
"learning_rate": 1.5304709141274237e-05, |
|
"loss": 3.8427, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 235.0, |
|
"eval_loss": 3.766789197921753, |
|
"eval_runtime": 75.3868, |
|
"eval_samples_per_second": 6531.965, |
|
"eval_steps_per_second": 2.136, |
|
"step": 339340 |
|
}, |
|
{ |
|
"epoch": 235.11, |
|
"learning_rate": 1.5297783933518008e-05, |
|
"loss": 3.8428, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 235.46, |
|
"learning_rate": 1.5290858725761775e-05, |
|
"loss": 3.8405, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 235.8, |
|
"learning_rate": 1.528393351800554e-05, |
|
"loss": 3.8406, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 236.0, |
|
"eval_loss": 3.7623260021209717, |
|
"eval_runtime": 73.9616, |
|
"eval_samples_per_second": 6657.831, |
|
"eval_steps_per_second": 2.177, |
|
"step": 340784 |
|
}, |
|
{ |
|
"epoch": 236.15, |
|
"learning_rate": 1.527700831024931e-05, |
|
"loss": 3.8407, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 236.5, |
|
"learning_rate": 1.5270083102493075e-05, |
|
"loss": 3.8443, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 236.84, |
|
"learning_rate": 1.5263157894736846e-05, |
|
"loss": 3.8388, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 237.0, |
|
"eval_loss": 3.7657153606414795, |
|
"eval_runtime": 75.1446, |
|
"eval_samples_per_second": 6553.015, |
|
"eval_steps_per_second": 2.143, |
|
"step": 342228 |
|
}, |
|
{ |
|
"epoch": 237.19, |
|
"learning_rate": 1.5256232686980609e-05, |
|
"loss": 3.8371, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 237.53, |
|
"learning_rate": 1.5249307479224378e-05, |
|
"loss": 3.8424, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 237.88, |
|
"learning_rate": 1.5242382271468146e-05, |
|
"loss": 3.838, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 238.0, |
|
"eval_loss": 3.764124870300293, |
|
"eval_runtime": 74.9352, |
|
"eval_samples_per_second": 6571.333, |
|
"eval_steps_per_second": 2.149, |
|
"step": 343672 |
|
}, |
|
{ |
|
"epoch": 238.23, |
|
"learning_rate": 1.5235457063711913e-05, |
|
"loss": 3.8418, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 238.57, |
|
"learning_rate": 1.522853185595568e-05, |
|
"loss": 3.8407, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 238.92, |
|
"learning_rate": 1.5221606648199447e-05, |
|
"loss": 3.8386, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 239.0, |
|
"eval_loss": 3.763404369354248, |
|
"eval_runtime": 74.186, |
|
"eval_samples_per_second": 6637.691, |
|
"eval_steps_per_second": 2.17, |
|
"step": 345116 |
|
}, |
|
{ |
|
"epoch": 239.27, |
|
"learning_rate": 1.5214681440443216e-05, |
|
"loss": 3.8396, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 239.61, |
|
"learning_rate": 1.5207756232686981e-05, |
|
"loss": 3.8361, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 239.96, |
|
"learning_rate": 1.520083102493075e-05, |
|
"loss": 3.837, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_loss": 3.7625999450683594, |
|
"eval_runtime": 75.4987, |
|
"eval_samples_per_second": 6522.289, |
|
"eval_steps_per_second": 2.132, |
|
"step": 346560 |
|
}, |
|
{ |
|
"epoch": 240.3, |
|
"learning_rate": 1.5193905817174516e-05, |
|
"loss": 3.8349, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 240.65, |
|
"learning_rate": 1.5186980609418285e-05, |
|
"loss": 3.839, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"learning_rate": 1.518005540166205e-05, |
|
"loss": 3.8383, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 241.0, |
|
"eval_loss": 3.7632668018341064, |
|
"eval_runtime": 75.4619, |
|
"eval_samples_per_second": 6525.467, |
|
"eval_steps_per_second": 2.134, |
|
"step": 348004 |
|
}, |
|
{ |
|
"epoch": 241.34, |
|
"learning_rate": 1.5173130193905819e-05, |
|
"loss": 3.8367, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 241.69, |
|
"learning_rate": 1.5166204986149586e-05, |
|
"loss": 3.8363, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 242.0, |
|
"eval_loss": 3.763760805130005, |
|
"eval_runtime": 75.4912, |
|
"eval_samples_per_second": 6522.929, |
|
"eval_steps_per_second": 2.133, |
|
"step": 349448 |
|
}, |
|
{ |
|
"epoch": 242.04, |
|
"learning_rate": 1.5159279778393353e-05, |
|
"loss": 3.837, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 242.38, |
|
"learning_rate": 1.515235457063712e-05, |
|
"loss": 3.8352, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 242.73, |
|
"learning_rate": 1.5145429362880888e-05, |
|
"loss": 3.8372, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 243.0, |
|
"eval_loss": 3.7610692977905273, |
|
"eval_runtime": 75.8328, |
|
"eval_samples_per_second": 6493.549, |
|
"eval_steps_per_second": 2.123, |
|
"step": 350892 |
|
}, |
|
{ |
|
"epoch": 243.07, |
|
"learning_rate": 1.5138504155124655e-05, |
|
"loss": 3.8372, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 243.42, |
|
"learning_rate": 1.5131578947368422e-05, |
|
"loss": 3.8364, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 243.77, |
|
"learning_rate": 1.5124653739612189e-05, |
|
"loss": 3.8382, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 244.0, |
|
"eval_loss": 3.757394313812256, |
|
"eval_runtime": 75.6891, |
|
"eval_samples_per_second": 6505.877, |
|
"eval_steps_per_second": 2.127, |
|
"step": 352336 |
|
}, |
|
{ |
|
"epoch": 244.11, |
|
"learning_rate": 1.5117728531855958e-05, |
|
"loss": 3.8352, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 244.46, |
|
"learning_rate": 1.5110803324099723e-05, |
|
"loss": 3.837, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 244.81, |
|
"learning_rate": 1.5103878116343492e-05, |
|
"loss": 3.836, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 245.0, |
|
"eval_loss": 3.7577829360961914, |
|
"eval_runtime": 75.2986, |
|
"eval_samples_per_second": 6539.613, |
|
"eval_steps_per_second": 2.138, |
|
"step": 353780 |
|
}, |
|
{ |
|
"epoch": 245.15, |
|
"learning_rate": 1.5096952908587258e-05, |
|
"loss": 3.8344, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 245.5, |
|
"learning_rate": 1.5090027700831027e-05, |
|
"loss": 3.8379, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 245.84, |
|
"learning_rate": 1.5083102493074792e-05, |
|
"loss": 3.8351, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 246.0, |
|
"eval_loss": 3.756089687347412, |
|
"eval_runtime": 77.0188, |
|
"eval_samples_per_second": 6393.556, |
|
"eval_steps_per_second": 2.09, |
|
"step": 355224 |
|
}, |
|
{ |
|
"epoch": 246.19, |
|
"learning_rate": 1.5076177285318561e-05, |
|
"loss": 3.8339, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 246.54, |
|
"learning_rate": 1.5069252077562328e-05, |
|
"loss": 3.8371, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 246.88, |
|
"learning_rate": 1.5062326869806096e-05, |
|
"loss": 3.8334, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 247.0, |
|
"eval_loss": 3.754768133163452, |
|
"eval_runtime": 75.4834, |
|
"eval_samples_per_second": 6523.608, |
|
"eval_steps_per_second": 2.133, |
|
"step": 356668 |
|
}, |
|
{ |
|
"epoch": 247.23, |
|
"learning_rate": 1.5055401662049862e-05, |
|
"loss": 3.8314, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 247.58, |
|
"learning_rate": 1.504847645429363e-05, |
|
"loss": 3.8364, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 247.92, |
|
"learning_rate": 1.5041551246537399e-05, |
|
"loss": 3.834, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 248.0, |
|
"eval_loss": 3.754401922225952, |
|
"eval_runtime": 75.9236, |
|
"eval_samples_per_second": 6485.781, |
|
"eval_steps_per_second": 2.121, |
|
"step": 358112 |
|
}, |
|
{ |
|
"epoch": 248.27, |
|
"learning_rate": 1.5034626038781164e-05, |
|
"loss": 3.8326, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 248.61, |
|
"learning_rate": 1.5027700831024931e-05, |
|
"loss": 3.8333, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 248.96, |
|
"learning_rate": 1.50207756232687e-05, |
|
"loss": 3.8347, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 249.0, |
|
"eval_loss": 3.757708787918091, |
|
"eval_runtime": 74.9192, |
|
"eval_samples_per_second": 6572.738, |
|
"eval_steps_per_second": 2.149, |
|
"step": 359556 |
|
}, |
|
{ |
|
"epoch": 249.31, |
|
"learning_rate": 1.5013850415512468e-05, |
|
"loss": 3.8326, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 249.65, |
|
"learning_rate": 1.5006925207756233e-05, |
|
"loss": 3.8316, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 3.8321, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"eval_loss": 3.751624584197998, |
|
"eval_runtime": 75.2355, |
|
"eval_samples_per_second": 6545.106, |
|
"eval_steps_per_second": 2.14, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 250.35, |
|
"learning_rate": 1.4993074792243769e-05, |
|
"loss": 3.8329, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 250.69, |
|
"learning_rate": 1.4986149584487534e-05, |
|
"loss": 3.8303, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 251.0, |
|
"eval_loss": 3.757580280303955, |
|
"eval_runtime": 76.9563, |
|
"eval_samples_per_second": 6398.75, |
|
"eval_steps_per_second": 2.092, |
|
"step": 362444 |
|
}, |
|
{ |
|
"epoch": 251.04, |
|
"learning_rate": 1.4979224376731303e-05, |
|
"loss": 3.8295, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 251.39, |
|
"learning_rate": 1.4972299168975071e-05, |
|
"loss": 3.8314, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 251.73, |
|
"learning_rate": 1.4965373961218838e-05, |
|
"loss": 3.8307, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 252.0, |
|
"eval_loss": 3.752763032913208, |
|
"eval_runtime": 75.1767, |
|
"eval_samples_per_second": 6550.218, |
|
"eval_steps_per_second": 2.142, |
|
"step": 363888 |
|
}, |
|
{ |
|
"epoch": 252.08, |
|
"learning_rate": 1.4958448753462604e-05, |
|
"loss": 3.8269, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 252.42, |
|
"learning_rate": 1.4951523545706372e-05, |
|
"loss": 3.8307, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 252.77, |
|
"learning_rate": 1.494459833795014e-05, |
|
"loss": 3.8296, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 253.0, |
|
"eval_loss": 3.7549960613250732, |
|
"eval_runtime": 75.8092, |
|
"eval_samples_per_second": 6495.567, |
|
"eval_steps_per_second": 2.124, |
|
"step": 365332 |
|
}, |
|
{ |
|
"epoch": 253.12, |
|
"learning_rate": 1.4937673130193906e-05, |
|
"loss": 3.826, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 253.46, |
|
"learning_rate": 1.4930747922437675e-05, |
|
"loss": 3.8322, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 253.81, |
|
"learning_rate": 1.4923822714681442e-05, |
|
"loss": 3.8307, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 254.0, |
|
"eval_loss": 3.753600597381592, |
|
"eval_runtime": 75.0828, |
|
"eval_samples_per_second": 6558.417, |
|
"eval_steps_per_second": 2.144, |
|
"step": 366776 |
|
}, |
|
{ |
|
"epoch": 254.16, |
|
"learning_rate": 1.491689750692521e-05, |
|
"loss": 3.8309, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 254.5, |
|
"learning_rate": 1.4909972299168975e-05, |
|
"loss": 3.8289, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 254.85, |
|
"learning_rate": 1.4903047091412744e-05, |
|
"loss": 3.8317, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 255.0, |
|
"eval_loss": 3.7492051124572754, |
|
"eval_runtime": 74.5037, |
|
"eval_samples_per_second": 6609.385, |
|
"eval_steps_per_second": 2.161, |
|
"step": 368220 |
|
}, |
|
{ |
|
"epoch": 255.19, |
|
"learning_rate": 1.4896121883656511e-05, |
|
"loss": 3.8291, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 255.54, |
|
"learning_rate": 1.488919667590028e-05, |
|
"loss": 3.83, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 255.89, |
|
"learning_rate": 1.4882271468144045e-05, |
|
"loss": 3.8256, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 256.0, |
|
"eval_loss": 3.752340316772461, |
|
"eval_runtime": 75.106, |
|
"eval_samples_per_second": 6556.387, |
|
"eval_steps_per_second": 2.144, |
|
"step": 369664 |
|
}, |
|
{ |
|
"epoch": 256.23, |
|
"learning_rate": 1.4875346260387813e-05, |
|
"loss": 3.8289, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 256.58, |
|
"learning_rate": 1.486842105263158e-05, |
|
"loss": 3.8261, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 256.93, |
|
"learning_rate": 1.4861495844875347e-05, |
|
"loss": 3.8254, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 257.0, |
|
"eval_loss": 3.7502706050872803, |
|
"eval_runtime": 74.7416, |
|
"eval_samples_per_second": 6588.351, |
|
"eval_steps_per_second": 2.154, |
|
"step": 371108 |
|
}, |
|
{ |
|
"epoch": 257.27, |
|
"learning_rate": 1.4854570637119114e-05, |
|
"loss": 3.827, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 257.62, |
|
"learning_rate": 1.4847645429362883e-05, |
|
"loss": 3.8269, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 257.96, |
|
"learning_rate": 1.484072022160665e-05, |
|
"loss": 3.8262, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 258.0, |
|
"eval_loss": 3.7514851093292236, |
|
"eval_runtime": 76.1643, |
|
"eval_samples_per_second": 6465.283, |
|
"eval_steps_per_second": 2.114, |
|
"step": 372552 |
|
}, |
|
{ |
|
"epoch": 258.31, |
|
"learning_rate": 1.4833795013850417e-05, |
|
"loss": 3.8283, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 258.66, |
|
"learning_rate": 1.4826869806094184e-05, |
|
"loss": 3.8252, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"eval_loss": 3.746579647064209, |
|
"eval_runtime": 73.8659, |
|
"eval_samples_per_second": 6666.464, |
|
"eval_steps_per_second": 2.18, |
|
"step": 373996 |
|
}, |
|
{ |
|
"epoch": 259.0, |
|
"learning_rate": 1.4819944598337952e-05, |
|
"loss": 3.8253, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 259.35, |
|
"learning_rate": 1.4813019390581717e-05, |
|
"loss": 3.823, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 259.7, |
|
"learning_rate": 1.4806094182825486e-05, |
|
"loss": 3.824, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 260.0, |
|
"eval_loss": 3.7484304904937744, |
|
"eval_runtime": 75.3537, |
|
"eval_samples_per_second": 6534.833, |
|
"eval_steps_per_second": 2.137, |
|
"step": 375440 |
|
}, |
|
{ |
|
"epoch": 260.04, |
|
"learning_rate": 1.4799168975069253e-05, |
|
"loss": 3.8241, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 260.39, |
|
"learning_rate": 1.4792243767313022e-05, |
|
"loss": 3.8223, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 260.73, |
|
"learning_rate": 1.4785318559556787e-05, |
|
"loss": 3.8229, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 261.0, |
|
"eval_loss": 3.7492823600769043, |
|
"eval_runtime": 77.039, |
|
"eval_samples_per_second": 6391.882, |
|
"eval_steps_per_second": 2.09, |
|
"step": 376884 |
|
}, |
|
{ |
|
"epoch": 261.08, |
|
"learning_rate": 1.4778393351800555e-05, |
|
"loss": 3.8235, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 261.43, |
|
"learning_rate": 1.4771468144044322e-05, |
|
"loss": 3.8235, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 261.77, |
|
"learning_rate": 1.476454293628809e-05, |
|
"loss": 3.8242, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 262.0, |
|
"eval_loss": 3.7440624237060547, |
|
"eval_runtime": 74.2561, |
|
"eval_samples_per_second": 6631.428, |
|
"eval_steps_per_second": 2.168, |
|
"step": 378328 |
|
}, |
|
{ |
|
"epoch": 262.12, |
|
"learning_rate": 1.4757617728531856e-05, |
|
"loss": 3.8232, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 262.47, |
|
"learning_rate": 1.4750692520775625e-05, |
|
"loss": 3.8234, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 262.81, |
|
"learning_rate": 1.4743767313019393e-05, |
|
"loss": 3.8215, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 263.0, |
|
"eval_loss": 3.746445894241333, |
|
"eval_runtime": 74.2073, |
|
"eval_samples_per_second": 6635.79, |
|
"eval_steps_per_second": 2.17, |
|
"step": 379772 |
|
}, |
|
{ |
|
"epoch": 263.16, |
|
"learning_rate": 1.4736842105263159e-05, |
|
"loss": 3.8221, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 263.5, |
|
"learning_rate": 1.4729916897506925e-05, |
|
"loss": 3.823, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 263.85, |
|
"learning_rate": 1.4722991689750694e-05, |
|
"loss": 3.8222, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 264.0, |
|
"eval_loss": 3.747673749923706, |
|
"eval_runtime": 76.0947, |
|
"eval_samples_per_second": 6471.201, |
|
"eval_steps_per_second": 2.116, |
|
"step": 381216 |
|
}, |
|
{ |
|
"epoch": 264.2, |
|
"learning_rate": 1.4716066481994463e-05, |
|
"loss": 3.8171, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 264.54, |
|
"learning_rate": 1.4709141274238228e-05, |
|
"loss": 3.8187, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 264.89, |
|
"learning_rate": 1.4702216066481997e-05, |
|
"loss": 3.823, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 265.0, |
|
"eval_loss": 3.7447397708892822, |
|
"eval_runtime": 74.1023, |
|
"eval_samples_per_second": 6645.191, |
|
"eval_steps_per_second": 2.173, |
|
"step": 382660 |
|
}, |
|
{ |
|
"epoch": 265.24, |
|
"learning_rate": 1.4695290858725763e-05, |
|
"loss": 3.8209, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 265.58, |
|
"learning_rate": 1.4688365650969529e-05, |
|
"loss": 3.8206, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 265.93, |
|
"learning_rate": 1.4681440443213297e-05, |
|
"loss": 3.8222, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 266.0, |
|
"eval_loss": 3.7423489093780518, |
|
"eval_runtime": 75.7006, |
|
"eval_samples_per_second": 6504.891, |
|
"eval_steps_per_second": 2.127, |
|
"step": 384104 |
|
}, |
|
{ |
|
"epoch": 266.27, |
|
"learning_rate": 1.4674515235457066e-05, |
|
"loss": 3.8173, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 266.62, |
|
"learning_rate": 1.4667590027700833e-05, |
|
"loss": 3.8216, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 266.97, |
|
"learning_rate": 1.4660664819944598e-05, |
|
"loss": 3.8187, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 267.0, |
|
"eval_loss": 3.7439002990722656, |
|
"eval_runtime": 75.5321, |
|
"eval_samples_per_second": 6519.399, |
|
"eval_steps_per_second": 2.132, |
|
"step": 385548 |
|
}, |
|
{ |
|
"epoch": 267.31, |
|
"learning_rate": 1.4653739612188367e-05, |
|
"loss": 3.8174, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 267.66, |
|
"learning_rate": 1.4646814404432135e-05, |
|
"loss": 3.8213, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 268.0, |
|
"eval_loss": 3.740312099456787, |
|
"eval_runtime": 73.9502, |
|
"eval_samples_per_second": 6658.856, |
|
"eval_steps_per_second": 2.177, |
|
"step": 386992 |
|
}, |
|
{ |
|
"epoch": 268.01, |
|
"learning_rate": 1.46398891966759e-05, |
|
"loss": 3.8172, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 268.35, |
|
"learning_rate": 1.4632963988919669e-05, |
|
"loss": 3.8195, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 268.7, |
|
"learning_rate": 1.4626038781163436e-05, |
|
"loss": 3.8181, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 269.0, |
|
"eval_loss": 3.7468435764312744, |
|
"eval_runtime": 75.0402, |
|
"eval_samples_per_second": 6562.133, |
|
"eval_steps_per_second": 2.146, |
|
"step": 388436 |
|
}, |
|
{ |
|
"epoch": 269.04, |
|
"learning_rate": 1.4619113573407205e-05, |
|
"loss": 3.8183, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 269.39, |
|
"learning_rate": 1.461218836565097e-05, |
|
"loss": 3.8177, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 269.74, |
|
"learning_rate": 1.4605263157894739e-05, |
|
"loss": 3.819, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 270.0, |
|
"eval_loss": 3.7409017086029053, |
|
"eval_runtime": 74.5753, |
|
"eval_samples_per_second": 6603.048, |
|
"eval_steps_per_second": 2.159, |
|
"step": 389880 |
|
}, |
|
{ |
|
"epoch": 270.08, |
|
"learning_rate": 1.4598337950138505e-05, |
|
"loss": 3.8169, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 270.43, |
|
"learning_rate": 1.4591412742382272e-05, |
|
"loss": 3.8142, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 270.78, |
|
"learning_rate": 1.458448753462604e-05, |
|
"loss": 3.819, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 271.0, |
|
"eval_loss": 3.742891550064087, |
|
"eval_runtime": 74.4741, |
|
"eval_samples_per_second": 6612.013, |
|
"eval_steps_per_second": 2.162, |
|
"step": 391324 |
|
}, |
|
{ |
|
"epoch": 271.12, |
|
"learning_rate": 1.4577562326869808e-05, |
|
"loss": 3.8146, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 271.47, |
|
"learning_rate": 1.4570637119113575e-05, |
|
"loss": 3.8197, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 271.81, |
|
"learning_rate": 1.4563711911357342e-05, |
|
"loss": 3.8158, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 272.0, |
|
"eval_loss": 3.7434399127960205, |
|
"eval_runtime": 74.3951, |
|
"eval_samples_per_second": 6619.035, |
|
"eval_steps_per_second": 2.164, |
|
"step": 392768 |
|
}, |
|
{ |
|
"epoch": 272.16, |
|
"learning_rate": 1.4556786703601109e-05, |
|
"loss": 3.8153, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 272.51, |
|
"learning_rate": 1.4549861495844877e-05, |
|
"loss": 3.8146, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 272.85, |
|
"learning_rate": 1.4542936288088644e-05, |
|
"loss": 3.816, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 273.0, |
|
"eval_loss": 3.738966464996338, |
|
"eval_runtime": 74.8507, |
|
"eval_samples_per_second": 6578.747, |
|
"eval_steps_per_second": 2.151, |
|
"step": 394212 |
|
}, |
|
{ |
|
"epoch": 273.2, |
|
"learning_rate": 1.4536011080332411e-05, |
|
"loss": 3.8154, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 273.55, |
|
"learning_rate": 1.4529085872576178e-05, |
|
"loss": 3.8152, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 273.89, |
|
"learning_rate": 1.4522160664819947e-05, |
|
"loss": 3.8133, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 274.0, |
|
"eval_loss": 3.7411460876464844, |
|
"eval_runtime": 73.8647, |
|
"eval_samples_per_second": 6666.567, |
|
"eval_steps_per_second": 2.18, |
|
"step": 395656 |
|
}, |
|
{ |
|
"epoch": 274.24, |
|
"learning_rate": 1.4515235457063712e-05, |
|
"loss": 3.8161, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 274.58, |
|
"learning_rate": 1.450831024930748e-05, |
|
"loss": 3.8136, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 274.93, |
|
"learning_rate": 1.4501385041551247e-05, |
|
"loss": 3.8154, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 275.0, |
|
"eval_loss": 3.7406582832336426, |
|
"eval_runtime": 73.3111, |
|
"eval_samples_per_second": 6716.909, |
|
"eval_steps_per_second": 2.196, |
|
"step": 397100 |
|
}, |
|
{ |
|
"epoch": 275.28, |
|
"learning_rate": 1.4494459833795016e-05, |
|
"loss": 3.8141, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 275.62, |
|
"learning_rate": 1.4487534626038781e-05, |
|
"loss": 3.8111, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 275.97, |
|
"learning_rate": 1.448060941828255e-05, |
|
"loss": 3.8152, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 276.0, |
|
"eval_loss": 3.741816759109497, |
|
"eval_runtime": 73.4966, |
|
"eval_samples_per_second": 6699.952, |
|
"eval_steps_per_second": 2.191, |
|
"step": 398544 |
|
}, |
|
{ |
|
"epoch": 276.32, |
|
"learning_rate": 1.4473684210526317e-05, |
|
"loss": 3.8147, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 276.66, |
|
"learning_rate": 1.4466759002770084e-05, |
|
"loss": 3.8135, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 277.0, |
|
"eval_loss": 3.738856315612793, |
|
"eval_runtime": 75.384, |
|
"eval_samples_per_second": 6532.21, |
|
"eval_steps_per_second": 2.136, |
|
"step": 399988 |
|
}, |
|
{ |
|
"epoch": 277.01, |
|
"learning_rate": 1.445983379501385e-05, |
|
"loss": 3.8129, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 277.35, |
|
"learning_rate": 1.445290858725762e-05, |
|
"loss": 3.8126, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 277.7, |
|
"learning_rate": 1.4445983379501388e-05, |
|
"loss": 3.8137, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 278.0, |
|
"eval_loss": 3.7304413318634033, |
|
"eval_runtime": 76.1846, |
|
"eval_samples_per_second": 6463.564, |
|
"eval_steps_per_second": 2.113, |
|
"step": 401432 |
|
}, |
|
{ |
|
"epoch": 278.05, |
|
"learning_rate": 1.4439058171745153e-05, |
|
"loss": 3.8111, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 278.39, |
|
"learning_rate": 1.443213296398892e-05, |
|
"loss": 3.8118, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 278.74, |
|
"learning_rate": 1.4425207756232689e-05, |
|
"loss": 3.8113, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 279.0, |
|
"eval_loss": 3.735074520111084, |
|
"eval_runtime": 75.5234, |
|
"eval_samples_per_second": 6520.156, |
|
"eval_steps_per_second": 2.132, |
|
"step": 402876 |
|
}, |
|
{ |
|
"epoch": 279.09, |
|
"learning_rate": 1.4418282548476454e-05, |
|
"loss": 3.8114, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 279.43, |
|
"learning_rate": 1.4411357340720222e-05, |
|
"loss": 3.8118, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 279.78, |
|
"learning_rate": 1.4404432132963991e-05, |
|
"loss": 3.8099, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 280.0, |
|
"eval_loss": 3.735912799835205, |
|
"eval_runtime": 75.725, |
|
"eval_samples_per_second": 6502.791, |
|
"eval_steps_per_second": 2.126, |
|
"step": 404320 |
|
}, |
|
{ |
|
"epoch": 280.12, |
|
"learning_rate": 1.4397506925207758e-05, |
|
"loss": 3.81, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 280.47, |
|
"learning_rate": 1.4390581717451523e-05, |
|
"loss": 3.8137, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 280.82, |
|
"learning_rate": 1.4383656509695292e-05, |
|
"loss": 3.808, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 281.0, |
|
"eval_loss": 3.7392752170562744, |
|
"eval_runtime": 74.1732, |
|
"eval_samples_per_second": 6638.844, |
|
"eval_steps_per_second": 2.171, |
|
"step": 405764 |
|
}, |
|
{ |
|
"epoch": 281.16, |
|
"learning_rate": 1.437673130193906e-05, |
|
"loss": 3.81, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 281.51, |
|
"learning_rate": 1.4369806094182827e-05, |
|
"loss": 3.8114, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 281.86, |
|
"learning_rate": 1.4362880886426594e-05, |
|
"loss": 3.8103, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 282.0, |
|
"eval_loss": 3.7333552837371826, |
|
"eval_runtime": 75.1438, |
|
"eval_samples_per_second": 6553.09, |
|
"eval_steps_per_second": 2.143, |
|
"step": 407208 |
|
}, |
|
{ |
|
"epoch": 282.2, |
|
"learning_rate": 1.4355955678670361e-05, |
|
"loss": 3.8101, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 282.55, |
|
"learning_rate": 1.434903047091413e-05, |
|
"loss": 3.8119, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 282.89, |
|
"learning_rate": 1.4342105263157895e-05, |
|
"loss": 3.8108, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 283.0, |
|
"eval_loss": 3.735065221786499, |
|
"eval_runtime": 73.2315, |
|
"eval_samples_per_second": 6724.206, |
|
"eval_steps_per_second": 2.199, |
|
"step": 408652 |
|
}, |
|
{ |
|
"epoch": 283.24, |
|
"learning_rate": 1.4335180055401664e-05, |
|
"loss": 3.8071, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 283.59, |
|
"learning_rate": 1.432825484764543e-05, |
|
"loss": 3.8104, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 283.93, |
|
"learning_rate": 1.4321329639889199e-05, |
|
"loss": 3.8104, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 284.0, |
|
"eval_loss": 3.7356646060943604, |
|
"eval_runtime": 75.7247, |
|
"eval_samples_per_second": 6502.821, |
|
"eval_steps_per_second": 2.126, |
|
"step": 410096 |
|
}, |
|
{ |
|
"epoch": 284.28, |
|
"learning_rate": 1.4314404432132964e-05, |
|
"loss": 3.8049, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 284.63, |
|
"learning_rate": 1.4307479224376733e-05, |
|
"loss": 3.8094, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 284.97, |
|
"learning_rate": 1.43005540166205e-05, |
|
"loss": 3.807, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 285.0, |
|
"eval_loss": 3.7316856384277344, |
|
"eval_runtime": 75.2405, |
|
"eval_samples_per_second": 6544.666, |
|
"eval_steps_per_second": 2.14, |
|
"step": 411540 |
|
}, |
|
{ |
|
"epoch": 285.32, |
|
"learning_rate": 1.4293628808864267e-05, |
|
"loss": 3.8078, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 285.66, |
|
"learning_rate": 1.4286703601108034e-05, |
|
"loss": 3.806, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 286.0, |
|
"eval_loss": 3.731189250946045, |
|
"eval_runtime": 73.5011, |
|
"eval_samples_per_second": 6699.549, |
|
"eval_steps_per_second": 2.19, |
|
"step": 412984 |
|
}, |
|
{ |
|
"epoch": 286.01, |
|
"learning_rate": 1.4279778393351802e-05, |
|
"loss": 3.8083, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 286.36, |
|
"learning_rate": 1.427285318559557e-05, |
|
"loss": 3.8082, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 286.7, |
|
"learning_rate": 1.4265927977839336e-05, |
|
"loss": 3.8085, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 287.0, |
|
"eval_loss": 3.727071523666382, |
|
"eval_runtime": 75.2005, |
|
"eval_samples_per_second": 6548.149, |
|
"eval_steps_per_second": 2.141, |
|
"step": 414428 |
|
}, |
|
{ |
|
"epoch": 287.05, |
|
"learning_rate": 1.4259002770083103e-05, |
|
"loss": 3.8035, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 287.4, |
|
"learning_rate": 1.4252077562326872e-05, |
|
"loss": 3.8054, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 287.74, |
|
"learning_rate": 1.4245152354570639e-05, |
|
"loss": 3.8043, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 288.0, |
|
"eval_loss": 3.7320947647094727, |
|
"eval_runtime": 74.0311, |
|
"eval_samples_per_second": 6651.583, |
|
"eval_steps_per_second": 2.175, |
|
"step": 415872 |
|
}, |
|
{ |
|
"epoch": 288.09, |
|
"learning_rate": 1.4238227146814406e-05, |
|
"loss": 3.8056, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 288.43, |
|
"learning_rate": 1.4231301939058172e-05, |
|
"loss": 3.8044, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 288.78, |
|
"learning_rate": 1.4224376731301941e-05, |
|
"loss": 3.8051, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 289.0, |
|
"eval_loss": 3.726090431213379, |
|
"eval_runtime": 75.4663, |
|
"eval_samples_per_second": 6525.085, |
|
"eval_steps_per_second": 2.133, |
|
"step": 417316 |
|
}, |
|
{ |
|
"epoch": 289.13, |
|
"learning_rate": 1.4217451523545706e-05, |
|
"loss": 3.8017, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 289.47, |
|
"learning_rate": 1.4210526315789475e-05, |
|
"loss": 3.8071, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 289.82, |
|
"learning_rate": 1.4203601108033242e-05, |
|
"loss": 3.8058, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 290.0, |
|
"eval_loss": 3.730849027633667, |
|
"eval_runtime": 75.3792, |
|
"eval_samples_per_second": 6532.621, |
|
"eval_steps_per_second": 2.136, |
|
"step": 418760 |
|
}, |
|
{ |
|
"epoch": 290.17, |
|
"learning_rate": 1.419667590027701e-05, |
|
"loss": 3.8073, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 290.51, |
|
"learning_rate": 1.4189750692520776e-05, |
|
"loss": 3.8049, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 290.86, |
|
"learning_rate": 1.4182825484764544e-05, |
|
"loss": 3.8024, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 291.0, |
|
"eval_loss": 3.727689027786255, |
|
"eval_runtime": 77.757, |
|
"eval_samples_per_second": 6332.854, |
|
"eval_steps_per_second": 2.071, |
|
"step": 420204 |
|
}, |
|
{ |
|
"epoch": 291.2, |
|
"learning_rate": 1.4175900277008311e-05, |
|
"loss": 3.803, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 291.55, |
|
"learning_rate": 1.4168975069252078e-05, |
|
"loss": 3.7999, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 291.9, |
|
"learning_rate": 1.4162049861495845e-05, |
|
"loss": 3.8047, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 292.0, |
|
"eval_loss": 3.7302093505859375, |
|
"eval_runtime": 78.9126, |
|
"eval_samples_per_second": 6240.118, |
|
"eval_steps_per_second": 2.04, |
|
"step": 421648 |
|
}, |
|
{ |
|
"epoch": 292.24, |
|
"learning_rate": 1.4155124653739614e-05, |
|
"loss": 3.8048, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 292.59, |
|
"learning_rate": 1.4148199445983382e-05, |
|
"loss": 3.8016, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 292.94, |
|
"learning_rate": 1.4141274238227148e-05, |
|
"loss": 3.806, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 293.0, |
|
"eval_loss": 3.727792501449585, |
|
"eval_runtime": 73.701, |
|
"eval_samples_per_second": 6681.377, |
|
"eval_steps_per_second": 2.185, |
|
"step": 423092 |
|
}, |
|
{ |
|
"epoch": 293.28, |
|
"learning_rate": 1.4134349030470914e-05, |
|
"loss": 3.8033, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 293.63, |
|
"learning_rate": 1.4127423822714683e-05, |
|
"loss": 3.8018, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 293.98, |
|
"learning_rate": 1.4120498614958448e-05, |
|
"loss": 3.8034, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 294.0, |
|
"eval_loss": 3.7282450199127197, |
|
"eval_runtime": 73.3047, |
|
"eval_samples_per_second": 6717.497, |
|
"eval_steps_per_second": 2.196, |
|
"step": 424536 |
|
}, |
|
{ |
|
"epoch": 294.32, |
|
"learning_rate": 1.4113573407202217e-05, |
|
"loss": 3.7997, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 294.67, |
|
"learning_rate": 1.4106648199445986e-05, |
|
"loss": 3.8002, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 295.0, |
|
"eval_loss": 3.72798490524292, |
|
"eval_runtime": 75.6916, |
|
"eval_samples_per_second": 6505.662, |
|
"eval_steps_per_second": 2.127, |
|
"step": 425980 |
|
}, |
|
{ |
|
"epoch": 295.01, |
|
"learning_rate": 1.4099722991689752e-05, |
|
"loss": 3.8046, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 295.36, |
|
"learning_rate": 1.4092797783933518e-05, |
|
"loss": 3.7991, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 295.71, |
|
"learning_rate": 1.4085872576177286e-05, |
|
"loss": 3.8009, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 296.0, |
|
"eval_loss": 3.7270190715789795, |
|
"eval_runtime": 75.9779, |
|
"eval_samples_per_second": 6481.15, |
|
"eval_steps_per_second": 2.119, |
|
"step": 427424 |
|
}, |
|
{ |
|
"epoch": 296.05, |
|
"learning_rate": 1.4078947368421055e-05, |
|
"loss": 3.8023, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 296.4, |
|
"learning_rate": 1.4072022160664822e-05, |
|
"loss": 3.799, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 296.75, |
|
"learning_rate": 1.4065096952908589e-05, |
|
"loss": 3.8004, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 297.0, |
|
"eval_loss": 3.725785255432129, |
|
"eval_runtime": 73.3442, |
|
"eval_samples_per_second": 6713.882, |
|
"eval_steps_per_second": 2.195, |
|
"step": 428868 |
|
}, |
|
{ |
|
"epoch": 297.09, |
|
"learning_rate": 1.4058171745152356e-05, |
|
"loss": 3.798, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 297.44, |
|
"learning_rate": 1.4051246537396124e-05, |
|
"loss": 3.8009, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 297.78, |
|
"learning_rate": 1.404432132963989e-05, |
|
"loss": 3.8012, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 298.0, |
|
"eval_loss": 3.7256500720977783, |
|
"eval_runtime": 73.7366, |
|
"eval_samples_per_second": 6678.15, |
|
"eval_steps_per_second": 2.183, |
|
"step": 430312 |
|
}, |
|
{ |
|
"epoch": 298.13, |
|
"learning_rate": 1.4037396121883658e-05, |
|
"loss": 3.7996, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 298.48, |
|
"learning_rate": 1.4030470914127425e-05, |
|
"loss": 3.8001, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 298.82, |
|
"learning_rate": 1.4023545706371194e-05, |
|
"loss": 3.8005, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 299.0, |
|
"eval_loss": 3.723066568374634, |
|
"eval_runtime": 77.3225, |
|
"eval_samples_per_second": 6368.445, |
|
"eval_steps_per_second": 2.082, |
|
"step": 431756 |
|
}, |
|
{ |
|
"epoch": 299.17, |
|
"learning_rate": 1.4016620498614959e-05, |
|
"loss": 3.7999, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 299.52, |
|
"learning_rate": 1.4009695290858727e-05, |
|
"loss": 3.7986, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 299.86, |
|
"learning_rate": 1.4002770083102494e-05, |
|
"loss": 3.7992, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_loss": 3.7242555618286133, |
|
"eval_runtime": 76.7477, |
|
"eval_samples_per_second": 6416.137, |
|
"eval_steps_per_second": 2.098, |
|
"step": 433200 |
|
}, |
|
{ |
|
"epoch": 300.21, |
|
"learning_rate": 1.3995844875346261e-05, |
|
"loss": 3.799, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 300.55, |
|
"learning_rate": 1.3988919667590028e-05, |
|
"loss": 3.7998, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 300.9, |
|
"learning_rate": 1.3981994459833797e-05, |
|
"loss": 3.7975, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 301.0, |
|
"eval_loss": 3.7223165035247803, |
|
"eval_runtime": 73.7218, |
|
"eval_samples_per_second": 6679.489, |
|
"eval_steps_per_second": 2.184, |
|
"step": 434644 |
|
}, |
|
{ |
|
"epoch": 301.25, |
|
"learning_rate": 1.3975069252077564e-05, |
|
"loss": 3.8001, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 301.59, |
|
"learning_rate": 1.396814404432133e-05, |
|
"loss": 3.7979, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 301.94, |
|
"learning_rate": 1.3961218836565098e-05, |
|
"loss": 3.7973, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 302.0, |
|
"eval_loss": 3.724653959274292, |
|
"eval_runtime": 77.8047, |
|
"eval_samples_per_second": 6328.977, |
|
"eval_steps_per_second": 2.069, |
|
"step": 436088 |
|
}, |
|
{ |
|
"epoch": 302.29, |
|
"learning_rate": 1.3954293628808866e-05, |
|
"loss": 3.7957, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 302.63, |
|
"learning_rate": 1.3947368421052631e-05, |
|
"loss": 3.7942, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 302.98, |
|
"learning_rate": 1.39404432132964e-05, |
|
"loss": 3.7987, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 303.0, |
|
"eval_loss": 3.720510244369507, |
|
"eval_runtime": 76.9622, |
|
"eval_samples_per_second": 6398.255, |
|
"eval_steps_per_second": 2.092, |
|
"step": 437532 |
|
}, |
|
{ |
|
"epoch": 303.32, |
|
"learning_rate": 1.3933518005540167e-05, |
|
"loss": 3.795, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 303.67, |
|
"learning_rate": 1.3926592797783936e-05, |
|
"loss": 3.7965, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 304.0, |
|
"eval_loss": 3.7249743938446045, |
|
"eval_runtime": 73.4932, |
|
"eval_samples_per_second": 6700.264, |
|
"eval_steps_per_second": 2.191, |
|
"step": 438976 |
|
}, |
|
{ |
|
"epoch": 304.02, |
|
"learning_rate": 1.39196675900277e-05, |
|
"loss": 3.796, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 304.36, |
|
"learning_rate": 1.391274238227147e-05, |
|
"loss": 3.7958, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 304.71, |
|
"learning_rate": 1.3905817174515236e-05, |
|
"loss": 3.7992, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 305.0, |
|
"eval_loss": 3.721456289291382, |
|
"eval_runtime": 80.3944, |
|
"eval_samples_per_second": 6125.106, |
|
"eval_steps_per_second": 2.003, |
|
"step": 440420 |
|
}, |
|
{ |
|
"epoch": 305.06, |
|
"learning_rate": 1.3898891966759005e-05, |
|
"loss": 3.7968, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 305.4, |
|
"learning_rate": 1.389196675900277e-05, |
|
"loss": 3.7966, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 305.75, |
|
"learning_rate": 1.3885041551246539e-05, |
|
"loss": 3.7974, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 306.0, |
|
"eval_loss": 3.7187914848327637, |
|
"eval_runtime": 73.1181, |
|
"eval_samples_per_second": 6734.635, |
|
"eval_steps_per_second": 2.202, |
|
"step": 441864 |
|
}, |
|
{ |
|
"epoch": 306.09, |
|
"learning_rate": 1.3878116343490307e-05, |
|
"loss": 3.7968, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 306.44, |
|
"learning_rate": 1.3871191135734073e-05, |
|
"loss": 3.7954, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 306.79, |
|
"learning_rate": 1.386426592797784e-05, |
|
"loss": 3.7955, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 307.0, |
|
"eval_loss": 3.7259676456451416, |
|
"eval_runtime": 73.6071, |
|
"eval_samples_per_second": 6689.901, |
|
"eval_steps_per_second": 2.187, |
|
"step": 443308 |
|
}, |
|
{ |
|
"epoch": 307.13, |
|
"learning_rate": 1.3857340720221608e-05, |
|
"loss": 3.7946, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 307.48, |
|
"learning_rate": 1.3850415512465377e-05, |
|
"loss": 3.7958, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 307.83, |
|
"learning_rate": 1.3843490304709142e-05, |
|
"loss": 3.793, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 308.0, |
|
"eval_loss": 3.719923734664917, |
|
"eval_runtime": 76.694, |
|
"eval_samples_per_second": 6420.633, |
|
"eval_steps_per_second": 2.099, |
|
"step": 444752 |
|
}, |
|
{ |
|
"epoch": 308.17, |
|
"learning_rate": 1.3836565096952909e-05, |
|
"loss": 3.7951, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 308.52, |
|
"learning_rate": 1.3829639889196678e-05, |
|
"loss": 3.7968, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 308.86, |
|
"learning_rate": 1.3822714681440443e-05, |
|
"loss": 3.7923, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 309.0, |
|
"eval_loss": 3.7208216190338135, |
|
"eval_runtime": 74.1466, |
|
"eval_samples_per_second": 6641.218, |
|
"eval_steps_per_second": 2.171, |
|
"step": 446196 |
|
}, |
|
{ |
|
"epoch": 309.21, |
|
"learning_rate": 1.3815789473684211e-05, |
|
"loss": 3.7895, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 309.56, |
|
"learning_rate": 1.380886426592798e-05, |
|
"loss": 3.7939, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 309.9, |
|
"learning_rate": 1.3801939058171747e-05, |
|
"loss": 3.7956, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 310.0, |
|
"eval_loss": 3.7165091037750244, |
|
"eval_runtime": 75.6044, |
|
"eval_samples_per_second": 6513.167, |
|
"eval_steps_per_second": 2.13, |
|
"step": 447640 |
|
}, |
|
{ |
|
"epoch": 310.25, |
|
"learning_rate": 1.3795013850415512e-05, |
|
"loss": 3.7944, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 310.6, |
|
"learning_rate": 1.378808864265928e-05, |
|
"loss": 3.7903, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 310.94, |
|
"learning_rate": 1.378116343490305e-05, |
|
"loss": 3.7945, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 311.0, |
|
"eval_loss": 3.715456962585449, |
|
"eval_runtime": 73.561, |
|
"eval_samples_per_second": 6694.088, |
|
"eval_steps_per_second": 2.189, |
|
"step": 449084 |
|
}, |
|
{ |
|
"epoch": 311.29, |
|
"learning_rate": 1.3774238227146815e-05, |
|
"loss": 3.7901, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 311.63, |
|
"learning_rate": 1.3767313019390583e-05, |
|
"loss": 3.7902, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 311.98, |
|
"learning_rate": 1.376038781163435e-05, |
|
"loss": 3.7925, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 312.0, |
|
"eval_loss": 3.7176241874694824, |
|
"eval_runtime": 78.0968, |
|
"eval_samples_per_second": 6305.307, |
|
"eval_steps_per_second": 2.062, |
|
"step": 450528 |
|
}, |
|
{ |
|
"epoch": 312.33, |
|
"learning_rate": 1.3753462603878119e-05, |
|
"loss": 3.7932, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 312.67, |
|
"learning_rate": 1.3746537396121884e-05, |
|
"loss": 3.793, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 313.0, |
|
"eval_loss": 3.7189948558807373, |
|
"eval_runtime": 74.2572, |
|
"eval_samples_per_second": 6631.333, |
|
"eval_steps_per_second": 2.168, |
|
"step": 451972 |
|
}, |
|
{ |
|
"epoch": 313.02, |
|
"learning_rate": 1.3739612188365653e-05, |
|
"loss": 3.7906, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 313.37, |
|
"learning_rate": 1.373268698060942e-05, |
|
"loss": 3.7898, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 313.71, |
|
"learning_rate": 1.3725761772853188e-05, |
|
"loss": 3.7905, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 314.0, |
|
"eval_loss": 3.7148730754852295, |
|
"eval_runtime": 74.5995, |
|
"eval_samples_per_second": 6600.898, |
|
"eval_steps_per_second": 2.158, |
|
"step": 453416 |
|
}, |
|
{ |
|
"epoch": 314.06, |
|
"learning_rate": 1.3718836565096953e-05, |
|
"loss": 3.7931, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 314.4, |
|
"learning_rate": 1.3711911357340722e-05, |
|
"loss": 3.7909, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 314.75, |
|
"learning_rate": 1.3704986149584489e-05, |
|
"loss": 3.789, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 315.0, |
|
"eval_loss": 3.715317726135254, |
|
"eval_runtime": 73.3676, |
|
"eval_samples_per_second": 6711.739, |
|
"eval_steps_per_second": 2.194, |
|
"step": 454860 |
|
}, |
|
{ |
|
"epoch": 315.1, |
|
"learning_rate": 1.3698060941828256e-05, |
|
"loss": 3.7908, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 315.44, |
|
"learning_rate": 1.3691135734072023e-05, |
|
"loss": 3.7927, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 315.79, |
|
"learning_rate": 1.3684210526315791e-05, |
|
"loss": 3.7883, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 316.0, |
|
"eval_loss": 3.7178468704223633, |
|
"eval_runtime": 77.3962, |
|
"eval_samples_per_second": 6362.381, |
|
"eval_steps_per_second": 2.08, |
|
"step": 456304 |
|
}, |
|
{ |
|
"epoch": 316.14, |
|
"learning_rate": 1.3677285318559558e-05, |
|
"loss": 3.7919, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 316.48, |
|
"learning_rate": 1.3670360110803325e-05, |
|
"loss": 3.7879, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 316.83, |
|
"learning_rate": 1.3663434903047092e-05, |
|
"loss": 3.7897, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 317.0, |
|
"eval_loss": 3.7118659019470215, |
|
"eval_runtime": 74.0536, |
|
"eval_samples_per_second": 6649.566, |
|
"eval_steps_per_second": 2.174, |
|
"step": 457748 |
|
}, |
|
{ |
|
"epoch": 317.17, |
|
"learning_rate": 1.365650969529086e-05, |
|
"loss": 3.7926, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 317.52, |
|
"learning_rate": 1.3649584487534626e-05, |
|
"loss": 3.7899, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 317.87, |
|
"learning_rate": 1.3642659279778395e-05, |
|
"loss": 3.7886, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 318.0, |
|
"eval_loss": 3.7171525955200195, |
|
"eval_runtime": 73.7675, |
|
"eval_samples_per_second": 6675.351, |
|
"eval_steps_per_second": 2.183, |
|
"step": 459192 |
|
}, |
|
{ |
|
"epoch": 318.21, |
|
"learning_rate": 1.3635734072022161e-05, |
|
"loss": 3.7891, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 318.56, |
|
"learning_rate": 1.362880886426593e-05, |
|
"loss": 3.789, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 318.91, |
|
"learning_rate": 1.3621883656509695e-05, |
|
"loss": 3.7917, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 319.0, |
|
"eval_loss": 3.713498830795288, |
|
"eval_runtime": 75.1926, |
|
"eval_samples_per_second": 6548.836, |
|
"eval_steps_per_second": 2.141, |
|
"step": 460636 |
|
}, |
|
{ |
|
"epoch": 319.25, |
|
"learning_rate": 1.3614958448753464e-05, |
|
"loss": 3.7887, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 319.6, |
|
"learning_rate": 1.360803324099723e-05, |
|
"loss": 3.7915, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 319.94, |
|
"learning_rate": 1.3601108033240998e-05, |
|
"loss": 3.7868, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 320.0, |
|
"eval_loss": 3.7122466564178467, |
|
"eval_runtime": 73.8863, |
|
"eval_samples_per_second": 6664.614, |
|
"eval_steps_per_second": 2.179, |
|
"step": 462080 |
|
}, |
|
{ |
|
"epoch": 320.29, |
|
"learning_rate": 1.3594182825484765e-05, |
|
"loss": 3.7871, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 320.64, |
|
"learning_rate": 1.3587257617728533e-05, |
|
"loss": 3.7869, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 320.98, |
|
"learning_rate": 1.3580332409972302e-05, |
|
"loss": 3.7881, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 321.0, |
|
"eval_loss": 3.7132568359375, |
|
"eval_runtime": 75.8172, |
|
"eval_samples_per_second": 6494.889, |
|
"eval_steps_per_second": 2.124, |
|
"step": 463524 |
|
}, |
|
{ |
|
"epoch": 321.33, |
|
"learning_rate": 1.3573407202216067e-05, |
|
"loss": 3.785, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 321.68, |
|
"learning_rate": 1.3566481994459834e-05, |
|
"loss": 3.7882, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 322.0, |
|
"eval_loss": 3.714015245437622, |
|
"eval_runtime": 77.194, |
|
"eval_samples_per_second": 6379.048, |
|
"eval_steps_per_second": 2.086, |
|
"step": 464968 |
|
}, |
|
{ |
|
"epoch": 322.02, |
|
"learning_rate": 1.3559556786703603e-05, |
|
"loss": 3.7875, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 322.37, |
|
"learning_rate": 1.3552631578947371e-05, |
|
"loss": 3.789, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 322.71, |
|
"learning_rate": 1.3545706371191136e-05, |
|
"loss": 3.7871, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 323.0, |
|
"eval_loss": 3.707482099533081, |
|
"eval_runtime": 73.4812, |
|
"eval_samples_per_second": 6701.361, |
|
"eval_steps_per_second": 2.191, |
|
"step": 466412 |
|
}, |
|
{ |
|
"epoch": 323.06, |
|
"learning_rate": 1.3538781163434903e-05, |
|
"loss": 3.7855, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 323.41, |
|
"learning_rate": 1.3531855955678672e-05, |
|
"loss": 3.7851, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 323.75, |
|
"learning_rate": 1.3524930747922437e-05, |
|
"loss": 3.7861, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 324.0, |
|
"eval_loss": 3.7149996757507324, |
|
"eval_runtime": 77.2505, |
|
"eval_samples_per_second": 6374.382, |
|
"eval_steps_per_second": 2.084, |
|
"step": 467856 |
|
}, |
|
{ |
|
"epoch": 324.1, |
|
"learning_rate": 1.3518005540166206e-05, |
|
"loss": 3.7868, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 324.45, |
|
"learning_rate": 1.3511080332409974e-05, |
|
"loss": 3.7869, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 324.79, |
|
"learning_rate": 1.3504155124653741e-05, |
|
"loss": 3.7838, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 325.0, |
|
"eval_loss": 3.7106683254241943, |
|
"eval_runtime": 74.8849, |
|
"eval_samples_per_second": 6575.744, |
|
"eval_steps_per_second": 2.15, |
|
"step": 469300 |
|
}, |
|
{ |
|
"epoch": 325.14, |
|
"learning_rate": 1.3497229916897507e-05, |
|
"loss": 3.7865, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 325.48, |
|
"learning_rate": 1.3490304709141275e-05, |
|
"loss": 3.7858, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 325.83, |
|
"learning_rate": 1.3483379501385044e-05, |
|
"loss": 3.784, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 326.0, |
|
"eval_loss": 3.7102861404418945, |
|
"eval_runtime": 74.7474, |
|
"eval_samples_per_second": 6587.842, |
|
"eval_steps_per_second": 2.154, |
|
"step": 470744 |
|
}, |
|
{ |
|
"epoch": 326.18, |
|
"learning_rate": 1.3476454293628809e-05, |
|
"loss": 3.786, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 326.52, |
|
"learning_rate": 1.3469529085872578e-05, |
|
"loss": 3.7855, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 326.87, |
|
"learning_rate": 1.3462603878116345e-05, |
|
"loss": 3.7861, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 327.0, |
|
"eval_loss": 3.7090983390808105, |
|
"eval_runtime": 73.9182, |
|
"eval_samples_per_second": 6661.745, |
|
"eval_steps_per_second": 2.178, |
|
"step": 472188 |
|
}, |
|
{ |
|
"epoch": 327.22, |
|
"learning_rate": 1.3455678670360113e-05, |
|
"loss": 3.7856, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 327.56, |
|
"learning_rate": 1.3448753462603878e-05, |
|
"loss": 3.7827, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 327.91, |
|
"learning_rate": 1.3441828254847647e-05, |
|
"loss": 3.7854, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 328.0, |
|
"eval_loss": 3.708383083343506, |
|
"eval_runtime": 74.4676, |
|
"eval_samples_per_second": 6612.59, |
|
"eval_steps_per_second": 2.162, |
|
"step": 473632 |
|
}, |
|
{ |
|
"epoch": 328.25, |
|
"learning_rate": 1.3434903047091414e-05, |
|
"loss": 3.785, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 328.6, |
|
"learning_rate": 1.3427977839335181e-05, |
|
"loss": 3.7828, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 328.95, |
|
"learning_rate": 1.3421052631578948e-05, |
|
"loss": 3.7859, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 329.0, |
|
"eval_loss": 3.7049036026000977, |
|
"eval_runtime": 76.0122, |
|
"eval_samples_per_second": 6478.227, |
|
"eval_steps_per_second": 2.118, |
|
"step": 475076 |
|
}, |
|
{ |
|
"epoch": 329.29, |
|
"learning_rate": 1.3414127423822716e-05, |
|
"loss": 3.7826, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 329.64, |
|
"learning_rate": 1.3407202216066483e-05, |
|
"loss": 3.7815, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 329.99, |
|
"learning_rate": 1.340027700831025e-05, |
|
"loss": 3.7841, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 330.0, |
|
"eval_loss": 3.7112672328948975, |
|
"eval_runtime": 73.3442, |
|
"eval_samples_per_second": 6713.874, |
|
"eval_steps_per_second": 2.195, |
|
"step": 476520 |
|
}, |
|
{ |
|
"epoch": 330.33, |
|
"learning_rate": 1.3393351800554017e-05, |
|
"loss": 3.7814, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 330.68, |
|
"learning_rate": 1.3386426592797786e-05, |
|
"loss": 3.7823, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 331.0, |
|
"eval_loss": 3.7072346210479736, |
|
"eval_runtime": 76.1202, |
|
"eval_samples_per_second": 6469.031, |
|
"eval_steps_per_second": 2.115, |
|
"step": 477964 |
|
}, |
|
{ |
|
"epoch": 331.02, |
|
"learning_rate": 1.3379501385041553e-05, |
|
"loss": 3.7845, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 331.37, |
|
"learning_rate": 1.337257617728532e-05, |
|
"loss": 3.7822, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 331.72, |
|
"learning_rate": 1.3365650969529087e-05, |
|
"loss": 3.7848, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 332.0, |
|
"eval_loss": 3.708578109741211, |
|
"eval_runtime": 73.6567, |
|
"eval_samples_per_second": 6685.396, |
|
"eval_steps_per_second": 2.186, |
|
"step": 479408 |
|
}, |
|
{ |
|
"epoch": 332.06, |
|
"learning_rate": 1.3358725761772855e-05, |
|
"loss": 3.7811, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 332.41, |
|
"learning_rate": 1.335180055401662e-05, |
|
"loss": 3.7825, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 332.76, |
|
"learning_rate": 1.3344875346260389e-05, |
|
"loss": 3.7856, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 333.0, |
|
"eval_loss": 3.7044219970703125, |
|
"eval_runtime": 73.1824, |
|
"eval_samples_per_second": 6728.725, |
|
"eval_steps_per_second": 2.2, |
|
"step": 480852 |
|
}, |
|
{ |
|
"epoch": 333.1, |
|
"learning_rate": 1.3337950138504156e-05, |
|
"loss": 3.7804, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 333.45, |
|
"learning_rate": 1.3331024930747925e-05, |
|
"loss": 3.7816, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 333.8, |
|
"learning_rate": 1.332409972299169e-05, |
|
"loss": 3.7793, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 334.0, |
|
"eval_loss": 3.7033491134643555, |
|
"eval_runtime": 74.3462, |
|
"eval_samples_per_second": 6623.388, |
|
"eval_steps_per_second": 2.166, |
|
"step": 482296 |
|
}, |
|
{ |
|
"epoch": 334.14, |
|
"learning_rate": 1.3317174515235458e-05, |
|
"loss": 3.7816, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 334.49, |
|
"learning_rate": 1.3310249307479225e-05, |
|
"loss": 3.7796, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 334.83, |
|
"learning_rate": 1.3303324099722992e-05, |
|
"loss": 3.7823, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 335.0, |
|
"eval_loss": 3.712414503097534, |
|
"eval_runtime": 73.7811, |
|
"eval_samples_per_second": 6674.118, |
|
"eval_steps_per_second": 2.182, |
|
"step": 483740 |
|
}, |
|
{ |
|
"epoch": 335.18, |
|
"learning_rate": 1.3296398891966759e-05, |
|
"loss": 3.7811, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 335.53, |
|
"learning_rate": 1.3289473684210528e-05, |
|
"loss": 3.7801, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 335.87, |
|
"learning_rate": 1.3282548476454296e-05, |
|
"loss": 3.7779, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 336.0, |
|
"eval_loss": 3.713364601135254, |
|
"eval_runtime": 74.5722, |
|
"eval_samples_per_second": 6603.322, |
|
"eval_steps_per_second": 2.159, |
|
"step": 485184 |
|
}, |
|
{ |
|
"epoch": 336.22, |
|
"learning_rate": 1.3275623268698062e-05, |
|
"loss": 3.7814, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 336.57, |
|
"learning_rate": 1.3268698060941828e-05, |
|
"loss": 3.7797, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 336.91, |
|
"learning_rate": 1.3261772853185597e-05, |
|
"loss": 3.7817, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 337.0, |
|
"eval_loss": 3.704362392425537, |
|
"eval_runtime": 73.7818, |
|
"eval_samples_per_second": 6674.058, |
|
"eval_steps_per_second": 2.182, |
|
"step": 486628 |
|
}, |
|
{ |
|
"epoch": 337.26, |
|
"learning_rate": 1.3254847645429362e-05, |
|
"loss": 3.7809, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 337.6, |
|
"learning_rate": 1.3247922437673131e-05, |
|
"loss": 3.7793, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 337.95, |
|
"learning_rate": 1.32409972299169e-05, |
|
"loss": 3.7766, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 338.0, |
|
"eval_loss": 3.7027761936187744, |
|
"eval_runtime": 74.7391, |
|
"eval_samples_per_second": 6588.57, |
|
"eval_steps_per_second": 2.154, |
|
"step": 488072 |
|
}, |
|
{ |
|
"epoch": 338.3, |
|
"learning_rate": 1.3234072022160666e-05, |
|
"loss": 3.7818, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 338.64, |
|
"learning_rate": 1.3227146814404432e-05, |
|
"loss": 3.778, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 338.99, |
|
"learning_rate": 1.32202216066482e-05, |
|
"loss": 3.7803, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 339.0, |
|
"eval_loss": 3.7038166522979736, |
|
"eval_runtime": 73.5019, |
|
"eval_samples_per_second": 6699.476, |
|
"eval_steps_per_second": 2.19, |
|
"step": 489516 |
|
}, |
|
{ |
|
"epoch": 339.34, |
|
"learning_rate": 1.3213296398891969e-05, |
|
"loss": 3.7785, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 339.68, |
|
"learning_rate": 1.3206371191135736e-05, |
|
"loss": 3.7806, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 340.0, |
|
"eval_loss": 3.707486867904663, |
|
"eval_runtime": 76.5029, |
|
"eval_samples_per_second": 6436.667, |
|
"eval_steps_per_second": 2.104, |
|
"step": 490960 |
|
}, |
|
{ |
|
"epoch": 340.03, |
|
"learning_rate": 1.3199445983379501e-05, |
|
"loss": 3.7771, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 340.37, |
|
"learning_rate": 1.319252077562327e-05, |
|
"loss": 3.7764, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 340.72, |
|
"learning_rate": 1.3185595567867038e-05, |
|
"loss": 3.78, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 341.0, |
|
"eval_loss": 3.7045812606811523, |
|
"eval_runtime": 74.5994, |
|
"eval_samples_per_second": 6600.913, |
|
"eval_steps_per_second": 2.158, |
|
"step": 492404 |
|
}, |
|
{ |
|
"epoch": 341.07, |
|
"learning_rate": 1.3178670360110804e-05, |
|
"loss": 3.7806, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 341.41, |
|
"learning_rate": 1.3171745152354572e-05, |
|
"loss": 3.7798, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 341.76, |
|
"learning_rate": 1.3164819944598339e-05, |
|
"loss": 3.7794, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 342.0, |
|
"eval_loss": 3.7000420093536377, |
|
"eval_runtime": 75.6658, |
|
"eval_samples_per_second": 6507.879, |
|
"eval_steps_per_second": 2.128, |
|
"step": 493848 |
|
}, |
|
{ |
|
"epoch": 342.11, |
|
"learning_rate": 1.3157894736842108e-05, |
|
"loss": 3.7764, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 342.45, |
|
"learning_rate": 1.3150969529085873e-05, |
|
"loss": 3.7793, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 342.8, |
|
"learning_rate": 1.3144044321329642e-05, |
|
"loss": 3.7768, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 343.0, |
|
"eval_loss": 3.704115390777588, |
|
"eval_runtime": 75.5071, |
|
"eval_samples_per_second": 6521.559, |
|
"eval_steps_per_second": 2.132, |
|
"step": 495292 |
|
}, |
|
{ |
|
"epoch": 343.14, |
|
"learning_rate": 1.3137119113573408e-05, |
|
"loss": 3.7755, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 343.49, |
|
"learning_rate": 1.3130193905817175e-05, |
|
"loss": 3.7773, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 343.84, |
|
"learning_rate": 1.3123268698060942e-05, |
|
"loss": 3.776, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 344.0, |
|
"eval_loss": 3.7043023109436035, |
|
"eval_runtime": 73.6566, |
|
"eval_samples_per_second": 6685.399, |
|
"eval_steps_per_second": 2.186, |
|
"step": 496736 |
|
}, |
|
{ |
|
"epoch": 344.18, |
|
"learning_rate": 1.3116343490304711e-05, |
|
"loss": 3.7785, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 344.53, |
|
"learning_rate": 1.3109418282548478e-05, |
|
"loss": 3.7817, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 344.88, |
|
"learning_rate": 1.3102493074792245e-05, |
|
"loss": 3.7799, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 345.0, |
|
"eval_loss": 3.7023870944976807, |
|
"eval_runtime": 76.6367, |
|
"eval_samples_per_second": 6425.431, |
|
"eval_steps_per_second": 2.101, |
|
"step": 498180 |
|
}, |
|
{ |
|
"epoch": 345.22, |
|
"learning_rate": 1.3095567867036012e-05, |
|
"loss": 3.776, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 345.57, |
|
"learning_rate": 1.308864265927978e-05, |
|
"loss": 3.7761, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 345.91, |
|
"learning_rate": 1.3081717451523547e-05, |
|
"loss": 3.773, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 346.0, |
|
"eval_loss": 3.698683500289917, |
|
"eval_runtime": 75.0471, |
|
"eval_samples_per_second": 6561.533, |
|
"eval_steps_per_second": 2.145, |
|
"step": 499624 |
|
}, |
|
{ |
|
"epoch": 346.26, |
|
"learning_rate": 1.3074792243767314e-05, |
|
"loss": 3.776, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 346.61, |
|
"learning_rate": 1.3067867036011081e-05, |
|
"loss": 3.7738, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 346.95, |
|
"learning_rate": 1.306094182825485e-05, |
|
"loss": 3.7755, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 347.0, |
|
"eval_loss": 3.6997292041778564, |
|
"eval_runtime": 75.304, |
|
"eval_samples_per_second": 6539.146, |
|
"eval_steps_per_second": 2.138, |
|
"step": 501068 |
|
}, |
|
{ |
|
"epoch": 347.3, |
|
"learning_rate": 1.3054016620498615e-05, |
|
"loss": 3.7746, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 347.65, |
|
"learning_rate": 1.3047091412742383e-05, |
|
"loss": 3.7746, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 347.99, |
|
"learning_rate": 1.304016620498615e-05, |
|
"loss": 3.7738, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 348.0, |
|
"eval_loss": 3.7015268802642822, |
|
"eval_runtime": 73.8099, |
|
"eval_samples_per_second": 6671.521, |
|
"eval_steps_per_second": 2.181, |
|
"step": 502512 |
|
}, |
|
{ |
|
"epoch": 348.34, |
|
"learning_rate": 1.3033240997229919e-05, |
|
"loss": 3.7753, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 348.68, |
|
"learning_rate": 1.3026315789473684e-05, |
|
"loss": 3.7744, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 349.0, |
|
"eval_loss": 3.7030045986175537, |
|
"eval_runtime": 75.2471, |
|
"eval_samples_per_second": 6544.096, |
|
"eval_steps_per_second": 2.14, |
|
"step": 503956 |
|
}, |
|
{ |
|
"epoch": 349.03, |
|
"learning_rate": 1.3019390581717453e-05, |
|
"loss": 3.7736, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 349.38, |
|
"learning_rate": 1.301246537396122e-05, |
|
"loss": 3.7759, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 349.72, |
|
"learning_rate": 1.3005540166204987e-05, |
|
"loss": 3.7721, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 350.0, |
|
"eval_loss": 3.7013323307037354, |
|
"eval_runtime": 74.9488, |
|
"eval_samples_per_second": 6570.141, |
|
"eval_steps_per_second": 2.148, |
|
"step": 505400 |
|
}, |
|
{ |
|
"epoch": 350.07, |
|
"learning_rate": 1.2998614958448754e-05, |
|
"loss": 3.7721, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 350.42, |
|
"learning_rate": 1.2991689750692522e-05, |
|
"loss": 3.7719, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 350.76, |
|
"learning_rate": 1.298476454293629e-05, |
|
"loss": 3.7756, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 351.0, |
|
"eval_loss": 3.7012503147125244, |
|
"eval_runtime": 75.0555, |
|
"eval_samples_per_second": 6560.802, |
|
"eval_steps_per_second": 2.145, |
|
"step": 506844 |
|
}, |
|
{ |
|
"epoch": 351.11, |
|
"learning_rate": 1.2977839335180056e-05, |
|
"loss": 3.7736, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 351.45, |
|
"learning_rate": 1.2970914127423823e-05, |
|
"loss": 3.7706, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 351.8, |
|
"learning_rate": 1.2963988919667592e-05, |
|
"loss": 3.7715, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 352.0, |
|
"eval_loss": 3.699119806289673, |
|
"eval_runtime": 74.874, |
|
"eval_samples_per_second": 6576.701, |
|
"eval_steps_per_second": 2.15, |
|
"step": 508288 |
|
}, |
|
{ |
|
"epoch": 352.15, |
|
"learning_rate": 1.2957063711911357e-05, |
|
"loss": 3.7765, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 352.49, |
|
"learning_rate": 1.2950138504155125e-05, |
|
"loss": 3.7717, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 352.84, |
|
"learning_rate": 1.2943213296398894e-05, |
|
"loss": 3.7733, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 353.0, |
|
"eval_loss": 3.6972882747650146, |
|
"eval_runtime": 76.25, |
|
"eval_samples_per_second": 6458.018, |
|
"eval_steps_per_second": 2.111, |
|
"step": 509732 |
|
}, |
|
{ |
|
"epoch": 353.19, |
|
"learning_rate": 1.2936288088642661e-05, |
|
"loss": 3.7728, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 353.53, |
|
"learning_rate": 1.2929362880886426e-05, |
|
"loss": 3.7725, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 353.88, |
|
"learning_rate": 1.2922437673130195e-05, |
|
"loss": 3.7701, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 354.0, |
|
"eval_loss": 3.700070858001709, |
|
"eval_runtime": 75.0621, |
|
"eval_samples_per_second": 6560.222, |
|
"eval_steps_per_second": 2.145, |
|
"step": 511176 |
|
}, |
|
{ |
|
"epoch": 354.22, |
|
"learning_rate": 1.2915512465373963e-05, |
|
"loss": 3.7737, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 354.57, |
|
"learning_rate": 1.290858725761773e-05, |
|
"loss": 3.7703, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 354.92, |
|
"learning_rate": 1.2901662049861496e-05, |
|
"loss": 3.7733, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 355.0, |
|
"eval_loss": 3.697479248046875, |
|
"eval_runtime": 75.0016, |
|
"eval_samples_per_second": 6565.514, |
|
"eval_steps_per_second": 2.147, |
|
"step": 512620 |
|
}, |
|
{ |
|
"epoch": 355.26, |
|
"learning_rate": 1.2894736842105264e-05, |
|
"loss": 3.7696, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 355.61, |
|
"learning_rate": 1.2887811634349033e-05, |
|
"loss": 3.771, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 355.96, |
|
"learning_rate": 1.2880886426592798e-05, |
|
"loss": 3.7723, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 356.0, |
|
"eval_loss": 3.695901870727539, |
|
"eval_runtime": 74.3951, |
|
"eval_samples_per_second": 6619.041, |
|
"eval_steps_per_second": 2.164, |
|
"step": 514064 |
|
}, |
|
{ |
|
"epoch": 356.3, |
|
"learning_rate": 1.2873961218836567e-05, |
|
"loss": 3.771, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 356.65, |
|
"learning_rate": 1.2867036011080334e-05, |
|
"loss": 3.7708, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 356.99, |
|
"learning_rate": 1.2860110803324102e-05, |
|
"loss": 3.7671, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 357.0, |
|
"eval_loss": 3.696229934692383, |
|
"eval_runtime": 78.1947, |
|
"eval_samples_per_second": 6297.409, |
|
"eval_steps_per_second": 2.059, |
|
"step": 515508 |
|
}, |
|
{ |
|
"epoch": 357.34, |
|
"learning_rate": 1.2853185595567867e-05, |
|
"loss": 3.7695, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 357.69, |
|
"learning_rate": 1.2846260387811636e-05, |
|
"loss": 3.7711, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 358.0, |
|
"eval_loss": 3.6941683292388916, |
|
"eval_runtime": 79.1242, |
|
"eval_samples_per_second": 6223.433, |
|
"eval_steps_per_second": 2.035, |
|
"step": 516952 |
|
}, |
|
{ |
|
"epoch": 358.03, |
|
"learning_rate": 1.2839335180055403e-05, |
|
"loss": 3.7728, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 358.38, |
|
"learning_rate": 1.283240997229917e-05, |
|
"loss": 3.7696, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 358.73, |
|
"learning_rate": 1.2825484764542937e-05, |
|
"loss": 3.7721, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 359.0, |
|
"eval_loss": 3.6970505714416504, |
|
"eval_runtime": 78.5878, |
|
"eval_samples_per_second": 6265.908, |
|
"eval_steps_per_second": 2.049, |
|
"step": 518396 |
|
}, |
|
{ |
|
"epoch": 359.07, |
|
"learning_rate": 1.2818559556786705e-05, |
|
"loss": 3.7704, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 359.42, |
|
"learning_rate": 1.2811634349030472e-05, |
|
"loss": 3.7671, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 359.76, |
|
"learning_rate": 1.280470914127424e-05, |
|
"loss": 3.7711, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 360.0, |
|
"eval_loss": 3.695997476577759, |
|
"eval_runtime": 74.3491, |
|
"eval_samples_per_second": 6623.133, |
|
"eval_steps_per_second": 2.165, |
|
"step": 519840 |
|
}, |
|
{ |
|
"epoch": 360.11, |
|
"learning_rate": 1.2797783933518006e-05, |
|
"loss": 3.7685, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 360.46, |
|
"learning_rate": 1.2790858725761775e-05, |
|
"loss": 3.7706, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 360.8, |
|
"learning_rate": 1.278393351800554e-05, |
|
"loss": 3.7684, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 361.0, |
|
"eval_loss": 3.6960372924804688, |
|
"eval_runtime": 74.0632, |
|
"eval_samples_per_second": 6648.696, |
|
"eval_steps_per_second": 2.174, |
|
"step": 521284 |
|
}, |
|
{ |
|
"epoch": 361.15, |
|
"learning_rate": 1.2777008310249309e-05, |
|
"loss": 3.7715, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 361.5, |
|
"learning_rate": 1.2770083102493075e-05, |
|
"loss": 3.7673, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 361.84, |
|
"learning_rate": 1.2763157894736844e-05, |
|
"loss": 3.7691, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 362.0, |
|
"eval_loss": 3.693530559539795, |
|
"eval_runtime": 74.6535, |
|
"eval_samples_per_second": 6596.124, |
|
"eval_steps_per_second": 2.157, |
|
"step": 522728 |
|
}, |
|
{ |
|
"epoch": 362.19, |
|
"learning_rate": 1.275623268698061e-05, |
|
"loss": 3.7684, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 362.53, |
|
"learning_rate": 1.2749307479224378e-05, |
|
"loss": 3.7678, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 362.88, |
|
"learning_rate": 1.2742382271468145e-05, |
|
"loss": 3.7701, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 363.0, |
|
"eval_loss": 3.6938717365264893, |
|
"eval_runtime": 77.8737, |
|
"eval_samples_per_second": 6323.366, |
|
"eval_steps_per_second": 2.067, |
|
"step": 524172 |
|
}, |
|
{ |
|
"epoch": 363.23, |
|
"learning_rate": 1.2735457063711913e-05, |
|
"loss": 3.7676, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 363.57, |
|
"learning_rate": 1.2728531855955679e-05, |
|
"loss": 3.7673, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 363.92, |
|
"learning_rate": 1.2721606648199447e-05, |
|
"loss": 3.7678, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 364.0, |
|
"eval_loss": 3.694033145904541, |
|
"eval_runtime": 75.4612, |
|
"eval_samples_per_second": 6525.529, |
|
"eval_steps_per_second": 2.134, |
|
"step": 525616 |
|
}, |
|
{ |
|
"epoch": 364.27, |
|
"learning_rate": 1.2714681440443214e-05, |
|
"loss": 3.7668, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 364.61, |
|
"learning_rate": 1.2707756232686981e-05, |
|
"loss": 3.7671, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 364.96, |
|
"learning_rate": 1.2700831024930748e-05, |
|
"loss": 3.7685, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 365.0, |
|
"eval_loss": 3.6901051998138428, |
|
"eval_runtime": 74.9685, |
|
"eval_samples_per_second": 6568.408, |
|
"eval_steps_per_second": 2.148, |
|
"step": 527060 |
|
}, |
|
{ |
|
"epoch": 365.3, |
|
"learning_rate": 1.2693905817174517e-05, |
|
"loss": 3.7688, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 365.65, |
|
"learning_rate": 1.2686980609418285e-05, |
|
"loss": 3.7698, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"learning_rate": 1.268005540166205e-05, |
|
"loss": 3.7679, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 366.0, |
|
"eval_loss": 3.6931703090667725, |
|
"eval_runtime": 74.1691, |
|
"eval_samples_per_second": 6639.205, |
|
"eval_steps_per_second": 2.171, |
|
"step": 528504 |
|
}, |
|
{ |
|
"epoch": 366.34, |
|
"learning_rate": 1.2673130193905817e-05, |
|
"loss": 3.7677, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 366.69, |
|
"learning_rate": 1.2666204986149586e-05, |
|
"loss": 3.7684, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 367.0, |
|
"eval_loss": 3.687340497970581, |
|
"eval_runtime": 75.5717, |
|
"eval_samples_per_second": 6515.988, |
|
"eval_steps_per_second": 2.13, |
|
"step": 529948 |
|
}, |
|
{ |
|
"epoch": 367.04, |
|
"learning_rate": 1.2659279778393351e-05, |
|
"loss": 3.7671, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 367.38, |
|
"learning_rate": 1.265235457063712e-05, |
|
"loss": 3.7657, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 367.73, |
|
"learning_rate": 1.2645429362880889e-05, |
|
"loss": 3.7647, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 368.0, |
|
"eval_loss": 3.690807342529297, |
|
"eval_runtime": 73.5177, |
|
"eval_samples_per_second": 6698.037, |
|
"eval_steps_per_second": 2.19, |
|
"step": 531392 |
|
}, |
|
{ |
|
"epoch": 368.07, |
|
"learning_rate": 1.2638504155124655e-05, |
|
"loss": 3.7657, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 368.42, |
|
"learning_rate": 1.263157894736842e-05, |
|
"loss": 3.7676, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 368.77, |
|
"learning_rate": 1.262465373961219e-05, |
|
"loss": 3.7641, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 369.0, |
|
"eval_loss": 3.688164710998535, |
|
"eval_runtime": 75.7562, |
|
"eval_samples_per_second": 6500.118, |
|
"eval_steps_per_second": 2.125, |
|
"step": 532836 |
|
}, |
|
{ |
|
"epoch": 369.11, |
|
"learning_rate": 1.2617728531855958e-05, |
|
"loss": 3.7655, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 369.46, |
|
"learning_rate": 1.2610803324099723e-05, |
|
"loss": 3.7655, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 369.81, |
|
"learning_rate": 1.2603878116343492e-05, |
|
"loss": 3.7648, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 370.0, |
|
"eval_loss": 3.6942825317382812, |
|
"eval_runtime": 75.9898, |
|
"eval_samples_per_second": 6480.131, |
|
"eval_steps_per_second": 2.119, |
|
"step": 534280 |
|
}, |
|
{ |
|
"epoch": 370.15, |
|
"learning_rate": 1.2596952908587259e-05, |
|
"loss": 3.7682, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 370.5, |
|
"learning_rate": 1.2590027700831027e-05, |
|
"loss": 3.7648, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 370.84, |
|
"learning_rate": 1.2583102493074792e-05, |
|
"loss": 3.7688, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 371.0, |
|
"eval_loss": 3.6921067237854004, |
|
"eval_runtime": 76.7171, |
|
"eval_samples_per_second": 6418.698, |
|
"eval_steps_per_second": 2.099, |
|
"step": 535724 |
|
}, |
|
{ |
|
"epoch": 371.19, |
|
"learning_rate": 1.2576177285318561e-05, |
|
"loss": 3.7628, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 371.54, |
|
"learning_rate": 1.2569252077562328e-05, |
|
"loss": 3.7644, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 371.88, |
|
"learning_rate": 1.2562326869806097e-05, |
|
"loss": 3.7672, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 372.0, |
|
"eval_loss": 3.6890618801116943, |
|
"eval_runtime": 74.1083, |
|
"eval_samples_per_second": 6644.652, |
|
"eval_steps_per_second": 2.172, |
|
"step": 537168 |
|
}, |
|
{ |
|
"epoch": 372.23, |
|
"learning_rate": 1.2555401662049862e-05, |
|
"loss": 3.7629, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 372.58, |
|
"learning_rate": 1.254847645429363e-05, |
|
"loss": 3.7617, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 372.92, |
|
"learning_rate": 1.2541551246537397e-05, |
|
"loss": 3.7618, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 373.0, |
|
"eval_loss": 3.6865100860595703, |
|
"eval_runtime": 74.394, |
|
"eval_samples_per_second": 6619.133, |
|
"eval_steps_per_second": 2.164, |
|
"step": 538612 |
|
}, |
|
{ |
|
"epoch": 373.27, |
|
"learning_rate": 1.2534626038781164e-05, |
|
"loss": 3.7647, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 373.61, |
|
"learning_rate": 1.2527700831024931e-05, |
|
"loss": 3.7625, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 373.96, |
|
"learning_rate": 1.25207756232687e-05, |
|
"loss": 3.7651, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 374.0, |
|
"eval_loss": 3.689401149749756, |
|
"eval_runtime": 73.8005, |
|
"eval_samples_per_second": 6672.368, |
|
"eval_steps_per_second": 2.182, |
|
"step": 540056 |
|
}, |
|
{ |
|
"epoch": 374.31, |
|
"learning_rate": 1.2513850415512467e-05, |
|
"loss": 3.7643, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 374.65, |
|
"learning_rate": 1.2506925207756234e-05, |
|
"loss": 3.7638, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"learning_rate": 1.25e-05, |
|
"loss": 3.7631, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 375.0, |
|
"eval_loss": 3.686310291290283, |
|
"eval_runtime": 73.7737, |
|
"eval_samples_per_second": 6674.791, |
|
"eval_steps_per_second": 2.182, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 375.35, |
|
"learning_rate": 1.249307479224377e-05, |
|
"loss": 3.7647, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 375.69, |
|
"learning_rate": 1.2486149584487534e-05, |
|
"loss": 3.7634, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 376.0, |
|
"eval_loss": 3.685969352722168, |
|
"eval_runtime": 75.4811, |
|
"eval_samples_per_second": 6523.809, |
|
"eval_steps_per_second": 2.133, |
|
"step": 542944 |
|
}, |
|
{ |
|
"epoch": 376.04, |
|
"learning_rate": 1.2479224376731303e-05, |
|
"loss": 3.7621, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 376.39, |
|
"learning_rate": 1.247229916897507e-05, |
|
"loss": 3.7621, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 376.73, |
|
"learning_rate": 1.2465373961218839e-05, |
|
"loss": 3.7641, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 377.0, |
|
"eval_loss": 3.6860172748565674, |
|
"eval_runtime": 76.1045, |
|
"eval_samples_per_second": 6470.37, |
|
"eval_steps_per_second": 2.116, |
|
"step": 544388 |
|
}, |
|
{ |
|
"epoch": 377.08, |
|
"learning_rate": 1.2458448753462604e-05, |
|
"loss": 3.7648, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 377.42, |
|
"learning_rate": 1.2451523545706372e-05, |
|
"loss": 3.7628, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 377.77, |
|
"learning_rate": 1.244459833795014e-05, |
|
"loss": 3.7637, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 378.0, |
|
"eval_loss": 3.6875898838043213, |
|
"eval_runtime": 74.9317, |
|
"eval_samples_per_second": 6571.636, |
|
"eval_steps_per_second": 2.149, |
|
"step": 545832 |
|
}, |
|
{ |
|
"epoch": 378.12, |
|
"learning_rate": 1.2437673130193906e-05, |
|
"loss": 3.7598, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 378.46, |
|
"learning_rate": 1.2430747922437673e-05, |
|
"loss": 3.7616, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 378.81, |
|
"learning_rate": 1.2423822714681442e-05, |
|
"loss": 3.762, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 379.0, |
|
"eval_loss": 3.685751438140869, |
|
"eval_runtime": 73.6835, |
|
"eval_samples_per_second": 6682.965, |
|
"eval_steps_per_second": 2.185, |
|
"step": 547276 |
|
}, |
|
{ |
|
"epoch": 379.16, |
|
"learning_rate": 1.241689750692521e-05, |
|
"loss": 3.7617, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 379.5, |
|
"learning_rate": 1.2409972299168976e-05, |
|
"loss": 3.7617, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 379.85, |
|
"learning_rate": 1.2403047091412743e-05, |
|
"loss": 3.7584, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 380.0, |
|
"eval_loss": 3.6871654987335205, |
|
"eval_runtime": 79.6972, |
|
"eval_samples_per_second": 6178.684, |
|
"eval_steps_per_second": 2.02, |
|
"step": 548720 |
|
}, |
|
{ |
|
"epoch": 380.19, |
|
"learning_rate": 1.2396121883656511e-05, |
|
"loss": 3.7617, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 380.54, |
|
"learning_rate": 1.238919667590028e-05, |
|
"loss": 3.7601, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 380.89, |
|
"learning_rate": 1.2382271468144045e-05, |
|
"loss": 3.7591, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 381.0, |
|
"eval_loss": 3.685418128967285, |
|
"eval_runtime": 76.3267, |
|
"eval_samples_per_second": 6451.53, |
|
"eval_steps_per_second": 2.109, |
|
"step": 550164 |
|
}, |
|
{ |
|
"epoch": 381.23, |
|
"learning_rate": 1.2375346260387812e-05, |
|
"loss": 3.763, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 381.58, |
|
"learning_rate": 1.236842105263158e-05, |
|
"loss": 3.7639, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 381.93, |
|
"learning_rate": 1.2361495844875346e-05, |
|
"loss": 3.7582, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 382.0, |
|
"eval_loss": 3.681150197982788, |
|
"eval_runtime": 74.94, |
|
"eval_samples_per_second": 6570.908, |
|
"eval_steps_per_second": 2.148, |
|
"step": 551608 |
|
}, |
|
{ |
|
"epoch": 382.27, |
|
"learning_rate": 1.2354570637119114e-05, |
|
"loss": 3.761, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 382.62, |
|
"learning_rate": 1.2347645429362883e-05, |
|
"loss": 3.7575, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 382.96, |
|
"learning_rate": 1.234072022160665e-05, |
|
"loss": 3.7603, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 383.0, |
|
"eval_loss": 3.6880075931549072, |
|
"eval_runtime": 77.7382, |
|
"eval_samples_per_second": 6334.39, |
|
"eval_steps_per_second": 2.071, |
|
"step": 553052 |
|
}, |
|
{ |
|
"epoch": 383.31, |
|
"learning_rate": 1.2333795013850415e-05, |
|
"loss": 3.7594, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 383.66, |
|
"learning_rate": 1.2326869806094184e-05, |
|
"loss": 3.7627, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"eval_loss": 3.684797763824463, |
|
"eval_runtime": 75.6595, |
|
"eval_samples_per_second": 6508.425, |
|
"eval_steps_per_second": 2.128, |
|
"step": 554496 |
|
}, |
|
{ |
|
"epoch": 384.0, |
|
"learning_rate": 1.2319944598337952e-05, |
|
"loss": 3.761, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 384.35, |
|
"learning_rate": 1.2313019390581718e-05, |
|
"loss": 3.7601, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 384.7, |
|
"learning_rate": 1.2306094182825486e-05, |
|
"loss": 3.7575, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 385.0, |
|
"eval_loss": 3.683274745941162, |
|
"eval_runtime": 77.7247, |
|
"eval_samples_per_second": 6335.486, |
|
"eval_steps_per_second": 2.071, |
|
"step": 555940 |
|
}, |
|
{ |
|
"epoch": 385.04, |
|
"learning_rate": 1.2299168975069253e-05, |
|
"loss": 3.7608, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 385.39, |
|
"learning_rate": 1.2292243767313022e-05, |
|
"loss": 3.7583, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 385.73, |
|
"learning_rate": 1.2285318559556787e-05, |
|
"loss": 3.7615, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 386.0, |
|
"eval_loss": 3.684981346130371, |
|
"eval_runtime": 73.8671, |
|
"eval_samples_per_second": 6666.348, |
|
"eval_steps_per_second": 2.18, |
|
"step": 557384 |
|
}, |
|
{ |
|
"epoch": 386.08, |
|
"learning_rate": 1.2278393351800556e-05, |
|
"loss": 3.7573, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 386.43, |
|
"learning_rate": 1.2271468144044322e-05, |
|
"loss": 3.7603, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 386.77, |
|
"learning_rate": 1.2264542936288088e-05, |
|
"loss": 3.7589, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 387.0, |
|
"eval_loss": 3.682325601577759, |
|
"eval_runtime": 75.6044, |
|
"eval_samples_per_second": 6513.162, |
|
"eval_steps_per_second": 2.13, |
|
"step": 558828 |
|
}, |
|
{ |
|
"epoch": 387.12, |
|
"learning_rate": 1.2257617728531856e-05, |
|
"loss": 3.7553, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 387.47, |
|
"learning_rate": 1.2250692520775625e-05, |
|
"loss": 3.7576, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 387.81, |
|
"learning_rate": 1.2243767313019392e-05, |
|
"loss": 3.7596, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 388.0, |
|
"eval_loss": 3.6864383220672607, |
|
"eval_runtime": 74.4247, |
|
"eval_samples_per_second": 6616.409, |
|
"eval_steps_per_second": 2.163, |
|
"step": 560272 |
|
}, |
|
{ |
|
"epoch": 388.16, |
|
"learning_rate": 1.2236842105263159e-05, |
|
"loss": 3.7583, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 388.5, |
|
"learning_rate": 1.2229916897506926e-05, |
|
"loss": 3.7577, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 388.85, |
|
"learning_rate": 1.2222991689750694e-05, |
|
"loss": 3.7575, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 389.0, |
|
"eval_loss": 3.680553674697876, |
|
"eval_runtime": 74.7418, |
|
"eval_samples_per_second": 6588.339, |
|
"eval_steps_per_second": 2.154, |
|
"step": 561716 |
|
}, |
|
{ |
|
"epoch": 389.2, |
|
"learning_rate": 1.2216066481994461e-05, |
|
"loss": 3.7591, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 389.54, |
|
"learning_rate": 1.2209141274238228e-05, |
|
"loss": 3.7586, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 389.89, |
|
"learning_rate": 1.2202216066481995e-05, |
|
"loss": 3.7578, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 390.0, |
|
"eval_loss": 3.685154438018799, |
|
"eval_runtime": 73.6614, |
|
"eval_samples_per_second": 6684.968, |
|
"eval_steps_per_second": 2.186, |
|
"step": 563160 |
|
}, |
|
{ |
|
"epoch": 390.24, |
|
"learning_rate": 1.2195290858725764e-05, |
|
"loss": 3.7573, |
|
"step": 563500 |
|
}, |
|
{ |
|
"epoch": 390.58, |
|
"learning_rate": 1.2188365650969529e-05, |
|
"loss": 3.7584, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 390.93, |
|
"learning_rate": 1.2181440443213298e-05, |
|
"loss": 3.756, |
|
"step": 564500 |
|
}, |
|
{ |
|
"epoch": 391.0, |
|
"eval_loss": 3.683967113494873, |
|
"eval_runtime": 74.0988, |
|
"eval_samples_per_second": 6645.504, |
|
"eval_steps_per_second": 2.173, |
|
"step": 564604 |
|
}, |
|
{ |
|
"epoch": 391.27, |
|
"learning_rate": 1.2174515235457064e-05, |
|
"loss": 3.7563, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 391.62, |
|
"learning_rate": 1.2167590027700833e-05, |
|
"loss": 3.7564, |
|
"step": 565500 |
|
}, |
|
{ |
|
"epoch": 391.97, |
|
"learning_rate": 1.2160664819944598e-05, |
|
"loss": 3.7574, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 392.0, |
|
"eval_loss": 3.6856420040130615, |
|
"eval_runtime": 74.3539, |
|
"eval_samples_per_second": 6622.705, |
|
"eval_steps_per_second": 2.165, |
|
"step": 566048 |
|
}, |
|
{ |
|
"epoch": 392.31, |
|
"learning_rate": 1.2153739612188367e-05, |
|
"loss": 3.7559, |
|
"step": 566500 |
|
}, |
|
{ |
|
"epoch": 392.66, |
|
"learning_rate": 1.2146814404432134e-05, |
|
"loss": 3.7576, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 393.0, |
|
"eval_loss": 3.68398118019104, |
|
"eval_runtime": 75.3631, |
|
"eval_samples_per_second": 6534.022, |
|
"eval_steps_per_second": 2.136, |
|
"step": 567492 |
|
}, |
|
{ |
|
"epoch": 393.01, |
|
"learning_rate": 1.21398891966759e-05, |
|
"loss": 3.7578, |
|
"step": 567500 |
|
}, |
|
{ |
|
"epoch": 393.35, |
|
"learning_rate": 1.2132963988919668e-05, |
|
"loss": 3.7571, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 393.7, |
|
"learning_rate": 1.2126038781163436e-05, |
|
"loss": 3.7559, |
|
"step": 568500 |
|
}, |
|
{ |
|
"epoch": 394.0, |
|
"eval_loss": 3.6840593814849854, |
|
"eval_runtime": 75.017, |
|
"eval_samples_per_second": 6564.166, |
|
"eval_steps_per_second": 2.146, |
|
"step": 568936 |
|
}, |
|
{ |
|
"epoch": 394.04, |
|
"learning_rate": 1.2119113573407205e-05, |
|
"loss": 3.758, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 394.39, |
|
"learning_rate": 1.211218836565097e-05, |
|
"loss": 3.7553, |
|
"step": 569500 |
|
}, |
|
{ |
|
"epoch": 394.74, |
|
"learning_rate": 1.2105263157894737e-05, |
|
"loss": 3.7552, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 395.0, |
|
"eval_loss": 3.6834726333618164, |
|
"eval_runtime": 78.0649, |
|
"eval_samples_per_second": 6307.88, |
|
"eval_steps_per_second": 2.062, |
|
"step": 570380 |
|
}, |
|
{ |
|
"epoch": 395.08, |
|
"learning_rate": 1.2098337950138506e-05, |
|
"loss": 3.7579, |
|
"step": 570500 |
|
}, |
|
{ |
|
"epoch": 395.43, |
|
"learning_rate": 1.2091412742382271e-05, |
|
"loss": 3.7579, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 395.78, |
|
"learning_rate": 1.208448753462604e-05, |
|
"loss": 3.7547, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 396.0, |
|
"eval_loss": 3.6800179481506348, |
|
"eval_runtime": 73.5113, |
|
"eval_samples_per_second": 6698.614, |
|
"eval_steps_per_second": 2.19, |
|
"step": 571824 |
|
}, |
|
{ |
|
"epoch": 396.12, |
|
"learning_rate": 1.2077562326869806e-05, |
|
"loss": 3.7539, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 396.47, |
|
"learning_rate": 1.2070637119113575e-05, |
|
"loss": 3.7574, |
|
"step": 572500 |
|
}, |
|
{ |
|
"epoch": 396.81, |
|
"learning_rate": 1.206371191135734e-05, |
|
"loss": 3.7572, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 397.0, |
|
"eval_loss": 3.678455352783203, |
|
"eval_runtime": 74.8356, |
|
"eval_samples_per_second": 6580.081, |
|
"eval_steps_per_second": 2.151, |
|
"step": 573268 |
|
}, |
|
{ |
|
"epoch": 397.16, |
|
"learning_rate": 1.2056786703601109e-05, |
|
"loss": 3.7534, |
|
"step": 573500 |
|
}, |
|
{ |
|
"epoch": 397.51, |
|
"learning_rate": 1.2049861495844877e-05, |
|
"loss": 3.7549, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 397.85, |
|
"learning_rate": 1.2042936288088644e-05, |
|
"loss": 3.7546, |
|
"step": 574500 |
|
}, |
|
{ |
|
"epoch": 398.0, |
|
"eval_loss": 3.6750450134277344, |
|
"eval_runtime": 73.6423, |
|
"eval_samples_per_second": 6686.702, |
|
"eval_steps_per_second": 2.186, |
|
"step": 574712 |
|
}, |
|
{ |
|
"epoch": 398.2, |
|
"learning_rate": 1.203601108033241e-05, |
|
"loss": 3.7518, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 398.55, |
|
"learning_rate": 1.2029085872576178e-05, |
|
"loss": 3.7571, |
|
"step": 575500 |
|
}, |
|
{ |
|
"epoch": 398.89, |
|
"learning_rate": 1.2022160664819947e-05, |
|
"loss": 3.7555, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 399.0, |
|
"eval_loss": 3.678826093673706, |
|
"eval_runtime": 76.3543, |
|
"eval_samples_per_second": 6449.196, |
|
"eval_steps_per_second": 2.109, |
|
"step": 576156 |
|
}, |
|
{ |
|
"epoch": 399.24, |
|
"learning_rate": 1.2015235457063712e-05, |
|
"loss": 3.7544, |
|
"step": 576500 |
|
}, |
|
{ |
|
"epoch": 399.58, |
|
"learning_rate": 1.200831024930748e-05, |
|
"loss": 3.7527, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 399.93, |
|
"learning_rate": 1.2001385041551248e-05, |
|
"loss": 3.7565, |
|
"step": 577500 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 3.6734468936920166, |
|
"eval_runtime": 74.6685, |
|
"eval_samples_per_second": 6594.802, |
|
"eval_steps_per_second": 2.156, |
|
"step": 577600 |
|
}, |
|
{ |
|
"epoch": 400.28, |
|
"learning_rate": 1.1994459833795016e-05, |
|
"loss": 3.7545, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 400.62, |
|
"learning_rate": 1.1987534626038781e-05, |
|
"loss": 3.754, |
|
"step": 578500 |
|
}, |
|
{ |
|
"epoch": 400.97, |
|
"learning_rate": 1.198060941828255e-05, |
|
"loss": 3.754, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 401.0, |
|
"eval_loss": 3.6801302433013916, |
|
"eval_runtime": 75.4548, |
|
"eval_samples_per_second": 6526.079, |
|
"eval_steps_per_second": 2.134, |
|
"step": 579044 |
|
}, |
|
{ |
|
"epoch": 401.32, |
|
"learning_rate": 1.1973684210526317e-05, |
|
"loss": 3.7545, |
|
"step": 579500 |
|
}, |
|
{ |
|
"epoch": 401.66, |
|
"learning_rate": 1.1966759002770084e-05, |
|
"loss": 3.7534, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 402.0, |
|
"eval_loss": 3.6784117221832275, |
|
"eval_runtime": 74.9008, |
|
"eval_samples_per_second": 6574.347, |
|
"eval_steps_per_second": 2.15, |
|
"step": 580488 |
|
}, |
|
{ |
|
"epoch": 402.01, |
|
"learning_rate": 1.195983379501385e-05, |
|
"loss": 3.7546, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 402.35, |
|
"learning_rate": 1.195290858725762e-05, |
|
"loss": 3.7524, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 402.7, |
|
"learning_rate": 1.1945983379501386e-05, |
|
"loss": 3.7499, |
|
"step": 581500 |
|
}, |
|
{ |
|
"epoch": 403.0, |
|
"eval_loss": 3.6806490421295166, |
|
"eval_runtime": 77.3538, |
|
"eval_samples_per_second": 6365.865, |
|
"eval_steps_per_second": 2.081, |
|
"step": 581932 |
|
}, |
|
{ |
|
"epoch": 403.05, |
|
"learning_rate": 1.1939058171745153e-05, |
|
"loss": 3.7531, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 403.39, |
|
"learning_rate": 1.193213296398892e-05, |
|
"loss": 3.7501, |
|
"step": 582500 |
|
}, |
|
{ |
|
"epoch": 403.74, |
|
"learning_rate": 1.1925207756232689e-05, |
|
"loss": 3.7507, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 404.0, |
|
"eval_loss": 3.6763315200805664, |
|
"eval_runtime": 74.1317, |
|
"eval_samples_per_second": 6642.552, |
|
"eval_steps_per_second": 2.172, |
|
"step": 583376 |
|
}, |
|
{ |
|
"epoch": 404.09, |
|
"learning_rate": 1.1918282548476454e-05, |
|
"loss": 3.7527, |
|
"step": 583500 |
|
}, |
|
{ |
|
"epoch": 404.43, |
|
"learning_rate": 1.1911357340720223e-05, |
|
"loss": 3.7528, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 404.78, |
|
"learning_rate": 1.190443213296399e-05, |
|
"loss": 3.7485, |
|
"step": 584500 |
|
}, |
|
{ |
|
"epoch": 405.0, |
|
"eval_loss": 3.6807456016540527, |
|
"eval_runtime": 76.3544, |
|
"eval_samples_per_second": 6449.191, |
|
"eval_steps_per_second": 2.109, |
|
"step": 584820 |
|
}, |
|
{ |
|
"epoch": 405.12, |
|
"learning_rate": 1.1897506925207758e-05, |
|
"loss": 3.7503, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 405.47, |
|
"learning_rate": 1.1890581717451523e-05, |
|
"loss": 3.7524, |
|
"step": 585500 |
|
}, |
|
{ |
|
"epoch": 405.82, |
|
"learning_rate": 1.1883656509695292e-05, |
|
"loss": 3.7516, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 406.0, |
|
"eval_loss": 3.6764273643493652, |
|
"eval_runtime": 75.8758, |
|
"eval_samples_per_second": 6489.866, |
|
"eval_steps_per_second": 2.122, |
|
"step": 586264 |
|
}, |
|
{ |
|
"epoch": 406.16, |
|
"learning_rate": 1.1876731301939059e-05, |
|
"loss": 3.7525, |
|
"step": 586500 |
|
}, |
|
{ |
|
"epoch": 406.51, |
|
"learning_rate": 1.1869806094182828e-05, |
|
"loss": 3.7519, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 406.86, |
|
"learning_rate": 1.1862880886426593e-05, |
|
"loss": 3.7544, |
|
"step": 587500 |
|
}, |
|
{ |
|
"epoch": 407.0, |
|
"eval_loss": 3.679368495941162, |
|
"eval_runtime": 73.5888, |
|
"eval_samples_per_second": 6691.56, |
|
"eval_steps_per_second": 2.188, |
|
"step": 587708 |
|
}, |
|
{ |
|
"epoch": 407.2, |
|
"learning_rate": 1.1855955678670361e-05, |
|
"loss": 3.7496, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 407.55, |
|
"learning_rate": 1.1849030470914128e-05, |
|
"loss": 3.7509, |
|
"step": 588500 |
|
}, |
|
{ |
|
"epoch": 407.89, |
|
"learning_rate": 1.1842105263157895e-05, |
|
"loss": 3.7532, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 408.0, |
|
"eval_loss": 3.678314208984375, |
|
"eval_runtime": 73.6357, |
|
"eval_samples_per_second": 6687.301, |
|
"eval_steps_per_second": 2.186, |
|
"step": 589152 |
|
}, |
|
{ |
|
"epoch": 408.24, |
|
"learning_rate": 1.1835180055401662e-05, |
|
"loss": 3.7535, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 408.59, |
|
"learning_rate": 1.182825484764543e-05, |
|
"loss": 3.7506, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 408.93, |
|
"learning_rate": 1.18213296398892e-05, |
|
"loss": 3.7532, |
|
"step": 590500 |
|
}, |
|
{ |
|
"epoch": 409.0, |
|
"eval_loss": 3.676326036453247, |
|
"eval_runtime": 73.8096, |
|
"eval_samples_per_second": 6671.541, |
|
"eval_steps_per_second": 2.181, |
|
"step": 590596 |
|
}, |
|
{ |
|
"epoch": 409.28, |
|
"learning_rate": 1.1814404432132965e-05, |
|
"loss": 3.7545, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 409.63, |
|
"learning_rate": 1.1807479224376732e-05, |
|
"loss": 3.7496, |
|
"step": 591500 |
|
}, |
|
{ |
|
"epoch": 409.97, |
|
"learning_rate": 1.18005540166205e-05, |
|
"loss": 3.7494, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 410.0, |
|
"eval_loss": 3.6734423637390137, |
|
"eval_runtime": 73.4761, |
|
"eval_samples_per_second": 6701.822, |
|
"eval_steps_per_second": 2.191, |
|
"step": 592040 |
|
}, |
|
{ |
|
"epoch": 410.32, |
|
"learning_rate": 1.1793628808864265e-05, |
|
"loss": 3.7547, |
|
"step": 592500 |
|
}, |
|
{ |
|
"epoch": 410.66, |
|
"learning_rate": 1.1786703601108034e-05, |
|
"loss": 3.7501, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 411.0, |
|
"eval_loss": 3.6773502826690674, |
|
"eval_runtime": 75.1449, |
|
"eval_samples_per_second": 6552.994, |
|
"eval_steps_per_second": 2.143, |
|
"step": 593484 |
|
}, |
|
{ |
|
"epoch": 411.01, |
|
"learning_rate": 1.1779778393351803e-05, |
|
"loss": 3.7512, |
|
"step": 593500 |
|
}, |
|
{ |
|
"epoch": 411.36, |
|
"learning_rate": 1.177285318559557e-05, |
|
"loss": 3.7481, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 411.7, |
|
"learning_rate": 1.1765927977839335e-05, |
|
"loss": 3.7523, |
|
"step": 594500 |
|
}, |
|
{ |
|
"epoch": 412.0, |
|
"eval_loss": 3.6761634349823, |
|
"eval_runtime": 74.2595, |
|
"eval_samples_per_second": 6631.127, |
|
"eval_steps_per_second": 2.168, |
|
"step": 594928 |
|
}, |
|
{ |
|
"epoch": 412.05, |
|
"learning_rate": 1.1759002770083103e-05, |
|
"loss": 3.7488, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 412.4, |
|
"learning_rate": 1.1752077562326872e-05, |
|
"loss": 3.7468, |
|
"step": 595500 |
|
}, |
|
{ |
|
"epoch": 412.74, |
|
"learning_rate": 1.1745152354570639e-05, |
|
"loss": 3.7527, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 413.0, |
|
"eval_loss": 3.677605390548706, |
|
"eval_runtime": 75.7462, |
|
"eval_samples_per_second": 6500.971, |
|
"eval_steps_per_second": 2.126, |
|
"step": 596372 |
|
}, |
|
{ |
|
"epoch": 413.09, |
|
"learning_rate": 1.1738227146814404e-05, |
|
"loss": 3.7511, |
|
"step": 596500 |
|
}, |
|
{ |
|
"epoch": 413.43, |
|
"learning_rate": 1.1731301939058173e-05, |
|
"loss": 3.7485, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 413.78, |
|
"learning_rate": 1.1724376731301941e-05, |
|
"loss": 3.751, |
|
"step": 597500 |
|
}, |
|
{ |
|
"epoch": 414.0, |
|
"eval_loss": 3.6766717433929443, |
|
"eval_runtime": 73.9242, |
|
"eval_samples_per_second": 6661.202, |
|
"eval_steps_per_second": 2.178, |
|
"step": 597816 |
|
}, |
|
{ |
|
"epoch": 414.13, |
|
"learning_rate": 1.1717451523545707e-05, |
|
"loss": 3.7492, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 414.47, |
|
"learning_rate": 1.1710526315789475e-05, |
|
"loss": 3.749, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 414.82, |
|
"learning_rate": 1.1703601108033242e-05, |
|
"loss": 3.7475, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 415.0, |
|
"eval_loss": 3.672994375228882, |
|
"eval_runtime": 75.3605, |
|
"eval_samples_per_second": 6534.25, |
|
"eval_steps_per_second": 2.136, |
|
"step": 599260 |
|
}, |
|
{ |
|
"epoch": 415.17, |
|
"learning_rate": 1.169667590027701e-05, |
|
"loss": 3.7479, |
|
"step": 599500 |
|
}, |
|
{ |
|
"epoch": 415.51, |
|
"learning_rate": 1.1689750692520776e-05, |
|
"loss": 3.7481, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 415.86, |
|
"learning_rate": 1.1682825484764545e-05, |
|
"loss": 3.7493, |
|
"step": 600500 |
|
}, |
|
{ |
|
"epoch": 416.0, |
|
"eval_loss": 3.6774232387542725, |
|
"eval_runtime": 74.4096, |
|
"eval_samples_per_second": 6617.752, |
|
"eval_steps_per_second": 2.164, |
|
"step": 600704 |
|
}, |
|
{ |
|
"epoch": 416.2, |
|
"learning_rate": 1.1675900277008311e-05, |
|
"loss": 3.7479, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 416.55, |
|
"learning_rate": 1.1668975069252078e-05, |
|
"loss": 3.7478, |
|
"step": 601500 |
|
}, |
|
{ |
|
"epoch": 416.9, |
|
"learning_rate": 1.1662049861495845e-05, |
|
"loss": 3.7468, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 417.0, |
|
"eval_loss": 3.6730477809906006, |
|
"eval_runtime": 77.6666, |
|
"eval_samples_per_second": 6340.227, |
|
"eval_steps_per_second": 2.073, |
|
"step": 602148 |
|
}, |
|
{ |
|
"epoch": 417.24, |
|
"learning_rate": 1.1655124653739614e-05, |
|
"loss": 3.7497, |
|
"step": 602500 |
|
}, |
|
{ |
|
"epoch": 417.59, |
|
"learning_rate": 1.164819944598338e-05, |
|
"loss": 3.7491, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 417.94, |
|
"learning_rate": 1.1641274238227148e-05, |
|
"loss": 3.7457, |
|
"step": 603500 |
|
}, |
|
{ |
|
"epoch": 418.0, |
|
"eval_loss": 3.6768581867218018, |
|
"eval_runtime": 74.7091, |
|
"eval_samples_per_second": 6591.216, |
|
"eval_steps_per_second": 2.155, |
|
"step": 603592 |
|
}, |
|
{ |
|
"epoch": 418.28, |
|
"learning_rate": 1.1634349030470915e-05, |
|
"loss": 3.746, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 418.63, |
|
"learning_rate": 1.1627423822714683e-05, |
|
"loss": 3.7471, |
|
"step": 604500 |
|
}, |
|
{ |
|
"epoch": 418.98, |
|
"learning_rate": 1.1620498614958448e-05, |
|
"loss": 3.749, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 419.0, |
|
"eval_loss": 3.674077272415161, |
|
"eval_runtime": 74.7647, |
|
"eval_samples_per_second": 6586.318, |
|
"eval_steps_per_second": 2.153, |
|
"step": 605036 |
|
}, |
|
{ |
|
"epoch": 419.32, |
|
"learning_rate": 1.1613573407202217e-05, |
|
"loss": 3.7479, |
|
"step": 605500 |
|
}, |
|
{ |
|
"epoch": 419.67, |
|
"learning_rate": 1.1606648199445984e-05, |
|
"loss": 3.7512, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 420.0, |
|
"eval_loss": 3.6696927547454834, |
|
"eval_runtime": 74.4025, |
|
"eval_samples_per_second": 6618.38, |
|
"eval_steps_per_second": 2.164, |
|
"step": 606480 |
|
}, |
|
{ |
|
"epoch": 420.01, |
|
"learning_rate": 1.1599722991689753e-05, |
|
"loss": 3.7462, |
|
"step": 606500 |
|
}, |
|
{ |
|
"epoch": 420.36, |
|
"learning_rate": 1.1592797783933518e-05, |
|
"loss": 3.7487, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 420.71, |
|
"learning_rate": 1.1585872576177286e-05, |
|
"loss": 3.7491, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 421.0, |
|
"eval_loss": 3.670689105987549, |
|
"eval_runtime": 73.7952, |
|
"eval_samples_per_second": 6672.842, |
|
"eval_steps_per_second": 2.182, |
|
"step": 607924 |
|
}, |
|
{ |
|
"epoch": 421.05, |
|
"learning_rate": 1.1578947368421053e-05, |
|
"loss": 3.7472, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 421.4, |
|
"learning_rate": 1.1572022160664822e-05, |
|
"loss": 3.7462, |
|
"step": 608500 |
|
}, |
|
{ |
|
"epoch": 421.75, |
|
"learning_rate": 1.1565096952908587e-05, |
|
"loss": 3.7464, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 422.0, |
|
"eval_loss": 3.6667892932891846, |
|
"eval_runtime": 75.7682, |
|
"eval_samples_per_second": 6499.083, |
|
"eval_steps_per_second": 2.125, |
|
"step": 609368 |
|
}, |
|
{ |
|
"epoch": 422.09, |
|
"learning_rate": 1.1558171745152356e-05, |
|
"loss": 3.7461, |
|
"step": 609500 |
|
}, |
|
{ |
|
"epoch": 422.44, |
|
"learning_rate": 1.1551246537396123e-05, |
|
"loss": 3.7467, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 422.78, |
|
"learning_rate": 1.154432132963989e-05, |
|
"loss": 3.7476, |
|
"step": 610500 |
|
}, |
|
{ |
|
"epoch": 423.0, |
|
"eval_loss": 3.6749465465545654, |
|
"eval_runtime": 74.4127, |
|
"eval_samples_per_second": 6617.471, |
|
"eval_steps_per_second": 2.164, |
|
"step": 610812 |
|
}, |
|
{ |
|
"epoch": 423.13, |
|
"learning_rate": 1.1537396121883657e-05, |
|
"loss": 3.7449, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 423.48, |
|
"learning_rate": 1.1530470914127425e-05, |
|
"loss": 3.7459, |
|
"step": 611500 |
|
}, |
|
{ |
|
"epoch": 423.82, |
|
"learning_rate": 1.1523545706371194e-05, |
|
"loss": 3.7478, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 424.0, |
|
"eval_loss": 3.673068046569824, |
|
"eval_runtime": 76.5927, |
|
"eval_samples_per_second": 6429.124, |
|
"eval_steps_per_second": 2.102, |
|
"step": 612256 |
|
}, |
|
{ |
|
"epoch": 424.17, |
|
"learning_rate": 1.1516620498614959e-05, |
|
"loss": 3.7464, |
|
"step": 612500 |
|
}, |
|
{ |
|
"epoch": 424.52, |
|
"learning_rate": 1.1509695290858726e-05, |
|
"loss": 3.7449, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 424.86, |
|
"learning_rate": 1.1502770083102495e-05, |
|
"loss": 3.7474, |
|
"step": 613500 |
|
}, |
|
{ |
|
"epoch": 425.0, |
|
"eval_loss": 3.6703741550445557, |
|
"eval_runtime": 75.4887, |
|
"eval_samples_per_second": 6523.149, |
|
"eval_steps_per_second": 2.133, |
|
"step": 613700 |
|
}, |
|
{ |
|
"epoch": 425.21, |
|
"learning_rate": 1.149584487534626e-05, |
|
"loss": 3.7467, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 425.55, |
|
"learning_rate": 1.1488919667590028e-05, |
|
"loss": 3.7443, |
|
"step": 614500 |
|
}, |
|
{ |
|
"epoch": 425.9, |
|
"learning_rate": 1.1481994459833797e-05, |
|
"loss": 3.7466, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 426.0, |
|
"eval_loss": 3.6724741458892822, |
|
"eval_runtime": 77.1998, |
|
"eval_samples_per_second": 6378.57, |
|
"eval_steps_per_second": 2.085, |
|
"step": 615144 |
|
}, |
|
{ |
|
"epoch": 426.25, |
|
"learning_rate": 1.1475069252077564e-05, |
|
"loss": 3.7465, |
|
"step": 615500 |
|
}, |
|
{ |
|
"epoch": 426.59, |
|
"learning_rate": 1.146814404432133e-05, |
|
"loss": 3.7436, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 426.94, |
|
"learning_rate": 1.1461218836565098e-05, |
|
"loss": 3.7468, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 427.0, |
|
"eval_loss": 3.67818021774292, |
|
"eval_runtime": 76.3088, |
|
"eval_samples_per_second": 6453.046, |
|
"eval_steps_per_second": 2.11, |
|
"step": 616588 |
|
}, |
|
{ |
|
"epoch": 427.29, |
|
"learning_rate": 1.1454293628808866e-05, |
|
"loss": 3.7458, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 427.63, |
|
"learning_rate": 1.1447368421052632e-05, |
|
"loss": 3.7447, |
|
"step": 617500 |
|
}, |
|
{ |
|
"epoch": 427.98, |
|
"learning_rate": 1.1440443213296399e-05, |
|
"loss": 3.7472, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 428.0, |
|
"eval_loss": 3.6692676544189453, |
|
"eval_runtime": 77.2341, |
|
"eval_samples_per_second": 6375.732, |
|
"eval_steps_per_second": 2.085, |
|
"step": 618032 |
|
}, |
|
{ |
|
"epoch": 428.32, |
|
"learning_rate": 1.1433518005540167e-05, |
|
"loss": 3.7432, |
|
"step": 618500 |
|
}, |
|
{ |
|
"epoch": 428.67, |
|
"learning_rate": 1.1426592797783936e-05, |
|
"loss": 3.7454, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 429.0, |
|
"eval_loss": 3.6682395935058594, |
|
"eval_runtime": 81.127, |
|
"eval_samples_per_second": 6069.794, |
|
"eval_steps_per_second": 1.985, |
|
"step": 619476 |
|
}, |
|
{ |
|
"epoch": 429.02, |
|
"learning_rate": 1.1419667590027701e-05, |
|
"loss": 3.7452, |
|
"step": 619500 |
|
}, |
|
{ |
|
"epoch": 429.36, |
|
"learning_rate": 1.141274238227147e-05, |
|
"loss": 3.7429, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 429.71, |
|
"learning_rate": 1.1405817174515237e-05, |
|
"loss": 3.7424, |
|
"step": 620500 |
|
}, |
|
{ |
|
"epoch": 430.0, |
|
"eval_loss": 3.669628858566284, |
|
"eval_runtime": 73.5755, |
|
"eval_samples_per_second": 6692.774, |
|
"eval_steps_per_second": 2.188, |
|
"step": 620920 |
|
}, |
|
{ |
|
"epoch": 430.06, |
|
"learning_rate": 1.1398891966759005e-05, |
|
"loss": 3.7433, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 430.4, |
|
"learning_rate": 1.139196675900277e-05, |
|
"loss": 3.7426, |
|
"step": 621500 |
|
}, |
|
{ |
|
"epoch": 430.75, |
|
"learning_rate": 1.1385041551246539e-05, |
|
"loss": 3.7435, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 431.0, |
|
"eval_loss": 3.6648757457733154, |
|
"eval_runtime": 77.6023, |
|
"eval_samples_per_second": 6345.481, |
|
"eval_steps_per_second": 2.075, |
|
"step": 622364 |
|
}, |
|
{ |
|
"epoch": 431.09, |
|
"learning_rate": 1.1378116343490306e-05, |
|
"loss": 3.7437, |
|
"step": 622500 |
|
}, |
|
{ |
|
"epoch": 431.44, |
|
"learning_rate": 1.1371191135734073e-05, |
|
"loss": 3.7417, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 431.79, |
|
"learning_rate": 1.136426592797784e-05, |
|
"loss": 3.7441, |
|
"step": 623500 |
|
}, |
|
{ |
|
"epoch": 432.0, |
|
"eval_loss": 3.6664254665374756, |
|
"eval_runtime": 74.4125, |
|
"eval_samples_per_second": 6617.491, |
|
"eval_steps_per_second": 2.164, |
|
"step": 623808 |
|
}, |
|
{ |
|
"epoch": 432.13, |
|
"learning_rate": 1.1357340720221608e-05, |
|
"loss": 3.7417, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 432.48, |
|
"learning_rate": 1.1350415512465375e-05, |
|
"loss": 3.7433, |
|
"step": 624500 |
|
}, |
|
{ |
|
"epoch": 432.83, |
|
"learning_rate": 1.1343490304709142e-05, |
|
"loss": 3.7415, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 433.0, |
|
"eval_loss": 3.6667163372039795, |
|
"eval_runtime": 73.9564, |
|
"eval_samples_per_second": 6658.305, |
|
"eval_steps_per_second": 2.177, |
|
"step": 625252 |
|
}, |
|
{ |
|
"epoch": 433.17, |
|
"learning_rate": 1.1336565096952909e-05, |
|
"loss": 3.7433, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 433.52, |
|
"learning_rate": 1.1329639889196678e-05, |
|
"loss": 3.7397, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 433.86, |
|
"learning_rate": 1.1322714681440443e-05, |
|
"loss": 3.7418, |
|
"step": 626500 |
|
}, |
|
{ |
|
"epoch": 434.0, |
|
"eval_loss": 3.6659815311431885, |
|
"eval_runtime": 79.156, |
|
"eval_samples_per_second": 6220.929, |
|
"eval_steps_per_second": 2.034, |
|
"step": 626696 |
|
}, |
|
{ |
|
"epoch": 434.21, |
|
"learning_rate": 1.1315789473684212e-05, |
|
"loss": 3.7422, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 434.56, |
|
"learning_rate": 1.1308864265927979e-05, |
|
"loss": 3.7429, |
|
"step": 627500 |
|
}, |
|
{ |
|
"epoch": 434.9, |
|
"learning_rate": 1.1301939058171747e-05, |
|
"loss": 3.7442, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 435.0, |
|
"eval_loss": 3.669684886932373, |
|
"eval_runtime": 75.7717, |
|
"eval_samples_per_second": 6498.787, |
|
"eval_steps_per_second": 2.125, |
|
"step": 628140 |
|
}, |
|
{ |
|
"epoch": 435.25, |
|
"learning_rate": 1.1295013850415512e-05, |
|
"loss": 3.7411, |
|
"step": 628500 |
|
}, |
|
{ |
|
"epoch": 435.6, |
|
"learning_rate": 1.1288088642659281e-05, |
|
"loss": 3.7416, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 435.94, |
|
"learning_rate": 1.1281163434903048e-05, |
|
"loss": 3.7423, |
|
"step": 629500 |
|
}, |
|
{ |
|
"epoch": 436.0, |
|
"eval_loss": 3.667738914489746, |
|
"eval_runtime": 75.7672, |
|
"eval_samples_per_second": 6499.168, |
|
"eval_steps_per_second": 2.125, |
|
"step": 629584 |
|
}, |
|
{ |
|
"epoch": 436.29, |
|
"learning_rate": 1.1274238227146815e-05, |
|
"loss": 3.7399, |
|
"step": 630000 |
|
} |
|
], |
|
"max_steps": 1444000, |
|
"num_train_epochs": 1000, |
|
"total_flos": 2.9616940967712937e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|