|
{ |
|
"best_metric": 1.9631069898605347, |
|
"best_model_checkpoint": "/gpfs/space/home/sirts/EstRoBERTa/XLM-RoBERTa/output/checkpoint-20000", |
|
"epoch": 0.7784143477332575, |
|
"global_step": 700000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-09, |
|
"loss": 2.4605, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 9.93e-07, |
|
"loss": 3.0353, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.1905031204223633, |
|
"eval_runtime": 1291.547, |
|
"eval_samples_per_second": 60.963, |
|
"eval_steps_per_second": 1.905, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9930000000000002e-06, |
|
"loss": 2.693, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.1410903930664062, |
|
"eval_runtime": 1316.0761, |
|
"eval_samples_per_second": 59.827, |
|
"eval_steps_per_second": 1.87, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9920000000000003e-06, |
|
"loss": 2.609, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.112772226333618, |
|
"eval_runtime": 1272.1468, |
|
"eval_samples_per_second": 61.893, |
|
"eval_steps_per_second": 1.935, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.992e-06, |
|
"loss": 2.5452, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 2.0833613872528076, |
|
"eval_runtime": 1295.6928, |
|
"eval_samples_per_second": 60.768, |
|
"eval_steps_per_second": 1.899, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.992e-06, |
|
"loss": 2.5094, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.071711778640747, |
|
"eval_runtime": 1246.6676, |
|
"eval_samples_per_second": 63.158, |
|
"eval_steps_per_second": 1.974, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9918305882352945e-06, |
|
"loss": 2.4723, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.058234691619873, |
|
"eval_runtime": 1244.8428, |
|
"eval_samples_per_second": 63.251, |
|
"eval_steps_per_second": 1.977, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.983595294117648e-06, |
|
"loss": 2.4497, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.049036741256714, |
|
"eval_runtime": 1246.1771, |
|
"eval_samples_per_second": 63.183, |
|
"eval_steps_per_second": 1.975, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.97536e-06, |
|
"loss": 2.4303, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0401854515075684, |
|
"eval_runtime": 1272.8059, |
|
"eval_samples_per_second": 61.861, |
|
"eval_steps_per_second": 1.934, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.967124705882354e-06, |
|
"loss": 2.4146, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.030539035797119, |
|
"eval_runtime": 1305.6435, |
|
"eval_samples_per_second": 60.305, |
|
"eval_steps_per_second": 1.885, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.958889411764706e-06, |
|
"loss": 2.3796, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0135810375213623, |
|
"eval_runtime": 1294.4476, |
|
"eval_samples_per_second": 60.827, |
|
"eval_steps_per_second": 1.901, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.950654117647059e-06, |
|
"loss": 2.3635, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0098395347595215, |
|
"eval_runtime": 1304.5932, |
|
"eval_samples_per_second": 60.354, |
|
"eval_steps_per_second": 1.886, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9424270588235295e-06, |
|
"loss": 2.3659, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 2.0018415451049805, |
|
"eval_runtime": 1309.723, |
|
"eval_samples_per_second": 60.117, |
|
"eval_steps_per_second": 1.879, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.934191764705883e-06, |
|
"loss": 2.3621, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_loss": 1.9982237815856934, |
|
"eval_runtime": 1297.8423, |
|
"eval_samples_per_second": 60.668, |
|
"eval_steps_per_second": 1.896, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.925956470588235e-06, |
|
"loss": 2.3457, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9903459548950195, |
|
"eval_runtime": 1245.2796, |
|
"eval_samples_per_second": 63.228, |
|
"eval_steps_per_second": 1.976, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.917721176470589e-06, |
|
"loss": 2.32, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9873878955841064, |
|
"eval_runtime": 1279.3162, |
|
"eval_samples_per_second": 61.546, |
|
"eval_steps_per_second": 1.924, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.909502352941177e-06, |
|
"loss": 2.3196, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9738166332244873, |
|
"eval_runtime": 1324.3821, |
|
"eval_samples_per_second": 59.452, |
|
"eval_steps_per_second": 1.858, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.901275294117647e-06, |
|
"loss": 2.307, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9697579145431519, |
|
"eval_runtime": 1331.6669, |
|
"eval_samples_per_second": 59.127, |
|
"eval_steps_per_second": 1.848, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.893040000000001e-06, |
|
"loss": 2.3063, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9780616760253906, |
|
"eval_runtime": 1379.3292, |
|
"eval_samples_per_second": 57.084, |
|
"eval_steps_per_second": 1.784, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.884804705882353e-06, |
|
"loss": 2.2927, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9647581577301025, |
|
"eval_runtime": 1273.2021, |
|
"eval_samples_per_second": 61.842, |
|
"eval_steps_per_second": 1.933, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8765694117647066e-06, |
|
"loss": 2.2862, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9631069898605347, |
|
"eval_runtime": 1305.0876, |
|
"eval_samples_per_second": 60.331, |
|
"eval_steps_per_second": 1.886, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.868334117647059e-06, |
|
"loss": 2.2755, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.9493948221206665, |
|
"eval_runtime": 1273.9786, |
|
"eval_samples_per_second": 61.804, |
|
"eval_steps_per_second": 1.932, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.86010705882353e-06, |
|
"loss": 2.271, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 1.951810598373413, |
|
"eval_runtime": 1303.7209, |
|
"eval_samples_per_second": 60.394, |
|
"eval_steps_per_second": 1.888, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.85188e-06, |
|
"loss": 2.2652, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9497112035751343, |
|
"eval_runtime": 1301.1007, |
|
"eval_samples_per_second": 60.516, |
|
"eval_steps_per_second": 1.891, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.843644705882353e-06, |
|
"loss": 2.2598, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.940477967262268, |
|
"eval_runtime": 1324.2392, |
|
"eval_samples_per_second": 59.458, |
|
"eval_steps_per_second": 1.858, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.835409411764706e-06, |
|
"loss": 2.2463, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9413543939590454, |
|
"eval_runtime": 1243.8574, |
|
"eval_samples_per_second": 63.301, |
|
"eval_steps_per_second": 1.979, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.827174117647059e-06, |
|
"loss": 2.2484, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9489567279815674, |
|
"eval_runtime": 1267.5954, |
|
"eval_samples_per_second": 62.115, |
|
"eval_steps_per_second": 1.941, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8189470588235294e-06, |
|
"loss": 2.2345, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9309957027435303, |
|
"eval_runtime": 1305.12, |
|
"eval_samples_per_second": 60.329, |
|
"eval_steps_per_second": 1.886, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.81072e-06, |
|
"loss": 2.2502, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.935508370399475, |
|
"eval_runtime": 1269.8199, |
|
"eval_samples_per_second": 62.006, |
|
"eval_steps_per_second": 1.938, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8024847058823534e-06, |
|
"loss": 2.2279, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9275970458984375, |
|
"eval_runtime": 1298.8069, |
|
"eval_samples_per_second": 60.623, |
|
"eval_steps_per_second": 1.895, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.794249411764706e-06, |
|
"loss": 2.2275, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9117810726165771, |
|
"eval_runtime": 1334.2691, |
|
"eval_samples_per_second": 59.011, |
|
"eval_steps_per_second": 1.844, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.786014117647059e-06, |
|
"loss": 2.2233, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 1.9150503873825073, |
|
"eval_runtime": 1324.9615, |
|
"eval_samples_per_second": 59.426, |
|
"eval_steps_per_second": 1.857, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.777778823529412e-06, |
|
"loss": 2.2199, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9108450412750244, |
|
"eval_runtime": 1324.574, |
|
"eval_samples_per_second": 59.443, |
|
"eval_steps_per_second": 1.858, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.769543529411765e-06, |
|
"loss": 2.2113, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9146023988723755, |
|
"eval_runtime": 1239.9621, |
|
"eval_samples_per_second": 63.5, |
|
"eval_steps_per_second": 1.985, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.761308235294118e-06, |
|
"loss": 2.2216, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9067528247833252, |
|
"eval_runtime": 1283.5053, |
|
"eval_samples_per_second": 61.345, |
|
"eval_steps_per_second": 1.917, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.753072941176471e-06, |
|
"loss": 2.2054, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.92031729221344, |
|
"eval_runtime": 1316.0539, |
|
"eval_samples_per_second": 59.828, |
|
"eval_steps_per_second": 1.87, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.744845882352942e-06, |
|
"loss": 2.2046, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.9086993932724, |
|
"eval_runtime": 1326.5645, |
|
"eval_samples_per_second": 59.354, |
|
"eval_steps_per_second": 1.855, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.736610588235294e-06, |
|
"loss": 2.2004, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.8981074094772339, |
|
"eval_runtime": 1240.1179, |
|
"eval_samples_per_second": 63.492, |
|
"eval_steps_per_second": 1.984, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.728383529411765e-06, |
|
"loss": 2.1925, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.8917450904846191, |
|
"eval_runtime": 1289.2602, |
|
"eval_samples_per_second": 61.071, |
|
"eval_steps_per_second": 1.909, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.720148235294118e-06, |
|
"loss": 2.1815, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.8893914222717285, |
|
"eval_runtime": 1347.7408, |
|
"eval_samples_per_second": 58.421, |
|
"eval_steps_per_second": 1.826, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.711912941176471e-06, |
|
"loss": 2.1826, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 1.8931858539581299, |
|
"eval_runtime": 1295.4191, |
|
"eval_samples_per_second": 60.781, |
|
"eval_steps_per_second": 1.9, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7036858823529415e-06, |
|
"loss": 2.1763, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8918838500976562, |
|
"eval_runtime": 1343.6015, |
|
"eval_samples_per_second": 58.601, |
|
"eval_steps_per_second": 1.832, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.695450588235294e-06, |
|
"loss": 2.1732, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8779336214065552, |
|
"eval_runtime": 1265.8686, |
|
"eval_samples_per_second": 62.2, |
|
"eval_steps_per_second": 1.944, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.687215294117647e-06, |
|
"loss": 2.175, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8825352191925049, |
|
"eval_runtime": 1349.6143, |
|
"eval_samples_per_second": 58.34, |
|
"eval_steps_per_second": 1.823, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.67898e-06, |
|
"loss": 2.1715, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8862870931625366, |
|
"eval_runtime": 1289.9064, |
|
"eval_samples_per_second": 61.041, |
|
"eval_steps_per_second": 1.908, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.670761176470589e-06, |
|
"loss": 2.1676, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.877558946609497, |
|
"eval_runtime": 1339.6904, |
|
"eval_samples_per_second": 58.773, |
|
"eval_steps_per_second": 1.837, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.662525882352941e-06, |
|
"loss": 2.162, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8775289058685303, |
|
"eval_runtime": 1237.9112, |
|
"eval_samples_per_second": 63.605, |
|
"eval_steps_per_second": 1.988, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.6542905882352945e-06, |
|
"loss": 2.1655, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.882572889328003, |
|
"eval_runtime": 1257.7653, |
|
"eval_samples_per_second": 62.601, |
|
"eval_steps_per_second": 1.957, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.646063529411765e-06, |
|
"loss": 2.1515, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8675343990325928, |
|
"eval_runtime": 1295.0697, |
|
"eval_samples_per_second": 60.797, |
|
"eval_steps_per_second": 1.9, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.637828235294118e-06, |
|
"loss": 2.1521, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.8788520097732544, |
|
"eval_runtime": 1368.5815, |
|
"eval_samples_per_second": 57.532, |
|
"eval_steps_per_second": 1.798, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.629592941176471e-06, |
|
"loss": 2.1553, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.878118634223938, |
|
"eval_runtime": 1257.5481, |
|
"eval_samples_per_second": 62.612, |
|
"eval_steps_per_second": 1.957, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.621357647058824e-06, |
|
"loss": 2.1468, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8591060638427734, |
|
"eval_runtime": 1301.9687, |
|
"eval_samples_per_second": 60.475, |
|
"eval_steps_per_second": 1.89, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.613138823529412e-06, |
|
"loss": 2.1498, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8617653846740723, |
|
"eval_runtime": 1324.559, |
|
"eval_samples_per_second": 59.444, |
|
"eval_steps_per_second": 1.858, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.604903529411765e-06, |
|
"loss": 2.1424, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8556735515594482, |
|
"eval_runtime": 1332.3018, |
|
"eval_samples_per_second": 59.098, |
|
"eval_steps_per_second": 1.847, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.596684705882353e-06, |
|
"loss": 2.1317, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8501012325286865, |
|
"eval_runtime": 1335.1091, |
|
"eval_samples_per_second": 58.974, |
|
"eval_steps_per_second": 1.843, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.588449411764706e-06, |
|
"loss": 2.1481, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8472974300384521, |
|
"eval_runtime": 1253.9865, |
|
"eval_samples_per_second": 62.789, |
|
"eval_steps_per_second": 1.963, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.580214117647059e-06, |
|
"loss": 2.138, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8578077554702759, |
|
"eval_runtime": 1310.9382, |
|
"eval_samples_per_second": 60.062, |
|
"eval_steps_per_second": 1.877, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.571978823529412e-06, |
|
"loss": 2.1348, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.8515490293502808, |
|
"eval_runtime": 1273.8828, |
|
"eval_samples_per_second": 61.809, |
|
"eval_steps_per_second": 1.932, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.563743529411765e-06, |
|
"loss": 2.1254, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 1.851358413696289, |
|
"eval_runtime": 1272.3623, |
|
"eval_samples_per_second": 61.883, |
|
"eval_steps_per_second": 1.934, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.555508235294118e-06, |
|
"loss": 2.1193, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8436750173568726, |
|
"eval_runtime": 1350.7689, |
|
"eval_samples_per_second": 58.291, |
|
"eval_steps_per_second": 1.822, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.547281176470589e-06, |
|
"loss": 2.1238, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8456168174743652, |
|
"eval_runtime": 1276.1478, |
|
"eval_samples_per_second": 61.699, |
|
"eval_steps_per_second": 1.928, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.5390623529411765e-06, |
|
"loss": 2.1201, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8359283208847046, |
|
"eval_runtime": 1296.4213, |
|
"eval_samples_per_second": 60.734, |
|
"eval_steps_per_second": 1.898, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.53082705882353e-06, |
|
"loss": 2.1216, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8420335054397583, |
|
"eval_runtime": 1366.2934, |
|
"eval_samples_per_second": 57.628, |
|
"eval_steps_per_second": 1.801, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.522591764705882e-06, |
|
"loss": 2.1078, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8367574214935303, |
|
"eval_runtime": 1274.6745, |
|
"eval_samples_per_second": 61.77, |
|
"eval_steps_per_second": 1.931, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.514356470588236e-06, |
|
"loss": 2.1124, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.831612467765808, |
|
"eval_runtime": 1350.1025, |
|
"eval_samples_per_second": 58.319, |
|
"eval_steps_per_second": 1.823, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.506121176470588e-06, |
|
"loss": 2.0956, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8431429862976074, |
|
"eval_runtime": 1286.0027, |
|
"eval_samples_per_second": 61.226, |
|
"eval_steps_per_second": 1.914, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.497894117647059e-06, |
|
"loss": 2.1189, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8319342136383057, |
|
"eval_runtime": 1368.0474, |
|
"eval_samples_per_second": 57.554, |
|
"eval_steps_per_second": 1.799, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.489658823529412e-06, |
|
"loss": 2.099, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 1.8349796533584595, |
|
"eval_runtime": 1270.3602, |
|
"eval_samples_per_second": 61.98, |
|
"eval_steps_per_second": 1.937, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.481423529411765e-06, |
|
"loss": 2.0924, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.835334062576294, |
|
"eval_runtime": 1307.6688, |
|
"eval_samples_per_second": 60.212, |
|
"eval_steps_per_second": 1.882, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.473188235294118e-06, |
|
"loss": 2.1088, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.8290066719055176, |
|
"eval_runtime": 1334.2905, |
|
"eval_samples_per_second": 59.01, |
|
"eval_steps_per_second": 1.844, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.464952941176472e-06, |
|
"loss": 2.1005, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.827639102935791, |
|
"eval_runtime": 1340.3121, |
|
"eval_samples_per_second": 58.745, |
|
"eval_steps_per_second": 1.836, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.456725882352942e-06, |
|
"loss": 2.0974, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.82291841506958, |
|
"eval_runtime": 1333.5213, |
|
"eval_samples_per_second": 59.044, |
|
"eval_steps_per_second": 1.845, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.448490588235294e-06, |
|
"loss": 2.0993, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.8226332664489746, |
|
"eval_runtime": 1277.2243, |
|
"eval_samples_per_second": 61.647, |
|
"eval_steps_per_second": 1.927, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.440255294117647e-06, |
|
"loss": 2.0923, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.826454520225525, |
|
"eval_runtime": 1282.1547, |
|
"eval_samples_per_second": 61.41, |
|
"eval_steps_per_second": 1.919, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.43202e-06, |
|
"loss": 2.0866, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.81631338596344, |
|
"eval_runtime": 1302.8545, |
|
"eval_samples_per_second": 60.434, |
|
"eval_steps_per_second": 1.889, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.4238011764705886e-06, |
|
"loss": 2.0837, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.8213496208190918, |
|
"eval_runtime": 1316.2489, |
|
"eval_samples_per_second": 59.819, |
|
"eval_steps_per_second": 1.87, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.415565882352941e-06, |
|
"loss": 2.0865, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.8240435123443604, |
|
"eval_runtime": 1333.9994, |
|
"eval_samples_per_second": 59.023, |
|
"eval_steps_per_second": 1.845, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.407338823529412e-06, |
|
"loss": 2.0923, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.809515357017517, |
|
"eval_runtime": 1362.5256, |
|
"eval_samples_per_second": 57.788, |
|
"eval_steps_per_second": 1.806, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.399103529411765e-06, |
|
"loss": 2.0789, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.8183976411819458, |
|
"eval_runtime": 1274.3396, |
|
"eval_samples_per_second": 61.787, |
|
"eval_steps_per_second": 1.931, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.390868235294118e-06, |
|
"loss": 2.0855, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.810996413230896, |
|
"eval_runtime": 1322.4479, |
|
"eval_samples_per_second": 59.539, |
|
"eval_steps_per_second": 1.861, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.382641176470588e-06, |
|
"loss": 2.0832, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.8153365850448608, |
|
"eval_runtime": 1343.5102, |
|
"eval_samples_per_second": 58.605, |
|
"eval_steps_per_second": 1.832, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.374405882352942e-06, |
|
"loss": 2.0825, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.808205485343933, |
|
"eval_runtime": 1313.381, |
|
"eval_samples_per_second": 59.95, |
|
"eval_steps_per_second": 1.874, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.366170588235295e-06, |
|
"loss": 2.0752, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.8093898296356201, |
|
"eval_runtime": 1349.7533, |
|
"eval_samples_per_second": 58.334, |
|
"eval_steps_per_second": 1.823, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.3579352941176475e-06, |
|
"loss": 2.0784, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.8149974346160889, |
|
"eval_runtime": 1295.5666, |
|
"eval_samples_per_second": 60.774, |
|
"eval_steps_per_second": 1.9, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.349700000000001e-06, |
|
"loss": 2.0732, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.8028277158737183, |
|
"eval_runtime": 1304.8774, |
|
"eval_samples_per_second": 60.341, |
|
"eval_steps_per_second": 1.886, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.341481176470588e-06, |
|
"loss": 2.0766, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.803389549255371, |
|
"eval_runtime": 1325.2979, |
|
"eval_samples_per_second": 59.411, |
|
"eval_steps_per_second": 1.857, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.333245882352941e-06, |
|
"loss": 2.0655, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.8071125745773315, |
|
"eval_runtime": 1308.7884, |
|
"eval_samples_per_second": 60.16, |
|
"eval_steps_per_second": 1.88, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.325010588235294e-06, |
|
"loss": 2.0627, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.8011773824691772, |
|
"eval_runtime": 1298.6527, |
|
"eval_samples_per_second": 60.63, |
|
"eval_steps_per_second": 1.895, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.316775294117647e-06, |
|
"loss": 2.0689, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.80793297290802, |
|
"eval_runtime": 1375.2481, |
|
"eval_samples_per_second": 57.253, |
|
"eval_steps_per_second": 1.789, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.308548235294118e-06, |
|
"loss": 2.0594, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7938480377197266, |
|
"eval_runtime": 1361.5038, |
|
"eval_samples_per_second": 57.831, |
|
"eval_steps_per_second": 1.808, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.30031294117647e-06, |
|
"loss": 2.069, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7888888120651245, |
|
"eval_runtime": 1395.2585, |
|
"eval_samples_per_second": 56.432, |
|
"eval_steps_per_second": 1.764, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.292085882352941e-06, |
|
"loss": 2.0629, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7948452234268188, |
|
"eval_runtime": 1306.9601, |
|
"eval_samples_per_second": 60.244, |
|
"eval_steps_per_second": 1.883, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.283850588235294e-06, |
|
"loss": 2.0659, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7888908386230469, |
|
"eval_runtime": 1295.0247, |
|
"eval_samples_per_second": 60.8, |
|
"eval_steps_per_second": 1.9, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.275623529411765e-06, |
|
"loss": 2.0499, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.7932168245315552, |
|
"eval_runtime": 1346.4053, |
|
"eval_samples_per_second": 58.479, |
|
"eval_steps_per_second": 1.828, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.267388235294118e-06, |
|
"loss": 2.0577, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 1.78506338596344, |
|
"eval_runtime": 1367.5039, |
|
"eval_samples_per_second": 57.577, |
|
"eval_steps_per_second": 1.8, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.259152941176471e-06, |
|
"loss": 2.0567, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7812087535858154, |
|
"eval_runtime": 1317.2032, |
|
"eval_samples_per_second": 59.776, |
|
"eval_steps_per_second": 1.868, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.250917647058824e-06, |
|
"loss": 2.066, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7878668308258057, |
|
"eval_runtime": 1334.3015, |
|
"eval_samples_per_second": 59.01, |
|
"eval_steps_per_second": 1.844, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.242682352941177e-06, |
|
"loss": 2.0503, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7862142324447632, |
|
"eval_runtime": 1360.5455, |
|
"eval_samples_per_second": 57.872, |
|
"eval_steps_per_second": 1.809, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2344552941176475e-06, |
|
"loss": 2.0524, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7882963418960571, |
|
"eval_runtime": 1312.4549, |
|
"eval_samples_per_second": 59.992, |
|
"eval_steps_per_second": 1.875, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.226220000000001e-06, |
|
"loss": 2.0484, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7941440343856812, |
|
"eval_runtime": 1330.4245, |
|
"eval_samples_per_second": 59.182, |
|
"eval_steps_per_second": 1.85, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.2179929411764715e-06, |
|
"loss": 2.0468, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7767722606658936, |
|
"eval_runtime": 1352.4496, |
|
"eval_samples_per_second": 58.218, |
|
"eval_steps_per_second": 1.82, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.209765882352942e-06, |
|
"loss": 2.048, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.777193546295166, |
|
"eval_runtime": 1392.9228, |
|
"eval_samples_per_second": 56.526, |
|
"eval_steps_per_second": 1.767, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.201530588235295e-06, |
|
"loss": 2.0421, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7795777320861816, |
|
"eval_runtime": 1353.8569, |
|
"eval_samples_per_second": 58.158, |
|
"eval_steps_per_second": 1.818, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.193295294117647e-06, |
|
"loss": 2.0394, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.7759983539581299, |
|
"eval_runtime": 1337.236, |
|
"eval_samples_per_second": 58.88, |
|
"eval_steps_per_second": 1.84, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.185076470588235e-06, |
|
"loss": 2.039, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7742137908935547, |
|
"eval_runtime": 1341.9424, |
|
"eval_samples_per_second": 58.674, |
|
"eval_steps_per_second": 1.834, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.1768411764705885e-06, |
|
"loss": 2.0365, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7747963666915894, |
|
"eval_runtime": 1380.5697, |
|
"eval_samples_per_second": 57.032, |
|
"eval_steps_per_second": 1.783, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.168605882352942e-06, |
|
"loss": 2.0326, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7771369218826294, |
|
"eval_runtime": 1279.0938, |
|
"eval_samples_per_second": 61.557, |
|
"eval_steps_per_second": 1.924, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.160378823529412e-06, |
|
"loss": 2.0369, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7755202054977417, |
|
"eval_runtime": 1277.1355, |
|
"eval_samples_per_second": 61.651, |
|
"eval_steps_per_second": 1.927, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.152143529411765e-06, |
|
"loss": 2.0404, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.782023549079895, |
|
"eval_runtime": 1332.026, |
|
"eval_samples_per_second": 59.111, |
|
"eval_steps_per_second": 1.848, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.143908235294118e-06, |
|
"loss": 2.0295, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7722965478897095, |
|
"eval_runtime": 1302.5963, |
|
"eval_samples_per_second": 60.446, |
|
"eval_steps_per_second": 1.889, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.135672941176471e-06, |
|
"loss": 2.0329, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.772587776184082, |
|
"eval_runtime": 1317.5586, |
|
"eval_samples_per_second": 59.76, |
|
"eval_steps_per_second": 1.868, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.127437647058824e-06, |
|
"loss": 2.0249, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.764708161354065, |
|
"eval_runtime": 1344.7146, |
|
"eval_samples_per_second": 58.553, |
|
"eval_steps_per_second": 1.83, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.119210588235295e-06, |
|
"loss": 2.0372, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 1.7661118507385254, |
|
"eval_runtime": 1264.9153, |
|
"eval_samples_per_second": 62.247, |
|
"eval_steps_per_second": 1.946, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.1109752941176474e-06, |
|
"loss": 2.0287, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7656384706497192, |
|
"eval_runtime": 1301.1054, |
|
"eval_samples_per_second": 60.515, |
|
"eval_steps_per_second": 1.891, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.102740000000001e-06, |
|
"loss": 2.0134, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7664334774017334, |
|
"eval_runtime": 1345.5823, |
|
"eval_samples_per_second": 58.515, |
|
"eval_steps_per_second": 1.829, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.094504705882353e-06, |
|
"loss": 2.0164, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.763651967048645, |
|
"eval_runtime": 1262.0498, |
|
"eval_samples_per_second": 62.388, |
|
"eval_steps_per_second": 1.95, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.086277647058824e-06, |
|
"loss": 2.0248, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7580262422561646, |
|
"eval_runtime": 1266.7103, |
|
"eval_samples_per_second": 62.159, |
|
"eval_steps_per_second": 1.943, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.0780423529411765e-06, |
|
"loss": 2.0172, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7685062885284424, |
|
"eval_runtime": 1289.7069, |
|
"eval_samples_per_second": 61.05, |
|
"eval_steps_per_second": 1.908, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.06980705882353e-06, |
|
"loss": 2.019, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7675044536590576, |
|
"eval_runtime": 1320.0327, |
|
"eval_samples_per_second": 59.648, |
|
"eval_steps_per_second": 1.864, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.061571764705882e-06, |
|
"loss": 2.0237, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7597997188568115, |
|
"eval_runtime": 1298.1829, |
|
"eval_samples_per_second": 60.652, |
|
"eval_steps_per_second": 1.896, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.053352941176471e-06, |
|
"loss": 2.0096, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7642709016799927, |
|
"eval_runtime": 1299.3688, |
|
"eval_samples_per_second": 60.596, |
|
"eval_steps_per_second": 1.894, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.045117647058824e-06, |
|
"loss": 2.0137, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 1.7602267265319824, |
|
"eval_runtime": 1297.1878, |
|
"eval_samples_per_second": 60.698, |
|
"eval_steps_per_second": 1.897, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.036882352941177e-06, |
|
"loss": 2.0142, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7619584798812866, |
|
"eval_runtime": 1294.7741, |
|
"eval_samples_per_second": 60.811, |
|
"eval_steps_per_second": 1.901, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.028655294117648e-06, |
|
"loss": 2.0154, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.757212519645691, |
|
"eval_runtime": 1312.6161, |
|
"eval_samples_per_second": 59.985, |
|
"eval_steps_per_second": 1.875, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.02042e-06, |
|
"loss": 2.0131, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7483080625534058, |
|
"eval_runtime": 1260.0115, |
|
"eval_samples_per_second": 62.489, |
|
"eval_steps_per_second": 1.953, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.012192941176471e-06, |
|
"loss": 2.0062, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7526593208312988, |
|
"eval_runtime": 1267.5051, |
|
"eval_samples_per_second": 62.12, |
|
"eval_steps_per_second": 1.942, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.003957647058824e-06, |
|
"loss": 2.0056, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7664062976837158, |
|
"eval_runtime": 1298.3162, |
|
"eval_samples_per_second": 60.645, |
|
"eval_steps_per_second": 1.896, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.995722352941177e-06, |
|
"loss": 2.0136, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7430987358093262, |
|
"eval_runtime": 1363.753, |
|
"eval_samples_per_second": 57.736, |
|
"eval_steps_per_second": 1.805, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.98748705882353e-06, |
|
"loss": 2.0094, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7556384801864624, |
|
"eval_runtime": 1283.1923, |
|
"eval_samples_per_second": 61.36, |
|
"eval_steps_per_second": 1.918, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.979251764705883e-06, |
|
"loss": 2.0018, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7488545179367065, |
|
"eval_runtime": 1318.7895, |
|
"eval_samples_per_second": 59.704, |
|
"eval_steps_per_second": 1.866, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.971016470588236e-06, |
|
"loss": 2.012, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.7493007183074951, |
|
"eval_runtime": 1351.8311, |
|
"eval_samples_per_second": 58.245, |
|
"eval_steps_per_second": 1.82, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.962789411764707e-06, |
|
"loss": 2.0038, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7452046871185303, |
|
"eval_runtime": 1297.9368, |
|
"eval_samples_per_second": 60.663, |
|
"eval_steps_per_second": 1.896, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.954554117647059e-06, |
|
"loss": 2.0036, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.743809700012207, |
|
"eval_runtime": 1345.0863, |
|
"eval_samples_per_second": 58.537, |
|
"eval_steps_per_second": 1.83, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.94632705882353e-06, |
|
"loss": 2.0069, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7406339645385742, |
|
"eval_runtime": 1336.5153, |
|
"eval_samples_per_second": 58.912, |
|
"eval_steps_per_second": 1.841, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.938091764705882e-06, |
|
"loss": 2.0023, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7494807243347168, |
|
"eval_runtime": 1324.0204, |
|
"eval_samples_per_second": 59.468, |
|
"eval_steps_per_second": 1.859, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.929864705882353e-06, |
|
"loss": 2.0032, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7454729080200195, |
|
"eval_runtime": 1272.2315, |
|
"eval_samples_per_second": 61.889, |
|
"eval_steps_per_second": 1.934, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.921645882352942e-06, |
|
"loss": 1.9948, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7327392101287842, |
|
"eval_runtime": 1260.2656, |
|
"eval_samples_per_second": 62.477, |
|
"eval_steps_per_second": 1.953, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.913410588235294e-06, |
|
"loss": 2.0008, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.741957426071167, |
|
"eval_runtime": 1292.2259, |
|
"eval_samples_per_second": 60.931, |
|
"eval_steps_per_second": 1.904, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.905175294117648e-06, |
|
"loss": 1.9927, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7473654747009277, |
|
"eval_runtime": 1318.8071, |
|
"eval_samples_per_second": 59.703, |
|
"eval_steps_per_second": 1.866, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 3.89694e-06, |
|
"loss": 1.987, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.7441741228103638, |
|
"eval_runtime": 1308.7801, |
|
"eval_samples_per_second": 60.161, |
|
"eval_steps_per_second": 1.88, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8887047058823535e-06, |
|
"loss": 1.9987, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7342219352722168, |
|
"eval_runtime": 1310.0491, |
|
"eval_samples_per_second": 60.102, |
|
"eval_steps_per_second": 1.879, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.880469411764706e-06, |
|
"loss": 1.9866, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7366760969161987, |
|
"eval_runtime": 1338.3757, |
|
"eval_samples_per_second": 58.83, |
|
"eval_steps_per_second": 1.839, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8722341176470594e-06, |
|
"loss": 1.9922, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.733156442642212, |
|
"eval_runtime": 1379.7672, |
|
"eval_samples_per_second": 57.065, |
|
"eval_steps_per_second": 1.784, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.86400705882353e-06, |
|
"loss": 1.9954, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7350982427597046, |
|
"eval_runtime": 1276.6064, |
|
"eval_samples_per_second": 61.677, |
|
"eval_steps_per_second": 1.928, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8557717647058834e-06, |
|
"loss": 1.9912, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7301350831985474, |
|
"eval_runtime": 1290.7008, |
|
"eval_samples_per_second": 61.003, |
|
"eval_steps_per_second": 1.907, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.8475529411764705e-06, |
|
"loss": 1.9852, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7331594228744507, |
|
"eval_runtime": 1303.6593, |
|
"eval_samples_per_second": 60.397, |
|
"eval_steps_per_second": 1.888, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.839317647058824e-06, |
|
"loss": 1.9878, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7405369281768799, |
|
"eval_runtime": 1337.4986, |
|
"eval_samples_per_second": 58.869, |
|
"eval_steps_per_second": 1.84, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.831082352941176e-06, |
|
"loss": 1.9873, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7377097606658936, |
|
"eval_runtime": 1338.3307, |
|
"eval_samples_per_second": 58.832, |
|
"eval_steps_per_second": 1.839, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.82284705882353e-06, |
|
"loss": 1.9896, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.733717441558838, |
|
"eval_runtime": 1270.765, |
|
"eval_samples_per_second": 61.96, |
|
"eval_steps_per_second": 1.937, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.8146117647058823e-06, |
|
"loss": 1.9998, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7400872707366943, |
|
"eval_runtime": 1374.6506, |
|
"eval_samples_per_second": 57.278, |
|
"eval_steps_per_second": 1.79, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.806384705882353e-06, |
|
"loss": 1.9923, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7128148078918457, |
|
"eval_runtime": 1269.3048, |
|
"eval_samples_per_second": 62.032, |
|
"eval_steps_per_second": 1.939, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.798149411764706e-06, |
|
"loss": 1.9877, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7390938997268677, |
|
"eval_runtime": 1305.2056, |
|
"eval_samples_per_second": 60.325, |
|
"eval_steps_per_second": 1.886, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7899223529411765e-06, |
|
"loss": 1.9802, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7243410348892212, |
|
"eval_runtime": 1310.1732, |
|
"eval_samples_per_second": 60.097, |
|
"eval_steps_per_second": 1.878, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7816870588235295e-06, |
|
"loss": 1.9828, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7285162210464478, |
|
"eval_runtime": 1307.3997, |
|
"eval_samples_per_second": 60.224, |
|
"eval_steps_per_second": 1.882, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7734599999999997e-06, |
|
"loss": 1.981, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.735887885093689, |
|
"eval_runtime": 1309.4529, |
|
"eval_samples_per_second": 60.13, |
|
"eval_steps_per_second": 1.879, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.765232941176471e-06, |
|
"loss": 1.9692, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7372865676879883, |
|
"eval_runtime": 1335.6217, |
|
"eval_samples_per_second": 58.952, |
|
"eval_steps_per_second": 1.843, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7569976470588237e-06, |
|
"loss": 1.9799, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7367862462997437, |
|
"eval_runtime": 1363.2422, |
|
"eval_samples_per_second": 57.757, |
|
"eval_steps_per_second": 1.805, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 3.7487623529411767e-06, |
|
"loss": 1.9753, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 1.7180030345916748, |
|
"eval_runtime": 1369.9985, |
|
"eval_samples_per_second": 57.472, |
|
"eval_steps_per_second": 1.796, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.74052705882353e-06, |
|
"loss": 1.9783, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.728384256362915, |
|
"eval_runtime": 1283.0397, |
|
"eval_samples_per_second": 61.368, |
|
"eval_steps_per_second": 1.918, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.732291764705883e-06, |
|
"loss": 1.9754, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7158981561660767, |
|
"eval_runtime": 1309.3703, |
|
"eval_samples_per_second": 60.133, |
|
"eval_steps_per_second": 1.88, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.724056470588236e-06, |
|
"loss": 1.9871, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7252790927886963, |
|
"eval_runtime": 1292.6928, |
|
"eval_samples_per_second": 60.909, |
|
"eval_steps_per_second": 1.904, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.7158294117647066e-06, |
|
"loss": 1.9716, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7210992574691772, |
|
"eval_runtime": 257.0246, |
|
"eval_samples_per_second": 306.34, |
|
"eval_steps_per_second": 9.575, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.707610588235295e-06, |
|
"loss": 1.9745, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7264364957809448, |
|
"eval_runtime": 290.1187, |
|
"eval_samples_per_second": 271.396, |
|
"eval_steps_per_second": 8.483, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.699375294117648e-06, |
|
"loss": 1.9709, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7170484066009521, |
|
"eval_runtime": 278.2177, |
|
"eval_samples_per_second": 283.005, |
|
"eval_steps_per_second": 8.846, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.6911482352941185e-06, |
|
"loss": 1.9783, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.720254898071289, |
|
"eval_runtime": 295.7088, |
|
"eval_samples_per_second": 266.265, |
|
"eval_steps_per_second": 8.322, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.6829129411764706e-06, |
|
"loss": 1.9717, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.723580002784729, |
|
"eval_runtime": 285.5463, |
|
"eval_samples_per_second": 275.742, |
|
"eval_steps_per_second": 8.619, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 3.6746776470588235e-06, |
|
"loss": 1.9621, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 1.7218551635742188, |
|
"eval_runtime": 289.3233, |
|
"eval_samples_per_second": 272.142, |
|
"eval_steps_per_second": 8.506, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6664423529411765e-06, |
|
"loss": 1.9615, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7153804302215576, |
|
"eval_runtime": 304.7383, |
|
"eval_samples_per_second": 258.376, |
|
"eval_steps_per_second": 8.076, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.65820705882353e-06, |
|
"loss": 1.9686, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7157891988754272, |
|
"eval_runtime": 296.2947, |
|
"eval_samples_per_second": 265.739, |
|
"eval_steps_per_second": 8.306, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.64998e-06, |
|
"loss": 1.967, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7255160808563232, |
|
"eval_runtime": 270.3222, |
|
"eval_samples_per_second": 291.271, |
|
"eval_steps_per_second": 9.104, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6417447058823534e-06, |
|
"loss": 1.9701, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.718467116355896, |
|
"eval_runtime": 288.4423, |
|
"eval_samples_per_second": 272.973, |
|
"eval_steps_per_second": 8.532, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6335094117647064e-06, |
|
"loss": 1.9541, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7223858833312988, |
|
"eval_runtime": 301.9716, |
|
"eval_samples_per_second": 260.743, |
|
"eval_steps_per_second": 8.15, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6252741176470598e-06, |
|
"loss": 1.9704, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7094881534576416, |
|
"eval_runtime": 296.3843, |
|
"eval_samples_per_second": 265.658, |
|
"eval_steps_per_second": 8.303, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.617047058823529e-06, |
|
"loss": 1.9644, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7200572490692139, |
|
"eval_runtime": 280.0116, |
|
"eval_samples_per_second": 281.192, |
|
"eval_steps_per_second": 8.789, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6088117647058825e-06, |
|
"loss": 1.9701, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.7085354328155518, |
|
"eval_runtime": 270.4782, |
|
"eval_samples_per_second": 291.103, |
|
"eval_steps_per_second": 9.099, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.6005847058823527e-06, |
|
"loss": 1.963, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 1.6980141401290894, |
|
"eval_runtime": 262.3974, |
|
"eval_samples_per_second": 300.068, |
|
"eval_steps_per_second": 9.379, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5923576470588234e-06, |
|
"loss": 1.9656, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7150171995162964, |
|
"eval_runtime": 290.7308, |
|
"eval_samples_per_second": 270.824, |
|
"eval_steps_per_second": 8.465, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5841223529411763e-06, |
|
"loss": 1.9572, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.718950867652893, |
|
"eval_runtime": 302.7947, |
|
"eval_samples_per_second": 260.034, |
|
"eval_steps_per_second": 8.128, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5758952941176465e-06, |
|
"loss": 1.9579, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7136762142181396, |
|
"eval_runtime": 293.5678, |
|
"eval_samples_per_second": 268.207, |
|
"eval_steps_per_second": 8.383, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.56766e-06, |
|
"loss": 1.9511, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7204865217208862, |
|
"eval_runtime": 276.1567, |
|
"eval_samples_per_second": 285.117, |
|
"eval_steps_per_second": 8.912, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.559424705882353e-06, |
|
"loss": 1.9567, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7064766883850098, |
|
"eval_runtime": 284.9959, |
|
"eval_samples_per_second": 276.274, |
|
"eval_steps_per_second": 8.635, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5511976470588235e-06, |
|
"loss": 1.9517, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7071802616119385, |
|
"eval_runtime": 302.7195, |
|
"eval_samples_per_second": 260.099, |
|
"eval_steps_per_second": 8.13, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5429623529411764e-06, |
|
"loss": 1.9577, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7056118249893188, |
|
"eval_runtime": 279.2944, |
|
"eval_samples_per_second": 281.914, |
|
"eval_steps_per_second": 8.811, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.534735294117647e-06, |
|
"loss": 1.9521, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7090562582015991, |
|
"eval_runtime": 304.8754, |
|
"eval_samples_per_second": 258.26, |
|
"eval_steps_per_second": 8.072, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.5265000000000005e-06, |
|
"loss": 1.9541, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 1.7047358751296997, |
|
"eval_runtime": 287.5185, |
|
"eval_samples_per_second": 273.85, |
|
"eval_steps_per_second": 8.559, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5182647058823534e-06, |
|
"loss": 1.9505, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.708825945854187, |
|
"eval_runtime": 282.3838, |
|
"eval_samples_per_second": 278.83, |
|
"eval_steps_per_second": 8.715, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.5100376470588236e-06, |
|
"loss": 1.9559, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7142888307571411, |
|
"eval_runtime": 296.6697, |
|
"eval_samples_per_second": 265.403, |
|
"eval_steps_per_second": 8.295, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.501802352941177e-06, |
|
"loss": 1.9531, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7113577127456665, |
|
"eval_runtime": 297.8189, |
|
"eval_samples_per_second": 264.379, |
|
"eval_steps_per_second": 8.263, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4935752941176476e-06, |
|
"loss": 1.9627, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7047557830810547, |
|
"eval_runtime": 303.2883, |
|
"eval_samples_per_second": 259.611, |
|
"eval_steps_per_second": 8.114, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4853400000000006e-06, |
|
"loss": 1.9617, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7064669132232666, |
|
"eval_runtime": 285.7888, |
|
"eval_samples_per_second": 275.508, |
|
"eval_steps_per_second": 8.611, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4771047058823535e-06, |
|
"loss": 1.9468, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.6990549564361572, |
|
"eval_runtime": 283.1785, |
|
"eval_samples_per_second": 278.047, |
|
"eval_steps_per_second": 8.691, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4688694117647065e-06, |
|
"loss": 1.9475, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7063584327697754, |
|
"eval_runtime": 314.7643, |
|
"eval_samples_per_second": 250.146, |
|
"eval_steps_per_second": 7.819, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4606341176470594e-06, |
|
"loss": 1.9569, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.7076828479766846, |
|
"eval_runtime": 303.2807, |
|
"eval_samples_per_second": 259.618, |
|
"eval_steps_per_second": 8.115, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.4523988235294128e-06, |
|
"loss": 1.9624, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 1.706101894378662, |
|
"eval_runtime": 292.0709, |
|
"eval_samples_per_second": 269.582, |
|
"eval_steps_per_second": 8.426, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.444171764705883e-06, |
|
"loss": 1.9575, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.704559326171875, |
|
"eval_runtime": 295.1202, |
|
"eval_samples_per_second": 266.796, |
|
"eval_steps_per_second": 8.339, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.4359447058823537e-06, |
|
"loss": 1.9522, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.6992961168289185, |
|
"eval_runtime": 297.5252, |
|
"eval_samples_per_second": 264.64, |
|
"eval_steps_per_second": 8.272, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.4277094117647066e-06, |
|
"loss": 1.9451, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.708154320716858, |
|
"eval_runtime": 296.8626, |
|
"eval_samples_per_second": 265.23, |
|
"eval_steps_per_second": 8.29, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.419474117647059e-06, |
|
"loss": 1.9398, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.694187879562378, |
|
"eval_runtime": 285.2559, |
|
"eval_samples_per_second": 276.022, |
|
"eval_steps_per_second": 8.627, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.411238823529412e-06, |
|
"loss": 1.9487, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.697739839553833, |
|
"eval_runtime": 271.0859, |
|
"eval_samples_per_second": 290.45, |
|
"eval_steps_per_second": 9.078, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.4030117647058823e-06, |
|
"loss": 1.9505, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.6950899362564087, |
|
"eval_runtime": 296.9518, |
|
"eval_samples_per_second": 265.151, |
|
"eval_steps_per_second": 8.288, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.3947847058823534e-06, |
|
"loss": 1.9444, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.6965278387069702, |
|
"eval_runtime": 298.5122, |
|
"eval_samples_per_second": 263.765, |
|
"eval_steps_per_second": 8.244, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.3865494117647063e-06, |
|
"loss": 1.9542, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.6953821182250977, |
|
"eval_runtime": 855.4125, |
|
"eval_samples_per_second": 92.046, |
|
"eval_steps_per_second": 2.877, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.3783141176470592e-06, |
|
"loss": 1.9352, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.6955593824386597, |
|
"eval_runtime": 825.3854, |
|
"eval_samples_per_second": 95.394, |
|
"eval_steps_per_second": 2.982, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.370078823529412e-06, |
|
"loss": 1.9335, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6933975219726562, |
|
"eval_runtime": 832.1739, |
|
"eval_samples_per_second": 94.616, |
|
"eval_steps_per_second": 2.957, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3618435294117647e-06, |
|
"loss": 1.9396, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.7037544250488281, |
|
"eval_runtime": 899.1013, |
|
"eval_samples_per_second": 87.573, |
|
"eval_steps_per_second": 2.737, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3536082352941172e-06, |
|
"loss": 1.9393, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6980476379394531, |
|
"eval_runtime": 890.1765, |
|
"eval_samples_per_second": 88.451, |
|
"eval_steps_per_second": 2.765, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3453811764705883e-06, |
|
"loss": 1.9318, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.690232753753662, |
|
"eval_runtime": 893.2562, |
|
"eval_samples_per_second": 88.146, |
|
"eval_steps_per_second": 2.755, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3371623529411767e-06, |
|
"loss": 1.9394, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6947176456451416, |
|
"eval_runtime": 845.3897, |
|
"eval_samples_per_second": 93.137, |
|
"eval_steps_per_second": 2.911, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3289270588235296e-06, |
|
"loss": 1.9349, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6847898960113525, |
|
"eval_runtime": 840.3632, |
|
"eval_samples_per_second": 93.694, |
|
"eval_steps_per_second": 2.928, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.320691764705882e-06, |
|
"loss": 1.9437, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6911025047302246, |
|
"eval_runtime": 829.0742, |
|
"eval_samples_per_second": 94.97, |
|
"eval_steps_per_second": 2.968, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3124564705882355e-06, |
|
"loss": 1.9325, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6867425441741943, |
|
"eval_runtime": 876.4729, |
|
"eval_samples_per_second": 89.834, |
|
"eval_steps_per_second": 2.808, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.3042211764705884e-06, |
|
"loss": 1.9189, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.6835554838180542, |
|
"eval_runtime": 863.7905, |
|
"eval_samples_per_second": 91.153, |
|
"eval_steps_per_second": 2.849, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.295985882352941e-06, |
|
"loss": 1.9392, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6935853958129883, |
|
"eval_runtime": 885.5489, |
|
"eval_samples_per_second": 88.913, |
|
"eval_steps_per_second": 2.779, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.2877505882352943e-06, |
|
"loss": 1.9362, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6833049058914185, |
|
"eval_runtime": 817.314, |
|
"eval_samples_per_second": 96.336, |
|
"eval_steps_per_second": 3.011, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.2795152941176473e-06, |
|
"loss": 1.9389, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6875420808792114, |
|
"eval_runtime": 831.6008, |
|
"eval_samples_per_second": 94.681, |
|
"eval_steps_per_second": 2.959, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.2712800000000002e-06, |
|
"loss": 1.9332, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6895575523376465, |
|
"eval_runtime": 785.6485, |
|
"eval_samples_per_second": 100.219, |
|
"eval_steps_per_second": 3.132, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.263061176470588e-06, |
|
"loss": 1.9182, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6885087490081787, |
|
"eval_runtime": 827.3753, |
|
"eval_samples_per_second": 95.165, |
|
"eval_steps_per_second": 2.974, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.2548258823529415e-06, |
|
"loss": 1.9347, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.681879997253418, |
|
"eval_runtime": 900.142, |
|
"eval_samples_per_second": 87.472, |
|
"eval_steps_per_second": 2.734, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.2465905882352944e-06, |
|
"loss": 1.9381, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6881948709487915, |
|
"eval_runtime": 834.4334, |
|
"eval_samples_per_second": 94.36, |
|
"eval_steps_per_second": 2.949, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.238363529411765e-06, |
|
"loss": 1.938, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.6861393451690674, |
|
"eval_runtime": 822.7924, |
|
"eval_samples_per_second": 95.695, |
|
"eval_steps_per_second": 2.991, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.230136470588236e-06, |
|
"loss": 1.9387, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.686549186706543, |
|
"eval_runtime": 760.0348, |
|
"eval_samples_per_second": 103.597, |
|
"eval_steps_per_second": 3.238, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.2219011764705887e-06, |
|
"loss": 1.9331, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6894826889038086, |
|
"eval_runtime": 828.4061, |
|
"eval_samples_per_second": 95.046, |
|
"eval_steps_per_second": 2.971, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.2136658823529416e-06, |
|
"loss": 1.9277, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6776493787765503, |
|
"eval_runtime": 788.589, |
|
"eval_samples_per_second": 99.845, |
|
"eval_steps_per_second": 3.121, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.205438823529412e-06, |
|
"loss": 1.93, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6794580221176147, |
|
"eval_runtime": 847.1105, |
|
"eval_samples_per_second": 92.948, |
|
"eval_steps_per_second": 2.905, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.1972035294117652e-06, |
|
"loss": 1.9395, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6972289085388184, |
|
"eval_runtime": 811.3504, |
|
"eval_samples_per_second": 97.044, |
|
"eval_steps_per_second": 3.033, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.188968235294118e-06, |
|
"loss": 1.937, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.684800148010254, |
|
"eval_runtime": 831.3474, |
|
"eval_samples_per_second": 94.71, |
|
"eval_steps_per_second": 2.96, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.180732941176471e-06, |
|
"loss": 1.9272, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.689572811126709, |
|
"eval_runtime": 858.3647, |
|
"eval_samples_per_second": 91.729, |
|
"eval_steps_per_second": 2.867, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.1724976470588236e-06, |
|
"loss": 1.9299, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.685671329498291, |
|
"eval_runtime": 855.5516, |
|
"eval_samples_per_second": 92.031, |
|
"eval_steps_per_second": 2.877, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.1642623529411766e-06, |
|
"loss": 1.9278, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6928762197494507, |
|
"eval_runtime": 776.3726, |
|
"eval_samples_per_second": 101.417, |
|
"eval_steps_per_second": 3.17, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.156035294117647e-06, |
|
"loss": 1.9193, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_loss": 1.6825348138809204, |
|
"eval_runtime": 809.4317, |
|
"eval_samples_per_second": 97.274, |
|
"eval_steps_per_second": 3.04, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.1478e-06, |
|
"loss": 1.9247, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.676526427268982, |
|
"eval_runtime": 823.829, |
|
"eval_samples_per_second": 95.574, |
|
"eval_steps_per_second": 2.987, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.139572941176471e-06, |
|
"loss": 1.9233, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6889564990997314, |
|
"eval_runtime": 781.5255, |
|
"eval_samples_per_second": 100.748, |
|
"eval_steps_per_second": 3.149, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.1313376470588238e-06, |
|
"loss": 1.9276, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6908719539642334, |
|
"eval_runtime": 786.5253, |
|
"eval_samples_per_second": 100.107, |
|
"eval_steps_per_second": 3.129, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.123110588235295e-06, |
|
"loss": 1.9183, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6757584810256958, |
|
"eval_runtime": 900.4138, |
|
"eval_samples_per_second": 87.445, |
|
"eval_steps_per_second": 2.733, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.114875294117647e-06, |
|
"loss": 1.9263, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6813621520996094, |
|
"eval_runtime": 800.9151, |
|
"eval_samples_per_second": 98.309, |
|
"eval_steps_per_second": 3.073, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.1066399999999994e-06, |
|
"loss": 1.9132, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6753489971160889, |
|
"eval_runtime": 841.1528, |
|
"eval_samples_per_second": 93.606, |
|
"eval_steps_per_second": 2.926, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.0984211764705886e-06, |
|
"loss": 1.9243, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.682147741317749, |
|
"eval_runtime": 876.3921, |
|
"eval_samples_per_second": 89.842, |
|
"eval_steps_per_second": 2.808, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.090185882352941e-06, |
|
"loss": 1.9132, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.6810792684555054, |
|
"eval_runtime": 800.804, |
|
"eval_samples_per_second": 98.322, |
|
"eval_steps_per_second": 3.073, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3.081950588235294e-06, |
|
"loss": 1.918, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.686233639717102, |
|
"eval_runtime": 862.163, |
|
"eval_samples_per_second": 91.325, |
|
"eval_steps_per_second": 2.854, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0737152941176466e-06, |
|
"loss": 1.9131, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6782478094100952, |
|
"eval_runtime": 807.9469, |
|
"eval_samples_per_second": 97.453, |
|
"eval_steps_per_second": 3.046, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.06548e-06, |
|
"loss": 1.9292, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6781325340270996, |
|
"eval_runtime": 808.511, |
|
"eval_samples_per_second": 97.385, |
|
"eval_steps_per_second": 3.044, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.057244705882353e-06, |
|
"loss": 1.9274, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6813188791275024, |
|
"eval_runtime": 710.823, |
|
"eval_samples_per_second": 110.769, |
|
"eval_steps_per_second": 3.462, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0490176470588236e-06, |
|
"loss": 1.9217, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6681177616119385, |
|
"eval_runtime": 701.1999, |
|
"eval_samples_per_second": 112.289, |
|
"eval_steps_per_second": 3.51, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0407823529411765e-06, |
|
"loss": 1.9136, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6671432256698608, |
|
"eval_runtime": 639.7881, |
|
"eval_samples_per_second": 123.067, |
|
"eval_steps_per_second": 3.847, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0325470588235295e-06, |
|
"loss": 1.9225, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6838454008102417, |
|
"eval_runtime": 617.2221, |
|
"eval_samples_per_second": 127.567, |
|
"eval_steps_per_second": 3.987, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0243117647058824e-06, |
|
"loss": 1.9202, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.67597234249115, |
|
"eval_runtime": 668.9084, |
|
"eval_samples_per_second": 117.71, |
|
"eval_steps_per_second": 3.679, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0160847058823526e-06, |
|
"loss": 1.9216, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6799976825714111, |
|
"eval_runtime": 679.9535, |
|
"eval_samples_per_second": 115.798, |
|
"eval_steps_per_second": 3.619, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 3.0078576470588237e-06, |
|
"loss": 1.9217, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.6781527996063232, |
|
"eval_runtime": 722.93, |
|
"eval_samples_per_second": 108.914, |
|
"eval_steps_per_second": 3.404, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9996223529411767e-06, |
|
"loss": 1.9152, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6670936346054077, |
|
"eval_runtime": 706.8606, |
|
"eval_samples_per_second": 111.39, |
|
"eval_steps_per_second": 3.482, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9913952941176473e-06, |
|
"loss": 1.911, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6739472150802612, |
|
"eval_runtime": 639.4338, |
|
"eval_samples_per_second": 123.136, |
|
"eval_steps_per_second": 3.849, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9831600000000007e-06, |
|
"loss": 1.924, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6732276678085327, |
|
"eval_runtime": 599.699, |
|
"eval_samples_per_second": 131.294, |
|
"eval_steps_per_second": 4.104, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.974924705882353e-06, |
|
"loss": 1.9087, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6759378910064697, |
|
"eval_runtime": 603.5409, |
|
"eval_samples_per_second": 130.458, |
|
"eval_steps_per_second": 4.078, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.966689411764706e-06, |
|
"loss": 1.9133, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6749118566513062, |
|
"eval_runtime": 671.1294, |
|
"eval_samples_per_second": 117.32, |
|
"eval_steps_per_second": 3.667, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9584623529411764e-06, |
|
"loss": 1.9125, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6708778142929077, |
|
"eval_runtime": 641.754, |
|
"eval_samples_per_second": 122.69, |
|
"eval_steps_per_second": 3.835, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9502352941176474e-06, |
|
"loss": 1.9154, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.672595739364624, |
|
"eval_runtime": 665.3613, |
|
"eval_samples_per_second": 118.337, |
|
"eval_steps_per_second": 3.699, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9420000000000004e-06, |
|
"loss": 1.9094, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6622326374053955, |
|
"eval_runtime": 598.1523, |
|
"eval_samples_per_second": 131.634, |
|
"eval_steps_per_second": 4.114, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.9337647058823533e-06, |
|
"loss": 1.9101, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 1.6699204444885254, |
|
"eval_runtime": 592.06, |
|
"eval_samples_per_second": 132.988, |
|
"eval_steps_per_second": 4.157, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9255376470588235e-06, |
|
"loss": 1.9173, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6692543029785156, |
|
"eval_runtime": 602.9832, |
|
"eval_samples_per_second": 130.579, |
|
"eval_steps_per_second": 4.081, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9173023529411765e-06, |
|
"loss": 1.9179, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6609896421432495, |
|
"eval_runtime": 605.196, |
|
"eval_samples_per_second": 130.102, |
|
"eval_steps_per_second": 4.066, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9090670588235294e-06, |
|
"loss": 1.925, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6624456644058228, |
|
"eval_runtime": 662.6702, |
|
"eval_samples_per_second": 118.818, |
|
"eval_steps_per_second": 3.714, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.9008317647058824e-06, |
|
"loss": 1.9054, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.671242594718933, |
|
"eval_runtime": 656.7662, |
|
"eval_samples_per_second": 119.886, |
|
"eval_steps_per_second": 3.747, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.8925964705882353e-06, |
|
"loss": 1.9035, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6635082960128784, |
|
"eval_runtime": 586.3995, |
|
"eval_samples_per_second": 134.272, |
|
"eval_steps_per_second": 4.197, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.884369411764706e-06, |
|
"loss": 1.8994, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.663743495941162, |
|
"eval_runtime": 636.0923, |
|
"eval_samples_per_second": 123.782, |
|
"eval_steps_per_second": 3.869, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.8761341176470593e-06, |
|
"loss": 1.9091, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6615914106369019, |
|
"eval_runtime": 634.3745, |
|
"eval_samples_per_second": 124.118, |
|
"eval_steps_per_second": 3.879, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.8679070588235296e-06, |
|
"loss": 1.9121, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.6662075519561768, |
|
"eval_runtime": 633.787, |
|
"eval_samples_per_second": 124.233, |
|
"eval_steps_per_second": 3.883, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.859671764705882e-06, |
|
"loss": 1.8999, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.665105938911438, |
|
"eval_runtime": 695.9342, |
|
"eval_samples_per_second": 113.139, |
|
"eval_steps_per_second": 3.536, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.8514364705882355e-06, |
|
"loss": 1.8987, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.659078598022461, |
|
"eval_runtime": 593.4035, |
|
"eval_samples_per_second": 132.687, |
|
"eval_steps_per_second": 4.147, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.843201176470588e-06, |
|
"loss": 1.9075, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6652796268463135, |
|
"eval_runtime": 675.3349, |
|
"eval_samples_per_second": 116.59, |
|
"eval_steps_per_second": 3.644, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.8349741176470586e-06, |
|
"loss": 1.9122, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6633714437484741, |
|
"eval_runtime": 677.0834, |
|
"eval_samples_per_second": 116.288, |
|
"eval_steps_per_second": 3.635, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.8267470588235293e-06, |
|
"loss": 1.9027, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6583502292633057, |
|
"eval_runtime": 635.279, |
|
"eval_samples_per_second": 123.941, |
|
"eval_steps_per_second": 3.874, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.818511764705882e-06, |
|
"loss": 1.9106, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.657875895500183, |
|
"eval_runtime": 579.2726, |
|
"eval_samples_per_second": 135.924, |
|
"eval_steps_per_second": 4.248, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.810284705882353e-06, |
|
"loss": 1.9031, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6537038087844849, |
|
"eval_runtime": 636.8, |
|
"eval_samples_per_second": 123.645, |
|
"eval_steps_per_second": 3.865, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.802049411764706e-06, |
|
"loss": 1.9132, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6636656522750854, |
|
"eval_runtime": 698.4216, |
|
"eval_samples_per_second": 112.736, |
|
"eval_steps_per_second": 3.524, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.793822352941176e-06, |
|
"loss": 1.896, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6606979370117188, |
|
"eval_runtime": 702.9311, |
|
"eval_samples_per_second": 112.012, |
|
"eval_steps_per_second": 3.501, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7855870588235294e-06, |
|
"loss": 1.8999, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.6614608764648438, |
|
"eval_runtime": 653.7097, |
|
"eval_samples_per_second": 120.446, |
|
"eval_steps_per_second": 3.765, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.77736e-06, |
|
"loss": 1.911, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6599065065383911, |
|
"eval_runtime": 629.3127, |
|
"eval_samples_per_second": 125.116, |
|
"eval_steps_per_second": 3.911, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.769124705882353e-06, |
|
"loss": 1.9031, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6615227460861206, |
|
"eval_runtime": 690.0639, |
|
"eval_samples_per_second": 114.101, |
|
"eval_steps_per_second": 3.566, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7608894117647063e-06, |
|
"loss": 1.9023, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6454137563705444, |
|
"eval_runtime": 592.223, |
|
"eval_samples_per_second": 132.952, |
|
"eval_steps_per_second": 4.156, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.752654117647059e-06, |
|
"loss": 1.9141, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6627545356750488, |
|
"eval_runtime": 663.9405, |
|
"eval_samples_per_second": 118.59, |
|
"eval_steps_per_second": 3.707, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7444188235294122e-06, |
|
"loss": 1.9003, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6632248163223267, |
|
"eval_runtime": 631.893, |
|
"eval_samples_per_second": 124.605, |
|
"eval_steps_per_second": 3.895, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.736191764705882e-06, |
|
"loss": 1.8972, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6637376546859741, |
|
"eval_runtime": 634.1872, |
|
"eval_samples_per_second": 124.154, |
|
"eval_steps_per_second": 3.881, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.727964705882353e-06, |
|
"loss": 1.8967, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6564476490020752, |
|
"eval_runtime": 615.9047, |
|
"eval_samples_per_second": 127.84, |
|
"eval_steps_per_second": 3.996, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7197294117647065e-06, |
|
"loss": 1.9001, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6534888744354248, |
|
"eval_runtime": 648.5154, |
|
"eval_samples_per_second": 121.411, |
|
"eval_steps_per_second": 3.795, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.7114941176470594e-06, |
|
"loss": 1.8986, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_loss": 1.6570912599563599, |
|
"eval_runtime": 657.4951, |
|
"eval_samples_per_second": 119.753, |
|
"eval_steps_per_second": 3.743, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.7032588235294124e-06, |
|
"loss": 1.9137, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6614021062850952, |
|
"eval_runtime": 628.005, |
|
"eval_samples_per_second": 125.376, |
|
"eval_steps_per_second": 3.919, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6950235294117653e-06, |
|
"loss": 1.9016, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6537100076675415, |
|
"eval_runtime": 641.9482, |
|
"eval_samples_per_second": 122.653, |
|
"eval_steps_per_second": 3.834, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.686796470588236e-06, |
|
"loss": 1.8957, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.659902572631836, |
|
"eval_runtime": 604.7038, |
|
"eval_samples_per_second": 130.208, |
|
"eval_steps_per_second": 4.07, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.678561176470589e-06, |
|
"loss": 1.9046, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6513235569000244, |
|
"eval_runtime": 699.652, |
|
"eval_samples_per_second": 112.537, |
|
"eval_steps_per_second": 3.517, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.670325882352941e-06, |
|
"loss": 1.8932, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6544750928878784, |
|
"eval_runtime": 629.6124, |
|
"eval_samples_per_second": 125.056, |
|
"eval_steps_per_second": 3.909, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6620988235294116e-06, |
|
"loss": 1.8964, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6519490480422974, |
|
"eval_runtime": 654.5444, |
|
"eval_samples_per_second": 120.293, |
|
"eval_steps_per_second": 3.76, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.653863529411765e-06, |
|
"loss": 1.8987, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6551364660263062, |
|
"eval_runtime": 705.5195, |
|
"eval_samples_per_second": 111.601, |
|
"eval_steps_per_second": 3.488, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.645628235294118e-06, |
|
"loss": 1.8841, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.6510001420974731, |
|
"eval_runtime": 642.966, |
|
"eval_samples_per_second": 122.459, |
|
"eval_steps_per_second": 3.828, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6374011764705886e-06, |
|
"loss": 1.9098, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.662022590637207, |
|
"eval_runtime": 655.201, |
|
"eval_samples_per_second": 120.172, |
|
"eval_steps_per_second": 3.756, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6291658823529416e-06, |
|
"loss": 1.8923, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.664062261581421, |
|
"eval_runtime": 596.0888, |
|
"eval_samples_per_second": 132.089, |
|
"eval_steps_per_second": 4.129, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6209388235294118e-06, |
|
"loss": 1.8923, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6515814065933228, |
|
"eval_runtime": 644.2159, |
|
"eval_samples_per_second": 122.221, |
|
"eval_steps_per_second": 3.82, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.612703529411765e-06, |
|
"loss": 1.9038, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6637661457061768, |
|
"eval_runtime": 633.5547, |
|
"eval_samples_per_second": 124.278, |
|
"eval_steps_per_second": 3.884, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6044764705882345e-06, |
|
"loss": 1.8825, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6578197479248047, |
|
"eval_runtime": 694.0675, |
|
"eval_samples_per_second": 113.443, |
|
"eval_steps_per_second": 3.546, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.596241176470588e-06, |
|
"loss": 1.8956, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.649842381477356, |
|
"eval_runtime": 657.1133, |
|
"eval_samples_per_second": 119.823, |
|
"eval_steps_per_second": 3.745, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5880141176470585e-06, |
|
"loss": 1.8972, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.644415259361267, |
|
"eval_runtime": 707.7425, |
|
"eval_samples_per_second": 111.251, |
|
"eval_steps_per_second": 3.477, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.5797788235294115e-06, |
|
"loss": 1.8887, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6474778652191162, |
|
"eval_runtime": 625.3496, |
|
"eval_samples_per_second": 125.909, |
|
"eval_steps_per_second": 3.935, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.571543529411765e-06, |
|
"loss": 1.8792, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6593352556228638, |
|
"eval_runtime": 700.7863, |
|
"eval_samples_per_second": 112.355, |
|
"eval_steps_per_second": 3.512, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.563308235294118e-06, |
|
"loss": 1.8947, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 1.6596338748931885, |
|
"eval_runtime": 689.3715, |
|
"eval_samples_per_second": 114.216, |
|
"eval_steps_per_second": 3.57, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.5550729411764707e-06, |
|
"loss": 1.8862, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6665337085723877, |
|
"eval_runtime": 636.7484, |
|
"eval_samples_per_second": 123.655, |
|
"eval_steps_per_second": 3.865, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.5468458823529414e-06, |
|
"loss": 1.8911, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.648636817932129, |
|
"eval_runtime": 627.8839, |
|
"eval_samples_per_second": 125.401, |
|
"eval_steps_per_second": 3.92, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.538610588235294e-06, |
|
"loss": 1.8993, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6460869312286377, |
|
"eval_runtime": 742.579, |
|
"eval_samples_per_second": 106.032, |
|
"eval_steps_per_second": 3.314, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.5303752941176473e-06, |
|
"loss": 1.8947, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6531628370285034, |
|
"eval_runtime": 686.6543, |
|
"eval_samples_per_second": 114.668, |
|
"eval_steps_per_second": 3.584, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.5221564705882356e-06, |
|
"loss": 1.8877, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6493592262268066, |
|
"eval_runtime": 674.6189, |
|
"eval_samples_per_second": 116.713, |
|
"eval_steps_per_second": 3.648, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.5139211764705886e-06, |
|
"loss": 1.889, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6436575651168823, |
|
"eval_runtime": 660.9898, |
|
"eval_samples_per_second": 119.12, |
|
"eval_steps_per_second": 3.723, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.505685882352942e-06, |
|
"loss": 1.8789, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6456811428070068, |
|
"eval_runtime": 682.5583, |
|
"eval_samples_per_second": 115.356, |
|
"eval_steps_per_second": 3.606, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4974505882352945e-06, |
|
"loss": 1.8818, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.6493545770645142, |
|
"eval_runtime": 656.6455, |
|
"eval_samples_per_second": 119.908, |
|
"eval_steps_per_second": 3.748, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.4892152941176474e-06, |
|
"loss": 1.8896, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.654678225517273, |
|
"eval_runtime": 696.7768, |
|
"eval_samples_per_second": 113.002, |
|
"eval_steps_per_second": 3.532, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.480988235294118e-06, |
|
"loss": 1.8835, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.649330496788025, |
|
"eval_runtime": 614.7903, |
|
"eval_samples_per_second": 128.071, |
|
"eval_steps_per_second": 4.003, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.472752941176471e-06, |
|
"loss": 1.8993, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6418979167938232, |
|
"eval_runtime": 643.8384, |
|
"eval_samples_per_second": 122.293, |
|
"eval_steps_per_second": 3.822, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.464517647058824e-06, |
|
"loss": 1.8897, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6511067152023315, |
|
"eval_runtime": 685.4246, |
|
"eval_samples_per_second": 114.873, |
|
"eval_steps_per_second": 3.59, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.456282352941177e-06, |
|
"loss": 1.883, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6433333158493042, |
|
"eval_runtime": 690.9078, |
|
"eval_samples_per_second": 113.962, |
|
"eval_steps_per_second": 3.562, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.448055294117647e-06, |
|
"loss": 1.8812, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6498456001281738, |
|
"eval_runtime": 721.7278, |
|
"eval_samples_per_second": 109.095, |
|
"eval_steps_per_second": 3.41, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4398282352941177e-06, |
|
"loss": 1.8898, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6489810943603516, |
|
"eval_runtime": 615.5981, |
|
"eval_samples_per_second": 127.903, |
|
"eval_steps_per_second": 3.998, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4315929411764707e-06, |
|
"loss": 1.8841, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6423102617263794, |
|
"eval_runtime": 619.1731, |
|
"eval_samples_per_second": 127.165, |
|
"eval_steps_per_second": 3.975, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4233576470588236e-06, |
|
"loss": 1.8929, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6477515697479248, |
|
"eval_runtime": 698.3602, |
|
"eval_samples_per_second": 112.746, |
|
"eval_steps_per_second": 3.524, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.4151223529411766e-06, |
|
"loss": 1.8769, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 1.6441521644592285, |
|
"eval_runtime": 741.7014, |
|
"eval_samples_per_second": 106.157, |
|
"eval_steps_per_second": 3.318, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.4068952941176472e-06, |
|
"loss": 1.88, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6422946453094482, |
|
"eval_runtime": 616.4599, |
|
"eval_samples_per_second": 127.724, |
|
"eval_steps_per_second": 3.992, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.39866e-06, |
|
"loss": 1.8892, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6453232765197754, |
|
"eval_runtime": 598.18, |
|
"eval_samples_per_second": 131.628, |
|
"eval_steps_per_second": 4.114, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.3904411764705885e-06, |
|
"loss": 1.8867, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6516659259796143, |
|
"eval_runtime": 643.3214, |
|
"eval_samples_per_second": 122.391, |
|
"eval_steps_per_second": 3.825, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.3822058823529415e-06, |
|
"loss": 1.8788, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6312636137008667, |
|
"eval_runtime": 714.2232, |
|
"eval_samples_per_second": 110.241, |
|
"eval_steps_per_second": 3.446, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.3739705882352944e-06, |
|
"loss": 1.885, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6397331953048706, |
|
"eval_runtime": 660.6056, |
|
"eval_samples_per_second": 119.189, |
|
"eval_steps_per_second": 3.725, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.3657352941176474e-06, |
|
"loss": 1.8786, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6381564140319824, |
|
"eval_runtime": 600.1409, |
|
"eval_samples_per_second": 131.198, |
|
"eval_steps_per_second": 4.101, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.357508235294118e-06, |
|
"loss": 1.8799, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6406282186508179, |
|
"eval_runtime": 638.1975, |
|
"eval_samples_per_second": 123.374, |
|
"eval_steps_per_second": 3.856, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.349272941176471e-06, |
|
"loss": 1.8897, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6376131772994995, |
|
"eval_runtime": 691.1499, |
|
"eval_samples_per_second": 113.922, |
|
"eval_steps_per_second": 3.561, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.3410541176470593e-06, |
|
"loss": 1.8819, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.6377204656600952, |
|
"eval_runtime": 639.4459, |
|
"eval_samples_per_second": 123.133, |
|
"eval_steps_per_second": 3.849, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.332818823529412e-06, |
|
"loss": 1.8685, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6423110961914062, |
|
"eval_runtime": 692.3687, |
|
"eval_samples_per_second": 113.721, |
|
"eval_steps_per_second": 3.554, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.3245835294117648e-06, |
|
"loss": 1.8904, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6445286273956299, |
|
"eval_runtime": 695.1818, |
|
"eval_samples_per_second": 113.261, |
|
"eval_steps_per_second": 3.54, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.3163482352941177e-06, |
|
"loss": 1.8826, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6447981595993042, |
|
"eval_runtime": 717.5664, |
|
"eval_samples_per_second": 109.728, |
|
"eval_steps_per_second": 3.43, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.3081129411764706e-06, |
|
"loss": 1.8784, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6407545804977417, |
|
"eval_runtime": 676.9456, |
|
"eval_samples_per_second": 116.312, |
|
"eval_steps_per_second": 3.635, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.2998776470588236e-06, |
|
"loss": 1.8876, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6438355445861816, |
|
"eval_runtime": 673.2486, |
|
"eval_samples_per_second": 116.951, |
|
"eval_steps_per_second": 3.655, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.2916423529411765e-06, |
|
"loss": 1.8746, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6389583349227905, |
|
"eval_runtime": 648.594, |
|
"eval_samples_per_second": 121.396, |
|
"eval_steps_per_second": 3.794, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.283415294117647e-06, |
|
"loss": 1.8719, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6385319232940674, |
|
"eval_runtime": 708.8894, |
|
"eval_samples_per_second": 111.071, |
|
"eval_steps_per_second": 3.472, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.27518e-06, |
|
"loss": 1.881, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6474238634109497, |
|
"eval_runtime": 686.9218, |
|
"eval_samples_per_second": 114.623, |
|
"eval_steps_per_second": 3.583, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.2669611764705885e-06, |
|
"loss": 1.8909, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 1.6290351152420044, |
|
"eval_runtime": 598.6713, |
|
"eval_samples_per_second": 131.52, |
|
"eval_steps_per_second": 4.111, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2587258823529414e-06, |
|
"loss": 1.8788, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.646666407585144, |
|
"eval_runtime": 604.8994, |
|
"eval_samples_per_second": 130.165, |
|
"eval_steps_per_second": 4.068, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2504905882352944e-06, |
|
"loss": 1.8878, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6379634141921997, |
|
"eval_runtime": 666.7018, |
|
"eval_samples_per_second": 118.099, |
|
"eval_steps_per_second": 3.691, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.242263529411765e-06, |
|
"loss": 1.8879, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.636752724647522, |
|
"eval_runtime": 655.8918, |
|
"eval_samples_per_second": 120.046, |
|
"eval_steps_per_second": 3.752, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.234028235294118e-06, |
|
"loss": 1.8853, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6320216655731201, |
|
"eval_runtime": 598.9547, |
|
"eval_samples_per_second": 131.457, |
|
"eval_steps_per_second": 4.109, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2258011764705886e-06, |
|
"loss": 1.878, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6368935108184814, |
|
"eval_runtime": 671.7937, |
|
"eval_samples_per_second": 117.204, |
|
"eval_steps_per_second": 3.663, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2175658823529415e-06, |
|
"loss": 1.8724, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6372097730636597, |
|
"eval_runtime": 649.2117, |
|
"eval_samples_per_second": 121.281, |
|
"eval_steps_per_second": 3.791, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2093305882352945e-06, |
|
"loss": 1.8853, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6407856941223145, |
|
"eval_runtime": 687.7581, |
|
"eval_samples_per_second": 114.484, |
|
"eval_steps_per_second": 3.578, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.2011035294117647e-06, |
|
"loss": 1.8826, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6369657516479492, |
|
"eval_runtime": 653.073, |
|
"eval_samples_per_second": 120.564, |
|
"eval_steps_per_second": 3.768, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.1928682352941177e-06, |
|
"loss": 1.8766, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 1.6378271579742432, |
|
"eval_runtime": 595.8545, |
|
"eval_samples_per_second": 132.141, |
|
"eval_steps_per_second": 4.13, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.1846411764705883e-06, |
|
"loss": 1.8725, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6308584213256836, |
|
"eval_runtime": 613.2861, |
|
"eval_samples_per_second": 128.385, |
|
"eval_steps_per_second": 4.013, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.1764058823529412e-06, |
|
"loss": 1.8823, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6472721099853516, |
|
"eval_runtime": 614.538, |
|
"eval_samples_per_second": 128.124, |
|
"eval_steps_per_second": 4.005, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.168170588235294e-06, |
|
"loss": 1.8754, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.634007215499878, |
|
"eval_runtime": 680.3949, |
|
"eval_samples_per_second": 115.723, |
|
"eval_steps_per_second": 3.617, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.159935294117647e-06, |
|
"loss": 1.8871, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6386611461639404, |
|
"eval_runtime": 681.8742, |
|
"eval_samples_per_second": 115.471, |
|
"eval_steps_per_second": 3.609, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.1517082352941178e-06, |
|
"loss": 1.8682, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6339727640151978, |
|
"eval_runtime": 632.124, |
|
"eval_samples_per_second": 124.559, |
|
"eval_steps_per_second": 3.893, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.1434811764705884e-06, |
|
"loss": 1.8703, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6315019130706787, |
|
"eval_runtime": 595.3007, |
|
"eval_samples_per_second": 132.264, |
|
"eval_steps_per_second": 4.134, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.135254117647059e-06, |
|
"loss": 1.8858, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6289024353027344, |
|
"eval_runtime": 637.5934, |
|
"eval_samples_per_second": 123.491, |
|
"eval_steps_per_second": 3.86, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.127018823529412e-06, |
|
"loss": 1.8727, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6371269226074219, |
|
"eval_runtime": 671.6032, |
|
"eval_samples_per_second": 117.237, |
|
"eval_steps_per_second": 3.664, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.118783529411765e-06, |
|
"loss": 1.8729, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 1.6401563882827759, |
|
"eval_runtime": 639.6628, |
|
"eval_samples_per_second": 123.091, |
|
"eval_steps_per_second": 3.847, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.1105564705882356e-06, |
|
"loss": 1.8767, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6311676502227783, |
|
"eval_runtime": 600.697, |
|
"eval_samples_per_second": 131.076, |
|
"eval_steps_per_second": 4.097, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.1023211764705886e-06, |
|
"loss": 1.8751, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6302475929260254, |
|
"eval_runtime": 596.162, |
|
"eval_samples_per_second": 132.073, |
|
"eval_steps_per_second": 4.128, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0940858823529415e-06, |
|
"loss": 1.8778, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6412676572799683, |
|
"eval_runtime": 620.8121, |
|
"eval_samples_per_second": 126.829, |
|
"eval_steps_per_second": 3.964, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0858505882352944e-06, |
|
"loss": 1.8709, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6188048124313354, |
|
"eval_runtime": 599.8506, |
|
"eval_samples_per_second": 131.261, |
|
"eval_steps_per_second": 4.103, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0776235294117647e-06, |
|
"loss": 1.8763, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6331790685653687, |
|
"eval_runtime": 650.4909, |
|
"eval_samples_per_second": 121.042, |
|
"eval_steps_per_second": 3.783, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0693964705882353e-06, |
|
"loss": 1.8881, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6275856494903564, |
|
"eval_runtime": 595.8254, |
|
"eval_samples_per_second": 132.148, |
|
"eval_steps_per_second": 4.13, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.0611611764705883e-06, |
|
"loss": 1.8805, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6381438970565796, |
|
"eval_runtime": 627.6658, |
|
"eval_samples_per_second": 125.444, |
|
"eval_steps_per_second": 3.921, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.052925882352941e-06, |
|
"loss": 1.8802, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6350407600402832, |
|
"eval_runtime": 594.3681, |
|
"eval_samples_per_second": 132.472, |
|
"eval_steps_per_second": 4.141, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.044690588235294e-06, |
|
"loss": 1.8781, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.6267595291137695, |
|
"eval_runtime": 667.2785, |
|
"eval_samples_per_second": 117.997, |
|
"eval_steps_per_second": 3.688, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.036455294117647e-06, |
|
"loss": 1.8637, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6291269063949585, |
|
"eval_runtime": 595.964, |
|
"eval_samples_per_second": 132.117, |
|
"eval_steps_per_second": 4.129, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.02822e-06, |
|
"loss": 1.8729, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6322344541549683, |
|
"eval_runtime": 667.1453, |
|
"eval_samples_per_second": 118.021, |
|
"eval_steps_per_second": 3.689, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0199929411764707e-06, |
|
"loss": 1.8638, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6420254707336426, |
|
"eval_runtime": 642.8047, |
|
"eval_samples_per_second": 122.49, |
|
"eval_steps_per_second": 3.829, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0117576470588236e-06, |
|
"loss": 1.8692, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6281312704086304, |
|
"eval_runtime": 662.9634, |
|
"eval_samples_per_second": 118.765, |
|
"eval_steps_per_second": 3.712, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.0035305882352943e-06, |
|
"loss": 1.8645, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6313811540603638, |
|
"eval_runtime": 714.0456, |
|
"eval_samples_per_second": 110.269, |
|
"eval_steps_per_second": 3.447, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9952952941176472e-06, |
|
"loss": 1.8684, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6184828281402588, |
|
"eval_runtime": 685.5984, |
|
"eval_samples_per_second": 114.844, |
|
"eval_steps_per_second": 3.59, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.987068235294118e-06, |
|
"loss": 1.8681, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.628950595855713, |
|
"eval_runtime": 628.2882, |
|
"eval_samples_per_second": 125.32, |
|
"eval_steps_per_second": 3.917, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.978832941176471e-06, |
|
"loss": 1.8704, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6387931108474731, |
|
"eval_runtime": 678.8489, |
|
"eval_samples_per_second": 115.986, |
|
"eval_steps_per_second": 3.625, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9706058823529415e-06, |
|
"loss": 1.8639, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 1.6350470781326294, |
|
"eval_runtime": 729.0473, |
|
"eval_samples_per_second": 108.0, |
|
"eval_steps_per_second": 3.376, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9623705882352944e-06, |
|
"loss": 1.8632, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6308817863464355, |
|
"eval_runtime": 631.0976, |
|
"eval_samples_per_second": 124.762, |
|
"eval_steps_per_second": 3.9, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.954135294117647e-06, |
|
"loss": 1.8631, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6306254863739014, |
|
"eval_runtime": 681.9362, |
|
"eval_samples_per_second": 115.461, |
|
"eval_steps_per_second": 3.609, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9459e-06, |
|
"loss": 1.8727, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6303691864013672, |
|
"eval_runtime": 714.3006, |
|
"eval_samples_per_second": 110.229, |
|
"eval_steps_per_second": 3.445, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9376729411764705e-06, |
|
"loss": 1.8786, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.625319480895996, |
|
"eval_runtime": 677.4584, |
|
"eval_samples_per_second": 116.224, |
|
"eval_steps_per_second": 3.633, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9294376470588235e-06, |
|
"loss": 1.8619, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6231558322906494, |
|
"eval_runtime": 710.8825, |
|
"eval_samples_per_second": 110.76, |
|
"eval_steps_per_second": 3.462, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9212023529411764e-06, |
|
"loss": 1.8635, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6391326189041138, |
|
"eval_runtime": 635.1822, |
|
"eval_samples_per_second": 123.96, |
|
"eval_steps_per_second": 3.874, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.912975294117647e-06, |
|
"loss": 1.8639, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6201550960540771, |
|
"eval_runtime": 652.0366, |
|
"eval_samples_per_second": 120.755, |
|
"eval_steps_per_second": 3.774, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9047482352941177e-06, |
|
"loss": 1.8671, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6272953748703003, |
|
"eval_runtime": 628.1118, |
|
"eval_samples_per_second": 125.355, |
|
"eval_steps_per_second": 3.918, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.8965129411764706e-06, |
|
"loss": 1.8646, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 1.6334937810897827, |
|
"eval_runtime": 600.8225, |
|
"eval_samples_per_second": 131.049, |
|
"eval_steps_per_second": 4.096, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8882858823529413e-06, |
|
"loss": 1.8641, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6256176233291626, |
|
"eval_runtime": 623.5899, |
|
"eval_samples_per_second": 126.264, |
|
"eval_steps_per_second": 3.947, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8800505882352942e-06, |
|
"loss": 1.8641, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6278165578842163, |
|
"eval_runtime": 634.1163, |
|
"eval_samples_per_second": 124.168, |
|
"eval_steps_per_second": 3.881, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.871815294117647e-06, |
|
"loss": 1.8696, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6159448623657227, |
|
"eval_runtime": 688.2826, |
|
"eval_samples_per_second": 114.396, |
|
"eval_steps_per_second": 3.576, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8635882352941178e-06, |
|
"loss": 1.8754, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6252976655960083, |
|
"eval_runtime": 635.7586, |
|
"eval_samples_per_second": 123.847, |
|
"eval_steps_per_second": 3.871, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8553529411764706e-06, |
|
"loss": 1.8652, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6262269020080566, |
|
"eval_runtime": 660.6971, |
|
"eval_samples_per_second": 119.173, |
|
"eval_steps_per_second": 3.725, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8471176470588237e-06, |
|
"loss": 1.8728, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.622817873954773, |
|
"eval_runtime": 613.1794, |
|
"eval_samples_per_second": 128.408, |
|
"eval_steps_per_second": 4.014, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.838882352941177e-06, |
|
"loss": 1.8573, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6240416765213013, |
|
"eval_runtime": 658.1977, |
|
"eval_samples_per_second": 119.625, |
|
"eval_steps_per_second": 3.739, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8306552941176469e-06, |
|
"loss": 1.867, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.6374046802520752, |
|
"eval_runtime": 659.4878, |
|
"eval_samples_per_second": 119.391, |
|
"eval_steps_per_second": 3.732, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.8224199999999998e-06, |
|
"loss": 1.8604, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 1.625261902809143, |
|
"eval_runtime": 660.1821, |
|
"eval_samples_per_second": 119.266, |
|
"eval_steps_per_second": 3.728, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.8141847058823528e-06, |
|
"loss": 1.867, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.620868444442749, |
|
"eval_runtime": 646.1503, |
|
"eval_samples_per_second": 121.856, |
|
"eval_steps_per_second": 3.809, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.805949411764706e-06, |
|
"loss": 1.8616, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6343539953231812, |
|
"eval_runtime": 589.1258, |
|
"eval_samples_per_second": 133.651, |
|
"eval_steps_per_second": 4.177, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7977223529411764e-06, |
|
"loss": 1.8695, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6252588033676147, |
|
"eval_runtime": 652.0329, |
|
"eval_samples_per_second": 120.756, |
|
"eval_steps_per_second": 3.774, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7894870588235293e-06, |
|
"loss": 1.8637, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6274361610412598, |
|
"eval_runtime": 656.356, |
|
"eval_samples_per_second": 119.961, |
|
"eval_steps_per_second": 3.749, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.78126e-06, |
|
"loss": 1.8544, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6243598461151123, |
|
"eval_runtime": 613.6606, |
|
"eval_samples_per_second": 128.307, |
|
"eval_steps_per_second": 4.01, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.773024705882353e-06, |
|
"loss": 1.8652, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.626272201538086, |
|
"eval_runtime": 627.4053, |
|
"eval_samples_per_second": 125.496, |
|
"eval_steps_per_second": 3.923, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7647976470588235e-06, |
|
"loss": 1.8707, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6186515092849731, |
|
"eval_runtime": 651.665, |
|
"eval_samples_per_second": 120.824, |
|
"eval_steps_per_second": 3.776, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.756562352941177e-06, |
|
"loss": 1.8607, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6232746839523315, |
|
"eval_runtime": 721.7173, |
|
"eval_samples_per_second": 109.097, |
|
"eval_steps_per_second": 3.41, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7483270588235299e-06, |
|
"loss": 1.8778, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.6159837245941162, |
|
"eval_runtime": 709.9821, |
|
"eval_samples_per_second": 110.9, |
|
"eval_steps_per_second": 3.466, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.740091764705883e-06, |
|
"loss": 1.8651, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.621022343635559, |
|
"eval_runtime": 609.3721, |
|
"eval_samples_per_second": 129.21, |
|
"eval_steps_per_second": 4.039, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7318729411764711e-06, |
|
"loss": 1.8601, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.620976209640503, |
|
"eval_runtime": 640.963, |
|
"eval_samples_per_second": 122.842, |
|
"eval_steps_per_second": 3.84, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7236376470588239e-06, |
|
"loss": 1.8634, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6287916898727417, |
|
"eval_runtime": 722.8242, |
|
"eval_samples_per_second": 108.93, |
|
"eval_steps_per_second": 3.405, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7154105882352947e-06, |
|
"loss": 1.8692, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6240730285644531, |
|
"eval_runtime": 638.1242, |
|
"eval_samples_per_second": 123.388, |
|
"eval_steps_per_second": 3.857, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.7071752941176466e-06, |
|
"loss": 1.854, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6287106275558472, |
|
"eval_runtime": 652.4874, |
|
"eval_samples_per_second": 120.672, |
|
"eval_steps_per_second": 3.772, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6989399999999998e-06, |
|
"loss": 1.8628, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6285154819488525, |
|
"eval_runtime": 674.6082, |
|
"eval_samples_per_second": 116.715, |
|
"eval_steps_per_second": 3.648, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6907047058823527e-06, |
|
"loss": 1.8588, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6281447410583496, |
|
"eval_runtime": 702.8676, |
|
"eval_samples_per_second": 112.023, |
|
"eval_steps_per_second": 3.501, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6824776470588234e-06, |
|
"loss": 1.8613, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6213467121124268, |
|
"eval_runtime": 658.5951, |
|
"eval_samples_per_second": 119.553, |
|
"eval_steps_per_second": 3.737, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.6742423529411763e-06, |
|
"loss": 1.8494, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.6278510093688965, |
|
"eval_runtime": 644.797, |
|
"eval_samples_per_second": 122.111, |
|
"eval_steps_per_second": 3.817, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6660070588235297e-06, |
|
"loss": 1.8584, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6307947635650635, |
|
"eval_runtime": 622.8216, |
|
"eval_samples_per_second": 126.42, |
|
"eval_steps_per_second": 3.951, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6577882352941176e-06, |
|
"loss": 1.8642, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6242249011993408, |
|
"eval_runtime": 659.8518, |
|
"eval_samples_per_second": 119.325, |
|
"eval_steps_per_second": 3.73, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6495529411764706e-06, |
|
"loss": 1.8699, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6131978034973145, |
|
"eval_runtime": 633.5288, |
|
"eval_samples_per_second": 124.283, |
|
"eval_steps_per_second": 3.885, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6413176470588237e-06, |
|
"loss": 1.8609, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6228089332580566, |
|
"eval_runtime": 663.4595, |
|
"eval_samples_per_second": 118.676, |
|
"eval_steps_per_second": 3.709, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6330823529411769e-06, |
|
"loss": 1.8601, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6239029169082642, |
|
"eval_runtime": 687.3212, |
|
"eval_samples_per_second": 114.556, |
|
"eval_steps_per_second": 3.581, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6248552941176477e-06, |
|
"loss": 1.8534, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6289734840393066, |
|
"eval_runtime": 626.344, |
|
"eval_samples_per_second": 125.709, |
|
"eval_steps_per_second": 3.929, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6166200000000005e-06, |
|
"loss": 1.8473, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6148146390914917, |
|
"eval_runtime": 596.927, |
|
"eval_samples_per_second": 131.904, |
|
"eval_steps_per_second": 4.123, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.6083847058823534e-06, |
|
"loss": 1.8564, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.6277542114257812, |
|
"eval_runtime": 670.7002, |
|
"eval_samples_per_second": 117.395, |
|
"eval_steps_per_second": 3.669, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.600157647058824e-06, |
|
"loss": 1.8491, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 1.619775414466858, |
|
"eval_runtime": 706.5202, |
|
"eval_samples_per_second": 111.443, |
|
"eval_steps_per_second": 3.483, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5919305882352947e-06, |
|
"loss": 1.8539, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6218732595443726, |
|
"eval_runtime": 648.1903, |
|
"eval_samples_per_second": 121.472, |
|
"eval_steps_per_second": 3.797, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5836952941176468e-06, |
|
"loss": 1.8544, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6217312812805176, |
|
"eval_runtime": 700.6655, |
|
"eval_samples_per_second": 112.375, |
|
"eval_steps_per_second": 3.512, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5754599999999997e-06, |
|
"loss": 1.8524, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6175516843795776, |
|
"eval_runtime": 646.3871, |
|
"eval_samples_per_second": 121.811, |
|
"eval_steps_per_second": 3.807, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.567224705882353e-06, |
|
"loss": 1.8586, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6136716604232788, |
|
"eval_runtime": 637.3755, |
|
"eval_samples_per_second": 123.533, |
|
"eval_steps_per_second": 3.861, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.558989411764706e-06, |
|
"loss": 1.8564, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6192333698272705, |
|
"eval_runtime": 593.2957, |
|
"eval_samples_per_second": 132.711, |
|
"eval_steps_per_second": 4.148, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5507788235294117e-06, |
|
"loss": 1.8578, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.621081829071045, |
|
"eval_runtime": 596.6533, |
|
"eval_samples_per_second": 131.964, |
|
"eval_steps_per_second": 4.125, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5425435294117646e-06, |
|
"loss": 1.8593, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.623468279838562, |
|
"eval_runtime": 694.5364, |
|
"eval_samples_per_second": 113.366, |
|
"eval_steps_per_second": 3.543, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.5343082352941174e-06, |
|
"loss": 1.8499, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6201629638671875, |
|
"eval_runtime": 620.0591, |
|
"eval_samples_per_second": 126.983, |
|
"eval_steps_per_second": 3.969, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.526072941176471e-06, |
|
"loss": 1.8553, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 1.6244655847549438, |
|
"eval_runtime": 701.0162, |
|
"eval_samples_per_second": 112.318, |
|
"eval_steps_per_second": 3.511, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5178376470588239e-06, |
|
"loss": 1.8612, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6230287551879883, |
|
"eval_runtime": 637.0294, |
|
"eval_samples_per_second": 123.6, |
|
"eval_steps_per_second": 3.863, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5096105882352945e-06, |
|
"loss": 1.8539, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6243265867233276, |
|
"eval_runtime": 643.6788, |
|
"eval_samples_per_second": 122.323, |
|
"eval_steps_per_second": 3.823, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5013752941176475e-06, |
|
"loss": 1.8552, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.623043417930603, |
|
"eval_runtime": 604.9319, |
|
"eval_samples_per_second": 130.158, |
|
"eval_steps_per_second": 4.068, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4931400000000004e-06, |
|
"loss": 1.8608, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6242647171020508, |
|
"eval_runtime": 645.9074, |
|
"eval_samples_per_second": 121.901, |
|
"eval_steps_per_second": 3.81, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4849047058823536e-06, |
|
"loss": 1.8541, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6212998628616333, |
|
"eval_runtime": 593.5841, |
|
"eval_samples_per_second": 132.647, |
|
"eval_steps_per_second": 4.146, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4766858823529417e-06, |
|
"loss": 1.8471, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6204901933670044, |
|
"eval_runtime": 632.0978, |
|
"eval_samples_per_second": 124.565, |
|
"eval_steps_per_second": 3.893, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4684505882352944e-06, |
|
"loss": 1.8505, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6168400049209595, |
|
"eval_runtime": 634.5582, |
|
"eval_samples_per_second": 124.082, |
|
"eval_steps_per_second": 3.878, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4602152941176467e-06, |
|
"loss": 1.858, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6155096292495728, |
|
"eval_runtime": 627.6321, |
|
"eval_samples_per_second": 125.451, |
|
"eval_steps_per_second": 3.921, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.4519800000000001e-06, |
|
"loss": 1.8523, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.6156325340270996, |
|
"eval_runtime": 610.2358, |
|
"eval_samples_per_second": 129.027, |
|
"eval_steps_per_second": 4.033, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.4437529411764703e-06, |
|
"loss": 1.853, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.612059235572815, |
|
"eval_runtime": 710.5317, |
|
"eval_samples_per_second": 110.814, |
|
"eval_steps_per_second": 3.464, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.4355176470588237e-06, |
|
"loss": 1.8495, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6132701635360718, |
|
"eval_runtime": 627.4106, |
|
"eval_samples_per_second": 125.495, |
|
"eval_steps_per_second": 3.922, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.4272823529411767e-06, |
|
"loss": 1.845, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6237196922302246, |
|
"eval_runtime": 618.6027, |
|
"eval_samples_per_second": 127.282, |
|
"eval_steps_per_second": 3.978, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.4190552941176473e-06, |
|
"loss": 1.8467, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6179585456848145, |
|
"eval_runtime": 625.0402, |
|
"eval_samples_per_second": 125.971, |
|
"eval_steps_per_second": 3.937, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.410828235294118e-06, |
|
"loss": 1.8532, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6177033185958862, |
|
"eval_runtime": 701.2169, |
|
"eval_samples_per_second": 112.286, |
|
"eval_steps_per_second": 3.51, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.4025929411764707e-06, |
|
"loss": 1.8516, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6135560274124146, |
|
"eval_runtime": 670.6381, |
|
"eval_samples_per_second": 117.406, |
|
"eval_steps_per_second": 3.67, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.3943576470588238e-06, |
|
"loss": 1.8464, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6133610010147095, |
|
"eval_runtime": 632.607, |
|
"eval_samples_per_second": 124.464, |
|
"eval_steps_per_second": 3.89, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.3861223529411768e-06, |
|
"loss": 1.8418, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.6199904680252075, |
|
"eval_runtime": 676.6467, |
|
"eval_samples_per_second": 116.364, |
|
"eval_steps_per_second": 3.637, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.3778870588235297e-06, |
|
"loss": 1.8538, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 1.620301365852356, |
|
"eval_runtime": 711.6514, |
|
"eval_samples_per_second": 110.64, |
|
"eval_steps_per_second": 3.458, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3696517647058827e-06, |
|
"loss": 1.8517, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6171836853027344, |
|
"eval_runtime": 617.9803, |
|
"eval_samples_per_second": 127.41, |
|
"eval_steps_per_second": 3.982, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.361424705882353e-06, |
|
"loss": 1.8612, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6206892728805542, |
|
"eval_runtime": 639.5461, |
|
"eval_samples_per_second": 123.114, |
|
"eval_steps_per_second": 3.848, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.353197647058824e-06, |
|
"loss": 1.8535, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6004185676574707, |
|
"eval_runtime": 591.7421, |
|
"eval_samples_per_second": 133.06, |
|
"eval_steps_per_second": 4.159, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3449623529411767e-06, |
|
"loss": 1.8437, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6153193712234497, |
|
"eval_runtime": 659.566, |
|
"eval_samples_per_second": 119.377, |
|
"eval_steps_per_second": 3.731, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3367270588235298e-06, |
|
"loss": 1.8561, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6150519847869873, |
|
"eval_runtime": 658.2097, |
|
"eval_samples_per_second": 119.623, |
|
"eval_steps_per_second": 3.739, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3284917647058824e-06, |
|
"loss": 1.8512, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6145780086517334, |
|
"eval_runtime": 596.1799, |
|
"eval_samples_per_second": 132.069, |
|
"eval_steps_per_second": 4.128, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.320264705882353e-06, |
|
"loss": 1.8492, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6190379858016968, |
|
"eval_runtime": 599.0501, |
|
"eval_samples_per_second": 131.436, |
|
"eval_steps_per_second": 4.108, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3120376470588237e-06, |
|
"loss": 1.8539, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.6064990758895874, |
|
"eval_runtime": 627.6701, |
|
"eval_samples_per_second": 125.443, |
|
"eval_steps_per_second": 3.921, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.3038023529411766e-06, |
|
"loss": 1.8406, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 1.61311674118042, |
|
"eval_runtime": 661.9232, |
|
"eval_samples_per_second": 118.952, |
|
"eval_steps_per_second": 3.718, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2955670588235296e-06, |
|
"loss": 1.8343, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6140803098678589, |
|
"eval_runtime": 594.958, |
|
"eval_samples_per_second": 132.34, |
|
"eval_steps_per_second": 4.136, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2873317647058825e-06, |
|
"loss": 1.8447, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.607283592224121, |
|
"eval_runtime": 589.0492, |
|
"eval_samples_per_second": 133.668, |
|
"eval_steps_per_second": 4.178, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2790964705882357e-06, |
|
"loss": 1.8476, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.623671293258667, |
|
"eval_runtime": 590.9271, |
|
"eval_samples_per_second": 133.243, |
|
"eval_steps_per_second": 4.165, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.270869411764706e-06, |
|
"loss": 1.8511, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6194998025894165, |
|
"eval_runtime": 610.8052, |
|
"eval_samples_per_second": 128.907, |
|
"eval_steps_per_second": 4.029, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.262634117647059e-06, |
|
"loss": 1.8502, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.614992380142212, |
|
"eval_runtime": 669.6472, |
|
"eval_samples_per_second": 117.58, |
|
"eval_steps_per_second": 3.675, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.254398823529412e-06, |
|
"loss": 1.8485, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6185976266860962, |
|
"eval_runtime": 593.1091, |
|
"eval_samples_per_second": 132.753, |
|
"eval_steps_per_second": 4.149, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2461717647058826e-06, |
|
"loss": 1.8486, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6242183446884155, |
|
"eval_runtime": 619.9316, |
|
"eval_samples_per_second": 127.009, |
|
"eval_steps_per_second": 3.97, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2379447058823533e-06, |
|
"loss": 1.8508, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6094743013381958, |
|
"eval_runtime": 682.5864, |
|
"eval_samples_per_second": 115.351, |
|
"eval_steps_per_second": 3.605, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.2297094117647062e-06, |
|
"loss": 1.8509, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.6169160604476929, |
|
"eval_runtime": 650.0935, |
|
"eval_samples_per_second": 121.116, |
|
"eval_steps_per_second": 3.786, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.2214741176470592e-06, |
|
"loss": 1.8449, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.62052583694458, |
|
"eval_runtime": 649.3412, |
|
"eval_samples_per_second": 121.257, |
|
"eval_steps_per_second": 3.79, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.213238823529412e-06, |
|
"loss": 1.8673, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6155439615249634, |
|
"eval_runtime": 637.256, |
|
"eval_samples_per_second": 123.556, |
|
"eval_steps_per_second": 3.862, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.2050035294117646e-06, |
|
"loss": 1.8482, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6143635511398315, |
|
"eval_runtime": 663.142, |
|
"eval_samples_per_second": 118.733, |
|
"eval_steps_per_second": 3.711, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1967764705882353e-06, |
|
"loss": 1.8522, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6124098300933838, |
|
"eval_runtime": 610.7893, |
|
"eval_samples_per_second": 128.91, |
|
"eval_steps_per_second": 4.029, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1885411764705882e-06, |
|
"loss": 1.844, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6135690212249756, |
|
"eval_runtime": 634.195, |
|
"eval_samples_per_second": 124.153, |
|
"eval_steps_per_second": 3.881, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1803141176470589e-06, |
|
"loss": 1.8462, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6144988536834717, |
|
"eval_runtime": 636.9429, |
|
"eval_samples_per_second": 123.617, |
|
"eval_steps_per_second": 3.864, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1720788235294118e-06, |
|
"loss": 1.8593, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6172668933868408, |
|
"eval_runtime": 705.616, |
|
"eval_samples_per_second": 111.586, |
|
"eval_steps_per_second": 3.488, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1638435294117648e-06, |
|
"loss": 1.8493, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6117665767669678, |
|
"eval_runtime": 676.055, |
|
"eval_samples_per_second": 116.465, |
|
"eval_steps_per_second": 3.64, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 1.1556082352941177e-06, |
|
"loss": 1.849, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 1.6118968725204468, |
|
"eval_runtime": 593.9987, |
|
"eval_samples_per_second": 132.554, |
|
"eval_steps_per_second": 4.143, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1473811764705883e-06, |
|
"loss": 1.8478, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6105737686157227, |
|
"eval_runtime": 691.8219, |
|
"eval_samples_per_second": 113.811, |
|
"eval_steps_per_second": 3.557, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1391458823529413e-06, |
|
"loss": 1.8539, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6143759489059448, |
|
"eval_runtime": 679.8256, |
|
"eval_samples_per_second": 115.819, |
|
"eval_steps_per_second": 3.62, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1309105882352942e-06, |
|
"loss": 1.8547, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.616089105606079, |
|
"eval_runtime": 631.7324, |
|
"eval_samples_per_second": 124.637, |
|
"eval_steps_per_second": 3.896, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1226835294117649e-06, |
|
"loss": 1.8483, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6217153072357178, |
|
"eval_runtime": 634.5839, |
|
"eval_samples_per_second": 124.077, |
|
"eval_steps_per_second": 3.878, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1144564705882355e-06, |
|
"loss": 1.8545, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6165567636489868, |
|
"eval_runtime": 636.4723, |
|
"eval_samples_per_second": 123.708, |
|
"eval_steps_per_second": 3.867, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.1062211764705885e-06, |
|
"loss": 1.8458, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.601502776145935, |
|
"eval_runtime": 619.423, |
|
"eval_samples_per_second": 127.113, |
|
"eval_steps_per_second": 3.973, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.0979858823529414e-06, |
|
"loss": 1.8473, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6162437200546265, |
|
"eval_runtime": 596.2973, |
|
"eval_samples_per_second": 132.043, |
|
"eval_steps_per_second": 4.127, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.089758823529412e-06, |
|
"loss": 1.8534, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.6076633930206299, |
|
"eval_runtime": 608.3431, |
|
"eval_samples_per_second": 129.429, |
|
"eval_steps_per_second": 4.045, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 1.0815235294117646e-06, |
|
"loss": 1.8486, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 1.607995629310608, |
|
"eval_runtime": 729.0011, |
|
"eval_samples_per_second": 108.007, |
|
"eval_steps_per_second": 3.376, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0732882352941175e-06, |
|
"loss": 1.8526, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6202946901321411, |
|
"eval_runtime": 755.7769, |
|
"eval_samples_per_second": 104.18, |
|
"eval_steps_per_second": 3.256, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0650529411764705e-06, |
|
"loss": 1.8449, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6120697259902954, |
|
"eval_runtime": 657.2602, |
|
"eval_samples_per_second": 119.796, |
|
"eval_steps_per_second": 3.744, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0568258823529411e-06, |
|
"loss": 1.8369, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6107741594314575, |
|
"eval_runtime": 614.6669, |
|
"eval_samples_per_second": 128.097, |
|
"eval_steps_per_second": 4.004, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.048590588235294e-06, |
|
"loss": 1.8493, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6112476587295532, |
|
"eval_runtime": 637.5284, |
|
"eval_samples_per_second": 123.504, |
|
"eval_steps_per_second": 3.86, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.040355294117647e-06, |
|
"loss": 1.849, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.613427996635437, |
|
"eval_runtime": 690.4812, |
|
"eval_samples_per_second": 114.032, |
|
"eval_steps_per_second": 3.564, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0321364705882354e-06, |
|
"loss": 1.8559, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6105836629867554, |
|
"eval_runtime": 644.5734, |
|
"eval_samples_per_second": 122.154, |
|
"eval_steps_per_second": 3.818, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0239011764705883e-06, |
|
"loss": 1.8505, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6063605546951294, |
|
"eval_runtime": 611.8049, |
|
"eval_samples_per_second": 128.696, |
|
"eval_steps_per_second": 4.023, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0156658823529412e-06, |
|
"loss": 1.859, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6112093925476074, |
|
"eval_runtime": 596.3432, |
|
"eval_samples_per_second": 132.033, |
|
"eval_steps_per_second": 4.127, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.0074305882352942e-06, |
|
"loss": 1.8403, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 1.6082416772842407, |
|
"eval_runtime": 636.8445, |
|
"eval_samples_per_second": 123.636, |
|
"eval_steps_per_second": 3.864, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.992035294117648e-07, |
|
"loss": 1.837, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6034836769104004, |
|
"eval_runtime": 721.1232, |
|
"eval_samples_per_second": 109.187, |
|
"eval_steps_per_second": 3.413, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.909682352941178e-07, |
|
"loss": 1.8472, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6123322248458862, |
|
"eval_runtime": 598.9191, |
|
"eval_samples_per_second": 131.465, |
|
"eval_steps_per_second": 4.109, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.827329411764707e-07, |
|
"loss": 1.8381, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6135793924331665, |
|
"eval_runtime": 636.3102, |
|
"eval_samples_per_second": 123.74, |
|
"eval_steps_per_second": 3.868, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.74514117647059e-07, |
|
"loss": 1.8451, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6103402376174927, |
|
"eval_runtime": 643.5154, |
|
"eval_samples_per_second": 122.355, |
|
"eval_steps_per_second": 3.824, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.66278823529412e-07, |
|
"loss": 1.8472, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6103589534759521, |
|
"eval_runtime": 713.2659, |
|
"eval_samples_per_second": 110.389, |
|
"eval_steps_per_second": 3.45, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.580517647058822e-07, |
|
"loss": 1.8456, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6119657754898071, |
|
"eval_runtime": 661.2469, |
|
"eval_samples_per_second": 119.074, |
|
"eval_steps_per_second": 3.722, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.498164705882351e-07, |
|
"loss": 1.8472, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6195623874664307, |
|
"eval_runtime": 721.1961, |
|
"eval_samples_per_second": 109.176, |
|
"eval_steps_per_second": 3.412, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.415811764705881e-07, |
|
"loss": 1.8463, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6081604957580566, |
|
"eval_runtime": 653.4788, |
|
"eval_samples_per_second": 120.489, |
|
"eval_steps_per_second": 3.766, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.333458823529411e-07, |
|
"loss": 1.8361, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 1.6095768213272095, |
|
"eval_runtime": 717.3607, |
|
"eval_samples_per_second": 109.759, |
|
"eval_steps_per_second": 3.431, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.25110588235294e-07, |
|
"loss": 1.8419, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.604457974433899, |
|
"eval_runtime": 633.7813, |
|
"eval_samples_per_second": 124.234, |
|
"eval_steps_per_second": 3.883, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.168835294117647e-07, |
|
"loss": 1.8476, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6114977598190308, |
|
"eval_runtime": 648.4133, |
|
"eval_samples_per_second": 121.43, |
|
"eval_steps_per_second": 3.795, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.086482352941177e-07, |
|
"loss": 1.842, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6074268817901611, |
|
"eval_runtime": 658.9063, |
|
"eval_samples_per_second": 119.496, |
|
"eval_steps_per_second": 3.735, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.004129411764706e-07, |
|
"loss": 1.8483, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6014885902404785, |
|
"eval_runtime": 714.2685, |
|
"eval_samples_per_second": 110.234, |
|
"eval_steps_per_second": 3.445, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.921858823529413e-07, |
|
"loss": 1.841, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6082409620285034, |
|
"eval_runtime": 686.6491, |
|
"eval_samples_per_second": 114.668, |
|
"eval_steps_per_second": 3.584, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.839505882352944e-07, |
|
"loss": 1.8388, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.5999279022216797, |
|
"eval_runtime": 635.3059, |
|
"eval_samples_per_second": 123.936, |
|
"eval_steps_per_second": 3.874, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.757235294117648e-07, |
|
"loss": 1.8489, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.612676978111267, |
|
"eval_runtime": 643.6962, |
|
"eval_samples_per_second": 122.32, |
|
"eval_steps_per_second": 3.823, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.674882352941178e-07, |
|
"loss": 1.836, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6081751585006714, |
|
"eval_runtime": 632.1897, |
|
"eval_samples_per_second": 124.546, |
|
"eval_steps_per_second": 3.893, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.592611764705886e-07, |
|
"loss": 1.8375, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.6010805368423462, |
|
"eval_runtime": 656.7973, |
|
"eval_samples_per_second": 119.88, |
|
"eval_steps_per_second": 3.747, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.510258823529414e-07, |
|
"loss": 1.8406, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.5963528156280518, |
|
"eval_runtime": 647.0602, |
|
"eval_samples_per_second": 121.684, |
|
"eval_steps_per_second": 3.803, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.427988235294121e-07, |
|
"loss": 1.8475, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6051133871078491, |
|
"eval_runtime": 614.1278, |
|
"eval_samples_per_second": 128.209, |
|
"eval_steps_per_second": 4.007, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.345717647058822e-07, |
|
"loss": 1.8439, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6119394302368164, |
|
"eval_runtime": 680.4122, |
|
"eval_samples_per_second": 115.72, |
|
"eval_steps_per_second": 3.617, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.263364705882351e-07, |
|
"loss": 1.8425, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.603615641593933, |
|
"eval_runtime": 582.1286, |
|
"eval_samples_per_second": 135.257, |
|
"eval_steps_per_second": 4.228, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.181011764705881e-07, |
|
"loss": 1.8376, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.5999643802642822, |
|
"eval_runtime": 637.8704, |
|
"eval_samples_per_second": 123.437, |
|
"eval_steps_per_second": 3.858, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.098658823529411e-07, |
|
"loss": 1.8362, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6083329916000366, |
|
"eval_runtime": 645.3739, |
|
"eval_samples_per_second": 122.002, |
|
"eval_steps_per_second": 3.813, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.016388235294116e-07, |
|
"loss": 1.8531, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6102386713027954, |
|
"eval_runtime": 641.5128, |
|
"eval_samples_per_second": 122.736, |
|
"eval_steps_per_second": 3.836, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.934035294117648e-07, |
|
"loss": 1.8377, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6046222448349, |
|
"eval_runtime": 613.188, |
|
"eval_samples_per_second": 128.406, |
|
"eval_steps_per_second": 4.013, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 7.851764705882353e-07, |
|
"loss": 1.8382, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.6014589071273804, |
|
"eval_runtime": 670.3738, |
|
"eval_samples_per_second": 117.452, |
|
"eval_steps_per_second": 3.671, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.76949411764706e-07, |
|
"loss": 1.8428, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.5967994928359985, |
|
"eval_runtime": 603.4298, |
|
"eval_samples_per_second": 130.482, |
|
"eval_steps_per_second": 4.078, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.687141176470591e-07, |
|
"loss": 1.8425, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6028351783752441, |
|
"eval_runtime": 653.6448, |
|
"eval_samples_per_second": 120.458, |
|
"eval_steps_per_second": 3.765, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.604788235294119e-07, |
|
"loss": 1.8387, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6120455265045166, |
|
"eval_runtime": 592.1502, |
|
"eval_samples_per_second": 132.968, |
|
"eval_steps_per_second": 4.156, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.522435294117649e-07, |
|
"loss": 1.8395, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.595177173614502, |
|
"eval_runtime": 632.9138, |
|
"eval_samples_per_second": 124.404, |
|
"eval_steps_per_second": 3.888, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.440082352941178e-07, |
|
"loss": 1.8505, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6045807600021362, |
|
"eval_runtime": 656.1512, |
|
"eval_samples_per_second": 119.998, |
|
"eval_steps_per_second": 3.751, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.357894117647061e-07, |
|
"loss": 1.8452, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6030842065811157, |
|
"eval_runtime": 628.9711, |
|
"eval_samples_per_second": 125.184, |
|
"eval_steps_per_second": 3.913, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.275541176470591e-07, |
|
"loss": 1.8469, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.608011245727539, |
|
"eval_runtime": 706.4291, |
|
"eval_samples_per_second": 111.458, |
|
"eval_steps_per_second": 3.484, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.19318823529412e-07, |
|
"loss": 1.8323, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6070075035095215, |
|
"eval_runtime": 612.8785, |
|
"eval_samples_per_second": 128.471, |
|
"eval_steps_per_second": 4.015, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.110835294117644e-07, |
|
"loss": 1.8438, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.612598180770874, |
|
"eval_runtime": 651.7595, |
|
"eval_samples_per_second": 120.807, |
|
"eval_steps_per_second": 3.776, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.028482352941175e-07, |
|
"loss": 1.8445, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6047810316085815, |
|
"eval_runtime": 628.6369, |
|
"eval_samples_per_second": 125.25, |
|
"eval_steps_per_second": 3.915, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.946294117647058e-07, |
|
"loss": 1.8534, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6085864305496216, |
|
"eval_runtime": 583.8466, |
|
"eval_samples_per_second": 134.859, |
|
"eval_steps_per_second": 4.215, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.863941176470587e-07, |
|
"loss": 1.8422, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6161532402038574, |
|
"eval_runtime": 640.5762, |
|
"eval_samples_per_second": 122.916, |
|
"eval_steps_per_second": 3.842, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.781588235294117e-07, |
|
"loss": 1.8346, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.608655571937561, |
|
"eval_runtime": 602.4702, |
|
"eval_samples_per_second": 130.69, |
|
"eval_steps_per_second": 4.085, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.699235294117647e-07, |
|
"loss": 1.8323, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6097711324691772, |
|
"eval_runtime": 631.9053, |
|
"eval_samples_per_second": 124.603, |
|
"eval_steps_per_second": 3.895, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.617047058823529e-07, |
|
"loss": 1.8431, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6174230575561523, |
|
"eval_runtime": 690.1823, |
|
"eval_samples_per_second": 114.081, |
|
"eval_steps_per_second": 3.566, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.53469411764706e-07, |
|
"loss": 1.8356, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6107144355773926, |
|
"eval_runtime": 614.0509, |
|
"eval_samples_per_second": 128.226, |
|
"eval_steps_per_second": 4.008, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.452341176470589e-07, |
|
"loss": 1.8445, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.6077425479888916, |
|
"eval_runtime": 602.2466, |
|
"eval_samples_per_second": 130.739, |
|
"eval_steps_per_second": 4.086, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 6.369988235294118e-07, |
|
"loss": 1.841, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 1.600361704826355, |
|
"eval_runtime": 594.6743, |
|
"eval_samples_per_second": 132.404, |
|
"eval_steps_per_second": 4.138, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.287717647058825e-07, |
|
"loss": 1.8499, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6131929159164429, |
|
"eval_runtime": 631.7613, |
|
"eval_samples_per_second": 124.631, |
|
"eval_steps_per_second": 3.895, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.205447058823532e-07, |
|
"loss": 1.8418, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6085360050201416, |
|
"eval_runtime": 645.6432, |
|
"eval_samples_per_second": 121.951, |
|
"eval_steps_per_second": 3.812, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.123094117647061e-07, |
|
"loss": 1.8293, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6009540557861328, |
|
"eval_runtime": 646.7415, |
|
"eval_samples_per_second": 121.744, |
|
"eval_steps_per_second": 3.805, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 6.04074117647059e-07, |
|
"loss": 1.8343, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6065633296966553, |
|
"eval_runtime": 661.4898, |
|
"eval_samples_per_second": 119.03, |
|
"eval_steps_per_second": 3.72, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.958388235294121e-07, |
|
"loss": 1.8332, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6017389297485352, |
|
"eval_runtime": 606.6913, |
|
"eval_samples_per_second": 129.781, |
|
"eval_steps_per_second": 4.056, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.876117647058821e-07, |
|
"loss": 1.8292, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6000785827636719, |
|
"eval_runtime": 633.3979, |
|
"eval_samples_per_second": 124.309, |
|
"eval_steps_per_second": 3.885, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.793764705882352e-07, |
|
"loss": 1.836, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6023591756820679, |
|
"eval_runtime": 657.5131, |
|
"eval_samples_per_second": 119.75, |
|
"eval_steps_per_second": 3.743, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.711494117647057e-07, |
|
"loss": 1.8399, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.5998636484146118, |
|
"eval_runtime": 660.5514, |
|
"eval_samples_per_second": 119.199, |
|
"eval_steps_per_second": 3.726, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.629141176470587e-07, |
|
"loss": 1.8474, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 1.6036168336868286, |
|
"eval_runtime": 632.9235, |
|
"eval_samples_per_second": 124.402, |
|
"eval_steps_per_second": 3.888, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.546870588235293e-07, |
|
"loss": 1.8332, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.5995994806289673, |
|
"eval_runtime": 655.1294, |
|
"eval_samples_per_second": 120.185, |
|
"eval_steps_per_second": 3.757, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.464517647058823e-07, |
|
"loss": 1.8409, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6112017631530762, |
|
"eval_runtime": 703.7737, |
|
"eval_samples_per_second": 111.878, |
|
"eval_steps_per_second": 3.497, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.382164705882353e-07, |
|
"loss": 1.8445, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6046810150146484, |
|
"eval_runtime": 585.0127, |
|
"eval_samples_per_second": 134.59, |
|
"eval_steps_per_second": 4.207, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.299894117647059e-07, |
|
"loss": 1.8394, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6096245050430298, |
|
"eval_runtime": 618.7198, |
|
"eval_samples_per_second": 127.258, |
|
"eval_steps_per_second": 3.978, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.217541176470589e-07, |
|
"loss": 1.844, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.5993245840072632, |
|
"eval_runtime": 595.419, |
|
"eval_samples_per_second": 132.238, |
|
"eval_steps_per_second": 4.133, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.135270588235295e-07, |
|
"loss": 1.8311, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6198428869247437, |
|
"eval_runtime": 630.0269, |
|
"eval_samples_per_second": 124.974, |
|
"eval_steps_per_second": 3.906, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 5.052917647058826e-07, |
|
"loss": 1.8387, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6028835773468018, |
|
"eval_runtime": 645.501, |
|
"eval_samples_per_second": 121.978, |
|
"eval_steps_per_second": 3.813, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.970564705882355e-07, |
|
"loss": 1.8366, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.6106973886489868, |
|
"eval_runtime": 600.5839, |
|
"eval_samples_per_second": 131.101, |
|
"eval_steps_per_second": 4.098, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.888294117647062e-07, |
|
"loss": 1.8402, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 1.608701467514038, |
|
"eval_runtime": 657.3776, |
|
"eval_samples_per_second": 119.774, |
|
"eval_steps_per_second": 3.744, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.806023529411768e-07, |
|
"loss": 1.8369, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6089677810668945, |
|
"eval_runtime": 618.0145, |
|
"eval_samples_per_second": 127.403, |
|
"eval_steps_per_second": 3.982, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.723670588235297e-07, |
|
"loss": 1.8392, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6029914617538452, |
|
"eval_runtime": 655.4895, |
|
"eval_samples_per_second": 120.119, |
|
"eval_steps_per_second": 3.754, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.641317647058821e-07, |
|
"loss": 1.8462, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6060106754302979, |
|
"eval_runtime": 665.1863, |
|
"eval_samples_per_second": 118.368, |
|
"eval_steps_per_second": 3.7, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.558964705882351e-07, |
|
"loss": 1.8292, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6043024063110352, |
|
"eval_runtime": 621.9745, |
|
"eval_samples_per_second": 126.592, |
|
"eval_steps_per_second": 3.957, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.4766117647058806e-07, |
|
"loss": 1.8376, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6115914583206177, |
|
"eval_runtime": 629.9877, |
|
"eval_samples_per_second": 124.982, |
|
"eval_steps_per_second": 3.906, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.3943411764705876e-07, |
|
"loss": 1.8418, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.609912633895874, |
|
"eval_runtime": 671.4315, |
|
"eval_samples_per_second": 117.267, |
|
"eval_steps_per_second": 3.665, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.311988235294117e-07, |
|
"loss": 1.8454, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.5986706018447876, |
|
"eval_runtime": 632.0091, |
|
"eval_samples_per_second": 124.582, |
|
"eval_steps_per_second": 3.894, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.2629831155778895e-06, |
|
"loss": 1.8358, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.6113981008529663, |
|
"eval_runtime": 1373.4808, |
|
"eval_samples_per_second": 57.327, |
|
"eval_steps_per_second": 1.792, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.2580584924623117e-06, |
|
"loss": 1.8424, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 1.608789086341858, |
|
"eval_runtime": 1317.8964, |
|
"eval_samples_per_second": 59.744, |
|
"eval_steps_per_second": 1.867, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2531387939698494e-06, |
|
"loss": 1.8396, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.602584719657898, |
|
"eval_runtime": 1403.3018, |
|
"eval_samples_per_second": 56.108, |
|
"eval_steps_per_second": 1.754, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.248219095477387e-06, |
|
"loss": 1.8367, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.6060080528259277, |
|
"eval_runtime": 1432.9745, |
|
"eval_samples_per_second": 54.947, |
|
"eval_steps_per_second": 1.717, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2432944723618093e-06, |
|
"loss": 1.8533, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.6123038530349731, |
|
"eval_runtime": 1298.3906, |
|
"eval_samples_per_second": 60.642, |
|
"eval_steps_per_second": 1.895, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2383698492462315e-06, |
|
"loss": 1.8436, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.612805724143982, |
|
"eval_runtime": 1365.4257, |
|
"eval_samples_per_second": 57.665, |
|
"eval_steps_per_second": 1.802, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2334501507537692e-06, |
|
"loss": 1.8483, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.610816478729248, |
|
"eval_runtime": 1332.0876, |
|
"eval_samples_per_second": 59.108, |
|
"eval_steps_per_second": 1.847, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.228530452261307e-06, |
|
"loss": 1.8398, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.5979838371276855, |
|
"eval_runtime": 1364.0972, |
|
"eval_samples_per_second": 57.721, |
|
"eval_steps_per_second": 1.804, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2236058291457287e-06, |
|
"loss": 1.8463, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.6000773906707764, |
|
"eval_runtime": 1360.1908, |
|
"eval_samples_per_second": 57.887, |
|
"eval_steps_per_second": 1.809, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2186861306532664e-06, |
|
"loss": 1.8359, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.6006139516830444, |
|
"eval_runtime": 1317.5401, |
|
"eval_samples_per_second": 59.761, |
|
"eval_steps_per_second": 1.868, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.2137615075376886e-06, |
|
"loss": 1.84, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 1.5974223613739014, |
|
"eval_runtime": 1313.3637, |
|
"eval_samples_per_second": 59.951, |
|
"eval_steps_per_second": 1.874, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.2088368844221107e-06, |
|
"loss": 1.8376, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6001969575881958, |
|
"eval_runtime": 1469.4741, |
|
"eval_samples_per_second": 53.582, |
|
"eval_steps_per_second": 1.675, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.2039171859296485e-06, |
|
"loss": 1.8449, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.607892394065857, |
|
"eval_runtime": 1422.4124, |
|
"eval_samples_per_second": 55.355, |
|
"eval_steps_per_second": 1.73, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.19899256281407e-06, |
|
"loss": 1.8446, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6081199645996094, |
|
"eval_runtime": 1373.6037, |
|
"eval_samples_per_second": 57.321, |
|
"eval_steps_per_second": 1.792, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.1940679396984923e-06, |
|
"loss": 1.8439, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.608659267425537, |
|
"eval_runtime": 1439.9758, |
|
"eval_samples_per_second": 54.679, |
|
"eval_steps_per_second": 1.709, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.189143316582915e-06, |
|
"loss": 1.8482, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6077545881271362, |
|
"eval_runtime": 1319.2178, |
|
"eval_samples_per_second": 59.685, |
|
"eval_steps_per_second": 1.865, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.1842236180904522e-06, |
|
"loss": 1.8439, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6032804250717163, |
|
"eval_runtime": 1317.0887, |
|
"eval_samples_per_second": 59.781, |
|
"eval_steps_per_second": 1.869, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.1792989949748748e-06, |
|
"loss": 1.8321, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6043972969055176, |
|
"eval_runtime": 1470.5701, |
|
"eval_samples_per_second": 53.542, |
|
"eval_steps_per_second": 1.674, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.1743743718592965e-06, |
|
"loss": 1.8419, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.605370283126831, |
|
"eval_runtime": 1314.2922, |
|
"eval_samples_per_second": 59.908, |
|
"eval_steps_per_second": 1.872, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.1694497487437186e-06, |
|
"loss": 1.8476, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.6129260063171387, |
|
"eval_runtime": 1361.9015, |
|
"eval_samples_per_second": 57.814, |
|
"eval_steps_per_second": 1.807, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1645300502512564e-06, |
|
"loss": 1.8451, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.6105306148529053, |
|
"eval_runtime": 1364.3506, |
|
"eval_samples_per_second": 57.71, |
|
"eval_steps_per_second": 1.804, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1596054271356785e-06, |
|
"loss": 1.8428, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.6004083156585693, |
|
"eval_runtime": 1425.3385, |
|
"eval_samples_per_second": 55.241, |
|
"eval_steps_per_second": 1.727, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1546808040201002e-06, |
|
"loss": 1.8428, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.5961782932281494, |
|
"eval_runtime": 1316.8053, |
|
"eval_samples_per_second": 59.794, |
|
"eval_steps_per_second": 1.869, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.149761105527638e-06, |
|
"loss": 1.8397, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.614754319190979, |
|
"eval_runtime": 1364.4836, |
|
"eval_samples_per_second": 57.705, |
|
"eval_steps_per_second": 1.804, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.14483648241206e-06, |
|
"loss": 1.8391, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.611263632774353, |
|
"eval_runtime": 1427.9009, |
|
"eval_samples_per_second": 55.142, |
|
"eval_steps_per_second": 1.724, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.139916783919598e-06, |
|
"loss": 1.8429, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.5951673984527588, |
|
"eval_runtime": 1415.1329, |
|
"eval_samples_per_second": 55.639, |
|
"eval_steps_per_second": 1.739, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.13499216080402e-06, |
|
"loss": 1.8367, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.6043238639831543, |
|
"eval_runtime": 1472.8356, |
|
"eval_samples_per_second": 53.459, |
|
"eval_steps_per_second": 1.671, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1300724623115578e-06, |
|
"loss": 1.8346, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.6021771430969238, |
|
"eval_runtime": 1366.1283, |
|
"eval_samples_per_second": 57.635, |
|
"eval_steps_per_second": 1.801, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.1251478391959803e-06, |
|
"loss": 1.8367, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 1.6067354679107666, |
|
"eval_runtime": 1371.6575, |
|
"eval_samples_per_second": 57.403, |
|
"eval_steps_per_second": 1.794, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.1202281407035177e-06, |
|
"loss": 1.8272, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.610526442527771, |
|
"eval_runtime": 1417.1973, |
|
"eval_samples_per_second": 55.558, |
|
"eval_steps_per_second": 1.737, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.1153035175879402e-06, |
|
"loss": 1.8354, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6014180183410645, |
|
"eval_runtime": 1484.2434, |
|
"eval_samples_per_second": 53.049, |
|
"eval_steps_per_second": 1.658, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.110378894472362e-06, |
|
"loss": 1.8425, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6036982536315918, |
|
"eval_runtime": 1357.7515, |
|
"eval_samples_per_second": 57.991, |
|
"eval_steps_per_second": 1.813, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.105464120603015e-06, |
|
"loss": 1.8375, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.601694941520691, |
|
"eval_runtime": 1426.4682, |
|
"eval_samples_per_second": 55.197, |
|
"eval_steps_per_second": 1.725, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.100539497487437e-06, |
|
"loss": 1.8217, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.605579137802124, |
|
"eval_runtime": 1363.4095, |
|
"eval_samples_per_second": 57.75, |
|
"eval_steps_per_second": 1.805, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0956148743718596e-06, |
|
"loss": 1.8363, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.602707028388977, |
|
"eval_runtime": 1465.2017, |
|
"eval_samples_per_second": 53.738, |
|
"eval_steps_per_second": 1.68, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0906902512562817e-06, |
|
"loss": 1.8389, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6052539348602295, |
|
"eval_runtime": 1364.7712, |
|
"eval_samples_per_second": 57.692, |
|
"eval_steps_per_second": 1.803, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0857656281407034e-06, |
|
"loss": 1.8345, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6135693788528442, |
|
"eval_runtime": 1319.0075, |
|
"eval_samples_per_second": 59.694, |
|
"eval_steps_per_second": 1.866, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.0808410050251256e-06, |
|
"loss": 1.8364, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.595258116722107, |
|
"eval_runtime": 1370.9372, |
|
"eval_samples_per_second": 57.433, |
|
"eval_steps_per_second": 1.795, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0759213065326633e-06, |
|
"loss": 1.839, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.6040674448013306, |
|
"eval_runtime": 1411.8985, |
|
"eval_samples_per_second": 55.767, |
|
"eval_steps_per_second": 1.743, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0709966834170855e-06, |
|
"loss": 1.8401, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.5981929302215576, |
|
"eval_runtime": 1370.3726, |
|
"eval_samples_per_second": 57.457, |
|
"eval_steps_per_second": 1.796, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.066072060301508e-06, |
|
"loss": 1.8327, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.607890009880066, |
|
"eval_runtime": 1378.4994, |
|
"eval_samples_per_second": 57.118, |
|
"eval_steps_per_second": 1.785, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.061152361809045e-06, |
|
"loss": 1.8361, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.606207013130188, |
|
"eval_runtime": 1418.921, |
|
"eval_samples_per_second": 55.491, |
|
"eval_steps_per_second": 1.734, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0562277386934675e-06, |
|
"loss": 1.8387, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.5986562967300415, |
|
"eval_runtime": 1440.6978, |
|
"eval_samples_per_second": 54.652, |
|
"eval_steps_per_second": 1.708, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0513031155778896e-06, |
|
"loss": 1.8336, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.599565863609314, |
|
"eval_runtime": 1366.4669, |
|
"eval_samples_per_second": 57.621, |
|
"eval_steps_per_second": 1.801, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0463784924623118e-06, |
|
"loss": 1.8457, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.6037099361419678, |
|
"eval_runtime": 1374.6709, |
|
"eval_samples_per_second": 57.277, |
|
"eval_steps_per_second": 1.79, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0414587939698495e-06, |
|
"loss": 1.8418, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.596120834350586, |
|
"eval_runtime": 1313.4049, |
|
"eval_samples_per_second": 59.949, |
|
"eval_steps_per_second": 1.874, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.0365390954773873e-06, |
|
"loss": 1.8334, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 1.598099946975708, |
|
"eval_runtime": 1348.2663, |
|
"eval_samples_per_second": 58.399, |
|
"eval_steps_per_second": 1.825, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.031614472361809e-06, |
|
"loss": 1.8376, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6052206754684448, |
|
"eval_runtime": 1454.3089, |
|
"eval_samples_per_second": 54.14, |
|
"eval_steps_per_second": 1.692, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.026689849246231e-06, |
|
"loss": 1.8387, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.5904994010925293, |
|
"eval_runtime": 1429.466, |
|
"eval_samples_per_second": 55.081, |
|
"eval_steps_per_second": 1.722, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.0217652261306533e-06, |
|
"loss": 1.829, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6081486940383911, |
|
"eval_runtime": 1346.6097, |
|
"eval_samples_per_second": 58.471, |
|
"eval_steps_per_second": 1.828, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.016840603015076e-06, |
|
"loss": 1.8329, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6047282218933105, |
|
"eval_runtime": 1326.3086, |
|
"eval_samples_per_second": 59.366, |
|
"eval_steps_per_second": 1.856, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.0119159798994975e-06, |
|
"loss": 1.8415, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6007598638534546, |
|
"eval_runtime": 1302.4192, |
|
"eval_samples_per_second": 60.454, |
|
"eval_steps_per_second": 1.89, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.0069962814070357e-06, |
|
"loss": 1.8287, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6105679273605347, |
|
"eval_runtime": 1347.332, |
|
"eval_samples_per_second": 58.439, |
|
"eval_steps_per_second": 1.827, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.0020716582914574e-06, |
|
"loss": 1.8327, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.607060194015503, |
|
"eval_runtime": 1305.8265, |
|
"eval_samples_per_second": 60.297, |
|
"eval_steps_per_second": 1.885, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.9971470351758796e-06, |
|
"loss": 1.8366, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 1.6026506423950195, |
|
"eval_runtime": 1335.1184, |
|
"eval_samples_per_second": 58.974, |
|
"eval_steps_per_second": 1.843, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9922322613065325e-06, |
|
"loss": 1.8467, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.6091498136520386, |
|
"eval_runtime": 1322.5487, |
|
"eval_samples_per_second": 59.534, |
|
"eval_steps_per_second": 1.861, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9873125628140703e-06, |
|
"loss": 1.8222, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.6029044389724731, |
|
"eval_runtime": 1467.5465, |
|
"eval_samples_per_second": 53.652, |
|
"eval_steps_per_second": 1.677, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.982387939698493e-06, |
|
"loss": 1.833, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.6027536392211914, |
|
"eval_runtime": 1350.4841, |
|
"eval_samples_per_second": 58.303, |
|
"eval_steps_per_second": 1.822, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.977463316582915e-06, |
|
"loss": 1.8324, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.5961847305297852, |
|
"eval_runtime": 1301.7193, |
|
"eval_samples_per_second": 60.487, |
|
"eval_steps_per_second": 1.891, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9725386934673367e-06, |
|
"loss": 1.8227, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.5996179580688477, |
|
"eval_runtime": 1340.0152, |
|
"eval_samples_per_second": 58.758, |
|
"eval_steps_per_second": 1.837, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.967614070351759e-06, |
|
"loss": 1.8286, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.598938226699829, |
|
"eval_runtime": 1445.9352, |
|
"eval_samples_per_second": 54.454, |
|
"eval_steps_per_second": 1.702, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.962689447236181e-06, |
|
"loss": 1.8252, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.6006757020950317, |
|
"eval_runtime": 1407.7995, |
|
"eval_samples_per_second": 55.929, |
|
"eval_steps_per_second": 1.748, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.9577648241206035e-06, |
|
"loss": 1.8317, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.5989092588424683, |
|
"eval_runtime": 1327.983, |
|
"eval_samples_per_second": 59.291, |
|
"eval_steps_per_second": 1.853, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.952854974874372e-06, |
|
"loss": 1.8389, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 1.602870225906372, |
|
"eval_runtime": 1383.3273, |
|
"eval_samples_per_second": 56.919, |
|
"eval_steps_per_second": 1.779, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9479303517587942e-06, |
|
"loss": 1.8345, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.6017168760299683, |
|
"eval_runtime": 1446.0375, |
|
"eval_samples_per_second": 54.45, |
|
"eval_steps_per_second": 1.702, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9430057286432164e-06, |
|
"loss": 1.8345, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.5972059965133667, |
|
"eval_runtime": 1349.1112, |
|
"eval_samples_per_second": 58.362, |
|
"eval_steps_per_second": 1.824, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.938081105527638e-06, |
|
"loss": 1.8314, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.59882390499115, |
|
"eval_runtime": 1362.0292, |
|
"eval_samples_per_second": 57.809, |
|
"eval_steps_per_second": 1.807, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9331564824120606e-06, |
|
"loss": 1.8314, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.5979121923446655, |
|
"eval_runtime": 1463.5496, |
|
"eval_samples_per_second": 53.799, |
|
"eval_steps_per_second": 1.682, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9282318592964828e-06, |
|
"loss": 1.8395, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.601837158203125, |
|
"eval_runtime": 1388.4972, |
|
"eval_samples_per_second": 56.707, |
|
"eval_steps_per_second": 1.772, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9233121608040205e-06, |
|
"loss": 1.8279, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.5932117700576782, |
|
"eval_runtime": 1304.0965, |
|
"eval_samples_per_second": 60.377, |
|
"eval_steps_per_second": 1.887, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9183875376884427e-06, |
|
"loss": 1.8349, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.60895574092865, |
|
"eval_runtime": 1312.317, |
|
"eval_samples_per_second": 59.998, |
|
"eval_steps_per_second": 1.875, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9134629145728644e-06, |
|
"loss": 1.838, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.6004664897918701, |
|
"eval_runtime": 1437.6414, |
|
"eval_samples_per_second": 54.768, |
|
"eval_steps_per_second": 1.712, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.9085382914572865e-06, |
|
"loss": 1.8316, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 1.599939227104187, |
|
"eval_runtime": 1452.9057, |
|
"eval_samples_per_second": 54.193, |
|
"eval_steps_per_second": 1.694, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.90362351758794e-06, |
|
"loss": 1.832, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5970202684402466, |
|
"eval_runtime": 1303.8096, |
|
"eval_samples_per_second": 60.39, |
|
"eval_steps_per_second": 1.888, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.898698894472362e-06, |
|
"loss": 1.8472, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5940812826156616, |
|
"eval_runtime": 1337.4473, |
|
"eval_samples_per_second": 58.871, |
|
"eval_steps_per_second": 1.84, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8937742713567842e-06, |
|
"loss": 1.8398, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5929611921310425, |
|
"eval_runtime": 1328.097, |
|
"eval_samples_per_second": 59.286, |
|
"eval_steps_per_second": 1.853, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8888496482412059e-06, |
|
"loss": 1.8337, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5947929620742798, |
|
"eval_runtime": 1389.3994, |
|
"eval_samples_per_second": 56.67, |
|
"eval_steps_per_second": 1.771, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8839299497487437e-06, |
|
"loss": 1.8344, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.593649983406067, |
|
"eval_runtime": 1395.5343, |
|
"eval_samples_per_second": 56.421, |
|
"eval_steps_per_second": 1.763, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8790053266331656e-06, |
|
"loss": 1.8294, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5959663391113281, |
|
"eval_runtime": 1347.3949, |
|
"eval_samples_per_second": 58.436, |
|
"eval_steps_per_second": 1.826, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8740807035175883e-06, |
|
"loss": 1.8142, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5931050777435303, |
|
"eval_runtime": 1396.136, |
|
"eval_samples_per_second": 56.396, |
|
"eval_steps_per_second": 1.763, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8691560804020103e-06, |
|
"loss": 1.8267, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.5900132656097412, |
|
"eval_runtime": 1363.8371, |
|
"eval_samples_per_second": 57.732, |
|
"eval_steps_per_second": 1.804, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.8642314572864322e-06, |
|
"loss": 1.8277, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 1.598568320274353, |
|
"eval_runtime": 1470.956, |
|
"eval_samples_per_second": 53.528, |
|
"eval_steps_per_second": 1.673, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.85931175879397e-06, |
|
"loss": 1.817, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.5886738300323486, |
|
"eval_runtime": 1396.8317, |
|
"eval_samples_per_second": 56.368, |
|
"eval_steps_per_second": 1.762, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.854387135678392e-06, |
|
"loss": 1.8217, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.5907179117202759, |
|
"eval_runtime": 1416.7398, |
|
"eval_samples_per_second": 55.576, |
|
"eval_steps_per_second": 1.737, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8494674371859298e-06, |
|
"loss": 1.8254, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.6016545295715332, |
|
"eval_runtime": 1459.5293, |
|
"eval_samples_per_second": 53.947, |
|
"eval_steps_per_second": 1.686, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.844542814070352e-06, |
|
"loss": 1.8257, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.6024249792099, |
|
"eval_runtime": 1378.9773, |
|
"eval_samples_per_second": 57.098, |
|
"eval_steps_per_second": 1.785, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.83962311557789e-06, |
|
"loss": 1.83, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.6028681993484497, |
|
"eval_runtime": 1452.3606, |
|
"eval_samples_per_second": 54.213, |
|
"eval_steps_per_second": 1.694, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8347034170854275e-06, |
|
"loss": 1.8204, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.599324345588684, |
|
"eval_runtime": 1463.7748, |
|
"eval_samples_per_second": 53.79, |
|
"eval_steps_per_second": 1.681, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8297787939698492e-06, |
|
"loss": 1.8234, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.5928692817687988, |
|
"eval_runtime": 1330.8025, |
|
"eval_samples_per_second": 59.165, |
|
"eval_steps_per_second": 1.849, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8248590954773874e-06, |
|
"loss": 1.834, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.5874714851379395, |
|
"eval_runtime": 1348.2089, |
|
"eval_samples_per_second": 58.401, |
|
"eval_steps_per_second": 1.825, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.8199344723618089e-06, |
|
"loss": 1.8159, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.5897165536880493, |
|
"eval_runtime": 1380.8115, |
|
"eval_samples_per_second": 57.022, |
|
"eval_steps_per_second": 1.782, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8150098492462312e-06, |
|
"loss": 1.8282, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5937752723693848, |
|
"eval_runtime": 1430.1609, |
|
"eval_samples_per_second": 55.055, |
|
"eval_steps_per_second": 1.721, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8100852261306536e-06, |
|
"loss": 1.8179, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5979640483856201, |
|
"eval_runtime": 1317.4556, |
|
"eval_samples_per_second": 59.764, |
|
"eval_steps_per_second": 1.868, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.805165527638191e-06, |
|
"loss": 1.8235, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.596608281135559, |
|
"eval_runtime": 1413.1903, |
|
"eval_samples_per_second": 55.716, |
|
"eval_steps_per_second": 1.741, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.8002409045226133e-06, |
|
"loss": 1.8264, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5875301361083984, |
|
"eval_runtime": 1474.9742, |
|
"eval_samples_per_second": 53.382, |
|
"eval_steps_per_second": 1.669, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7953212060301506e-06, |
|
"loss": 1.8279, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5822259187698364, |
|
"eval_runtime": 1434.0813, |
|
"eval_samples_per_second": 54.904, |
|
"eval_steps_per_second": 1.716, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7903965829145732e-06, |
|
"loss": 1.8275, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5921976566314697, |
|
"eval_runtime": 1303.7483, |
|
"eval_samples_per_second": 60.393, |
|
"eval_steps_per_second": 1.888, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7854719597989953e-06, |
|
"loss": 1.8201, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5922507047653198, |
|
"eval_runtime": 1451.3783, |
|
"eval_samples_per_second": 54.25, |
|
"eval_steps_per_second": 1.696, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7805522613065333e-06, |
|
"loss": 1.8343, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5992974042892456, |
|
"eval_runtime": 1312.6672, |
|
"eval_samples_per_second": 59.982, |
|
"eval_steps_per_second": 1.875, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.7756276381909552e-06, |
|
"loss": 1.8252, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.5954718589782715, |
|
"eval_runtime": 1329.4403, |
|
"eval_samples_per_second": 59.226, |
|
"eval_steps_per_second": 1.851, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7707030150753767e-06, |
|
"loss": 1.8247, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5888080596923828, |
|
"eval_runtime": 1470.1668, |
|
"eval_samples_per_second": 53.557, |
|
"eval_steps_per_second": 1.674, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.765778391959799e-06, |
|
"loss": 1.8241, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5860859155654907, |
|
"eval_runtime": 1332.2427, |
|
"eval_samples_per_second": 59.101, |
|
"eval_steps_per_second": 1.847, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7608537688442214e-06, |
|
"loss": 1.8299, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.592593789100647, |
|
"eval_runtime": 1407.4541, |
|
"eval_samples_per_second": 55.943, |
|
"eval_steps_per_second": 1.749, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.755934070351759e-06, |
|
"loss": 1.8337, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5933688879013062, |
|
"eval_runtime": 1320.4107, |
|
"eval_samples_per_second": 59.631, |
|
"eval_steps_per_second": 1.864, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.751009447236181e-06, |
|
"loss": 1.8244, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.599759817123413, |
|
"eval_runtime": 1322.9928, |
|
"eval_samples_per_second": 59.514, |
|
"eval_steps_per_second": 1.86, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7460897487437184e-06, |
|
"loss": 1.8275, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5976141691207886, |
|
"eval_runtime": 1363.02, |
|
"eval_samples_per_second": 57.767, |
|
"eval_steps_per_second": 1.806, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7411700502512564e-06, |
|
"loss": 1.8287, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.579295039176941, |
|
"eval_runtime": 1381.0984, |
|
"eval_samples_per_second": 57.01, |
|
"eval_steps_per_second": 1.782, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7362454271356783e-06, |
|
"loss": 1.8293, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5919257402420044, |
|
"eval_runtime": 1393.4561, |
|
"eval_samples_per_second": 56.505, |
|
"eval_steps_per_second": 1.766, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.7313208040201006e-06, |
|
"loss": 1.8171, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 1.5862181186676025, |
|
"eval_runtime": 1394.7193, |
|
"eval_samples_per_second": 56.454, |
|
"eval_steps_per_second": 1.765, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.726396180904523e-06, |
|
"loss": 1.8196, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.6017342805862427, |
|
"eval_runtime": 1385.2253, |
|
"eval_samples_per_second": 56.841, |
|
"eval_steps_per_second": 1.777, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7214715577889445e-06, |
|
"loss": 1.8256, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.6013214588165283, |
|
"eval_runtime": 1435.0073, |
|
"eval_samples_per_second": 54.869, |
|
"eval_steps_per_second": 1.715, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7165567839195985e-06, |
|
"loss": 1.8294, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5915107727050781, |
|
"eval_runtime": 1381.2373, |
|
"eval_samples_per_second": 57.005, |
|
"eval_steps_per_second": 1.782, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.71163216080402e-06, |
|
"loss": 1.8299, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5885646343231201, |
|
"eval_runtime": 1495.9255, |
|
"eval_samples_per_second": 52.634, |
|
"eval_steps_per_second": 1.645, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7067075376884424e-06, |
|
"loss": 1.8264, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5896543264389038, |
|
"eval_runtime": 1438.4722, |
|
"eval_samples_per_second": 54.737, |
|
"eval_steps_per_second": 1.711, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.7017878391959803e-06, |
|
"loss": 1.8256, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5811702013015747, |
|
"eval_runtime": 1475.0011, |
|
"eval_samples_per_second": 53.381, |
|
"eval_steps_per_second": 1.668, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6968632160804023e-06, |
|
"loss": 1.8179, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5885577201843262, |
|
"eval_runtime": 1330.732, |
|
"eval_samples_per_second": 59.168, |
|
"eval_steps_per_second": 1.849, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6919385929648242e-06, |
|
"loss": 1.8288, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.57944917678833, |
|
"eval_runtime": 1356.8541, |
|
"eval_samples_per_second": 58.029, |
|
"eval_steps_per_second": 1.814, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.6870139698492465e-06, |
|
"loss": 1.8225, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 1.5852633714675903, |
|
"eval_runtime": 1411.1498, |
|
"eval_samples_per_second": 55.796, |
|
"eval_steps_per_second": 1.744, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6820893467336684e-06, |
|
"loss": 1.8151, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.6006282567977905, |
|
"eval_runtime": 1433.5991, |
|
"eval_samples_per_second": 54.923, |
|
"eval_steps_per_second": 1.717, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6771696482412064e-06, |
|
"loss": 1.8326, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5857617855072021, |
|
"eval_runtime": 1370.4186, |
|
"eval_samples_per_second": 57.455, |
|
"eval_steps_per_second": 1.796, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6722450251256286e-06, |
|
"loss": 1.8248, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5932334661483765, |
|
"eval_runtime": 1475.8822, |
|
"eval_samples_per_second": 53.349, |
|
"eval_steps_per_second": 1.667, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6673204020100503e-06, |
|
"loss": 1.8206, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5989573001861572, |
|
"eval_runtime": 1449.7957, |
|
"eval_samples_per_second": 54.309, |
|
"eval_steps_per_second": 1.697, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.662400703517588e-06, |
|
"loss": 1.8309, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.59175705909729, |
|
"eval_runtime": 1380.8863, |
|
"eval_samples_per_second": 57.019, |
|
"eval_steps_per_second": 1.782, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6574760804020102e-06, |
|
"loss": 1.8219, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5923930406570435, |
|
"eval_runtime": 1358.5234, |
|
"eval_samples_per_second": 57.958, |
|
"eval_steps_per_second": 1.812, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6525563819095481e-06, |
|
"loss": 1.8155, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5951777696609497, |
|
"eval_runtime": 1366.1742, |
|
"eval_samples_per_second": 57.633, |
|
"eval_steps_per_second": 1.801, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.64763175879397e-06, |
|
"loss": 1.8192, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.5806286334991455, |
|
"eval_runtime": 1380.6238, |
|
"eval_samples_per_second": 57.03, |
|
"eval_steps_per_second": 1.783, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.6427071356783916e-06, |
|
"loss": 1.8175, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 1.587588906288147, |
|
"eval_runtime": 1441.4618, |
|
"eval_samples_per_second": 54.623, |
|
"eval_steps_per_second": 1.707, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6377825125628143e-06, |
|
"loss": 1.8198, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.5877431631088257, |
|
"eval_runtime": 1325.2917, |
|
"eval_samples_per_second": 59.411, |
|
"eval_steps_per_second": 1.857, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6328578894472362e-06, |
|
"loss": 1.8172, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.5791815519332886, |
|
"eval_runtime": 1344.8655, |
|
"eval_samples_per_second": 58.546, |
|
"eval_steps_per_second": 1.83, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6279381909547742e-06, |
|
"loss": 1.8135, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.591376543045044, |
|
"eval_runtime": 1410.6096, |
|
"eval_samples_per_second": 55.818, |
|
"eval_steps_per_second": 1.745, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6230135678391964e-06, |
|
"loss": 1.824, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.584437370300293, |
|
"eval_runtime": 1318.433, |
|
"eval_samples_per_second": 59.72, |
|
"eval_steps_per_second": 1.867, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6180938693467341e-06, |
|
"loss": 1.8146, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.5931404829025269, |
|
"eval_runtime": 1360.7166, |
|
"eval_samples_per_second": 57.864, |
|
"eval_steps_per_second": 1.809, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.6131692462311558e-06, |
|
"loss": 1.8215, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.5878210067749023, |
|
"eval_runtime": 1361.6065, |
|
"eval_samples_per_second": 57.827, |
|
"eval_steps_per_second": 1.807, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.608244623115578e-06, |
|
"loss": 1.8125, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.5885907411575317, |
|
"eval_runtime": 1346.9108, |
|
"eval_samples_per_second": 58.457, |
|
"eval_steps_per_second": 1.827, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.603324924623116e-06, |
|
"loss": 1.8284, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.579284906387329, |
|
"eval_runtime": 1419.5492, |
|
"eval_samples_per_second": 55.466, |
|
"eval_steps_per_second": 1.734, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.5984003015075379e-06, |
|
"loss": 1.8155, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 1.590699553489685, |
|
"eval_runtime": 1311.3869, |
|
"eval_samples_per_second": 60.041, |
|
"eval_steps_per_second": 1.877, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.5934855276381908e-06, |
|
"loss": 1.8215, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.5784730911254883, |
|
"eval_runtime": 1345.5637, |
|
"eval_samples_per_second": 58.516, |
|
"eval_steps_per_second": 1.829, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.5885609045226134e-06, |
|
"loss": 1.8356, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.5905190706253052, |
|
"eval_runtime": 1329.0593, |
|
"eval_samples_per_second": 59.243, |
|
"eval_steps_per_second": 1.852, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.5836362814070355e-06, |
|
"loss": 1.8121, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.5879490375518799, |
|
"eval_runtime": 1353.1981, |
|
"eval_samples_per_second": 58.186, |
|
"eval_steps_per_second": 1.819, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.5787116582914572e-06, |
|
"loss": 1.8282, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 1.589970350265503, |
|
"eval_runtime": 1320.5704, |
|
"eval_samples_per_second": 59.623, |
|
"eval_steps_per_second": 1.864, |
|
"step": 700000 |
|
} |
|
], |
|
"max_steps": 1000000, |
|
"num_train_epochs": 2, |
|
"total_flos": 1.6795290993650842e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|