|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 21.650061500615006, |
|
"global_step": 316827, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.840909090909091e-06, |
|
"loss": 4.4773, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0005, |
|
"loss": 4.2748, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_loss": 5.831331253051758, |
|
"eval_max_distance": 201, |
|
"eval_mean_distance": 116, |
|
"eval_runtime": 16.9557, |
|
"eval_samples_per_second": 29.489, |
|
"eval_steps_per_second": 0.59, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.001, |
|
"loss": 3.2983, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 5.142276287078857, |
|
"eval_max_distance": 186, |
|
"eval_mean_distance": 102, |
|
"eval_runtime": 14.0636, |
|
"eval_samples_per_second": 35.553, |
|
"eval_steps_per_second": 0.711, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0009949384562291498, |
|
"loss": 2.9967, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 4.896122932434082, |
|
"eval_max_distance": 196, |
|
"eval_mean_distance": 104, |
|
"eval_runtime": 15.2018, |
|
"eval_samples_per_second": 32.891, |
|
"eval_steps_per_second": 0.658, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0009898769124582999, |
|
"loss": 2.9064, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_loss": 4.704981803894043, |
|
"eval_max_distance": 171, |
|
"eval_mean_distance": 99, |
|
"eval_runtime": 14.8835, |
|
"eval_samples_per_second": 33.594, |
|
"eval_steps_per_second": 0.672, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0009848153686874497, |
|
"loss": 2.8444, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 4.648359775543213, |
|
"eval_max_distance": 173, |
|
"eval_mean_distance": 99, |
|
"eval_runtime": 13.5358, |
|
"eval_samples_per_second": 36.939, |
|
"eval_steps_per_second": 0.739, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0009797538249165995, |
|
"loss": 2.7359, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 4.639862537384033, |
|
"eval_max_distance": 200, |
|
"eval_mean_distance": 96, |
|
"eval_runtime": 13.4266, |
|
"eval_samples_per_second": 37.239, |
|
"eval_steps_per_second": 0.745, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0009746922811457495, |
|
"loss": 2.667, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 4.517767906188965, |
|
"eval_max_distance": 160, |
|
"eval_mean_distance": 93, |
|
"eval_runtime": 13.7165, |
|
"eval_samples_per_second": 36.453, |
|
"eval_steps_per_second": 0.729, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0009696307373748994, |
|
"loss": 2.6454, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_loss": 4.468171119689941, |
|
"eval_max_distance": 154, |
|
"eval_mean_distance": 92, |
|
"eval_runtime": 14.5339, |
|
"eval_samples_per_second": 34.402, |
|
"eval_steps_per_second": 0.688, |
|
"step": 1408 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0009645691936040492, |
|
"loss": 2.5737, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 4.346231460571289, |
|
"eval_max_distance": 151, |
|
"eval_mean_distance": 91, |
|
"eval_runtime": 13.9596, |
|
"eval_samples_per_second": 35.818, |
|
"eval_steps_per_second": 0.716, |
|
"step": 1584 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0009595076498331991, |
|
"loss": 2.5569, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 4.252463340759277, |
|
"eval_max_distance": 166, |
|
"eval_mean_distance": 92, |
|
"eval_runtime": 13.7296, |
|
"eval_samples_per_second": 36.418, |
|
"eval_steps_per_second": 0.728, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0009544461060623491, |
|
"loss": 2.5114, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 4.216597557067871, |
|
"eval_max_distance": 164, |
|
"eval_mean_distance": 90, |
|
"eval_runtime": 14.6796, |
|
"eval_samples_per_second": 34.061, |
|
"eval_steps_per_second": 0.681, |
|
"step": 1936 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0009493845622914989, |
|
"loss": 2.4448, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 4.229903697967529, |
|
"eval_max_distance": 151, |
|
"eval_mean_distance": 90, |
|
"eval_runtime": 14.5473, |
|
"eval_samples_per_second": 34.371, |
|
"eval_steps_per_second": 0.687, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0009443230185206488, |
|
"loss": 2.3847, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 4.108386039733887, |
|
"eval_max_distance": 146, |
|
"eval_mean_distance": 90, |
|
"eval_runtime": 14.7702, |
|
"eval_samples_per_second": 33.852, |
|
"eval_steps_per_second": 0.677, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0009392614747497988, |
|
"loss": 2.1415, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 3.6208255290985107, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 78, |
|
"eval_runtime": 15.5259, |
|
"eval_samples_per_second": 32.204, |
|
"eval_steps_per_second": 0.644, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0009341999309789486, |
|
"loss": 1.0359, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.5739717483520508, |
|
"eval_max_distance": 139, |
|
"eval_mean_distance": 41, |
|
"eval_runtime": 17.5626, |
|
"eval_samples_per_second": 28.47, |
|
"eval_steps_per_second": 0.569, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0009291383872080985, |
|
"loss": 0.5688, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.293850064277649, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 35, |
|
"eval_runtime": 13.8285, |
|
"eval_samples_per_second": 36.157, |
|
"eval_steps_per_second": 0.723, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0009240768434372484, |
|
"loss": 0.5115, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 1.188537836074829, |
|
"eval_max_distance": 120, |
|
"eval_mean_distance": 32, |
|
"eval_runtime": 15.477, |
|
"eval_samples_per_second": 32.306, |
|
"eval_steps_per_second": 0.646, |
|
"step": 2992 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0009190152996663983, |
|
"loss": 0.4662, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 1.1915712356567383, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 32, |
|
"eval_runtime": 16.2261, |
|
"eval_samples_per_second": 30.815, |
|
"eval_steps_per_second": 0.616, |
|
"step": 3168 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0009139537558955482, |
|
"loss": 0.4401, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.15479576587677, |
|
"eval_max_distance": 126, |
|
"eval_mean_distance": 31, |
|
"eval_runtime": 16.6846, |
|
"eval_samples_per_second": 29.968, |
|
"eval_steps_per_second": 0.599, |
|
"step": 3344 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0009088922121246981, |
|
"loss": 0.408, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 1.1046110391616821, |
|
"eval_max_distance": 103, |
|
"eval_mean_distance": 29, |
|
"eval_runtime": 14.9865, |
|
"eval_samples_per_second": 33.363, |
|
"eval_steps_per_second": 0.667, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000903830668353848, |
|
"loss": 0.3858, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.9291953444480896, |
|
"eval_max_distance": 135, |
|
"eval_mean_distance": 26, |
|
"eval_runtime": 13.5564, |
|
"eval_samples_per_second": 36.883, |
|
"eval_steps_per_second": 0.738, |
|
"step": 3696 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0008987691245829979, |
|
"loss": 0.3586, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 0.7040215730667114, |
|
"eval_max_distance": 115, |
|
"eval_mean_distance": 21, |
|
"eval_runtime": 15.3203, |
|
"eval_samples_per_second": 32.636, |
|
"eval_steps_per_second": 0.653, |
|
"step": 3872 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0008937075808121477, |
|
"loss": 0.3237, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.5791512131690979, |
|
"eval_max_distance": 139, |
|
"eval_mean_distance": 17, |
|
"eval_runtime": 14.8657, |
|
"eval_samples_per_second": 33.634, |
|
"eval_steps_per_second": 0.673, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0008886460370412976, |
|
"loss": 0.2783, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.4473143219947815, |
|
"eval_max_distance": 122, |
|
"eval_mean_distance": 14, |
|
"eval_runtime": 13.1694, |
|
"eval_samples_per_second": 37.967, |
|
"eval_steps_per_second": 0.759, |
|
"step": 4224 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0008835844932704476, |
|
"loss": 0.2666, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.3882352411746979, |
|
"eval_max_distance": 125, |
|
"eval_mean_distance": 11, |
|
"eval_runtime": 14.0887, |
|
"eval_samples_per_second": 35.489, |
|
"eval_steps_per_second": 0.71, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0008785229494995974, |
|
"loss": 0.249, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_loss": 0.3606760799884796, |
|
"eval_max_distance": 126, |
|
"eval_mean_distance": 11, |
|
"eval_runtime": 13.741, |
|
"eval_samples_per_second": 36.388, |
|
"eval_steps_per_second": 0.728, |
|
"step": 4576 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0008734614057287473, |
|
"loss": 0.2371, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.3074778616428375, |
|
"eval_max_distance": 111, |
|
"eval_mean_distance": 9, |
|
"eval_runtime": 13.0071, |
|
"eval_samples_per_second": 38.441, |
|
"eval_steps_per_second": 0.769, |
|
"step": 4752 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0008683998619578973, |
|
"loss": 0.2231, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.2844279706478119, |
|
"eval_max_distance": 123, |
|
"eval_mean_distance": 8, |
|
"eval_runtime": 12.8112, |
|
"eval_samples_per_second": 39.028, |
|
"eval_steps_per_second": 0.781, |
|
"step": 4928 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0008633383181870471, |
|
"loss": 0.2114, |
|
"step": 5104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.27491652965545654, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 8, |
|
"eval_runtime": 11.3043, |
|
"eval_samples_per_second": 44.231, |
|
"eval_steps_per_second": 0.885, |
|
"step": 5104 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.000858276774416197, |
|
"loss": 0.1946, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 0.2683752775192261, |
|
"eval_max_distance": 124, |
|
"eval_mean_distance": 8, |
|
"eval_runtime": 11.6576, |
|
"eval_samples_per_second": 42.891, |
|
"eval_steps_per_second": 0.858, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.000853215230645347, |
|
"loss": 0.1893, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.25331711769104004, |
|
"eval_max_distance": 129, |
|
"eval_mean_distance": 7, |
|
"eval_runtime": 11.955, |
|
"eval_samples_per_second": 41.824, |
|
"eval_steps_per_second": 0.836, |
|
"step": 5456 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0008481536868744968, |
|
"loss": 0.1865, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.22615253925323486, |
|
"eval_max_distance": 129, |
|
"eval_mean_distance": 7, |
|
"eval_runtime": 10.581, |
|
"eval_samples_per_second": 47.255, |
|
"eval_steps_per_second": 0.945, |
|
"step": 5632 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0008430921431036467, |
|
"loss": 0.1769, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.22688022255897522, |
|
"eval_max_distance": 137, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 10.5367, |
|
"eval_samples_per_second": 47.453, |
|
"eval_steps_per_second": 0.949, |
|
"step": 5808 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0008380305993327965, |
|
"loss": 0.172, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.21938543021678925, |
|
"eval_max_distance": 129, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 10.9867, |
|
"eval_samples_per_second": 45.51, |
|
"eval_steps_per_second": 0.91, |
|
"step": 5984 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0008329690555619465, |
|
"loss": 0.1594, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_loss": 0.20960509777069092, |
|
"eval_max_distance": 138, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 8.8034, |
|
"eval_samples_per_second": 56.796, |
|
"eval_steps_per_second": 1.136, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0008279075117910963, |
|
"loss": 0.158, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 0.20247827470302582, |
|
"eval_max_distance": 122, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 9.7934, |
|
"eval_samples_per_second": 51.055, |
|
"eval_steps_per_second": 1.021, |
|
"step": 6336 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0008228459680202461, |
|
"loss": 0.1436, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.20747961103916168, |
|
"eval_max_distance": 128, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 10.4315, |
|
"eval_samples_per_second": 47.932, |
|
"eval_steps_per_second": 0.959, |
|
"step": 6512 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000817784424249396, |
|
"loss": 0.1398, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.19245657324790955, |
|
"eval_max_distance": 128, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 9.4807, |
|
"eval_samples_per_second": 52.739, |
|
"eval_steps_per_second": 1.055, |
|
"step": 6688 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.000812722880478546, |
|
"loss": 0.1326, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 0.1919807493686676, |
|
"eval_max_distance": 125, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 9.3305, |
|
"eval_samples_per_second": 53.588, |
|
"eval_steps_per_second": 1.072, |
|
"step": 6864 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0008076613367076958, |
|
"loss": 0.1321, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.18875986337661743, |
|
"eval_max_distance": 135, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.8177, |
|
"eval_samples_per_second": 63.958, |
|
"eval_steps_per_second": 1.279, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0008025997929368457, |
|
"loss": 0.1255, |
|
"step": 7216 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 0.18568550050258636, |
|
"eval_max_distance": 144, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 9.4575, |
|
"eval_samples_per_second": 52.868, |
|
"eval_steps_per_second": 1.057, |
|
"step": 7216 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0007975382491659956, |
|
"loss": 0.1257, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.18572939932346344, |
|
"eval_max_distance": 113, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.6787, |
|
"eval_samples_per_second": 65.115, |
|
"eval_steps_per_second": 1.302, |
|
"step": 7392 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0007924767053951455, |
|
"loss": 0.1175, |
|
"step": 7568 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.17028774321079254, |
|
"eval_max_distance": 124, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.3771, |
|
"eval_samples_per_second": 67.777, |
|
"eval_steps_per_second": 1.356, |
|
"step": 7568 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0007874151616242954, |
|
"loss": 0.1109, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.1755068004131317, |
|
"eval_max_distance": 126, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.0941, |
|
"eval_samples_per_second": 70.482, |
|
"eval_steps_per_second": 1.41, |
|
"step": 7744 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0007823536178534452, |
|
"loss": 0.1065, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.17363803088665009, |
|
"eval_max_distance": 128, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.9523, |
|
"eval_samples_per_second": 55.852, |
|
"eval_steps_per_second": 1.117, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0007772920740825952, |
|
"loss": 0.104, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.16887737810611725, |
|
"eval_max_distance": 129, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.6778, |
|
"eval_samples_per_second": 57.619, |
|
"eval_steps_per_second": 1.152, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0007722305303117451, |
|
"loss": 0.0997, |
|
"step": 8272 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 0.16253332793712616, |
|
"eval_max_distance": 117, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 9.3982, |
|
"eval_samples_per_second": 53.202, |
|
"eval_steps_per_second": 1.064, |
|
"step": 8272 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0007671689865408949, |
|
"loss": 0.0993, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.16519272327423096, |
|
"eval_max_distance": 120, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.3081, |
|
"eval_samples_per_second": 60.182, |
|
"eval_steps_per_second": 1.204, |
|
"step": 8448 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0007621074427700448, |
|
"loss": 0.0962, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.16153652966022491, |
|
"eval_max_distance": 113, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.3339, |
|
"eval_samples_per_second": 59.996, |
|
"eval_steps_per_second": 1.2, |
|
"step": 8624 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0007570458989991948, |
|
"loss": 0.0904, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_loss": 0.1528134047985077, |
|
"eval_max_distance": 131, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.605, |
|
"eval_samples_per_second": 58.106, |
|
"eval_steps_per_second": 1.162, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0007519843552283446, |
|
"loss": 0.0873, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 0.15612851083278656, |
|
"eval_max_distance": 106, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 9.222, |
|
"eval_samples_per_second": 54.218, |
|
"eval_steps_per_second": 1.084, |
|
"step": 8976 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0007469228114574945, |
|
"loss": 0.0858, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_loss": 0.15163549780845642, |
|
"eval_max_distance": 137, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.6808, |
|
"eval_samples_per_second": 57.598, |
|
"eval_steps_per_second": 1.152, |
|
"step": 9152 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0007418612676866445, |
|
"loss": 0.082, |
|
"step": 9328 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 0.1514330506324768, |
|
"eval_max_distance": 136, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.6904, |
|
"eval_samples_per_second": 57.535, |
|
"eval_steps_per_second": 1.151, |
|
"step": 9328 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0007367997239157943, |
|
"loss": 0.0766, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 0.15236127376556396, |
|
"eval_max_distance": 148, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.5887, |
|
"eval_samples_per_second": 58.216, |
|
"eval_steps_per_second": 1.164, |
|
"step": 9504 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0007317381801449442, |
|
"loss": 0.075, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.14720940589904785, |
|
"eval_max_distance": 138, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.12, |
|
"eval_samples_per_second": 61.576, |
|
"eval_steps_per_second": 1.232, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000726676636374094, |
|
"loss": 0.0785, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 0.15077313780784607, |
|
"eval_max_distance": 134, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.0224, |
|
"eval_samples_per_second": 71.201, |
|
"eval_steps_per_second": 1.424, |
|
"step": 9856 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.000721615092603244, |
|
"loss": 0.0696, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.1496003121137619, |
|
"eval_max_distance": 147, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.4446, |
|
"eval_samples_per_second": 59.209, |
|
"eval_steps_per_second": 1.184, |
|
"step": 10032 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0007165535488323939, |
|
"loss": 0.0697, |
|
"step": 10208 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.14819692075252533, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 7.7516, |
|
"eval_samples_per_second": 64.503, |
|
"eval_steps_per_second": 1.29, |
|
"step": 10208 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0007114920050615437, |
|
"loss": 0.0654, |
|
"step": 10384 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.14708730578422546, |
|
"eval_max_distance": 109, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 7.7796, |
|
"eval_samples_per_second": 64.271, |
|
"eval_steps_per_second": 1.285, |
|
"step": 10384 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0007064304612906937, |
|
"loss": 0.0656, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_loss": 0.14370670914649963, |
|
"eval_max_distance": 111, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 7.3904, |
|
"eval_samples_per_second": 67.655, |
|
"eval_steps_per_second": 1.353, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0007013689175198436, |
|
"loss": 0.0617, |
|
"step": 10736 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.1476200520992279, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.6231, |
|
"eval_samples_per_second": 57.984, |
|
"eval_steps_per_second": 1.16, |
|
"step": 10736 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0006963073737489934, |
|
"loss": 0.0619, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.14763715863227844, |
|
"eval_max_distance": 122, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.0149, |
|
"eval_samples_per_second": 62.384, |
|
"eval_steps_per_second": 1.248, |
|
"step": 10912 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0006912458299781433, |
|
"loss": 0.0597, |
|
"step": 11088 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.1550969034433365, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.7957, |
|
"eval_samples_per_second": 56.846, |
|
"eval_steps_per_second": 1.137, |
|
"step": 11088 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0006861842862072933, |
|
"loss": 0.0575, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.15137550234794617, |
|
"eval_max_distance": 124, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.729, |
|
"eval_samples_per_second": 57.28, |
|
"eval_steps_per_second": 1.146, |
|
"step": 11264 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0006811227424364431, |
|
"loss": 0.0575, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 0.15129156410694122, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 8.3361, |
|
"eval_samples_per_second": 59.98, |
|
"eval_steps_per_second": 1.2, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.000676061198665593, |
|
"loss": 0.0561, |
|
"step": 11616 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.14808788895606995, |
|
"eval_max_distance": 135, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 8.5181, |
|
"eval_samples_per_second": 58.699, |
|
"eval_steps_per_second": 1.174, |
|
"step": 11616 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.000670999654894743, |
|
"loss": 0.2917, |
|
"step": 11792 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 0.11704121530056, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 8.4947, |
|
"eval_samples_per_second": 58.86, |
|
"eval_steps_per_second": 1.177, |
|
"step": 11792 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0006659381111238928, |
|
"loss": 0.2267, |
|
"step": 11968 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.09986243396997452, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 8.3117, |
|
"eval_samples_per_second": 60.156, |
|
"eval_steps_per_second": 1.203, |
|
"step": 11968 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0006608765673530427, |
|
"loss": 0.1755, |
|
"step": 12144 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.09780210256576538, |
|
"eval_max_distance": 100, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.2356, |
|
"eval_samples_per_second": 69.103, |
|
"eval_steps_per_second": 1.382, |
|
"step": 12144 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0006558150235821925, |
|
"loss": 0.1476, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_loss": 0.09662258625030518, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.9894, |
|
"eval_samples_per_second": 62.583, |
|
"eval_steps_per_second": 1.252, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0006507534798113425, |
|
"loss": 0.1283, |
|
"step": 12496 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.09604214131832123, |
|
"eval_max_distance": 92, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 9.1331, |
|
"eval_samples_per_second": 54.746, |
|
"eval_steps_per_second": 1.095, |
|
"step": 12496 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0006456919360404924, |
|
"loss": 0.117, |
|
"step": 12672 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 0.08993188291788101, |
|
"eval_max_distance": 82, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 8.194, |
|
"eval_samples_per_second": 61.02, |
|
"eval_steps_per_second": 1.22, |
|
"step": 12672 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0006406303922696422, |
|
"loss": 0.1077, |
|
"step": 12848 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_loss": 0.09431099146604538, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.8825, |
|
"eval_samples_per_second": 63.432, |
|
"eval_steps_per_second": 1.269, |
|
"step": 12848 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.0006355688484987922, |
|
"loss": 0.1023, |
|
"step": 13024 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.08808578550815582, |
|
"eval_max_distance": 71, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 9.0408, |
|
"eval_samples_per_second": 55.305, |
|
"eval_steps_per_second": 1.106, |
|
"step": 13024 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0006305073047279421, |
|
"loss": 0.0975, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_loss": 0.09387736767530441, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.3392, |
|
"eval_samples_per_second": 68.127, |
|
"eval_steps_per_second": 1.363, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0006254457609570919, |
|
"loss": 0.0959, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.09010987728834152, |
|
"eval_max_distance": 91, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 8.2013, |
|
"eval_samples_per_second": 60.966, |
|
"eval_steps_per_second": 1.219, |
|
"step": 13376 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0006203842171862418, |
|
"loss": 0.0933, |
|
"step": 13552 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 0.09695952385663986, |
|
"eval_max_distance": 84, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 7.5998, |
|
"eval_samples_per_second": 65.792, |
|
"eval_steps_per_second": 1.316, |
|
"step": 13552 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0006153226734153918, |
|
"loss": 0.0907, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 0.0932065024971962, |
|
"eval_max_distance": 81, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.5398, |
|
"eval_samples_per_second": 66.314, |
|
"eval_steps_per_second": 1.326, |
|
"step": 13728 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.0006102611296445416, |
|
"loss": 0.09, |
|
"step": 13904 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_loss": 0.09407244622707367, |
|
"eval_max_distance": 83, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 8.8256, |
|
"eval_samples_per_second": 56.653, |
|
"eval_steps_per_second": 1.133, |
|
"step": 13904 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0006051995858736915, |
|
"loss": 0.0873, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.09223543852567673, |
|
"eval_max_distance": 72, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 8.3913, |
|
"eval_samples_per_second": 59.586, |
|
"eval_steps_per_second": 1.192, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0006001380421028413, |
|
"loss": 0.089, |
|
"step": 14256 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.09844117611646652, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 8.6496, |
|
"eval_samples_per_second": 57.806, |
|
"eval_steps_per_second": 1.156, |
|
"step": 14256 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0005950764983319913, |
|
"loss": 0.0849, |
|
"step": 14432 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.09052952378988266, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 8.703, |
|
"eval_samples_per_second": 57.452, |
|
"eval_steps_per_second": 1.149, |
|
"step": 14432 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0005900149545611412, |
|
"loss": 0.0842, |
|
"step": 14608 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.09249469637870789, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 10.036, |
|
"eval_samples_per_second": 49.821, |
|
"eval_steps_per_second": 0.996, |
|
"step": 14608 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.000584953410790291, |
|
"loss": 0.0866, |
|
"step": 14784 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_loss": 0.09183748811483383, |
|
"eval_max_distance": 82, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.2806, |
|
"eval_samples_per_second": 68.676, |
|
"eval_steps_per_second": 1.374, |
|
"step": 14784 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.000579891867019441, |
|
"loss": 0.0841, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 0.09966301172971725, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 9.5849, |
|
"eval_samples_per_second": 52.165, |
|
"eval_steps_per_second": 1.043, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0005748303232485909, |
|
"loss": 0.0828, |
|
"step": 15136 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.10113956034183502, |
|
"eval_max_distance": 83, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 9.7885, |
|
"eval_samples_per_second": 51.081, |
|
"eval_steps_per_second": 1.022, |
|
"step": 15136 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0005697687794777407, |
|
"loss": 0.0788, |
|
"step": 15312 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.10437356680631638, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 11.3278, |
|
"eval_samples_per_second": 44.139, |
|
"eval_steps_per_second": 0.883, |
|
"step": 15312 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0005647072357068907, |
|
"loss": 0.0795, |
|
"step": 15488 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_loss": 0.0787021666765213, |
|
"eval_max_distance": 84, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.3395, |
|
"eval_samples_per_second": 68.125, |
|
"eval_steps_per_second": 1.362, |
|
"step": 15488 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0005596456919360406, |
|
"loss": 0.0767, |
|
"step": 15664 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 0.0796855166554451, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.4769, |
|
"eval_samples_per_second": 77.198, |
|
"eval_steps_per_second": 1.544, |
|
"step": 15664 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0005545841481651904, |
|
"loss": 0.0745, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 0.07943727821111679, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.2103, |
|
"eval_samples_per_second": 69.345, |
|
"eval_steps_per_second": 1.387, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0005495226043943403, |
|
"loss": 0.0711, |
|
"step": 16016 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.07364234328269958, |
|
"eval_max_distance": 87, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.8839, |
|
"eval_samples_per_second": 84.978, |
|
"eval_steps_per_second": 1.7, |
|
"step": 16016 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0005444610606234902, |
|
"loss": 0.0701, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 0.07703561335802078, |
|
"eval_max_distance": 83, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.9093, |
|
"eval_samples_per_second": 63.217, |
|
"eval_steps_per_second": 1.264, |
|
"step": 16192 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0005393995168526401, |
|
"loss": 0.0659, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 0.0723421648144722, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 7.5303, |
|
"eval_samples_per_second": 66.398, |
|
"eval_steps_per_second": 1.328, |
|
"step": 16368 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00053433797308179, |
|
"loss": 0.0661, |
|
"step": 16544 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 0.07238639891147614, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.7269, |
|
"eval_samples_per_second": 74.328, |
|
"eval_steps_per_second": 1.487, |
|
"step": 16544 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0005292764293109398, |
|
"loss": 0.0639, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.07442823052406311, |
|
"eval_max_distance": 74, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.4344, |
|
"eval_samples_per_second": 92.007, |
|
"eval_steps_per_second": 1.84, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0005242148855400898, |
|
"loss": 0.0616, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.07285340130329132, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.0352, |
|
"eval_samples_per_second": 82.848, |
|
"eval_steps_per_second": 1.657, |
|
"step": 16896 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0005191533417692397, |
|
"loss": 0.0585, |
|
"step": 17072 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 0.07329737395048141, |
|
"eval_max_distance": 79, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.7646, |
|
"eval_samples_per_second": 73.914, |
|
"eval_steps_per_second": 1.478, |
|
"step": 17072 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0005140917979983895, |
|
"loss": 0.0587, |
|
"step": 17248 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_loss": 0.07280145585536957, |
|
"eval_max_distance": 81, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.8294, |
|
"eval_samples_per_second": 73.212, |
|
"eval_steps_per_second": 1.464, |
|
"step": 17248 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0005090302542275395, |
|
"loss": 0.0565, |
|
"step": 17424 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_loss": 0.07057639956474304, |
|
"eval_max_distance": 83, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.5899, |
|
"eval_samples_per_second": 75.874, |
|
"eval_steps_per_second": 1.517, |
|
"step": 17424 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0005039687104566894, |
|
"loss": 0.056, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 0.06988174468278885, |
|
"eval_max_distance": 69, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.7807, |
|
"eval_samples_per_second": 73.739, |
|
"eval_steps_per_second": 1.475, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0004989071666858392, |
|
"loss": 0.0544, |
|
"step": 17776 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.06715495139360428, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.3191, |
|
"eval_samples_per_second": 79.126, |
|
"eval_steps_per_second": 1.583, |
|
"step": 17776 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.000493845622914989, |
|
"loss": 0.0524, |
|
"step": 17952 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 0.06758233904838562, |
|
"eval_max_distance": 86, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.0088, |
|
"eval_samples_per_second": 83.212, |
|
"eval_steps_per_second": 1.664, |
|
"step": 17952 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.000488784079144139, |
|
"loss": 0.052, |
|
"step": 18128 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.06754262000322342, |
|
"eval_max_distance": 82, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.502, |
|
"eval_samples_per_second": 90.876, |
|
"eval_steps_per_second": 1.818, |
|
"step": 18128 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00048372253537328885, |
|
"loss": 0.0502, |
|
"step": 18304 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_loss": 0.06616352498531342, |
|
"eval_max_distance": 78, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.1322, |
|
"eval_samples_per_second": 81.537, |
|
"eval_steps_per_second": 1.631, |
|
"step": 18304 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0004786609916024388, |
|
"loss": 0.0499, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.06735648959875107, |
|
"eval_max_distance": 75, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.1213, |
|
"eval_samples_per_second": 81.683, |
|
"eval_steps_per_second": 1.634, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00047359944783158866, |
|
"loss": 0.0475, |
|
"step": 18656 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.0657699704170227, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.434, |
|
"eval_samples_per_second": 77.713, |
|
"eval_steps_per_second": 1.554, |
|
"step": 18656 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00046853790406073853, |
|
"loss": 0.0486, |
|
"step": 18832 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 0.06461019814014435, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2565, |
|
"eval_samples_per_second": 117.468, |
|
"eval_steps_per_second": 2.349, |
|
"step": 18832 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0004634763602898884, |
|
"loss": 0.0458, |
|
"step": 19008 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.06515143066644669, |
|
"eval_max_distance": 97, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.9339, |
|
"eval_samples_per_second": 84.261, |
|
"eval_steps_per_second": 1.685, |
|
"step": 19008 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00045841481651903834, |
|
"loss": 0.0462, |
|
"step": 19184 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_loss": 0.0647222101688385, |
|
"eval_max_distance": 86, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.7775, |
|
"eval_samples_per_second": 104.658, |
|
"eval_steps_per_second": 2.093, |
|
"step": 19184 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0004533532727481882, |
|
"loss": 0.0443, |
|
"step": 19360 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 0.06631914526224136, |
|
"eval_max_distance": 92, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.4448, |
|
"eval_samples_per_second": 77.582, |
|
"eval_steps_per_second": 1.552, |
|
"step": 19360 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0004482917289773381, |
|
"loss": 0.0444, |
|
"step": 19536 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.06480421125888824, |
|
"eval_max_distance": 79, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.8948, |
|
"eval_samples_per_second": 84.82, |
|
"eval_steps_per_second": 1.696, |
|
"step": 19536 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00044323018520648803, |
|
"loss": 0.0423, |
|
"step": 19712 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.06277700513601303, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.8803, |
|
"eval_samples_per_second": 85.03, |
|
"eval_steps_per_second": 1.701, |
|
"step": 19712 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0004381686414356379, |
|
"loss": 0.0419, |
|
"step": 19888 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.06238849461078644, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.6974, |
|
"eval_samples_per_second": 106.443, |
|
"eval_steps_per_second": 2.129, |
|
"step": 19888 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0004331070976647878, |
|
"loss": 0.0409, |
|
"step": 20064 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.06289780884981155, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5388, |
|
"eval_samples_per_second": 90.272, |
|
"eval_steps_per_second": 1.805, |
|
"step": 20064 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00042804555389393766, |
|
"loss": 0.0402, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.06532587110996246, |
|
"eval_max_distance": 79, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.4649, |
|
"eval_samples_per_second": 91.493, |
|
"eval_steps_per_second": 1.83, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0004229840101230876, |
|
"loss": 0.0405, |
|
"step": 20416 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_loss": 0.06438089162111282, |
|
"eval_max_distance": 72, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.8994, |
|
"eval_samples_per_second": 84.754, |
|
"eval_steps_per_second": 1.695, |
|
"step": 20416 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00041792246635223747, |
|
"loss": 0.0374, |
|
"step": 20592 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.06247664615511894, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.1384, |
|
"eval_samples_per_second": 97.307, |
|
"eval_steps_per_second": 1.946, |
|
"step": 20592 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00041286092258138734, |
|
"loss": 0.039, |
|
"step": 20768 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.06493379175662994, |
|
"eval_max_distance": 96, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.5781, |
|
"eval_samples_per_second": 109.216, |
|
"eval_steps_per_second": 2.184, |
|
"step": 20768 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0004077993788105372, |
|
"loss": 0.0374, |
|
"step": 20944 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.06642530113458633, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.1825, |
|
"eval_samples_per_second": 80.873, |
|
"eval_steps_per_second": 1.617, |
|
"step": 20944 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00040273783503968715, |
|
"loss": 0.0372, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.0631885975599289, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5472, |
|
"eval_samples_per_second": 90.135, |
|
"eval_steps_per_second": 1.803, |
|
"step": 21120 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.000397676291268837, |
|
"loss": 0.0356, |
|
"step": 21296 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.06364666670560837, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.534, |
|
"eval_samples_per_second": 90.351, |
|
"eval_steps_per_second": 1.807, |
|
"step": 21296 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00039261474749798685, |
|
"loss": 0.0364, |
|
"step": 21472 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_loss": 0.06835252046585083, |
|
"eval_max_distance": 101, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.667, |
|
"eval_samples_per_second": 88.23, |
|
"eval_steps_per_second": 1.765, |
|
"step": 21472 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0003875532037271368, |
|
"loss": 0.0349, |
|
"step": 21648 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 0.06463531404733658, |
|
"eval_max_distance": 81, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.6807, |
|
"eval_samples_per_second": 88.018, |
|
"eval_steps_per_second": 1.76, |
|
"step": 21648 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00038249165995628666, |
|
"loss": 0.0338, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.061899635940790176, |
|
"eval_max_distance": 87, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5863, |
|
"eval_samples_per_second": 89.505, |
|
"eval_steps_per_second": 1.79, |
|
"step": 21824 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00037743011618543654, |
|
"loss": 0.0332, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.06767092645168304, |
|
"eval_max_distance": 91, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.1428, |
|
"eval_samples_per_second": 81.396, |
|
"eval_steps_per_second": 1.628, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0003723685724145864, |
|
"loss": 0.0331, |
|
"step": 22176 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.06576185673475266, |
|
"eval_max_distance": 93, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.8963, |
|
"eval_samples_per_second": 84.799, |
|
"eval_steps_per_second": 1.696, |
|
"step": 22176 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00036730702864373635, |
|
"loss": 0.0324, |
|
"step": 22352 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"eval_loss": 0.0631786659359932, |
|
"eval_max_distance": 89, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.3685, |
|
"eval_samples_per_second": 93.135, |
|
"eval_steps_per_second": 1.863, |
|
"step": 22352 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.0003622454848728862, |
|
"loss": 0.0324, |
|
"step": 22528 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.06909012049436569, |
|
"eval_max_distance": 85, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.8638, |
|
"eval_samples_per_second": 85.269, |
|
"eval_steps_per_second": 1.705, |
|
"step": 22528 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0003571839411020361, |
|
"loss": 0.0318, |
|
"step": 22704 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.06905217468738556, |
|
"eval_max_distance": 91, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.4271, |
|
"eval_samples_per_second": 92.131, |
|
"eval_steps_per_second": 1.843, |
|
"step": 22704 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.000352122397331186, |
|
"loss": 0.031, |
|
"step": 22880 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.0721253752708435, |
|
"eval_max_distance": 89, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 6.1894, |
|
"eval_samples_per_second": 80.783, |
|
"eval_steps_per_second": 1.616, |
|
"step": 22880 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0003470608535603359, |
|
"loss": 0.0308, |
|
"step": 23056 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 0.06949847936630249, |
|
"eval_max_distance": 71, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.7252, |
|
"eval_samples_per_second": 87.333, |
|
"eval_steps_per_second": 1.747, |
|
"step": 23056 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0003419993097894858, |
|
"loss": 0.0309, |
|
"step": 23232 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.07597502321004868, |
|
"eval_max_distance": 99, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.6902, |
|
"eval_samples_per_second": 87.87, |
|
"eval_steps_per_second": 1.757, |
|
"step": 23232 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00033693776601863567, |
|
"loss": 0.0293, |
|
"step": 23408 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.0717659443616867, |
|
"eval_max_distance": 101, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.9061, |
|
"eval_samples_per_second": 84.658, |
|
"eval_steps_per_second": 1.693, |
|
"step": 23408 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0003318762222477856, |
|
"loss": 0.1908, |
|
"step": 23584 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.05668208748102188, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.6014, |
|
"eval_samples_per_second": 75.741, |
|
"eval_steps_per_second": 1.515, |
|
"step": 23584 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0003268146784769355, |
|
"loss": 0.0875, |
|
"step": 23760 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.058700818568468094, |
|
"eval_max_distance": 35, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.9161, |
|
"eval_samples_per_second": 84.516, |
|
"eval_steps_per_second": 1.69, |
|
"step": 23760 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00032175313470608535, |
|
"loss": 0.0773, |
|
"step": 23936 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.05530280992388725, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.0323, |
|
"eval_samples_per_second": 71.1, |
|
"eval_steps_per_second": 1.422, |
|
"step": 23936 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00031669159093523523, |
|
"loss": 0.0678, |
|
"step": 24112 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.056951854377985, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.8239, |
|
"eval_samples_per_second": 73.272, |
|
"eval_steps_per_second": 1.465, |
|
"step": 24112 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00031163004716438516, |
|
"loss": 0.0625, |
|
"step": 24288 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.05978156253695488, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.0447, |
|
"eval_samples_per_second": 82.717, |
|
"eval_steps_per_second": 1.654, |
|
"step": 24288 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00030656850339353504, |
|
"loss": 0.0603, |
|
"step": 24464 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.057503603398799896, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.2715, |
|
"eval_samples_per_second": 60.449, |
|
"eval_steps_per_second": 1.209, |
|
"step": 24464 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.0003015069596226849, |
|
"loss": 0.0557, |
|
"step": 24640 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.05871723219752312, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.307, |
|
"eval_samples_per_second": 60.19, |
|
"eval_steps_per_second": 1.204, |
|
"step": 24640 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00029644541585183485, |
|
"loss": 0.0549, |
|
"step": 24816 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 0.057098135352134705, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.5317, |
|
"eval_samples_per_second": 66.386, |
|
"eval_steps_per_second": 1.328, |
|
"step": 24816 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0002913838720809847, |
|
"loss": 0.0536, |
|
"step": 24992 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"eval_loss": 0.05979160591959953, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.5051, |
|
"eval_samples_per_second": 58.788, |
|
"eval_steps_per_second": 1.176, |
|
"step": 24992 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0002863223283101346, |
|
"loss": 0.0524, |
|
"step": 25168 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.06075568497180939, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.3407, |
|
"eval_samples_per_second": 59.947, |
|
"eval_steps_per_second": 1.199, |
|
"step": 25168 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0002812607845392845, |
|
"loss": 0.0511, |
|
"step": 25344 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 0.057893384248018265, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.3435, |
|
"eval_samples_per_second": 59.927, |
|
"eval_steps_per_second": 1.199, |
|
"step": 25344 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0002761992407684344, |
|
"loss": 0.0529, |
|
"step": 25520 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.059256672859191895, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.2791, |
|
"eval_samples_per_second": 60.393, |
|
"eval_steps_per_second": 1.208, |
|
"step": 25520 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002711376969975843, |
|
"loss": 0.0518, |
|
"step": 25696 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 0.056909676641225815, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.9914, |
|
"eval_samples_per_second": 71.516, |
|
"eval_steps_per_second": 1.43, |
|
"step": 25696 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00026607615322673416, |
|
"loss": 0.0512, |
|
"step": 25872 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_loss": 0.059548597782850266, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.847, |
|
"eval_samples_per_second": 63.719, |
|
"eval_steps_per_second": 1.274, |
|
"step": 25872 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0002610146094558841, |
|
"loss": 0.0512, |
|
"step": 26048 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.06114144250750542, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.4839, |
|
"eval_samples_per_second": 66.81, |
|
"eval_steps_per_second": 1.336, |
|
"step": 26048 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00025595306568503397, |
|
"loss": 0.0507, |
|
"step": 26224 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 0.056723106652498245, |
|
"eval_max_distance": 50, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.1155, |
|
"eval_samples_per_second": 70.269, |
|
"eval_steps_per_second": 1.405, |
|
"step": 26224 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00025089152191418385, |
|
"loss": 0.0506, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_loss": 0.058637164533138275, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.7501, |
|
"eval_samples_per_second": 64.515, |
|
"eval_steps_per_second": 1.29, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0002458299781433337, |
|
"loss": 0.0499, |
|
"step": 26576 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.06316396594047546, |
|
"eval_max_distance": 50, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.0322, |
|
"eval_samples_per_second": 71.102, |
|
"eval_steps_per_second": 1.422, |
|
"step": 26576 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.0002407684343724836, |
|
"loss": 0.05, |
|
"step": 26752 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.060861945152282715, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.5106, |
|
"eval_samples_per_second": 58.75, |
|
"eval_steps_per_second": 1.175, |
|
"step": 26752 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0002357068906016335, |
|
"loss": 0.0499, |
|
"step": 26928 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_loss": 0.05814690515398979, |
|
"eval_max_distance": 54, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 8.0366, |
|
"eval_samples_per_second": 62.215, |
|
"eval_steps_per_second": 1.244, |
|
"step": 26928 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00023064534683078338, |
|
"loss": 0.0489, |
|
"step": 27104 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.05106068029999733, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.8676, |
|
"eval_samples_per_second": 85.214, |
|
"eval_steps_per_second": 1.704, |
|
"step": 27104 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002255838030599333, |
|
"loss": 0.0485, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.04777122661471367, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5204, |
|
"eval_samples_per_second": 90.573, |
|
"eval_steps_per_second": 1.811, |
|
"step": 27280 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00022052225928908317, |
|
"loss": 0.0468, |
|
"step": 27456 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"eval_loss": 0.049591317772865295, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.7077, |
|
"eval_samples_per_second": 87.601, |
|
"eval_steps_per_second": 1.752, |
|
"step": 27456 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00021546071551823307, |
|
"loss": 0.0458, |
|
"step": 27632 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.04786006361246109, |
|
"eval_max_distance": 54, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.6611, |
|
"eval_samples_per_second": 88.322, |
|
"eval_steps_per_second": 1.766, |
|
"step": 27632 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00021039917174738297, |
|
"loss": 0.0446, |
|
"step": 27808 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_loss": 0.04865054786205292, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.7404, |
|
"eval_samples_per_second": 87.102, |
|
"eval_steps_per_second": 1.742, |
|
"step": 27808 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00020533762797653285, |
|
"loss": 0.0433, |
|
"step": 27984 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 0.04700545221567154, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.2552, |
|
"eval_samples_per_second": 95.144, |
|
"eval_steps_per_second": 1.903, |
|
"step": 27984 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00020027608420568276, |
|
"loss": 0.0424, |
|
"step": 28160 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.048602811992168427, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.8464, |
|
"eval_samples_per_second": 85.522, |
|
"eval_steps_per_second": 1.71, |
|
"step": 28160 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00019521454043483263, |
|
"loss": 0.0418, |
|
"step": 28336 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"eval_loss": 0.04602031037211418, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.4163, |
|
"eval_samples_per_second": 92.315, |
|
"eval_steps_per_second": 1.846, |
|
"step": 28336 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00019015299666398254, |
|
"loss": 0.0416, |
|
"step": 28512 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.046469803899526596, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2978, |
|
"eval_samples_per_second": 116.339, |
|
"eval_steps_per_second": 2.327, |
|
"step": 28512 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00018509145289313241, |
|
"loss": 0.0394, |
|
"step": 28688 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 0.04603447765111923, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5843, |
|
"eval_samples_per_second": 89.537, |
|
"eval_steps_per_second": 1.791, |
|
"step": 28688 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0001800299091222823, |
|
"loss": 0.0384, |
|
"step": 28864 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.0456426702439785, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.1318, |
|
"eval_samples_per_second": 97.431, |
|
"eval_steps_per_second": 1.949, |
|
"step": 28864 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00017496836535143217, |
|
"loss": 0.039, |
|
"step": 29040 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 0.04492652416229248, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.3648, |
|
"eval_samples_per_second": 93.2, |
|
"eval_steps_per_second": 1.864, |
|
"step": 29040 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00016990682158058207, |
|
"loss": 0.0374, |
|
"step": 29216 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.04678362235426903, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.1983, |
|
"eval_samples_per_second": 96.186, |
|
"eval_steps_per_second": 1.924, |
|
"step": 29216 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00016484527780973198, |
|
"loss": 0.037, |
|
"step": 29392 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.04649132117629051, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2517, |
|
"eval_samples_per_second": 117.599, |
|
"eval_steps_per_second": 2.352, |
|
"step": 29392 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00015978373403888185, |
|
"loss": 0.0352, |
|
"step": 29568 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 0.046459365636110306, |
|
"eval_max_distance": 68, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.4243, |
|
"eval_samples_per_second": 92.178, |
|
"eval_steps_per_second": 1.844, |
|
"step": 29568 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00015472219026803176, |
|
"loss": 0.0358, |
|
"step": 29744 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.04598597064614296, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2396, |
|
"eval_samples_per_second": 117.937, |
|
"eval_steps_per_second": 2.359, |
|
"step": 29744 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00014966064649718164, |
|
"loss": 0.034, |
|
"step": 29920 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 0.04516398161649704, |
|
"eval_max_distance": 54, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.9094, |
|
"eval_samples_per_second": 127.897, |
|
"eval_steps_per_second": 2.558, |
|
"step": 29920 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00014459910272633154, |
|
"loss": 0.0346, |
|
"step": 30096 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"eval_loss": 0.046647679060697556, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2262, |
|
"eval_samples_per_second": 118.31, |
|
"eval_steps_per_second": 2.366, |
|
"step": 30096 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00013953755895548142, |
|
"loss": 0.0335, |
|
"step": 30272 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 0.04539273679256439, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.5809, |
|
"eval_samples_per_second": 109.148, |
|
"eval_steps_per_second": 2.183, |
|
"step": 30272 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00013447601518463132, |
|
"loss": 0.0326, |
|
"step": 30448 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_loss": 0.04557649791240692, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.698, |
|
"eval_samples_per_second": 135.209, |
|
"eval_steps_per_second": 2.704, |
|
"step": 30448 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0001294144714137812, |
|
"loss": 0.032, |
|
"step": 30624 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 0.04527006670832634, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.5477, |
|
"eval_samples_per_second": 109.946, |
|
"eval_steps_per_second": 2.199, |
|
"step": 30624 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0001243529276429311, |
|
"loss": 0.0328, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.04470500349998474, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.9109, |
|
"eval_samples_per_second": 101.814, |
|
"eval_steps_per_second": 2.036, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.000119291383872081, |
|
"loss": 0.031, |
|
"step": 30976 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 0.04523780569434166, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.6719, |
|
"eval_samples_per_second": 107.023, |
|
"eval_steps_per_second": 2.14, |
|
"step": 30976 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00011422984010123087, |
|
"loss": 0.0304, |
|
"step": 31152 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.045120373368263245, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.7157, |
|
"eval_samples_per_second": 106.029, |
|
"eval_steps_per_second": 2.121, |
|
"step": 31152 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00010916829633038076, |
|
"loss": 0.0296, |
|
"step": 31328 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.04615224152803421, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.6187, |
|
"eval_samples_per_second": 108.256, |
|
"eval_steps_per_second": 2.165, |
|
"step": 31328 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00010410675255953065, |
|
"loss": 0.0298, |
|
"step": 31504 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 0.045413993299007416, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.666, |
|
"eval_samples_per_second": 107.158, |
|
"eval_steps_per_second": 2.143, |
|
"step": 31504 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 9.904520878868054e-05, |
|
"loss": 0.0289, |
|
"step": 31680 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_loss": 0.04516046866774559, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.7866, |
|
"eval_samples_per_second": 132.046, |
|
"eval_steps_per_second": 2.641, |
|
"step": 31680 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 9.398366501783045e-05, |
|
"loss": 0.0289, |
|
"step": 31856 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.044648416340351105, |
|
"eval_max_distance": 63, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.8092, |
|
"eval_samples_per_second": 103.968, |
|
"eval_steps_per_second": 2.079, |
|
"step": 31856 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 8.892212124698034e-05, |
|
"loss": 0.0283, |
|
"step": 32032 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"eval_loss": 0.04460017383098602, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 5.0713, |
|
"eval_samples_per_second": 98.594, |
|
"eval_steps_per_second": 1.972, |
|
"step": 32032 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 8.386057747613023e-05, |
|
"loss": 0.0282, |
|
"step": 32208 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 0.04471622407436371, |
|
"eval_max_distance": 50, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.7988, |
|
"eval_samples_per_second": 104.192, |
|
"eval_steps_per_second": 2.084, |
|
"step": 32208 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 7.879903370528012e-05, |
|
"loss": 0.0274, |
|
"step": 32384 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.0455770380795002, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.8258, |
|
"eval_samples_per_second": 103.61, |
|
"eval_steps_per_second": 2.072, |
|
"step": 32384 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 7.373748993443e-05, |
|
"loss": 0.0281, |
|
"step": 32560 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.044936638325452805, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.4674, |
|
"eval_samples_per_second": 111.922, |
|
"eval_steps_per_second": 2.238, |
|
"step": 32560 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 6.867594616357989e-05, |
|
"loss": 0.0271, |
|
"step": 32736 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 0.0453297421336174, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.7508, |
|
"eval_samples_per_second": 105.245, |
|
"eval_steps_per_second": 2.105, |
|
"step": 32736 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 6.361440239272978e-05, |
|
"loss": 0.0261, |
|
"step": 32912 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_loss": 0.044983986765146255, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.8094, |
|
"eval_samples_per_second": 131.254, |
|
"eval_steps_per_second": 2.625, |
|
"step": 32912 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 5.8552858621879675e-05, |
|
"loss": 0.0263, |
|
"step": 33088 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 0.04485508054494858, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.5334, |
|
"eval_samples_per_second": 110.292, |
|
"eval_steps_per_second": 2.206, |
|
"step": 33088 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 5.3491314851029566e-05, |
|
"loss": 0.026, |
|
"step": 33264 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 0.044998109340667725, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.6486, |
|
"eval_samples_per_second": 137.038, |
|
"eval_steps_per_second": 2.741, |
|
"step": 33264 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 4.842977108017946e-05, |
|
"loss": 0.025, |
|
"step": 33440 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 0.04560336843132973, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.8552, |
|
"eval_samples_per_second": 129.695, |
|
"eval_steps_per_second": 2.594, |
|
"step": 33440 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 4.336822730932934e-05, |
|
"loss": 0.025, |
|
"step": 33616 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 0.045977283269166946, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.3858, |
|
"eval_samples_per_second": 114.004, |
|
"eval_steps_per_second": 2.28, |
|
"step": 33616 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.830668353847924e-05, |
|
"loss": 0.0243, |
|
"step": 33792 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.046003151684999466, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.9069, |
|
"eval_samples_per_second": 127.98, |
|
"eval_steps_per_second": 2.56, |
|
"step": 33792 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.324513976762913e-05, |
|
"loss": 0.0239, |
|
"step": 33968 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 0.04568994790315628, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1848, |
|
"eval_samples_per_second": 119.48, |
|
"eval_steps_per_second": 2.39, |
|
"step": 33968 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 2.818359599677902e-05, |
|
"loss": 0.0241, |
|
"step": 34144 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 0.04591059312224388, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.4039, |
|
"eval_samples_per_second": 113.535, |
|
"eval_steps_per_second": 2.271, |
|
"step": 34144 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.3122052225928907e-05, |
|
"loss": 0.0239, |
|
"step": 34320 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 0.046651940792798996, |
|
"eval_max_distance": 50, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1885, |
|
"eval_samples_per_second": 119.376, |
|
"eval_steps_per_second": 2.388, |
|
"step": 34320 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.80605084550788e-05, |
|
"loss": 0.0241, |
|
"step": 34496 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"eval_loss": 0.0469602532684803, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.8088, |
|
"eval_samples_per_second": 103.977, |
|
"eval_steps_per_second": 2.08, |
|
"step": 34496 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.299896468422869e-05, |
|
"loss": 0.0234, |
|
"step": 34672 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.04661025106906891, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.836, |
|
"eval_samples_per_second": 103.39, |
|
"eval_steps_per_second": 2.068, |
|
"step": 34672 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.937420913378581e-06, |
|
"loss": 0.0235, |
|
"step": 34848 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"eval_loss": 0.04825682193040848, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.064, |
|
"eval_samples_per_second": 98.737, |
|
"eval_steps_per_second": 1.975, |
|
"step": 34848 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.8758771425284712e-06, |
|
"loss": 0.0226, |
|
"step": 35024 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 0.0471869558095932, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.4865, |
|
"eval_samples_per_second": 111.445, |
|
"eval_steps_per_second": 2.229, |
|
"step": 35024 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.1604095563139932e-05, |
|
"loss": 0.1431, |
|
"step": 35160 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.05489028990268707, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.1578, |
|
"eval_samples_per_second": 81.198, |
|
"eval_steps_per_second": 1.624, |
|
"step": 35160 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 3.660409556313993e-05, |
|
"loss": 0.0731, |
|
"step": 35453 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 0.04241102561354637, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.2607, |
|
"eval_samples_per_second": 68.864, |
|
"eval_steps_per_second": 1.377, |
|
"step": 35453 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 4.988266556234255e-05, |
|
"loss": 0.0514, |
|
"step": 35746 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.04323163628578186, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.4484, |
|
"eval_samples_per_second": 91.77, |
|
"eval_steps_per_second": 1.835, |
|
"step": 35746 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 4.962987886944819e-05, |
|
"loss": 0.0442, |
|
"step": 36039 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 0.0440400205552578, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.4941, |
|
"eval_samples_per_second": 76.993, |
|
"eval_steps_per_second": 1.54, |
|
"step": 36039 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 4.9377092176553816e-05, |
|
"loss": 0.0415, |
|
"step": 36332 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 0.04508192837238312, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.3698, |
|
"eval_samples_per_second": 78.496, |
|
"eval_steps_per_second": 1.57, |
|
"step": 36332 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 4.912430548365945e-05, |
|
"loss": 0.0394, |
|
"step": 36625 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 0.04381772503256798, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.6587, |
|
"eval_samples_per_second": 107.326, |
|
"eval_steps_per_second": 2.147, |
|
"step": 36625 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 4.8871518790765095e-05, |
|
"loss": 0.0392, |
|
"step": 36918 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.04516580328345299, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.142, |
|
"eval_samples_per_second": 81.406, |
|
"eval_steps_per_second": 1.628, |
|
"step": 36918 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 4.861873209787073e-05, |
|
"loss": 0.0386, |
|
"step": 37211 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_loss": 0.045004624873399734, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.7991, |
|
"eval_samples_per_second": 86.22, |
|
"eval_steps_per_second": 1.724, |
|
"step": 37211 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 4.8365945404976366e-05, |
|
"loss": 0.0387, |
|
"step": 37504 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.04667551815509796, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.5404, |
|
"eval_samples_per_second": 90.246, |
|
"eval_steps_per_second": 1.805, |
|
"step": 37504 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 4.8113158712081995e-05, |
|
"loss": 0.0385, |
|
"step": 37797 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 0.047200217843055725, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.397, |
|
"eval_samples_per_second": 78.161, |
|
"eval_steps_per_second": 1.563, |
|
"step": 37797 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 4.786037201918763e-05, |
|
"loss": 0.0381, |
|
"step": 38090 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_loss": 0.04793046787381172, |
|
"eval_max_distance": 40, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.537, |
|
"eval_samples_per_second": 76.487, |
|
"eval_steps_per_second": 1.53, |
|
"step": 38090 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 4.7607585326293267e-05, |
|
"loss": 0.0383, |
|
"step": 38383 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 0.04902255907654762, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.2436, |
|
"eval_samples_per_second": 69.027, |
|
"eval_steps_per_second": 1.381, |
|
"step": 38383 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.735479863339891e-05, |
|
"loss": 0.0381, |
|
"step": 38676 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 0.05011408030986786, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.2764, |
|
"eval_samples_per_second": 79.664, |
|
"eval_steps_per_second": 1.593, |
|
"step": 38676 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 4.7102011940504545e-05, |
|
"loss": 0.0378, |
|
"step": 38969 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 0.0420503243803978, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.988, |
|
"eval_samples_per_second": 100.24, |
|
"eval_steps_per_second": 2.005, |
|
"step": 38969 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.6849225247610174e-05, |
|
"loss": 0.0371, |
|
"step": 39262 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 0.0440935455262661, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.2384, |
|
"eval_samples_per_second": 95.448, |
|
"eval_steps_per_second": 1.909, |
|
"step": 39262 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 4.659643855471581e-05, |
|
"loss": 0.0356, |
|
"step": 39555 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 0.04321606457233429, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 7.4211, |
|
"eval_samples_per_second": 67.375, |
|
"eval_steps_per_second": 1.348, |
|
"step": 39555 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.6343651861821445e-05, |
|
"loss": 0.0342, |
|
"step": 39848 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 0.04155835881829262, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.9259, |
|
"eval_samples_per_second": 101.504, |
|
"eval_steps_per_second": 2.03, |
|
"step": 39848 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 4.609086516892708e-05, |
|
"loss": 0.0338, |
|
"step": 40141 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"eval_loss": 0.041754692792892456, |
|
"eval_max_distance": 68, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.6906, |
|
"eval_samples_per_second": 106.595, |
|
"eval_steps_per_second": 2.132, |
|
"step": 40141 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 4.583807847603272e-05, |
|
"loss": 0.0323, |
|
"step": 40434 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_loss": 0.04196465387940407, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.0109, |
|
"eval_samples_per_second": 124.66, |
|
"eval_steps_per_second": 2.493, |
|
"step": 40434 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 4.558529178313835e-05, |
|
"loss": 0.0318, |
|
"step": 40727 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"eval_loss": 0.04092620685696602, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.882, |
|
"eval_samples_per_second": 128.801, |
|
"eval_steps_per_second": 2.576, |
|
"step": 40727 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.533250509024399e-05, |
|
"loss": 0.0311, |
|
"step": 41020 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 0.042109958827495575, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.8329, |
|
"eval_samples_per_second": 130.448, |
|
"eval_steps_per_second": 2.609, |
|
"step": 41020 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 4.5079718397349624e-05, |
|
"loss": 0.0297, |
|
"step": 41313 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"eval_loss": 0.041698385030031204, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.0043, |
|
"eval_samples_per_second": 124.867, |
|
"eval_steps_per_second": 2.497, |
|
"step": 41313 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 4.482693170445526e-05, |
|
"loss": 0.0296, |
|
"step": 41606 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_loss": 0.041246239095926285, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.5677, |
|
"eval_samples_per_second": 109.465, |
|
"eval_steps_per_second": 2.189, |
|
"step": 41606 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.4574145011560896e-05, |
|
"loss": 0.0293, |
|
"step": 41899 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 0.0423794724047184, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.8582, |
|
"eval_samples_per_second": 129.594, |
|
"eval_steps_per_second": 2.592, |
|
"step": 41899 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 4.432135831866653e-05, |
|
"loss": 0.028, |
|
"step": 42192 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 0.04165972024202347, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.2401, |
|
"eval_samples_per_second": 117.92, |
|
"eval_steps_per_second": 2.358, |
|
"step": 42192 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 4.406857162577217e-05, |
|
"loss": 0.0279, |
|
"step": 42485 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"eval_loss": 0.04228993132710457, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.5976, |
|
"eval_samples_per_second": 108.752, |
|
"eval_steps_per_second": 2.175, |
|
"step": 42485 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.38157849328778e-05, |
|
"loss": 0.0267, |
|
"step": 42778 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.04240557178854942, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.3219, |
|
"eval_samples_per_second": 115.69, |
|
"eval_steps_per_second": 2.314, |
|
"step": 42778 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 4.356299823998344e-05, |
|
"loss": 0.0265, |
|
"step": 43071 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_loss": 0.04247906431555748, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.69, |
|
"eval_samples_per_second": 106.609, |
|
"eval_steps_per_second": 2.132, |
|
"step": 43071 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.3310211547089074e-05, |
|
"loss": 0.026, |
|
"step": 43364 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.04282011464238167, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.8996, |
|
"eval_samples_per_second": 128.218, |
|
"eval_steps_per_second": 2.564, |
|
"step": 43364 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.3057424854194703e-05, |
|
"loss": 0.0256, |
|
"step": 43657 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 0.04263199865818024, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 5.2399, |
|
"eval_samples_per_second": 95.421, |
|
"eval_steps_per_second": 1.908, |
|
"step": 43657 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.280463816130034e-05, |
|
"loss": 0.0255, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 0.043559763580560684, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.7867, |
|
"eval_samples_per_second": 104.457, |
|
"eval_steps_per_second": 2.089, |
|
"step": 43950 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 4.255185146840598e-05, |
|
"loss": 0.0251, |
|
"step": 44243 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 0.04391922801733017, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.947, |
|
"eval_samples_per_second": 126.678, |
|
"eval_steps_per_second": 2.534, |
|
"step": 44243 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.229906477551162e-05, |
|
"loss": 0.0246, |
|
"step": 44536 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.04322103410959244, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.5911, |
|
"eval_samples_per_second": 108.907, |
|
"eval_steps_per_second": 2.178, |
|
"step": 44536 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 4.204627808261725e-05, |
|
"loss": 0.024, |
|
"step": 44829 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_loss": 0.04289233684539795, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.5965, |
|
"eval_samples_per_second": 108.779, |
|
"eval_steps_per_second": 2.176, |
|
"step": 44829 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.179349138972288e-05, |
|
"loss": 0.0236, |
|
"step": 45122 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 0.04328041896224022, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.5214, |
|
"eval_samples_per_second": 110.586, |
|
"eval_steps_per_second": 2.212, |
|
"step": 45122 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.154070469682852e-05, |
|
"loss": 0.0233, |
|
"step": 45415 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_loss": 0.04426594451069832, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.9139, |
|
"eval_samples_per_second": 127.749, |
|
"eval_steps_per_second": 2.555, |
|
"step": 45415 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.1287918003934154e-05, |
|
"loss": 0.023, |
|
"step": 45708 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_loss": 0.043395720422267914, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.4284, |
|
"eval_samples_per_second": 112.907, |
|
"eval_steps_per_second": 2.258, |
|
"step": 45708 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 4.103513131103979e-05, |
|
"loss": 0.023, |
|
"step": 46001 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 0.04368527978658676, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.3885, |
|
"eval_samples_per_second": 113.933, |
|
"eval_steps_per_second": 2.279, |
|
"step": 46001 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.078234461814543e-05, |
|
"loss": 0.023, |
|
"step": 46294 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.04507759213447571, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.6509, |
|
"eval_samples_per_second": 107.505, |
|
"eval_steps_per_second": 2.15, |
|
"step": 46294 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.052955792525106e-05, |
|
"loss": 0.0227, |
|
"step": 46587 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_loss": 0.045307405292987823, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.908, |
|
"eval_samples_per_second": 127.944, |
|
"eval_steps_per_second": 2.559, |
|
"step": 46587 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.02767712323567e-05, |
|
"loss": 0.0616, |
|
"step": 46880 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.04507147893309593, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.1024, |
|
"eval_samples_per_second": 97.993, |
|
"eval_steps_per_second": 1.96, |
|
"step": 46880 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 4.002398453946233e-05, |
|
"loss": 0.0581, |
|
"step": 47173 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"eval_loss": 0.04314437881112099, |
|
"eval_max_distance": 31, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.525, |
|
"eval_samples_per_second": 90.497, |
|
"eval_steps_per_second": 1.81, |
|
"step": 47173 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 3.977119784656797e-05, |
|
"loss": 0.0449, |
|
"step": 47466 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 0.043075479567050934, |
|
"eval_max_distance": 30, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.2169, |
|
"eval_samples_per_second": 80.426, |
|
"eval_steps_per_second": 1.609, |
|
"step": 47466 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.9518411153673604e-05, |
|
"loss": 0.0398, |
|
"step": 47759 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 0.046902794390916824, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.9712, |
|
"eval_samples_per_second": 83.735, |
|
"eval_steps_per_second": 1.675, |
|
"step": 47759 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.926562446077924e-05, |
|
"loss": 0.0379, |
|
"step": 48052 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 0.046891409903764725, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.5218, |
|
"eval_samples_per_second": 76.666, |
|
"eval_steps_per_second": 1.533, |
|
"step": 48052 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 3.9012837767884876e-05, |
|
"loss": 0.0365, |
|
"step": 48345 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 0.04368309676647186, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.1085, |
|
"eval_samples_per_second": 81.853, |
|
"eval_steps_per_second": 1.637, |
|
"step": 48345 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.876005107499051e-05, |
|
"loss": 0.0362, |
|
"step": 48638 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 0.049253445118665695, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.9151, |
|
"eval_samples_per_second": 72.306, |
|
"eval_steps_per_second": 1.446, |
|
"step": 48638 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 3.850726438209615e-05, |
|
"loss": 0.036, |
|
"step": 48931 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 0.04533643275499344, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.4033, |
|
"eval_samples_per_second": 78.084, |
|
"eval_steps_per_second": 1.562, |
|
"step": 48931 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.825447768920178e-05, |
|
"loss": 0.0359, |
|
"step": 49224 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 0.04666188731789589, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 5.9118, |
|
"eval_samples_per_second": 84.577, |
|
"eval_steps_per_second": 1.692, |
|
"step": 49224 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.800169099630742e-05, |
|
"loss": 0.0356, |
|
"step": 49517 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"eval_loss": 0.04655005782842636, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 6.8551, |
|
"eval_samples_per_second": 72.938, |
|
"eval_steps_per_second": 1.459, |
|
"step": 49517 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.7748904303413054e-05, |
|
"loss": 0.0357, |
|
"step": 49810 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_loss": 0.04722580313682556, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.2929, |
|
"eval_samples_per_second": 68.56, |
|
"eval_steps_per_second": 1.371, |
|
"step": 49810 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 3.749611761051869e-05, |
|
"loss": 0.0357, |
|
"step": 50103 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 0.04763193428516388, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.0996, |
|
"eval_samples_per_second": 70.426, |
|
"eval_steps_per_second": 1.409, |
|
"step": 50103 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 3.7243330917624326e-05, |
|
"loss": 0.0357, |
|
"step": 50396 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_loss": 0.04951860010623932, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 7.0362, |
|
"eval_samples_per_second": 71.061, |
|
"eval_steps_per_second": 1.421, |
|
"step": 50396 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 3.699054422472996e-05, |
|
"loss": 0.0358, |
|
"step": 50689 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.040653664618730545, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.2049, |
|
"eval_samples_per_second": 118.908, |
|
"eval_steps_per_second": 2.378, |
|
"step": 50689 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 3.67377575318356e-05, |
|
"loss": 0.0349, |
|
"step": 50982 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 0.041064370423555374, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 5.2421, |
|
"eval_samples_per_second": 95.382, |
|
"eval_steps_per_second": 1.908, |
|
"step": 50982 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 3.6484970838941226e-05, |
|
"loss": 0.0337, |
|
"step": 51275 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 0.040478698909282684, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.7434, |
|
"eval_samples_per_second": 105.41, |
|
"eval_steps_per_second": 2.108, |
|
"step": 51275 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 3.623218414604687e-05, |
|
"loss": 0.0325, |
|
"step": 51568 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.04032284766435623, |
|
"eval_max_distance": 42, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 5.0822, |
|
"eval_samples_per_second": 98.382, |
|
"eval_steps_per_second": 1.968, |
|
"step": 51568 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 3.5979397453152505e-05, |
|
"loss": 0.0319, |
|
"step": 51861 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 0.04050859808921814, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.3025, |
|
"eval_samples_per_second": 116.21, |
|
"eval_steps_per_second": 2.324, |
|
"step": 51861 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 3.572661076025814e-05, |
|
"loss": 0.0308, |
|
"step": 52154 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 0.04165998846292496, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.9661, |
|
"eval_samples_per_second": 126.068, |
|
"eval_steps_per_second": 2.521, |
|
"step": 52154 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 3.5473824067363776e-05, |
|
"loss": 0.0303, |
|
"step": 52447 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 0.04113217815756798, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.0769, |
|
"eval_samples_per_second": 122.641, |
|
"eval_steps_per_second": 2.453, |
|
"step": 52447 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.5221037374469405e-05, |
|
"loss": 0.0296, |
|
"step": 52740 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 0.04085549712181091, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 4.8819, |
|
"eval_samples_per_second": 102.418, |
|
"eval_steps_per_second": 2.048, |
|
"step": 52740 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 3.496825068157504e-05, |
|
"loss": 0.0284, |
|
"step": 53033 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_loss": 0.04045039415359497, |
|
"eval_max_distance": 50, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.7887, |
|
"eval_samples_per_second": 131.972, |
|
"eval_steps_per_second": 2.639, |
|
"step": 53033 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 3.471546398868068e-05, |
|
"loss": 0.0283, |
|
"step": 53326 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 0.04034719988703728, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.8596, |
|
"eval_samples_per_second": 129.547, |
|
"eval_steps_per_second": 2.591, |
|
"step": 53326 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 7.513661202185792e-05, |
|
"loss": 0.8484, |
|
"step": 53436 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.5610889792442322, |
|
"eval_max_distance": 118, |
|
"eval_mean_distance": 8, |
|
"eval_runtime": 6.924, |
|
"eval_samples_per_second": 72.212, |
|
"eval_steps_per_second": 1.444, |
|
"step": 53436 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 0.000575136612021858, |
|
"loss": 0.1562, |
|
"step": 54168 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.14375992119312286, |
|
"eval_max_distance": 104, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 6.298, |
|
"eval_samples_per_second": 79.39, |
|
"eval_steps_per_second": 1.588, |
|
"step": 54168 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 0.0009992407300035892, |
|
"loss": 0.0793, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_loss": 0.19094187021255493, |
|
"eval_max_distance": 119, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 7.1903, |
|
"eval_samples_per_second": 69.538, |
|
"eval_steps_per_second": 1.391, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.0009941881333002016, |
|
"loss": 0.087, |
|
"step": 55632 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.17700538039207458, |
|
"eval_max_distance": 119, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 5.325, |
|
"eval_samples_per_second": 93.896, |
|
"eval_steps_per_second": 1.878, |
|
"step": 55632 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 0.0009891355365968138, |
|
"loss": 0.0896, |
|
"step": 56364 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"eval_loss": 0.259384423494339, |
|
"eval_max_distance": 144, |
|
"eval_mean_distance": 7, |
|
"eval_runtime": 6.0715, |
|
"eval_samples_per_second": 82.352, |
|
"eval_steps_per_second": 1.647, |
|
"step": 56364 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 0.0009840829398934262, |
|
"loss": 0.091, |
|
"step": 57096 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_loss": 0.41866716742515564, |
|
"eval_max_distance": 162, |
|
"eval_mean_distance": 10, |
|
"eval_runtime": 7.1111, |
|
"eval_samples_per_second": 70.313, |
|
"eval_steps_per_second": 1.406, |
|
"step": 57096 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 0.0009790303431900383, |
|
"loss": 0.1024, |
|
"step": 57828 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"eval_loss": 0.6052113771438599, |
|
"eval_max_distance": 172, |
|
"eval_mean_distance": 15, |
|
"eval_runtime": 7.9319, |
|
"eval_samples_per_second": 63.036, |
|
"eval_steps_per_second": 1.261, |
|
"step": 57828 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0009739777464866507, |
|
"loss": 0.1156, |
|
"step": 58560 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.14988236129283905, |
|
"eval_max_distance": 98, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 5.0312, |
|
"eval_samples_per_second": 99.38, |
|
"eval_steps_per_second": 1.988, |
|
"step": 58560 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 0.000968925149783263, |
|
"loss": 0.0924, |
|
"step": 59292 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"eval_loss": 0.10364539921283722, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.385, |
|
"eval_samples_per_second": 114.025, |
|
"eval_steps_per_second": 2.28, |
|
"step": 59292 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 0.0009638725530798753, |
|
"loss": 0.0454, |
|
"step": 60024 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 0.07579351961612701, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1809, |
|
"eval_samples_per_second": 119.592, |
|
"eval_steps_per_second": 2.392, |
|
"step": 60024 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.0009588199563764874, |
|
"loss": 0.0407, |
|
"step": 60756 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"eval_loss": 0.07706684619188309, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.884, |
|
"eval_samples_per_second": 173.37, |
|
"eval_steps_per_second": 3.467, |
|
"step": 60756 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 0.0009537673596730998, |
|
"loss": 0.0404, |
|
"step": 61488 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 0.07059109956026077, |
|
"eval_max_distance": 82, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.407, |
|
"eval_samples_per_second": 146.757, |
|
"eval_steps_per_second": 2.935, |
|
"step": 61488 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 0.0009487147629697121, |
|
"loss": 0.0397, |
|
"step": 62220 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_loss": 0.07464081048965454, |
|
"eval_max_distance": 95, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.498, |
|
"eval_samples_per_second": 142.939, |
|
"eval_steps_per_second": 2.859, |
|
"step": 62220 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 0.0009436621662663243, |
|
"loss": 0.0395, |
|
"step": 62952 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_loss": 0.07409149408340454, |
|
"eval_max_distance": 85, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.192, |
|
"eval_samples_per_second": 119.275, |
|
"eval_steps_per_second": 2.385, |
|
"step": 62952 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.0009386095695629366, |
|
"loss": 0.0396, |
|
"step": 63684 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 0.08215318620204926, |
|
"eval_max_distance": 86, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.7115, |
|
"eval_samples_per_second": 106.123, |
|
"eval_steps_per_second": 2.122, |
|
"step": 63684 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.0009335569728595489, |
|
"loss": 0.0388, |
|
"step": 64416 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_loss": 0.07010287791490555, |
|
"eval_max_distance": 100, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.6721, |
|
"eval_samples_per_second": 107.019, |
|
"eval_steps_per_second": 2.14, |
|
"step": 64416 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 0.0009285043761561612, |
|
"loss": 0.038, |
|
"step": 65148 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 0.07180768251419067, |
|
"eval_max_distance": 111, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.9954, |
|
"eval_samples_per_second": 100.093, |
|
"eval_steps_per_second": 2.002, |
|
"step": 65148 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 0.0009234517794527735, |
|
"loss": 0.0388, |
|
"step": 65880 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_loss": 0.08386632055044174, |
|
"eval_max_distance": 102, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.8999, |
|
"eval_samples_per_second": 102.042, |
|
"eval_steps_per_second": 2.041, |
|
"step": 65880 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 0.0009183991827493857, |
|
"loss": 0.0392, |
|
"step": 66612 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 0.08535018563270569, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.809, |
|
"eval_samples_per_second": 103.972, |
|
"eval_steps_per_second": 2.079, |
|
"step": 66612 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 0.000913346586045998, |
|
"loss": 0.0387, |
|
"step": 67344 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_loss": 0.10104835033416748, |
|
"eval_max_distance": 131, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 4.431, |
|
"eval_samples_per_second": 112.841, |
|
"eval_steps_per_second": 2.257, |
|
"step": 67344 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 0.0009082939893426102, |
|
"loss": 0.0393, |
|
"step": 68076 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 0.0824475884437561, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.444, |
|
"eval_samples_per_second": 91.844, |
|
"eval_steps_per_second": 1.837, |
|
"step": 68076 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 0.0009032413926392225, |
|
"loss": 0.0393, |
|
"step": 68808 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_loss": 0.09969473630189896, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.366, |
|
"eval_samples_per_second": 93.179, |
|
"eval_steps_per_second": 1.864, |
|
"step": 68808 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.0008981887959358348, |
|
"loss": 0.0392, |
|
"step": 69540 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 0.10442204028367996, |
|
"eval_max_distance": 128, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.5386, |
|
"eval_samples_per_second": 90.276, |
|
"eval_steps_per_second": 1.806, |
|
"step": 69540 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.000893136199232447, |
|
"loss": 0.0404, |
|
"step": 70272 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.10716131329536438, |
|
"eval_max_distance": 146, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.5122, |
|
"eval_samples_per_second": 90.708, |
|
"eval_steps_per_second": 1.814, |
|
"step": 70272 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 0.0008880836025290594, |
|
"loss": 0.0432, |
|
"step": 71004 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 0.12576422095298767, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 6.7417, |
|
"eval_samples_per_second": 74.166, |
|
"eval_steps_per_second": 1.483, |
|
"step": 71004 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 0.0008830310058256716, |
|
"loss": 0.048, |
|
"step": 71736 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 0.2953876852989197, |
|
"eval_max_distance": 152, |
|
"eval_mean_distance": 7, |
|
"eval_runtime": 7.5184, |
|
"eval_samples_per_second": 66.503, |
|
"eval_steps_per_second": 1.33, |
|
"step": 71736 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.0008779784091222839, |
|
"loss": 0.0527, |
|
"step": 72468 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 0.3366003632545471, |
|
"eval_max_distance": 143, |
|
"eval_mean_distance": 9, |
|
"eval_runtime": 8.2771, |
|
"eval_samples_per_second": 60.408, |
|
"eval_steps_per_second": 1.208, |
|
"step": 72468 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0008729258124188961, |
|
"loss": 0.0751, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.06711767613887787, |
|
"eval_max_distance": 91, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.714, |
|
"eval_samples_per_second": 106.068, |
|
"eval_steps_per_second": 2.121, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 0.0008678732157155085, |
|
"loss": 0.0586, |
|
"step": 73932 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 0.10157773643732071, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.6629, |
|
"eval_samples_per_second": 136.503, |
|
"eval_steps_per_second": 2.73, |
|
"step": 73932 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 0.0008628206190121207, |
|
"loss": 0.0295, |
|
"step": 74664 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"eval_loss": 0.10495835542678833, |
|
"eval_max_distance": 34, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.5918, |
|
"eval_samples_per_second": 139.208, |
|
"eval_steps_per_second": 2.784, |
|
"step": 74664 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 0.000857768022308733, |
|
"loss": 0.0253, |
|
"step": 75396 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 0.10891541838645935, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.7467, |
|
"eval_samples_per_second": 133.451, |
|
"eval_steps_per_second": 2.669, |
|
"step": 75396 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 0.0008527154256053453, |
|
"loss": 0.0245, |
|
"step": 76128 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_loss": 0.08191870152950287, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.751, |
|
"eval_samples_per_second": 133.296, |
|
"eval_steps_per_second": 2.666, |
|
"step": 76128 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 0.0008476628289019576, |
|
"loss": 0.0238, |
|
"step": 76860 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 0.08061353117227554, |
|
"eval_max_distance": 98, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.894, |
|
"eval_samples_per_second": 128.402, |
|
"eval_steps_per_second": 2.568, |
|
"step": 76860 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 0.0008426102321985698, |
|
"loss": 0.0236, |
|
"step": 77592 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"eval_loss": 0.07853155583143234, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.7131, |
|
"eval_samples_per_second": 134.66, |
|
"eval_steps_per_second": 2.693, |
|
"step": 77592 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"learning_rate": 0.0008375576354951821, |
|
"loss": 0.0236, |
|
"step": 78324 |
|
}, |
|
{ |
|
"epoch": 5.35, |
|
"eval_loss": 0.07758867740631104, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.8414, |
|
"eval_samples_per_second": 130.161, |
|
"eval_steps_per_second": 2.603, |
|
"step": 78324 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 0.0008325050387917944, |
|
"loss": 0.0241, |
|
"step": 79056 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 0.06412464380264282, |
|
"eval_max_distance": 106, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.257, |
|
"eval_samples_per_second": 117.452, |
|
"eval_steps_per_second": 2.349, |
|
"step": 79056 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 0.0008274524420884067, |
|
"loss": 0.0237, |
|
"step": 79788 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_loss": 0.053821686655282974, |
|
"eval_max_distance": 83, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.7649, |
|
"eval_samples_per_second": 132.805, |
|
"eval_steps_per_second": 2.656, |
|
"step": 79788 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 0.0008223998453850188, |
|
"loss": 0.0241, |
|
"step": 80520 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 0.06780679523944855, |
|
"eval_max_distance": 87, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.095, |
|
"eval_samples_per_second": 122.101, |
|
"eval_steps_per_second": 2.442, |
|
"step": 80520 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 0.0008173472486816312, |
|
"loss": 0.0239, |
|
"step": 81252 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 0.051452018320560455, |
|
"eval_max_distance": 92, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.548, |
|
"eval_samples_per_second": 109.939, |
|
"eval_steps_per_second": 2.199, |
|
"step": 81252 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 0.0008122946519782435, |
|
"loss": 0.0249, |
|
"step": 81984 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.06307797878980637, |
|
"eval_max_distance": 112, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.659, |
|
"eval_samples_per_second": 107.32, |
|
"eval_steps_per_second": 2.146, |
|
"step": 81984 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 0.0008072420552748558, |
|
"loss": 0.0248, |
|
"step": 82716 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"eval_loss": 0.06688910722732544, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.818, |
|
"eval_samples_per_second": 103.777, |
|
"eval_steps_per_second": 2.076, |
|
"step": 82716 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 0.000802189458571468, |
|
"loss": 0.025, |
|
"step": 83448 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 0.06885194033384323, |
|
"eval_max_distance": 137, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.755, |
|
"eval_samples_per_second": 105.152, |
|
"eval_steps_per_second": 2.103, |
|
"step": 83448 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 0.0007971368618680803, |
|
"loss": 0.0252, |
|
"step": 84180 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"eval_loss": 0.08273730427026749, |
|
"eval_max_distance": 126, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.2933, |
|
"eval_samples_per_second": 94.46, |
|
"eval_steps_per_second": 1.889, |
|
"step": 84180 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 0.0007920842651646926, |
|
"loss": 0.0265, |
|
"step": 84912 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"eval_loss": 0.14016655087471008, |
|
"eval_max_distance": 153, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.7979, |
|
"eval_samples_per_second": 86.238, |
|
"eval_steps_per_second": 1.725, |
|
"step": 84912 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0007870316684613049, |
|
"loss": 0.0281, |
|
"step": 85644 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"eval_loss": 0.11726028472185135, |
|
"eval_max_distance": 147, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.5217, |
|
"eval_samples_per_second": 90.552, |
|
"eval_steps_per_second": 1.811, |
|
"step": 85644 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 0.0007819790717579171, |
|
"loss": 0.0309, |
|
"step": 86376 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"eval_loss": 0.19342409074306488, |
|
"eval_max_distance": 159, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 6.4908, |
|
"eval_samples_per_second": 77.032, |
|
"eval_steps_per_second": 1.541, |
|
"step": 86376 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 0.0007769264750545294, |
|
"loss": 0.0353, |
|
"step": 87108 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"eval_loss": 0.21934360265731812, |
|
"eval_max_distance": 163, |
|
"eval_mean_distance": 6, |
|
"eval_runtime": 7.0293, |
|
"eval_samples_per_second": 71.131, |
|
"eval_steps_per_second": 1.423, |
|
"step": 87108 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.0007718738783511417, |
|
"loss": 0.0567, |
|
"step": 87840 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.05241122096776962, |
|
"eval_max_distance": 75, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.3556, |
|
"eval_samples_per_second": 114.796, |
|
"eval_steps_per_second": 2.296, |
|
"step": 87840 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.000766821281647754, |
|
"loss": 0.0428, |
|
"step": 88572 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_loss": 0.10328952223062515, |
|
"eval_max_distance": 41, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.055, |
|
"eval_samples_per_second": 163.667, |
|
"eval_steps_per_second": 3.273, |
|
"step": 88572 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0007617686849443662, |
|
"loss": 0.0216, |
|
"step": 89304 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 0.13436926901340485, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.5469, |
|
"eval_samples_per_second": 140.969, |
|
"eval_steps_per_second": 2.819, |
|
"step": 89304 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 0.0007567160882409785, |
|
"loss": 0.0187, |
|
"step": 90036 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 0.08495381474494934, |
|
"eval_max_distance": 64, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.4669, |
|
"eval_samples_per_second": 144.221, |
|
"eval_steps_per_second": 2.884, |
|
"step": 90036 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 0.0007516634915375909, |
|
"loss": 0.0175, |
|
"step": 90768 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 0.05331624671816826, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.4958, |
|
"eval_samples_per_second": 143.031, |
|
"eval_steps_per_second": 2.861, |
|
"step": 90768 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 0.000746610894834203, |
|
"loss": 0.0171, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"eval_loss": 0.05643443390727043, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.007, |
|
"eval_samples_per_second": 124.782, |
|
"eval_steps_per_second": 2.496, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 0.0007415582981308153, |
|
"loss": 0.0163, |
|
"step": 92232 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": 0.05690987408161163, |
|
"eval_max_distance": 74, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2524, |
|
"eval_samples_per_second": 153.733, |
|
"eval_steps_per_second": 3.075, |
|
"step": 92232 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.0007365057014274276, |
|
"loss": 0.0166, |
|
"step": 92964 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"eval_loss": 0.060303423553705215, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2886, |
|
"eval_samples_per_second": 152.041, |
|
"eval_steps_per_second": 3.041, |
|
"step": 92964 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00073145310472404, |
|
"loss": 0.0166, |
|
"step": 93696 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.045122452080249786, |
|
"eval_max_distance": 90, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1433, |
|
"eval_samples_per_second": 120.676, |
|
"eval_steps_per_second": 2.414, |
|
"step": 93696 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.0007264005080206521, |
|
"loss": 0.0167, |
|
"step": 94428 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 0.045794181525707245, |
|
"eval_max_distance": 117, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.2035, |
|
"eval_samples_per_second": 118.949, |
|
"eval_steps_per_second": 2.379, |
|
"step": 94428 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 0.0007213479113172644, |
|
"loss": 0.0169, |
|
"step": 95160 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 0.06077966466546059, |
|
"eval_max_distance": 94, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.8906, |
|
"eval_samples_per_second": 128.513, |
|
"eval_steps_per_second": 2.57, |
|
"step": 95160 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.0007162953146138768, |
|
"loss": 0.0169, |
|
"step": 95892 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 0.061703383922576904, |
|
"eval_max_distance": 106, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.324, |
|
"eval_samples_per_second": 115.634, |
|
"eval_steps_per_second": 2.313, |
|
"step": 95892 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.000711242717910489, |
|
"loss": 0.017, |
|
"step": 96624 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 0.06622283160686493, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.818, |
|
"eval_samples_per_second": 103.777, |
|
"eval_steps_per_second": 2.076, |
|
"step": 96624 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 0.0007061901212071012, |
|
"loss": 0.0175, |
|
"step": 97356 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 0.05441684648394585, |
|
"eval_max_distance": 134, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.626, |
|
"eval_samples_per_second": 108.084, |
|
"eval_steps_per_second": 2.162, |
|
"step": 97356 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 0.0007011375245037135, |
|
"loss": 0.0174, |
|
"step": 98088 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 0.055900994688272476, |
|
"eval_max_distance": 124, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.5758, |
|
"eval_samples_per_second": 109.272, |
|
"eval_steps_per_second": 2.185, |
|
"step": 98088 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.0006960849278003259, |
|
"loss": 0.0183, |
|
"step": 98820 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 0.0677228718996048, |
|
"eval_max_distance": 131, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.9539, |
|
"eval_samples_per_second": 100.931, |
|
"eval_steps_per_second": 2.019, |
|
"step": 98820 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 0.0006910323310969381, |
|
"loss": 0.0184, |
|
"step": 99552 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 0.07514572888612747, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.1829, |
|
"eval_samples_per_second": 96.47, |
|
"eval_steps_per_second": 1.929, |
|
"step": 99552 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 0.0006859797343935503, |
|
"loss": 0.0198, |
|
"step": 100284 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"eval_loss": 0.08426449447870255, |
|
"eval_max_distance": 142, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.1188, |
|
"eval_samples_per_second": 97.679, |
|
"eval_steps_per_second": 1.954, |
|
"step": 100284 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 0.0006809271376901627, |
|
"loss": 0.0212, |
|
"step": 101016 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"eval_loss": 0.16564741730690002, |
|
"eval_max_distance": 149, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 5.7331, |
|
"eval_samples_per_second": 87.213, |
|
"eval_steps_per_second": 1.744, |
|
"step": 101016 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 0.0006758745409867749, |
|
"loss": 0.0246, |
|
"step": 101748 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"eval_loss": 0.17887726426124573, |
|
"eval_max_distance": 151, |
|
"eval_mean_distance": 5, |
|
"eval_runtime": 6.705, |
|
"eval_samples_per_second": 74.571, |
|
"eval_steps_per_second": 1.491, |
|
"step": 101748 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.0006708219442833872, |
|
"loss": 0.0468, |
|
"step": 102480 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.08162112534046173, |
|
"eval_max_distance": 58, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.066, |
|
"eval_samples_per_second": 122.971, |
|
"eval_steps_per_second": 2.459, |
|
"step": 102480 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 0.0006657693475799994, |
|
"loss": 0.0334, |
|
"step": 103212 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"eval_loss": 0.13462159037590027, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.4851, |
|
"eval_samples_per_second": 143.47, |
|
"eval_steps_per_second": 2.869, |
|
"step": 103212 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.0006607167508766118, |
|
"loss": 0.0171, |
|
"step": 103944 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_loss": 0.07761073857545853, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2222, |
|
"eval_samples_per_second": 155.174, |
|
"eval_steps_per_second": 3.103, |
|
"step": 103944 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"learning_rate": 0.000655664154173224, |
|
"loss": 0.0142, |
|
"step": 104676 |
|
}, |
|
{ |
|
"epoch": 7.15, |
|
"eval_loss": 0.08971526473760605, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.1513, |
|
"eval_samples_per_second": 158.665, |
|
"eval_steps_per_second": 3.173, |
|
"step": 104676 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 0.0006506115574698363, |
|
"loss": 0.0129, |
|
"step": 105408 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.05487065017223358, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.6564, |
|
"eval_samples_per_second": 136.748, |
|
"eval_steps_per_second": 2.735, |
|
"step": 105408 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 0.0006455589607664486, |
|
"loss": 0.013, |
|
"step": 106140 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 0.05428076535463333, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.3832, |
|
"eval_samples_per_second": 147.791, |
|
"eval_steps_per_second": 2.956, |
|
"step": 106140 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 0.0006405063640630608, |
|
"loss": 0.0122, |
|
"step": 106872 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_loss": 0.05511023849248886, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.5886, |
|
"eval_samples_per_second": 139.331, |
|
"eval_steps_per_second": 2.787, |
|
"step": 106872 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"learning_rate": 0.0006354537673596731, |
|
"loss": 0.0121, |
|
"step": 107604 |
|
}, |
|
{ |
|
"epoch": 7.35, |
|
"eval_loss": 0.07942553609609604, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2062, |
|
"eval_samples_per_second": 155.946, |
|
"eval_steps_per_second": 3.119, |
|
"step": 107604 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"learning_rate": 0.0006304011706562855, |
|
"loss": 0.0123, |
|
"step": 108336 |
|
}, |
|
{ |
|
"epoch": 7.4, |
|
"eval_loss": 0.04670649766921997, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2736, |
|
"eval_samples_per_second": 152.737, |
|
"eval_steps_per_second": 3.055, |
|
"step": 108336 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.0006253485739528977, |
|
"loss": 0.0122, |
|
"step": 109068 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"eval_loss": 0.04738261550664902, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.904, |
|
"eval_samples_per_second": 128.074, |
|
"eval_steps_per_second": 2.561, |
|
"step": 109068 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 0.0006202959772495099, |
|
"loss": 0.0123, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.04781508818268776, |
|
"eval_max_distance": 104, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.681, |
|
"eval_samples_per_second": 135.831, |
|
"eval_steps_per_second": 2.717, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 0.0006152433805461222, |
|
"loss": 0.012, |
|
"step": 110532 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 0.05633458122611046, |
|
"eval_max_distance": 112, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.3082, |
|
"eval_samples_per_second": 116.057, |
|
"eval_steps_per_second": 2.321, |
|
"step": 110532 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 0.0006101907838427346, |
|
"loss": 0.0124, |
|
"step": 111264 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_loss": 0.04918990656733513, |
|
"eval_max_distance": 115, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1629, |
|
"eval_samples_per_second": 120.11, |
|
"eval_steps_per_second": 2.402, |
|
"step": 111264 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 0.0006051381871393467, |
|
"loss": 0.0129, |
|
"step": 111996 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 0.05522555857896805, |
|
"eval_max_distance": 86, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1427, |
|
"eval_samples_per_second": 120.694, |
|
"eval_steps_per_second": 2.414, |
|
"step": 111996 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.000600085590435959, |
|
"loss": 0.0129, |
|
"step": 112728 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"eval_loss": 0.05691719055175781, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.176, |
|
"eval_samples_per_second": 119.731, |
|
"eval_steps_per_second": 2.395, |
|
"step": 112728 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 0.0005950329937325714, |
|
"loss": 0.0128, |
|
"step": 113460 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 0.07056962698698044, |
|
"eval_max_distance": 106, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.6735, |
|
"eval_samples_per_second": 106.987, |
|
"eval_steps_per_second": 2.14, |
|
"step": 113460 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.0005899803970291835, |
|
"loss": 0.0133, |
|
"step": 114192 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 0.0818600282073021, |
|
"eval_max_distance": 145, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.9214, |
|
"eval_samples_per_second": 101.596, |
|
"eval_steps_per_second": 2.032, |
|
"step": 114192 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 0.0005849278003257958, |
|
"loss": 0.0136, |
|
"step": 114924 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 0.10967979580163956, |
|
"eval_max_distance": 141, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.3911, |
|
"eval_samples_per_second": 92.745, |
|
"eval_steps_per_second": 1.855, |
|
"step": 114924 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.0005798752036224081, |
|
"loss": 0.015, |
|
"step": 115656 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"eval_loss": 0.1418592780828476, |
|
"eval_max_distance": 162, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 5.9144, |
|
"eval_samples_per_second": 84.54, |
|
"eval_steps_per_second": 1.691, |
|
"step": 115656 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 0.0005748226069190205, |
|
"loss": 0.0174, |
|
"step": 116388 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 0.1534065157175064, |
|
"eval_max_distance": 154, |
|
"eval_mean_distance": 4, |
|
"eval_runtime": 5.9441, |
|
"eval_samples_per_second": 84.117, |
|
"eval_steps_per_second": 1.682, |
|
"step": 116388 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0005697700102156326, |
|
"loss": 0.0404, |
|
"step": 117120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.06091161444783211, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.5301, |
|
"eval_samples_per_second": 141.639, |
|
"eval_steps_per_second": 2.833, |
|
"step": 117120 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 0.0005647174135122449, |
|
"loss": 0.0268, |
|
"step": 117852 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 0.10339893400669098, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.087, |
|
"eval_samples_per_second": 161.969, |
|
"eval_steps_per_second": 3.239, |
|
"step": 117852 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0005596648168088573, |
|
"loss": 0.0137, |
|
"step": 118584 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"eval_loss": 0.07406046241521835, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.1741, |
|
"eval_samples_per_second": 157.523, |
|
"eval_steps_per_second": 3.15, |
|
"step": 118584 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 0.0005546122201054696, |
|
"loss": 0.011, |
|
"step": 119316 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 0.08871261775493622, |
|
"eval_max_distance": 66, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.292, |
|
"eval_samples_per_second": 151.884, |
|
"eval_steps_per_second": 3.038, |
|
"step": 119316 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.0005495596234020817, |
|
"loss": 0.0099, |
|
"step": 120048 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 0.05957801640033722, |
|
"eval_max_distance": 59, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2017, |
|
"eval_samples_per_second": 156.166, |
|
"eval_steps_per_second": 3.123, |
|
"step": 120048 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 0.0005445070266986941, |
|
"loss": 0.0096, |
|
"step": 120780 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 0.0444614440202713, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.9626, |
|
"eval_samples_per_second": 168.773, |
|
"eval_steps_per_second": 3.375, |
|
"step": 120780 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.0005394544299953064, |
|
"loss": 0.0091, |
|
"step": 121512 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"eval_loss": 0.04867273196578026, |
|
"eval_max_distance": 62, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2803, |
|
"eval_samples_per_second": 152.425, |
|
"eval_steps_per_second": 3.049, |
|
"step": 121512 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 0.0005344018332919186, |
|
"loss": 0.0088, |
|
"step": 122244 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 0.03973795846104622, |
|
"eval_max_distance": 73, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.6655, |
|
"eval_samples_per_second": 136.408, |
|
"eval_steps_per_second": 2.728, |
|
"step": 122244 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 0.0005293492365885308, |
|
"loss": 0.009, |
|
"step": 122976 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"eval_loss": 0.03982974588871002, |
|
"eval_max_distance": 77, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.8429, |
|
"eval_samples_per_second": 130.109, |
|
"eval_steps_per_second": 2.602, |
|
"step": 122976 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 0.0005242966398851432, |
|
"loss": 0.009, |
|
"step": 123708 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.04630805924534798, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.7474, |
|
"eval_samples_per_second": 133.426, |
|
"eval_steps_per_second": 2.669, |
|
"step": 123708 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 0.0005192440431817555, |
|
"loss": 0.0091, |
|
"step": 124440 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"eval_loss": 0.0445212796330452, |
|
"eval_max_distance": 100, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.8615, |
|
"eval_samples_per_second": 129.483, |
|
"eval_steps_per_second": 2.59, |
|
"step": 124440 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 0.0005141914464783677, |
|
"loss": 0.0085, |
|
"step": 125172 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 0.045358140021562576, |
|
"eval_max_distance": 93, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.689, |
|
"eval_samples_per_second": 135.538, |
|
"eval_steps_per_second": 2.711, |
|
"step": 125172 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 0.00050913884977498, |
|
"loss": 0.0089, |
|
"step": 125904 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"eval_loss": 0.05022185668349266, |
|
"eval_max_distance": 108, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1555, |
|
"eval_samples_per_second": 120.323, |
|
"eval_steps_per_second": 2.406, |
|
"step": 125904 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 0.0005040862530715923, |
|
"loss": 0.0092, |
|
"step": 126636 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"eval_loss": 0.04238352179527283, |
|
"eval_max_distance": 94, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.121, |
|
"eval_samples_per_second": 121.33, |
|
"eval_steps_per_second": 2.427, |
|
"step": 126636 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 0.0004990336563682045, |
|
"loss": 0.009, |
|
"step": 127368 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"eval_loss": 0.05611519515514374, |
|
"eval_max_distance": 86, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.224, |
|
"eval_samples_per_second": 118.372, |
|
"eval_steps_per_second": 2.367, |
|
"step": 127368 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 0.0004939810596648168, |
|
"loss": 0.009, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"eval_loss": 0.057163383811712265, |
|
"eval_max_distance": 94, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.9408, |
|
"eval_samples_per_second": 101.198, |
|
"eval_steps_per_second": 2.024, |
|
"step": 128100 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 0.0004889284629614291, |
|
"loss": 0.0092, |
|
"step": 128832 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"eval_loss": 0.06296004354953766, |
|
"eval_max_distance": 113, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.632, |
|
"eval_samples_per_second": 107.945, |
|
"eval_steps_per_second": 2.159, |
|
"step": 128832 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"learning_rate": 0.00048387586625804136, |
|
"loss": 0.0096, |
|
"step": 129564 |
|
}, |
|
{ |
|
"epoch": 8.85, |
|
"eval_loss": 0.08643154799938202, |
|
"eval_max_distance": 127, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.004, |
|
"eval_samples_per_second": 99.921, |
|
"eval_steps_per_second": 1.998, |
|
"step": 129564 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0004788232695546537, |
|
"loss": 0.0106, |
|
"step": 130296 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"eval_loss": 0.09613845497369766, |
|
"eval_max_distance": 126, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.952, |
|
"eval_samples_per_second": 100.969, |
|
"eval_steps_per_second": 2.019, |
|
"step": 130296 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 0.0004737706728512659, |
|
"loss": 0.012, |
|
"step": 131028 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"eval_loss": 0.11241430044174194, |
|
"eval_max_distance": 124, |
|
"eval_mean_distance": 3, |
|
"eval_runtime": 5.5085, |
|
"eval_samples_per_second": 90.769, |
|
"eval_steps_per_second": 1.815, |
|
"step": 131028 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0004687180761478782, |
|
"loss": 0.0334, |
|
"step": 131760 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 0.07378821820020676, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.74, |
|
"eval_samples_per_second": 133.69, |
|
"eval_steps_per_second": 2.674, |
|
"step": 131760 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 0.00046366547944449044, |
|
"loss": 0.0223, |
|
"step": 132492 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_loss": 0.14633670449256897, |
|
"eval_max_distance": 37, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.2404, |
|
"eval_samples_per_second": 154.3, |
|
"eval_steps_per_second": 3.086, |
|
"step": 132492 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 0.00045861288274110277, |
|
"loss": 0.0112, |
|
"step": 133224 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_loss": 0.09432947635650635, |
|
"eval_max_distance": 37, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.907, |
|
"eval_samples_per_second": 171.999, |
|
"eval_steps_per_second": 3.44, |
|
"step": 133224 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.00045356028603771504, |
|
"loss": 0.0086, |
|
"step": 133956 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"eval_loss": 0.08595520257949829, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.9948, |
|
"eval_samples_per_second": 166.958, |
|
"eval_steps_per_second": 3.339, |
|
"step": 133956 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"learning_rate": 0.0004485076893343273, |
|
"loss": 0.0074, |
|
"step": 134688 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 0.08134690672159195, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.541, |
|
"eval_samples_per_second": 141.203, |
|
"eval_steps_per_second": 2.824, |
|
"step": 134688 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 0.0004434550926309396, |
|
"loss": 0.0068, |
|
"step": 135420 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"eval_loss": 0.07917257398366928, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.7556, |
|
"eval_samples_per_second": 181.448, |
|
"eval_steps_per_second": 3.629, |
|
"step": 135420 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 0.0004384024959275518, |
|
"loss": 0.0064, |
|
"step": 136152 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 0.0820038914680481, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.9383, |
|
"eval_samples_per_second": 170.165, |
|
"eval_steps_per_second": 3.403, |
|
"step": 136152 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 0.0004333498992241641, |
|
"loss": 0.0063, |
|
"step": 136884 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"eval_loss": 0.05062669888138771, |
|
"eval_max_distance": 84, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.894, |
|
"eval_samples_per_second": 172.769, |
|
"eval_steps_per_second": 3.455, |
|
"step": 136884 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"learning_rate": 0.0004282973025207764, |
|
"loss": 0.0062, |
|
"step": 137616 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 0.046124935150146484, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.247, |
|
"eval_samples_per_second": 153.989, |
|
"eval_steps_per_second": 3.08, |
|
"step": 137616 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"learning_rate": 0.00042324470581738867, |
|
"loss": 0.0062, |
|
"step": 138348 |
|
}, |
|
{ |
|
"epoch": 9.45, |
|
"eval_loss": 0.033984892070293427, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.394, |
|
"eval_samples_per_second": 147.318, |
|
"eval_steps_per_second": 2.946, |
|
"step": 138348 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 0.00041819210911400094, |
|
"loss": 0.0063, |
|
"step": 139080 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 0.038424424827098846, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.5795, |
|
"eval_samples_per_second": 139.684, |
|
"eval_steps_per_second": 2.794, |
|
"step": 139080 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 0.0004131395124106132, |
|
"loss": 0.0059, |
|
"step": 139812 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"eval_loss": 0.03822114318609238, |
|
"eval_max_distance": 88, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.5907, |
|
"eval_samples_per_second": 139.248, |
|
"eval_steps_per_second": 2.785, |
|
"step": 139812 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 0.0004080869157072255, |
|
"loss": 0.0063, |
|
"step": 140544 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 0.04504471272230148, |
|
"eval_max_distance": 108, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.715, |
|
"eval_samples_per_second": 134.588, |
|
"eval_steps_per_second": 2.692, |
|
"step": 140544 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 0.00040303431900383775, |
|
"loss": 0.0063, |
|
"step": 141276 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_loss": 0.04976603761315346, |
|
"eval_max_distance": 109, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.9331, |
|
"eval_samples_per_second": 127.125, |
|
"eval_steps_per_second": 2.543, |
|
"step": 141276 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 0.00039798172230045, |
|
"loss": 0.0064, |
|
"step": 142008 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 0.05060645565390587, |
|
"eval_max_distance": 111, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.1192, |
|
"eval_samples_per_second": 121.383, |
|
"eval_steps_per_second": 2.428, |
|
"step": 142008 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 0.00039292912559706235, |
|
"loss": 0.0062, |
|
"step": 142740 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"eval_loss": 0.05505865439772606, |
|
"eval_max_distance": 109, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.3878, |
|
"eval_samples_per_second": 113.953, |
|
"eval_steps_per_second": 2.279, |
|
"step": 142740 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"learning_rate": 0.00038787652889367457, |
|
"loss": 0.0061, |
|
"step": 143472 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 0.0596136748790741, |
|
"eval_max_distance": 142, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 4.395, |
|
"eval_samples_per_second": 113.765, |
|
"eval_steps_per_second": 2.275, |
|
"step": 143472 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 0.0003828239321902869, |
|
"loss": 0.0062, |
|
"step": 144204 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"eval_loss": 0.07242786139249802, |
|
"eval_max_distance": 135, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.5735, |
|
"eval_samples_per_second": 109.325, |
|
"eval_steps_per_second": 2.187, |
|
"step": 144204 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0003777713354868991, |
|
"loss": 0.0069, |
|
"step": 144936 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 0.09855272620916367, |
|
"eval_max_distance": 151, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 4.9915, |
|
"eval_samples_per_second": 100.17, |
|
"eval_steps_per_second": 2.003, |
|
"step": 144936 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"learning_rate": 0.00037271873878351144, |
|
"loss": 0.0078, |
|
"step": 145668 |
|
}, |
|
{ |
|
"epoch": 9.95, |
|
"eval_loss": 0.10503390431404114, |
|
"eval_max_distance": 133, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.2264, |
|
"eval_samples_per_second": 95.668, |
|
"eval_steps_per_second": 1.913, |
|
"step": 145668 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.5008541168431845e-06, |
|
"loss": 0.1557, |
|
"step": 146400 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.0961650088429451, |
|
"eval_max_distance": 130, |
|
"eval_mean_distance": 2, |
|
"eval_runtime": 5.211, |
|
"eval_samples_per_second": 95.952, |
|
"eval_steps_per_second": 1.919, |
|
"step": 146400 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 7.502562350529553e-06, |
|
"loss": 0.1283, |
|
"step": 147864 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"eval_loss": 0.03514343872666359, |
|
"eval_max_distance": 71, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.8424, |
|
"eval_samples_per_second": 175.908, |
|
"eval_steps_per_second": 3.518, |
|
"step": 147864 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"learning_rate": 9.974702591517603e-06, |
|
"loss": 0.0159, |
|
"step": 149328 |
|
}, |
|
{ |
|
"epoch": 10.2, |
|
"eval_loss": 0.03262121602892876, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5617, |
|
"eval_samples_per_second": 195.18, |
|
"eval_steps_per_second": 3.904, |
|
"step": 149328 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"learning_rate": 9.924176798859719e-06, |
|
"loss": 0.0088, |
|
"step": 150792 |
|
}, |
|
{ |
|
"epoch": 10.3, |
|
"eval_loss": 0.03363807499408722, |
|
"eval_max_distance": 72, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5129, |
|
"eval_samples_per_second": 198.972, |
|
"eval_steps_per_second": 3.979, |
|
"step": 150792 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 9.873651006201834e-06, |
|
"loss": 0.0063, |
|
"step": 152256 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"eval_loss": 0.031921450048685074, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5172, |
|
"eval_samples_per_second": 198.633, |
|
"eval_steps_per_second": 3.973, |
|
"step": 152256 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"learning_rate": 9.823125213543951e-06, |
|
"loss": 0.0053, |
|
"step": 153720 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"eval_loss": 0.031937919557094574, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5246, |
|
"eval_samples_per_second": 198.048, |
|
"eval_steps_per_second": 3.961, |
|
"step": 153720 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 9.772599420886064e-06, |
|
"loss": 0.0043, |
|
"step": 155184 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"eval_loss": 0.03107617422938347, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.7282, |
|
"eval_samples_per_second": 183.269, |
|
"eval_steps_per_second": 3.665, |
|
"step": 155184 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 9.722073628228181e-06, |
|
"loss": 0.0038, |
|
"step": 156648 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"eval_loss": 0.033566124737262726, |
|
"eval_max_distance": 80, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.9105, |
|
"eval_samples_per_second": 171.789, |
|
"eval_steps_per_second": 3.436, |
|
"step": 156648 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 9.671547835570297e-06, |
|
"loss": 0.0031, |
|
"step": 158112 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"eval_loss": 0.03568200394511223, |
|
"eval_max_distance": 97, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 2.9978, |
|
"eval_samples_per_second": 166.787, |
|
"eval_steps_per_second": 3.336, |
|
"step": 158112 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 9.621022042912412e-06, |
|
"loss": 0.0025, |
|
"step": 159576 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"eval_loss": 0.03964918479323387, |
|
"eval_max_distance": 105, |
|
"eval_mean_distance": 1, |
|
"eval_runtime": 3.3639, |
|
"eval_samples_per_second": 148.637, |
|
"eval_steps_per_second": 2.973, |
|
"step": 159576 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.570496250254529e-06, |
|
"loss": 0.0257, |
|
"step": 161040 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 0.030957885086536407, |
|
"eval_max_distance": 75, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.9202, |
|
"eval_samples_per_second": 171.221, |
|
"eval_steps_per_second": 3.424, |
|
"step": 161040 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"learning_rate": 9.519970457596643e-06, |
|
"loss": 0.0239, |
|
"step": 162504 |
|
}, |
|
{ |
|
"epoch": 11.1, |
|
"eval_loss": 0.020240401849150658, |
|
"eval_max_distance": 67, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4054, |
|
"eval_samples_per_second": 207.863, |
|
"eval_steps_per_second": 4.157, |
|
"step": 162504 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 9.46944466493876e-06, |
|
"loss": 0.0077, |
|
"step": 163968 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"eval_loss": 0.02426925301551819, |
|
"eval_max_distance": 67, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2824, |
|
"eval_samples_per_second": 219.066, |
|
"eval_steps_per_second": 4.381, |
|
"step": 163968 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"learning_rate": 9.418918872280875e-06, |
|
"loss": 0.0052, |
|
"step": 165432 |
|
}, |
|
{ |
|
"epoch": 11.3, |
|
"eval_loss": 0.021467674523591995, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3057, |
|
"eval_samples_per_second": 216.857, |
|
"eval_steps_per_second": 4.337, |
|
"step": 165432 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 9.36839307962299e-06, |
|
"loss": 0.004, |
|
"step": 166896 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"eval_loss": 0.02434716746211052, |
|
"eval_max_distance": 60, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2997, |
|
"eval_samples_per_second": 217.424, |
|
"eval_steps_per_second": 4.348, |
|
"step": 166896 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"learning_rate": 9.317867286965105e-06, |
|
"loss": 0.0035, |
|
"step": 168360 |
|
}, |
|
{ |
|
"epoch": 11.5, |
|
"eval_loss": 0.02479979395866394, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2201, |
|
"eval_samples_per_second": 225.211, |
|
"eval_steps_per_second": 4.504, |
|
"step": 168360 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"learning_rate": 9.26734149430722e-06, |
|
"loss": 0.003, |
|
"step": 169824 |
|
}, |
|
{ |
|
"epoch": 11.6, |
|
"eval_loss": 0.0237094946205616, |
|
"eval_max_distance": 65, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.156, |
|
"eval_samples_per_second": 231.91, |
|
"eval_steps_per_second": 4.638, |
|
"step": 169824 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 9.216815701649336e-06, |
|
"loss": 0.0028, |
|
"step": 171288 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"eval_loss": 0.026788845658302307, |
|
"eval_max_distance": 76, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4004, |
|
"eval_samples_per_second": 208.302, |
|
"eval_steps_per_second": 4.166, |
|
"step": 171288 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"learning_rate": 9.166289908991453e-06, |
|
"loss": 0.0023, |
|
"step": 172752 |
|
}, |
|
{ |
|
"epoch": 11.8, |
|
"eval_loss": 0.028016921132802963, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.865, |
|
"eval_samples_per_second": 174.518, |
|
"eval_steps_per_second": 3.49, |
|
"step": 172752 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"learning_rate": 9.115764116333568e-06, |
|
"loss": 0.0019, |
|
"step": 174216 |
|
}, |
|
{ |
|
"epoch": 11.9, |
|
"eval_loss": 0.029591867700219154, |
|
"eval_max_distance": 74, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.5756, |
|
"eval_samples_per_second": 139.837, |
|
"eval_steps_per_second": 2.797, |
|
"step": 174216 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 9.065238323675684e-06, |
|
"loss": 0.0191, |
|
"step": 175680 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 0.0253616813570261, |
|
"eval_max_distance": 70, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.4599, |
|
"eval_samples_per_second": 144.511, |
|
"eval_steps_per_second": 2.89, |
|
"step": 175680 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"learning_rate": 9.014712531017799e-06, |
|
"loss": 0.0184, |
|
"step": 177144 |
|
}, |
|
{ |
|
"epoch": 12.1, |
|
"eval_loss": 0.019834740087389946, |
|
"eval_max_distance": 49, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.8168, |
|
"eval_samples_per_second": 177.505, |
|
"eval_steps_per_second": 3.55, |
|
"step": 177144 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 8.964186738359914e-06, |
|
"loss": 0.0064, |
|
"step": 178608 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"eval_loss": 0.02111968584358692, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.795, |
|
"eval_samples_per_second": 178.891, |
|
"eval_steps_per_second": 3.578, |
|
"step": 178608 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"learning_rate": 8.91366094570203e-06, |
|
"loss": 0.0043, |
|
"step": 180072 |
|
}, |
|
{ |
|
"epoch": 12.31, |
|
"eval_loss": 0.019950907677412033, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.114, |
|
"eval_samples_per_second": 236.522, |
|
"eval_steps_per_second": 4.73, |
|
"step": 180072 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"learning_rate": 8.863135153044146e-06, |
|
"loss": 0.0033, |
|
"step": 181536 |
|
}, |
|
{ |
|
"epoch": 12.41, |
|
"eval_loss": 0.022810520604252815, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.7861, |
|
"eval_samples_per_second": 179.465, |
|
"eval_steps_per_second": 3.589, |
|
"step": 181536 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"learning_rate": 8.81260936038626e-06, |
|
"loss": 0.0029, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 12.51, |
|
"eval_loss": 0.022028256207704544, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4, |
|
"eval_samples_per_second": 208.337, |
|
"eval_steps_per_second": 4.167, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 8.762083567728377e-06, |
|
"loss": 0.0025, |
|
"step": 184464 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"eval_loss": 0.021184444427490234, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6483, |
|
"eval_samples_per_second": 188.797, |
|
"eval_steps_per_second": 3.776, |
|
"step": 184464 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 8.711557775070492e-06, |
|
"loss": 0.0024, |
|
"step": 185928 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"eval_loss": 0.023559901863336563, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.7031, |
|
"eval_samples_per_second": 184.976, |
|
"eval_steps_per_second": 3.7, |
|
"step": 185928 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 8.661031982412607e-06, |
|
"loss": 0.002, |
|
"step": 187392 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"eval_loss": 0.023327892646193504, |
|
"eval_max_distance": 55, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5819, |
|
"eval_samples_per_second": 193.657, |
|
"eval_steps_per_second": 3.873, |
|
"step": 187392 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 8.610506189754723e-06, |
|
"loss": 0.0016, |
|
"step": 188856 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"eval_loss": 0.027195889502763748, |
|
"eval_max_distance": 61, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.1865, |
|
"eval_samples_per_second": 156.911, |
|
"eval_steps_per_second": 3.138, |
|
"step": 188856 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"learning_rate": 8.559980397096838e-06, |
|
"loss": 0.0156, |
|
"step": 190320 |
|
}, |
|
{ |
|
"epoch": 13.01, |
|
"eval_loss": 0.022682741284370422, |
|
"eval_max_distance": 57, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6784, |
|
"eval_samples_per_second": 186.677, |
|
"eval_steps_per_second": 3.734, |
|
"step": 190320 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 8.509454604438953e-06, |
|
"loss": 0.0159, |
|
"step": 191784 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"eval_loss": 0.018665272742509842, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.861, |
|
"eval_samples_per_second": 174.766, |
|
"eval_steps_per_second": 3.495, |
|
"step": 191784 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 8.45892881178107e-06, |
|
"loss": 0.0057, |
|
"step": 193248 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 0.018314659595489502, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.753, |
|
"eval_samples_per_second": 181.621, |
|
"eval_steps_per_second": 3.632, |
|
"step": 193248 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"learning_rate": 8.408403019123184e-06, |
|
"loss": 0.0038, |
|
"step": 194712 |
|
}, |
|
{ |
|
"epoch": 13.31, |
|
"eval_loss": 0.021940866485238075, |
|
"eval_max_distance": 40, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2402, |
|
"eval_samples_per_second": 223.198, |
|
"eval_steps_per_second": 4.464, |
|
"step": 194712 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"learning_rate": 8.357877226465301e-06, |
|
"loss": 0.0029, |
|
"step": 196176 |
|
}, |
|
{ |
|
"epoch": 13.41, |
|
"eval_loss": 0.019176060333848, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2189, |
|
"eval_samples_per_second": 225.341, |
|
"eval_steps_per_second": 4.507, |
|
"step": 196176 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 8.307351433807416e-06, |
|
"loss": 0.0026, |
|
"step": 197640 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"eval_loss": 0.019291119650006294, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1239, |
|
"eval_samples_per_second": 235.419, |
|
"eval_steps_per_second": 4.708, |
|
"step": 197640 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"learning_rate": 8.256825641149531e-06, |
|
"loss": 0.0022, |
|
"step": 199104 |
|
}, |
|
{ |
|
"epoch": 13.61, |
|
"eval_loss": 0.019858654588460922, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6694, |
|
"eval_samples_per_second": 187.308, |
|
"eval_steps_per_second": 3.746, |
|
"step": 199104 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"learning_rate": 8.206299848491647e-06, |
|
"loss": 0.0021, |
|
"step": 200568 |
|
}, |
|
{ |
|
"epoch": 13.71, |
|
"eval_loss": 0.020177775993943214, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5426, |
|
"eval_samples_per_second": 196.646, |
|
"eval_steps_per_second": 3.933, |
|
"step": 200568 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"learning_rate": 8.155774055833762e-06, |
|
"loss": 0.0018, |
|
"step": 202032 |
|
}, |
|
{ |
|
"epoch": 13.81, |
|
"eval_loss": 0.02495921589434147, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.252, |
|
"eval_samples_per_second": 222.025, |
|
"eval_steps_per_second": 4.44, |
|
"step": 202032 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"learning_rate": 8.105248263175877e-06, |
|
"loss": 0.0015, |
|
"step": 203496 |
|
}, |
|
{ |
|
"epoch": 13.91, |
|
"eval_loss": 0.022407229989767075, |
|
"eval_max_distance": 56, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.368, |
|
"eval_samples_per_second": 211.149, |
|
"eval_steps_per_second": 4.223, |
|
"step": 203496 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"learning_rate": 8.054722470517994e-06, |
|
"loss": 0.014, |
|
"step": 204960 |
|
}, |
|
{ |
|
"epoch": 14.01, |
|
"eval_loss": 0.0217585526406765, |
|
"eval_max_distance": 53, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.764, |
|
"eval_samples_per_second": 180.895, |
|
"eval_steps_per_second": 3.618, |
|
"step": 204960 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"learning_rate": 8.00419667786011e-06, |
|
"loss": 0.0145, |
|
"step": 206424 |
|
}, |
|
{ |
|
"epoch": 14.11, |
|
"eval_loss": 0.019289566203951836, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1796, |
|
"eval_samples_per_second": 229.403, |
|
"eval_steps_per_second": 4.588, |
|
"step": 206424 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 7.953670885202225e-06, |
|
"loss": 0.0052, |
|
"step": 207888 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"eval_loss": 0.018878955394029617, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1918, |
|
"eval_samples_per_second": 228.124, |
|
"eval_steps_per_second": 4.562, |
|
"step": 207888 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 7.90314509254434e-06, |
|
"loss": 0.0035, |
|
"step": 209352 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"eval_loss": 0.018222585320472717, |
|
"eval_max_distance": 40, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1312, |
|
"eval_samples_per_second": 234.605, |
|
"eval_steps_per_second": 4.692, |
|
"step": 209352 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 7.852619299886455e-06, |
|
"loss": 0.0026, |
|
"step": 210816 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"eval_loss": 0.02002587914466858, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.271, |
|
"eval_samples_per_second": 220.17, |
|
"eval_steps_per_second": 4.403, |
|
"step": 210816 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 7.80209350722857e-06, |
|
"loss": 0.0023, |
|
"step": 212280 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"eval_loss": 0.019655397161841393, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1726, |
|
"eval_samples_per_second": 230.139, |
|
"eval_steps_per_second": 4.603, |
|
"step": 212280 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"learning_rate": 7.751567714570688e-06, |
|
"loss": 0.002, |
|
"step": 213744 |
|
}, |
|
{ |
|
"epoch": 14.61, |
|
"eval_loss": 0.020178088918328285, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.162, |
|
"eval_samples_per_second": 231.271, |
|
"eval_steps_per_second": 4.625, |
|
"step": 213744 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 7.701041921912801e-06, |
|
"loss": 0.002, |
|
"step": 215208 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"eval_loss": 0.023831263184547424, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1181, |
|
"eval_samples_per_second": 236.063, |
|
"eval_steps_per_second": 4.721, |
|
"step": 215208 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"learning_rate": 7.650516129254918e-06, |
|
"loss": 0.0016, |
|
"step": 216672 |
|
}, |
|
{ |
|
"epoch": 14.81, |
|
"eval_loss": 0.02202780544757843, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6647, |
|
"eval_samples_per_second": 187.638, |
|
"eval_steps_per_second": 3.753, |
|
"step": 216672 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"learning_rate": 7.599990336597033e-06, |
|
"loss": 0.0013, |
|
"step": 218136 |
|
}, |
|
{ |
|
"epoch": 14.91, |
|
"eval_loss": 0.024373715743422508, |
|
"eval_max_distance": 52, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.249, |
|
"eval_samples_per_second": 222.322, |
|
"eval_steps_per_second": 4.446, |
|
"step": 218136 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"learning_rate": 7.549464543939149e-06, |
|
"loss": 0.0127, |
|
"step": 219600 |
|
}, |
|
{ |
|
"epoch": 15.01, |
|
"eval_loss": 0.02190934307873249, |
|
"eval_max_distance": 51, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.1441, |
|
"eval_samples_per_second": 159.026, |
|
"eval_steps_per_second": 3.181, |
|
"step": 219600 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"learning_rate": 7.498938751281264e-06, |
|
"loss": 0.0135, |
|
"step": 221064 |
|
}, |
|
{ |
|
"epoch": 15.11, |
|
"eval_loss": 0.017063263803720474, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1044, |
|
"eval_samples_per_second": 237.6, |
|
"eval_steps_per_second": 4.752, |
|
"step": 221064 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"learning_rate": 7.44841295862338e-06, |
|
"loss": 0.0049, |
|
"step": 222528 |
|
}, |
|
{ |
|
"epoch": 15.21, |
|
"eval_loss": 0.018728330731391907, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6091, |
|
"eval_samples_per_second": 191.64, |
|
"eval_steps_per_second": 3.833, |
|
"step": 222528 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"learning_rate": 7.397887165965495e-06, |
|
"loss": 0.0032, |
|
"step": 223992 |
|
}, |
|
{ |
|
"epoch": 15.31, |
|
"eval_loss": 0.019817959517240524, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5028, |
|
"eval_samples_per_second": 199.775, |
|
"eval_steps_per_second": 3.995, |
|
"step": 223992 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"learning_rate": 7.347361373307611e-06, |
|
"loss": 0.0024, |
|
"step": 225456 |
|
}, |
|
{ |
|
"epoch": 15.41, |
|
"eval_loss": 0.017553431913256645, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9951, |
|
"eval_samples_per_second": 250.614, |
|
"eval_steps_per_second": 5.012, |
|
"step": 225456 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"learning_rate": 7.296835580649726e-06, |
|
"loss": 0.0022, |
|
"step": 226920 |
|
}, |
|
{ |
|
"epoch": 15.51, |
|
"eval_loss": 0.01916639320552349, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5108, |
|
"eval_samples_per_second": 199.143, |
|
"eval_steps_per_second": 3.983, |
|
"step": 226920 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"learning_rate": 7.246309787991842e-06, |
|
"loss": 0.0018, |
|
"step": 228384 |
|
}, |
|
{ |
|
"epoch": 15.61, |
|
"eval_loss": 0.018844295293092728, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9922, |
|
"eval_samples_per_second": 250.984, |
|
"eval_steps_per_second": 5.02, |
|
"step": 228384 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"learning_rate": 7.195783995333957e-06, |
|
"loss": 0.0018, |
|
"step": 229848 |
|
}, |
|
{ |
|
"epoch": 15.71, |
|
"eval_loss": 0.020747974514961243, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4786, |
|
"eval_samples_per_second": 201.73, |
|
"eval_steps_per_second": 4.035, |
|
"step": 229848 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"learning_rate": 7.145258202676073e-06, |
|
"loss": 0.0015, |
|
"step": 231312 |
|
}, |
|
{ |
|
"epoch": 15.81, |
|
"eval_loss": 0.021101944148540497, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0975, |
|
"eval_samples_per_second": 238.374, |
|
"eval_steps_per_second": 4.767, |
|
"step": 231312 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"learning_rate": 7.094732410018188e-06, |
|
"loss": 0.0013, |
|
"step": 232776 |
|
}, |
|
{ |
|
"epoch": 15.91, |
|
"eval_loss": 0.02276449464261532, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.182, |
|
"eval_samples_per_second": 229.148, |
|
"eval_steps_per_second": 4.583, |
|
"step": 232776 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"learning_rate": 7.044206617360304e-06, |
|
"loss": 0.0117, |
|
"step": 234240 |
|
}, |
|
{ |
|
"epoch": 16.01, |
|
"eval_loss": 0.020543677732348442, |
|
"eval_max_distance": 44, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4063, |
|
"eval_samples_per_second": 207.787, |
|
"eval_steps_per_second": 4.156, |
|
"step": 234240 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"learning_rate": 6.993680824702419e-06, |
|
"loss": 0.0126, |
|
"step": 235704 |
|
}, |
|
{ |
|
"epoch": 16.11, |
|
"eval_loss": 0.017592445015907288, |
|
"eval_max_distance": 40, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3265, |
|
"eval_samples_per_second": 214.918, |
|
"eval_steps_per_second": 4.298, |
|
"step": 235704 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"learning_rate": 6.9431550320445355e-06, |
|
"loss": 0.0047, |
|
"step": 237168 |
|
}, |
|
{ |
|
"epoch": 16.21, |
|
"eval_loss": 0.01700788550078869, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6521, |
|
"eval_samples_per_second": 188.528, |
|
"eval_steps_per_second": 3.771, |
|
"step": 237168 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 6.89262923938665e-06, |
|
"loss": 0.003, |
|
"step": 238632 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"eval_loss": 0.017684699967503548, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6466, |
|
"eval_samples_per_second": 188.923, |
|
"eval_steps_per_second": 3.778, |
|
"step": 238632 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 6.842103446728766e-06, |
|
"loss": 0.0023, |
|
"step": 240096 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"eval_loss": 0.01923580840229988, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5212, |
|
"eval_samples_per_second": 198.314, |
|
"eval_steps_per_second": 3.966, |
|
"step": 240096 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 6.791577654070881e-06, |
|
"loss": 0.002, |
|
"step": 241560 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"eval_loss": 0.018833961337804794, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0402, |
|
"eval_samples_per_second": 245.068, |
|
"eval_steps_per_second": 4.901, |
|
"step": 241560 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 6.7410518614129975e-06, |
|
"loss": 0.0017, |
|
"step": 243024 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"eval_loss": 0.01853407733142376, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6623, |
|
"eval_samples_per_second": 187.81, |
|
"eval_steps_per_second": 3.756, |
|
"step": 243024 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 6.690526068755112e-06, |
|
"loss": 0.0017, |
|
"step": 244488 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"eval_loss": 0.020028624683618546, |
|
"eval_max_distance": 43, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3728, |
|
"eval_samples_per_second": 210.718, |
|
"eval_steps_per_second": 4.214, |
|
"step": 244488 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 6.640000276097228e-06, |
|
"loss": 0.0014, |
|
"step": 245952 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"eval_loss": 0.021073581650853157, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0603, |
|
"eval_samples_per_second": 242.683, |
|
"eval_steps_per_second": 4.854, |
|
"step": 245952 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 6.589474483439343e-06, |
|
"loss": 0.0012, |
|
"step": 247416 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"eval_loss": 0.020787488669157028, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1877, |
|
"eval_samples_per_second": 228.552, |
|
"eval_steps_per_second": 4.571, |
|
"step": 247416 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 6.5389486907814595e-06, |
|
"loss": 0.0111, |
|
"step": 248880 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"eval_loss": 0.02048674039542675, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 3.0915, |
|
"eval_samples_per_second": 161.731, |
|
"eval_steps_per_second": 3.235, |
|
"step": 248880 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 6.488422898123576e-06, |
|
"loss": 0.012, |
|
"step": 250344 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"eval_loss": 0.018051166087388992, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6273, |
|
"eval_samples_per_second": 190.309, |
|
"eval_steps_per_second": 3.806, |
|
"step": 250344 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 6.43789710546569e-06, |
|
"loss": 0.0045, |
|
"step": 251808 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"eval_loss": 0.01751534268260002, |
|
"eval_max_distance": 42, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6979, |
|
"eval_samples_per_second": 185.332, |
|
"eval_steps_per_second": 3.707, |
|
"step": 251808 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 6.387371312807806e-06, |
|
"loss": 0.0029, |
|
"step": 253272 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"eval_loss": 0.018825719133019447, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0239, |
|
"eval_samples_per_second": 247.05, |
|
"eval_steps_per_second": 4.941, |
|
"step": 253272 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 6.3368455201499214e-06, |
|
"loss": 0.0022, |
|
"step": 254736 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"eval_loss": 0.01774280145764351, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9404, |
|
"eval_samples_per_second": 257.676, |
|
"eval_steps_per_second": 5.154, |
|
"step": 254736 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"learning_rate": 6.2863197274920376e-06, |
|
"loss": 0.0019, |
|
"step": 256200 |
|
}, |
|
{ |
|
"epoch": 17.51, |
|
"eval_loss": 0.017391487956047058, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 1.9824, |
|
"eval_samples_per_second": 252.224, |
|
"eval_steps_per_second": 5.044, |
|
"step": 256200 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 6.235793934834152e-06, |
|
"loss": 0.0016, |
|
"step": 257664 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"eval_loss": 0.018112240359187126, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6476, |
|
"eval_samples_per_second": 188.849, |
|
"eval_steps_per_second": 3.777, |
|
"step": 257664 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"learning_rate": 6.185268142176268e-06, |
|
"loss": 0.0016, |
|
"step": 259128 |
|
}, |
|
{ |
|
"epoch": 17.71, |
|
"eval_loss": 0.019098376855254173, |
|
"eval_max_distance": 35, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0112, |
|
"eval_samples_per_second": 248.606, |
|
"eval_steps_per_second": 4.972, |
|
"step": 259128 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"learning_rate": 6.134742349518383e-06, |
|
"loss": 0.0013, |
|
"step": 260592 |
|
}, |
|
{ |
|
"epoch": 17.81, |
|
"eval_loss": 0.019062627106904984, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.581, |
|
"eval_samples_per_second": 193.726, |
|
"eval_steps_per_second": 3.875, |
|
"step": 260592 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 6.0842165568604995e-06, |
|
"loss": 0.0011, |
|
"step": 262056 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"eval_loss": 0.02094220370054245, |
|
"eval_max_distance": 48, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1774, |
|
"eval_samples_per_second": 229.631, |
|
"eval_steps_per_second": 4.593, |
|
"step": 262056 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 6.033690764202614e-06, |
|
"loss": 0.0106, |
|
"step": 263520 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"eval_loss": 0.01973116211593151, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.9851, |
|
"eval_samples_per_second": 167.497, |
|
"eval_steps_per_second": 3.35, |
|
"step": 263520 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 5.98316497154473e-06, |
|
"loss": 0.0114, |
|
"step": 264984 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"eval_loss": 0.01814502477645874, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0996, |
|
"eval_samples_per_second": 238.139, |
|
"eval_steps_per_second": 4.763, |
|
"step": 264984 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 5.932639178886845e-06, |
|
"loss": 0.0043, |
|
"step": 266448 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"eval_loss": 0.01815211959183216, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5596, |
|
"eval_samples_per_second": 195.345, |
|
"eval_steps_per_second": 3.907, |
|
"step": 266448 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"learning_rate": 5.8821133862289615e-06, |
|
"loss": 0.0027, |
|
"step": 267912 |
|
}, |
|
{ |
|
"epoch": 18.31, |
|
"eval_loss": 0.017581813037395477, |
|
"eval_max_distance": 36, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5266, |
|
"eval_samples_per_second": 197.897, |
|
"eval_steps_per_second": 3.958, |
|
"step": 267912 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 5.831587593571077e-06, |
|
"loss": 0.0021, |
|
"step": 269376 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"eval_loss": 0.018509158864617348, |
|
"eval_max_distance": 36, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2319, |
|
"eval_samples_per_second": 224.023, |
|
"eval_steps_per_second": 4.48, |
|
"step": 269376 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 5.781061800913192e-06, |
|
"loss": 0.0018, |
|
"step": 270840 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"eval_loss": 0.020238015800714493, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5077, |
|
"eval_samples_per_second": 199.386, |
|
"eval_steps_per_second": 3.988, |
|
"step": 270840 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 5.730536008255307e-06, |
|
"loss": 0.0015, |
|
"step": 272304 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"eval_loss": 0.018911337479948997, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5132, |
|
"eval_samples_per_second": 198.948, |
|
"eval_steps_per_second": 3.979, |
|
"step": 272304 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"learning_rate": 5.6800102155974235e-06, |
|
"loss": 0.0015, |
|
"step": 273768 |
|
}, |
|
{ |
|
"epoch": 18.71, |
|
"eval_loss": 0.021090172231197357, |
|
"eval_max_distance": 36, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.262, |
|
"eval_samples_per_second": 221.045, |
|
"eval_steps_per_second": 4.421, |
|
"step": 273768 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 5.629484422939539e-06, |
|
"loss": 0.0013, |
|
"step": 275232 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"eval_loss": 0.018903149291872978, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4844, |
|
"eval_samples_per_second": 201.255, |
|
"eval_steps_per_second": 4.025, |
|
"step": 275232 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"learning_rate": 5.578958630281655e-06, |
|
"loss": 0.001, |
|
"step": 276696 |
|
}, |
|
{ |
|
"epoch": 18.91, |
|
"eval_loss": 0.01939016580581665, |
|
"eval_max_distance": 45, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4506, |
|
"eval_samples_per_second": 204.028, |
|
"eval_steps_per_second": 4.081, |
|
"step": 276696 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"learning_rate": 5.528432837623769e-06, |
|
"loss": 0.0102, |
|
"step": 278160 |
|
}, |
|
{ |
|
"epoch": 19.01, |
|
"eval_loss": 0.019789060577750206, |
|
"eval_max_distance": 47, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3546, |
|
"eval_samples_per_second": 212.349, |
|
"eval_steps_per_second": 4.247, |
|
"step": 278160 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"learning_rate": 5.4779070449658855e-06, |
|
"loss": 0.011, |
|
"step": 279624 |
|
}, |
|
{ |
|
"epoch": 19.11, |
|
"eval_loss": 0.018310004845261574, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0507, |
|
"eval_samples_per_second": 243.824, |
|
"eval_steps_per_second": 4.876, |
|
"step": 279624 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"learning_rate": 5.427381252308001e-06, |
|
"loss": 0.0042, |
|
"step": 281088 |
|
}, |
|
{ |
|
"epoch": 19.21, |
|
"eval_loss": 0.019428173080086708, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.7958, |
|
"eval_samples_per_second": 178.84, |
|
"eval_steps_per_second": 3.577, |
|
"step": 281088 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"learning_rate": 5.376855459650117e-06, |
|
"loss": 0.0026, |
|
"step": 282552 |
|
}, |
|
{ |
|
"epoch": 19.31, |
|
"eval_loss": 0.018094651401042938, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.5895, |
|
"eval_samples_per_second": 193.087, |
|
"eval_steps_per_second": 3.862, |
|
"step": 282552 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"learning_rate": 5.326329666992231e-06, |
|
"loss": 0.002, |
|
"step": 284016 |
|
}, |
|
{ |
|
"epoch": 19.41, |
|
"eval_loss": 0.018259983509778976, |
|
"eval_max_distance": 36, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4756, |
|
"eval_samples_per_second": 201.968, |
|
"eval_steps_per_second": 4.039, |
|
"step": 284016 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"learning_rate": 5.2758038743343474e-06, |
|
"loss": 0.0017, |
|
"step": 285480 |
|
}, |
|
{ |
|
"epoch": 19.51, |
|
"eval_loss": 0.01770331896841526, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4887, |
|
"eval_samples_per_second": 200.91, |
|
"eval_steps_per_second": 4.018, |
|
"step": 285480 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"learning_rate": 5.225278081676463e-06, |
|
"loss": 0.0015, |
|
"step": 286944 |
|
}, |
|
{ |
|
"epoch": 19.61, |
|
"eval_loss": 0.021017737686634064, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.531, |
|
"eval_samples_per_second": 197.549, |
|
"eval_steps_per_second": 3.951, |
|
"step": 286944 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 5.174752289018579e-06, |
|
"loss": 0.0015, |
|
"step": 288408 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"eval_loss": 0.020359748974442482, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4747, |
|
"eval_samples_per_second": 202.046, |
|
"eval_steps_per_second": 4.041, |
|
"step": 288408 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 5.124226496360693e-06, |
|
"loss": 0.0012, |
|
"step": 289872 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 0.018832657486200333, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6331, |
|
"eval_samples_per_second": 189.894, |
|
"eval_steps_per_second": 3.798, |
|
"step": 289872 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 5.073700703702809e-06, |
|
"loss": 0.001, |
|
"step": 291336 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"eval_loss": 0.020642299205064774, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3046, |
|
"eval_samples_per_second": 216.959, |
|
"eval_steps_per_second": 4.339, |
|
"step": 291336 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 3.88e-08, |
|
"loss": 0.0271, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_loss": 0.021931374445557594, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3671, |
|
"eval_samples_per_second": 211.23, |
|
"eval_steps_per_second": 4.225, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"learning_rate": 8.88e-08, |
|
"loss": 0.0248, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 20.09, |
|
"eval_loss": 0.022779377177357674, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0968, |
|
"eval_samples_per_second": 238.461, |
|
"eval_steps_per_second": 4.769, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"learning_rate": 8.48799796831305e-08, |
|
"loss": 0.0054, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 20.19, |
|
"eval_loss": 0.01905255950987339, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.0895, |
|
"eval_samples_per_second": 239.294, |
|
"eval_steps_per_second": 4.786, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"learning_rate": 7.277781371744775e-08, |
|
"loss": 0.0032, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"eval_loss": 0.020227482542395592, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1433, |
|
"eval_samples_per_second": 233.28, |
|
"eval_steps_per_second": 4.666, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"learning_rate": 6.471170442908496e-08, |
|
"loss": 0.0023, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 20.4, |
|
"eval_loss": 0.019163601100444794, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.099, |
|
"eval_samples_per_second": 238.207, |
|
"eval_steps_per_second": 4.764, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"learning_rate": 3.360681603029347e-09, |
|
"loss": 0.0019, |
|
"step": 299565 |
|
}, |
|
{ |
|
"epoch": 20.47, |
|
"eval_loss": 0.01855267398059368, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4246, |
|
"eval_samples_per_second": 206.219, |
|
"eval_steps_per_second": 4.124, |
|
"step": 299565 |
|
}, |
|
{ |
|
"epoch": 20.58, |
|
"learning_rate": 8.362259387819501e-09, |
|
"loss": 0.0016, |
|
"step": 301150 |
|
}, |
|
{ |
|
"epoch": 20.58, |
|
"eval_loss": 0.019294343888759613, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1809, |
|
"eval_samples_per_second": 229.262, |
|
"eval_steps_per_second": 4.585, |
|
"step": 301150 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"learning_rate": 8.650364590933742e-09, |
|
"loss": 0.0015, |
|
"step": 302735 |
|
}, |
|
{ |
|
"epoch": 20.69, |
|
"eval_loss": 0.019666763022542, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6939, |
|
"eval_samples_per_second": 185.606, |
|
"eval_steps_per_second": 3.712, |
|
"step": 302735 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"learning_rate": 7.3790359682892075e-09, |
|
"loss": 0.0012, |
|
"step": 304320 |
|
}, |
|
{ |
|
"epoch": 20.8, |
|
"eval_loss": 0.02117960713803768, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6939, |
|
"eval_samples_per_second": 185.604, |
|
"eval_steps_per_second": 3.712, |
|
"step": 304320 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"learning_rate": 6.5418199690732104e-09, |
|
"loss": 0.0009, |
|
"step": 305905 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_loss": 0.01806722581386566, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1281, |
|
"eval_samples_per_second": 234.95, |
|
"eval_steps_per_second": 4.699, |
|
"step": 305905 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 5.937194462602945e-09, |
|
"loss": 0.0331, |
|
"step": 307490 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"eval_loss": 0.019925368949770927, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2473, |
|
"eval_samples_per_second": 222.488, |
|
"eval_steps_per_second": 4.45, |
|
"step": 307490 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"learning_rate": 5.4742034190634586e-09, |
|
"loss": 0.0108, |
|
"step": 309075 |
|
}, |
|
{ |
|
"epoch": 21.12, |
|
"eval_loss": 0.01871710829436779, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.1936, |
|
"eval_samples_per_second": 227.938, |
|
"eval_steps_per_second": 4.559, |
|
"step": 309075 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"learning_rate": 5.104983375404655e-09, |
|
"loss": 0.0045, |
|
"step": 310660 |
|
}, |
|
{ |
|
"epoch": 21.23, |
|
"eval_loss": 0.019821077585220337, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.2507, |
|
"eval_samples_per_second": 222.154, |
|
"eval_steps_per_second": 4.443, |
|
"step": 310660 |
|
}, |
|
{ |
|
"epoch": 21.34, |
|
"learning_rate": 4.801630619112727e-09, |
|
"loss": 0.0028, |
|
"step": 312245 |
|
}, |
|
{ |
|
"epoch": 21.34, |
|
"eval_loss": 0.022539684548974037, |
|
"eval_max_distance": 39, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.4779, |
|
"eval_samples_per_second": 201.781, |
|
"eval_steps_per_second": 4.036, |
|
"step": 312245 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"learning_rate": 4.54663449074512e-09, |
|
"loss": 0.002, |
|
"step": 313830 |
|
}, |
|
{ |
|
"epoch": 21.45, |
|
"eval_loss": 0.018958711996674538, |
|
"eval_max_distance": 46, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.3193, |
|
"eval_samples_per_second": 215.586, |
|
"eval_steps_per_second": 4.312, |
|
"step": 313830 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"learning_rate": 4.328377377748997e-09, |
|
"loss": 0.0017, |
|
"step": 315415 |
|
}, |
|
{ |
|
"epoch": 21.55, |
|
"eval_loss": 0.01911868527531624, |
|
"eval_max_distance": 38, |
|
"eval_mean_distance": 0, |
|
"eval_runtime": 2.6165, |
|
"eval_samples_per_second": 191.092, |
|
"eval_steps_per_second": 3.822, |
|
"step": 315415 |
|
}, |
|
{ |
|
"epoch": 21.65, |
|
"step": 316827, |
|
"total_flos": 3.443227992585216e+16, |
|
"train_loss": 6.756745522816313e-06, |
|
"train_runtime": 151.9677, |
|
"train_samples_per_second": 108410.633, |
|
"train_steps_per_second": 2084.831 |
|
} |
|
], |
|
"max_steps": 316827, |
|
"num_train_epochs": 22, |
|
"total_flos": 3.443227992585216e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|