text-normalization-ru-terrible / trainer_state.json
maximxls's picture
End of training
6cace96
raw
history blame
197 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 21.650061500615006,
"global_step": 316827,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.840909090909091e-06,
"loss": 4.4773,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 0.0005,
"loss": 4.2748,
"step": 176
},
{
"epoch": 0.02,
"eval_loss": 5.831331253051758,
"eval_max_distance": 201,
"eval_mean_distance": 116,
"eval_runtime": 16.9557,
"eval_samples_per_second": 29.489,
"eval_steps_per_second": 0.59,
"step": 176
},
{
"epoch": 0.03,
"learning_rate": 0.001,
"loss": 3.2983,
"step": 352
},
{
"epoch": 0.03,
"eval_loss": 5.142276287078857,
"eval_max_distance": 186,
"eval_mean_distance": 102,
"eval_runtime": 14.0636,
"eval_samples_per_second": 35.553,
"eval_steps_per_second": 0.711,
"step": 352
},
{
"epoch": 0.05,
"learning_rate": 0.0009949384562291498,
"loss": 2.9967,
"step": 528
},
{
"epoch": 0.05,
"eval_loss": 4.896122932434082,
"eval_max_distance": 196,
"eval_mean_distance": 104,
"eval_runtime": 15.2018,
"eval_samples_per_second": 32.891,
"eval_steps_per_second": 0.658,
"step": 528
},
{
"epoch": 0.06,
"learning_rate": 0.0009898769124582999,
"loss": 2.9064,
"step": 704
},
{
"epoch": 0.06,
"eval_loss": 4.704981803894043,
"eval_max_distance": 171,
"eval_mean_distance": 99,
"eval_runtime": 14.8835,
"eval_samples_per_second": 33.594,
"eval_steps_per_second": 0.672,
"step": 704
},
{
"epoch": 0.08,
"learning_rate": 0.0009848153686874497,
"loss": 2.8444,
"step": 880
},
{
"epoch": 0.08,
"eval_loss": 4.648359775543213,
"eval_max_distance": 173,
"eval_mean_distance": 99,
"eval_runtime": 13.5358,
"eval_samples_per_second": 36.939,
"eval_steps_per_second": 0.739,
"step": 880
},
{
"epoch": 0.09,
"learning_rate": 0.0009797538249165995,
"loss": 2.7359,
"step": 1056
},
{
"epoch": 0.09,
"eval_loss": 4.639862537384033,
"eval_max_distance": 200,
"eval_mean_distance": 96,
"eval_runtime": 13.4266,
"eval_samples_per_second": 37.239,
"eval_steps_per_second": 0.745,
"step": 1056
},
{
"epoch": 0.11,
"learning_rate": 0.0009746922811457495,
"loss": 2.667,
"step": 1232
},
{
"epoch": 0.11,
"eval_loss": 4.517767906188965,
"eval_max_distance": 160,
"eval_mean_distance": 93,
"eval_runtime": 13.7165,
"eval_samples_per_second": 36.453,
"eval_steps_per_second": 0.729,
"step": 1232
},
{
"epoch": 0.12,
"learning_rate": 0.0009696307373748994,
"loss": 2.6454,
"step": 1408
},
{
"epoch": 0.12,
"eval_loss": 4.468171119689941,
"eval_max_distance": 154,
"eval_mean_distance": 92,
"eval_runtime": 14.5339,
"eval_samples_per_second": 34.402,
"eval_steps_per_second": 0.688,
"step": 1408
},
{
"epoch": 0.14,
"learning_rate": 0.0009645691936040492,
"loss": 2.5737,
"step": 1584
},
{
"epoch": 0.14,
"eval_loss": 4.346231460571289,
"eval_max_distance": 151,
"eval_mean_distance": 91,
"eval_runtime": 13.9596,
"eval_samples_per_second": 35.818,
"eval_steps_per_second": 0.716,
"step": 1584
},
{
"epoch": 0.15,
"learning_rate": 0.0009595076498331991,
"loss": 2.5569,
"step": 1760
},
{
"epoch": 0.15,
"eval_loss": 4.252463340759277,
"eval_max_distance": 166,
"eval_mean_distance": 92,
"eval_runtime": 13.7296,
"eval_samples_per_second": 36.418,
"eval_steps_per_second": 0.728,
"step": 1760
},
{
"epoch": 0.17,
"learning_rate": 0.0009544461060623491,
"loss": 2.5114,
"step": 1936
},
{
"epoch": 0.17,
"eval_loss": 4.216597557067871,
"eval_max_distance": 164,
"eval_mean_distance": 90,
"eval_runtime": 14.6796,
"eval_samples_per_second": 34.061,
"eval_steps_per_second": 0.681,
"step": 1936
},
{
"epoch": 0.18,
"learning_rate": 0.0009493845622914989,
"loss": 2.4448,
"step": 2112
},
{
"epoch": 0.18,
"eval_loss": 4.229903697967529,
"eval_max_distance": 151,
"eval_mean_distance": 90,
"eval_runtime": 14.5473,
"eval_samples_per_second": 34.371,
"eval_steps_per_second": 0.687,
"step": 2112
},
{
"epoch": 0.2,
"learning_rate": 0.0009443230185206488,
"loss": 2.3847,
"step": 2288
},
{
"epoch": 0.2,
"eval_loss": 4.108386039733887,
"eval_max_distance": 146,
"eval_mean_distance": 90,
"eval_runtime": 14.7702,
"eval_samples_per_second": 33.852,
"eval_steps_per_second": 0.677,
"step": 2288
},
{
"epoch": 0.21,
"learning_rate": 0.0009392614747497988,
"loss": 2.1415,
"step": 2464
},
{
"epoch": 0.21,
"eval_loss": 3.6208255290985107,
"eval_max_distance": 130,
"eval_mean_distance": 78,
"eval_runtime": 15.5259,
"eval_samples_per_second": 32.204,
"eval_steps_per_second": 0.644,
"step": 2464
},
{
"epoch": 0.23,
"learning_rate": 0.0009341999309789486,
"loss": 1.0359,
"step": 2640
},
{
"epoch": 0.23,
"eval_loss": 1.5739717483520508,
"eval_max_distance": 139,
"eval_mean_distance": 41,
"eval_runtime": 17.5626,
"eval_samples_per_second": 28.47,
"eval_steps_per_second": 0.569,
"step": 2640
},
{
"epoch": 0.24,
"learning_rate": 0.0009291383872080985,
"loss": 0.5688,
"step": 2816
},
{
"epoch": 0.24,
"eval_loss": 1.293850064277649,
"eval_max_distance": 127,
"eval_mean_distance": 35,
"eval_runtime": 13.8285,
"eval_samples_per_second": 36.157,
"eval_steps_per_second": 0.723,
"step": 2816
},
{
"epoch": 0.26,
"learning_rate": 0.0009240768434372484,
"loss": 0.5115,
"step": 2992
},
{
"epoch": 0.26,
"eval_loss": 1.188537836074829,
"eval_max_distance": 120,
"eval_mean_distance": 32,
"eval_runtime": 15.477,
"eval_samples_per_second": 32.306,
"eval_steps_per_second": 0.646,
"step": 2992
},
{
"epoch": 0.27,
"learning_rate": 0.0009190152996663983,
"loss": 0.4662,
"step": 3168
},
{
"epoch": 0.27,
"eval_loss": 1.1915712356567383,
"eval_max_distance": 127,
"eval_mean_distance": 32,
"eval_runtime": 16.2261,
"eval_samples_per_second": 30.815,
"eval_steps_per_second": 0.616,
"step": 3168
},
{
"epoch": 0.29,
"learning_rate": 0.0009139537558955482,
"loss": 0.4401,
"step": 3344
},
{
"epoch": 0.29,
"eval_loss": 1.15479576587677,
"eval_max_distance": 126,
"eval_mean_distance": 31,
"eval_runtime": 16.6846,
"eval_samples_per_second": 29.968,
"eval_steps_per_second": 0.599,
"step": 3344
},
{
"epoch": 0.3,
"learning_rate": 0.0009088922121246981,
"loss": 0.408,
"step": 3520
},
{
"epoch": 0.3,
"eval_loss": 1.1046110391616821,
"eval_max_distance": 103,
"eval_mean_distance": 29,
"eval_runtime": 14.9865,
"eval_samples_per_second": 33.363,
"eval_steps_per_second": 0.667,
"step": 3520
},
{
"epoch": 0.32,
"learning_rate": 0.000903830668353848,
"loss": 0.3858,
"step": 3696
},
{
"epoch": 0.32,
"eval_loss": 0.9291953444480896,
"eval_max_distance": 135,
"eval_mean_distance": 26,
"eval_runtime": 13.5564,
"eval_samples_per_second": 36.883,
"eval_steps_per_second": 0.738,
"step": 3696
},
{
"epoch": 0.33,
"learning_rate": 0.0008987691245829979,
"loss": 0.3586,
"step": 3872
},
{
"epoch": 0.33,
"eval_loss": 0.7040215730667114,
"eval_max_distance": 115,
"eval_mean_distance": 21,
"eval_runtime": 15.3203,
"eval_samples_per_second": 32.636,
"eval_steps_per_second": 0.653,
"step": 3872
},
{
"epoch": 0.35,
"learning_rate": 0.0008937075808121477,
"loss": 0.3237,
"step": 4048
},
{
"epoch": 0.35,
"eval_loss": 0.5791512131690979,
"eval_max_distance": 139,
"eval_mean_distance": 17,
"eval_runtime": 14.8657,
"eval_samples_per_second": 33.634,
"eval_steps_per_second": 0.673,
"step": 4048
},
{
"epoch": 0.36,
"learning_rate": 0.0008886460370412976,
"loss": 0.2783,
"step": 4224
},
{
"epoch": 0.36,
"eval_loss": 0.4473143219947815,
"eval_max_distance": 122,
"eval_mean_distance": 14,
"eval_runtime": 13.1694,
"eval_samples_per_second": 37.967,
"eval_steps_per_second": 0.759,
"step": 4224
},
{
"epoch": 0.38,
"learning_rate": 0.0008835844932704476,
"loss": 0.2666,
"step": 4400
},
{
"epoch": 0.38,
"eval_loss": 0.3882352411746979,
"eval_max_distance": 125,
"eval_mean_distance": 11,
"eval_runtime": 14.0887,
"eval_samples_per_second": 35.489,
"eval_steps_per_second": 0.71,
"step": 4400
},
{
"epoch": 0.39,
"learning_rate": 0.0008785229494995974,
"loss": 0.249,
"step": 4576
},
{
"epoch": 0.39,
"eval_loss": 0.3606760799884796,
"eval_max_distance": 126,
"eval_mean_distance": 11,
"eval_runtime": 13.741,
"eval_samples_per_second": 36.388,
"eval_steps_per_second": 0.728,
"step": 4576
},
{
"epoch": 0.41,
"learning_rate": 0.0008734614057287473,
"loss": 0.2371,
"step": 4752
},
{
"epoch": 0.41,
"eval_loss": 0.3074778616428375,
"eval_max_distance": 111,
"eval_mean_distance": 9,
"eval_runtime": 13.0071,
"eval_samples_per_second": 38.441,
"eval_steps_per_second": 0.769,
"step": 4752
},
{
"epoch": 0.42,
"learning_rate": 0.0008683998619578973,
"loss": 0.2231,
"step": 4928
},
{
"epoch": 0.42,
"eval_loss": 0.2844279706478119,
"eval_max_distance": 123,
"eval_mean_distance": 8,
"eval_runtime": 12.8112,
"eval_samples_per_second": 39.028,
"eval_steps_per_second": 0.781,
"step": 4928
},
{
"epoch": 0.44,
"learning_rate": 0.0008633383181870471,
"loss": 0.2114,
"step": 5104
},
{
"epoch": 0.44,
"eval_loss": 0.27491652965545654,
"eval_max_distance": 133,
"eval_mean_distance": 8,
"eval_runtime": 11.3043,
"eval_samples_per_second": 44.231,
"eval_steps_per_second": 0.885,
"step": 5104
},
{
"epoch": 0.45,
"learning_rate": 0.000858276774416197,
"loss": 0.1946,
"step": 5280
},
{
"epoch": 0.45,
"eval_loss": 0.2683752775192261,
"eval_max_distance": 124,
"eval_mean_distance": 8,
"eval_runtime": 11.6576,
"eval_samples_per_second": 42.891,
"eval_steps_per_second": 0.858,
"step": 5280
},
{
"epoch": 0.47,
"learning_rate": 0.000853215230645347,
"loss": 0.1893,
"step": 5456
},
{
"epoch": 0.47,
"eval_loss": 0.25331711769104004,
"eval_max_distance": 129,
"eval_mean_distance": 7,
"eval_runtime": 11.955,
"eval_samples_per_second": 41.824,
"eval_steps_per_second": 0.836,
"step": 5456
},
{
"epoch": 0.48,
"learning_rate": 0.0008481536868744968,
"loss": 0.1865,
"step": 5632
},
{
"epoch": 0.48,
"eval_loss": 0.22615253925323486,
"eval_max_distance": 129,
"eval_mean_distance": 7,
"eval_runtime": 10.581,
"eval_samples_per_second": 47.255,
"eval_steps_per_second": 0.945,
"step": 5632
},
{
"epoch": 0.5,
"learning_rate": 0.0008430921431036467,
"loss": 0.1769,
"step": 5808
},
{
"epoch": 0.5,
"eval_loss": 0.22688022255897522,
"eval_max_distance": 137,
"eval_mean_distance": 6,
"eval_runtime": 10.5367,
"eval_samples_per_second": 47.453,
"eval_steps_per_second": 0.949,
"step": 5808
},
{
"epoch": 0.51,
"learning_rate": 0.0008380305993327965,
"loss": 0.172,
"step": 5984
},
{
"epoch": 0.51,
"eval_loss": 0.21938543021678925,
"eval_max_distance": 129,
"eval_mean_distance": 6,
"eval_runtime": 10.9867,
"eval_samples_per_second": 45.51,
"eval_steps_per_second": 0.91,
"step": 5984
},
{
"epoch": 0.53,
"learning_rate": 0.0008329690555619465,
"loss": 0.1594,
"step": 6160
},
{
"epoch": 0.53,
"eval_loss": 0.20960509777069092,
"eval_max_distance": 138,
"eval_mean_distance": 6,
"eval_runtime": 8.8034,
"eval_samples_per_second": 56.796,
"eval_steps_per_second": 1.136,
"step": 6160
},
{
"epoch": 0.54,
"learning_rate": 0.0008279075117910963,
"loss": 0.158,
"step": 6336
},
{
"epoch": 0.54,
"eval_loss": 0.20247827470302582,
"eval_max_distance": 122,
"eval_mean_distance": 6,
"eval_runtime": 9.7934,
"eval_samples_per_second": 51.055,
"eval_steps_per_second": 1.021,
"step": 6336
},
{
"epoch": 0.56,
"learning_rate": 0.0008228459680202461,
"loss": 0.1436,
"step": 6512
},
{
"epoch": 0.56,
"eval_loss": 0.20747961103916168,
"eval_max_distance": 128,
"eval_mean_distance": 6,
"eval_runtime": 10.4315,
"eval_samples_per_second": 47.932,
"eval_steps_per_second": 0.959,
"step": 6512
},
{
"epoch": 0.57,
"learning_rate": 0.000817784424249396,
"loss": 0.1398,
"step": 6688
},
{
"epoch": 0.57,
"eval_loss": 0.19245657324790955,
"eval_max_distance": 128,
"eval_mean_distance": 6,
"eval_runtime": 9.4807,
"eval_samples_per_second": 52.739,
"eval_steps_per_second": 1.055,
"step": 6688
},
{
"epoch": 0.59,
"learning_rate": 0.000812722880478546,
"loss": 0.1326,
"step": 6864
},
{
"epoch": 0.59,
"eval_loss": 0.1919807493686676,
"eval_max_distance": 125,
"eval_mean_distance": 6,
"eval_runtime": 9.3305,
"eval_samples_per_second": 53.588,
"eval_steps_per_second": 1.072,
"step": 6864
},
{
"epoch": 0.6,
"learning_rate": 0.0008076613367076958,
"loss": 0.1321,
"step": 7040
},
{
"epoch": 0.6,
"eval_loss": 0.18875986337661743,
"eval_max_distance": 135,
"eval_mean_distance": 5,
"eval_runtime": 7.8177,
"eval_samples_per_second": 63.958,
"eval_steps_per_second": 1.279,
"step": 7040
},
{
"epoch": 0.62,
"learning_rate": 0.0008025997929368457,
"loss": 0.1255,
"step": 7216
},
{
"epoch": 0.62,
"eval_loss": 0.18568550050258636,
"eval_max_distance": 144,
"eval_mean_distance": 6,
"eval_runtime": 9.4575,
"eval_samples_per_second": 52.868,
"eval_steps_per_second": 1.057,
"step": 7216
},
{
"epoch": 0.63,
"learning_rate": 0.0007975382491659956,
"loss": 0.1257,
"step": 7392
},
{
"epoch": 0.63,
"eval_loss": 0.18572939932346344,
"eval_max_distance": 113,
"eval_mean_distance": 5,
"eval_runtime": 7.6787,
"eval_samples_per_second": 65.115,
"eval_steps_per_second": 1.302,
"step": 7392
},
{
"epoch": 0.65,
"learning_rate": 0.0007924767053951455,
"loss": 0.1175,
"step": 7568
},
{
"epoch": 0.65,
"eval_loss": 0.17028774321079254,
"eval_max_distance": 124,
"eval_mean_distance": 5,
"eval_runtime": 7.3771,
"eval_samples_per_second": 67.777,
"eval_steps_per_second": 1.356,
"step": 7568
},
{
"epoch": 0.66,
"learning_rate": 0.0007874151616242954,
"loss": 0.1109,
"step": 7744
},
{
"epoch": 0.66,
"eval_loss": 0.1755068004131317,
"eval_max_distance": 126,
"eval_mean_distance": 5,
"eval_runtime": 7.0941,
"eval_samples_per_second": 70.482,
"eval_steps_per_second": 1.41,
"step": 7744
},
{
"epoch": 0.68,
"learning_rate": 0.0007823536178534452,
"loss": 0.1065,
"step": 7920
},
{
"epoch": 0.68,
"eval_loss": 0.17363803088665009,
"eval_max_distance": 128,
"eval_mean_distance": 5,
"eval_runtime": 8.9523,
"eval_samples_per_second": 55.852,
"eval_steps_per_second": 1.117,
"step": 7920
},
{
"epoch": 0.69,
"learning_rate": 0.0007772920740825952,
"loss": 0.104,
"step": 8096
},
{
"epoch": 0.69,
"eval_loss": 0.16887737810611725,
"eval_max_distance": 129,
"eval_mean_distance": 5,
"eval_runtime": 8.6778,
"eval_samples_per_second": 57.619,
"eval_steps_per_second": 1.152,
"step": 8096
},
{
"epoch": 0.71,
"learning_rate": 0.0007722305303117451,
"loss": 0.0997,
"step": 8272
},
{
"epoch": 0.71,
"eval_loss": 0.16253332793712616,
"eval_max_distance": 117,
"eval_mean_distance": 5,
"eval_runtime": 9.3982,
"eval_samples_per_second": 53.202,
"eval_steps_per_second": 1.064,
"step": 8272
},
{
"epoch": 0.72,
"learning_rate": 0.0007671689865408949,
"loss": 0.0993,
"step": 8448
},
{
"epoch": 0.72,
"eval_loss": 0.16519272327423096,
"eval_max_distance": 120,
"eval_mean_distance": 5,
"eval_runtime": 8.3081,
"eval_samples_per_second": 60.182,
"eval_steps_per_second": 1.204,
"step": 8448
},
{
"epoch": 0.74,
"learning_rate": 0.0007621074427700448,
"loss": 0.0962,
"step": 8624
},
{
"epoch": 0.74,
"eval_loss": 0.16153652966022491,
"eval_max_distance": 113,
"eval_mean_distance": 5,
"eval_runtime": 8.3339,
"eval_samples_per_second": 59.996,
"eval_steps_per_second": 1.2,
"step": 8624
},
{
"epoch": 0.75,
"learning_rate": 0.0007570458989991948,
"loss": 0.0904,
"step": 8800
},
{
"epoch": 0.75,
"eval_loss": 0.1528134047985077,
"eval_max_distance": 131,
"eval_mean_distance": 4,
"eval_runtime": 8.605,
"eval_samples_per_second": 58.106,
"eval_steps_per_second": 1.162,
"step": 8800
},
{
"epoch": 0.77,
"learning_rate": 0.0007519843552283446,
"loss": 0.0873,
"step": 8976
},
{
"epoch": 0.77,
"eval_loss": 0.15612851083278656,
"eval_max_distance": 106,
"eval_mean_distance": 4,
"eval_runtime": 9.222,
"eval_samples_per_second": 54.218,
"eval_steps_per_second": 1.084,
"step": 8976
},
{
"epoch": 0.78,
"learning_rate": 0.0007469228114574945,
"loss": 0.0858,
"step": 9152
},
{
"epoch": 0.78,
"eval_loss": 0.15163549780845642,
"eval_max_distance": 137,
"eval_mean_distance": 4,
"eval_runtime": 8.6808,
"eval_samples_per_second": 57.598,
"eval_steps_per_second": 1.152,
"step": 9152
},
{
"epoch": 0.8,
"learning_rate": 0.0007418612676866445,
"loss": 0.082,
"step": 9328
},
{
"epoch": 0.8,
"eval_loss": 0.1514330506324768,
"eval_max_distance": 136,
"eval_mean_distance": 4,
"eval_runtime": 8.6904,
"eval_samples_per_second": 57.535,
"eval_steps_per_second": 1.151,
"step": 9328
},
{
"epoch": 0.81,
"learning_rate": 0.0007367997239157943,
"loss": 0.0766,
"step": 9504
},
{
"epoch": 0.81,
"eval_loss": 0.15236127376556396,
"eval_max_distance": 148,
"eval_mean_distance": 5,
"eval_runtime": 8.5887,
"eval_samples_per_second": 58.216,
"eval_steps_per_second": 1.164,
"step": 9504
},
{
"epoch": 0.83,
"learning_rate": 0.0007317381801449442,
"loss": 0.075,
"step": 9680
},
{
"epoch": 0.83,
"eval_loss": 0.14720940589904785,
"eval_max_distance": 138,
"eval_mean_distance": 4,
"eval_runtime": 8.12,
"eval_samples_per_second": 61.576,
"eval_steps_per_second": 1.232,
"step": 9680
},
{
"epoch": 0.84,
"learning_rate": 0.000726676636374094,
"loss": 0.0785,
"step": 9856
},
{
"epoch": 0.84,
"eval_loss": 0.15077313780784607,
"eval_max_distance": 134,
"eval_mean_distance": 5,
"eval_runtime": 7.0224,
"eval_samples_per_second": 71.201,
"eval_steps_per_second": 1.424,
"step": 9856
},
{
"epoch": 0.86,
"learning_rate": 0.000721615092603244,
"loss": 0.0696,
"step": 10032
},
{
"epoch": 0.86,
"eval_loss": 0.1496003121137619,
"eval_max_distance": 147,
"eval_mean_distance": 4,
"eval_runtime": 8.4446,
"eval_samples_per_second": 59.209,
"eval_steps_per_second": 1.184,
"step": 10032
},
{
"epoch": 0.87,
"learning_rate": 0.0007165535488323939,
"loss": 0.0697,
"step": 10208
},
{
"epoch": 0.87,
"eval_loss": 0.14819692075252533,
"eval_max_distance": 133,
"eval_mean_distance": 4,
"eval_runtime": 7.7516,
"eval_samples_per_second": 64.503,
"eval_steps_per_second": 1.29,
"step": 10208
},
{
"epoch": 0.89,
"learning_rate": 0.0007114920050615437,
"loss": 0.0654,
"step": 10384
},
{
"epoch": 0.89,
"eval_loss": 0.14708730578422546,
"eval_max_distance": 109,
"eval_mean_distance": 4,
"eval_runtime": 7.7796,
"eval_samples_per_second": 64.271,
"eval_steps_per_second": 1.285,
"step": 10384
},
{
"epoch": 0.9,
"learning_rate": 0.0007064304612906937,
"loss": 0.0656,
"step": 10560
},
{
"epoch": 0.9,
"eval_loss": 0.14370670914649963,
"eval_max_distance": 111,
"eval_mean_distance": 4,
"eval_runtime": 7.3904,
"eval_samples_per_second": 67.655,
"eval_steps_per_second": 1.353,
"step": 10560
},
{
"epoch": 0.92,
"learning_rate": 0.0007013689175198436,
"loss": 0.0617,
"step": 10736
},
{
"epoch": 0.92,
"eval_loss": 0.1476200520992279,
"eval_max_distance": 127,
"eval_mean_distance": 5,
"eval_runtime": 8.6231,
"eval_samples_per_second": 57.984,
"eval_steps_per_second": 1.16,
"step": 10736
},
{
"epoch": 0.93,
"learning_rate": 0.0006963073737489934,
"loss": 0.0619,
"step": 10912
},
{
"epoch": 0.93,
"eval_loss": 0.14763715863227844,
"eval_max_distance": 122,
"eval_mean_distance": 4,
"eval_runtime": 8.0149,
"eval_samples_per_second": 62.384,
"eval_steps_per_second": 1.248,
"step": 10912
},
{
"epoch": 0.95,
"learning_rate": 0.0006912458299781433,
"loss": 0.0597,
"step": 11088
},
{
"epoch": 0.95,
"eval_loss": 0.1550969034433365,
"eval_max_distance": 130,
"eval_mean_distance": 5,
"eval_runtime": 8.7957,
"eval_samples_per_second": 56.846,
"eval_steps_per_second": 1.137,
"step": 11088
},
{
"epoch": 0.96,
"learning_rate": 0.0006861842862072933,
"loss": 0.0575,
"step": 11264
},
{
"epoch": 0.96,
"eval_loss": 0.15137550234794617,
"eval_max_distance": 124,
"eval_mean_distance": 5,
"eval_runtime": 8.729,
"eval_samples_per_second": 57.28,
"eval_steps_per_second": 1.146,
"step": 11264
},
{
"epoch": 0.98,
"learning_rate": 0.0006811227424364431,
"loss": 0.0575,
"step": 11440
},
{
"epoch": 0.98,
"eval_loss": 0.15129156410694122,
"eval_max_distance": 133,
"eval_mean_distance": 4,
"eval_runtime": 8.3361,
"eval_samples_per_second": 59.98,
"eval_steps_per_second": 1.2,
"step": 11440
},
{
"epoch": 0.99,
"learning_rate": 0.000676061198665593,
"loss": 0.0561,
"step": 11616
},
{
"epoch": 0.99,
"eval_loss": 0.14808788895606995,
"eval_max_distance": 135,
"eval_mean_distance": 5,
"eval_runtime": 8.5181,
"eval_samples_per_second": 58.699,
"eval_steps_per_second": 1.174,
"step": 11616
},
{
"epoch": 1.01,
"learning_rate": 0.000670999654894743,
"loss": 0.2917,
"step": 11792
},
{
"epoch": 1.01,
"eval_loss": 0.11704121530056,
"eval_max_distance": 77,
"eval_mean_distance": 3,
"eval_runtime": 8.4947,
"eval_samples_per_second": 58.86,
"eval_steps_per_second": 1.177,
"step": 11792
},
{
"epoch": 1.02,
"learning_rate": 0.0006659381111238928,
"loss": 0.2267,
"step": 11968
},
{
"epoch": 1.02,
"eval_loss": 0.09986243396997452,
"eval_max_distance": 76,
"eval_mean_distance": 3,
"eval_runtime": 8.3117,
"eval_samples_per_second": 60.156,
"eval_steps_per_second": 1.203,
"step": 11968
},
{
"epoch": 1.04,
"learning_rate": 0.0006608765673530427,
"loss": 0.1755,
"step": 12144
},
{
"epoch": 1.04,
"eval_loss": 0.09780210256576538,
"eval_max_distance": 100,
"eval_mean_distance": 2,
"eval_runtime": 7.2356,
"eval_samples_per_second": 69.103,
"eval_steps_per_second": 1.382,
"step": 12144
},
{
"epoch": 1.05,
"learning_rate": 0.0006558150235821925,
"loss": 0.1476,
"step": 12320
},
{
"epoch": 1.05,
"eval_loss": 0.09662258625030518,
"eval_max_distance": 77,
"eval_mean_distance": 2,
"eval_runtime": 7.9894,
"eval_samples_per_second": 62.583,
"eval_steps_per_second": 1.252,
"step": 12320
},
{
"epoch": 1.07,
"learning_rate": 0.0006507534798113425,
"loss": 0.1283,
"step": 12496
},
{
"epoch": 1.07,
"eval_loss": 0.09604214131832123,
"eval_max_distance": 92,
"eval_mean_distance": 2,
"eval_runtime": 9.1331,
"eval_samples_per_second": 54.746,
"eval_steps_per_second": 1.095,
"step": 12496
},
{
"epoch": 1.08,
"learning_rate": 0.0006456919360404924,
"loss": 0.117,
"step": 12672
},
{
"epoch": 1.08,
"eval_loss": 0.08993188291788101,
"eval_max_distance": 82,
"eval_mean_distance": 2,
"eval_runtime": 8.194,
"eval_samples_per_second": 61.02,
"eval_steps_per_second": 1.22,
"step": 12672
},
{
"epoch": 1.1,
"learning_rate": 0.0006406303922696422,
"loss": 0.1077,
"step": 12848
},
{
"epoch": 1.1,
"eval_loss": 0.09431099146604538,
"eval_max_distance": 76,
"eval_mean_distance": 2,
"eval_runtime": 7.8825,
"eval_samples_per_second": 63.432,
"eval_steps_per_second": 1.269,
"step": 12848
},
{
"epoch": 1.11,
"learning_rate": 0.0006355688484987922,
"loss": 0.1023,
"step": 13024
},
{
"epoch": 1.11,
"eval_loss": 0.08808578550815582,
"eval_max_distance": 71,
"eval_mean_distance": 2,
"eval_runtime": 9.0408,
"eval_samples_per_second": 55.305,
"eval_steps_per_second": 1.106,
"step": 13024
},
{
"epoch": 1.13,
"learning_rate": 0.0006305073047279421,
"loss": 0.0975,
"step": 13200
},
{
"epoch": 1.13,
"eval_loss": 0.09387736767530441,
"eval_max_distance": 61,
"eval_mean_distance": 2,
"eval_runtime": 7.3392,
"eval_samples_per_second": 68.127,
"eval_steps_per_second": 1.363,
"step": 13200
},
{
"epoch": 1.14,
"learning_rate": 0.0006254457609570919,
"loss": 0.0959,
"step": 13376
},
{
"epoch": 1.14,
"eval_loss": 0.09010987728834152,
"eval_max_distance": 91,
"eval_mean_distance": 2,
"eval_runtime": 8.2013,
"eval_samples_per_second": 60.966,
"eval_steps_per_second": 1.219,
"step": 13376
},
{
"epoch": 1.16,
"learning_rate": 0.0006203842171862418,
"loss": 0.0933,
"step": 13552
},
{
"epoch": 1.16,
"eval_loss": 0.09695952385663986,
"eval_max_distance": 84,
"eval_mean_distance": 3,
"eval_runtime": 7.5998,
"eval_samples_per_second": 65.792,
"eval_steps_per_second": 1.316,
"step": 13552
},
{
"epoch": 1.17,
"learning_rate": 0.0006153226734153918,
"loss": 0.0907,
"step": 13728
},
{
"epoch": 1.17,
"eval_loss": 0.0932065024971962,
"eval_max_distance": 81,
"eval_mean_distance": 2,
"eval_runtime": 7.5398,
"eval_samples_per_second": 66.314,
"eval_steps_per_second": 1.326,
"step": 13728
},
{
"epoch": 1.19,
"learning_rate": 0.0006102611296445416,
"loss": 0.09,
"step": 13904
},
{
"epoch": 1.19,
"eval_loss": 0.09407244622707367,
"eval_max_distance": 83,
"eval_mean_distance": 2,
"eval_runtime": 8.8256,
"eval_samples_per_second": 56.653,
"eval_steps_per_second": 1.133,
"step": 13904
},
{
"epoch": 1.2,
"learning_rate": 0.0006051995858736915,
"loss": 0.0873,
"step": 14080
},
{
"epoch": 1.2,
"eval_loss": 0.09223543852567673,
"eval_max_distance": 72,
"eval_mean_distance": 2,
"eval_runtime": 8.3913,
"eval_samples_per_second": 59.586,
"eval_steps_per_second": 1.192,
"step": 14080
},
{
"epoch": 1.22,
"learning_rate": 0.0006001380421028413,
"loss": 0.089,
"step": 14256
},
{
"epoch": 1.22,
"eval_loss": 0.09844117611646652,
"eval_max_distance": 61,
"eval_mean_distance": 3,
"eval_runtime": 8.6496,
"eval_samples_per_second": 57.806,
"eval_steps_per_second": 1.156,
"step": 14256
},
{
"epoch": 1.23,
"learning_rate": 0.0005950764983319913,
"loss": 0.0849,
"step": 14432
},
{
"epoch": 1.23,
"eval_loss": 0.09052952378988266,
"eval_max_distance": 80,
"eval_mean_distance": 2,
"eval_runtime": 8.703,
"eval_samples_per_second": 57.452,
"eval_steps_per_second": 1.149,
"step": 14432
},
{
"epoch": 1.25,
"learning_rate": 0.0005900149545611412,
"loss": 0.0842,
"step": 14608
},
{
"epoch": 1.25,
"eval_loss": 0.09249469637870789,
"eval_max_distance": 76,
"eval_mean_distance": 2,
"eval_runtime": 10.036,
"eval_samples_per_second": 49.821,
"eval_steps_per_second": 0.996,
"step": 14608
},
{
"epoch": 1.26,
"learning_rate": 0.000584953410790291,
"loss": 0.0866,
"step": 14784
},
{
"epoch": 1.26,
"eval_loss": 0.09183748811483383,
"eval_max_distance": 82,
"eval_mean_distance": 2,
"eval_runtime": 7.2806,
"eval_samples_per_second": 68.676,
"eval_steps_per_second": 1.374,
"step": 14784
},
{
"epoch": 1.28,
"learning_rate": 0.000579891867019441,
"loss": 0.0841,
"step": 14960
},
{
"epoch": 1.28,
"eval_loss": 0.09966301172971725,
"eval_max_distance": 62,
"eval_mean_distance": 2,
"eval_runtime": 9.5849,
"eval_samples_per_second": 52.165,
"eval_steps_per_second": 1.043,
"step": 14960
},
{
"epoch": 1.29,
"learning_rate": 0.0005748303232485909,
"loss": 0.0828,
"step": 15136
},
{
"epoch": 1.29,
"eval_loss": 0.10113956034183502,
"eval_max_distance": 83,
"eval_mean_distance": 3,
"eval_runtime": 9.7885,
"eval_samples_per_second": 51.081,
"eval_steps_per_second": 1.022,
"step": 15136
},
{
"epoch": 1.31,
"learning_rate": 0.0005697687794777407,
"loss": 0.0788,
"step": 15312
},
{
"epoch": 1.31,
"eval_loss": 0.10437356680631638,
"eval_max_distance": 59,
"eval_mean_distance": 3,
"eval_runtime": 11.3278,
"eval_samples_per_second": 44.139,
"eval_steps_per_second": 0.883,
"step": 15312
},
{
"epoch": 1.32,
"learning_rate": 0.0005647072357068907,
"loss": 0.0795,
"step": 15488
},
{
"epoch": 1.32,
"eval_loss": 0.0787021666765213,
"eval_max_distance": 84,
"eval_mean_distance": 2,
"eval_runtime": 7.3395,
"eval_samples_per_second": 68.125,
"eval_steps_per_second": 1.362,
"step": 15488
},
{
"epoch": 1.34,
"learning_rate": 0.0005596456919360406,
"loss": 0.0767,
"step": 15664
},
{
"epoch": 1.34,
"eval_loss": 0.0796855166554451,
"eval_max_distance": 65,
"eval_mean_distance": 2,
"eval_runtime": 6.4769,
"eval_samples_per_second": 77.198,
"eval_steps_per_second": 1.544,
"step": 15664
},
{
"epoch": 1.35,
"learning_rate": 0.0005545841481651904,
"loss": 0.0745,
"step": 15840
},
{
"epoch": 1.35,
"eval_loss": 0.07943727821111679,
"eval_max_distance": 80,
"eval_mean_distance": 2,
"eval_runtime": 7.2103,
"eval_samples_per_second": 69.345,
"eval_steps_per_second": 1.387,
"step": 15840
},
{
"epoch": 1.37,
"learning_rate": 0.0005495226043943403,
"loss": 0.0711,
"step": 16016
},
{
"epoch": 1.37,
"eval_loss": 0.07364234328269958,
"eval_max_distance": 87,
"eval_mean_distance": 2,
"eval_runtime": 5.8839,
"eval_samples_per_second": 84.978,
"eval_steps_per_second": 1.7,
"step": 16016
},
{
"epoch": 1.38,
"learning_rate": 0.0005444610606234902,
"loss": 0.0701,
"step": 16192
},
{
"epoch": 1.38,
"eval_loss": 0.07703561335802078,
"eval_max_distance": 83,
"eval_mean_distance": 2,
"eval_runtime": 7.9093,
"eval_samples_per_second": 63.217,
"eval_steps_per_second": 1.264,
"step": 16192
},
{
"epoch": 1.4,
"learning_rate": 0.0005393995168526401,
"loss": 0.0659,
"step": 16368
},
{
"epoch": 1.4,
"eval_loss": 0.0723421648144722,
"eval_max_distance": 76,
"eval_mean_distance": 2,
"eval_runtime": 7.5303,
"eval_samples_per_second": 66.398,
"eval_steps_per_second": 1.328,
"step": 16368
},
{
"epoch": 1.41,
"learning_rate": 0.00053433797308179,
"loss": 0.0661,
"step": 16544
},
{
"epoch": 1.41,
"eval_loss": 0.07238639891147614,
"eval_max_distance": 80,
"eval_mean_distance": 2,
"eval_runtime": 6.7269,
"eval_samples_per_second": 74.328,
"eval_steps_per_second": 1.487,
"step": 16544
},
{
"epoch": 1.43,
"learning_rate": 0.0005292764293109398,
"loss": 0.0639,
"step": 16720
},
{
"epoch": 1.43,
"eval_loss": 0.07442823052406311,
"eval_max_distance": 74,
"eval_mean_distance": 2,
"eval_runtime": 5.4344,
"eval_samples_per_second": 92.007,
"eval_steps_per_second": 1.84,
"step": 16720
},
{
"epoch": 1.44,
"learning_rate": 0.0005242148855400898,
"loss": 0.0616,
"step": 16896
},
{
"epoch": 1.44,
"eval_loss": 0.07285340130329132,
"eval_max_distance": 64,
"eval_mean_distance": 2,
"eval_runtime": 6.0352,
"eval_samples_per_second": 82.848,
"eval_steps_per_second": 1.657,
"step": 16896
},
{
"epoch": 1.46,
"learning_rate": 0.0005191533417692397,
"loss": 0.0585,
"step": 17072
},
{
"epoch": 1.46,
"eval_loss": 0.07329737395048141,
"eval_max_distance": 79,
"eval_mean_distance": 2,
"eval_runtime": 6.7646,
"eval_samples_per_second": 73.914,
"eval_steps_per_second": 1.478,
"step": 17072
},
{
"epoch": 1.47,
"learning_rate": 0.0005140917979983895,
"loss": 0.0587,
"step": 17248
},
{
"epoch": 1.47,
"eval_loss": 0.07280145585536957,
"eval_max_distance": 81,
"eval_mean_distance": 2,
"eval_runtime": 6.8294,
"eval_samples_per_second": 73.212,
"eval_steps_per_second": 1.464,
"step": 17248
},
{
"epoch": 1.49,
"learning_rate": 0.0005090302542275395,
"loss": 0.0565,
"step": 17424
},
{
"epoch": 1.49,
"eval_loss": 0.07057639956474304,
"eval_max_distance": 83,
"eval_mean_distance": 2,
"eval_runtime": 6.5899,
"eval_samples_per_second": 75.874,
"eval_steps_per_second": 1.517,
"step": 17424
},
{
"epoch": 1.5,
"learning_rate": 0.0005039687104566894,
"loss": 0.056,
"step": 17600
},
{
"epoch": 1.5,
"eval_loss": 0.06988174468278885,
"eval_max_distance": 69,
"eval_mean_distance": 2,
"eval_runtime": 6.7807,
"eval_samples_per_second": 73.739,
"eval_steps_per_second": 1.475,
"step": 17600
},
{
"epoch": 1.52,
"learning_rate": 0.0004989071666858392,
"loss": 0.0544,
"step": 17776
},
{
"epoch": 1.52,
"eval_loss": 0.06715495139360428,
"eval_max_distance": 65,
"eval_mean_distance": 1,
"eval_runtime": 6.3191,
"eval_samples_per_second": 79.126,
"eval_steps_per_second": 1.583,
"step": 17776
},
{
"epoch": 1.53,
"learning_rate": 0.000493845622914989,
"loss": 0.0524,
"step": 17952
},
{
"epoch": 1.53,
"eval_loss": 0.06758233904838562,
"eval_max_distance": 86,
"eval_mean_distance": 1,
"eval_runtime": 6.0088,
"eval_samples_per_second": 83.212,
"eval_steps_per_second": 1.664,
"step": 17952
},
{
"epoch": 1.55,
"learning_rate": 0.000488784079144139,
"loss": 0.052,
"step": 18128
},
{
"epoch": 1.55,
"eval_loss": 0.06754262000322342,
"eval_max_distance": 82,
"eval_mean_distance": 1,
"eval_runtime": 5.502,
"eval_samples_per_second": 90.876,
"eval_steps_per_second": 1.818,
"step": 18128
},
{
"epoch": 1.56,
"learning_rate": 0.00048372253537328885,
"loss": 0.0502,
"step": 18304
},
{
"epoch": 1.56,
"eval_loss": 0.06616352498531342,
"eval_max_distance": 78,
"eval_mean_distance": 1,
"eval_runtime": 6.1322,
"eval_samples_per_second": 81.537,
"eval_steps_per_second": 1.631,
"step": 18304
},
{
"epoch": 1.58,
"learning_rate": 0.0004786609916024388,
"loss": 0.0499,
"step": 18480
},
{
"epoch": 1.58,
"eval_loss": 0.06735648959875107,
"eval_max_distance": 75,
"eval_mean_distance": 1,
"eval_runtime": 6.1213,
"eval_samples_per_second": 81.683,
"eval_steps_per_second": 1.634,
"step": 18480
},
{
"epoch": 1.59,
"learning_rate": 0.00047359944783158866,
"loss": 0.0475,
"step": 18656
},
{
"epoch": 1.59,
"eval_loss": 0.0657699704170227,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 6.434,
"eval_samples_per_second": 77.713,
"eval_steps_per_second": 1.554,
"step": 18656
},
{
"epoch": 1.61,
"learning_rate": 0.00046853790406073853,
"loss": 0.0486,
"step": 18832
},
{
"epoch": 1.61,
"eval_loss": 0.06461019814014435,
"eval_max_distance": 77,
"eval_mean_distance": 1,
"eval_runtime": 4.2565,
"eval_samples_per_second": 117.468,
"eval_steps_per_second": 2.349,
"step": 18832
},
{
"epoch": 1.62,
"learning_rate": 0.0004634763602898884,
"loss": 0.0458,
"step": 19008
},
{
"epoch": 1.62,
"eval_loss": 0.06515143066644669,
"eval_max_distance": 97,
"eval_mean_distance": 1,
"eval_runtime": 5.9339,
"eval_samples_per_second": 84.261,
"eval_steps_per_second": 1.685,
"step": 19008
},
{
"epoch": 1.64,
"learning_rate": 0.00045841481651903834,
"loss": 0.0462,
"step": 19184
},
{
"epoch": 1.64,
"eval_loss": 0.0647222101688385,
"eval_max_distance": 86,
"eval_mean_distance": 1,
"eval_runtime": 4.7775,
"eval_samples_per_second": 104.658,
"eval_steps_per_second": 2.093,
"step": 19184
},
{
"epoch": 1.65,
"learning_rate": 0.0004533532727481882,
"loss": 0.0443,
"step": 19360
},
{
"epoch": 1.65,
"eval_loss": 0.06631914526224136,
"eval_max_distance": 92,
"eval_mean_distance": 1,
"eval_runtime": 6.4448,
"eval_samples_per_second": 77.582,
"eval_steps_per_second": 1.552,
"step": 19360
},
{
"epoch": 1.67,
"learning_rate": 0.0004482917289773381,
"loss": 0.0444,
"step": 19536
},
{
"epoch": 1.67,
"eval_loss": 0.06480421125888824,
"eval_max_distance": 79,
"eval_mean_distance": 1,
"eval_runtime": 5.8948,
"eval_samples_per_second": 84.82,
"eval_steps_per_second": 1.696,
"step": 19536
},
{
"epoch": 1.68,
"learning_rate": 0.00044323018520648803,
"loss": 0.0423,
"step": 19712
},
{
"epoch": 1.68,
"eval_loss": 0.06277700513601303,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 5.8803,
"eval_samples_per_second": 85.03,
"eval_steps_per_second": 1.701,
"step": 19712
},
{
"epoch": 1.7,
"learning_rate": 0.0004381686414356379,
"loss": 0.0419,
"step": 19888
},
{
"epoch": 1.7,
"eval_loss": 0.06238849461078644,
"eval_max_distance": 70,
"eval_mean_distance": 1,
"eval_runtime": 4.6974,
"eval_samples_per_second": 106.443,
"eval_steps_per_second": 2.129,
"step": 19888
},
{
"epoch": 1.71,
"learning_rate": 0.0004331070976647878,
"loss": 0.0409,
"step": 20064
},
{
"epoch": 1.71,
"eval_loss": 0.06289780884981155,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 5.5388,
"eval_samples_per_second": 90.272,
"eval_steps_per_second": 1.805,
"step": 20064
},
{
"epoch": 1.73,
"learning_rate": 0.00042804555389393766,
"loss": 0.0402,
"step": 20240
},
{
"epoch": 1.73,
"eval_loss": 0.06532587110996246,
"eval_max_distance": 79,
"eval_mean_distance": 1,
"eval_runtime": 5.4649,
"eval_samples_per_second": 91.493,
"eval_steps_per_second": 1.83,
"step": 20240
},
{
"epoch": 1.74,
"learning_rate": 0.0004229840101230876,
"loss": 0.0405,
"step": 20416
},
{
"epoch": 1.74,
"eval_loss": 0.06438089162111282,
"eval_max_distance": 72,
"eval_mean_distance": 1,
"eval_runtime": 5.8994,
"eval_samples_per_second": 84.754,
"eval_steps_per_second": 1.695,
"step": 20416
},
{
"epoch": 1.76,
"learning_rate": 0.00041792246635223747,
"loss": 0.0374,
"step": 20592
},
{
"epoch": 1.76,
"eval_loss": 0.06247664615511894,
"eval_max_distance": 77,
"eval_mean_distance": 1,
"eval_runtime": 5.1384,
"eval_samples_per_second": 97.307,
"eval_steps_per_second": 1.946,
"step": 20592
},
{
"epoch": 1.77,
"learning_rate": 0.00041286092258138734,
"loss": 0.039,
"step": 20768
},
{
"epoch": 1.77,
"eval_loss": 0.06493379175662994,
"eval_max_distance": 96,
"eval_mean_distance": 1,
"eval_runtime": 4.5781,
"eval_samples_per_second": 109.216,
"eval_steps_per_second": 2.184,
"step": 20768
},
{
"epoch": 1.79,
"learning_rate": 0.0004077993788105372,
"loss": 0.0374,
"step": 20944
},
{
"epoch": 1.79,
"eval_loss": 0.06642530113458633,
"eval_max_distance": 73,
"eval_mean_distance": 1,
"eval_runtime": 6.1825,
"eval_samples_per_second": 80.873,
"eval_steps_per_second": 1.617,
"step": 20944
},
{
"epoch": 1.8,
"learning_rate": 0.00040273783503968715,
"loss": 0.0372,
"step": 21120
},
{
"epoch": 1.8,
"eval_loss": 0.0631885975599289,
"eval_max_distance": 70,
"eval_mean_distance": 1,
"eval_runtime": 5.5472,
"eval_samples_per_second": 90.135,
"eval_steps_per_second": 1.803,
"step": 21120
},
{
"epoch": 1.82,
"learning_rate": 0.000397676291268837,
"loss": 0.0356,
"step": 21296
},
{
"epoch": 1.82,
"eval_loss": 0.06364666670560837,
"eval_max_distance": 73,
"eval_mean_distance": 1,
"eval_runtime": 5.534,
"eval_samples_per_second": 90.351,
"eval_steps_per_second": 1.807,
"step": 21296
},
{
"epoch": 1.83,
"learning_rate": 0.00039261474749798685,
"loss": 0.0364,
"step": 21472
},
{
"epoch": 1.83,
"eval_loss": 0.06835252046585083,
"eval_max_distance": 101,
"eval_mean_distance": 1,
"eval_runtime": 5.667,
"eval_samples_per_second": 88.23,
"eval_steps_per_second": 1.765,
"step": 21472
},
{
"epoch": 1.85,
"learning_rate": 0.0003875532037271368,
"loss": 0.0349,
"step": 21648
},
{
"epoch": 1.85,
"eval_loss": 0.06463531404733658,
"eval_max_distance": 81,
"eval_mean_distance": 1,
"eval_runtime": 5.6807,
"eval_samples_per_second": 88.018,
"eval_steps_per_second": 1.76,
"step": 21648
},
{
"epoch": 1.86,
"learning_rate": 0.00038249165995628666,
"loss": 0.0338,
"step": 21824
},
{
"epoch": 1.86,
"eval_loss": 0.061899635940790176,
"eval_max_distance": 87,
"eval_mean_distance": 1,
"eval_runtime": 5.5863,
"eval_samples_per_second": 89.505,
"eval_steps_per_second": 1.79,
"step": 21824
},
{
"epoch": 1.88,
"learning_rate": 0.00037743011618543654,
"loss": 0.0332,
"step": 22000
},
{
"epoch": 1.88,
"eval_loss": 0.06767092645168304,
"eval_max_distance": 91,
"eval_mean_distance": 2,
"eval_runtime": 6.1428,
"eval_samples_per_second": 81.396,
"eval_steps_per_second": 1.628,
"step": 22000
},
{
"epoch": 1.89,
"learning_rate": 0.0003723685724145864,
"loss": 0.0331,
"step": 22176
},
{
"epoch": 1.89,
"eval_loss": 0.06576185673475266,
"eval_max_distance": 93,
"eval_mean_distance": 2,
"eval_runtime": 5.8963,
"eval_samples_per_second": 84.799,
"eval_steps_per_second": 1.696,
"step": 22176
},
{
"epoch": 1.91,
"learning_rate": 0.00036730702864373635,
"loss": 0.0324,
"step": 22352
},
{
"epoch": 1.91,
"eval_loss": 0.0631786659359932,
"eval_max_distance": 89,
"eval_mean_distance": 1,
"eval_runtime": 5.3685,
"eval_samples_per_second": 93.135,
"eval_steps_per_second": 1.863,
"step": 22352
},
{
"epoch": 1.92,
"learning_rate": 0.0003622454848728862,
"loss": 0.0324,
"step": 22528
},
{
"epoch": 1.92,
"eval_loss": 0.06909012049436569,
"eval_max_distance": 85,
"eval_mean_distance": 2,
"eval_runtime": 5.8638,
"eval_samples_per_second": 85.269,
"eval_steps_per_second": 1.705,
"step": 22528
},
{
"epoch": 1.94,
"learning_rate": 0.0003571839411020361,
"loss": 0.0318,
"step": 22704
},
{
"epoch": 1.94,
"eval_loss": 0.06905217468738556,
"eval_max_distance": 91,
"eval_mean_distance": 1,
"eval_runtime": 5.4271,
"eval_samples_per_second": 92.131,
"eval_steps_per_second": 1.843,
"step": 22704
},
{
"epoch": 1.95,
"learning_rate": 0.000352122397331186,
"loss": 0.031,
"step": 22880
},
{
"epoch": 1.95,
"eval_loss": 0.0721253752708435,
"eval_max_distance": 89,
"eval_mean_distance": 2,
"eval_runtime": 6.1894,
"eval_samples_per_second": 80.783,
"eval_steps_per_second": 1.616,
"step": 22880
},
{
"epoch": 1.97,
"learning_rate": 0.0003470608535603359,
"loss": 0.0308,
"step": 23056
},
{
"epoch": 1.97,
"eval_loss": 0.06949847936630249,
"eval_max_distance": 71,
"eval_mean_distance": 2,
"eval_runtime": 5.7252,
"eval_samples_per_second": 87.333,
"eval_steps_per_second": 1.747,
"step": 23056
},
{
"epoch": 1.98,
"learning_rate": 0.0003419993097894858,
"loss": 0.0309,
"step": 23232
},
{
"epoch": 1.98,
"eval_loss": 0.07597502321004868,
"eval_max_distance": 99,
"eval_mean_distance": 2,
"eval_runtime": 5.6902,
"eval_samples_per_second": 87.87,
"eval_steps_per_second": 1.757,
"step": 23232
},
{
"epoch": 2.0,
"learning_rate": 0.00033693776601863567,
"loss": 0.0293,
"step": 23408
},
{
"epoch": 2.0,
"eval_loss": 0.0717659443616867,
"eval_max_distance": 101,
"eval_mean_distance": 2,
"eval_runtime": 5.9061,
"eval_samples_per_second": 84.658,
"eval_steps_per_second": 1.693,
"step": 23408
},
{
"epoch": 2.01,
"learning_rate": 0.0003318762222477856,
"loss": 0.1908,
"step": 23584
},
{
"epoch": 2.01,
"eval_loss": 0.05668208748102188,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 6.6014,
"eval_samples_per_second": 75.741,
"eval_steps_per_second": 1.515,
"step": 23584
},
{
"epoch": 2.03,
"learning_rate": 0.0003268146784769355,
"loss": 0.0875,
"step": 23760
},
{
"epoch": 2.03,
"eval_loss": 0.058700818568468094,
"eval_max_distance": 35,
"eval_mean_distance": 1,
"eval_runtime": 5.9161,
"eval_samples_per_second": 84.516,
"eval_steps_per_second": 1.69,
"step": 23760
},
{
"epoch": 2.04,
"learning_rate": 0.00032175313470608535,
"loss": 0.0773,
"step": 23936
},
{
"epoch": 2.04,
"eval_loss": 0.05530280992388725,
"eval_max_distance": 41,
"eval_mean_distance": 1,
"eval_runtime": 7.0323,
"eval_samples_per_second": 71.1,
"eval_steps_per_second": 1.422,
"step": 23936
},
{
"epoch": 2.06,
"learning_rate": 0.00031669159093523523,
"loss": 0.0678,
"step": 24112
},
{
"epoch": 2.06,
"eval_loss": 0.056951854377985,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 6.8239,
"eval_samples_per_second": 73.272,
"eval_steps_per_second": 1.465,
"step": 24112
},
{
"epoch": 2.07,
"learning_rate": 0.00031163004716438516,
"loss": 0.0625,
"step": 24288
},
{
"epoch": 2.07,
"eval_loss": 0.05978156253695488,
"eval_max_distance": 53,
"eval_mean_distance": 1,
"eval_runtime": 6.0447,
"eval_samples_per_second": 82.717,
"eval_steps_per_second": 1.654,
"step": 24288
},
{
"epoch": 2.09,
"learning_rate": 0.00030656850339353504,
"loss": 0.0603,
"step": 24464
},
{
"epoch": 2.09,
"eval_loss": 0.057503603398799896,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 8.2715,
"eval_samples_per_second": 60.449,
"eval_steps_per_second": 1.209,
"step": 24464
},
{
"epoch": 2.1,
"learning_rate": 0.0003015069596226849,
"loss": 0.0557,
"step": 24640
},
{
"epoch": 2.1,
"eval_loss": 0.05871723219752312,
"eval_max_distance": 38,
"eval_mean_distance": 1,
"eval_runtime": 8.307,
"eval_samples_per_second": 60.19,
"eval_steps_per_second": 1.204,
"step": 24640
},
{
"epoch": 2.12,
"learning_rate": 0.00029644541585183485,
"loss": 0.0549,
"step": 24816
},
{
"epoch": 2.12,
"eval_loss": 0.057098135352134705,
"eval_max_distance": 43,
"eval_mean_distance": 1,
"eval_runtime": 7.5317,
"eval_samples_per_second": 66.386,
"eval_steps_per_second": 1.328,
"step": 24816
},
{
"epoch": 2.13,
"learning_rate": 0.0002913838720809847,
"loss": 0.0536,
"step": 24992
},
{
"epoch": 2.13,
"eval_loss": 0.05979160591959953,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 8.5051,
"eval_samples_per_second": 58.788,
"eval_steps_per_second": 1.176,
"step": 24992
},
{
"epoch": 2.15,
"learning_rate": 0.0002863223283101346,
"loss": 0.0524,
"step": 25168
},
{
"epoch": 2.15,
"eval_loss": 0.06075568497180939,
"eval_max_distance": 49,
"eval_mean_distance": 1,
"eval_runtime": 8.3407,
"eval_samples_per_second": 59.947,
"eval_steps_per_second": 1.199,
"step": 25168
},
{
"epoch": 2.16,
"learning_rate": 0.0002812607845392845,
"loss": 0.0511,
"step": 25344
},
{
"epoch": 2.16,
"eval_loss": 0.057893384248018265,
"eval_max_distance": 55,
"eval_mean_distance": 1,
"eval_runtime": 8.3435,
"eval_samples_per_second": 59.927,
"eval_steps_per_second": 1.199,
"step": 25344
},
{
"epoch": 2.18,
"learning_rate": 0.0002761992407684344,
"loss": 0.0529,
"step": 25520
},
{
"epoch": 2.18,
"eval_loss": 0.059256672859191895,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 8.2791,
"eval_samples_per_second": 60.393,
"eval_steps_per_second": 1.208,
"step": 25520
},
{
"epoch": 2.19,
"learning_rate": 0.0002711376969975843,
"loss": 0.0518,
"step": 25696
},
{
"epoch": 2.19,
"eval_loss": 0.056909676641225815,
"eval_max_distance": 60,
"eval_mean_distance": 1,
"eval_runtime": 6.9914,
"eval_samples_per_second": 71.516,
"eval_steps_per_second": 1.43,
"step": 25696
},
{
"epoch": 2.21,
"learning_rate": 0.00026607615322673416,
"loss": 0.0512,
"step": 25872
},
{
"epoch": 2.21,
"eval_loss": 0.059548597782850266,
"eval_max_distance": 60,
"eval_mean_distance": 1,
"eval_runtime": 7.847,
"eval_samples_per_second": 63.719,
"eval_steps_per_second": 1.274,
"step": 25872
},
{
"epoch": 2.22,
"learning_rate": 0.0002610146094558841,
"loss": 0.0512,
"step": 26048
},
{
"epoch": 2.22,
"eval_loss": 0.06114144250750542,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 7.4839,
"eval_samples_per_second": 66.81,
"eval_steps_per_second": 1.336,
"step": 26048
},
{
"epoch": 2.24,
"learning_rate": 0.00025595306568503397,
"loss": 0.0507,
"step": 26224
},
{
"epoch": 2.24,
"eval_loss": 0.056723106652498245,
"eval_max_distance": 50,
"eval_mean_distance": 1,
"eval_runtime": 7.1155,
"eval_samples_per_second": 70.269,
"eval_steps_per_second": 1.405,
"step": 26224
},
{
"epoch": 2.25,
"learning_rate": 0.00025089152191418385,
"loss": 0.0506,
"step": 26400
},
{
"epoch": 2.25,
"eval_loss": 0.058637164533138275,
"eval_max_distance": 41,
"eval_mean_distance": 1,
"eval_runtime": 7.7501,
"eval_samples_per_second": 64.515,
"eval_steps_per_second": 1.29,
"step": 26400
},
{
"epoch": 2.27,
"learning_rate": 0.0002458299781433337,
"loss": 0.0499,
"step": 26576
},
{
"epoch": 2.27,
"eval_loss": 0.06316396594047546,
"eval_max_distance": 50,
"eval_mean_distance": 1,
"eval_runtime": 7.0322,
"eval_samples_per_second": 71.102,
"eval_steps_per_second": 1.422,
"step": 26576
},
{
"epoch": 2.28,
"learning_rate": 0.0002407684343724836,
"loss": 0.05,
"step": 26752
},
{
"epoch": 2.28,
"eval_loss": 0.060861945152282715,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 8.5106,
"eval_samples_per_second": 58.75,
"eval_steps_per_second": 1.175,
"step": 26752
},
{
"epoch": 2.3,
"learning_rate": 0.0002357068906016335,
"loss": 0.0499,
"step": 26928
},
{
"epoch": 2.3,
"eval_loss": 0.05814690515398979,
"eval_max_distance": 54,
"eval_mean_distance": 1,
"eval_runtime": 8.0366,
"eval_samples_per_second": 62.215,
"eval_steps_per_second": 1.244,
"step": 26928
},
{
"epoch": 2.31,
"learning_rate": 0.00023064534683078338,
"loss": 0.0489,
"step": 27104
},
{
"epoch": 2.31,
"eval_loss": 0.05106068029999733,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 5.8676,
"eval_samples_per_second": 85.214,
"eval_steps_per_second": 1.704,
"step": 27104
},
{
"epoch": 2.33,
"learning_rate": 0.0002255838030599333,
"loss": 0.0485,
"step": 27280
},
{
"epoch": 2.33,
"eval_loss": 0.04777122661471367,
"eval_max_distance": 59,
"eval_mean_distance": 1,
"eval_runtime": 5.5204,
"eval_samples_per_second": 90.573,
"eval_steps_per_second": 1.811,
"step": 27280
},
{
"epoch": 2.35,
"learning_rate": 0.00022052225928908317,
"loss": 0.0468,
"step": 27456
},
{
"epoch": 2.35,
"eval_loss": 0.049591317772865295,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 5.7077,
"eval_samples_per_second": 87.601,
"eval_steps_per_second": 1.752,
"step": 27456
},
{
"epoch": 2.36,
"learning_rate": 0.00021546071551823307,
"loss": 0.0458,
"step": 27632
},
{
"epoch": 2.36,
"eval_loss": 0.04786006361246109,
"eval_max_distance": 54,
"eval_mean_distance": 1,
"eval_runtime": 5.6611,
"eval_samples_per_second": 88.322,
"eval_steps_per_second": 1.766,
"step": 27632
},
{
"epoch": 2.38,
"learning_rate": 0.00021039917174738297,
"loss": 0.0446,
"step": 27808
},
{
"epoch": 2.38,
"eval_loss": 0.04865054786205292,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 5.7404,
"eval_samples_per_second": 87.102,
"eval_steps_per_second": 1.742,
"step": 27808
},
{
"epoch": 2.39,
"learning_rate": 0.00020533762797653285,
"loss": 0.0433,
"step": 27984
},
{
"epoch": 2.39,
"eval_loss": 0.04700545221567154,
"eval_max_distance": 65,
"eval_mean_distance": 1,
"eval_runtime": 5.2552,
"eval_samples_per_second": 95.144,
"eval_steps_per_second": 1.903,
"step": 27984
},
{
"epoch": 2.41,
"learning_rate": 0.00020027608420568276,
"loss": 0.0424,
"step": 28160
},
{
"epoch": 2.41,
"eval_loss": 0.048602811992168427,
"eval_max_distance": 70,
"eval_mean_distance": 1,
"eval_runtime": 5.8464,
"eval_samples_per_second": 85.522,
"eval_steps_per_second": 1.71,
"step": 28160
},
{
"epoch": 2.42,
"learning_rate": 0.00019521454043483263,
"loss": 0.0418,
"step": 28336
},
{
"epoch": 2.42,
"eval_loss": 0.04602031037211418,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 5.4163,
"eval_samples_per_second": 92.315,
"eval_steps_per_second": 1.846,
"step": 28336
},
{
"epoch": 2.44,
"learning_rate": 0.00019015299666398254,
"loss": 0.0416,
"step": 28512
},
{
"epoch": 2.44,
"eval_loss": 0.046469803899526596,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 4.2978,
"eval_samples_per_second": 116.339,
"eval_steps_per_second": 2.327,
"step": 28512
},
{
"epoch": 2.45,
"learning_rate": 0.00018509145289313241,
"loss": 0.0394,
"step": 28688
},
{
"epoch": 2.45,
"eval_loss": 0.04603447765111923,
"eval_max_distance": 49,
"eval_mean_distance": 1,
"eval_runtime": 5.5843,
"eval_samples_per_second": 89.537,
"eval_steps_per_second": 1.791,
"step": 28688
},
{
"epoch": 2.47,
"learning_rate": 0.0001800299091222823,
"loss": 0.0384,
"step": 28864
},
{
"epoch": 2.47,
"eval_loss": 0.0456426702439785,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 5.1318,
"eval_samples_per_second": 97.431,
"eval_steps_per_second": 1.949,
"step": 28864
},
{
"epoch": 2.48,
"learning_rate": 0.00017496836535143217,
"loss": 0.039,
"step": 29040
},
{
"epoch": 2.48,
"eval_loss": 0.04492652416229248,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 5.3648,
"eval_samples_per_second": 93.2,
"eval_steps_per_second": 1.864,
"step": 29040
},
{
"epoch": 2.5,
"learning_rate": 0.00016990682158058207,
"loss": 0.0374,
"step": 29216
},
{
"epoch": 2.5,
"eval_loss": 0.04678362235426903,
"eval_max_distance": 59,
"eval_mean_distance": 1,
"eval_runtime": 5.1983,
"eval_samples_per_second": 96.186,
"eval_steps_per_second": 1.924,
"step": 29216
},
{
"epoch": 2.51,
"learning_rate": 0.00016484527780973198,
"loss": 0.037,
"step": 29392
},
{
"epoch": 2.51,
"eval_loss": 0.04649132117629051,
"eval_max_distance": 60,
"eval_mean_distance": 1,
"eval_runtime": 4.2517,
"eval_samples_per_second": 117.599,
"eval_steps_per_second": 2.352,
"step": 29392
},
{
"epoch": 2.53,
"learning_rate": 0.00015978373403888185,
"loss": 0.0352,
"step": 29568
},
{
"epoch": 2.53,
"eval_loss": 0.046459365636110306,
"eval_max_distance": 68,
"eval_mean_distance": 1,
"eval_runtime": 5.4243,
"eval_samples_per_second": 92.178,
"eval_steps_per_second": 1.844,
"step": 29568
},
{
"epoch": 2.54,
"learning_rate": 0.00015472219026803176,
"loss": 0.0358,
"step": 29744
},
{
"epoch": 2.54,
"eval_loss": 0.04598597064614296,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 4.2396,
"eval_samples_per_second": 117.937,
"eval_steps_per_second": 2.359,
"step": 29744
},
{
"epoch": 2.56,
"learning_rate": 0.00014966064649718164,
"loss": 0.034,
"step": 29920
},
{
"epoch": 2.56,
"eval_loss": 0.04516398161649704,
"eval_max_distance": 54,
"eval_mean_distance": 1,
"eval_runtime": 3.9094,
"eval_samples_per_second": 127.897,
"eval_steps_per_second": 2.558,
"step": 29920
},
{
"epoch": 2.57,
"learning_rate": 0.00014459910272633154,
"loss": 0.0346,
"step": 30096
},
{
"epoch": 2.57,
"eval_loss": 0.046647679060697556,
"eval_max_distance": 61,
"eval_mean_distance": 1,
"eval_runtime": 4.2262,
"eval_samples_per_second": 118.31,
"eval_steps_per_second": 2.366,
"step": 30096
},
{
"epoch": 2.59,
"learning_rate": 0.00013953755895548142,
"loss": 0.0335,
"step": 30272
},
{
"epoch": 2.59,
"eval_loss": 0.04539273679256439,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 4.5809,
"eval_samples_per_second": 109.148,
"eval_steps_per_second": 2.183,
"step": 30272
},
{
"epoch": 2.6,
"learning_rate": 0.00013447601518463132,
"loss": 0.0326,
"step": 30448
},
{
"epoch": 2.6,
"eval_loss": 0.04557649791240692,
"eval_max_distance": 58,
"eval_mean_distance": 0,
"eval_runtime": 3.698,
"eval_samples_per_second": 135.209,
"eval_steps_per_second": 2.704,
"step": 30448
},
{
"epoch": 2.62,
"learning_rate": 0.0001294144714137812,
"loss": 0.032,
"step": 30624
},
{
"epoch": 2.62,
"eval_loss": 0.04527006670832634,
"eval_max_distance": 49,
"eval_mean_distance": 0,
"eval_runtime": 4.5477,
"eval_samples_per_second": 109.946,
"eval_steps_per_second": 2.199,
"step": 30624
},
{
"epoch": 2.63,
"learning_rate": 0.0001243529276429311,
"loss": 0.0328,
"step": 30800
},
{
"epoch": 2.63,
"eval_loss": 0.04470500349998474,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 4.9109,
"eval_samples_per_second": 101.814,
"eval_steps_per_second": 2.036,
"step": 30800
},
{
"epoch": 2.65,
"learning_rate": 0.000119291383872081,
"loss": 0.031,
"step": 30976
},
{
"epoch": 2.65,
"eval_loss": 0.04523780569434166,
"eval_max_distance": 73,
"eval_mean_distance": 1,
"eval_runtime": 4.6719,
"eval_samples_per_second": 107.023,
"eval_steps_per_second": 2.14,
"step": 30976
},
{
"epoch": 2.66,
"learning_rate": 0.00011422984010123087,
"loss": 0.0304,
"step": 31152
},
{
"epoch": 2.66,
"eval_loss": 0.045120373368263245,
"eval_max_distance": 73,
"eval_mean_distance": 0,
"eval_runtime": 4.7157,
"eval_samples_per_second": 106.029,
"eval_steps_per_second": 2.121,
"step": 31152
},
{
"epoch": 2.68,
"learning_rate": 0.00010916829633038076,
"loss": 0.0296,
"step": 31328
},
{
"epoch": 2.68,
"eval_loss": 0.04615224152803421,
"eval_max_distance": 62,
"eval_mean_distance": 0,
"eval_runtime": 4.6187,
"eval_samples_per_second": 108.256,
"eval_steps_per_second": 2.165,
"step": 31328
},
{
"epoch": 2.69,
"learning_rate": 0.00010410675255953065,
"loss": 0.0298,
"step": 31504
},
{
"epoch": 2.69,
"eval_loss": 0.045413993299007416,
"eval_max_distance": 61,
"eval_mean_distance": 0,
"eval_runtime": 4.666,
"eval_samples_per_second": 107.158,
"eval_steps_per_second": 2.143,
"step": 31504
},
{
"epoch": 2.71,
"learning_rate": 9.904520878868054e-05,
"loss": 0.0289,
"step": 31680
},
{
"epoch": 2.71,
"eval_loss": 0.04516046866774559,
"eval_max_distance": 59,
"eval_mean_distance": 1,
"eval_runtime": 3.7866,
"eval_samples_per_second": 132.046,
"eval_steps_per_second": 2.641,
"step": 31680
},
{
"epoch": 2.72,
"learning_rate": 9.398366501783045e-05,
"loss": 0.0289,
"step": 31856
},
{
"epoch": 2.72,
"eval_loss": 0.044648416340351105,
"eval_max_distance": 63,
"eval_mean_distance": 0,
"eval_runtime": 4.8092,
"eval_samples_per_second": 103.968,
"eval_steps_per_second": 2.079,
"step": 31856
},
{
"epoch": 2.74,
"learning_rate": 8.892212124698034e-05,
"loss": 0.0283,
"step": 32032
},
{
"epoch": 2.74,
"eval_loss": 0.04460017383098602,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 5.0713,
"eval_samples_per_second": 98.594,
"eval_steps_per_second": 1.972,
"step": 32032
},
{
"epoch": 2.75,
"learning_rate": 8.386057747613023e-05,
"loss": 0.0282,
"step": 32208
},
{
"epoch": 2.75,
"eval_loss": 0.04471622407436371,
"eval_max_distance": 50,
"eval_mean_distance": 0,
"eval_runtime": 4.7988,
"eval_samples_per_second": 104.192,
"eval_steps_per_second": 2.084,
"step": 32208
},
{
"epoch": 2.77,
"learning_rate": 7.879903370528012e-05,
"loss": 0.0274,
"step": 32384
},
{
"epoch": 2.77,
"eval_loss": 0.0455770380795002,
"eval_max_distance": 55,
"eval_mean_distance": 1,
"eval_runtime": 4.8258,
"eval_samples_per_second": 103.61,
"eval_steps_per_second": 2.072,
"step": 32384
},
{
"epoch": 2.78,
"learning_rate": 7.373748993443e-05,
"loss": 0.0281,
"step": 32560
},
{
"epoch": 2.78,
"eval_loss": 0.044936638325452805,
"eval_max_distance": 53,
"eval_mean_distance": 0,
"eval_runtime": 4.4674,
"eval_samples_per_second": 111.922,
"eval_steps_per_second": 2.238,
"step": 32560
},
{
"epoch": 2.8,
"learning_rate": 6.867594616357989e-05,
"loss": 0.0271,
"step": 32736
},
{
"epoch": 2.8,
"eval_loss": 0.0453297421336174,
"eval_max_distance": 53,
"eval_mean_distance": 1,
"eval_runtime": 4.7508,
"eval_samples_per_second": 105.245,
"eval_steps_per_second": 2.105,
"step": 32736
},
{
"epoch": 2.81,
"learning_rate": 6.361440239272978e-05,
"loss": 0.0261,
"step": 32912
},
{
"epoch": 2.81,
"eval_loss": 0.044983986765146255,
"eval_max_distance": 53,
"eval_mean_distance": 1,
"eval_runtime": 3.8094,
"eval_samples_per_second": 131.254,
"eval_steps_per_second": 2.625,
"step": 32912
},
{
"epoch": 2.83,
"learning_rate": 5.8552858621879675e-05,
"loss": 0.0263,
"step": 33088
},
{
"epoch": 2.83,
"eval_loss": 0.04485508054494858,
"eval_max_distance": 53,
"eval_mean_distance": 1,
"eval_runtime": 4.5334,
"eval_samples_per_second": 110.292,
"eval_steps_per_second": 2.206,
"step": 33088
},
{
"epoch": 2.84,
"learning_rate": 5.3491314851029566e-05,
"loss": 0.026,
"step": 33264
},
{
"epoch": 2.84,
"eval_loss": 0.044998109340667725,
"eval_max_distance": 43,
"eval_mean_distance": 0,
"eval_runtime": 3.6486,
"eval_samples_per_second": 137.038,
"eval_steps_per_second": 2.741,
"step": 33264
},
{
"epoch": 2.86,
"learning_rate": 4.842977108017946e-05,
"loss": 0.025,
"step": 33440
},
{
"epoch": 2.86,
"eval_loss": 0.04560336843132973,
"eval_max_distance": 49,
"eval_mean_distance": 1,
"eval_runtime": 3.8552,
"eval_samples_per_second": 129.695,
"eval_steps_per_second": 2.594,
"step": 33440
},
{
"epoch": 2.87,
"learning_rate": 4.336822730932934e-05,
"loss": 0.025,
"step": 33616
},
{
"epoch": 2.87,
"eval_loss": 0.045977283269166946,
"eval_max_distance": 49,
"eval_mean_distance": 1,
"eval_runtime": 4.3858,
"eval_samples_per_second": 114.004,
"eval_steps_per_second": 2.28,
"step": 33616
},
{
"epoch": 2.89,
"learning_rate": 3.830668353847924e-05,
"loss": 0.0243,
"step": 33792
},
{
"epoch": 2.89,
"eval_loss": 0.046003151684999466,
"eval_max_distance": 43,
"eval_mean_distance": 1,
"eval_runtime": 3.9069,
"eval_samples_per_second": 127.98,
"eval_steps_per_second": 2.56,
"step": 33792
},
{
"epoch": 2.9,
"learning_rate": 3.324513976762913e-05,
"loss": 0.0239,
"step": 33968
},
{
"epoch": 2.9,
"eval_loss": 0.04568994790315628,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 4.1848,
"eval_samples_per_second": 119.48,
"eval_steps_per_second": 2.39,
"step": 33968
},
{
"epoch": 2.92,
"learning_rate": 2.818359599677902e-05,
"loss": 0.0241,
"step": 34144
},
{
"epoch": 2.92,
"eval_loss": 0.04591059312224388,
"eval_max_distance": 53,
"eval_mean_distance": 1,
"eval_runtime": 4.4039,
"eval_samples_per_second": 113.535,
"eval_steps_per_second": 2.271,
"step": 34144
},
{
"epoch": 2.93,
"learning_rate": 2.3122052225928907e-05,
"loss": 0.0239,
"step": 34320
},
{
"epoch": 2.93,
"eval_loss": 0.046651940792798996,
"eval_max_distance": 50,
"eval_mean_distance": 1,
"eval_runtime": 4.1885,
"eval_samples_per_second": 119.376,
"eval_steps_per_second": 2.388,
"step": 34320
},
{
"epoch": 2.95,
"learning_rate": 1.80605084550788e-05,
"loss": 0.0241,
"step": 34496
},
{
"epoch": 2.95,
"eval_loss": 0.0469602532684803,
"eval_max_distance": 57,
"eval_mean_distance": 1,
"eval_runtime": 4.8088,
"eval_samples_per_second": 103.977,
"eval_steps_per_second": 2.08,
"step": 34496
},
{
"epoch": 2.96,
"learning_rate": 1.299896468422869e-05,
"loss": 0.0234,
"step": 34672
},
{
"epoch": 2.96,
"eval_loss": 0.04661025106906891,
"eval_max_distance": 57,
"eval_mean_distance": 1,
"eval_runtime": 4.836,
"eval_samples_per_second": 103.39,
"eval_steps_per_second": 2.068,
"step": 34672
},
{
"epoch": 2.98,
"learning_rate": 7.937420913378581e-06,
"loss": 0.0235,
"step": 34848
},
{
"epoch": 2.98,
"eval_loss": 0.04825682193040848,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 5.064,
"eval_samples_per_second": 98.737,
"eval_steps_per_second": 1.975,
"step": 34848
},
{
"epoch": 2.99,
"learning_rate": 2.8758771425284712e-06,
"loss": 0.0226,
"step": 35024
},
{
"epoch": 2.99,
"eval_loss": 0.0471869558095932,
"eval_max_distance": 55,
"eval_mean_distance": 1,
"eval_runtime": 4.4865,
"eval_samples_per_second": 111.445,
"eval_steps_per_second": 2.229,
"step": 35024
},
{
"epoch": 3.0,
"learning_rate": 1.1604095563139932e-05,
"loss": 0.1431,
"step": 35160
},
{
"epoch": 3.0,
"eval_loss": 0.05489028990268707,
"eval_max_distance": 62,
"eval_mean_distance": 1,
"eval_runtime": 6.1578,
"eval_samples_per_second": 81.198,
"eval_steps_per_second": 1.624,
"step": 35160
},
{
"epoch": 3.03,
"learning_rate": 3.660409556313993e-05,
"loss": 0.0731,
"step": 35453
},
{
"epoch": 3.03,
"eval_loss": 0.04241102561354637,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 7.2607,
"eval_samples_per_second": 68.864,
"eval_steps_per_second": 1.377,
"step": 35453
},
{
"epoch": 3.05,
"learning_rate": 4.988266556234255e-05,
"loss": 0.0514,
"step": 35746
},
{
"epoch": 3.05,
"eval_loss": 0.04323163628578186,
"eval_max_distance": 41,
"eval_mean_distance": 1,
"eval_runtime": 5.4484,
"eval_samples_per_second": 91.77,
"eval_steps_per_second": 1.835,
"step": 35746
},
{
"epoch": 3.08,
"learning_rate": 4.962987886944819e-05,
"loss": 0.0442,
"step": 36039
},
{
"epoch": 3.08,
"eval_loss": 0.0440400205552578,
"eval_max_distance": 41,
"eval_mean_distance": 1,
"eval_runtime": 6.4941,
"eval_samples_per_second": 76.993,
"eval_steps_per_second": 1.54,
"step": 36039
},
{
"epoch": 3.1,
"learning_rate": 4.9377092176553816e-05,
"loss": 0.0415,
"step": 36332
},
{
"epoch": 3.1,
"eval_loss": 0.04508192837238312,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 6.3698,
"eval_samples_per_second": 78.496,
"eval_steps_per_second": 1.57,
"step": 36332
},
{
"epoch": 3.13,
"learning_rate": 4.912430548365945e-05,
"loss": 0.0394,
"step": 36625
},
{
"epoch": 3.13,
"eval_loss": 0.04381772503256798,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 4.6587,
"eval_samples_per_second": 107.326,
"eval_steps_per_second": 2.147,
"step": 36625
},
{
"epoch": 3.15,
"learning_rate": 4.8871518790765095e-05,
"loss": 0.0392,
"step": 36918
},
{
"epoch": 3.15,
"eval_loss": 0.04516580328345299,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 6.142,
"eval_samples_per_second": 81.406,
"eval_steps_per_second": 1.628,
"step": 36918
},
{
"epoch": 3.18,
"learning_rate": 4.861873209787073e-05,
"loss": 0.0386,
"step": 37211
},
{
"epoch": 3.18,
"eval_loss": 0.045004624873399734,
"eval_max_distance": 48,
"eval_mean_distance": 1,
"eval_runtime": 5.7991,
"eval_samples_per_second": 86.22,
"eval_steps_per_second": 1.724,
"step": 37211
},
{
"epoch": 3.2,
"learning_rate": 4.8365945404976366e-05,
"loss": 0.0387,
"step": 37504
},
{
"epoch": 3.2,
"eval_loss": 0.04667551815509796,
"eval_max_distance": 43,
"eval_mean_distance": 1,
"eval_runtime": 5.5404,
"eval_samples_per_second": 90.246,
"eval_steps_per_second": 1.805,
"step": 37504
},
{
"epoch": 3.23,
"learning_rate": 4.8113158712081995e-05,
"loss": 0.0385,
"step": 37797
},
{
"epoch": 3.23,
"eval_loss": 0.047200217843055725,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 6.397,
"eval_samples_per_second": 78.161,
"eval_steps_per_second": 1.563,
"step": 37797
},
{
"epoch": 3.25,
"learning_rate": 4.786037201918763e-05,
"loss": 0.0381,
"step": 38090
},
{
"epoch": 3.25,
"eval_loss": 0.04793046787381172,
"eval_max_distance": 40,
"eval_mean_distance": 1,
"eval_runtime": 6.537,
"eval_samples_per_second": 76.487,
"eval_steps_per_second": 1.53,
"step": 38090
},
{
"epoch": 3.28,
"learning_rate": 4.7607585326293267e-05,
"loss": 0.0383,
"step": 38383
},
{
"epoch": 3.28,
"eval_loss": 0.04902255907654762,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 7.2436,
"eval_samples_per_second": 69.027,
"eval_steps_per_second": 1.381,
"step": 38383
},
{
"epoch": 3.3,
"learning_rate": 4.735479863339891e-05,
"loss": 0.0381,
"step": 38676
},
{
"epoch": 3.3,
"eval_loss": 0.05011408030986786,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 6.2764,
"eval_samples_per_second": 79.664,
"eval_steps_per_second": 1.593,
"step": 38676
},
{
"epoch": 3.33,
"learning_rate": 4.7102011940504545e-05,
"loss": 0.0378,
"step": 38969
},
{
"epoch": 3.33,
"eval_loss": 0.0420503243803978,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 4.988,
"eval_samples_per_second": 100.24,
"eval_steps_per_second": 2.005,
"step": 38969
},
{
"epoch": 3.35,
"learning_rate": 4.6849225247610174e-05,
"loss": 0.0371,
"step": 39262
},
{
"epoch": 3.35,
"eval_loss": 0.0440935455262661,
"eval_max_distance": 56,
"eval_mean_distance": 1,
"eval_runtime": 5.2384,
"eval_samples_per_second": 95.448,
"eval_steps_per_second": 1.909,
"step": 39262
},
{
"epoch": 3.38,
"learning_rate": 4.659643855471581e-05,
"loss": 0.0356,
"step": 39555
},
{
"epoch": 3.38,
"eval_loss": 0.04321606457233429,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 7.4211,
"eval_samples_per_second": 67.375,
"eval_steps_per_second": 1.348,
"step": 39555
},
{
"epoch": 3.4,
"learning_rate": 4.6343651861821445e-05,
"loss": 0.0342,
"step": 39848
},
{
"epoch": 3.4,
"eval_loss": 0.04155835881829262,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 4.9259,
"eval_samples_per_second": 101.504,
"eval_steps_per_second": 2.03,
"step": 39848
},
{
"epoch": 3.43,
"learning_rate": 4.609086516892708e-05,
"loss": 0.0338,
"step": 40141
},
{
"epoch": 3.43,
"eval_loss": 0.041754692792892456,
"eval_max_distance": 68,
"eval_mean_distance": 0,
"eval_runtime": 4.6906,
"eval_samples_per_second": 106.595,
"eval_steps_per_second": 2.132,
"step": 40141
},
{
"epoch": 3.45,
"learning_rate": 4.583807847603272e-05,
"loss": 0.0323,
"step": 40434
},
{
"epoch": 3.45,
"eval_loss": 0.04196465387940407,
"eval_max_distance": 44,
"eval_mean_distance": 0,
"eval_runtime": 4.0109,
"eval_samples_per_second": 124.66,
"eval_steps_per_second": 2.493,
"step": 40434
},
{
"epoch": 3.48,
"learning_rate": 4.558529178313835e-05,
"loss": 0.0318,
"step": 40727
},
{
"epoch": 3.48,
"eval_loss": 0.04092620685696602,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 3.882,
"eval_samples_per_second": 128.801,
"eval_steps_per_second": 2.576,
"step": 40727
},
{
"epoch": 3.5,
"learning_rate": 4.533250509024399e-05,
"loss": 0.0311,
"step": 41020
},
{
"epoch": 3.5,
"eval_loss": 0.042109958827495575,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 3.8329,
"eval_samples_per_second": 130.448,
"eval_steps_per_second": 2.609,
"step": 41020
},
{
"epoch": 3.53,
"learning_rate": 4.5079718397349624e-05,
"loss": 0.0297,
"step": 41313
},
{
"epoch": 3.53,
"eval_loss": 0.041698385030031204,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 4.0043,
"eval_samples_per_second": 124.867,
"eval_steps_per_second": 2.497,
"step": 41313
},
{
"epoch": 3.55,
"learning_rate": 4.482693170445526e-05,
"loss": 0.0296,
"step": 41606
},
{
"epoch": 3.55,
"eval_loss": 0.041246239095926285,
"eval_max_distance": 44,
"eval_mean_distance": 0,
"eval_runtime": 4.5677,
"eval_samples_per_second": 109.465,
"eval_steps_per_second": 2.189,
"step": 41606
},
{
"epoch": 3.58,
"learning_rate": 4.4574145011560896e-05,
"loss": 0.0293,
"step": 41899
},
{
"epoch": 3.58,
"eval_loss": 0.0423794724047184,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 3.8582,
"eval_samples_per_second": 129.594,
"eval_steps_per_second": 2.592,
"step": 41899
},
{
"epoch": 3.6,
"learning_rate": 4.432135831866653e-05,
"loss": 0.028,
"step": 42192
},
{
"epoch": 3.6,
"eval_loss": 0.04165972024202347,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 4.2401,
"eval_samples_per_second": 117.92,
"eval_steps_per_second": 2.358,
"step": 42192
},
{
"epoch": 3.63,
"learning_rate": 4.406857162577217e-05,
"loss": 0.0279,
"step": 42485
},
{
"epoch": 3.63,
"eval_loss": 0.04228993132710457,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 4.5976,
"eval_samples_per_second": 108.752,
"eval_steps_per_second": 2.175,
"step": 42485
},
{
"epoch": 3.65,
"learning_rate": 4.38157849328778e-05,
"loss": 0.0267,
"step": 42778
},
{
"epoch": 3.65,
"eval_loss": 0.04240557178854942,
"eval_max_distance": 61,
"eval_mean_distance": 0,
"eval_runtime": 4.3219,
"eval_samples_per_second": 115.69,
"eval_steps_per_second": 2.314,
"step": 42778
},
{
"epoch": 3.68,
"learning_rate": 4.356299823998344e-05,
"loss": 0.0265,
"step": 43071
},
{
"epoch": 3.68,
"eval_loss": 0.04247906431555748,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 4.69,
"eval_samples_per_second": 106.609,
"eval_steps_per_second": 2.132,
"step": 43071
},
{
"epoch": 3.7,
"learning_rate": 4.3310211547089074e-05,
"loss": 0.026,
"step": 43364
},
{
"epoch": 3.7,
"eval_loss": 0.04282011464238167,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 3.8996,
"eval_samples_per_second": 128.218,
"eval_steps_per_second": 2.564,
"step": 43364
},
{
"epoch": 3.73,
"learning_rate": 4.3057424854194703e-05,
"loss": 0.0256,
"step": 43657
},
{
"epoch": 3.73,
"eval_loss": 0.04263199865818024,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 5.2399,
"eval_samples_per_second": 95.421,
"eval_steps_per_second": 1.908,
"step": 43657
},
{
"epoch": 3.75,
"learning_rate": 4.280463816130034e-05,
"loss": 0.0255,
"step": 43950
},
{
"epoch": 3.75,
"eval_loss": 0.043559763580560684,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 4.7867,
"eval_samples_per_second": 104.457,
"eval_steps_per_second": 2.089,
"step": 43950
},
{
"epoch": 3.78,
"learning_rate": 4.255185146840598e-05,
"loss": 0.0251,
"step": 44243
},
{
"epoch": 3.78,
"eval_loss": 0.04391922801733017,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 3.947,
"eval_samples_per_second": 126.678,
"eval_steps_per_second": 2.534,
"step": 44243
},
{
"epoch": 3.8,
"learning_rate": 4.229906477551162e-05,
"loss": 0.0246,
"step": 44536
},
{
"epoch": 3.8,
"eval_loss": 0.04322103410959244,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 4.5911,
"eval_samples_per_second": 108.907,
"eval_steps_per_second": 2.178,
"step": 44536
},
{
"epoch": 3.83,
"learning_rate": 4.204627808261725e-05,
"loss": 0.024,
"step": 44829
},
{
"epoch": 3.83,
"eval_loss": 0.04289233684539795,
"eval_max_distance": 59,
"eval_mean_distance": 0,
"eval_runtime": 4.5965,
"eval_samples_per_second": 108.779,
"eval_steps_per_second": 2.176,
"step": 44829
},
{
"epoch": 3.85,
"learning_rate": 4.179349138972288e-05,
"loss": 0.0236,
"step": 45122
},
{
"epoch": 3.85,
"eval_loss": 0.04328041896224022,
"eval_max_distance": 47,
"eval_mean_distance": 1,
"eval_runtime": 4.5214,
"eval_samples_per_second": 110.586,
"eval_steps_per_second": 2.212,
"step": 45122
},
{
"epoch": 3.88,
"learning_rate": 4.154070469682852e-05,
"loss": 0.0233,
"step": 45415
},
{
"epoch": 3.88,
"eval_loss": 0.04426594451069832,
"eval_max_distance": 49,
"eval_mean_distance": 1,
"eval_runtime": 3.9139,
"eval_samples_per_second": 127.749,
"eval_steps_per_second": 2.555,
"step": 45415
},
{
"epoch": 3.9,
"learning_rate": 4.1287918003934154e-05,
"loss": 0.023,
"step": 45708
},
{
"epoch": 3.9,
"eval_loss": 0.043395720422267914,
"eval_max_distance": 49,
"eval_mean_distance": 0,
"eval_runtime": 4.4284,
"eval_samples_per_second": 112.907,
"eval_steps_per_second": 2.258,
"step": 45708
},
{
"epoch": 3.93,
"learning_rate": 4.103513131103979e-05,
"loss": 0.023,
"step": 46001
},
{
"epoch": 3.93,
"eval_loss": 0.04368527978658676,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 4.3885,
"eval_samples_per_second": 113.933,
"eval_steps_per_second": 2.279,
"step": 46001
},
{
"epoch": 3.95,
"learning_rate": 4.078234461814543e-05,
"loss": 0.023,
"step": 46294
},
{
"epoch": 3.95,
"eval_loss": 0.04507759213447571,
"eval_max_distance": 56,
"eval_mean_distance": 1,
"eval_runtime": 4.6509,
"eval_samples_per_second": 107.505,
"eval_steps_per_second": 2.15,
"step": 46294
},
{
"epoch": 3.98,
"learning_rate": 4.052955792525106e-05,
"loss": 0.0227,
"step": 46587
},
{
"epoch": 3.98,
"eval_loss": 0.045307405292987823,
"eval_max_distance": 43,
"eval_mean_distance": 1,
"eval_runtime": 3.908,
"eval_samples_per_second": 127.944,
"eval_steps_per_second": 2.559,
"step": 46587
},
{
"epoch": 4.0,
"learning_rate": 4.02767712323567e-05,
"loss": 0.0616,
"step": 46880
},
{
"epoch": 4.0,
"eval_loss": 0.04507147893309593,
"eval_max_distance": 60,
"eval_mean_distance": 1,
"eval_runtime": 5.1024,
"eval_samples_per_second": 97.993,
"eval_steps_per_second": 1.96,
"step": 46880
},
{
"epoch": 4.03,
"learning_rate": 4.002398453946233e-05,
"loss": 0.0581,
"step": 47173
},
{
"epoch": 4.03,
"eval_loss": 0.04314437881112099,
"eval_max_distance": 31,
"eval_mean_distance": 1,
"eval_runtime": 5.525,
"eval_samples_per_second": 90.497,
"eval_steps_per_second": 1.81,
"step": 47173
},
{
"epoch": 4.05,
"learning_rate": 3.977119784656797e-05,
"loss": 0.0449,
"step": 47466
},
{
"epoch": 4.05,
"eval_loss": 0.043075479567050934,
"eval_max_distance": 30,
"eval_mean_distance": 1,
"eval_runtime": 6.2169,
"eval_samples_per_second": 80.426,
"eval_steps_per_second": 1.609,
"step": 47466
},
{
"epoch": 4.08,
"learning_rate": 3.9518411153673604e-05,
"loss": 0.0398,
"step": 47759
},
{
"epoch": 4.08,
"eval_loss": 0.046902794390916824,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 5.9712,
"eval_samples_per_second": 83.735,
"eval_steps_per_second": 1.675,
"step": 47759
},
{
"epoch": 4.1,
"learning_rate": 3.926562446077924e-05,
"loss": 0.0379,
"step": 48052
},
{
"epoch": 4.1,
"eval_loss": 0.046891409903764725,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 6.5218,
"eval_samples_per_second": 76.666,
"eval_steps_per_second": 1.533,
"step": 48052
},
{
"epoch": 4.13,
"learning_rate": 3.9012837767884876e-05,
"loss": 0.0365,
"step": 48345
},
{
"epoch": 4.13,
"eval_loss": 0.04368309676647186,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 6.1085,
"eval_samples_per_second": 81.853,
"eval_steps_per_second": 1.637,
"step": 48345
},
{
"epoch": 4.15,
"learning_rate": 3.876005107499051e-05,
"loss": 0.0362,
"step": 48638
},
{
"epoch": 4.15,
"eval_loss": 0.049253445118665695,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 6.9151,
"eval_samples_per_second": 72.306,
"eval_steps_per_second": 1.446,
"step": 48638
},
{
"epoch": 4.18,
"learning_rate": 3.850726438209615e-05,
"loss": 0.036,
"step": 48931
},
{
"epoch": 4.18,
"eval_loss": 0.04533643275499344,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 6.4033,
"eval_samples_per_second": 78.084,
"eval_steps_per_second": 1.562,
"step": 48931
},
{
"epoch": 4.2,
"learning_rate": 3.825447768920178e-05,
"loss": 0.0359,
"step": 49224
},
{
"epoch": 4.2,
"eval_loss": 0.04666188731789589,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 5.9118,
"eval_samples_per_second": 84.577,
"eval_steps_per_second": 1.692,
"step": 49224
},
{
"epoch": 4.23,
"learning_rate": 3.800169099630742e-05,
"loss": 0.0356,
"step": 49517
},
{
"epoch": 4.23,
"eval_loss": 0.04655005782842636,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 6.8551,
"eval_samples_per_second": 72.938,
"eval_steps_per_second": 1.459,
"step": 49517
},
{
"epoch": 4.25,
"learning_rate": 3.7748904303413054e-05,
"loss": 0.0357,
"step": 49810
},
{
"epoch": 4.25,
"eval_loss": 0.04722580313682556,
"eval_max_distance": 38,
"eval_mean_distance": 1,
"eval_runtime": 7.2929,
"eval_samples_per_second": 68.56,
"eval_steps_per_second": 1.371,
"step": 49810
},
{
"epoch": 4.28,
"learning_rate": 3.749611761051869e-05,
"loss": 0.0357,
"step": 50103
},
{
"epoch": 4.28,
"eval_loss": 0.04763193428516388,
"eval_max_distance": 39,
"eval_mean_distance": 1,
"eval_runtime": 7.0996,
"eval_samples_per_second": 70.426,
"eval_steps_per_second": 1.409,
"step": 50103
},
{
"epoch": 4.3,
"learning_rate": 3.7243330917624326e-05,
"loss": 0.0357,
"step": 50396
},
{
"epoch": 4.3,
"eval_loss": 0.04951860010623932,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 7.0362,
"eval_samples_per_second": 71.061,
"eval_steps_per_second": 1.421,
"step": 50396
},
{
"epoch": 4.33,
"learning_rate": 3.699054422472996e-05,
"loss": 0.0358,
"step": 50689
},
{
"epoch": 4.33,
"eval_loss": 0.040653664618730545,
"eval_max_distance": 41,
"eval_mean_distance": 0,
"eval_runtime": 4.2049,
"eval_samples_per_second": 118.908,
"eval_steps_per_second": 2.378,
"step": 50689
},
{
"epoch": 4.35,
"learning_rate": 3.67377575318356e-05,
"loss": 0.0349,
"step": 50982
},
{
"epoch": 4.35,
"eval_loss": 0.041064370423555374,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 5.2421,
"eval_samples_per_second": 95.382,
"eval_steps_per_second": 1.908,
"step": 50982
},
{
"epoch": 4.38,
"learning_rate": 3.6484970838941226e-05,
"loss": 0.0337,
"step": 51275
},
{
"epoch": 4.38,
"eval_loss": 0.040478698909282684,
"eval_max_distance": 43,
"eval_mean_distance": 0,
"eval_runtime": 4.7434,
"eval_samples_per_second": 105.41,
"eval_steps_per_second": 2.108,
"step": 51275
},
{
"epoch": 4.4,
"learning_rate": 3.623218414604687e-05,
"loss": 0.0325,
"step": 51568
},
{
"epoch": 4.4,
"eval_loss": 0.04032284766435623,
"eval_max_distance": 42,
"eval_mean_distance": 0,
"eval_runtime": 5.0822,
"eval_samples_per_second": 98.382,
"eval_steps_per_second": 1.968,
"step": 51568
},
{
"epoch": 4.43,
"learning_rate": 3.5979397453152505e-05,
"loss": 0.0319,
"step": 51861
},
{
"epoch": 4.43,
"eval_loss": 0.04050859808921814,
"eval_max_distance": 58,
"eval_mean_distance": 0,
"eval_runtime": 4.3025,
"eval_samples_per_second": 116.21,
"eval_steps_per_second": 2.324,
"step": 51861
},
{
"epoch": 4.45,
"learning_rate": 3.572661076025814e-05,
"loss": 0.0308,
"step": 52154
},
{
"epoch": 4.45,
"eval_loss": 0.04165998846292496,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 3.9661,
"eval_samples_per_second": 126.068,
"eval_steps_per_second": 2.521,
"step": 52154
},
{
"epoch": 4.48,
"learning_rate": 3.5473824067363776e-05,
"loss": 0.0303,
"step": 52447
},
{
"epoch": 4.48,
"eval_loss": 0.04113217815756798,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 4.0769,
"eval_samples_per_second": 122.641,
"eval_steps_per_second": 2.453,
"step": 52447
},
{
"epoch": 4.5,
"learning_rate": 3.5221037374469405e-05,
"loss": 0.0296,
"step": 52740
},
{
"epoch": 4.5,
"eval_loss": 0.04085549712181091,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 4.8819,
"eval_samples_per_second": 102.418,
"eval_steps_per_second": 2.048,
"step": 52740
},
{
"epoch": 4.53,
"learning_rate": 3.496825068157504e-05,
"loss": 0.0284,
"step": 53033
},
{
"epoch": 4.53,
"eval_loss": 0.04045039415359497,
"eval_max_distance": 50,
"eval_mean_distance": 0,
"eval_runtime": 3.7887,
"eval_samples_per_second": 131.972,
"eval_steps_per_second": 2.639,
"step": 53033
},
{
"epoch": 4.55,
"learning_rate": 3.471546398868068e-05,
"loss": 0.0283,
"step": 53326
},
{
"epoch": 4.55,
"eval_loss": 0.04034719988703728,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 3.8596,
"eval_samples_per_second": 129.547,
"eval_steps_per_second": 2.591,
"step": 53326
},
{
"epoch": 3.65,
"learning_rate": 7.513661202185792e-05,
"loss": 0.8484,
"step": 53436
},
{
"epoch": 3.65,
"eval_loss": 0.5610889792442322,
"eval_max_distance": 118,
"eval_mean_distance": 8,
"eval_runtime": 6.924,
"eval_samples_per_second": 72.212,
"eval_steps_per_second": 1.444,
"step": 53436
},
{
"epoch": 3.7,
"learning_rate": 0.000575136612021858,
"loss": 0.1562,
"step": 54168
},
{
"epoch": 3.7,
"eval_loss": 0.14375992119312286,
"eval_max_distance": 104,
"eval_mean_distance": 3,
"eval_runtime": 6.298,
"eval_samples_per_second": 79.39,
"eval_steps_per_second": 1.588,
"step": 54168
},
{
"epoch": 3.75,
"learning_rate": 0.0009992407300035892,
"loss": 0.0793,
"step": 54900
},
{
"epoch": 3.75,
"eval_loss": 0.19094187021255493,
"eval_max_distance": 119,
"eval_mean_distance": 5,
"eval_runtime": 7.1903,
"eval_samples_per_second": 69.538,
"eval_steps_per_second": 1.391,
"step": 54900
},
{
"epoch": 3.8,
"learning_rate": 0.0009941881333002016,
"loss": 0.087,
"step": 55632
},
{
"epoch": 3.8,
"eval_loss": 0.17700538039207458,
"eval_max_distance": 119,
"eval_mean_distance": 4,
"eval_runtime": 5.325,
"eval_samples_per_second": 93.896,
"eval_steps_per_second": 1.878,
"step": 55632
},
{
"epoch": 3.85,
"learning_rate": 0.0009891355365968138,
"loss": 0.0896,
"step": 56364
},
{
"epoch": 3.85,
"eval_loss": 0.259384423494339,
"eval_max_distance": 144,
"eval_mean_distance": 7,
"eval_runtime": 6.0715,
"eval_samples_per_second": 82.352,
"eval_steps_per_second": 1.647,
"step": 56364
},
{
"epoch": 3.9,
"learning_rate": 0.0009840829398934262,
"loss": 0.091,
"step": 57096
},
{
"epoch": 3.9,
"eval_loss": 0.41866716742515564,
"eval_max_distance": 162,
"eval_mean_distance": 10,
"eval_runtime": 7.1111,
"eval_samples_per_second": 70.313,
"eval_steps_per_second": 1.406,
"step": 57096
},
{
"epoch": 3.95,
"learning_rate": 0.0009790303431900383,
"loss": 0.1024,
"step": 57828
},
{
"epoch": 3.95,
"eval_loss": 0.6052113771438599,
"eval_max_distance": 172,
"eval_mean_distance": 15,
"eval_runtime": 7.9319,
"eval_samples_per_second": 63.036,
"eval_steps_per_second": 1.261,
"step": 57828
},
{
"epoch": 4.0,
"learning_rate": 0.0009739777464866507,
"loss": 0.1156,
"step": 58560
},
{
"epoch": 4.0,
"eval_loss": 0.14988236129283905,
"eval_max_distance": 98,
"eval_mean_distance": 4,
"eval_runtime": 5.0312,
"eval_samples_per_second": 99.38,
"eval_steps_per_second": 1.988,
"step": 58560
},
{
"epoch": 4.05,
"learning_rate": 0.000968925149783263,
"loss": 0.0924,
"step": 59292
},
{
"epoch": 4.05,
"eval_loss": 0.10364539921283722,
"eval_max_distance": 65,
"eval_mean_distance": 1,
"eval_runtime": 4.385,
"eval_samples_per_second": 114.025,
"eval_steps_per_second": 2.28,
"step": 59292
},
{
"epoch": 4.1,
"learning_rate": 0.0009638725530798753,
"loss": 0.0454,
"step": 60024
},
{
"epoch": 4.1,
"eval_loss": 0.07579351961612701,
"eval_max_distance": 77,
"eval_mean_distance": 1,
"eval_runtime": 4.1809,
"eval_samples_per_second": 119.592,
"eval_steps_per_second": 2.392,
"step": 60024
},
{
"epoch": 4.15,
"learning_rate": 0.0009588199563764874,
"loss": 0.0407,
"step": 60756
},
{
"epoch": 4.15,
"eval_loss": 0.07706684619188309,
"eval_max_distance": 64,
"eval_mean_distance": 1,
"eval_runtime": 2.884,
"eval_samples_per_second": 173.37,
"eval_steps_per_second": 3.467,
"step": 60756
},
{
"epoch": 4.2,
"learning_rate": 0.0009537673596730998,
"loss": 0.0404,
"step": 61488
},
{
"epoch": 4.2,
"eval_loss": 0.07059109956026077,
"eval_max_distance": 82,
"eval_mean_distance": 1,
"eval_runtime": 3.407,
"eval_samples_per_second": 146.757,
"eval_steps_per_second": 2.935,
"step": 61488
},
{
"epoch": 4.25,
"learning_rate": 0.0009487147629697121,
"loss": 0.0397,
"step": 62220
},
{
"epoch": 4.25,
"eval_loss": 0.07464081048965454,
"eval_max_distance": 95,
"eval_mean_distance": 1,
"eval_runtime": 3.498,
"eval_samples_per_second": 142.939,
"eval_steps_per_second": 2.859,
"step": 62220
},
{
"epoch": 4.3,
"learning_rate": 0.0009436621662663243,
"loss": 0.0395,
"step": 62952
},
{
"epoch": 4.3,
"eval_loss": 0.07409149408340454,
"eval_max_distance": 85,
"eval_mean_distance": 1,
"eval_runtime": 4.192,
"eval_samples_per_second": 119.275,
"eval_steps_per_second": 2.385,
"step": 62952
},
{
"epoch": 4.35,
"learning_rate": 0.0009386095695629366,
"loss": 0.0396,
"step": 63684
},
{
"epoch": 4.35,
"eval_loss": 0.08215318620204926,
"eval_max_distance": 86,
"eval_mean_distance": 1,
"eval_runtime": 4.7115,
"eval_samples_per_second": 106.123,
"eval_steps_per_second": 2.122,
"step": 63684
},
{
"epoch": 4.4,
"learning_rate": 0.0009335569728595489,
"loss": 0.0388,
"step": 64416
},
{
"epoch": 4.4,
"eval_loss": 0.07010287791490555,
"eval_max_distance": 100,
"eval_mean_distance": 2,
"eval_runtime": 4.6721,
"eval_samples_per_second": 107.019,
"eval_steps_per_second": 2.14,
"step": 64416
},
{
"epoch": 4.45,
"learning_rate": 0.0009285043761561612,
"loss": 0.038,
"step": 65148
},
{
"epoch": 4.45,
"eval_loss": 0.07180768251419067,
"eval_max_distance": 111,
"eval_mean_distance": 2,
"eval_runtime": 4.9954,
"eval_samples_per_second": 100.093,
"eval_steps_per_second": 2.002,
"step": 65148
},
{
"epoch": 4.5,
"learning_rate": 0.0009234517794527735,
"loss": 0.0388,
"step": 65880
},
{
"epoch": 4.5,
"eval_loss": 0.08386632055044174,
"eval_max_distance": 102,
"eval_mean_distance": 2,
"eval_runtime": 4.8999,
"eval_samples_per_second": 102.042,
"eval_steps_per_second": 2.041,
"step": 65880
},
{
"epoch": 4.55,
"learning_rate": 0.0009183991827493857,
"loss": 0.0392,
"step": 66612
},
{
"epoch": 4.55,
"eval_loss": 0.08535018563270569,
"eval_max_distance": 80,
"eval_mean_distance": 2,
"eval_runtime": 4.809,
"eval_samples_per_second": 103.972,
"eval_steps_per_second": 2.079,
"step": 66612
},
{
"epoch": 4.6,
"learning_rate": 0.000913346586045998,
"loss": 0.0387,
"step": 67344
},
{
"epoch": 4.6,
"eval_loss": 0.10104835033416748,
"eval_max_distance": 131,
"eval_mean_distance": 3,
"eval_runtime": 4.431,
"eval_samples_per_second": 112.841,
"eval_steps_per_second": 2.257,
"step": 67344
},
{
"epoch": 4.65,
"learning_rate": 0.0009082939893426102,
"loss": 0.0393,
"step": 68076
},
{
"epoch": 4.65,
"eval_loss": 0.0824475884437561,
"eval_max_distance": 133,
"eval_mean_distance": 2,
"eval_runtime": 5.444,
"eval_samples_per_second": 91.844,
"eval_steps_per_second": 1.837,
"step": 68076
},
{
"epoch": 4.7,
"learning_rate": 0.0009032413926392225,
"loss": 0.0393,
"step": 68808
},
{
"epoch": 4.7,
"eval_loss": 0.09969473630189896,
"eval_max_distance": 127,
"eval_mean_distance": 2,
"eval_runtime": 5.366,
"eval_samples_per_second": 93.179,
"eval_steps_per_second": 1.864,
"step": 68808
},
{
"epoch": 4.75,
"learning_rate": 0.0008981887959358348,
"loss": 0.0392,
"step": 69540
},
{
"epoch": 4.75,
"eval_loss": 0.10442204028367996,
"eval_max_distance": 128,
"eval_mean_distance": 3,
"eval_runtime": 5.5386,
"eval_samples_per_second": 90.276,
"eval_steps_per_second": 1.806,
"step": 69540
},
{
"epoch": 4.8,
"learning_rate": 0.000893136199232447,
"loss": 0.0404,
"step": 70272
},
{
"epoch": 4.8,
"eval_loss": 0.10716131329536438,
"eval_max_distance": 146,
"eval_mean_distance": 3,
"eval_runtime": 5.5122,
"eval_samples_per_second": 90.708,
"eval_steps_per_second": 1.814,
"step": 70272
},
{
"epoch": 4.85,
"learning_rate": 0.0008880836025290594,
"loss": 0.0432,
"step": 71004
},
{
"epoch": 4.85,
"eval_loss": 0.12576422095298767,
"eval_max_distance": 127,
"eval_mean_distance": 3,
"eval_runtime": 6.7417,
"eval_samples_per_second": 74.166,
"eval_steps_per_second": 1.483,
"step": 71004
},
{
"epoch": 4.9,
"learning_rate": 0.0008830310058256716,
"loss": 0.048,
"step": 71736
},
{
"epoch": 4.9,
"eval_loss": 0.2953876852989197,
"eval_max_distance": 152,
"eval_mean_distance": 7,
"eval_runtime": 7.5184,
"eval_samples_per_second": 66.503,
"eval_steps_per_second": 1.33,
"step": 71736
},
{
"epoch": 4.95,
"learning_rate": 0.0008779784091222839,
"loss": 0.0527,
"step": 72468
},
{
"epoch": 4.95,
"eval_loss": 0.3366003632545471,
"eval_max_distance": 143,
"eval_mean_distance": 9,
"eval_runtime": 8.2771,
"eval_samples_per_second": 60.408,
"eval_steps_per_second": 1.208,
"step": 72468
},
{
"epoch": 5.0,
"learning_rate": 0.0008729258124188961,
"loss": 0.0751,
"step": 73200
},
{
"epoch": 5.0,
"eval_loss": 0.06711767613887787,
"eval_max_distance": 91,
"eval_mean_distance": 1,
"eval_runtime": 4.714,
"eval_samples_per_second": 106.068,
"eval_steps_per_second": 2.121,
"step": 73200
},
{
"epoch": 5.05,
"learning_rate": 0.0008678732157155085,
"loss": 0.0586,
"step": 73932
},
{
"epoch": 5.05,
"eval_loss": 0.10157773643732071,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 3.6629,
"eval_samples_per_second": 136.503,
"eval_steps_per_second": 2.73,
"step": 73932
},
{
"epoch": 5.1,
"learning_rate": 0.0008628206190121207,
"loss": 0.0295,
"step": 74664
},
{
"epoch": 5.1,
"eval_loss": 0.10495835542678833,
"eval_max_distance": 34,
"eval_mean_distance": 1,
"eval_runtime": 3.5918,
"eval_samples_per_second": 139.208,
"eval_steps_per_second": 2.784,
"step": 74664
},
{
"epoch": 5.15,
"learning_rate": 0.000857768022308733,
"loss": 0.0253,
"step": 75396
},
{
"epoch": 5.15,
"eval_loss": 0.10891541838645935,
"eval_max_distance": 64,
"eval_mean_distance": 1,
"eval_runtime": 3.7467,
"eval_samples_per_second": 133.451,
"eval_steps_per_second": 2.669,
"step": 75396
},
{
"epoch": 5.2,
"learning_rate": 0.0008527154256053453,
"loss": 0.0245,
"step": 76128
},
{
"epoch": 5.2,
"eval_loss": 0.08191870152950287,
"eval_max_distance": 57,
"eval_mean_distance": 1,
"eval_runtime": 3.751,
"eval_samples_per_second": 133.296,
"eval_steps_per_second": 2.666,
"step": 76128
},
{
"epoch": 5.25,
"learning_rate": 0.0008476628289019576,
"loss": 0.0238,
"step": 76860
},
{
"epoch": 5.25,
"eval_loss": 0.08061353117227554,
"eval_max_distance": 98,
"eval_mean_distance": 1,
"eval_runtime": 3.894,
"eval_samples_per_second": 128.402,
"eval_steps_per_second": 2.568,
"step": 76860
},
{
"epoch": 5.3,
"learning_rate": 0.0008426102321985698,
"loss": 0.0236,
"step": 77592
},
{
"epoch": 5.3,
"eval_loss": 0.07853155583143234,
"eval_max_distance": 62,
"eval_mean_distance": 1,
"eval_runtime": 3.7131,
"eval_samples_per_second": 134.66,
"eval_steps_per_second": 2.693,
"step": 77592
},
{
"epoch": 5.35,
"learning_rate": 0.0008375576354951821,
"loss": 0.0236,
"step": 78324
},
{
"epoch": 5.35,
"eval_loss": 0.07758867740631104,
"eval_max_distance": 65,
"eval_mean_distance": 1,
"eval_runtime": 3.8414,
"eval_samples_per_second": 130.161,
"eval_steps_per_second": 2.603,
"step": 78324
},
{
"epoch": 5.4,
"learning_rate": 0.0008325050387917944,
"loss": 0.0241,
"step": 79056
},
{
"epoch": 5.4,
"eval_loss": 0.06412464380264282,
"eval_max_distance": 106,
"eval_mean_distance": 1,
"eval_runtime": 4.257,
"eval_samples_per_second": 117.452,
"eval_steps_per_second": 2.349,
"step": 79056
},
{
"epoch": 5.45,
"learning_rate": 0.0008274524420884067,
"loss": 0.0237,
"step": 79788
},
{
"epoch": 5.45,
"eval_loss": 0.053821686655282974,
"eval_max_distance": 83,
"eval_mean_distance": 1,
"eval_runtime": 3.7649,
"eval_samples_per_second": 132.805,
"eval_steps_per_second": 2.656,
"step": 79788
},
{
"epoch": 5.5,
"learning_rate": 0.0008223998453850188,
"loss": 0.0241,
"step": 80520
},
{
"epoch": 5.5,
"eval_loss": 0.06780679523944855,
"eval_max_distance": 87,
"eval_mean_distance": 1,
"eval_runtime": 4.095,
"eval_samples_per_second": 122.101,
"eval_steps_per_second": 2.442,
"step": 80520
},
{
"epoch": 5.55,
"learning_rate": 0.0008173472486816312,
"loss": 0.0239,
"step": 81252
},
{
"epoch": 5.55,
"eval_loss": 0.051452018320560455,
"eval_max_distance": 92,
"eval_mean_distance": 1,
"eval_runtime": 4.548,
"eval_samples_per_second": 109.939,
"eval_steps_per_second": 2.199,
"step": 81252
},
{
"epoch": 5.6,
"learning_rate": 0.0008122946519782435,
"loss": 0.0249,
"step": 81984
},
{
"epoch": 5.6,
"eval_loss": 0.06307797878980637,
"eval_max_distance": 112,
"eval_mean_distance": 2,
"eval_runtime": 4.659,
"eval_samples_per_second": 107.32,
"eval_steps_per_second": 2.146,
"step": 81984
},
{
"epoch": 5.65,
"learning_rate": 0.0008072420552748558,
"loss": 0.0248,
"step": 82716
},
{
"epoch": 5.65,
"eval_loss": 0.06688910722732544,
"eval_max_distance": 130,
"eval_mean_distance": 2,
"eval_runtime": 4.818,
"eval_samples_per_second": 103.777,
"eval_steps_per_second": 2.076,
"step": 82716
},
{
"epoch": 5.7,
"learning_rate": 0.000802189458571468,
"loss": 0.025,
"step": 83448
},
{
"epoch": 5.7,
"eval_loss": 0.06885194033384323,
"eval_max_distance": 137,
"eval_mean_distance": 2,
"eval_runtime": 4.755,
"eval_samples_per_second": 105.152,
"eval_steps_per_second": 2.103,
"step": 83448
},
{
"epoch": 5.75,
"learning_rate": 0.0007971368618680803,
"loss": 0.0252,
"step": 84180
},
{
"epoch": 5.75,
"eval_loss": 0.08273730427026749,
"eval_max_distance": 126,
"eval_mean_distance": 2,
"eval_runtime": 5.2933,
"eval_samples_per_second": 94.46,
"eval_steps_per_second": 1.889,
"step": 84180
},
{
"epoch": 5.8,
"learning_rate": 0.0007920842651646926,
"loss": 0.0265,
"step": 84912
},
{
"epoch": 5.8,
"eval_loss": 0.14016655087471008,
"eval_max_distance": 153,
"eval_mean_distance": 3,
"eval_runtime": 5.7979,
"eval_samples_per_second": 86.238,
"eval_steps_per_second": 1.725,
"step": 84912
},
{
"epoch": 5.85,
"learning_rate": 0.0007870316684613049,
"loss": 0.0281,
"step": 85644
},
{
"epoch": 5.85,
"eval_loss": 0.11726028472185135,
"eval_max_distance": 147,
"eval_mean_distance": 3,
"eval_runtime": 5.5217,
"eval_samples_per_second": 90.552,
"eval_steps_per_second": 1.811,
"step": 85644
},
{
"epoch": 5.9,
"learning_rate": 0.0007819790717579171,
"loss": 0.0309,
"step": 86376
},
{
"epoch": 5.9,
"eval_loss": 0.19342409074306488,
"eval_max_distance": 159,
"eval_mean_distance": 5,
"eval_runtime": 6.4908,
"eval_samples_per_second": 77.032,
"eval_steps_per_second": 1.541,
"step": 86376
},
{
"epoch": 5.95,
"learning_rate": 0.0007769264750545294,
"loss": 0.0353,
"step": 87108
},
{
"epoch": 5.95,
"eval_loss": 0.21934360265731812,
"eval_max_distance": 163,
"eval_mean_distance": 6,
"eval_runtime": 7.0293,
"eval_samples_per_second": 71.131,
"eval_steps_per_second": 1.423,
"step": 87108
},
{
"epoch": 6.0,
"learning_rate": 0.0007718738783511417,
"loss": 0.0567,
"step": 87840
},
{
"epoch": 6.0,
"eval_loss": 0.05241122096776962,
"eval_max_distance": 75,
"eval_mean_distance": 1,
"eval_runtime": 4.3556,
"eval_samples_per_second": 114.796,
"eval_steps_per_second": 2.296,
"step": 87840
},
{
"epoch": 6.05,
"learning_rate": 0.000766821281647754,
"loss": 0.0428,
"step": 88572
},
{
"epoch": 6.05,
"eval_loss": 0.10328952223062515,
"eval_max_distance": 41,
"eval_mean_distance": 1,
"eval_runtime": 3.055,
"eval_samples_per_second": 163.667,
"eval_steps_per_second": 3.273,
"step": 88572
},
{
"epoch": 6.1,
"learning_rate": 0.0007617686849443662,
"loss": 0.0216,
"step": 89304
},
{
"epoch": 6.1,
"eval_loss": 0.13436926901340485,
"eval_max_distance": 64,
"eval_mean_distance": 1,
"eval_runtime": 3.5469,
"eval_samples_per_second": 140.969,
"eval_steps_per_second": 2.819,
"step": 89304
},
{
"epoch": 6.15,
"learning_rate": 0.0007567160882409785,
"loss": 0.0187,
"step": 90036
},
{
"epoch": 6.15,
"eval_loss": 0.08495381474494934,
"eval_max_distance": 64,
"eval_mean_distance": 1,
"eval_runtime": 3.4669,
"eval_samples_per_second": 144.221,
"eval_steps_per_second": 2.884,
"step": 90036
},
{
"epoch": 6.2,
"learning_rate": 0.0007516634915375909,
"loss": 0.0175,
"step": 90768
},
{
"epoch": 6.2,
"eval_loss": 0.05331624671816826,
"eval_max_distance": 65,
"eval_mean_distance": 1,
"eval_runtime": 3.4958,
"eval_samples_per_second": 143.031,
"eval_steps_per_second": 2.861,
"step": 90768
},
{
"epoch": 6.25,
"learning_rate": 0.000746610894834203,
"loss": 0.0171,
"step": 91500
},
{
"epoch": 6.25,
"eval_loss": 0.05643443390727043,
"eval_max_distance": 77,
"eval_mean_distance": 1,
"eval_runtime": 4.007,
"eval_samples_per_second": 124.782,
"eval_steps_per_second": 2.496,
"step": 91500
},
{
"epoch": 6.3,
"learning_rate": 0.0007415582981308153,
"loss": 0.0163,
"step": 92232
},
{
"epoch": 6.3,
"eval_loss": 0.05690987408161163,
"eval_max_distance": 74,
"eval_mean_distance": 1,
"eval_runtime": 3.2524,
"eval_samples_per_second": 153.733,
"eval_steps_per_second": 3.075,
"step": 92232
},
{
"epoch": 6.35,
"learning_rate": 0.0007365057014274276,
"loss": 0.0166,
"step": 92964
},
{
"epoch": 6.35,
"eval_loss": 0.060303423553705215,
"eval_max_distance": 70,
"eval_mean_distance": 1,
"eval_runtime": 3.2886,
"eval_samples_per_second": 152.041,
"eval_steps_per_second": 3.041,
"step": 92964
},
{
"epoch": 6.4,
"learning_rate": 0.00073145310472404,
"loss": 0.0166,
"step": 93696
},
{
"epoch": 6.4,
"eval_loss": 0.045122452080249786,
"eval_max_distance": 90,
"eval_mean_distance": 1,
"eval_runtime": 4.1433,
"eval_samples_per_second": 120.676,
"eval_steps_per_second": 2.414,
"step": 93696
},
{
"epoch": 6.45,
"learning_rate": 0.0007264005080206521,
"loss": 0.0167,
"step": 94428
},
{
"epoch": 6.45,
"eval_loss": 0.045794181525707245,
"eval_max_distance": 117,
"eval_mean_distance": 1,
"eval_runtime": 4.2035,
"eval_samples_per_second": 118.949,
"eval_steps_per_second": 2.379,
"step": 94428
},
{
"epoch": 6.5,
"learning_rate": 0.0007213479113172644,
"loss": 0.0169,
"step": 95160
},
{
"epoch": 6.5,
"eval_loss": 0.06077966466546059,
"eval_max_distance": 94,
"eval_mean_distance": 1,
"eval_runtime": 3.8906,
"eval_samples_per_second": 128.513,
"eval_steps_per_second": 2.57,
"step": 95160
},
{
"epoch": 6.55,
"learning_rate": 0.0007162953146138768,
"loss": 0.0169,
"step": 95892
},
{
"epoch": 6.55,
"eval_loss": 0.061703383922576904,
"eval_max_distance": 106,
"eval_mean_distance": 1,
"eval_runtime": 4.324,
"eval_samples_per_second": 115.634,
"eval_steps_per_second": 2.313,
"step": 95892
},
{
"epoch": 6.6,
"learning_rate": 0.000711242717910489,
"loss": 0.017,
"step": 96624
},
{
"epoch": 6.6,
"eval_loss": 0.06622283160686493,
"eval_max_distance": 130,
"eval_mean_distance": 2,
"eval_runtime": 4.818,
"eval_samples_per_second": 103.777,
"eval_steps_per_second": 2.076,
"step": 96624
},
{
"epoch": 6.65,
"learning_rate": 0.0007061901212071012,
"loss": 0.0175,
"step": 97356
},
{
"epoch": 6.65,
"eval_loss": 0.05441684648394585,
"eval_max_distance": 134,
"eval_mean_distance": 1,
"eval_runtime": 4.626,
"eval_samples_per_second": 108.084,
"eval_steps_per_second": 2.162,
"step": 97356
},
{
"epoch": 6.7,
"learning_rate": 0.0007011375245037135,
"loss": 0.0174,
"step": 98088
},
{
"epoch": 6.7,
"eval_loss": 0.055900994688272476,
"eval_max_distance": 124,
"eval_mean_distance": 1,
"eval_runtime": 4.5758,
"eval_samples_per_second": 109.272,
"eval_steps_per_second": 2.185,
"step": 98088
},
{
"epoch": 6.75,
"learning_rate": 0.0006960849278003259,
"loss": 0.0183,
"step": 98820
},
{
"epoch": 6.75,
"eval_loss": 0.0677228718996048,
"eval_max_distance": 131,
"eval_mean_distance": 2,
"eval_runtime": 4.9539,
"eval_samples_per_second": 100.931,
"eval_steps_per_second": 2.019,
"step": 98820
},
{
"epoch": 6.8,
"learning_rate": 0.0006910323310969381,
"loss": 0.0184,
"step": 99552
},
{
"epoch": 6.8,
"eval_loss": 0.07514572888612747,
"eval_max_distance": 127,
"eval_mean_distance": 2,
"eval_runtime": 5.1829,
"eval_samples_per_second": 96.47,
"eval_steps_per_second": 1.929,
"step": 99552
},
{
"epoch": 6.85,
"learning_rate": 0.0006859797343935503,
"loss": 0.0198,
"step": 100284
},
{
"epoch": 6.85,
"eval_loss": 0.08426449447870255,
"eval_max_distance": 142,
"eval_mean_distance": 2,
"eval_runtime": 5.1188,
"eval_samples_per_second": 97.679,
"eval_steps_per_second": 1.954,
"step": 100284
},
{
"epoch": 6.9,
"learning_rate": 0.0006809271376901627,
"loss": 0.0212,
"step": 101016
},
{
"epoch": 6.9,
"eval_loss": 0.16564741730690002,
"eval_max_distance": 149,
"eval_mean_distance": 4,
"eval_runtime": 5.7331,
"eval_samples_per_second": 87.213,
"eval_steps_per_second": 1.744,
"step": 101016
},
{
"epoch": 6.95,
"learning_rate": 0.0006758745409867749,
"loss": 0.0246,
"step": 101748
},
{
"epoch": 6.95,
"eval_loss": 0.17887726426124573,
"eval_max_distance": 151,
"eval_mean_distance": 5,
"eval_runtime": 6.705,
"eval_samples_per_second": 74.571,
"eval_steps_per_second": 1.491,
"step": 101748
},
{
"epoch": 7.0,
"learning_rate": 0.0006708219442833872,
"loss": 0.0468,
"step": 102480
},
{
"epoch": 7.0,
"eval_loss": 0.08162112534046173,
"eval_max_distance": 58,
"eval_mean_distance": 1,
"eval_runtime": 4.066,
"eval_samples_per_second": 122.971,
"eval_steps_per_second": 2.459,
"step": 102480
},
{
"epoch": 7.05,
"learning_rate": 0.0006657693475799994,
"loss": 0.0334,
"step": 103212
},
{
"epoch": 7.05,
"eval_loss": 0.13462159037590027,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 3.4851,
"eval_samples_per_second": 143.47,
"eval_steps_per_second": 2.869,
"step": 103212
},
{
"epoch": 7.1,
"learning_rate": 0.0006607167508766118,
"loss": 0.0171,
"step": 103944
},
{
"epoch": 7.1,
"eval_loss": 0.07761073857545853,
"eval_max_distance": 46,
"eval_mean_distance": 1,
"eval_runtime": 3.2222,
"eval_samples_per_second": 155.174,
"eval_steps_per_second": 3.103,
"step": 103944
},
{
"epoch": 7.15,
"learning_rate": 0.000655664154173224,
"loss": 0.0142,
"step": 104676
},
{
"epoch": 7.15,
"eval_loss": 0.08971526473760605,
"eval_max_distance": 51,
"eval_mean_distance": 1,
"eval_runtime": 3.1513,
"eval_samples_per_second": 158.665,
"eval_steps_per_second": 3.173,
"step": 104676
},
{
"epoch": 7.2,
"learning_rate": 0.0006506115574698363,
"loss": 0.0129,
"step": 105408
},
{
"epoch": 7.2,
"eval_loss": 0.05487065017223358,
"eval_max_distance": 55,
"eval_mean_distance": 1,
"eval_runtime": 3.6564,
"eval_samples_per_second": 136.748,
"eval_steps_per_second": 2.735,
"step": 105408
},
{
"epoch": 7.25,
"learning_rate": 0.0006455589607664486,
"loss": 0.013,
"step": 106140
},
{
"epoch": 7.25,
"eval_loss": 0.05428076535463333,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 3.3832,
"eval_samples_per_second": 147.791,
"eval_steps_per_second": 2.956,
"step": 106140
},
{
"epoch": 7.3,
"learning_rate": 0.0006405063640630608,
"loss": 0.0122,
"step": 106872
},
{
"epoch": 7.3,
"eval_loss": 0.05511023849248886,
"eval_max_distance": 80,
"eval_mean_distance": 1,
"eval_runtime": 3.5886,
"eval_samples_per_second": 139.331,
"eval_steps_per_second": 2.787,
"step": 106872
},
{
"epoch": 7.35,
"learning_rate": 0.0006354537673596731,
"loss": 0.0121,
"step": 107604
},
{
"epoch": 7.35,
"eval_loss": 0.07942553609609604,
"eval_max_distance": 70,
"eval_mean_distance": 1,
"eval_runtime": 3.2062,
"eval_samples_per_second": 155.946,
"eval_steps_per_second": 3.119,
"step": 107604
},
{
"epoch": 7.4,
"learning_rate": 0.0006304011706562855,
"loss": 0.0123,
"step": 108336
},
{
"epoch": 7.4,
"eval_loss": 0.04670649766921997,
"eval_max_distance": 76,
"eval_mean_distance": 1,
"eval_runtime": 3.2736,
"eval_samples_per_second": 152.737,
"eval_steps_per_second": 3.055,
"step": 108336
},
{
"epoch": 7.45,
"learning_rate": 0.0006253485739528977,
"loss": 0.0122,
"step": 109068
},
{
"epoch": 7.45,
"eval_loss": 0.04738261550664902,
"eval_max_distance": 73,
"eval_mean_distance": 1,
"eval_runtime": 3.904,
"eval_samples_per_second": 128.074,
"eval_steps_per_second": 2.561,
"step": 109068
},
{
"epoch": 7.5,
"learning_rate": 0.0006202959772495099,
"loss": 0.0123,
"step": 109800
},
{
"epoch": 7.5,
"eval_loss": 0.04781508818268776,
"eval_max_distance": 104,
"eval_mean_distance": 1,
"eval_runtime": 3.681,
"eval_samples_per_second": 135.831,
"eval_steps_per_second": 2.717,
"step": 109800
},
{
"epoch": 7.55,
"learning_rate": 0.0006152433805461222,
"loss": 0.012,
"step": 110532
},
{
"epoch": 7.55,
"eval_loss": 0.05633458122611046,
"eval_max_distance": 112,
"eval_mean_distance": 1,
"eval_runtime": 4.3082,
"eval_samples_per_second": 116.057,
"eval_steps_per_second": 2.321,
"step": 110532
},
{
"epoch": 7.6,
"learning_rate": 0.0006101907838427346,
"loss": 0.0124,
"step": 111264
},
{
"epoch": 7.6,
"eval_loss": 0.04918990656733513,
"eval_max_distance": 115,
"eval_mean_distance": 1,
"eval_runtime": 4.1629,
"eval_samples_per_second": 120.11,
"eval_steps_per_second": 2.402,
"step": 111264
},
{
"epoch": 7.65,
"learning_rate": 0.0006051381871393467,
"loss": 0.0129,
"step": 111996
},
{
"epoch": 7.65,
"eval_loss": 0.05522555857896805,
"eval_max_distance": 86,
"eval_mean_distance": 1,
"eval_runtime": 4.1427,
"eval_samples_per_second": 120.694,
"eval_steps_per_second": 2.414,
"step": 111996
},
{
"epoch": 7.7,
"learning_rate": 0.000600085590435959,
"loss": 0.0129,
"step": 112728
},
{
"epoch": 7.7,
"eval_loss": 0.05691719055175781,
"eval_max_distance": 130,
"eval_mean_distance": 1,
"eval_runtime": 4.176,
"eval_samples_per_second": 119.731,
"eval_steps_per_second": 2.395,
"step": 112728
},
{
"epoch": 7.75,
"learning_rate": 0.0005950329937325714,
"loss": 0.0128,
"step": 113460
},
{
"epoch": 7.75,
"eval_loss": 0.07056962698698044,
"eval_max_distance": 106,
"eval_mean_distance": 2,
"eval_runtime": 4.6735,
"eval_samples_per_second": 106.987,
"eval_steps_per_second": 2.14,
"step": 113460
},
{
"epoch": 7.8,
"learning_rate": 0.0005899803970291835,
"loss": 0.0133,
"step": 114192
},
{
"epoch": 7.8,
"eval_loss": 0.0818600282073021,
"eval_max_distance": 145,
"eval_mean_distance": 2,
"eval_runtime": 4.9214,
"eval_samples_per_second": 101.596,
"eval_steps_per_second": 2.032,
"step": 114192
},
{
"epoch": 7.85,
"learning_rate": 0.0005849278003257958,
"loss": 0.0136,
"step": 114924
},
{
"epoch": 7.85,
"eval_loss": 0.10967979580163956,
"eval_max_distance": 141,
"eval_mean_distance": 3,
"eval_runtime": 5.3911,
"eval_samples_per_second": 92.745,
"eval_steps_per_second": 1.855,
"step": 114924
},
{
"epoch": 7.9,
"learning_rate": 0.0005798752036224081,
"loss": 0.015,
"step": 115656
},
{
"epoch": 7.9,
"eval_loss": 0.1418592780828476,
"eval_max_distance": 162,
"eval_mean_distance": 4,
"eval_runtime": 5.9144,
"eval_samples_per_second": 84.54,
"eval_steps_per_second": 1.691,
"step": 115656
},
{
"epoch": 7.95,
"learning_rate": 0.0005748226069190205,
"loss": 0.0174,
"step": 116388
},
{
"epoch": 7.95,
"eval_loss": 0.1534065157175064,
"eval_max_distance": 154,
"eval_mean_distance": 4,
"eval_runtime": 5.9441,
"eval_samples_per_second": 84.117,
"eval_steps_per_second": 1.682,
"step": 116388
},
{
"epoch": 8.0,
"learning_rate": 0.0005697700102156326,
"loss": 0.0404,
"step": 117120
},
{
"epoch": 8.0,
"eval_loss": 0.06091161444783211,
"eval_max_distance": 62,
"eval_mean_distance": 1,
"eval_runtime": 3.5301,
"eval_samples_per_second": 141.639,
"eval_steps_per_second": 2.833,
"step": 117120
},
{
"epoch": 8.05,
"learning_rate": 0.0005647174135122449,
"loss": 0.0268,
"step": 117852
},
{
"epoch": 8.05,
"eval_loss": 0.10339893400669098,
"eval_max_distance": 38,
"eval_mean_distance": 1,
"eval_runtime": 3.087,
"eval_samples_per_second": 161.969,
"eval_steps_per_second": 3.239,
"step": 117852
},
{
"epoch": 8.1,
"learning_rate": 0.0005596648168088573,
"loss": 0.0137,
"step": 118584
},
{
"epoch": 8.1,
"eval_loss": 0.07406046241521835,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 3.1741,
"eval_samples_per_second": 157.523,
"eval_steps_per_second": 3.15,
"step": 118584
},
{
"epoch": 8.15,
"learning_rate": 0.0005546122201054696,
"loss": 0.011,
"step": 119316
},
{
"epoch": 8.15,
"eval_loss": 0.08871261775493622,
"eval_max_distance": 66,
"eval_mean_distance": 1,
"eval_runtime": 3.292,
"eval_samples_per_second": 151.884,
"eval_steps_per_second": 3.038,
"step": 119316
},
{
"epoch": 8.2,
"learning_rate": 0.0005495596234020817,
"loss": 0.0099,
"step": 120048
},
{
"epoch": 8.2,
"eval_loss": 0.05957801640033722,
"eval_max_distance": 59,
"eval_mean_distance": 1,
"eval_runtime": 3.2017,
"eval_samples_per_second": 156.166,
"eval_steps_per_second": 3.123,
"step": 120048
},
{
"epoch": 8.25,
"learning_rate": 0.0005445070266986941,
"loss": 0.0096,
"step": 120780
},
{
"epoch": 8.25,
"eval_loss": 0.0444614440202713,
"eval_max_distance": 62,
"eval_mean_distance": 0,
"eval_runtime": 2.9626,
"eval_samples_per_second": 168.773,
"eval_steps_per_second": 3.375,
"step": 120780
},
{
"epoch": 8.3,
"learning_rate": 0.0005394544299953064,
"loss": 0.0091,
"step": 121512
},
{
"epoch": 8.3,
"eval_loss": 0.04867273196578026,
"eval_max_distance": 62,
"eval_mean_distance": 1,
"eval_runtime": 3.2803,
"eval_samples_per_second": 152.425,
"eval_steps_per_second": 3.049,
"step": 121512
},
{
"epoch": 8.35,
"learning_rate": 0.0005344018332919186,
"loss": 0.0088,
"step": 122244
},
{
"epoch": 8.35,
"eval_loss": 0.03973795846104622,
"eval_max_distance": 73,
"eval_mean_distance": 0,
"eval_runtime": 3.6655,
"eval_samples_per_second": 136.408,
"eval_steps_per_second": 2.728,
"step": 122244
},
{
"epoch": 8.4,
"learning_rate": 0.0005293492365885308,
"loss": 0.009,
"step": 122976
},
{
"epoch": 8.4,
"eval_loss": 0.03982974588871002,
"eval_max_distance": 77,
"eval_mean_distance": 0,
"eval_runtime": 3.8429,
"eval_samples_per_second": 130.109,
"eval_steps_per_second": 2.602,
"step": 122976
},
{
"epoch": 8.45,
"learning_rate": 0.0005242966398851432,
"loss": 0.009,
"step": 123708
},
{
"epoch": 8.45,
"eval_loss": 0.04630805924534798,
"eval_max_distance": 57,
"eval_mean_distance": 1,
"eval_runtime": 3.7474,
"eval_samples_per_second": 133.426,
"eval_steps_per_second": 2.669,
"step": 123708
},
{
"epoch": 8.5,
"learning_rate": 0.0005192440431817555,
"loss": 0.0091,
"step": 124440
},
{
"epoch": 8.5,
"eval_loss": 0.0445212796330452,
"eval_max_distance": 100,
"eval_mean_distance": 1,
"eval_runtime": 3.8615,
"eval_samples_per_second": 129.483,
"eval_steps_per_second": 2.59,
"step": 124440
},
{
"epoch": 8.55,
"learning_rate": 0.0005141914464783677,
"loss": 0.0085,
"step": 125172
},
{
"epoch": 8.55,
"eval_loss": 0.045358140021562576,
"eval_max_distance": 93,
"eval_mean_distance": 1,
"eval_runtime": 3.689,
"eval_samples_per_second": 135.538,
"eval_steps_per_second": 2.711,
"step": 125172
},
{
"epoch": 8.6,
"learning_rate": 0.00050913884977498,
"loss": 0.0089,
"step": 125904
},
{
"epoch": 8.6,
"eval_loss": 0.05022185668349266,
"eval_max_distance": 108,
"eval_mean_distance": 1,
"eval_runtime": 4.1555,
"eval_samples_per_second": 120.323,
"eval_steps_per_second": 2.406,
"step": 125904
},
{
"epoch": 8.65,
"learning_rate": 0.0005040862530715923,
"loss": 0.0092,
"step": 126636
},
{
"epoch": 8.65,
"eval_loss": 0.04238352179527283,
"eval_max_distance": 94,
"eval_mean_distance": 1,
"eval_runtime": 4.121,
"eval_samples_per_second": 121.33,
"eval_steps_per_second": 2.427,
"step": 126636
},
{
"epoch": 8.7,
"learning_rate": 0.0004990336563682045,
"loss": 0.009,
"step": 127368
},
{
"epoch": 8.7,
"eval_loss": 0.05611519515514374,
"eval_max_distance": 86,
"eval_mean_distance": 1,
"eval_runtime": 4.224,
"eval_samples_per_second": 118.372,
"eval_steps_per_second": 2.367,
"step": 127368
},
{
"epoch": 8.75,
"learning_rate": 0.0004939810596648168,
"loss": 0.009,
"step": 128100
},
{
"epoch": 8.75,
"eval_loss": 0.057163383811712265,
"eval_max_distance": 94,
"eval_mean_distance": 1,
"eval_runtime": 4.9408,
"eval_samples_per_second": 101.198,
"eval_steps_per_second": 2.024,
"step": 128100
},
{
"epoch": 8.8,
"learning_rate": 0.0004889284629614291,
"loss": 0.0092,
"step": 128832
},
{
"epoch": 8.8,
"eval_loss": 0.06296004354953766,
"eval_max_distance": 113,
"eval_mean_distance": 1,
"eval_runtime": 4.632,
"eval_samples_per_second": 107.945,
"eval_steps_per_second": 2.159,
"step": 128832
},
{
"epoch": 8.85,
"learning_rate": 0.00048387586625804136,
"loss": 0.0096,
"step": 129564
},
{
"epoch": 8.85,
"eval_loss": 0.08643154799938202,
"eval_max_distance": 127,
"eval_mean_distance": 2,
"eval_runtime": 5.004,
"eval_samples_per_second": 99.921,
"eval_steps_per_second": 1.998,
"step": 129564
},
{
"epoch": 8.9,
"learning_rate": 0.0004788232695546537,
"loss": 0.0106,
"step": 130296
},
{
"epoch": 8.9,
"eval_loss": 0.09613845497369766,
"eval_max_distance": 126,
"eval_mean_distance": 2,
"eval_runtime": 4.952,
"eval_samples_per_second": 100.969,
"eval_steps_per_second": 2.019,
"step": 130296
},
{
"epoch": 8.95,
"learning_rate": 0.0004737706728512659,
"loss": 0.012,
"step": 131028
},
{
"epoch": 8.95,
"eval_loss": 0.11241430044174194,
"eval_max_distance": 124,
"eval_mean_distance": 3,
"eval_runtime": 5.5085,
"eval_samples_per_second": 90.769,
"eval_steps_per_second": 1.815,
"step": 131028
},
{
"epoch": 9.0,
"learning_rate": 0.0004687180761478782,
"loss": 0.0334,
"step": 131760
},
{
"epoch": 9.0,
"eval_loss": 0.07378821820020676,
"eval_max_distance": 44,
"eval_mean_distance": 1,
"eval_runtime": 3.74,
"eval_samples_per_second": 133.69,
"eval_steps_per_second": 2.674,
"step": 131760
},
{
"epoch": 9.05,
"learning_rate": 0.00046366547944449044,
"loss": 0.0223,
"step": 132492
},
{
"epoch": 9.05,
"eval_loss": 0.14633670449256897,
"eval_max_distance": 37,
"eval_mean_distance": 1,
"eval_runtime": 3.2404,
"eval_samples_per_second": 154.3,
"eval_steps_per_second": 3.086,
"step": 132492
},
{
"epoch": 9.1,
"learning_rate": 0.00045861288274110277,
"loss": 0.0112,
"step": 133224
},
{
"epoch": 9.1,
"eval_loss": 0.09432947635650635,
"eval_max_distance": 37,
"eval_mean_distance": 1,
"eval_runtime": 2.907,
"eval_samples_per_second": 171.999,
"eval_steps_per_second": 3.44,
"step": 133224
},
{
"epoch": 9.15,
"learning_rate": 0.00045356028603771504,
"loss": 0.0086,
"step": 133956
},
{
"epoch": 9.15,
"eval_loss": 0.08595520257949829,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 2.9948,
"eval_samples_per_second": 166.958,
"eval_steps_per_second": 3.339,
"step": 133956
},
{
"epoch": 9.2,
"learning_rate": 0.0004485076893343273,
"loss": 0.0074,
"step": 134688
},
{
"epoch": 9.2,
"eval_loss": 0.08134690672159195,
"eval_max_distance": 56,
"eval_mean_distance": 1,
"eval_runtime": 3.541,
"eval_samples_per_second": 141.203,
"eval_steps_per_second": 2.824,
"step": 134688
},
{
"epoch": 9.25,
"learning_rate": 0.0004434550926309396,
"loss": 0.0068,
"step": 135420
},
{
"epoch": 9.25,
"eval_loss": 0.07917257398366928,
"eval_max_distance": 55,
"eval_mean_distance": 1,
"eval_runtime": 2.7556,
"eval_samples_per_second": 181.448,
"eval_steps_per_second": 3.629,
"step": 135420
},
{
"epoch": 9.3,
"learning_rate": 0.0004384024959275518,
"loss": 0.0064,
"step": 136152
},
{
"epoch": 9.3,
"eval_loss": 0.0820038914680481,
"eval_max_distance": 45,
"eval_mean_distance": 1,
"eval_runtime": 2.9383,
"eval_samples_per_second": 170.165,
"eval_steps_per_second": 3.403,
"step": 136152
},
{
"epoch": 9.35,
"learning_rate": 0.0004333498992241641,
"loss": 0.0063,
"step": 136884
},
{
"epoch": 9.35,
"eval_loss": 0.05062669888138771,
"eval_max_distance": 84,
"eval_mean_distance": 0,
"eval_runtime": 2.894,
"eval_samples_per_second": 172.769,
"eval_steps_per_second": 3.455,
"step": 136884
},
{
"epoch": 9.4,
"learning_rate": 0.0004282973025207764,
"loss": 0.0062,
"step": 137616
},
{
"epoch": 9.4,
"eval_loss": 0.046124935150146484,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 3.247,
"eval_samples_per_second": 153.989,
"eval_steps_per_second": 3.08,
"step": 137616
},
{
"epoch": 9.45,
"learning_rate": 0.00042324470581738867,
"loss": 0.0062,
"step": 138348
},
{
"epoch": 9.45,
"eval_loss": 0.033984892070293427,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 3.394,
"eval_samples_per_second": 147.318,
"eval_steps_per_second": 2.946,
"step": 138348
},
{
"epoch": 9.5,
"learning_rate": 0.00041819210911400094,
"loss": 0.0063,
"step": 139080
},
{
"epoch": 9.5,
"eval_loss": 0.038424424827098846,
"eval_max_distance": 56,
"eval_mean_distance": 0,
"eval_runtime": 3.5795,
"eval_samples_per_second": 139.684,
"eval_steps_per_second": 2.794,
"step": 139080
},
{
"epoch": 9.55,
"learning_rate": 0.0004131395124106132,
"loss": 0.0059,
"step": 139812
},
{
"epoch": 9.55,
"eval_loss": 0.03822114318609238,
"eval_max_distance": 88,
"eval_mean_distance": 0,
"eval_runtime": 3.5907,
"eval_samples_per_second": 139.248,
"eval_steps_per_second": 2.785,
"step": 139812
},
{
"epoch": 9.6,
"learning_rate": 0.0004080869157072255,
"loss": 0.0063,
"step": 140544
},
{
"epoch": 9.6,
"eval_loss": 0.04504471272230148,
"eval_max_distance": 108,
"eval_mean_distance": 1,
"eval_runtime": 3.715,
"eval_samples_per_second": 134.588,
"eval_steps_per_second": 2.692,
"step": 140544
},
{
"epoch": 9.65,
"learning_rate": 0.00040303431900383775,
"loss": 0.0063,
"step": 141276
},
{
"epoch": 9.65,
"eval_loss": 0.04976603761315346,
"eval_max_distance": 109,
"eval_mean_distance": 1,
"eval_runtime": 3.9331,
"eval_samples_per_second": 127.125,
"eval_steps_per_second": 2.543,
"step": 141276
},
{
"epoch": 9.7,
"learning_rate": 0.00039798172230045,
"loss": 0.0064,
"step": 142008
},
{
"epoch": 9.7,
"eval_loss": 0.05060645565390587,
"eval_max_distance": 111,
"eval_mean_distance": 1,
"eval_runtime": 4.1192,
"eval_samples_per_second": 121.383,
"eval_steps_per_second": 2.428,
"step": 142008
},
{
"epoch": 9.75,
"learning_rate": 0.00039292912559706235,
"loss": 0.0062,
"step": 142740
},
{
"epoch": 9.75,
"eval_loss": 0.05505865439772606,
"eval_max_distance": 109,
"eval_mean_distance": 1,
"eval_runtime": 4.3878,
"eval_samples_per_second": 113.953,
"eval_steps_per_second": 2.279,
"step": 142740
},
{
"epoch": 9.8,
"learning_rate": 0.00038787652889367457,
"loss": 0.0061,
"step": 143472
},
{
"epoch": 9.8,
"eval_loss": 0.0596136748790741,
"eval_max_distance": 142,
"eval_mean_distance": 1,
"eval_runtime": 4.395,
"eval_samples_per_second": 113.765,
"eval_steps_per_second": 2.275,
"step": 143472
},
{
"epoch": 9.85,
"learning_rate": 0.0003828239321902869,
"loss": 0.0062,
"step": 144204
},
{
"epoch": 9.85,
"eval_loss": 0.07242786139249802,
"eval_max_distance": 135,
"eval_mean_distance": 2,
"eval_runtime": 4.5735,
"eval_samples_per_second": 109.325,
"eval_steps_per_second": 2.187,
"step": 144204
},
{
"epoch": 9.9,
"learning_rate": 0.0003777713354868991,
"loss": 0.0069,
"step": 144936
},
{
"epoch": 9.9,
"eval_loss": 0.09855272620916367,
"eval_max_distance": 151,
"eval_mean_distance": 2,
"eval_runtime": 4.9915,
"eval_samples_per_second": 100.17,
"eval_steps_per_second": 2.003,
"step": 144936
},
{
"epoch": 9.95,
"learning_rate": 0.00037271873878351144,
"loss": 0.0078,
"step": 145668
},
{
"epoch": 9.95,
"eval_loss": 0.10503390431404114,
"eval_max_distance": 133,
"eval_mean_distance": 2,
"eval_runtime": 5.2264,
"eval_samples_per_second": 95.668,
"eval_steps_per_second": 1.913,
"step": 145668
},
{
"epoch": 10.0,
"learning_rate": 2.5008541168431845e-06,
"loss": 0.1557,
"step": 146400
},
{
"epoch": 10.0,
"eval_loss": 0.0961650088429451,
"eval_max_distance": 130,
"eval_mean_distance": 2,
"eval_runtime": 5.211,
"eval_samples_per_second": 95.952,
"eval_steps_per_second": 1.919,
"step": 146400
},
{
"epoch": 10.1,
"learning_rate": 7.502562350529553e-06,
"loss": 0.1283,
"step": 147864
},
{
"epoch": 10.1,
"eval_loss": 0.03514343872666359,
"eval_max_distance": 71,
"eval_mean_distance": 1,
"eval_runtime": 2.8424,
"eval_samples_per_second": 175.908,
"eval_steps_per_second": 3.518,
"step": 147864
},
{
"epoch": 10.2,
"learning_rate": 9.974702591517603e-06,
"loss": 0.0159,
"step": 149328
},
{
"epoch": 10.2,
"eval_loss": 0.03262121602892876,
"eval_max_distance": 70,
"eval_mean_distance": 0,
"eval_runtime": 2.5617,
"eval_samples_per_second": 195.18,
"eval_steps_per_second": 3.904,
"step": 149328
},
{
"epoch": 10.3,
"learning_rate": 9.924176798859719e-06,
"loss": 0.0088,
"step": 150792
},
{
"epoch": 10.3,
"eval_loss": 0.03363807499408722,
"eval_max_distance": 72,
"eval_mean_distance": 0,
"eval_runtime": 2.5129,
"eval_samples_per_second": 198.972,
"eval_steps_per_second": 3.979,
"step": 150792
},
{
"epoch": 10.4,
"learning_rate": 9.873651006201834e-06,
"loss": 0.0063,
"step": 152256
},
{
"epoch": 10.4,
"eval_loss": 0.031921450048685074,
"eval_max_distance": 76,
"eval_mean_distance": 0,
"eval_runtime": 2.5172,
"eval_samples_per_second": 198.633,
"eval_steps_per_second": 3.973,
"step": 152256
},
{
"epoch": 10.5,
"learning_rate": 9.823125213543951e-06,
"loss": 0.0053,
"step": 153720
},
{
"epoch": 10.5,
"eval_loss": 0.031937919557094574,
"eval_max_distance": 76,
"eval_mean_distance": 0,
"eval_runtime": 2.5246,
"eval_samples_per_second": 198.048,
"eval_steps_per_second": 3.961,
"step": 153720
},
{
"epoch": 10.6,
"learning_rate": 9.772599420886064e-06,
"loss": 0.0043,
"step": 155184
},
{
"epoch": 10.6,
"eval_loss": 0.03107617422938347,
"eval_max_distance": 76,
"eval_mean_distance": 0,
"eval_runtime": 2.7282,
"eval_samples_per_second": 183.269,
"eval_steps_per_second": 3.665,
"step": 155184
},
{
"epoch": 10.7,
"learning_rate": 9.722073628228181e-06,
"loss": 0.0038,
"step": 156648
},
{
"epoch": 10.7,
"eval_loss": 0.033566124737262726,
"eval_max_distance": 80,
"eval_mean_distance": 0,
"eval_runtime": 2.9105,
"eval_samples_per_second": 171.789,
"eval_steps_per_second": 3.436,
"step": 156648
},
{
"epoch": 10.8,
"learning_rate": 9.671547835570297e-06,
"loss": 0.0031,
"step": 158112
},
{
"epoch": 10.8,
"eval_loss": 0.03568200394511223,
"eval_max_distance": 97,
"eval_mean_distance": 1,
"eval_runtime": 2.9978,
"eval_samples_per_second": 166.787,
"eval_steps_per_second": 3.336,
"step": 158112
},
{
"epoch": 10.9,
"learning_rate": 9.621022042912412e-06,
"loss": 0.0025,
"step": 159576
},
{
"epoch": 10.9,
"eval_loss": 0.03964918479323387,
"eval_max_distance": 105,
"eval_mean_distance": 1,
"eval_runtime": 3.3639,
"eval_samples_per_second": 148.637,
"eval_steps_per_second": 2.973,
"step": 159576
},
{
"epoch": 11.0,
"learning_rate": 9.570496250254529e-06,
"loss": 0.0257,
"step": 161040
},
{
"epoch": 11.0,
"eval_loss": 0.030957885086536407,
"eval_max_distance": 75,
"eval_mean_distance": 0,
"eval_runtime": 2.9202,
"eval_samples_per_second": 171.221,
"eval_steps_per_second": 3.424,
"step": 161040
},
{
"epoch": 11.1,
"learning_rate": 9.519970457596643e-06,
"loss": 0.0239,
"step": 162504
},
{
"epoch": 11.1,
"eval_loss": 0.020240401849150658,
"eval_max_distance": 67,
"eval_mean_distance": 0,
"eval_runtime": 2.4054,
"eval_samples_per_second": 207.863,
"eval_steps_per_second": 4.157,
"step": 162504
},
{
"epoch": 11.2,
"learning_rate": 9.46944466493876e-06,
"loss": 0.0077,
"step": 163968
},
{
"epoch": 11.2,
"eval_loss": 0.02426925301551819,
"eval_max_distance": 67,
"eval_mean_distance": 0,
"eval_runtime": 2.2824,
"eval_samples_per_second": 219.066,
"eval_steps_per_second": 4.381,
"step": 163968
},
{
"epoch": 11.3,
"learning_rate": 9.418918872280875e-06,
"loss": 0.0052,
"step": 165432
},
{
"epoch": 11.3,
"eval_loss": 0.021467674523591995,
"eval_max_distance": 60,
"eval_mean_distance": 0,
"eval_runtime": 2.3057,
"eval_samples_per_second": 216.857,
"eval_steps_per_second": 4.337,
"step": 165432
},
{
"epoch": 11.4,
"learning_rate": 9.36839307962299e-06,
"loss": 0.004,
"step": 166896
},
{
"epoch": 11.4,
"eval_loss": 0.02434716746211052,
"eval_max_distance": 60,
"eval_mean_distance": 0,
"eval_runtime": 2.2997,
"eval_samples_per_second": 217.424,
"eval_steps_per_second": 4.348,
"step": 166896
},
{
"epoch": 11.5,
"learning_rate": 9.317867286965105e-06,
"loss": 0.0035,
"step": 168360
},
{
"epoch": 11.5,
"eval_loss": 0.02479979395866394,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 2.2201,
"eval_samples_per_second": 225.211,
"eval_steps_per_second": 4.504,
"step": 168360
},
{
"epoch": 11.6,
"learning_rate": 9.26734149430722e-06,
"loss": 0.003,
"step": 169824
},
{
"epoch": 11.6,
"eval_loss": 0.0237094946205616,
"eval_max_distance": 65,
"eval_mean_distance": 0,
"eval_runtime": 2.156,
"eval_samples_per_second": 231.91,
"eval_steps_per_second": 4.638,
"step": 169824
},
{
"epoch": 11.7,
"learning_rate": 9.216815701649336e-06,
"loss": 0.0028,
"step": 171288
},
{
"epoch": 11.7,
"eval_loss": 0.026788845658302307,
"eval_max_distance": 76,
"eval_mean_distance": 0,
"eval_runtime": 2.4004,
"eval_samples_per_second": 208.302,
"eval_steps_per_second": 4.166,
"step": 171288
},
{
"epoch": 11.8,
"learning_rate": 9.166289908991453e-06,
"loss": 0.0023,
"step": 172752
},
{
"epoch": 11.8,
"eval_loss": 0.028016921132802963,
"eval_max_distance": 70,
"eval_mean_distance": 0,
"eval_runtime": 2.865,
"eval_samples_per_second": 174.518,
"eval_steps_per_second": 3.49,
"step": 172752
},
{
"epoch": 11.9,
"learning_rate": 9.115764116333568e-06,
"loss": 0.0019,
"step": 174216
},
{
"epoch": 11.9,
"eval_loss": 0.029591867700219154,
"eval_max_distance": 74,
"eval_mean_distance": 0,
"eval_runtime": 3.5756,
"eval_samples_per_second": 139.837,
"eval_steps_per_second": 2.797,
"step": 174216
},
{
"epoch": 12.0,
"learning_rate": 9.065238323675684e-06,
"loss": 0.0191,
"step": 175680
},
{
"epoch": 12.0,
"eval_loss": 0.0253616813570261,
"eval_max_distance": 70,
"eval_mean_distance": 0,
"eval_runtime": 3.4599,
"eval_samples_per_second": 144.511,
"eval_steps_per_second": 2.89,
"step": 175680
},
{
"epoch": 12.1,
"learning_rate": 9.014712531017799e-06,
"loss": 0.0184,
"step": 177144
},
{
"epoch": 12.1,
"eval_loss": 0.019834740087389946,
"eval_max_distance": 49,
"eval_mean_distance": 0,
"eval_runtime": 2.8168,
"eval_samples_per_second": 177.505,
"eval_steps_per_second": 3.55,
"step": 177144
},
{
"epoch": 12.21,
"learning_rate": 8.964186738359914e-06,
"loss": 0.0064,
"step": 178608
},
{
"epoch": 12.21,
"eval_loss": 0.02111968584358692,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 2.795,
"eval_samples_per_second": 178.891,
"eval_steps_per_second": 3.578,
"step": 178608
},
{
"epoch": 12.31,
"learning_rate": 8.91366094570203e-06,
"loss": 0.0043,
"step": 180072
},
{
"epoch": 12.31,
"eval_loss": 0.019950907677412033,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.114,
"eval_samples_per_second": 236.522,
"eval_steps_per_second": 4.73,
"step": 180072
},
{
"epoch": 12.41,
"learning_rate": 8.863135153044146e-06,
"loss": 0.0033,
"step": 181536
},
{
"epoch": 12.41,
"eval_loss": 0.022810520604252815,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 2.7861,
"eval_samples_per_second": 179.465,
"eval_steps_per_second": 3.589,
"step": 181536
},
{
"epoch": 12.51,
"learning_rate": 8.81260936038626e-06,
"loss": 0.0029,
"step": 183000
},
{
"epoch": 12.51,
"eval_loss": 0.022028256207704544,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 2.4,
"eval_samples_per_second": 208.337,
"eval_steps_per_second": 4.167,
"step": 183000
},
{
"epoch": 12.61,
"learning_rate": 8.762083567728377e-06,
"loss": 0.0025,
"step": 184464
},
{
"epoch": 12.61,
"eval_loss": 0.021184444427490234,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 2.6483,
"eval_samples_per_second": 188.797,
"eval_steps_per_second": 3.776,
"step": 184464
},
{
"epoch": 12.71,
"learning_rate": 8.711557775070492e-06,
"loss": 0.0024,
"step": 185928
},
{
"epoch": 12.71,
"eval_loss": 0.023559901863336563,
"eval_max_distance": 56,
"eval_mean_distance": 0,
"eval_runtime": 2.7031,
"eval_samples_per_second": 184.976,
"eval_steps_per_second": 3.7,
"step": 185928
},
{
"epoch": 12.81,
"learning_rate": 8.661031982412607e-06,
"loss": 0.002,
"step": 187392
},
{
"epoch": 12.81,
"eval_loss": 0.023327892646193504,
"eval_max_distance": 55,
"eval_mean_distance": 0,
"eval_runtime": 2.5819,
"eval_samples_per_second": 193.657,
"eval_steps_per_second": 3.873,
"step": 187392
},
{
"epoch": 12.91,
"learning_rate": 8.610506189754723e-06,
"loss": 0.0016,
"step": 188856
},
{
"epoch": 12.91,
"eval_loss": 0.027195889502763748,
"eval_max_distance": 61,
"eval_mean_distance": 0,
"eval_runtime": 3.1865,
"eval_samples_per_second": 156.911,
"eval_steps_per_second": 3.138,
"step": 188856
},
{
"epoch": 13.01,
"learning_rate": 8.559980397096838e-06,
"loss": 0.0156,
"step": 190320
},
{
"epoch": 13.01,
"eval_loss": 0.022682741284370422,
"eval_max_distance": 57,
"eval_mean_distance": 0,
"eval_runtime": 2.6784,
"eval_samples_per_second": 186.677,
"eval_steps_per_second": 3.734,
"step": 190320
},
{
"epoch": 13.11,
"learning_rate": 8.509454604438953e-06,
"loss": 0.0159,
"step": 191784
},
{
"epoch": 13.11,
"eval_loss": 0.018665272742509842,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.861,
"eval_samples_per_second": 174.766,
"eval_steps_per_second": 3.495,
"step": 191784
},
{
"epoch": 13.21,
"learning_rate": 8.45892881178107e-06,
"loss": 0.0057,
"step": 193248
},
{
"epoch": 13.21,
"eval_loss": 0.018314659595489502,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.753,
"eval_samples_per_second": 181.621,
"eval_steps_per_second": 3.632,
"step": 193248
},
{
"epoch": 13.31,
"learning_rate": 8.408403019123184e-06,
"loss": 0.0038,
"step": 194712
},
{
"epoch": 13.31,
"eval_loss": 0.021940866485238075,
"eval_max_distance": 40,
"eval_mean_distance": 0,
"eval_runtime": 2.2402,
"eval_samples_per_second": 223.198,
"eval_steps_per_second": 4.464,
"step": 194712
},
{
"epoch": 13.41,
"learning_rate": 8.357877226465301e-06,
"loss": 0.0029,
"step": 196176
},
{
"epoch": 13.41,
"eval_loss": 0.019176060333848,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.2189,
"eval_samples_per_second": 225.341,
"eval_steps_per_second": 4.507,
"step": 196176
},
{
"epoch": 13.51,
"learning_rate": 8.307351433807416e-06,
"loss": 0.0026,
"step": 197640
},
{
"epoch": 13.51,
"eval_loss": 0.019291119650006294,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.1239,
"eval_samples_per_second": 235.419,
"eval_steps_per_second": 4.708,
"step": 197640
},
{
"epoch": 13.61,
"learning_rate": 8.256825641149531e-06,
"loss": 0.0022,
"step": 199104
},
{
"epoch": 13.61,
"eval_loss": 0.019858654588460922,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.6694,
"eval_samples_per_second": 187.308,
"eval_steps_per_second": 3.746,
"step": 199104
},
{
"epoch": 13.71,
"learning_rate": 8.206299848491647e-06,
"loss": 0.0021,
"step": 200568
},
{
"epoch": 13.71,
"eval_loss": 0.020177775993943214,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 2.5426,
"eval_samples_per_second": 196.646,
"eval_steps_per_second": 3.933,
"step": 200568
},
{
"epoch": 13.81,
"learning_rate": 8.155774055833762e-06,
"loss": 0.0018,
"step": 202032
},
{
"epoch": 13.81,
"eval_loss": 0.02495921589434147,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 2.252,
"eval_samples_per_second": 222.025,
"eval_steps_per_second": 4.44,
"step": 202032
},
{
"epoch": 13.91,
"learning_rate": 8.105248263175877e-06,
"loss": 0.0015,
"step": 203496
},
{
"epoch": 13.91,
"eval_loss": 0.022407229989767075,
"eval_max_distance": 56,
"eval_mean_distance": 0,
"eval_runtime": 2.368,
"eval_samples_per_second": 211.149,
"eval_steps_per_second": 4.223,
"step": 203496
},
{
"epoch": 14.01,
"learning_rate": 8.054722470517994e-06,
"loss": 0.014,
"step": 204960
},
{
"epoch": 14.01,
"eval_loss": 0.0217585526406765,
"eval_max_distance": 53,
"eval_mean_distance": 0,
"eval_runtime": 2.764,
"eval_samples_per_second": 180.895,
"eval_steps_per_second": 3.618,
"step": 204960
},
{
"epoch": 14.11,
"learning_rate": 8.00419667786011e-06,
"loss": 0.0145,
"step": 206424
},
{
"epoch": 14.11,
"eval_loss": 0.019289566203951836,
"eval_max_distance": 43,
"eval_mean_distance": 0,
"eval_runtime": 2.1796,
"eval_samples_per_second": 229.403,
"eval_steps_per_second": 4.588,
"step": 206424
},
{
"epoch": 14.21,
"learning_rate": 7.953670885202225e-06,
"loss": 0.0052,
"step": 207888
},
{
"epoch": 14.21,
"eval_loss": 0.018878955394029617,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.1918,
"eval_samples_per_second": 228.124,
"eval_steps_per_second": 4.562,
"step": 207888
},
{
"epoch": 14.31,
"learning_rate": 7.90314509254434e-06,
"loss": 0.0035,
"step": 209352
},
{
"epoch": 14.31,
"eval_loss": 0.018222585320472717,
"eval_max_distance": 40,
"eval_mean_distance": 0,
"eval_runtime": 2.1312,
"eval_samples_per_second": 234.605,
"eval_steps_per_second": 4.692,
"step": 209352
},
{
"epoch": 14.41,
"learning_rate": 7.852619299886455e-06,
"loss": 0.0026,
"step": 210816
},
{
"epoch": 14.41,
"eval_loss": 0.02002587914466858,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.271,
"eval_samples_per_second": 220.17,
"eval_steps_per_second": 4.403,
"step": 210816
},
{
"epoch": 14.51,
"learning_rate": 7.80209350722857e-06,
"loss": 0.0023,
"step": 212280
},
{
"epoch": 14.51,
"eval_loss": 0.019655397161841393,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.1726,
"eval_samples_per_second": 230.139,
"eval_steps_per_second": 4.603,
"step": 212280
},
{
"epoch": 14.61,
"learning_rate": 7.751567714570688e-06,
"loss": 0.002,
"step": 213744
},
{
"epoch": 14.61,
"eval_loss": 0.020178088918328285,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.162,
"eval_samples_per_second": 231.271,
"eval_steps_per_second": 4.625,
"step": 213744
},
{
"epoch": 14.71,
"learning_rate": 7.701041921912801e-06,
"loss": 0.002,
"step": 215208
},
{
"epoch": 14.71,
"eval_loss": 0.023831263184547424,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.1181,
"eval_samples_per_second": 236.063,
"eval_steps_per_second": 4.721,
"step": 215208
},
{
"epoch": 14.81,
"learning_rate": 7.650516129254918e-06,
"loss": 0.0016,
"step": 216672
},
{
"epoch": 14.81,
"eval_loss": 0.02202780544757843,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.6647,
"eval_samples_per_second": 187.638,
"eval_steps_per_second": 3.753,
"step": 216672
},
{
"epoch": 14.91,
"learning_rate": 7.599990336597033e-06,
"loss": 0.0013,
"step": 218136
},
{
"epoch": 14.91,
"eval_loss": 0.024373715743422508,
"eval_max_distance": 52,
"eval_mean_distance": 0,
"eval_runtime": 2.249,
"eval_samples_per_second": 222.322,
"eval_steps_per_second": 4.446,
"step": 218136
},
{
"epoch": 15.01,
"learning_rate": 7.549464543939149e-06,
"loss": 0.0127,
"step": 219600
},
{
"epoch": 15.01,
"eval_loss": 0.02190934307873249,
"eval_max_distance": 51,
"eval_mean_distance": 0,
"eval_runtime": 3.1441,
"eval_samples_per_second": 159.026,
"eval_steps_per_second": 3.181,
"step": 219600
},
{
"epoch": 15.11,
"learning_rate": 7.498938751281264e-06,
"loss": 0.0135,
"step": 221064
},
{
"epoch": 15.11,
"eval_loss": 0.017063263803720474,
"eval_max_distance": 43,
"eval_mean_distance": 0,
"eval_runtime": 2.1044,
"eval_samples_per_second": 237.6,
"eval_steps_per_second": 4.752,
"step": 221064
},
{
"epoch": 15.21,
"learning_rate": 7.44841295862338e-06,
"loss": 0.0049,
"step": 222528
},
{
"epoch": 15.21,
"eval_loss": 0.018728330731391907,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.6091,
"eval_samples_per_second": 191.64,
"eval_steps_per_second": 3.833,
"step": 222528
},
{
"epoch": 15.31,
"learning_rate": 7.397887165965495e-06,
"loss": 0.0032,
"step": 223992
},
{
"epoch": 15.31,
"eval_loss": 0.019817959517240524,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.5028,
"eval_samples_per_second": 199.775,
"eval_steps_per_second": 3.995,
"step": 223992
},
{
"epoch": 15.41,
"learning_rate": 7.347361373307611e-06,
"loss": 0.0024,
"step": 225456
},
{
"epoch": 15.41,
"eval_loss": 0.017553431913256645,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 1.9951,
"eval_samples_per_second": 250.614,
"eval_steps_per_second": 5.012,
"step": 225456
},
{
"epoch": 15.51,
"learning_rate": 7.296835580649726e-06,
"loss": 0.0022,
"step": 226920
},
{
"epoch": 15.51,
"eval_loss": 0.01916639320552349,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.5108,
"eval_samples_per_second": 199.143,
"eval_steps_per_second": 3.983,
"step": 226920
},
{
"epoch": 15.61,
"learning_rate": 7.246309787991842e-06,
"loss": 0.0018,
"step": 228384
},
{
"epoch": 15.61,
"eval_loss": 0.018844295293092728,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 1.9922,
"eval_samples_per_second": 250.984,
"eval_steps_per_second": 5.02,
"step": 228384
},
{
"epoch": 15.71,
"learning_rate": 7.195783995333957e-06,
"loss": 0.0018,
"step": 229848
},
{
"epoch": 15.71,
"eval_loss": 0.020747974514961243,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.4786,
"eval_samples_per_second": 201.73,
"eval_steps_per_second": 4.035,
"step": 229848
},
{
"epoch": 15.81,
"learning_rate": 7.145258202676073e-06,
"loss": 0.0015,
"step": 231312
},
{
"epoch": 15.81,
"eval_loss": 0.021101944148540497,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.0975,
"eval_samples_per_second": 238.374,
"eval_steps_per_second": 4.767,
"step": 231312
},
{
"epoch": 15.91,
"learning_rate": 7.094732410018188e-06,
"loss": 0.0013,
"step": 232776
},
{
"epoch": 15.91,
"eval_loss": 0.02276449464261532,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.182,
"eval_samples_per_second": 229.148,
"eval_steps_per_second": 4.583,
"step": 232776
},
{
"epoch": 16.01,
"learning_rate": 7.044206617360304e-06,
"loss": 0.0117,
"step": 234240
},
{
"epoch": 16.01,
"eval_loss": 0.020543677732348442,
"eval_max_distance": 44,
"eval_mean_distance": 0,
"eval_runtime": 2.4063,
"eval_samples_per_second": 207.787,
"eval_steps_per_second": 4.156,
"step": 234240
},
{
"epoch": 16.11,
"learning_rate": 6.993680824702419e-06,
"loss": 0.0126,
"step": 235704
},
{
"epoch": 16.11,
"eval_loss": 0.017592445015907288,
"eval_max_distance": 40,
"eval_mean_distance": 0,
"eval_runtime": 2.3265,
"eval_samples_per_second": 214.918,
"eval_steps_per_second": 4.298,
"step": 235704
},
{
"epoch": 16.21,
"learning_rate": 6.9431550320445355e-06,
"loss": 0.0047,
"step": 237168
},
{
"epoch": 16.21,
"eval_loss": 0.01700788550078869,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.6521,
"eval_samples_per_second": 188.528,
"eval_steps_per_second": 3.771,
"step": 237168
},
{
"epoch": 16.31,
"learning_rate": 6.89262923938665e-06,
"loss": 0.003,
"step": 238632
},
{
"epoch": 16.31,
"eval_loss": 0.017684699967503548,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.6466,
"eval_samples_per_second": 188.923,
"eval_steps_per_second": 3.778,
"step": 238632
},
{
"epoch": 16.41,
"learning_rate": 6.842103446728766e-06,
"loss": 0.0023,
"step": 240096
},
{
"epoch": 16.41,
"eval_loss": 0.01923580840229988,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.5212,
"eval_samples_per_second": 198.314,
"eval_steps_per_second": 3.966,
"step": 240096
},
{
"epoch": 16.51,
"learning_rate": 6.791577654070881e-06,
"loss": 0.002,
"step": 241560
},
{
"epoch": 16.51,
"eval_loss": 0.018833961337804794,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.0402,
"eval_samples_per_second": 245.068,
"eval_steps_per_second": 4.901,
"step": 241560
},
{
"epoch": 16.61,
"learning_rate": 6.7410518614129975e-06,
"loss": 0.0017,
"step": 243024
},
{
"epoch": 16.61,
"eval_loss": 0.01853407733142376,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.6623,
"eval_samples_per_second": 187.81,
"eval_steps_per_second": 3.756,
"step": 243024
},
{
"epoch": 16.71,
"learning_rate": 6.690526068755112e-06,
"loss": 0.0017,
"step": 244488
},
{
"epoch": 16.71,
"eval_loss": 0.020028624683618546,
"eval_max_distance": 43,
"eval_mean_distance": 0,
"eval_runtime": 2.3728,
"eval_samples_per_second": 210.718,
"eval_steps_per_second": 4.214,
"step": 244488
},
{
"epoch": 16.81,
"learning_rate": 6.640000276097228e-06,
"loss": 0.0014,
"step": 245952
},
{
"epoch": 16.81,
"eval_loss": 0.021073581650853157,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.0603,
"eval_samples_per_second": 242.683,
"eval_steps_per_second": 4.854,
"step": 245952
},
{
"epoch": 16.91,
"learning_rate": 6.589474483439343e-06,
"loss": 0.0012,
"step": 247416
},
{
"epoch": 16.91,
"eval_loss": 0.020787488669157028,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.1877,
"eval_samples_per_second": 228.552,
"eval_steps_per_second": 4.571,
"step": 247416
},
{
"epoch": 17.01,
"learning_rate": 6.5389486907814595e-06,
"loss": 0.0111,
"step": 248880
},
{
"epoch": 17.01,
"eval_loss": 0.02048674039542675,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 3.0915,
"eval_samples_per_second": 161.731,
"eval_steps_per_second": 3.235,
"step": 248880
},
{
"epoch": 17.11,
"learning_rate": 6.488422898123576e-06,
"loss": 0.012,
"step": 250344
},
{
"epoch": 17.11,
"eval_loss": 0.018051166087388992,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.6273,
"eval_samples_per_second": 190.309,
"eval_steps_per_second": 3.806,
"step": 250344
},
{
"epoch": 17.21,
"learning_rate": 6.43789710546569e-06,
"loss": 0.0045,
"step": 251808
},
{
"epoch": 17.21,
"eval_loss": 0.01751534268260002,
"eval_max_distance": 42,
"eval_mean_distance": 0,
"eval_runtime": 2.6979,
"eval_samples_per_second": 185.332,
"eval_steps_per_second": 3.707,
"step": 251808
},
{
"epoch": 17.31,
"learning_rate": 6.387371312807806e-06,
"loss": 0.0029,
"step": 253272
},
{
"epoch": 17.31,
"eval_loss": 0.018825719133019447,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.0239,
"eval_samples_per_second": 247.05,
"eval_steps_per_second": 4.941,
"step": 253272
},
{
"epoch": 17.41,
"learning_rate": 6.3368455201499214e-06,
"loss": 0.0022,
"step": 254736
},
{
"epoch": 17.41,
"eval_loss": 0.01774280145764351,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 1.9404,
"eval_samples_per_second": 257.676,
"eval_steps_per_second": 5.154,
"step": 254736
},
{
"epoch": 17.51,
"learning_rate": 6.2863197274920376e-06,
"loss": 0.0019,
"step": 256200
},
{
"epoch": 17.51,
"eval_loss": 0.017391487956047058,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 1.9824,
"eval_samples_per_second": 252.224,
"eval_steps_per_second": 5.044,
"step": 256200
},
{
"epoch": 17.61,
"learning_rate": 6.235793934834152e-06,
"loss": 0.0016,
"step": 257664
},
{
"epoch": 17.61,
"eval_loss": 0.018112240359187126,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.6476,
"eval_samples_per_second": 188.849,
"eval_steps_per_second": 3.777,
"step": 257664
},
{
"epoch": 17.71,
"learning_rate": 6.185268142176268e-06,
"loss": 0.0016,
"step": 259128
},
{
"epoch": 17.71,
"eval_loss": 0.019098376855254173,
"eval_max_distance": 35,
"eval_mean_distance": 0,
"eval_runtime": 2.0112,
"eval_samples_per_second": 248.606,
"eval_steps_per_second": 4.972,
"step": 259128
},
{
"epoch": 17.81,
"learning_rate": 6.134742349518383e-06,
"loss": 0.0013,
"step": 260592
},
{
"epoch": 17.81,
"eval_loss": 0.019062627106904984,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.581,
"eval_samples_per_second": 193.726,
"eval_steps_per_second": 3.875,
"step": 260592
},
{
"epoch": 17.91,
"learning_rate": 6.0842165568604995e-06,
"loss": 0.0011,
"step": 262056
},
{
"epoch": 17.91,
"eval_loss": 0.02094220370054245,
"eval_max_distance": 48,
"eval_mean_distance": 0,
"eval_runtime": 2.1774,
"eval_samples_per_second": 229.631,
"eval_steps_per_second": 4.593,
"step": 262056
},
{
"epoch": 18.01,
"learning_rate": 6.033690764202614e-06,
"loss": 0.0106,
"step": 263520
},
{
"epoch": 18.01,
"eval_loss": 0.01973116211593151,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 2.9851,
"eval_samples_per_second": 167.497,
"eval_steps_per_second": 3.35,
"step": 263520
},
{
"epoch": 18.11,
"learning_rate": 5.98316497154473e-06,
"loss": 0.0114,
"step": 264984
},
{
"epoch": 18.11,
"eval_loss": 0.01814502477645874,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.0996,
"eval_samples_per_second": 238.139,
"eval_steps_per_second": 4.763,
"step": 264984
},
{
"epoch": 18.21,
"learning_rate": 5.932639178886845e-06,
"loss": 0.0043,
"step": 266448
},
{
"epoch": 18.21,
"eval_loss": 0.01815211959183216,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.5596,
"eval_samples_per_second": 195.345,
"eval_steps_per_second": 3.907,
"step": 266448
},
{
"epoch": 18.31,
"learning_rate": 5.8821133862289615e-06,
"loss": 0.0027,
"step": 267912
},
{
"epoch": 18.31,
"eval_loss": 0.017581813037395477,
"eval_max_distance": 36,
"eval_mean_distance": 0,
"eval_runtime": 2.5266,
"eval_samples_per_second": 197.897,
"eval_steps_per_second": 3.958,
"step": 267912
},
{
"epoch": 18.41,
"learning_rate": 5.831587593571077e-06,
"loss": 0.0021,
"step": 269376
},
{
"epoch": 18.41,
"eval_loss": 0.018509158864617348,
"eval_max_distance": 36,
"eval_mean_distance": 0,
"eval_runtime": 2.2319,
"eval_samples_per_second": 224.023,
"eval_steps_per_second": 4.48,
"step": 269376
},
{
"epoch": 18.51,
"learning_rate": 5.781061800913192e-06,
"loss": 0.0018,
"step": 270840
},
{
"epoch": 18.51,
"eval_loss": 0.020238015800714493,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.5077,
"eval_samples_per_second": 199.386,
"eval_steps_per_second": 3.988,
"step": 270840
},
{
"epoch": 18.61,
"learning_rate": 5.730536008255307e-06,
"loss": 0.0015,
"step": 272304
},
{
"epoch": 18.61,
"eval_loss": 0.018911337479948997,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.5132,
"eval_samples_per_second": 198.948,
"eval_steps_per_second": 3.979,
"step": 272304
},
{
"epoch": 18.71,
"learning_rate": 5.6800102155974235e-06,
"loss": 0.0015,
"step": 273768
},
{
"epoch": 18.71,
"eval_loss": 0.021090172231197357,
"eval_max_distance": 36,
"eval_mean_distance": 0,
"eval_runtime": 2.262,
"eval_samples_per_second": 221.045,
"eval_steps_per_second": 4.421,
"step": 273768
},
{
"epoch": 18.81,
"learning_rate": 5.629484422939539e-06,
"loss": 0.0013,
"step": 275232
},
{
"epoch": 18.81,
"eval_loss": 0.018903149291872978,
"eval_max_distance": 45,
"eval_mean_distance": 0,
"eval_runtime": 2.4844,
"eval_samples_per_second": 201.255,
"eval_steps_per_second": 4.025,
"step": 275232
},
{
"epoch": 18.91,
"learning_rate": 5.578958630281655e-06,
"loss": 0.001,
"step": 276696
},
{
"epoch": 18.91,
"eval_loss": 0.01939016580581665,
"eval_max_distance": 45,
"eval_mean_distance": 0,
"eval_runtime": 2.4506,
"eval_samples_per_second": 204.028,
"eval_steps_per_second": 4.081,
"step": 276696
},
{
"epoch": 19.01,
"learning_rate": 5.528432837623769e-06,
"loss": 0.0102,
"step": 278160
},
{
"epoch": 19.01,
"eval_loss": 0.019789060577750206,
"eval_max_distance": 47,
"eval_mean_distance": 0,
"eval_runtime": 2.3546,
"eval_samples_per_second": 212.349,
"eval_steps_per_second": 4.247,
"step": 278160
},
{
"epoch": 19.11,
"learning_rate": 5.4779070449658855e-06,
"loss": 0.011,
"step": 279624
},
{
"epoch": 19.11,
"eval_loss": 0.018310004845261574,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.0507,
"eval_samples_per_second": 243.824,
"eval_steps_per_second": 4.876,
"step": 279624
},
{
"epoch": 19.21,
"learning_rate": 5.427381252308001e-06,
"loss": 0.0042,
"step": 281088
},
{
"epoch": 19.21,
"eval_loss": 0.019428173080086708,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.7958,
"eval_samples_per_second": 178.84,
"eval_steps_per_second": 3.577,
"step": 281088
},
{
"epoch": 19.31,
"learning_rate": 5.376855459650117e-06,
"loss": 0.0026,
"step": 282552
},
{
"epoch": 19.31,
"eval_loss": 0.018094651401042938,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.5895,
"eval_samples_per_second": 193.087,
"eval_steps_per_second": 3.862,
"step": 282552
},
{
"epoch": 19.41,
"learning_rate": 5.326329666992231e-06,
"loss": 0.002,
"step": 284016
},
{
"epoch": 19.41,
"eval_loss": 0.018259983509778976,
"eval_max_distance": 36,
"eval_mean_distance": 0,
"eval_runtime": 2.4756,
"eval_samples_per_second": 201.968,
"eval_steps_per_second": 4.039,
"step": 284016
},
{
"epoch": 19.51,
"learning_rate": 5.2758038743343474e-06,
"loss": 0.0017,
"step": 285480
},
{
"epoch": 19.51,
"eval_loss": 0.01770331896841526,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.4887,
"eval_samples_per_second": 200.91,
"eval_steps_per_second": 4.018,
"step": 285480
},
{
"epoch": 19.61,
"learning_rate": 5.225278081676463e-06,
"loss": 0.0015,
"step": 286944
},
{
"epoch": 19.61,
"eval_loss": 0.021017737686634064,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.531,
"eval_samples_per_second": 197.549,
"eval_steps_per_second": 3.951,
"step": 286944
},
{
"epoch": 19.71,
"learning_rate": 5.174752289018579e-06,
"loss": 0.0015,
"step": 288408
},
{
"epoch": 19.71,
"eval_loss": 0.020359748974442482,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.4747,
"eval_samples_per_second": 202.046,
"eval_steps_per_second": 4.041,
"step": 288408
},
{
"epoch": 19.81,
"learning_rate": 5.124226496360693e-06,
"loss": 0.0012,
"step": 289872
},
{
"epoch": 19.81,
"eval_loss": 0.018832657486200333,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.6331,
"eval_samples_per_second": 189.894,
"eval_steps_per_second": 3.798,
"step": 289872
},
{
"epoch": 19.91,
"learning_rate": 5.073700703702809e-06,
"loss": 0.001,
"step": 291336
},
{
"epoch": 19.91,
"eval_loss": 0.020642299205064774,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.3046,
"eval_samples_per_second": 216.959,
"eval_steps_per_second": 4.339,
"step": 291336
},
{
"epoch": 19.99,
"learning_rate": 3.88e-08,
"loss": 0.0271,
"step": 292500
},
{
"epoch": 19.99,
"eval_loss": 0.021931374445557594,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.3671,
"eval_samples_per_second": 211.23,
"eval_steps_per_second": 4.225,
"step": 292500
},
{
"epoch": 20.09,
"learning_rate": 8.88e-08,
"loss": 0.0248,
"step": 294000
},
{
"epoch": 20.09,
"eval_loss": 0.022779377177357674,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.0968,
"eval_samples_per_second": 238.461,
"eval_steps_per_second": 4.769,
"step": 294000
},
{
"epoch": 20.19,
"learning_rate": 8.48799796831305e-08,
"loss": 0.0054,
"step": 295500
},
{
"epoch": 20.19,
"eval_loss": 0.01905255950987339,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.0895,
"eval_samples_per_second": 239.294,
"eval_steps_per_second": 4.786,
"step": 295500
},
{
"epoch": 20.3,
"learning_rate": 7.277781371744775e-08,
"loss": 0.0032,
"step": 297000
},
{
"epoch": 20.3,
"eval_loss": 0.020227482542395592,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.1433,
"eval_samples_per_second": 233.28,
"eval_steps_per_second": 4.666,
"step": 297000
},
{
"epoch": 20.4,
"learning_rate": 6.471170442908496e-08,
"loss": 0.0023,
"step": 298500
},
{
"epoch": 20.4,
"eval_loss": 0.019163601100444794,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.099,
"eval_samples_per_second": 238.207,
"eval_steps_per_second": 4.764,
"step": 298500
},
{
"epoch": 20.47,
"learning_rate": 3.360681603029347e-09,
"loss": 0.0019,
"step": 299565
},
{
"epoch": 20.47,
"eval_loss": 0.01855267398059368,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.4246,
"eval_samples_per_second": 206.219,
"eval_steps_per_second": 4.124,
"step": 299565
},
{
"epoch": 20.58,
"learning_rate": 8.362259387819501e-09,
"loss": 0.0016,
"step": 301150
},
{
"epoch": 20.58,
"eval_loss": 0.019294343888759613,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.1809,
"eval_samples_per_second": 229.262,
"eval_steps_per_second": 4.585,
"step": 301150
},
{
"epoch": 20.69,
"learning_rate": 8.650364590933742e-09,
"loss": 0.0015,
"step": 302735
},
{
"epoch": 20.69,
"eval_loss": 0.019666763022542,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.6939,
"eval_samples_per_second": 185.606,
"eval_steps_per_second": 3.712,
"step": 302735
},
{
"epoch": 20.8,
"learning_rate": 7.3790359682892075e-09,
"loss": 0.0012,
"step": 304320
},
{
"epoch": 20.8,
"eval_loss": 0.02117960713803768,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.6939,
"eval_samples_per_second": 185.604,
"eval_steps_per_second": 3.712,
"step": 304320
},
{
"epoch": 20.9,
"learning_rate": 6.5418199690732104e-09,
"loss": 0.0009,
"step": 305905
},
{
"epoch": 20.9,
"eval_loss": 0.01806722581386566,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.1281,
"eval_samples_per_second": 234.95,
"eval_steps_per_second": 4.699,
"step": 305905
},
{
"epoch": 21.01,
"learning_rate": 5.937194462602945e-09,
"loss": 0.0331,
"step": 307490
},
{
"epoch": 21.01,
"eval_loss": 0.019925368949770927,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.2473,
"eval_samples_per_second": 222.488,
"eval_steps_per_second": 4.45,
"step": 307490
},
{
"epoch": 21.12,
"learning_rate": 5.4742034190634586e-09,
"loss": 0.0108,
"step": 309075
},
{
"epoch": 21.12,
"eval_loss": 0.01871710829436779,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.1936,
"eval_samples_per_second": 227.938,
"eval_steps_per_second": 4.559,
"step": 309075
},
{
"epoch": 21.23,
"learning_rate": 5.104983375404655e-09,
"loss": 0.0045,
"step": 310660
},
{
"epoch": 21.23,
"eval_loss": 0.019821077585220337,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.2507,
"eval_samples_per_second": 222.154,
"eval_steps_per_second": 4.443,
"step": 310660
},
{
"epoch": 21.34,
"learning_rate": 4.801630619112727e-09,
"loss": 0.0028,
"step": 312245
},
{
"epoch": 21.34,
"eval_loss": 0.022539684548974037,
"eval_max_distance": 39,
"eval_mean_distance": 0,
"eval_runtime": 2.4779,
"eval_samples_per_second": 201.781,
"eval_steps_per_second": 4.036,
"step": 312245
},
{
"epoch": 21.45,
"learning_rate": 4.54663449074512e-09,
"loss": 0.002,
"step": 313830
},
{
"epoch": 21.45,
"eval_loss": 0.018958711996674538,
"eval_max_distance": 46,
"eval_mean_distance": 0,
"eval_runtime": 2.3193,
"eval_samples_per_second": 215.586,
"eval_steps_per_second": 4.312,
"step": 313830
},
{
"epoch": 21.55,
"learning_rate": 4.328377377748997e-09,
"loss": 0.0017,
"step": 315415
},
{
"epoch": 21.55,
"eval_loss": 0.01911868527531624,
"eval_max_distance": 38,
"eval_mean_distance": 0,
"eval_runtime": 2.6165,
"eval_samples_per_second": 191.092,
"eval_steps_per_second": 3.822,
"step": 315415
},
{
"epoch": 21.65,
"step": 316827,
"total_flos": 3.443227992585216e+16,
"train_loss": 6.756745522816313e-06,
"train_runtime": 151.9677,
"train_samples_per_second": 108410.633,
"train_steps_per_second": 2084.831
}
],
"max_steps": 316827,
"num_train_epochs": 22,
"total_flos": 3.443227992585216e+16,
"trial_name": null,
"trial_params": null
}