Rajat
adds model
6c65f0a
raw
history blame
129 kB
{
"best_metric": 0.8931613819214387,
"best_model_checkpoint": "bge-small-hotpotwa-matryoshka-fine-tuned-50/checkpoint-500",
"epoch": 26.924694993689524,
"eval_steps": 50,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.33655868742111905,
"grad_norm": 1.7359095811843872,
"learning_rate": 1.3513513513513515e-06,
"loss": 19.5758,
"step": 50
},
{
"epoch": 0.33655868742111905,
"eval_dim_128_cosine_accuracy": 0.9551585423568386,
"eval_dim_128_dot_accuracy": 0.08980123047799338,
"eval_dim_128_euclidean_accuracy": 0.9530288689067676,
"eval_dim_128_manhattan_accuracy": 0.9527922385234264,
"eval_dim_128_max_accuracy": 0.9551585423568386,
"eval_dim_256_cosine_accuracy": 0.966280170373876,
"eval_dim_256_dot_accuracy": 0.042711784193090394,
"eval_dim_256_euclidean_accuracy": 0.9659252247988642,
"eval_dim_256_manhattan_accuracy": 0.9634406057737813,
"eval_dim_256_max_accuracy": 0.966280170373876,
"eval_dim_384_cosine_accuracy": 0.9667534311405585,
"eval_dim_384_dot_accuracy": 0.03324656885944155,
"eval_dim_384_euclidean_accuracy": 0.9667534311405585,
"eval_dim_384_manhattan_accuracy": 0.9669900615238997,
"eval_dim_384_max_accuracy": 0.9669900615238997,
"eval_dim_64_cosine_accuracy": 0.9358731661145291,
"eval_dim_64_dot_accuracy": 0.1320397539044013,
"eval_dim_64_euclidean_accuracy": 0.9345716990061524,
"eval_dim_64_manhattan_accuracy": 0.9269995267392334,
"eval_dim_64_max_accuracy": 0.9358731661145291,
"eval_loss": 19.393272399902344,
"eval_runtime": 104.7788,
"eval_samples_per_second": 80.665,
"eval_sequential_score": 0.9358731661145291,
"eval_steps_per_second": 2.529,
"step": 50
},
{
"epoch": 0.6731173748422381,
"grad_norm": 1.976278305053711,
"learning_rate": 2.702702702702703e-06,
"loss": 19.4573,
"step": 100
},
{
"epoch": 0.6731173748422381,
"eval_dim_128_cosine_accuracy": 0.9570515854235684,
"eval_dim_128_dot_accuracy": 0.06625650733554188,
"eval_dim_128_euclidean_accuracy": 0.9589446284902982,
"eval_dim_128_manhattan_accuracy": 0.9557501183151916,
"eval_dim_128_max_accuracy": 0.9589446284902982,
"eval_dim_256_cosine_accuracy": 0.9646237576904875,
"eval_dim_256_dot_accuracy": 0.04046379555134879,
"eval_dim_256_euclidean_accuracy": 0.9650970184571699,
"eval_dim_256_manhattan_accuracy": 0.9632039753904401,
"eval_dim_256_max_accuracy": 0.9650970184571699,
"eval_dim_384_cosine_accuracy": 0.9653336488405111,
"eval_dim_384_dot_accuracy": 0.03466635115948888,
"eval_dim_384_euclidean_accuracy": 0.9653336488405111,
"eval_dim_384_manhattan_accuracy": 0.9646237576904875,
"eval_dim_384_max_accuracy": 0.9653336488405111,
"eval_dim_64_cosine_accuracy": 0.9449834358731661,
"eval_dim_64_dot_accuracy": 0.08932796971131093,
"eval_dim_64_euclidean_accuracy": 0.9461665877898722,
"eval_dim_64_manhattan_accuracy": 0.9420255560814008,
"eval_dim_64_max_accuracy": 0.9461665877898722,
"eval_loss": 19.097097396850586,
"eval_runtime": 103.9699,
"eval_samples_per_second": 81.293,
"eval_sequential_score": 0.9449834358731661,
"eval_steps_per_second": 2.549,
"step": 100
},
{
"epoch": 1.0096760622633572,
"grad_norm": 2.1209616661071777,
"learning_rate": 4.0540540540540545e-06,
"loss": 19.1409,
"step": 150
},
{
"epoch": 1.0096760622633572,
"eval_dim_128_cosine_accuracy": 0.9384761003312825,
"eval_dim_128_dot_accuracy": 0.06897775674396593,
"eval_dim_128_euclidean_accuracy": 0.9421438712730714,
"eval_dim_128_manhattan_accuracy": 0.939540937056318,
"eval_dim_128_max_accuracy": 0.9421438712730714,
"eval_dim_256_cosine_accuracy": 0.9434453383814482,
"eval_dim_256_dot_accuracy": 0.05797444391859915,
"eval_dim_256_euclidean_accuracy": 0.9436819687647894,
"eval_dim_256_manhattan_accuracy": 0.9423805016564126,
"eval_dim_256_max_accuracy": 0.9436819687647894,
"eval_dim_384_cosine_accuracy": 0.9473497397065783,
"eval_dim_384_dot_accuracy": 0.05265026029342167,
"eval_dim_384_euclidean_accuracy": 0.9473497397065783,
"eval_dim_384_manhattan_accuracy": 0.9458116422148604,
"eval_dim_384_max_accuracy": 0.9473497397065783,
"eval_dim_64_cosine_accuracy": 0.9306672976810223,
"eval_dim_64_dot_accuracy": 0.07749645054424988,
"eval_dim_64_euclidean_accuracy": 0.9332702318977757,
"eval_dim_64_manhattan_accuracy": 0.9320870799810695,
"eval_dim_64_max_accuracy": 0.9332702318977757,
"eval_loss": 18.4069766998291,
"eval_runtime": 103.2125,
"eval_samples_per_second": 81.889,
"eval_sequential_score": 0.9306672976810223,
"eval_steps_per_second": 2.568,
"step": 150
},
{
"epoch": 1.3462347496844762,
"grad_norm": 1.658170461654663,
"learning_rate": 5.405405405405406e-06,
"loss": 18.6431,
"step": 200
},
{
"epoch": 1.3462347496844762,
"eval_dim_128_cosine_accuracy": 0.9125650733554188,
"eval_dim_128_dot_accuracy": 0.08826313298627544,
"eval_dim_128_euclidean_accuracy": 0.9139848556554662,
"eval_dim_128_manhattan_accuracy": 0.9145764316138192,
"eval_dim_128_max_accuracy": 0.9145764316138192,
"eval_dim_256_cosine_accuracy": 0.9163511594888784,
"eval_dim_256_dot_accuracy": 0.08613345953620445,
"eval_dim_256_euclidean_accuracy": 0.9163511594888784,
"eval_dim_256_manhattan_accuracy": 0.9151680075721723,
"eval_dim_256_max_accuracy": 0.9163511594888784,
"eval_dim_384_cosine_accuracy": 0.9183625177472787,
"eval_dim_384_dot_accuracy": 0.08163748225272124,
"eval_dim_384_euclidean_accuracy": 0.9183625177472787,
"eval_dim_384_manhattan_accuracy": 0.9184808329389493,
"eval_dim_384_max_accuracy": 0.9184808329389493,
"eval_dim_64_cosine_accuracy": 0.9093705631803124,
"eval_dim_64_dot_accuracy": 0.09477046852815901,
"eval_dim_64_euclidean_accuracy": 0.9126833885470894,
"eval_dim_64_manhattan_accuracy": 0.9113819214387128,
"eval_dim_64_max_accuracy": 0.9126833885470894,
"eval_loss": 17.32919692993164,
"eval_runtime": 102.8811,
"eval_samples_per_second": 82.153,
"eval_sequential_score": 0.9093705631803124,
"eval_steps_per_second": 2.576,
"step": 200
},
{
"epoch": 1.6827934371055953,
"grad_norm": 1.5389924049377441,
"learning_rate": 6.7567567567567575e-06,
"loss": 18.2288,
"step": 250
},
{
"epoch": 1.6827934371055953,
"eval_dim_128_cosine_accuracy": 0.9062943681968765,
"eval_dim_128_dot_accuracy": 0.09311405584477046,
"eval_dim_128_euclidean_accuracy": 0.9062943681968765,
"eval_dim_128_manhattan_accuracy": 0.9062943681968765,
"eval_dim_128_max_accuracy": 0.9062943681968765,
"eval_dim_256_cosine_accuracy": 0.9071225745385707,
"eval_dim_256_dot_accuracy": 0.09335068622811168,
"eval_dim_256_euclidean_accuracy": 0.907950780880265,
"eval_dim_256_manhattan_accuracy": 0.9093705631803124,
"eval_dim_256_max_accuracy": 0.9093705631803124,
"eval_dim_384_cosine_accuracy": 0.9099621391386654,
"eval_dim_384_dot_accuracy": 0.0900378608613346,
"eval_dim_384_euclidean_accuracy": 0.9099621391386654,
"eval_dim_384_manhattan_accuracy": 0.9087789872219593,
"eval_dim_384_max_accuracy": 0.9099621391386654,
"eval_dim_64_cosine_accuracy": 0.9022716516800757,
"eval_dim_64_dot_accuracy": 0.09962139138665405,
"eval_dim_64_euclidean_accuracy": 0.9046379555134879,
"eval_dim_64_manhattan_accuracy": 0.9040463795551349,
"eval_dim_64_max_accuracy": 0.9046379555134879,
"eval_loss": 16.875099182128906,
"eval_runtime": 104.7249,
"eval_samples_per_second": 80.707,
"eval_sequential_score": 0.9022716516800757,
"eval_steps_per_second": 2.53,
"step": 250
},
{
"epoch": 2.0193521245267143,
"grad_norm": 1.4371246099472046,
"learning_rate": 8.108108108108109e-06,
"loss": 18.0425,
"step": 300
},
{
"epoch": 2.0193521245267143,
"eval_dim_128_cosine_accuracy": 0.9020350212967345,
"eval_dim_128_dot_accuracy": 0.09772834831992427,
"eval_dim_128_euclidean_accuracy": 0.9035731187884525,
"eval_dim_128_manhattan_accuracy": 0.9044013251301467,
"eval_dim_128_max_accuracy": 0.9044013251301467,
"eval_dim_256_cosine_accuracy": 0.9032181732134406,
"eval_dim_256_dot_accuracy": 0.09690014197823,
"eval_dim_256_euclidean_accuracy": 0.90309985802177,
"eval_dim_256_manhattan_accuracy": 0.9042830099384761,
"eval_dim_256_max_accuracy": 0.9042830099384761,
"eval_dim_384_cosine_accuracy": 0.9045196403218173,
"eval_dim_384_dot_accuracy": 0.09548035967818268,
"eval_dim_384_euclidean_accuracy": 0.9045196403218173,
"eval_dim_384_manhattan_accuracy": 0.9049929010884997,
"eval_dim_384_max_accuracy": 0.9049929010884997,
"eval_dim_64_cosine_accuracy": 0.8989588263132986,
"eval_dim_64_dot_accuracy": 0.10234264079507809,
"eval_dim_64_euclidean_accuracy": 0.9016800757217227,
"eval_dim_64_manhattan_accuracy": 0.9016800757217227,
"eval_dim_64_max_accuracy": 0.9016800757217227,
"eval_loss": 16.69808578491211,
"eval_runtime": 103.4615,
"eval_samples_per_second": 81.692,
"eval_sequential_score": 0.8989588263132986,
"eval_steps_per_second": 2.561,
"step": 300
},
{
"epoch": 2.3559108119478336,
"grad_norm": 1.386720895767212,
"learning_rate": 9.45945945945946e-06,
"loss": 17.9458,
"step": 350
},
{
"epoch": 2.3559108119478336,
"eval_dim_128_cosine_accuracy": 0.9036914339801231,
"eval_dim_128_dot_accuracy": 0.09761003312825367,
"eval_dim_128_euclidean_accuracy": 0.9034548035967819,
"eval_dim_128_manhattan_accuracy": 0.9016800757217227,
"eval_dim_128_max_accuracy": 0.9036914339801231,
"eval_dim_256_cosine_accuracy": 0.9013251301467108,
"eval_dim_256_dot_accuracy": 0.09855655466161856,
"eval_dim_256_euclidean_accuracy": 0.9015617605300521,
"eval_dim_256_manhattan_accuracy": 0.9022716516800757,
"eval_dim_256_max_accuracy": 0.9022716516800757,
"eval_dim_384_cosine_accuracy": 0.9021533364884051,
"eval_dim_384_dot_accuracy": 0.09784666351159489,
"eval_dim_384_euclidean_accuracy": 0.9021533364884051,
"eval_dim_384_manhattan_accuracy": 0.9039280643634643,
"eval_dim_384_max_accuracy": 0.9039280643634643,
"eval_dim_64_cosine_accuracy": 0.8983672503549456,
"eval_dim_64_dot_accuracy": 0.10352579271178419,
"eval_dim_64_euclidean_accuracy": 0.8995504022716517,
"eval_dim_64_manhattan_accuracy": 0.8981306199716044,
"eval_dim_64_max_accuracy": 0.8995504022716517,
"eval_loss": 16.615509033203125,
"eval_runtime": 103.1308,
"eval_samples_per_second": 81.954,
"eval_sequential_score": 0.8983672503549456,
"eval_steps_per_second": 2.57,
"step": 350
},
{
"epoch": 2.6924694993689524,
"grad_norm": 1.4882862567901611,
"learning_rate": 1.0810810810810812e-05,
"loss": 17.8525,
"step": 400
},
{
"epoch": 2.6924694993689524,
"eval_dim_128_cosine_accuracy": 0.8977756743965926,
"eval_dim_128_dot_accuracy": 0.10269758637008992,
"eval_dim_128_euclidean_accuracy": 0.9006152389966872,
"eval_dim_128_manhattan_accuracy": 0.900378608613346,
"eval_dim_128_max_accuracy": 0.9006152389966872,
"eval_dim_256_cosine_accuracy": 0.8970657832465688,
"eval_dim_256_dot_accuracy": 0.10269758637008992,
"eval_dim_256_euclidean_accuracy": 0.8980123047799338,
"eval_dim_256_manhattan_accuracy": 0.8971840984382394,
"eval_dim_256_max_accuracy": 0.8980123047799338,
"eval_dim_384_cosine_accuracy": 0.8974207288215806,
"eval_dim_384_dot_accuracy": 0.1025792711784193,
"eval_dim_384_euclidean_accuracy": 0.8974207288215806,
"eval_dim_384_manhattan_accuracy": 0.898248935163275,
"eval_dim_384_max_accuracy": 0.898248935163275,
"eval_dim_64_cosine_accuracy": 0.8948177946048272,
"eval_dim_64_dot_accuracy": 0.10636535731187885,
"eval_dim_64_euclidean_accuracy": 0.8969474680548982,
"eval_dim_64_manhattan_accuracy": 0.8948177946048272,
"eval_dim_64_max_accuracy": 0.8969474680548982,
"eval_loss": 16.553625106811523,
"eval_runtime": 103.3808,
"eval_samples_per_second": 81.756,
"eval_sequential_score": 0.8948177946048272,
"eval_steps_per_second": 2.563,
"step": 400
},
{
"epoch": 3.0290281867900717,
"grad_norm": 1.5986053943634033,
"learning_rate": 1.2162162162162164e-05,
"loss": 17.7529,
"step": 450
},
{
"epoch": 3.0290281867900717,
"eval_dim_128_cosine_accuracy": 0.8980123047799338,
"eval_dim_128_dot_accuracy": 0.10340747752011359,
"eval_dim_128_euclidean_accuracy": 0.8997870326549929,
"eval_dim_128_manhattan_accuracy": 0.8996687174633223,
"eval_dim_128_max_accuracy": 0.8997870326549929,
"eval_dim_256_cosine_accuracy": 0.8956460009465216,
"eval_dim_256_dot_accuracy": 0.10399905347846664,
"eval_dim_256_euclidean_accuracy": 0.8970657832465688,
"eval_dim_256_manhattan_accuracy": 0.8960009465215334,
"eval_dim_256_max_accuracy": 0.8970657832465688,
"eval_dim_384_cosine_accuracy": 0.8952910553715097,
"eval_dim_384_dot_accuracy": 0.1047089446284903,
"eval_dim_384_euclidean_accuracy": 0.8952910553715097,
"eval_dim_384_manhattan_accuracy": 0.8971840984382394,
"eval_dim_384_max_accuracy": 0.8971840984382394,
"eval_dim_64_cosine_accuracy": 0.8950544249881685,
"eval_dim_64_dot_accuracy": 0.10541883577851396,
"eval_dim_64_euclidean_accuracy": 0.8969474680548982,
"eval_dim_64_manhattan_accuracy": 0.8948177946048272,
"eval_dim_64_max_accuracy": 0.8969474680548982,
"eval_loss": 16.51355743408203,
"eval_runtime": 104.654,
"eval_samples_per_second": 80.761,
"eval_sequential_score": 0.8950544249881685,
"eval_steps_per_second": 2.532,
"step": 450
},
{
"epoch": 3.3655868742111905,
"grad_norm": 1.8756661415100098,
"learning_rate": 1.3513513513513515e-05,
"loss": 17.6709,
"step": 500
},
{
"epoch": 3.3655868742111905,
"eval_dim_128_cosine_accuracy": 0.8931613819214387,
"eval_dim_128_dot_accuracy": 0.10766682442025556,
"eval_dim_128_euclidean_accuracy": 0.8944628490298154,
"eval_dim_128_manhattan_accuracy": 0.8942262186464742,
"eval_dim_128_max_accuracy": 0.8944628490298154,
"eval_dim_256_cosine_accuracy": 0.8913866540463795,
"eval_dim_256_dot_accuracy": 0.10896829152863227,
"eval_dim_256_euclidean_accuracy": 0.8937529578797918,
"eval_dim_256_manhattan_accuracy": 0.8937529578797918,
"eval_dim_256_max_accuracy": 0.8937529578797918,
"eval_dim_384_cosine_accuracy": 0.8928064363464269,
"eval_dim_384_dot_accuracy": 0.10719356365357312,
"eval_dim_384_euclidean_accuracy": 0.8928064363464269,
"eval_dim_384_manhattan_accuracy": 0.8932796971131093,
"eval_dim_384_max_accuracy": 0.8932796971131093,
"eval_dim_64_cosine_accuracy": 0.8906767628963559,
"eval_dim_64_dot_accuracy": 0.11121628017037388,
"eval_dim_64_euclidean_accuracy": 0.8911500236630383,
"eval_dim_64_manhattan_accuracy": 0.8893752957879791,
"eval_dim_64_max_accuracy": 0.8911500236630383,
"eval_loss": 16.4824161529541,
"eval_runtime": 103.2754,
"eval_samples_per_second": 81.839,
"eval_sequential_score": 0.8906767628963559,
"eval_steps_per_second": 2.566,
"step": 500
},
{
"epoch": 3.70214556163231,
"grad_norm": 2.3590304851531982,
"learning_rate": 1.4864864864864865e-05,
"loss": 17.5348,
"step": 550
},
{
"epoch": 3.70214556163231,
"eval_dim_128_cosine_accuracy": 0.8862991008045433,
"eval_dim_128_dot_accuracy": 0.11500236630383341,
"eval_dim_128_euclidean_accuracy": 0.8864174159962139,
"eval_dim_128_manhattan_accuracy": 0.8858258400378609,
"eval_dim_128_max_accuracy": 0.8864174159962139,
"eval_dim_256_cosine_accuracy": 0.8858258400378609,
"eval_dim_256_dot_accuracy": 0.11358258400378608,
"eval_dim_256_euclidean_accuracy": 0.8867723615712257,
"eval_dim_256_manhattan_accuracy": 0.8858258400378609,
"eval_dim_256_max_accuracy": 0.8867723615712257,
"eval_dim_384_cosine_accuracy": 0.8859441552295315,
"eval_dim_384_dot_accuracy": 0.11405584477046853,
"eval_dim_384_euclidean_accuracy": 0.8859441552295315,
"eval_dim_384_manhattan_accuracy": 0.88760056791292,
"eval_dim_384_max_accuracy": 0.88760056791292,
"eval_dim_64_cosine_accuracy": 0.884879318504496,
"eval_dim_64_dot_accuracy": 0.11985328916232844,
"eval_dim_64_euclidean_accuracy": 0.8845243729294842,
"eval_dim_64_manhattan_accuracy": 0.8828679602460956,
"eval_dim_64_max_accuracy": 0.884879318504496,
"eval_loss": 16.463218688964844,
"eval_runtime": 103.2788,
"eval_samples_per_second": 81.837,
"eval_sequential_score": 0.884879318504496,
"eval_steps_per_second": 2.566,
"step": 550
},
{
"epoch": 4.038704249053429,
"grad_norm": 2.6120336055755615,
"learning_rate": 1.6216216216216218e-05,
"loss": 17.4198,
"step": 600
},
{
"epoch": 4.038704249053429,
"eval_dim_128_cosine_accuracy": 0.8852342640795078,
"eval_dim_128_dot_accuracy": 0.11748698532891623,
"eval_dim_128_euclidean_accuracy": 0.8846426881211548,
"eval_dim_128_manhattan_accuracy": 0.8859441552295315,
"eval_dim_128_max_accuracy": 0.8859441552295315,
"eval_dim_256_cosine_accuracy": 0.8861807856128727,
"eval_dim_256_dot_accuracy": 0.1137008991954567,
"eval_dim_256_euclidean_accuracy": 0.8871273071462376,
"eval_dim_256_manhattan_accuracy": 0.8866540463795551,
"eval_dim_256_max_accuracy": 0.8871273071462376,
"eval_dim_384_cosine_accuracy": 0.8859441552295315,
"eval_dim_384_dot_accuracy": 0.11405584477046853,
"eval_dim_384_euclidean_accuracy": 0.8859441552295315,
"eval_dim_384_manhattan_accuracy": 0.8847610033128254,
"eval_dim_384_max_accuracy": 0.8859441552295315,
"eval_dim_64_cosine_accuracy": 0.8839327969711311,
"eval_dim_64_dot_accuracy": 0.12103644107903455,
"eval_dim_64_euclidean_accuracy": 0.8861807856128727,
"eval_dim_64_manhattan_accuracy": 0.8857075248461902,
"eval_dim_64_max_accuracy": 0.8861807856128727,
"eval_loss": 16.46009063720703,
"eval_runtime": 104.1113,
"eval_samples_per_second": 81.182,
"eval_sequential_score": 0.8839327969711311,
"eval_steps_per_second": 2.545,
"step": 600
},
{
"epoch": 4.375262936474548,
"grad_norm": 2.63383412361145,
"learning_rate": 1.756756756756757e-05,
"loss": 17.3673,
"step": 650
},
{
"epoch": 4.375262936474548,
"eval_dim_128_cosine_accuracy": 0.8853525792711784,
"eval_dim_128_dot_accuracy": 0.1160672030288689,
"eval_dim_128_euclidean_accuracy": 0.8867723615712257,
"eval_dim_128_manhattan_accuracy": 0.8855892096545196,
"eval_dim_128_max_accuracy": 0.8867723615712257,
"eval_dim_256_cosine_accuracy": 0.8864174159962139,
"eval_dim_256_dot_accuracy": 0.11417415996213914,
"eval_dim_256_euclidean_accuracy": 0.8871273071462376,
"eval_dim_256_manhattan_accuracy": 0.8862991008045433,
"eval_dim_256_max_accuracy": 0.8871273071462376,
"eval_dim_384_cosine_accuracy": 0.8865357311878845,
"eval_dim_384_dot_accuracy": 0.11346426881211548,
"eval_dim_384_euclidean_accuracy": 0.8865357311878845,
"eval_dim_384_manhattan_accuracy": 0.8861807856128727,
"eval_dim_384_max_accuracy": 0.8865357311878845,
"eval_dim_64_cosine_accuracy": 0.8841694273544723,
"eval_dim_64_dot_accuracy": 0.12091812588736393,
"eval_dim_64_euclidean_accuracy": 0.883341221012778,
"eval_dim_64_manhattan_accuracy": 0.8828679602460956,
"eval_dim_64_max_accuracy": 0.8841694273544723,
"eval_loss": 16.440513610839844,
"eval_runtime": 102.5958,
"eval_samples_per_second": 82.382,
"eval_sequential_score": 0.8841694273544723,
"eval_steps_per_second": 2.583,
"step": 650
},
{
"epoch": 4.711821623895667,
"grad_norm": 3.044569730758667,
"learning_rate": 1.891891891891892e-05,
"loss": 17.2603,
"step": 700
},
{
"epoch": 4.711821623895667,
"eval_dim_128_cosine_accuracy": 0.8834595362044486,
"eval_dim_128_dot_accuracy": 0.11772361571225745,
"eval_dim_128_euclidean_accuracy": 0.8835778513961192,
"eval_dim_128_manhattan_accuracy": 0.8840511121628017,
"eval_dim_128_max_accuracy": 0.8840511121628017,
"eval_dim_256_cosine_accuracy": 0.8838144817794605,
"eval_dim_256_dot_accuracy": 0.11571225745385708,
"eval_dim_256_euclidean_accuracy": 0.8838144817794605,
"eval_dim_256_manhattan_accuracy": 0.8839327969711311,
"eval_dim_256_max_accuracy": 0.8839327969711311,
"eval_dim_384_cosine_accuracy": 0.8838144817794605,
"eval_dim_384_dot_accuracy": 0.11618551822053952,
"eval_dim_384_euclidean_accuracy": 0.8838144817794605,
"eval_dim_384_manhattan_accuracy": 0.8847610033128254,
"eval_dim_384_max_accuracy": 0.8847610033128254,
"eval_dim_64_cosine_accuracy": 0.8807382867960246,
"eval_dim_64_dot_accuracy": 0.12328442972077615,
"eval_dim_64_euclidean_accuracy": 0.8814481779460482,
"eval_dim_64_manhattan_accuracy": 0.8810932323710364,
"eval_dim_64_max_accuracy": 0.8814481779460482,
"eval_loss": 16.435609817504883,
"eval_runtime": 103.6437,
"eval_samples_per_second": 81.549,
"eval_sequential_score": 0.8807382867960246,
"eval_steps_per_second": 2.557,
"step": 700
},
{
"epoch": 5.0483803113167856,
"grad_norm": 3.3264880180358887,
"learning_rate": 1.9999888744757143e-05,
"loss": 17.1807,
"step": 750
},
{
"epoch": 5.0483803113167856,
"eval_dim_128_cosine_accuracy": 0.8849976336961666,
"eval_dim_128_dot_accuracy": 0.11654046379555134,
"eval_dim_128_euclidean_accuracy": 0.884879318504496,
"eval_dim_128_manhattan_accuracy": 0.8838144817794605,
"eval_dim_128_max_accuracy": 0.8849976336961666,
"eval_dim_256_cosine_accuracy": 0.8864174159962139,
"eval_dim_256_dot_accuracy": 0.11417415996213914,
"eval_dim_256_euclidean_accuracy": 0.8852342640795078,
"eval_dim_256_manhattan_accuracy": 0.8857075248461902,
"eval_dim_256_max_accuracy": 0.8864174159962139,
"eval_dim_384_cosine_accuracy": 0.8859441552295315,
"eval_dim_384_dot_accuracy": 0.11405584477046853,
"eval_dim_384_euclidean_accuracy": 0.8859441552295315,
"eval_dim_384_manhattan_accuracy": 0.8855892096545196,
"eval_dim_384_max_accuracy": 0.8859441552295315,
"eval_dim_64_cosine_accuracy": 0.8838144817794605,
"eval_dim_64_dot_accuracy": 0.12079981069569333,
"eval_dim_64_euclidean_accuracy": 0.8844060577378136,
"eval_dim_64_manhattan_accuracy": 0.8834595362044486,
"eval_dim_64_max_accuracy": 0.8844060577378136,
"eval_loss": 16.444347381591797,
"eval_runtime": 103.5226,
"eval_samples_per_second": 81.644,
"eval_sequential_score": 0.8838144817794605,
"eval_steps_per_second": 2.56,
"step": 750
},
{
"epoch": 5.384938998737905,
"grad_norm": 2.7032034397125244,
"learning_rate": 1.999599507118322e-05,
"loss": 17.1629,
"step": 800
},
{
"epoch": 5.384938998737905,
"eval_dim_128_cosine_accuracy": 0.8847610033128254,
"eval_dim_128_dot_accuracy": 0.11701372456223379,
"eval_dim_128_euclidean_accuracy": 0.8859441552295315,
"eval_dim_128_manhattan_accuracy": 0.884879318504496,
"eval_dim_128_max_accuracy": 0.8859441552295315,
"eval_dim_256_cosine_accuracy": 0.8861807856128727,
"eval_dim_256_dot_accuracy": 0.11417415996213914,
"eval_dim_256_euclidean_accuracy": 0.8859441552295315,
"eval_dim_256_manhattan_accuracy": 0.8853525792711784,
"eval_dim_256_max_accuracy": 0.8861807856128727,
"eval_dim_384_cosine_accuracy": 0.8866540463795551,
"eval_dim_384_dot_accuracy": 0.11334595362044486,
"eval_dim_384_euclidean_accuracy": 0.8866540463795551,
"eval_dim_384_manhattan_accuracy": 0.8862991008045433,
"eval_dim_384_max_accuracy": 0.8866540463795551,
"eval_dim_64_cosine_accuracy": 0.8841694273544723,
"eval_dim_64_dot_accuracy": 0.11831519167061051,
"eval_dim_64_euclidean_accuracy": 0.8841694273544723,
"eval_dim_64_manhattan_accuracy": 0.8839327969711311,
"eval_dim_64_max_accuracy": 0.8841694273544723,
"eval_loss": 16.420166015625,
"eval_runtime": 103.5297,
"eval_samples_per_second": 81.638,
"eval_sequential_score": 0.8841694273544723,
"eval_steps_per_second": 2.56,
"step": 800
},
{
"epoch": 5.721497686159024,
"grad_norm": 3.8163998126983643,
"learning_rate": 1.9986541110764565e-05,
"loss": 17.0747,
"step": 850
},
{
"epoch": 5.721497686159024,
"eval_dim_128_cosine_accuracy": 0.8853525792711784,
"eval_dim_128_dot_accuracy": 0.11618551822053952,
"eval_dim_128_euclidean_accuracy": 0.8835778513961192,
"eval_dim_128_manhattan_accuracy": 0.8845243729294842,
"eval_dim_128_max_accuracy": 0.8853525792711784,
"eval_dim_256_cosine_accuracy": 0.8874822527212494,
"eval_dim_256_dot_accuracy": 0.11358258400378608,
"eval_dim_256_euclidean_accuracy": 0.8864174159962139,
"eval_dim_256_manhattan_accuracy": 0.8862991008045433,
"eval_dim_256_max_accuracy": 0.8874822527212494,
"eval_dim_384_cosine_accuracy": 0.8868906767628963,
"eval_dim_384_dot_accuracy": 0.11310932323710364,
"eval_dim_384_euclidean_accuracy": 0.8868906767628963,
"eval_dim_384_manhattan_accuracy": 0.8862991008045433,
"eval_dim_384_max_accuracy": 0.8868906767628963,
"eval_dim_64_cosine_accuracy": 0.8836961665877898,
"eval_dim_64_dot_accuracy": 0.11867013724562234,
"eval_dim_64_euclidean_accuracy": 0.8831045906294368,
"eval_dim_64_manhattan_accuracy": 0.8832229058211074,
"eval_dim_64_max_accuracy": 0.8836961665877898,
"eval_loss": 16.416208267211914,
"eval_runtime": 103.4694,
"eval_samples_per_second": 81.686,
"eval_sequential_score": 0.8836961665877898,
"eval_steps_per_second": 2.561,
"step": 850
},
{
"epoch": 6.058056373580143,
"grad_norm": 3.9848620891571045,
"learning_rate": 1.9971532122280466e-05,
"loss": 17.0161,
"step": 900
},
{
"epoch": 6.058056373580143,
"eval_dim_128_cosine_accuracy": 0.8852342640795078,
"eval_dim_128_dot_accuracy": 0.11618551822053952,
"eval_dim_128_euclidean_accuracy": 0.8852342640795078,
"eval_dim_128_manhattan_accuracy": 0.8846426881211548,
"eval_dim_128_max_accuracy": 0.8852342640795078,
"eval_dim_256_cosine_accuracy": 0.8862991008045433,
"eval_dim_256_dot_accuracy": 0.11417415996213914,
"eval_dim_256_euclidean_accuracy": 0.8858258400378609,
"eval_dim_256_manhattan_accuracy": 0.8853525792711784,
"eval_dim_256_max_accuracy": 0.8862991008045433,
"eval_dim_384_cosine_accuracy": 0.8855892096545196,
"eval_dim_384_dot_accuracy": 0.11441079034548036,
"eval_dim_384_euclidean_accuracy": 0.8855892096545196,
"eval_dim_384_manhattan_accuracy": 0.885470894462849,
"eval_dim_384_max_accuracy": 0.8855892096545196,
"eval_dim_64_cosine_accuracy": 0.8855892096545196,
"eval_dim_64_dot_accuracy": 0.11831519167061051,
"eval_dim_64_euclidean_accuracy": 0.885470894462849,
"eval_dim_64_manhattan_accuracy": 0.8834595362044486,
"eval_dim_64_max_accuracy": 0.8855892096545196,
"eval_loss": 16.419212341308594,
"eval_runtime": 104.3001,
"eval_samples_per_second": 81.035,
"eval_sequential_score": 0.8855892096545196,
"eval_steps_per_second": 2.541,
"step": 900
},
{
"epoch": 6.394615061001262,
"grad_norm": 4.083323001861572,
"learning_rate": 1.995097645450266e-05,
"loss": 17.0146,
"step": 950
},
{
"epoch": 6.394615061001262,
"eval_dim_128_cosine_accuracy": 0.884879318504496,
"eval_dim_128_dot_accuracy": 0.1171320397539044,
"eval_dim_128_euclidean_accuracy": 0.8861807856128727,
"eval_dim_128_manhattan_accuracy": 0.8853525792711784,
"eval_dim_128_max_accuracy": 0.8861807856128727,
"eval_dim_256_cosine_accuracy": 0.8853525792711784,
"eval_dim_256_dot_accuracy": 0.11464742072882159,
"eval_dim_256_euclidean_accuracy": 0.885470894462849,
"eval_dim_256_manhattan_accuracy": 0.8858258400378609,
"eval_dim_256_max_accuracy": 0.8858258400378609,
"eval_dim_384_cosine_accuracy": 0.8855892096545196,
"eval_dim_384_dot_accuracy": 0.11441079034548036,
"eval_dim_384_euclidean_accuracy": 0.8855892096545196,
"eval_dim_384_manhattan_accuracy": 0.8864174159962139,
"eval_dim_384_max_accuracy": 0.8864174159962139,
"eval_dim_64_cosine_accuracy": 0.8844060577378136,
"eval_dim_64_dot_accuracy": 0.11796024609559867,
"eval_dim_64_euclidean_accuracy": 0.8852342640795078,
"eval_dim_64_manhattan_accuracy": 0.8844060577378136,
"eval_dim_64_max_accuracy": 0.8852342640795078,
"eval_loss": 16.403297424316406,
"eval_runtime": 102.2875,
"eval_samples_per_second": 82.63,
"eval_sequential_score": 0.8844060577378136,
"eval_steps_per_second": 2.591,
"step": 950
},
{
"epoch": 6.731173748422381,
"grad_norm": 3.874021291732788,
"learning_rate": 1.992488554155135e-05,
"loss": 16.9393,
"step": 1000
},
{
"epoch": 6.731173748422381,
"eval_dim_128_cosine_accuracy": 0.8828679602460956,
"eval_dim_128_dot_accuracy": 0.11784193090392807,
"eval_dim_128_euclidean_accuracy": 0.8846426881211548,
"eval_dim_128_manhattan_accuracy": 0.8841694273544723,
"eval_dim_128_max_accuracy": 0.8846426881211548,
"eval_dim_256_cosine_accuracy": 0.8839327969711311,
"eval_dim_256_dot_accuracy": 0.1171320397539044,
"eval_dim_256_euclidean_accuracy": 0.8840511121628017,
"eval_dim_256_manhattan_accuracy": 0.8852342640795078,
"eval_dim_256_max_accuracy": 0.8852342640795078,
"eval_dim_384_cosine_accuracy": 0.8847610033128254,
"eval_dim_384_dot_accuracy": 0.11523899668717463,
"eval_dim_384_euclidean_accuracy": 0.8847610033128254,
"eval_dim_384_manhattan_accuracy": 0.8852342640795078,
"eval_dim_384_max_accuracy": 0.8852342640795078,
"eval_dim_64_cosine_accuracy": 0.8834595362044486,
"eval_dim_64_dot_accuracy": 0.11831519167061051,
"eval_dim_64_euclidean_accuracy": 0.8835778513961192,
"eval_dim_64_manhattan_accuracy": 0.8820397539044014,
"eval_dim_64_max_accuracy": 0.8835778513961192,
"eval_loss": 16.40532684326172,
"eval_runtime": 104.0121,
"eval_samples_per_second": 81.26,
"eval_sequential_score": 0.8834595362044486,
"eval_steps_per_second": 2.548,
"step": 1000
},
{
"epoch": 7.0677324358435,
"grad_norm": 4.689154148101807,
"learning_rate": 1.9893273896534936e-05,
"loss": 16.899,
"step": 1050
},
{
"epoch": 7.0677324358435,
"eval_dim_128_cosine_accuracy": 0.8826313298627544,
"eval_dim_128_dot_accuracy": 0.11867013724562234,
"eval_dim_128_euclidean_accuracy": 0.8823946994794132,
"eval_dim_128_manhattan_accuracy": 0.882158069096072,
"eval_dim_128_max_accuracy": 0.8826313298627544,
"eval_dim_256_cosine_accuracy": 0.8828679602460956,
"eval_dim_256_dot_accuracy": 0.11725035494557501,
"eval_dim_256_euclidean_accuracy": 0.8831045906294368,
"eval_dim_256_manhattan_accuracy": 0.8834595362044486,
"eval_dim_256_max_accuracy": 0.8834595362044486,
"eval_dim_384_cosine_accuracy": 0.883341221012778,
"eval_dim_384_dot_accuracy": 0.11665877898722196,
"eval_dim_384_euclidean_accuracy": 0.883341221012778,
"eval_dim_384_manhattan_accuracy": 0.8839327969711311,
"eval_dim_384_max_accuracy": 0.8839327969711311,
"eval_dim_64_cosine_accuracy": 0.88180312352106,
"eval_dim_64_dot_accuracy": 0.11890676762896356,
"eval_dim_64_euclidean_accuracy": 0.8828679602460956,
"eval_dim_64_manhattan_accuracy": 0.8820397539044014,
"eval_dim_64_max_accuracy": 0.8828679602460956,
"eval_loss": 16.416202545166016,
"eval_runtime": 104.6249,
"eval_samples_per_second": 80.784,
"eval_sequential_score": 0.88180312352106,
"eval_steps_per_second": 2.533,
"step": 1050
},
{
"epoch": 7.40429112326462,
"grad_norm": 3.6406683921813965,
"learning_rate": 1.9856159103477085e-05,
"loss": 16.9112,
"step": 1100
},
{
"epoch": 7.40429112326462,
"eval_dim_128_cosine_accuracy": 0.8828679602460956,
"eval_dim_128_dot_accuracy": 0.11878845243729295,
"eval_dim_128_euclidean_accuracy": 0.8828679602460956,
"eval_dim_128_manhattan_accuracy": 0.8828679602460956,
"eval_dim_128_max_accuracy": 0.8828679602460956,
"eval_dim_256_cosine_accuracy": 0.8834595362044486,
"eval_dim_256_dot_accuracy": 0.11618551822053952,
"eval_dim_256_euclidean_accuracy": 0.8826313298627544,
"eval_dim_256_manhattan_accuracy": 0.8840511121628017,
"eval_dim_256_max_accuracy": 0.8840511121628017,
"eval_dim_384_cosine_accuracy": 0.883341221012778,
"eval_dim_384_dot_accuracy": 0.11665877898722196,
"eval_dim_384_euclidean_accuracy": 0.883341221012778,
"eval_dim_384_manhattan_accuracy": 0.884287742546143,
"eval_dim_384_max_accuracy": 0.884287742546143,
"eval_dim_64_cosine_accuracy": 0.8820397539044014,
"eval_dim_64_dot_accuracy": 0.11914339801230478,
"eval_dim_64_euclidean_accuracy": 0.8831045906294368,
"eval_dim_64_manhattan_accuracy": 0.8826313298627544,
"eval_dim_64_max_accuracy": 0.8831045906294368,
"eval_loss": 16.405092239379883,
"eval_runtime": 101.4605,
"eval_samples_per_second": 83.303,
"eval_sequential_score": 0.8820397539044014,
"eval_steps_per_second": 2.612,
"step": 1100
},
{
"epoch": 7.740849810685738,
"grad_norm": 4.141761302947998,
"learning_rate": 1.9813561807535597e-05,
"loss": 16.8508,
"step": 1150
},
{
"epoch": 7.740849810685738,
"eval_dim_128_cosine_accuracy": 0.882158069096072,
"eval_dim_128_dot_accuracy": 0.11878845243729295,
"eval_dim_128_euclidean_accuracy": 0.8825130146710838,
"eval_dim_128_manhattan_accuracy": 0.8838144817794605,
"eval_dim_128_max_accuracy": 0.8838144817794605,
"eval_dim_256_cosine_accuracy": 0.8825130146710838,
"eval_dim_256_dot_accuracy": 0.11748698532891623,
"eval_dim_256_euclidean_accuracy": 0.8831045906294368,
"eval_dim_256_manhattan_accuracy": 0.8835778513961192,
"eval_dim_256_max_accuracy": 0.8835778513961192,
"eval_dim_384_cosine_accuracy": 0.8829862754377662,
"eval_dim_384_dot_accuracy": 0.11701372456223379,
"eval_dim_384_euclidean_accuracy": 0.8829862754377662,
"eval_dim_384_manhattan_accuracy": 0.883341221012778,
"eval_dim_384_max_accuracy": 0.883341221012778,
"eval_dim_64_cosine_accuracy": 0.8820397539044014,
"eval_dim_64_dot_accuracy": 0.12115475627070516,
"eval_dim_64_euclidean_accuracy": 0.882158069096072,
"eval_dim_64_manhattan_accuracy": 0.882749645054425,
"eval_dim_64_max_accuracy": 0.882749645054425,
"eval_loss": 16.40436363220215,
"eval_runtime": 102.9818,
"eval_samples_per_second": 82.073,
"eval_sequential_score": 0.8820397539044014,
"eval_steps_per_second": 2.573,
"step": 1150
},
{
"epoch": 8.077408498106857,
"grad_norm": 3.7137351036071777,
"learning_rate": 1.9765505703518494e-05,
"loss": 16.8104,
"step": 1200
},
{
"epoch": 8.077408498106857,
"eval_dim_128_cosine_accuracy": 0.8815664931377188,
"eval_dim_128_dot_accuracy": 0.119380028395646,
"eval_dim_128_euclidean_accuracy": 0.8813298627543776,
"eval_dim_128_manhattan_accuracy": 0.8820397539044014,
"eval_dim_128_max_accuracy": 0.8820397539044014,
"eval_dim_256_cosine_accuracy": 0.8815664931377188,
"eval_dim_256_dot_accuracy": 0.11796024609559867,
"eval_dim_256_euclidean_accuracy": 0.8808566019876952,
"eval_dim_256_manhattan_accuracy": 0.8807382867960246,
"eval_dim_256_max_accuracy": 0.8815664931377188,
"eval_dim_384_cosine_accuracy": 0.8814481779460482,
"eval_dim_384_dot_accuracy": 0.11855182205395173,
"eval_dim_384_euclidean_accuracy": 0.8814481779460482,
"eval_dim_384_manhattan_accuracy": 0.880619971604354,
"eval_dim_384_max_accuracy": 0.8814481779460482,
"eval_dim_64_cosine_accuracy": 0.8816848083293894,
"eval_dim_64_dot_accuracy": 0.12174633222905822,
"eval_dim_64_euclidean_accuracy": 0.880619971604354,
"eval_dim_64_manhattan_accuracy": 0.8809749171793658,
"eval_dim_64_max_accuracy": 0.8816848083293894,
"eval_loss": 16.40627670288086,
"eval_runtime": 104.9051,
"eval_samples_per_second": 80.568,
"eval_sequential_score": 0.8816848083293894,
"eval_steps_per_second": 2.526,
"step": 1200
},
{
"epoch": 8.413967185527977,
"grad_norm": 3.3535964488983154,
"learning_rate": 1.9712017522703764e-05,
"loss": 16.8212,
"step": 1250
},
{
"epoch": 8.413967185527977,
"eval_dim_128_cosine_accuracy": 0.8834595362044486,
"eval_dim_128_dot_accuracy": 0.11796024609559867,
"eval_dim_128_euclidean_accuracy": 0.882749645054425,
"eval_dim_128_manhattan_accuracy": 0.8825130146710838,
"eval_dim_128_max_accuracy": 0.8834595362044486,
"eval_dim_256_cosine_accuracy": 0.882158069096072,
"eval_dim_256_dot_accuracy": 0.11748698532891623,
"eval_dim_256_euclidean_accuracy": 0.8823946994794132,
"eval_dim_256_manhattan_accuracy": 0.8819214387127308,
"eval_dim_256_max_accuracy": 0.8823946994794132,
"eval_dim_384_cosine_accuracy": 0.882158069096072,
"eval_dim_384_dot_accuracy": 0.11784193090392807,
"eval_dim_384_euclidean_accuracy": 0.882158069096072,
"eval_dim_384_manhattan_accuracy": 0.882749645054425,
"eval_dim_384_max_accuracy": 0.882749645054425,
"eval_dim_64_cosine_accuracy": 0.8820397539044014,
"eval_dim_64_dot_accuracy": 0.12091812588736393,
"eval_dim_64_euclidean_accuracy": 0.8819214387127308,
"eval_dim_64_manhattan_accuracy": 0.8815664931377188,
"eval_dim_64_max_accuracy": 0.8820397539044014,
"eval_loss": 16.40399169921875,
"eval_runtime": 103.0829,
"eval_samples_per_second": 81.992,
"eval_sequential_score": 0.8820397539044014,
"eval_steps_per_second": 2.571,
"step": 1250
},
{
"epoch": 8.750525872949096,
"grad_norm": 4.203086853027344,
"learning_rate": 1.9653127017970035e-05,
"loss": 16.7743,
"step": 1300
},
{
"epoch": 8.750525872949096,
"eval_dim_128_cosine_accuracy": 0.882158069096072,
"eval_dim_128_dot_accuracy": 0.12020823473734027,
"eval_dim_128_euclidean_accuracy": 0.8815664931377188,
"eval_dim_128_manhattan_accuracy": 0.8814481779460482,
"eval_dim_128_max_accuracy": 0.882158069096072,
"eval_dim_256_cosine_accuracy": 0.8823946994794132,
"eval_dim_256_dot_accuracy": 0.11878845243729295,
"eval_dim_256_euclidean_accuracy": 0.8819214387127308,
"eval_dim_256_manhattan_accuracy": 0.8816848083293894,
"eval_dim_256_max_accuracy": 0.8823946994794132,
"eval_dim_384_cosine_accuracy": 0.8816848083293894,
"eval_dim_384_dot_accuracy": 0.11831519167061051,
"eval_dim_384_euclidean_accuracy": 0.8816848083293894,
"eval_dim_384_manhattan_accuracy": 0.882158069096072,
"eval_dim_384_max_accuracy": 0.882158069096072,
"eval_dim_64_cosine_accuracy": 0.8809749171793658,
"eval_dim_64_dot_accuracy": 0.121509701845717,
"eval_dim_64_euclidean_accuracy": 0.8807382867960246,
"eval_dim_64_manhattan_accuracy": 0.881211547562707,
"eval_dim_64_max_accuracy": 0.881211547562707,
"eval_loss": 16.39342498779297,
"eval_runtime": 102.6649,
"eval_samples_per_second": 82.326,
"eval_sequential_score": 0.8809749171793658,
"eval_steps_per_second": 2.581,
"step": 1300
},
{
"epoch": 9.087084560370215,
"grad_norm": 3.313908576965332,
"learning_rate": 1.9588866947246498e-05,
"loss": 16.7383,
"step": 1350
},
{
"epoch": 9.087084560370215,
"eval_dim_128_cosine_accuracy": 0.8809749171793658,
"eval_dim_128_dot_accuracy": 0.12068149550402271,
"eval_dim_128_euclidean_accuracy": 0.8808566019876952,
"eval_dim_128_manhattan_accuracy": 0.8814481779460482,
"eval_dim_128_max_accuracy": 0.8814481779460482,
"eval_dim_256_cosine_accuracy": 0.8820397539044014,
"eval_dim_256_dot_accuracy": 0.11831519167061051,
"eval_dim_256_euclidean_accuracy": 0.8810932323710364,
"eval_dim_256_manhattan_accuracy": 0.881211547562707,
"eval_dim_256_max_accuracy": 0.8820397539044014,
"eval_dim_384_cosine_accuracy": 0.8807382867960246,
"eval_dim_384_dot_accuracy": 0.11926171320397538,
"eval_dim_384_euclidean_accuracy": 0.8807382867960246,
"eval_dim_384_manhattan_accuracy": 0.8803833412210128,
"eval_dim_384_max_accuracy": 0.8807382867960246,
"eval_dim_64_cosine_accuracy": 0.880028395646001,
"eval_dim_64_dot_accuracy": 0.12245622337908188,
"eval_dim_64_euclidean_accuracy": 0.8807382867960246,
"eval_dim_64_manhattan_accuracy": 0.8816848083293894,
"eval_dim_64_max_accuracy": 0.8816848083293894,
"eval_loss": 16.39626121520996,
"eval_runtime": 105.1167,
"eval_samples_per_second": 80.406,
"eval_sequential_score": 0.880028395646001,
"eval_steps_per_second": 2.521,
"step": 1350
},
{
"epoch": 9.423643247791334,
"grad_norm": 6.617325305938721,
"learning_rate": 1.9519273055291266e-05,
"loss": 16.743,
"step": 1400
},
{
"epoch": 9.423643247791334,
"eval_dim_128_cosine_accuracy": 0.8819214387127308,
"eval_dim_128_dot_accuracy": 0.119380028395646,
"eval_dim_128_euclidean_accuracy": 0.8826313298627544,
"eval_dim_128_manhattan_accuracy": 0.8815664931377188,
"eval_dim_128_max_accuracy": 0.8826313298627544,
"eval_dim_256_cosine_accuracy": 0.882158069096072,
"eval_dim_256_dot_accuracy": 0.11784193090392807,
"eval_dim_256_euclidean_accuracy": 0.882158069096072,
"eval_dim_256_manhattan_accuracy": 0.8816848083293894,
"eval_dim_256_max_accuracy": 0.882158069096072,
"eval_dim_384_cosine_accuracy": 0.8819214387127308,
"eval_dim_384_dot_accuracy": 0.11807856128726929,
"eval_dim_384_euclidean_accuracy": 0.8819214387127308,
"eval_dim_384_manhattan_accuracy": 0.8826313298627544,
"eval_dim_384_max_accuracy": 0.8826313298627544,
"eval_dim_64_cosine_accuracy": 0.8797917652626597,
"eval_dim_64_dot_accuracy": 0.12091812588736393,
"eval_dim_64_euclidean_accuracy": 0.8810932323710364,
"eval_dim_64_manhattan_accuracy": 0.8807382867960246,
"eval_dim_64_max_accuracy": 0.8810932323710364,
"eval_loss": 16.406700134277344,
"eval_runtime": 101.1577,
"eval_samples_per_second": 83.553,
"eval_sequential_score": 0.8797917652626597,
"eval_steps_per_second": 2.62,
"step": 1400
},
{
"epoch": 9.760201935212454,
"grad_norm": 4.450948715209961,
"learning_rate": 1.944438405380829e-05,
"loss": 16.7047,
"step": 1450
},
{
"epoch": 9.760201935212454,
"eval_dim_128_cosine_accuracy": 0.8803833412210128,
"eval_dim_128_dot_accuracy": 0.12056318031235211,
"eval_dim_128_euclidean_accuracy": 0.8810932323710364,
"eval_dim_128_manhattan_accuracy": 0.8802650260293422,
"eval_dim_128_max_accuracy": 0.8810932323710364,
"eval_dim_256_cosine_accuracy": 0.8809749171793658,
"eval_dim_256_dot_accuracy": 0.11914339801230478,
"eval_dim_256_euclidean_accuracy": 0.8813298627543776,
"eval_dim_256_manhattan_accuracy": 0.881211547562707,
"eval_dim_256_max_accuracy": 0.8813298627543776,
"eval_dim_384_cosine_accuracy": 0.8809749171793658,
"eval_dim_384_dot_accuracy": 0.11902508282063418,
"eval_dim_384_euclidean_accuracy": 0.8809749171793658,
"eval_dim_384_manhattan_accuracy": 0.8820397539044014,
"eval_dim_384_max_accuracy": 0.8820397539044014,
"eval_dim_64_cosine_accuracy": 0.8796734500709891,
"eval_dim_64_dot_accuracy": 0.12245622337908188,
"eval_dim_64_euclidean_accuracy": 0.880028395646001,
"eval_dim_64_manhattan_accuracy": 0.8803833412210128,
"eval_dim_64_max_accuracy": 0.8803833412210128,
"eval_loss": 16.39591407775879,
"eval_runtime": 102.018,
"eval_samples_per_second": 82.848,
"eval_sequential_score": 0.8796734500709891,
"eval_steps_per_second": 2.598,
"step": 1450
},
{
"epoch": 10.096760622633571,
"grad_norm": 6.13853120803833,
"learning_rate": 1.9364241599913923e-05,
"loss": 16.6782,
"step": 1500
},
{
"epoch": 10.096760622633571,
"eval_dim_128_cosine_accuracy": 0.8788452437292948,
"eval_dim_128_dot_accuracy": 0.1228111689540937,
"eval_dim_128_euclidean_accuracy": 0.8796734500709891,
"eval_dim_128_manhattan_accuracy": 0.879081874112636,
"eval_dim_128_max_accuracy": 0.8796734500709891,
"eval_dim_256_cosine_accuracy": 0.879081874112636,
"eval_dim_256_dot_accuracy": 0.12091812588736393,
"eval_dim_256_euclidean_accuracy": 0.8797917652626597,
"eval_dim_256_manhattan_accuracy": 0.8803833412210128,
"eval_dim_256_max_accuracy": 0.8803833412210128,
"eval_dim_384_cosine_accuracy": 0.8795551348793185,
"eval_dim_384_dot_accuracy": 0.12044486512068149,
"eval_dim_384_euclidean_accuracy": 0.8795551348793185,
"eval_dim_384_manhattan_accuracy": 0.8799100804543304,
"eval_dim_384_max_accuracy": 0.8799100804543304,
"eval_dim_64_cosine_accuracy": 0.8783719829626124,
"eval_dim_64_dot_accuracy": 0.12363937529578797,
"eval_dim_64_euclidean_accuracy": 0.8795551348793185,
"eval_dim_64_manhattan_accuracy": 0.8781353525792712,
"eval_dim_64_max_accuracy": 0.8795551348793185,
"eval_loss": 16.398588180541992,
"eval_runtime": 103.6429,
"eval_samples_per_second": 81.549,
"eval_sequential_score": 0.8783719829626124,
"eval_steps_per_second": 2.557,
"step": 1500
},
{
"epoch": 10.43331931005469,
"grad_norm": 4.757913112640381,
"learning_rate": 1.9278890272965097e-05,
"loss": 16.6708,
"step": 1550
},
{
"epoch": 10.43331931005469,
"eval_dim_128_cosine_accuracy": 0.8794368196876479,
"eval_dim_128_dot_accuracy": 0.121509701845717,
"eval_dim_128_euclidean_accuracy": 0.8795551348793185,
"eval_dim_128_manhattan_accuracy": 0.879081874112636,
"eval_dim_128_max_accuracy": 0.8795551348793185,
"eval_dim_256_cosine_accuracy": 0.8792001893043067,
"eval_dim_256_dot_accuracy": 0.11961665877898722,
"eval_dim_256_euclidean_accuracy": 0.8794368196876479,
"eval_dim_256_manhattan_accuracy": 0.8795551348793185,
"eval_dim_256_max_accuracy": 0.8795551348793185,
"eval_dim_384_cosine_accuracy": 0.8796734500709891,
"eval_dim_384_dot_accuracy": 0.12032654992901089,
"eval_dim_384_euclidean_accuracy": 0.8796734500709891,
"eval_dim_384_manhattan_accuracy": 0.8809749171793658,
"eval_dim_384_max_accuracy": 0.8809749171793658,
"eval_dim_64_cosine_accuracy": 0.879081874112636,
"eval_dim_64_dot_accuracy": 0.12245622337908188,
"eval_dim_64_euclidean_accuracy": 0.8796734500709891,
"eval_dim_64_manhattan_accuracy": 0.8802650260293422,
"eval_dim_64_max_accuracy": 0.8802650260293422,
"eval_loss": 16.401565551757812,
"eval_runtime": 103.0896,
"eval_samples_per_second": 81.987,
"eval_sequential_score": 0.879081874112636,
"eval_steps_per_second": 2.571,
"step": 1550
},
{
"epoch": 10.76987799747581,
"grad_norm": 5.452834129333496,
"learning_rate": 1.9188377549761962e-05,
"loss": 16.6485,
"step": 1600
},
{
"epoch": 10.76987799747581,
"eval_dim_128_cosine_accuracy": 0.8789635589209654,
"eval_dim_128_dot_accuracy": 0.1216280170373876,
"eval_dim_128_euclidean_accuracy": 0.8789635589209654,
"eval_dim_128_manhattan_accuracy": 0.8781353525792712,
"eval_dim_128_max_accuracy": 0.8789635589209654,
"eval_dim_256_cosine_accuracy": 0.8801467108376716,
"eval_dim_256_dot_accuracy": 0.11985328916232844,
"eval_dim_256_euclidean_accuracy": 0.8796734500709891,
"eval_dim_256_manhattan_accuracy": 0.8794368196876479,
"eval_dim_256_max_accuracy": 0.8801467108376716,
"eval_dim_384_cosine_accuracy": 0.879081874112636,
"eval_dim_384_dot_accuracy": 0.12091812588736393,
"eval_dim_384_euclidean_accuracy": 0.879081874112636,
"eval_dim_384_manhattan_accuracy": 0.8794368196876479,
"eval_dim_384_max_accuracy": 0.8794368196876479,
"eval_dim_64_cosine_accuracy": 0.8781353525792712,
"eval_dim_64_dot_accuracy": 0.12304779933743493,
"eval_dim_64_euclidean_accuracy": 0.8783719829626124,
"eval_dim_64_manhattan_accuracy": 0.879081874112636,
"eval_dim_64_max_accuracy": 0.879081874112636,
"eval_loss": 16.396345138549805,
"eval_runtime": 103.471,
"eval_samples_per_second": 81.685,
"eval_sequential_score": 0.8781353525792712,
"eval_steps_per_second": 2.561,
"step": 1600
},
{
"epoch": 11.106436684896929,
"grad_norm": 3.5591487884521484,
"learning_rate": 1.9092753778138885e-05,
"loss": 16.6205,
"step": 1650
},
{
"epoch": 11.106436684896929,
"eval_dim_128_cosine_accuracy": 0.8778987221959299,
"eval_dim_128_dot_accuracy": 0.12316611452910553,
"eval_dim_128_euclidean_accuracy": 0.8781353525792712,
"eval_dim_128_manhattan_accuracy": 0.8780170373876006,
"eval_dim_128_max_accuracy": 0.8781353525792712,
"eval_dim_256_cosine_accuracy": 0.8787269285376242,
"eval_dim_256_dot_accuracy": 0.121509701845717,
"eval_dim_256_euclidean_accuracy": 0.8787269285376242,
"eval_dim_256_manhattan_accuracy": 0.8793185044959773,
"eval_dim_256_max_accuracy": 0.8793185044959773,
"eval_dim_384_cosine_accuracy": 0.8793185044959773,
"eval_dim_384_dot_accuracy": 0.12068149550402271,
"eval_dim_384_euclidean_accuracy": 0.8793185044959773,
"eval_dim_384_manhattan_accuracy": 0.8801467108376716,
"eval_dim_384_max_accuracy": 0.8801467108376716,
"eval_dim_64_cosine_accuracy": 0.8770705158542357,
"eval_dim_64_dot_accuracy": 0.12541410317084714,
"eval_dim_64_euclidean_accuracy": 0.8771888310459063,
"eval_dim_64_manhattan_accuracy": 0.8776620918125887,
"eval_dim_64_max_accuracy": 0.8776620918125887,
"eval_loss": 16.401174545288086,
"eval_runtime": 102.9169,
"eval_samples_per_second": 82.124,
"eval_sequential_score": 0.8770705158542357,
"eval_steps_per_second": 2.575,
"step": 1650
},
{
"epoch": 11.442995372318048,
"grad_norm": 3.712305784225464,
"learning_rate": 1.8992072148958368e-05,
"loss": 16.6095,
"step": 1700
},
{
"epoch": 11.442995372318048,
"eval_dim_128_cosine_accuracy": 0.8786086133459536,
"eval_dim_128_dot_accuracy": 0.12233790818741126,
"eval_dim_128_euclidean_accuracy": 0.878490298154283,
"eval_dim_128_manhattan_accuracy": 0.8786086133459536,
"eval_dim_128_max_accuracy": 0.8786086133459536,
"eval_dim_256_cosine_accuracy": 0.8789635589209654,
"eval_dim_256_dot_accuracy": 0.1216280170373876,
"eval_dim_256_euclidean_accuracy": 0.879081874112636,
"eval_dim_256_manhattan_accuracy": 0.8780170373876006,
"eval_dim_256_max_accuracy": 0.879081874112636,
"eval_dim_384_cosine_accuracy": 0.8794368196876479,
"eval_dim_384_dot_accuracy": 0.12056318031235211,
"eval_dim_384_euclidean_accuracy": 0.8794368196876479,
"eval_dim_384_manhattan_accuracy": 0.879081874112636,
"eval_dim_384_max_accuracy": 0.8794368196876479,
"eval_dim_64_cosine_accuracy": 0.879081874112636,
"eval_dim_64_dot_accuracy": 0.12541410317084714,
"eval_dim_64_euclidean_accuracy": 0.8777804070042593,
"eval_dim_64_manhattan_accuracy": 0.8788452437292948,
"eval_dim_64_max_accuracy": 0.879081874112636,
"eval_loss": 16.413122177124023,
"eval_runtime": 103.5898,
"eval_samples_per_second": 81.591,
"eval_sequential_score": 0.879081874112636,
"eval_steps_per_second": 2.558,
"step": 1700
},
{
"epoch": 11.779554059739167,
"grad_norm": 4.9205145835876465,
"learning_rate": 1.888638866652356e-05,
"loss": 16.5891,
"step": 1750
},
{
"epoch": 11.779554059739167,
"eval_dim_128_cosine_accuracy": 0.8807382867960246,
"eval_dim_128_dot_accuracy": 0.1194983435873166,
"eval_dim_128_euclidean_accuracy": 0.8805016564126834,
"eval_dim_128_manhattan_accuracy": 0.8792001893043067,
"eval_dim_128_max_accuracy": 0.8807382867960246,
"eval_dim_256_cosine_accuracy": 0.8805016564126834,
"eval_dim_256_dot_accuracy": 0.11902508282063418,
"eval_dim_256_euclidean_accuracy": 0.8797917652626597,
"eval_dim_256_manhattan_accuracy": 0.8795551348793185,
"eval_dim_256_max_accuracy": 0.8805016564126834,
"eval_dim_384_cosine_accuracy": 0.8809749171793658,
"eval_dim_384_dot_accuracy": 0.11902508282063418,
"eval_dim_384_euclidean_accuracy": 0.8809749171793658,
"eval_dim_384_manhattan_accuracy": 0.880028395646001,
"eval_dim_384_max_accuracy": 0.8809749171793658,
"eval_dim_64_cosine_accuracy": 0.8801467108376716,
"eval_dim_64_dot_accuracy": 0.12292948414576432,
"eval_dim_64_euclidean_accuracy": 0.879081874112636,
"eval_dim_64_manhattan_accuracy": 0.879081874112636,
"eval_dim_64_max_accuracy": 0.8801467108376716,
"eval_loss": 16.40700340270996,
"eval_runtime": 103.5887,
"eval_samples_per_second": 81.592,
"eval_sequential_score": 0.8801467108376716,
"eval_steps_per_second": 2.558,
"step": 1750
},
{
"epoch": 12.116112747160287,
"grad_norm": 4.849546909332275,
"learning_rate": 1.8775762117425777e-05,
"loss": 16.5619,
"step": 1800
},
{
"epoch": 12.116112747160287,
"eval_dim_128_cosine_accuracy": 0.8794368196876479,
"eval_dim_128_dot_accuracy": 0.121509701845717,
"eval_dim_128_euclidean_accuracy": 0.8792001893043067,
"eval_dim_128_manhattan_accuracy": 0.8789635589209654,
"eval_dim_128_max_accuracy": 0.8794368196876479,
"eval_dim_256_cosine_accuracy": 0.880028395646001,
"eval_dim_256_dot_accuracy": 0.11973497397065783,
"eval_dim_256_euclidean_accuracy": 0.8799100804543304,
"eval_dim_256_manhattan_accuracy": 0.8792001893043067,
"eval_dim_256_max_accuracy": 0.880028395646001,
"eval_dim_384_cosine_accuracy": 0.8796734500709891,
"eval_dim_384_dot_accuracy": 0.12032654992901089,
"eval_dim_384_euclidean_accuracy": 0.8796734500709891,
"eval_dim_384_manhattan_accuracy": 0.8796734500709891,
"eval_dim_384_max_accuracy": 0.8796734500709891,
"eval_dim_64_cosine_accuracy": 0.8780170373876006,
"eval_dim_64_dot_accuracy": 0.12470421202082348,
"eval_dim_64_euclidean_accuracy": 0.8797917652626597,
"eval_dim_64_manhattan_accuracy": 0.8786086133459536,
"eval_dim_64_max_accuracy": 0.8797917652626597,
"eval_loss": 16.396265029907227,
"eval_runtime": 102.3506,
"eval_samples_per_second": 82.579,
"eval_sequential_score": 0.8780170373876006,
"eval_steps_per_second": 2.589,
"step": 1800
},
{
"epoch": 12.452671434581404,
"grad_norm": 4.944924831390381,
"learning_rate": 1.866025403784439e-05,
"loss": 16.5467,
"step": 1850
},
{
"epoch": 12.452671434581404,
"eval_dim_128_cosine_accuracy": 0.8795551348793185,
"eval_dim_128_dot_accuracy": 0.12316611452910553,
"eval_dim_128_euclidean_accuracy": 0.8787269285376242,
"eval_dim_128_manhattan_accuracy": 0.8794368196876479,
"eval_dim_128_max_accuracy": 0.8795551348793185,
"eval_dim_256_cosine_accuracy": 0.880619971604354,
"eval_dim_256_dot_accuracy": 0.12068149550402271,
"eval_dim_256_euclidean_accuracy": 0.8794368196876479,
"eval_dim_256_manhattan_accuracy": 0.8801467108376716,
"eval_dim_256_max_accuracy": 0.880619971604354,
"eval_dim_384_cosine_accuracy": 0.8803833412210128,
"eval_dim_384_dot_accuracy": 0.11961665877898722,
"eval_dim_384_euclidean_accuracy": 0.8803833412210128,
"eval_dim_384_manhattan_accuracy": 0.8807382867960246,
"eval_dim_384_max_accuracy": 0.8807382867960246,
"eval_dim_64_cosine_accuracy": 0.8789635589209654,
"eval_dim_64_dot_accuracy": 0.12470421202082348,
"eval_dim_64_euclidean_accuracy": 0.8781353525792712,
"eval_dim_64_manhattan_accuracy": 0.8796734500709891,
"eval_dim_64_max_accuracy": 0.8796734500709891,
"eval_loss": 16.399133682250977,
"eval_runtime": 104.1432,
"eval_samples_per_second": 81.157,
"eval_sequential_score": 0.8789635589209654,
"eval_steps_per_second": 2.545,
"step": 1850
},
{
"epoch": 12.789230122002524,
"grad_norm": 6.032313346862793,
"learning_rate": 1.853992867931721e-05,
"loss": 16.5398,
"step": 1900
},
{
"epoch": 12.789230122002524,
"eval_dim_128_cosine_accuracy": 0.8792001893043067,
"eval_dim_128_dot_accuracy": 0.12139138665404638,
"eval_dim_128_euclidean_accuracy": 0.8797917652626597,
"eval_dim_128_manhattan_accuracy": 0.8787269285376242,
"eval_dim_128_max_accuracy": 0.8797917652626597,
"eval_dim_256_cosine_accuracy": 0.8797917652626597,
"eval_dim_256_dot_accuracy": 0.11973497397065783,
"eval_dim_256_euclidean_accuracy": 0.8797917652626597,
"eval_dim_256_manhattan_accuracy": 0.8792001893043067,
"eval_dim_256_max_accuracy": 0.8797917652626597,
"eval_dim_384_cosine_accuracy": 0.8801467108376716,
"eval_dim_384_dot_accuracy": 0.11985328916232844,
"eval_dim_384_euclidean_accuracy": 0.8801467108376716,
"eval_dim_384_manhattan_accuracy": 0.8805016564126834,
"eval_dim_384_max_accuracy": 0.8805016564126834,
"eval_dim_64_cosine_accuracy": 0.8788452437292948,
"eval_dim_64_dot_accuracy": 0.12423095125414103,
"eval_dim_64_euclidean_accuracy": 0.8793185044959773,
"eval_dim_64_manhattan_accuracy": 0.8776620918125887,
"eval_dim_64_max_accuracy": 0.8793185044959773,
"eval_loss": 16.397045135498047,
"eval_runtime": 103.5361,
"eval_samples_per_second": 81.633,
"eval_sequential_score": 0.8788452437292948,
"eval_steps_per_second": 2.559,
"step": 1900
},
{
"epoch": 13.125788809423643,
"grad_norm": 4.27797269821167,
"learning_rate": 1.8414852973000503e-05,
"loss": 16.5047,
"step": 1950
},
{
"epoch": 13.125788809423643,
"eval_dim_128_cosine_accuracy": 0.8795551348793185,
"eval_dim_128_dot_accuracy": 0.1216280170373876,
"eval_dim_128_euclidean_accuracy": 0.8796734500709891,
"eval_dim_128_manhattan_accuracy": 0.8797917652626597,
"eval_dim_128_max_accuracy": 0.8797917652626597,
"eval_dim_256_cosine_accuracy": 0.8803833412210128,
"eval_dim_256_dot_accuracy": 0.12068149550402271,
"eval_dim_256_euclidean_accuracy": 0.8797917652626597,
"eval_dim_256_manhattan_accuracy": 0.8797917652626597,
"eval_dim_256_max_accuracy": 0.8803833412210128,
"eval_dim_384_cosine_accuracy": 0.8803833412210128,
"eval_dim_384_dot_accuracy": 0.11961665877898722,
"eval_dim_384_euclidean_accuracy": 0.8803833412210128,
"eval_dim_384_manhattan_accuracy": 0.8805016564126834,
"eval_dim_384_max_accuracy": 0.8805016564126834,
"eval_dim_64_cosine_accuracy": 0.8788452437292948,
"eval_dim_64_dot_accuracy": 0.12588736393752958,
"eval_dim_64_euclidean_accuracy": 0.8793185044959773,
"eval_dim_64_manhattan_accuracy": 0.8802650260293422,
"eval_dim_64_max_accuracy": 0.8802650260293422,
"eval_loss": 16.396381378173828,
"eval_runtime": 102.672,
"eval_samples_per_second": 82.32,
"eval_sequential_score": 0.8788452437292948,
"eval_steps_per_second": 2.581,
"step": 1950
},
{
"epoch": 13.462347496844762,
"grad_norm": 4.051229953765869,
"learning_rate": 1.8285096492438424e-05,
"loss": 16.4985,
"step": 2000
},
{
"epoch": 13.462347496844762,
"eval_dim_128_cosine_accuracy": 0.8793185044959773,
"eval_dim_128_dot_accuracy": 0.12127307146237577,
"eval_dim_128_euclidean_accuracy": 0.8803833412210128,
"eval_dim_128_manhattan_accuracy": 0.8796734500709891,
"eval_dim_128_max_accuracy": 0.8803833412210128,
"eval_dim_256_cosine_accuracy": 0.8797917652626597,
"eval_dim_256_dot_accuracy": 0.12020823473734027,
"eval_dim_256_euclidean_accuracy": 0.8796734500709891,
"eval_dim_256_manhattan_accuracy": 0.8797917652626597,
"eval_dim_256_max_accuracy": 0.8797917652626597,
"eval_dim_384_cosine_accuracy": 0.8807382867960246,
"eval_dim_384_dot_accuracy": 0.11926171320397538,
"eval_dim_384_euclidean_accuracy": 0.8807382867960246,
"eval_dim_384_manhattan_accuracy": 0.8810932323710364,
"eval_dim_384_max_accuracy": 0.8810932323710364,
"eval_dim_64_cosine_accuracy": 0.8789635589209654,
"eval_dim_64_dot_accuracy": 0.12316611452910553,
"eval_dim_64_euclidean_accuracy": 0.8787269285376242,
"eval_dim_64_manhattan_accuracy": 0.879081874112636,
"eval_dim_64_max_accuracy": 0.879081874112636,
"eval_loss": 16.4024600982666,
"eval_runtime": 104.2185,
"eval_samples_per_second": 81.099,
"eval_sequential_score": 0.8789635589209654,
"eval_steps_per_second": 2.543,
"step": 2000
},
{
"epoch": 13.798906184265881,
"grad_norm": 4.3837666511535645,
"learning_rate": 1.8150731414862623e-05,
"loss": 16.4852,
"step": 2050
},
{
"epoch": 13.798906184265881,
"eval_dim_128_cosine_accuracy": 0.8801467108376716,
"eval_dim_128_dot_accuracy": 0.12032654992901089,
"eval_dim_128_euclidean_accuracy": 0.8805016564126834,
"eval_dim_128_manhattan_accuracy": 0.8797917652626597,
"eval_dim_128_max_accuracy": 0.8805016564126834,
"eval_dim_256_cosine_accuracy": 0.8809749171793658,
"eval_dim_256_dot_accuracy": 0.119380028395646,
"eval_dim_256_euclidean_accuracy": 0.8814481779460482,
"eval_dim_256_manhattan_accuracy": 0.8807382867960246,
"eval_dim_256_max_accuracy": 0.8814481779460482,
"eval_dim_384_cosine_accuracy": 0.880028395646001,
"eval_dim_384_dot_accuracy": 0.11997160435399905,
"eval_dim_384_euclidean_accuracy": 0.880028395646001,
"eval_dim_384_manhattan_accuracy": 0.8794368196876479,
"eval_dim_384_max_accuracy": 0.880028395646001,
"eval_dim_64_cosine_accuracy": 0.8793185044959773,
"eval_dim_64_dot_accuracy": 0.12352106010411737,
"eval_dim_64_euclidean_accuracy": 0.8801467108376716,
"eval_dim_64_manhattan_accuracy": 0.8796734500709891,
"eval_dim_64_max_accuracy": 0.8801467108376716,
"eval_loss": 16.410737991333008,
"eval_runtime": 102.5333,
"eval_samples_per_second": 82.432,
"eval_sequential_score": 0.8793185044959773,
"eval_steps_per_second": 2.585,
"step": 2050
},
{
"epoch": 14.135464871687,
"grad_norm": 4.87747859954834,
"learning_rate": 1.8011832481043577e-05,
"loss": 16.4526,
"step": 2100
},
{
"epoch": 14.135464871687,
"eval_dim_128_cosine_accuracy": 0.8796734500709891,
"eval_dim_128_dot_accuracy": 0.12103644107903455,
"eval_dim_128_euclidean_accuracy": 0.8796734500709891,
"eval_dim_128_manhattan_accuracy": 0.8794368196876479,
"eval_dim_128_max_accuracy": 0.8796734500709891,
"eval_dim_256_cosine_accuracy": 0.8801467108376716,
"eval_dim_256_dot_accuracy": 0.12068149550402271,
"eval_dim_256_euclidean_accuracy": 0.8805016564126834,
"eval_dim_256_manhattan_accuracy": 0.8797917652626597,
"eval_dim_256_max_accuracy": 0.8805016564126834,
"eval_dim_384_cosine_accuracy": 0.8808566019876952,
"eval_dim_384_dot_accuracy": 0.11914339801230478,
"eval_dim_384_euclidean_accuracy": 0.8808566019876952,
"eval_dim_384_manhattan_accuracy": 0.8810932323710364,
"eval_dim_384_max_accuracy": 0.8810932323710364,
"eval_dim_64_cosine_accuracy": 0.8778987221959299,
"eval_dim_64_dot_accuracy": 0.12470421202082348,
"eval_dim_64_euclidean_accuracy": 0.8781353525792712,
"eval_dim_64_manhattan_accuracy": 0.879081874112636,
"eval_dim_64_max_accuracy": 0.879081874112636,
"eval_loss": 16.392879486083984,
"eval_runtime": 103.5589,
"eval_samples_per_second": 81.615,
"eval_sequential_score": 0.8778987221959299,
"eval_steps_per_second": 2.559,
"step": 2100
},
{
"epoch": 14.47202355910812,
"grad_norm": 6.463150978088379,
"learning_rate": 1.78684769537159e-05,
"loss": 16.4343,
"step": 2150
},
{
"epoch": 14.47202355910812,
"eval_dim_128_cosine_accuracy": 0.8788452437292948,
"eval_dim_128_dot_accuracy": 0.12221959299574066,
"eval_dim_128_euclidean_accuracy": 0.878490298154283,
"eval_dim_128_manhattan_accuracy": 0.8797917652626597,
"eval_dim_128_max_accuracy": 0.8797917652626597,
"eval_dim_256_cosine_accuracy": 0.879081874112636,
"eval_dim_256_dot_accuracy": 0.121509701845717,
"eval_dim_256_euclidean_accuracy": 0.8786086133459536,
"eval_dim_256_manhattan_accuracy": 0.8797917652626597,
"eval_dim_256_max_accuracy": 0.8797917652626597,
"eval_dim_384_cosine_accuracy": 0.8796734500709891,
"eval_dim_384_dot_accuracy": 0.12032654992901089,
"eval_dim_384_euclidean_accuracy": 0.8796734500709891,
"eval_dim_384_manhattan_accuracy": 0.880619971604354,
"eval_dim_384_max_accuracy": 0.880619971604354,
"eval_dim_64_cosine_accuracy": 0.8774254614292475,
"eval_dim_64_dot_accuracy": 0.1250591575958353,
"eval_dim_64_euclidean_accuracy": 0.8774254614292475,
"eval_dim_64_manhattan_accuracy": 0.8780170373876006,
"eval_dim_64_max_accuracy": 0.8780170373876006,
"eval_loss": 16.40749740600586,
"eval_runtime": 102.9532,
"eval_samples_per_second": 82.096,
"eval_sequential_score": 0.8774254614292475,
"eval_steps_per_second": 2.574,
"step": 2150
},
{
"epoch": 14.80858224652924,
"grad_norm": 4.839356422424316,
"learning_rate": 1.7720744574600865e-05,
"loss": 16.4244,
"step": 2200
},
{
"epoch": 14.80858224652924,
"eval_dim_128_cosine_accuracy": 0.8803833412210128,
"eval_dim_128_dot_accuracy": 0.11973497397065783,
"eval_dim_128_euclidean_accuracy": 0.880619971604354,
"eval_dim_128_manhattan_accuracy": 0.8809749171793658,
"eval_dim_128_max_accuracy": 0.8809749171793658,
"eval_dim_256_cosine_accuracy": 0.8819214387127308,
"eval_dim_256_dot_accuracy": 0.119380028395646,
"eval_dim_256_euclidean_accuracy": 0.8815664931377188,
"eval_dim_256_manhattan_accuracy": 0.8814481779460482,
"eval_dim_256_max_accuracy": 0.8819214387127308,
"eval_dim_384_cosine_accuracy": 0.8820397539044014,
"eval_dim_384_dot_accuracy": 0.11796024609559867,
"eval_dim_384_euclidean_accuracy": 0.8820397539044014,
"eval_dim_384_manhattan_accuracy": 0.882158069096072,
"eval_dim_384_max_accuracy": 0.882158069096072,
"eval_dim_64_cosine_accuracy": 0.8808566019876952,
"eval_dim_64_dot_accuracy": 0.12221959299574066,
"eval_dim_64_euclidean_accuracy": 0.880619971604354,
"eval_dim_64_manhattan_accuracy": 0.8786086133459536,
"eval_dim_64_max_accuracy": 0.8808566019876952,
"eval_loss": 16.402673721313477,
"eval_runtime": 103.4179,
"eval_samples_per_second": 81.727,
"eval_sequential_score": 0.8808566019876952,
"eval_steps_per_second": 2.562,
"step": 2200
},
{
"epoch": 15.145140933950358,
"grad_norm": 5.812349796295166,
"learning_rate": 1.756871752004992e-05,
"loss": 16.3947,
"step": 2250
},
{
"epoch": 15.145140933950358,
"eval_dim_128_cosine_accuracy": 0.879081874112636,
"eval_dim_128_dot_accuracy": 0.12316611452910553,
"eval_dim_128_euclidean_accuracy": 0.8786086133459536,
"eval_dim_128_manhattan_accuracy": 0.8809749171793658,
"eval_dim_128_max_accuracy": 0.8809749171793658,
"eval_dim_256_cosine_accuracy": 0.8792001893043067,
"eval_dim_256_dot_accuracy": 0.12139138665404638,
"eval_dim_256_euclidean_accuracy": 0.8801467108376716,
"eval_dim_256_manhattan_accuracy": 0.8813298627543776,
"eval_dim_256_max_accuracy": 0.8813298627543776,
"eval_dim_384_cosine_accuracy": 0.8802650260293422,
"eval_dim_384_dot_accuracy": 0.11973497397065783,
"eval_dim_384_euclidean_accuracy": 0.8802650260293422,
"eval_dim_384_manhattan_accuracy": 0.8808566019876952,
"eval_dim_384_max_accuracy": 0.8808566019876952,
"eval_dim_64_cosine_accuracy": 0.8773071462375769,
"eval_dim_64_dot_accuracy": 0.12695220066256507,
"eval_dim_64_euclidean_accuracy": 0.8768338854708945,
"eval_dim_64_manhattan_accuracy": 0.8787269285376242,
"eval_dim_64_max_accuracy": 0.8787269285376242,
"eval_loss": 16.4101619720459,
"eval_runtime": 105.1832,
"eval_samples_per_second": 80.355,
"eval_sequential_score": 0.8773071462375769,
"eval_steps_per_second": 2.519,
"step": 2250
},
{
"epoch": 15.481699621371476,
"grad_norm": 4.386394023895264,
"learning_rate": 1.7412480355334006e-05,
"loss": 16.3827,
"step": 2300
},
{
"epoch": 15.481699621371476,
"eval_dim_128_cosine_accuracy": 0.8803833412210128,
"eval_dim_128_dot_accuracy": 0.12245622337908188,
"eval_dim_128_euclidean_accuracy": 0.880619971604354,
"eval_dim_128_manhattan_accuracy": 0.88180312352106,
"eval_dim_128_max_accuracy": 0.88180312352106,
"eval_dim_256_cosine_accuracy": 0.8813298627543776,
"eval_dim_256_dot_accuracy": 0.12079981069569333,
"eval_dim_256_euclidean_accuracy": 0.8809749171793658,
"eval_dim_256_manhattan_accuracy": 0.8819214387127308,
"eval_dim_256_max_accuracy": 0.8819214387127308,
"eval_dim_384_cosine_accuracy": 0.8813298627543776,
"eval_dim_384_dot_accuracy": 0.11867013724562234,
"eval_dim_384_euclidean_accuracy": 0.8813298627543776,
"eval_dim_384_manhattan_accuracy": 0.8809749171793658,
"eval_dim_384_max_accuracy": 0.8813298627543776,
"eval_dim_64_cosine_accuracy": 0.8781353525792712,
"eval_dim_64_dot_accuracy": 0.1260056791292002,
"eval_dim_64_euclidean_accuracy": 0.8789635589209654,
"eval_dim_64_manhattan_accuracy": 0.8814481779460482,
"eval_dim_64_max_accuracy": 0.8814481779460482,
"eval_loss": 16.404207229614258,
"eval_runtime": 101.3893,
"eval_samples_per_second": 83.362,
"eval_sequential_score": 0.8781353525792712,
"eval_steps_per_second": 2.614,
"step": 2300
},
{
"epoch": 15.818258308792595,
"grad_norm": 4.8762359619140625,
"learning_rate": 1.7252119987603976e-05,
"loss": 16.3719,
"step": 2350
},
{
"epoch": 15.818258308792595,
"eval_dim_128_cosine_accuracy": 0.8801467108376716,
"eval_dim_128_dot_accuracy": 0.12032654992901089,
"eval_dim_128_euclidean_accuracy": 0.8802650260293422,
"eval_dim_128_manhattan_accuracy": 0.8802650260293422,
"eval_dim_128_max_accuracy": 0.8802650260293422,
"eval_dim_256_cosine_accuracy": 0.88180312352106,
"eval_dim_256_dot_accuracy": 0.11878845243729295,
"eval_dim_256_euclidean_accuracy": 0.8820397539044014,
"eval_dim_256_manhattan_accuracy": 0.881211547562707,
"eval_dim_256_max_accuracy": 0.8820397539044014,
"eval_dim_384_cosine_accuracy": 0.8820397539044014,
"eval_dim_384_dot_accuracy": 0.11796024609559867,
"eval_dim_384_euclidean_accuracy": 0.8820397539044014,
"eval_dim_384_manhattan_accuracy": 0.8808566019876952,
"eval_dim_384_max_accuracy": 0.8820397539044014,
"eval_dim_64_cosine_accuracy": 0.879081874112636,
"eval_dim_64_dot_accuracy": 0.12458589682915286,
"eval_dim_64_euclidean_accuracy": 0.8805016564126834,
"eval_dim_64_manhattan_accuracy": 0.8797917652626597,
"eval_dim_64_max_accuracy": 0.8805016564126834,
"eval_loss": 16.40033721923828,
"eval_runtime": 104.1264,
"eval_samples_per_second": 81.171,
"eval_sequential_score": 0.879081874112636,
"eval_steps_per_second": 2.545,
"step": 2350
},
{
"epoch": 16.154816996213714,
"grad_norm": 5.414395809173584,
"learning_rate": 1.7087725617548385e-05,
"loss": 16.3403,
"step": 2400
},
{
"epoch": 16.154816996213714,
"eval_dim_128_cosine_accuracy": 0.8781353525792712,
"eval_dim_128_dot_accuracy": 0.12328442972077615,
"eval_dim_128_euclidean_accuracy": 0.8778987221959299,
"eval_dim_128_manhattan_accuracy": 0.8787269285376242,
"eval_dim_128_max_accuracy": 0.8787269285376242,
"eval_dim_256_cosine_accuracy": 0.879081874112636,
"eval_dim_256_dot_accuracy": 0.12210127780407004,
"eval_dim_256_euclidean_accuracy": 0.8788452437292948,
"eval_dim_256_manhattan_accuracy": 0.879081874112636,
"eval_dim_256_max_accuracy": 0.879081874112636,
"eval_dim_384_cosine_accuracy": 0.8799100804543304,
"eval_dim_384_dot_accuracy": 0.12008991954566967,
"eval_dim_384_euclidean_accuracy": 0.8799100804543304,
"eval_dim_384_manhattan_accuracy": 0.8794368196876479,
"eval_dim_384_max_accuracy": 0.8799100804543304,
"eval_dim_64_cosine_accuracy": 0.8767155702792239,
"eval_dim_64_dot_accuracy": 0.12766209181258872,
"eval_dim_64_euclidean_accuracy": 0.8765972550875533,
"eval_dim_64_manhattan_accuracy": 0.8769522006625651,
"eval_dim_64_max_accuracy": 0.8769522006625651,
"eval_loss": 16.413236618041992,
"eval_runtime": 105.1626,
"eval_samples_per_second": 80.371,
"eval_sequential_score": 0.8767155702792239,
"eval_steps_per_second": 2.52,
"step": 2400
},
{
"epoch": 16.491375683634836,
"grad_norm": 4.138753414154053,
"learning_rate": 1.6919388689775463e-05,
"loss": 16.3357,
"step": 2450
},
{
"epoch": 16.491375683634836,
"eval_dim_128_cosine_accuracy": 0.8803833412210128,
"eval_dim_128_dot_accuracy": 0.1216280170373876,
"eval_dim_128_euclidean_accuracy": 0.879081874112636,
"eval_dim_128_manhattan_accuracy": 0.8802650260293422,
"eval_dim_128_max_accuracy": 0.8803833412210128,
"eval_dim_256_cosine_accuracy": 0.8808566019876952,
"eval_dim_256_dot_accuracy": 0.121509701845717,
"eval_dim_256_euclidean_accuracy": 0.8802650260293422,
"eval_dim_256_manhattan_accuracy": 0.8807382867960246,
"eval_dim_256_max_accuracy": 0.8808566019876952,
"eval_dim_384_cosine_accuracy": 0.8807382867960246,
"eval_dim_384_dot_accuracy": 0.11926171320397538,
"eval_dim_384_euclidean_accuracy": 0.8807382867960246,
"eval_dim_384_manhattan_accuracy": 0.8801467108376716,
"eval_dim_384_max_accuracy": 0.8807382867960246,
"eval_dim_64_cosine_accuracy": 0.8792001893043067,
"eval_dim_64_dot_accuracy": 0.12647893989588263,
"eval_dim_64_euclidean_accuracy": 0.8786086133459536,
"eval_dim_64_manhattan_accuracy": 0.8807382867960246,
"eval_dim_64_max_accuracy": 0.8807382867960246,
"eval_loss": 16.414878845214844,
"eval_runtime": 100.6398,
"eval_samples_per_second": 83.983,
"eval_sequential_score": 0.8792001893043067,
"eval_steps_per_second": 2.633,
"step": 2450
},
{
"epoch": 16.827934371055953,
"grad_norm": 4.080146312713623,
"learning_rate": 1.6747202841946928e-05,
"loss": 16.3203,
"step": 2500
},
{
"epoch": 16.827934371055953,
"eval_dim_128_cosine_accuracy": 0.8803833412210128,
"eval_dim_128_dot_accuracy": 0.12186464742072882,
"eval_dim_128_euclidean_accuracy": 0.8814481779460482,
"eval_dim_128_manhattan_accuracy": 0.880619971604354,
"eval_dim_128_max_accuracy": 0.8814481779460482,
"eval_dim_256_cosine_accuracy": 0.8814481779460482,
"eval_dim_256_dot_accuracy": 0.12044486512068149,
"eval_dim_256_euclidean_accuracy": 0.8820397539044014,
"eval_dim_256_manhattan_accuracy": 0.8825130146710838,
"eval_dim_256_max_accuracy": 0.8825130146710838,
"eval_dim_384_cosine_accuracy": 0.8815664931377188,
"eval_dim_384_dot_accuracy": 0.11843350686228112,
"eval_dim_384_euclidean_accuracy": 0.8815664931377188,
"eval_dim_384_manhattan_accuracy": 0.8815664931377188,
"eval_dim_384_max_accuracy": 0.8815664931377188,
"eval_dim_64_cosine_accuracy": 0.879081874112636,
"eval_dim_64_dot_accuracy": 0.12565073355418835,
"eval_dim_64_euclidean_accuracy": 0.880028395646001,
"eval_dim_64_manhattan_accuracy": 0.8810932323710364,
"eval_dim_64_max_accuracy": 0.8810932323710364,
"eval_loss": 16.408126831054688,
"eval_runtime": 103.4973,
"eval_samples_per_second": 81.664,
"eval_sequential_score": 0.879081874112636,
"eval_steps_per_second": 2.56,
"step": 2500
},
{
"epoch": 17.16449305847707,
"grad_norm": 5.26322078704834,
"learning_rate": 1.6571263852691887e-05,
"loss": 16.2986,
"step": 2550
},
{
"epoch": 17.16449305847707,
"eval_dim_128_cosine_accuracy": 0.8797917652626597,
"eval_dim_128_dot_accuracy": 0.12304779933743493,
"eval_dim_128_euclidean_accuracy": 0.8801467108376716,
"eval_dim_128_manhattan_accuracy": 0.8778987221959299,
"eval_dim_128_max_accuracy": 0.8801467108376716,
"eval_dim_256_cosine_accuracy": 0.880028395646001,
"eval_dim_256_dot_accuracy": 0.12068149550402271,
"eval_dim_256_euclidean_accuracy": 0.880619971604354,
"eval_dim_256_manhattan_accuracy": 0.8805016564126834,
"eval_dim_256_max_accuracy": 0.880619971604354,
"eval_dim_384_cosine_accuracy": 0.8820397539044014,
"eval_dim_384_dot_accuracy": 0.11796024609559867,
"eval_dim_384_euclidean_accuracy": 0.8820397539044014,
"eval_dim_384_manhattan_accuracy": 0.8808566019876952,
"eval_dim_384_max_accuracy": 0.8820397539044014,
"eval_dim_64_cosine_accuracy": 0.879081874112636,
"eval_dim_64_dot_accuracy": 0.12707051585423568,
"eval_dim_64_euclidean_accuracy": 0.8781353525792712,
"eval_dim_64_manhattan_accuracy": 0.8778987221959299,
"eval_dim_64_max_accuracy": 0.879081874112636,
"eval_loss": 16.413921356201172,
"eval_runtime": 103.9357,
"eval_samples_per_second": 81.32,
"eval_sequential_score": 0.879081874112636,
"eval_steps_per_second": 2.55,
"step": 2550
},
{
"epoch": 17.50105174589819,
"grad_norm": 9.353097915649414,
"learning_rate": 1.639166958832985e-05,
"loss": 16.2923,
"step": 2600
},
{
"epoch": 17.50105174589819,
"eval_dim_128_cosine_accuracy": 0.8786086133459536,
"eval_dim_128_dot_accuracy": 0.12352106010411737,
"eval_dim_128_euclidean_accuracy": 0.8783719829626124,
"eval_dim_128_manhattan_accuracy": 0.8807382867960246,
"eval_dim_128_max_accuracy": 0.8807382867960246,
"eval_dim_256_cosine_accuracy": 0.8792001893043067,
"eval_dim_256_dot_accuracy": 0.12103644107903455,
"eval_dim_256_euclidean_accuracy": 0.8796734500709891,
"eval_dim_256_manhattan_accuracy": 0.8810932323710364,
"eval_dim_256_max_accuracy": 0.8810932323710364,
"eval_dim_384_cosine_accuracy": 0.8799100804543304,
"eval_dim_384_dot_accuracy": 0.12008991954566967,
"eval_dim_384_euclidean_accuracy": 0.8799100804543304,
"eval_dim_384_manhattan_accuracy": 0.880028395646001,
"eval_dim_384_max_accuracy": 0.880028395646001,
"eval_dim_64_cosine_accuracy": 0.8768338854708945,
"eval_dim_64_dot_accuracy": 0.12754377662091812,
"eval_dim_64_euclidean_accuracy": 0.8762423095125415,
"eval_dim_64_manhattan_accuracy": 0.8789635589209654,
"eval_dim_64_max_accuracy": 0.8789635589209654,
"eval_loss": 16.406217575073242,
"eval_runtime": 101.8719,
"eval_samples_per_second": 82.967,
"eval_sequential_score": 0.8768338854708945,
"eval_steps_per_second": 2.601,
"step": 2600
},
{
"epoch": 17.83761043331931,
"grad_norm": 5.8258891105651855,
"learning_rate": 1.6208519948432438e-05,
"loss": 16.2649,
"step": 2650
},
{
"epoch": 17.83761043331931,
"eval_dim_128_cosine_accuracy": 0.880028395646001,
"eval_dim_128_dot_accuracy": 0.12186464742072882,
"eval_dim_128_euclidean_accuracy": 0.8803833412210128,
"eval_dim_128_manhattan_accuracy": 0.8799100804543304,
"eval_dim_128_max_accuracy": 0.8803833412210128,
"eval_dim_256_cosine_accuracy": 0.8807382867960246,
"eval_dim_256_dot_accuracy": 0.12210127780407004,
"eval_dim_256_euclidean_accuracy": 0.8814481779460482,
"eval_dim_256_manhattan_accuracy": 0.8810932323710364,
"eval_dim_256_max_accuracy": 0.8814481779460482,
"eval_dim_384_cosine_accuracy": 0.8814481779460482,
"eval_dim_384_dot_accuracy": 0.11855182205395173,
"eval_dim_384_euclidean_accuracy": 0.8814481779460482,
"eval_dim_384_manhattan_accuracy": 0.8814481779460482,
"eval_dim_384_max_accuracy": 0.8814481779460482,
"eval_dim_64_cosine_accuracy": 0.8787269285376242,
"eval_dim_64_dot_accuracy": 0.1283719829626124,
"eval_dim_64_euclidean_accuracy": 0.8788452437292948,
"eval_dim_64_manhattan_accuracy": 0.8799100804543304,
"eval_dim_64_max_accuracy": 0.8799100804543304,
"eval_loss": 16.410572052001953,
"eval_runtime": 101.9269,
"eval_samples_per_second": 82.922,
"eval_sequential_score": 0.8787269285376242,
"eval_steps_per_second": 2.6,
"step": 2650
},
{
"epoch": 18.17416912074043,
"grad_norm": 4.463468074798584,
"learning_rate": 1.6021916810254096e-05,
"loss": 16.2505,
"step": 2700
},
{
"epoch": 18.17416912074043,
"eval_dim_128_cosine_accuracy": 0.8786086133459536,
"eval_dim_128_dot_accuracy": 0.12411263606247042,
"eval_dim_128_euclidean_accuracy": 0.8780170373876006,
"eval_dim_128_manhattan_accuracy": 0.8792001893043067,
"eval_dim_128_max_accuracy": 0.8792001893043067,
"eval_dim_256_cosine_accuracy": 0.8793185044959773,
"eval_dim_256_dot_accuracy": 0.12210127780407004,
"eval_dim_256_euclidean_accuracy": 0.8793185044959773,
"eval_dim_256_manhattan_accuracy": 0.8805016564126834,
"eval_dim_256_max_accuracy": 0.8805016564126834,
"eval_dim_384_cosine_accuracy": 0.8802650260293422,
"eval_dim_384_dot_accuracy": 0.11973497397065783,
"eval_dim_384_euclidean_accuracy": 0.8802650260293422,
"eval_dim_384_manhattan_accuracy": 0.8813298627543776,
"eval_dim_384_max_accuracy": 0.8813298627543776,
"eval_dim_64_cosine_accuracy": 0.8770705158542357,
"eval_dim_64_dot_accuracy": 0.13014671083767157,
"eval_dim_64_euclidean_accuracy": 0.8761239943208708,
"eval_dim_64_manhattan_accuracy": 0.8787269285376242,
"eval_dim_64_max_accuracy": 0.8787269285376242,
"eval_loss": 16.418752670288086,
"eval_runtime": 106.398,
"eval_samples_per_second": 79.438,
"eval_sequential_score": 0.8770705158542357,
"eval_steps_per_second": 2.491,
"step": 2700
},
{
"epoch": 18.510727808161548,
"grad_norm": 5.066239833831787,
"learning_rate": 1.5831963972062734e-05,
"loss": 16.226,
"step": 2750
},
{
"epoch": 18.510727808161548,
"eval_dim_128_cosine_accuracy": 0.8770705158542357,
"eval_dim_128_dot_accuracy": 0.12446758163748226,
"eval_dim_128_euclidean_accuracy": 0.8771888310459063,
"eval_dim_128_manhattan_accuracy": 0.8778987221959299,
"eval_dim_128_max_accuracy": 0.8778987221959299,
"eval_dim_256_cosine_accuracy": 0.8781353525792712,
"eval_dim_256_dot_accuracy": 0.12304779933743493,
"eval_dim_256_euclidean_accuracy": 0.8788452437292948,
"eval_dim_256_manhattan_accuracy": 0.8799100804543304,
"eval_dim_256_max_accuracy": 0.8799100804543304,
"eval_dim_384_cosine_accuracy": 0.8780170373876006,
"eval_dim_384_dot_accuracy": 0.12198296261239944,
"eval_dim_384_euclidean_accuracy": 0.8780170373876006,
"eval_dim_384_manhattan_accuracy": 0.8770705158542357,
"eval_dim_384_max_accuracy": 0.8780170373876006,
"eval_dim_64_cosine_accuracy": 0.8765972550875533,
"eval_dim_64_dot_accuracy": 0.12884524372929484,
"eval_dim_64_euclidean_accuracy": 0.8765972550875533,
"eval_dim_64_manhattan_accuracy": 0.8778987221959299,
"eval_dim_64_max_accuracy": 0.8778987221959299,
"eval_loss": 16.4149112701416,
"eval_runtime": 101.2915,
"eval_samples_per_second": 83.442,
"eval_sequential_score": 0.8765972550875533,
"eval_steps_per_second": 2.616,
"step": 2750
},
{
"epoch": 18.84728649558267,
"grad_norm": 4.982476234436035,
"learning_rate": 1.5638767095401778e-05,
"loss": 16.2106,
"step": 2800
},
{
"epoch": 18.84728649558267,
"eval_dim_128_cosine_accuracy": 0.8780170373876006,
"eval_dim_128_dot_accuracy": 0.12529578797917654,
"eval_dim_128_euclidean_accuracy": 0.878490298154283,
"eval_dim_128_manhattan_accuracy": 0.8794368196876479,
"eval_dim_128_max_accuracy": 0.8794368196876479,
"eval_dim_256_cosine_accuracy": 0.8799100804543304,
"eval_dim_256_dot_accuracy": 0.1226928537624231,
"eval_dim_256_euclidean_accuracy": 0.8797917652626597,
"eval_dim_256_manhattan_accuracy": 0.8801467108376716,
"eval_dim_256_max_accuracy": 0.8801467108376716,
"eval_dim_384_cosine_accuracy": 0.879081874112636,
"eval_dim_384_dot_accuracy": 0.12091812588736393,
"eval_dim_384_euclidean_accuracy": 0.879081874112636,
"eval_dim_384_manhattan_accuracy": 0.8794368196876479,
"eval_dim_384_max_accuracy": 0.8794368196876479,
"eval_dim_64_cosine_accuracy": 0.8767155702792239,
"eval_dim_64_dot_accuracy": 0.13002839564600094,
"eval_dim_64_euclidean_accuracy": 0.8768338854708945,
"eval_dim_64_manhattan_accuracy": 0.8778987221959299,
"eval_dim_64_max_accuracy": 0.8778987221959299,
"eval_loss": 16.423009872436523,
"eval_runtime": 103.6087,
"eval_samples_per_second": 81.576,
"eval_sequential_score": 0.8767155702792239,
"eval_steps_per_second": 2.558,
"step": 2800
},
{
"epoch": 19.183845183003786,
"grad_norm": 6.176373481750488,
"learning_rate": 1.5442433646315792e-05,
"loss": 16.2052,
"step": 2850
},
{
"epoch": 19.183845183003786,
"eval_dim_128_cosine_accuracy": 0.8769522006625651,
"eval_dim_128_dot_accuracy": 0.12576904874585898,
"eval_dim_128_euclidean_accuracy": 0.8758873639375295,
"eval_dim_128_manhattan_accuracy": 0.8793185044959773,
"eval_dim_128_max_accuracy": 0.8793185044959773,
"eval_dim_256_cosine_accuracy": 0.8776620918125887,
"eval_dim_256_dot_accuracy": 0.12328442972077615,
"eval_dim_256_euclidean_accuracy": 0.8778987221959299,
"eval_dim_256_manhattan_accuracy": 0.8796734500709891,
"eval_dim_256_max_accuracy": 0.8796734500709891,
"eval_dim_384_cosine_accuracy": 0.878490298154283,
"eval_dim_384_dot_accuracy": 0.121509701845717,
"eval_dim_384_euclidean_accuracy": 0.878490298154283,
"eval_dim_384_manhattan_accuracy": 0.8814481779460482,
"eval_dim_384_max_accuracy": 0.8814481779460482,
"eval_dim_64_cosine_accuracy": 0.8744675816374823,
"eval_dim_64_dot_accuracy": 0.13369616658778988,
"eval_dim_64_euclidean_accuracy": 0.8742309512541411,
"eval_dim_64_manhattan_accuracy": 0.8781353525792712,
"eval_dim_64_max_accuracy": 0.8781353525792712,
"eval_loss": 16.435117721557617,
"eval_runtime": 104.4101,
"eval_samples_per_second": 80.95,
"eval_sequential_score": 0.8744675816374823,
"eval_steps_per_second": 2.538,
"step": 2850
},
{
"epoch": 19.520403870424904,
"grad_norm": 7.323819160461426,
"learning_rate": 1.5243072835572319e-05,
"loss": 16.186,
"step": 2900
},
{
"epoch": 19.520403870424904,
"eval_dim_128_cosine_accuracy": 0.8776620918125887,
"eval_dim_128_dot_accuracy": 0.12363937529578797,
"eval_dim_128_euclidean_accuracy": 0.8776620918125887,
"eval_dim_128_manhattan_accuracy": 0.876360624704212,
"eval_dim_128_max_accuracy": 0.8776620918125887,
"eval_dim_256_cosine_accuracy": 0.8793185044959773,
"eval_dim_256_dot_accuracy": 0.12198296261239944,
"eval_dim_256_euclidean_accuracy": 0.8789635589209654,
"eval_dim_256_manhattan_accuracy": 0.8777804070042593,
"eval_dim_256_max_accuracy": 0.8793185044959773,
"eval_dim_384_cosine_accuracy": 0.8792001893043067,
"eval_dim_384_dot_accuracy": 0.12079981069569333,
"eval_dim_384_euclidean_accuracy": 0.8792001893043067,
"eval_dim_384_manhattan_accuracy": 0.8789635589209654,
"eval_dim_384_max_accuracy": 0.8792001893043067,
"eval_dim_64_cosine_accuracy": 0.8762423095125415,
"eval_dim_64_dot_accuracy": 0.13097491717936582,
"eval_dim_64_euclidean_accuracy": 0.8748225272124941,
"eval_dim_64_manhattan_accuracy": 0.8782536677709418,
"eval_dim_64_max_accuracy": 0.8782536677709418,
"eval_loss": 16.433080673217773,
"eval_runtime": 101.0285,
"eval_samples_per_second": 83.66,
"eval_sequential_score": 0.8762423095125415,
"eval_steps_per_second": 2.623,
"step": 2900
},
{
"epoch": 19.856962557846025,
"grad_norm": 6.637113571166992,
"learning_rate": 1.5040795557913246e-05,
"loss": 16.1496,
"step": 2950
},
{
"epoch": 19.856962557846025,
"eval_dim_128_cosine_accuracy": 0.8774254614292475,
"eval_dim_128_dot_accuracy": 0.12529578797917654,
"eval_dim_128_euclidean_accuracy": 0.8770705158542357,
"eval_dim_128_manhattan_accuracy": 0.8782536677709418,
"eval_dim_128_max_accuracy": 0.8782536677709418,
"eval_dim_256_cosine_accuracy": 0.8781353525792712,
"eval_dim_256_dot_accuracy": 0.12375769048745859,
"eval_dim_256_euclidean_accuracy": 0.8783719829626124,
"eval_dim_256_manhattan_accuracy": 0.879081874112636,
"eval_dim_256_max_accuracy": 0.879081874112636,
"eval_dim_384_cosine_accuracy": 0.8780170373876006,
"eval_dim_384_dot_accuracy": 0.12198296261239944,
"eval_dim_384_euclidean_accuracy": 0.8780170373876006,
"eval_dim_384_manhattan_accuracy": 0.8786086133459536,
"eval_dim_384_max_accuracy": 0.8786086133459536,
"eval_dim_64_cosine_accuracy": 0.8770705158542357,
"eval_dim_64_dot_accuracy": 0.13357785139611927,
"eval_dim_64_euclidean_accuracy": 0.8756507335541883,
"eval_dim_64_manhattan_accuracy": 0.8775437766209181,
"eval_dim_64_max_accuracy": 0.8775437766209181,
"eval_loss": 16.437721252441406,
"eval_runtime": 103.9645,
"eval_samples_per_second": 81.297,
"eval_sequential_score": 0.8770705158542357,
"eval_steps_per_second": 2.549,
"step": 2950
},
{
"epoch": 20.193521245267142,
"grad_norm": 4.9336957931518555,
"learning_rate": 1.4835714330369445e-05,
"loss": 16.151,
"step": 3000
},
{
"epoch": 20.193521245267142,
"eval_dim_128_cosine_accuracy": 0.8765972550875533,
"eval_dim_128_dot_accuracy": 0.1261239943208708,
"eval_dim_128_euclidean_accuracy": 0.8761239943208708,
"eval_dim_128_manhattan_accuracy": 0.8797917652626597,
"eval_dim_128_max_accuracy": 0.8797917652626597,
"eval_dim_256_cosine_accuracy": 0.8780170373876006,
"eval_dim_256_dot_accuracy": 0.12245622337908188,
"eval_dim_256_euclidean_accuracy": 0.8771888310459063,
"eval_dim_256_manhattan_accuracy": 0.8801467108376716,
"eval_dim_256_max_accuracy": 0.8801467108376716,
"eval_dim_384_cosine_accuracy": 0.8780170373876006,
"eval_dim_384_dot_accuracy": 0.12198296261239944,
"eval_dim_384_euclidean_accuracy": 0.8780170373876006,
"eval_dim_384_manhattan_accuracy": 0.8819214387127308,
"eval_dim_384_max_accuracy": 0.8819214387127308,
"eval_dim_64_cosine_accuracy": 0.8750591575958353,
"eval_dim_64_dot_accuracy": 0.1361807856128727,
"eval_dim_64_euclidean_accuracy": 0.8730477993374349,
"eval_dim_64_manhattan_accuracy": 0.878490298154283,
"eval_dim_64_max_accuracy": 0.878490298154283,
"eval_loss": 16.44074821472168,
"eval_runtime": 101.9564,
"eval_samples_per_second": 82.898,
"eval_sequential_score": 0.8750591575958353,
"eval_steps_per_second": 2.599,
"step": 3000
},
{
"epoch": 20.530079932688263,
"grad_norm": 5.225156784057617,
"learning_rate": 1.4627943229672992e-05,
"loss": 16.1081,
"step": 3050
},
{
"epoch": 20.530079932688263,
"eval_dim_128_cosine_accuracy": 0.8758873639375295,
"eval_dim_128_dot_accuracy": 0.1261239943208708,
"eval_dim_128_euclidean_accuracy": 0.8758873639375295,
"eval_dim_128_manhattan_accuracy": 0.8781353525792712,
"eval_dim_128_max_accuracy": 0.8781353525792712,
"eval_dim_256_cosine_accuracy": 0.8775437766209181,
"eval_dim_256_dot_accuracy": 0.12245622337908188,
"eval_dim_256_euclidean_accuracy": 0.8778987221959299,
"eval_dim_256_manhattan_accuracy": 0.8776620918125887,
"eval_dim_256_max_accuracy": 0.8778987221959299,
"eval_dim_384_cosine_accuracy": 0.8774254614292475,
"eval_dim_384_dot_accuracy": 0.12257453857075248,
"eval_dim_384_euclidean_accuracy": 0.8774254614292475,
"eval_dim_384_manhattan_accuracy": 0.8788452437292948,
"eval_dim_384_max_accuracy": 0.8788452437292948,
"eval_dim_64_cosine_accuracy": 0.8749408424041647,
"eval_dim_64_dot_accuracy": 0.13712730714623758,
"eval_dim_64_euclidean_accuracy": 0.8743492664458117,
"eval_dim_64_manhattan_accuracy": 0.8765972550875533,
"eval_dim_64_max_accuracy": 0.8765972550875533,
"eval_loss": 16.442630767822266,
"eval_runtime": 104.3455,
"eval_samples_per_second": 81.0,
"eval_sequential_score": 0.8749408424041647,
"eval_steps_per_second": 2.54,
"step": 3050
},
{
"epoch": 20.86663862010938,
"grad_norm": 4.5568132400512695,
"learning_rate": 1.4417597828801833e-05,
"loss": 16.0864,
"step": 3100
},
{
"epoch": 20.86663862010938,
"eval_dim_128_cosine_accuracy": 0.8774254614292475,
"eval_dim_128_dot_accuracy": 0.12659725508755323,
"eval_dim_128_euclidean_accuracy": 0.8765972550875533,
"eval_dim_128_manhattan_accuracy": 0.879081874112636,
"eval_dim_128_max_accuracy": 0.879081874112636,
"eval_dim_256_cosine_accuracy": 0.8781353525792712,
"eval_dim_256_dot_accuracy": 0.12292948414576432,
"eval_dim_256_euclidean_accuracy": 0.8780170373876006,
"eval_dim_256_manhattan_accuracy": 0.880619971604354,
"eval_dim_256_max_accuracy": 0.880619971604354,
"eval_dim_384_cosine_accuracy": 0.8787269285376242,
"eval_dim_384_dot_accuracy": 0.12127307146237577,
"eval_dim_384_euclidean_accuracy": 0.8787269285376242,
"eval_dim_384_manhattan_accuracy": 0.8793185044959773,
"eval_dim_384_max_accuracy": 0.8793185044959773,
"eval_dim_64_cosine_accuracy": 0.8745858968291529,
"eval_dim_64_dot_accuracy": 0.13724562233790819,
"eval_dim_64_euclidean_accuracy": 0.8744675816374823,
"eval_dim_64_manhattan_accuracy": 0.8780170373876006,
"eval_dim_64_max_accuracy": 0.8780170373876006,
"eval_loss": 16.441152572631836,
"eval_runtime": 103.8678,
"eval_samples_per_second": 81.373,
"eval_sequential_score": 0.8745858968291529,
"eval_steps_per_second": 2.551,
"step": 3100
},
{
"epoch": 21.203197307530502,
"grad_norm": 6.664557933807373,
"learning_rate": 1.4204795132692146e-05,
"loss": 16.0934,
"step": 3150
},
{
"epoch": 21.203197307530502,
"eval_dim_128_cosine_accuracy": 0.8768338854708945,
"eval_dim_128_dot_accuracy": 0.12789872219592996,
"eval_dim_128_euclidean_accuracy": 0.8758873639375295,
"eval_dim_128_manhattan_accuracy": 0.8803833412210128,
"eval_dim_128_max_accuracy": 0.8803833412210128,
"eval_dim_256_cosine_accuracy": 0.8782536677709418,
"eval_dim_256_dot_accuracy": 0.12411263606247042,
"eval_dim_256_euclidean_accuracy": 0.8777804070042593,
"eval_dim_256_manhattan_accuracy": 0.88180312352106,
"eval_dim_256_max_accuracy": 0.88180312352106,
"eval_dim_384_cosine_accuracy": 0.8794368196876479,
"eval_dim_384_dot_accuracy": 0.12056318031235211,
"eval_dim_384_euclidean_accuracy": 0.8794368196876479,
"eval_dim_384_manhattan_accuracy": 0.881211547562707,
"eval_dim_384_max_accuracy": 0.881211547562707,
"eval_dim_64_cosine_accuracy": 0.8745858968291529,
"eval_dim_64_dot_accuracy": 0.14008518693800284,
"eval_dim_64_euclidean_accuracy": 0.8729294841457643,
"eval_dim_64_manhattan_accuracy": 0.8795551348793185,
"eval_dim_64_max_accuracy": 0.8795551348793185,
"eval_loss": 16.4547176361084,
"eval_runtime": 105.011,
"eval_samples_per_second": 80.487,
"eval_sequential_score": 0.8745858968291529,
"eval_steps_per_second": 2.524,
"step": 3150
},
{
"epoch": 21.53975599495162,
"grad_norm": 6.669680118560791,
"learning_rate": 1.3989653513154165e-05,
"loss": 16.0382,
"step": 3200
},
{
"epoch": 21.53975599495162,
"eval_dim_128_cosine_accuracy": 0.8742309512541411,
"eval_dim_128_dot_accuracy": 0.1283719829626124,
"eval_dim_128_euclidean_accuracy": 0.8738760056791292,
"eval_dim_128_manhattan_accuracy": 0.8748225272124941,
"eval_dim_128_max_accuracy": 0.8748225272124941,
"eval_dim_256_cosine_accuracy": 0.8751774727875059,
"eval_dim_256_dot_accuracy": 0.12446758163748226,
"eval_dim_256_euclidean_accuracy": 0.8754141031708471,
"eval_dim_256_manhattan_accuracy": 0.8765972550875533,
"eval_dim_256_max_accuracy": 0.8765972550875533,
"eval_dim_384_cosine_accuracy": 0.8765972550875533,
"eval_dim_384_dot_accuracy": 0.12340274491244675,
"eval_dim_384_euclidean_accuracy": 0.8765972550875533,
"eval_dim_384_manhattan_accuracy": 0.8761239943208708,
"eval_dim_384_max_accuracy": 0.8765972550875533,
"eval_dim_64_cosine_accuracy": 0.8723379081874113,
"eval_dim_64_dot_accuracy": 0.14020350212967345,
"eval_dim_64_euclidean_accuracy": 0.8703265499290109,
"eval_dim_64_manhattan_accuracy": 0.8754141031708471,
"eval_dim_64_max_accuracy": 0.8754141031708471,
"eval_loss": 16.458948135375977,
"eval_runtime": 101.007,
"eval_samples_per_second": 83.677,
"eval_sequential_score": 0.8723379081874113,
"eval_steps_per_second": 2.624,
"step": 3200
},
{
"epoch": 21.87631468237274,
"grad_norm": 5.666304588317871,
"learning_rate": 1.37722926430277e-05,
"loss": 16.0279,
"step": 3250
},
{
"epoch": 21.87631468237274,
"eval_dim_128_cosine_accuracy": 0.8751774727875059,
"eval_dim_128_dot_accuracy": 0.12979176526265973,
"eval_dim_128_euclidean_accuracy": 0.8743492664458117,
"eval_dim_128_manhattan_accuracy": 0.8771888310459063,
"eval_dim_128_max_accuracy": 0.8771888310459063,
"eval_dim_256_cosine_accuracy": 0.8765972550875533,
"eval_dim_256_dot_accuracy": 0.1260056791292002,
"eval_dim_256_euclidean_accuracy": 0.8761239943208708,
"eval_dim_256_manhattan_accuracy": 0.8796734500709891,
"eval_dim_256_max_accuracy": 0.8796734500709891,
"eval_dim_384_cosine_accuracy": 0.8773071462375769,
"eval_dim_384_dot_accuracy": 0.1226928537624231,
"eval_dim_384_euclidean_accuracy": 0.8773071462375769,
"eval_dim_384_manhattan_accuracy": 0.8777804070042593,
"eval_dim_384_max_accuracy": 0.8777804070042593,
"eval_dim_64_cosine_accuracy": 0.8728111689540937,
"eval_dim_64_dot_accuracy": 0.14221486038807382,
"eval_dim_64_euclidean_accuracy": 0.8732844297207761,
"eval_dim_64_manhattan_accuracy": 0.8776620918125887,
"eval_dim_64_max_accuracy": 0.8776620918125887,
"eval_loss": 16.46676254272461,
"eval_runtime": 102.9103,
"eval_samples_per_second": 82.13,
"eval_sequential_score": 0.8728111689540937,
"eval_steps_per_second": 2.575,
"step": 3250
},
{
"epoch": 22.212873369793858,
"grad_norm": 6.600480556488037,
"learning_rate": 1.3552833429613939e-05,
"loss": 16.0327,
"step": 3300
},
{
"epoch": 22.212873369793858,
"eval_dim_128_cosine_accuracy": 0.8742309512541411,
"eval_dim_128_dot_accuracy": 0.13002839564600094,
"eval_dim_128_euclidean_accuracy": 0.8742309512541411,
"eval_dim_128_manhattan_accuracy": 0.880028395646001,
"eval_dim_128_max_accuracy": 0.880028395646001,
"eval_dim_256_cosine_accuracy": 0.8768338854708945,
"eval_dim_256_dot_accuracy": 0.12363937529578797,
"eval_dim_256_euclidean_accuracy": 0.8764789398958827,
"eval_dim_256_manhattan_accuracy": 0.8814481779460482,
"eval_dim_256_max_accuracy": 0.8814481779460482,
"eval_dim_384_cosine_accuracy": 0.8773071462375769,
"eval_dim_384_dot_accuracy": 0.1226928537624231,
"eval_dim_384_euclidean_accuracy": 0.8773071462375769,
"eval_dim_384_manhattan_accuracy": 0.8807382867960246,
"eval_dim_384_max_accuracy": 0.8807382867960246,
"eval_dim_64_cosine_accuracy": 0.8726928537624231,
"eval_dim_64_dot_accuracy": 0.1432796971131093,
"eval_dim_64_euclidean_accuracy": 0.869971604353999,
"eval_dim_64_manhattan_accuracy": 0.8795551348793185,
"eval_dim_64_max_accuracy": 0.8795551348793185,
"eval_loss": 16.47365379333496,
"eval_runtime": 104.4255,
"eval_samples_per_second": 80.938,
"eval_sequential_score": 0.8726928537624231,
"eval_steps_per_second": 2.538,
"step": 3300
},
{
"epoch": 22.549432057214975,
"grad_norm": 7.925108432769775,
"learning_rate": 1.3331397947420578e-05,
"loss": 15.979,
"step": 3350
},
{
"epoch": 22.549432057214975,
"eval_dim_128_cosine_accuracy": 0.8739943208707998,
"eval_dim_128_dot_accuracy": 0.1293185044959773,
"eval_dim_128_euclidean_accuracy": 0.8732844297207761,
"eval_dim_128_manhattan_accuracy": 0.8782536677709418,
"eval_dim_128_max_accuracy": 0.8782536677709418,
"eval_dim_256_cosine_accuracy": 0.8770705158542357,
"eval_dim_256_dot_accuracy": 0.12328442972077615,
"eval_dim_256_euclidean_accuracy": 0.8773071462375769,
"eval_dim_256_manhattan_accuracy": 0.8793185044959773,
"eval_dim_256_max_accuracy": 0.8793185044959773,
"eval_dim_384_cosine_accuracy": 0.8770705158542357,
"eval_dim_384_dot_accuracy": 0.12292948414576432,
"eval_dim_384_euclidean_accuracy": 0.8770705158542357,
"eval_dim_384_manhattan_accuracy": 0.8778987221959299,
"eval_dim_384_max_accuracy": 0.8778987221959299,
"eval_dim_64_cosine_accuracy": 0.8722195929957407,
"eval_dim_64_dot_accuracy": 0.14162328442972077,
"eval_dim_64_euclidean_accuracy": 0.8700899195456696,
"eval_dim_64_manhattan_accuracy": 0.8767155702792239,
"eval_dim_64_max_accuracy": 0.8767155702792239,
"eval_loss": 16.468605041503906,
"eval_runtime": 101.8518,
"eval_samples_per_second": 82.983,
"eval_sequential_score": 0.8722195929957407,
"eval_steps_per_second": 2.602,
"step": 3350
},
{
"epoch": 22.885990744636096,
"grad_norm": 6.396854877471924,
"learning_rate": 1.3108109370257714e-05,
"loss": 15.9622,
"step": 3400
},
{
"epoch": 22.885990744636096,
"eval_dim_128_cosine_accuracy": 0.8743492664458117,
"eval_dim_128_dot_accuracy": 0.13002839564600094,
"eval_dim_128_euclidean_accuracy": 0.873639375295788,
"eval_dim_128_manhattan_accuracy": 0.8786086133459536,
"eval_dim_128_max_accuracy": 0.8786086133459536,
"eval_dim_256_cosine_accuracy": 0.8760056791292002,
"eval_dim_256_dot_accuracy": 0.12434926644581164,
"eval_dim_256_euclidean_accuracy": 0.8757690487458589,
"eval_dim_256_manhattan_accuracy": 0.8805016564126834,
"eval_dim_256_max_accuracy": 0.8805016564126834,
"eval_dim_384_cosine_accuracy": 0.8764789398958827,
"eval_dim_384_dot_accuracy": 0.12352106010411737,
"eval_dim_384_euclidean_accuracy": 0.8764789398958827,
"eval_dim_384_manhattan_accuracy": 0.8807382867960246,
"eval_dim_384_max_accuracy": 0.8807382867960246,
"eval_dim_64_cosine_accuracy": 0.8721012778040701,
"eval_dim_64_dot_accuracy": 0.14351632749645055,
"eval_dim_64_euclidean_accuracy": 0.8703265499290109,
"eval_dim_64_manhattan_accuracy": 0.8781353525792712,
"eval_dim_64_max_accuracy": 0.8781353525792712,
"eval_loss": 16.473587036132812,
"eval_runtime": 103.4538,
"eval_samples_per_second": 81.698,
"eval_sequential_score": 0.8721012778040701,
"eval_steps_per_second": 2.562,
"step": 3400
},
{
"epoch": 23.222549432057214,
"grad_norm": 4.757622241973877,
"learning_rate": 1.288309190272222e-05,
"loss": 15.9881,
"step": 3450
},
{
"epoch": 23.222549432057214,
"eval_dim_128_cosine_accuracy": 0.8743492664458117,
"eval_dim_128_dot_accuracy": 0.13097491717936582,
"eval_dim_128_euclidean_accuracy": 0.8737576904874585,
"eval_dim_128_manhattan_accuracy": 0.879081874112636,
"eval_dim_128_max_accuracy": 0.879081874112636,
"eval_dim_256_cosine_accuracy": 0.8756507335541883,
"eval_dim_256_dot_accuracy": 0.12588736393752958,
"eval_dim_256_euclidean_accuracy": 0.8747042120208235,
"eval_dim_256_manhattan_accuracy": 0.8795551348793185,
"eval_dim_256_max_accuracy": 0.8795551348793185,
"eval_dim_384_cosine_accuracy": 0.8755324183625177,
"eval_dim_384_dot_accuracy": 0.12446758163748226,
"eval_dim_384_euclidean_accuracy": 0.8755324183625177,
"eval_dim_384_manhattan_accuracy": 0.879081874112636,
"eval_dim_384_max_accuracy": 0.879081874112636,
"eval_dim_64_cosine_accuracy": 0.8723379081874113,
"eval_dim_64_dot_accuracy": 0.14375295787979175,
"eval_dim_64_euclidean_accuracy": 0.8700899195456696,
"eval_dim_64_manhattan_accuracy": 0.8788452437292948,
"eval_dim_64_max_accuracy": 0.8788452437292948,
"eval_loss": 16.48019790649414,
"eval_runtime": 104.0826,
"eval_samples_per_second": 81.205,
"eval_sequential_score": 0.8723379081874113,
"eval_steps_per_second": 2.546,
"step": 3450
},
{
"epoch": 23.559108119478335,
"grad_norm": 5.279081344604492,
"learning_rate": 1.2656470711108763e-05,
"loss": 15.9482,
"step": 3500
},
{
"epoch": 23.559108119478335,
"eval_dim_128_cosine_accuracy": 0.8724562233790819,
"eval_dim_128_dot_accuracy": 0.13073828679602462,
"eval_dim_128_euclidean_accuracy": 0.8728111689540937,
"eval_dim_128_manhattan_accuracy": 0.8783719829626124,
"eval_dim_128_max_accuracy": 0.8783719829626124,
"eval_dim_256_cosine_accuracy": 0.8761239943208708,
"eval_dim_256_dot_accuracy": 0.1250591575958353,
"eval_dim_256_euclidean_accuracy": 0.8761239943208708,
"eval_dim_256_manhattan_accuracy": 0.8797917652626597,
"eval_dim_256_max_accuracy": 0.8797917652626597,
"eval_dim_384_cosine_accuracy": 0.8761239943208708,
"eval_dim_384_dot_accuracy": 0.1238760056791292,
"eval_dim_384_euclidean_accuracy": 0.8761239943208708,
"eval_dim_384_manhattan_accuracy": 0.8770705158542357,
"eval_dim_384_max_accuracy": 0.8770705158542357,
"eval_dim_64_cosine_accuracy": 0.8710364410790346,
"eval_dim_64_dot_accuracy": 0.143989588263133,
"eval_dim_64_euclidean_accuracy": 0.867841930903928,
"eval_dim_64_manhattan_accuracy": 0.8764789398958827,
"eval_dim_64_max_accuracy": 0.8764789398958827,
"eval_loss": 16.482074737548828,
"eval_runtime": 102.3602,
"eval_samples_per_second": 82.571,
"eval_sequential_score": 0.8710364410790346,
"eval_steps_per_second": 2.589,
"step": 3500
},
{
"epoch": 23.895666806899452,
"grad_norm": 5.999639511108398,
"learning_rate": 1.2428371853785872e-05,
"loss": 15.9228,
"step": 3550
},
{
"epoch": 23.895666806899452,
"eval_dim_128_cosine_accuracy": 0.8725745385707525,
"eval_dim_128_dot_accuracy": 0.13310459062943683,
"eval_dim_128_euclidean_accuracy": 0.8719829626123994,
"eval_dim_128_manhattan_accuracy": 0.878490298154283,
"eval_dim_128_max_accuracy": 0.878490298154283,
"eval_dim_256_cosine_accuracy": 0.8748225272124941,
"eval_dim_256_dot_accuracy": 0.12671557027922387,
"eval_dim_256_euclidean_accuracy": 0.8743492664458117,
"eval_dim_256_manhattan_accuracy": 0.879081874112636,
"eval_dim_256_max_accuracy": 0.879081874112636,
"eval_dim_384_cosine_accuracy": 0.8750591575958353,
"eval_dim_384_dot_accuracy": 0.1249408424041647,
"eval_dim_384_euclidean_accuracy": 0.8750591575958353,
"eval_dim_384_manhattan_accuracy": 0.8781353525792712,
"eval_dim_384_max_accuracy": 0.8781353525792712,
"eval_dim_64_cosine_accuracy": 0.870918125887364,
"eval_dim_64_dot_accuracy": 0.14469947941315664,
"eval_dim_64_euclidean_accuracy": 0.8691433980123048,
"eval_dim_64_manhattan_accuracy": 0.8776620918125887,
"eval_dim_64_max_accuracy": 0.8776620918125887,
"eval_loss": 16.499635696411133,
"eval_runtime": 103.3405,
"eval_samples_per_second": 81.788,
"eval_sequential_score": 0.870918125887364,
"eval_steps_per_second": 2.564,
"step": 3550
},
{
"epoch": 24.232225494320573,
"grad_norm": 6.511181354522705,
"learning_rate": 1.2198922211075779e-05,
"loss": 15.9418,
"step": 3600
},
{
"epoch": 24.232225494320573,
"eval_dim_128_cosine_accuracy": 0.870918125887364,
"eval_dim_128_dot_accuracy": 0.1353525792711784,
"eval_dim_128_euclidean_accuracy": 0.8703265499290109,
"eval_dim_128_manhattan_accuracy": 0.8783719829626124,
"eval_dim_128_max_accuracy": 0.8783719829626124,
"eval_dim_256_cosine_accuracy": 0.8728111689540937,
"eval_dim_256_dot_accuracy": 0.12884524372929484,
"eval_dim_256_euclidean_accuracy": 0.8721012778040701,
"eval_dim_256_manhattan_accuracy": 0.8794368196876479,
"eval_dim_256_max_accuracy": 0.8794368196876479,
"eval_dim_384_cosine_accuracy": 0.8734027449124467,
"eval_dim_384_dot_accuracy": 0.12659725508755323,
"eval_dim_384_euclidean_accuracy": 0.8734027449124467,
"eval_dim_384_manhattan_accuracy": 0.8794368196876479,
"eval_dim_384_max_accuracy": 0.8794368196876479,
"eval_dim_64_cosine_accuracy": 0.8698532891623284,
"eval_dim_64_dot_accuracy": 0.14564600094652153,
"eval_dim_64_euclidean_accuracy": 0.8680785612872692,
"eval_dim_64_manhattan_accuracy": 0.8770705158542357,
"eval_dim_64_max_accuracy": 0.8770705158542357,
"eval_loss": 16.497343063354492,
"eval_runtime": 104.6868,
"eval_samples_per_second": 80.736,
"eval_sequential_score": 0.8698532891623284,
"eval_steps_per_second": 2.531,
"step": 3600
},
{
"epoch": 24.56878418174169,
"grad_norm": 5.682207107543945,
"learning_rate": 1.1968249414677055e-05,
"loss": 15.896,
"step": 3650
},
{
"epoch": 24.56878418174169,
"eval_dim_128_cosine_accuracy": 0.8696166587789872,
"eval_dim_128_dot_accuracy": 0.13487931850449597,
"eval_dim_128_euclidean_accuracy": 0.8685518220539518,
"eval_dim_128_manhattan_accuracy": 0.8764789398958827,
"eval_dim_128_max_accuracy": 0.8764789398958827,
"eval_dim_256_cosine_accuracy": 0.8716280170373876,
"eval_dim_256_dot_accuracy": 0.12896355892096545,
"eval_dim_256_euclidean_accuracy": 0.871509701845717,
"eval_dim_256_manhattan_accuracy": 0.8777804070042593,
"eval_dim_256_max_accuracy": 0.8777804070042593,
"eval_dim_384_cosine_accuracy": 0.8726928537624231,
"eval_dim_384_dot_accuracy": 0.1273071462375769,
"eval_dim_384_euclidean_accuracy": 0.8726928537624231,
"eval_dim_384_manhattan_accuracy": 0.8773071462375769,
"eval_dim_384_max_accuracy": 0.8773071462375769,
"eval_dim_64_cosine_accuracy": 0.8685518220539518,
"eval_dim_64_dot_accuracy": 0.14694746805489825,
"eval_dim_64_euclidean_accuracy": 0.8659488878371983,
"eval_dim_64_manhattan_accuracy": 0.8760056791292002,
"eval_dim_64_max_accuracy": 0.8760056791292002,
"eval_loss": 16.498498916625977,
"eval_runtime": 102.6029,
"eval_samples_per_second": 82.376,
"eval_sequential_score": 0.8685518220539518,
"eval_steps_per_second": 2.583,
"step": 3650
},
{
"epoch": 24.90534286916281,
"grad_norm": 5.5915117263793945,
"learning_rate": 1.1736481776669307e-05,
"loss": 15.8788,
"step": 3700
},
{
"epoch": 24.90534286916281,
"eval_dim_128_cosine_accuracy": 0.8691433980123048,
"eval_dim_128_dot_accuracy": 0.1361807856128727,
"eval_dim_128_euclidean_accuracy": 0.8697349739706578,
"eval_dim_128_manhattan_accuracy": 0.8747042120208235,
"eval_dim_128_max_accuracy": 0.8747042120208235,
"eval_dim_256_cosine_accuracy": 0.871509701845717,
"eval_dim_256_dot_accuracy": 0.13073828679602462,
"eval_dim_256_euclidean_accuracy": 0.8704448651206815,
"eval_dim_256_manhattan_accuracy": 0.8770705158542357,
"eval_dim_256_max_accuracy": 0.8770705158542357,
"eval_dim_384_cosine_accuracy": 0.8717463322290582,
"eval_dim_384_dot_accuracy": 0.1282536677709418,
"eval_dim_384_euclidean_accuracy": 0.8717463322290582,
"eval_dim_384_manhattan_accuracy": 0.8758873639375295,
"eval_dim_384_max_accuracy": 0.8758873639375295,
"eval_dim_64_cosine_accuracy": 0.8661855182205395,
"eval_dim_64_dot_accuracy": 0.14824893516327498,
"eval_dim_64_euclidean_accuracy": 0.8667770941788926,
"eval_dim_64_manhattan_accuracy": 0.8744675816374823,
"eval_dim_64_max_accuracy": 0.8744675816374823,
"eval_loss": 16.517175674438477,
"eval_runtime": 103.5179,
"eval_samples_per_second": 81.648,
"eval_sequential_score": 0.8661855182205395,
"eval_steps_per_second": 2.56,
"step": 3700
},
{
"epoch": 25.24190155658393,
"grad_norm": 5.408066749572754,
"learning_rate": 1.150374821813937e-05,
"loss": 15.9147,
"step": 3750
},
{
"epoch": 25.24190155658393,
"eval_dim_128_cosine_accuracy": 0.8677236157122574,
"eval_dim_128_dot_accuracy": 0.13724562233790819,
"eval_dim_128_euclidean_accuracy": 0.8673686701372456,
"eval_dim_128_manhattan_accuracy": 0.8768338854708945,
"eval_dim_128_max_accuracy": 0.8768338854708945,
"eval_dim_256_cosine_accuracy": 0.8705631803123521,
"eval_dim_256_dot_accuracy": 0.13144817794604827,
"eval_dim_256_euclidean_accuracy": 0.869971604353999,
"eval_dim_256_manhattan_accuracy": 0.8782536677709418,
"eval_dim_256_max_accuracy": 0.8782536677709418,
"eval_dim_384_cosine_accuracy": 0.8711547562707052,
"eval_dim_384_dot_accuracy": 0.12884524372929484,
"eval_dim_384_euclidean_accuracy": 0.8711547562707052,
"eval_dim_384_manhattan_accuracy": 0.8782536677709418,
"eval_dim_384_max_accuracy": 0.8782536677709418,
"eval_dim_64_cosine_accuracy": 0.8661855182205395,
"eval_dim_64_dot_accuracy": 0.14955040227165167,
"eval_dim_64_euclidean_accuracy": 0.866658778987222,
"eval_dim_64_manhattan_accuracy": 0.8768338854708945,
"eval_dim_64_max_accuracy": 0.8768338854708945,
"eval_loss": 16.506189346313477,
"eval_runtime": 103.9114,
"eval_samples_per_second": 81.339,
"eval_sequential_score": 0.8661855182205395,
"eval_steps_per_second": 2.55,
"step": 3750
},
{
"epoch": 25.578460244005047,
"grad_norm": 6.964442253112793,
"learning_rate": 1.1270178197468788e-05,
"loss": 15.857,
"step": 3800
},
{
"epoch": 25.578460244005047,
"eval_dim_128_cosine_accuracy": 0.8683151916706106,
"eval_dim_128_dot_accuracy": 0.13712730714623758,
"eval_dim_128_euclidean_accuracy": 0.86819687647894,
"eval_dim_128_manhattan_accuracy": 0.8739943208707998,
"eval_dim_128_max_accuracy": 0.8739943208707998,
"eval_dim_256_cosine_accuracy": 0.8717463322290582,
"eval_dim_256_dot_accuracy": 0.13073828679602462,
"eval_dim_256_euclidean_accuracy": 0.871509701845717,
"eval_dim_256_manhattan_accuracy": 0.8761239943208708,
"eval_dim_256_max_accuracy": 0.8761239943208708,
"eval_dim_384_cosine_accuracy": 0.8731661145291055,
"eval_dim_384_dot_accuracy": 0.12683388547089447,
"eval_dim_384_euclidean_accuracy": 0.8731661145291055,
"eval_dim_384_manhattan_accuracy": 0.8755324183625177,
"eval_dim_384_max_accuracy": 0.8755324183625177,
"eval_dim_64_cosine_accuracy": 0.8663038334122102,
"eval_dim_64_dot_accuracy": 0.1499053478466635,
"eval_dim_64_euclidean_accuracy": 0.865120681495504,
"eval_dim_64_manhattan_accuracy": 0.8748225272124941,
"eval_dim_64_max_accuracy": 0.8748225272124941,
"eval_loss": 16.505783081054688,
"eval_runtime": 102.7207,
"eval_samples_per_second": 82.281,
"eval_sequential_score": 0.8663038334122102,
"eval_steps_per_second": 2.58,
"step": 3800
},
{
"epoch": 25.915018931426168,
"grad_norm": 17.978727340698242,
"learning_rate": 1.1035901638322392e-05,
"loss": 15.8291,
"step": 3850
},
{
"epoch": 25.915018931426168,
"eval_dim_128_cosine_accuracy": 0.8673686701372456,
"eval_dim_128_dot_accuracy": 0.13771888310459063,
"eval_dim_128_euclidean_accuracy": 0.8665404637955514,
"eval_dim_128_manhattan_accuracy": 0.8745858968291529,
"eval_dim_128_max_accuracy": 0.8745858968291529,
"eval_dim_256_cosine_accuracy": 0.8702082347373403,
"eval_dim_256_dot_accuracy": 0.1320397539044013,
"eval_dim_256_euclidean_accuracy": 0.868788452437293,
"eval_dim_256_manhattan_accuracy": 0.8756507335541883,
"eval_dim_256_max_accuracy": 0.8756507335541883,
"eval_dim_384_cosine_accuracy": 0.8705631803123521,
"eval_dim_384_dot_accuracy": 0.1294368196876479,
"eval_dim_384_euclidean_accuracy": 0.8705631803123521,
"eval_dim_384_manhattan_accuracy": 0.8762423095125415,
"eval_dim_384_max_accuracy": 0.8762423095125415,
"eval_dim_64_cosine_accuracy": 0.8644107903454804,
"eval_dim_64_dot_accuracy": 0.15309985802177,
"eval_dim_64_euclidean_accuracy": 0.8640558447704685,
"eval_dim_64_manhattan_accuracy": 0.8731661145291055,
"eval_dim_64_max_accuracy": 0.8731661145291055,
"eval_loss": 16.520679473876953,
"eval_runtime": 104.1552,
"eval_samples_per_second": 81.148,
"eval_sequential_score": 0.8644107903454804,
"eval_steps_per_second": 2.544,
"step": 3850
},
{
"epoch": 26.251577618847286,
"grad_norm": 7.759204387664795,
"learning_rate": 1.080104885737807e-05,
"loss": 15.8802,
"step": 3900
},
{
"epoch": 26.251577618847286,
"eval_dim_128_cosine_accuracy": 0.867841930903928,
"eval_dim_128_dot_accuracy": 0.13913866540463796,
"eval_dim_128_euclidean_accuracy": 0.86819687647894,
"eval_dim_128_manhattan_accuracy": 0.8750591575958353,
"eval_dim_128_max_accuracy": 0.8750591575958353,
"eval_dim_256_cosine_accuracy": 0.8697349739706578,
"eval_dim_256_dot_accuracy": 0.1318031235210601,
"eval_dim_256_euclidean_accuracy": 0.8697349739706578,
"eval_dim_256_manhattan_accuracy": 0.8764789398958827,
"eval_dim_256_max_accuracy": 0.8764789398958827,
"eval_dim_384_cosine_accuracy": 0.8713913866540464,
"eval_dim_384_dot_accuracy": 0.1286086133459536,
"eval_dim_384_euclidean_accuracy": 0.8713913866540464,
"eval_dim_384_manhattan_accuracy": 0.8762423095125415,
"eval_dim_384_max_accuracy": 0.8762423095125415,
"eval_dim_64_cosine_accuracy": 0.8664221486038808,
"eval_dim_64_dot_accuracy": 0.15061523899668716,
"eval_dim_64_euclidean_accuracy": 0.8655939422621864,
"eval_dim_64_manhattan_accuracy": 0.8737576904874585,
"eval_dim_64_max_accuracy": 0.8737576904874585,
"eval_loss": 16.52326011657715,
"eval_runtime": 103.0214,
"eval_samples_per_second": 82.041,
"eval_sequential_score": 0.8664221486038808,
"eval_steps_per_second": 2.572,
"step": 3900
},
{
"epoch": 26.588136306268407,
"grad_norm": 6.951057434082031,
"learning_rate": 1.0565750491837925e-05,
"loss": 15.846,
"step": 3950
},
{
"epoch": 26.588136306268407,
"eval_dim_128_cosine_accuracy": 0.8685518220539518,
"eval_dim_128_dot_accuracy": 0.13689067676289635,
"eval_dim_128_euclidean_accuracy": 0.867841930903928,
"eval_dim_128_manhattan_accuracy": 0.8729294841457643,
"eval_dim_128_max_accuracy": 0.8729294841457643,
"eval_dim_256_cosine_accuracy": 0.8712730714623758,
"eval_dim_256_dot_accuracy": 0.13097491717936582,
"eval_dim_256_euclidean_accuracy": 0.8704448651206815,
"eval_dim_256_manhattan_accuracy": 0.8765972550875533,
"eval_dim_256_max_accuracy": 0.8765972550875533,
"eval_dim_384_cosine_accuracy": 0.8717463322290582,
"eval_dim_384_dot_accuracy": 0.1282536677709418,
"eval_dim_384_euclidean_accuracy": 0.8717463322290582,
"eval_dim_384_manhattan_accuracy": 0.8741126360624705,
"eval_dim_384_max_accuracy": 0.8741126360624705,
"eval_dim_64_cosine_accuracy": 0.8654756270705158,
"eval_dim_64_dot_accuracy": 0.1499053478466635,
"eval_dim_64_euclidean_accuracy": 0.865120681495504,
"eval_dim_64_manhattan_accuracy": 0.8729294841457643,
"eval_dim_64_max_accuracy": 0.8729294841457643,
"eval_loss": 16.517038345336914,
"eval_runtime": 103.0824,
"eval_samples_per_second": 81.993,
"eval_sequential_score": 0.8654756270705158,
"eval_steps_per_second": 2.571,
"step": 3950
},
{
"epoch": 26.924694993689524,
"grad_norm": 7.398913860321045,
"learning_rate": 1.0330137426761136e-05,
"loss": 15.8012,
"step": 4000
},
{
"epoch": 26.924694993689524,
"eval_dim_128_cosine_accuracy": 0.8663038334122102,
"eval_dim_128_dot_accuracy": 0.1386654046379555,
"eval_dim_128_euclidean_accuracy": 0.865712257453857,
"eval_dim_128_manhattan_accuracy": 0.8742309512541411,
"eval_dim_128_max_accuracy": 0.8742309512541411,
"eval_dim_256_cosine_accuracy": 0.86819687647894,
"eval_dim_256_dot_accuracy": 0.1319214387127307,
"eval_dim_256_euclidean_accuracy": 0.8680785612872692,
"eval_dim_256_manhattan_accuracy": 0.8762423095125415,
"eval_dim_256_max_accuracy": 0.8762423095125415,
"eval_dim_384_cosine_accuracy": 0.8698532891623284,
"eval_dim_384_dot_accuracy": 0.13014671083767157,
"eval_dim_384_euclidean_accuracy": 0.8698532891623284,
"eval_dim_384_manhattan_accuracy": 0.8737576904874585,
"eval_dim_384_max_accuracy": 0.8737576904874585,
"eval_dim_64_cosine_accuracy": 0.8634642688121155,
"eval_dim_64_dot_accuracy": 0.1508518693800284,
"eval_dim_64_euclidean_accuracy": 0.8637008991954567,
"eval_dim_64_manhattan_accuracy": 0.8728111689540937,
"eval_dim_64_max_accuracy": 0.8728111689540937,
"eval_loss": 16.53356170654297,
"eval_runtime": 104.1737,
"eval_samples_per_second": 81.134,
"eval_sequential_score": 0.8634642688121155,
"eval_steps_per_second": 2.544,
"step": 4000
}
],
"logging_steps": 50,
"max_steps": 7400,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}