bobox's picture
Training in progress, step 428, checkpoint
e1f853e verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.4163726182074807,
"eval_steps": 27,
"global_step": 428,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.016937191249117856,
"grad_norm": 34.22002029418945,
"learning_rate": 6.818181818181818e-07,
"loss": 7.2372,
"step": 3
},
{
"epoch": 0.03387438249823571,
"grad_norm": 21.76839828491211,
"learning_rate": 1.3636363636363636e-06,
"loss": 6.855,
"step": 6
},
{
"epoch": 0.05081157374735357,
"grad_norm": 21.260774612426758,
"learning_rate": 2.0454545454545457e-06,
"loss": 7.4707,
"step": 9
},
{
"epoch": 0.06774876499647142,
"grad_norm": 16.885921478271484,
"learning_rate": 2.7272727272727272e-06,
"loss": 7.0187,
"step": 12
},
{
"epoch": 0.08468595624558928,
"grad_norm": 19.509899139404297,
"learning_rate": 3.409090909090909e-06,
"loss": 6.6756,
"step": 15
},
{
"epoch": 0.10162314749470713,
"grad_norm": 7.9427289962768555,
"learning_rate": 4.0909090909090915e-06,
"loss": 6.0155,
"step": 18
},
{
"epoch": 0.11856033874382499,
"grad_norm": 7.325345039367676,
"learning_rate": 4.772727272727273e-06,
"loss": 6.1644,
"step": 21
},
{
"epoch": 0.13549752999294284,
"grad_norm": 7.544689655303955,
"learning_rate": 5.4545454545454545e-06,
"loss": 6.2158,
"step": 24
},
{
"epoch": 0.1524347212420607,
"grad_norm": 5.141758918762207,
"learning_rate": 6.136363636363637e-06,
"loss": 6.1369,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.109375,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373,
"eval_VitaminC_cosine_ap": 0.5356492030729136,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.7148199081420898,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 414.4264831542969,
"eval_VitaminC_dot_ap": 0.5108219546857565,
"eval_VitaminC_dot_f1": 0.6507936507936508,
"eval_VitaminC_dot_f1_threshold": 271.6522521972656,
"eval_VitaminC_dot_precision": 0.4823529411764706,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.55078125,
"eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387,
"eval_VitaminC_euclidean_ap": 0.5226419655984281,
"eval_VitaminC_euclidean_f1": 0.6505376344086021,
"eval_VitaminC_euclidean_f1_threshold": 15.194067001342773,
"eval_VitaminC_euclidean_precision": 0.4859437751004016,
"eval_VitaminC_euclidean_recall": 0.983739837398374,
"eval_VitaminC_manhattan_accuracy": 0.546875,
"eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188,
"eval_VitaminC_manhattan_ap": 0.5237451656134715,
"eval_VitaminC_manhattan_f1": 0.6542553191489362,
"eval_VitaminC_manhattan_f1_threshold": 259.007080078125,
"eval_VitaminC_manhattan_precision": 0.48616600790513836,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.5546875,
"eval_VitaminC_max_accuracy_threshold": 414.4264831542969,
"eval_VitaminC_max_ap": 0.5356492030729136,
"eval_VitaminC_max_f1": 0.6542553191489362,
"eval_VitaminC_max_f1_threshold": 271.6522521972656,
"eval_VitaminC_max_precision": 0.48616600790513836,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5356492030729136,
"eval_sts-test_pearson_cosine": 0.056062031998983373,
"eval_sts-test_pearson_dot": 0.2979259445723872,
"eval_sts-test_pearson_euclidean": 0.0498319208592713,
"eval_sts-test_pearson_manhattan": 0.07381429239121526,
"eval_sts-test_pearson_max": 0.2979259445723872,
"eval_sts-test_spearman_cosine": 0.1066788491614481,
"eval_sts-test_spearman_dot": 0.315952670306405,
"eval_sts-test_spearman_euclidean": 0.07303394554435191,
"eval_sts-test_spearman_manhattan": 0.09039525717692232,
"eval_sts-test_spearman_max": 0.315952670306405,
"eval_vitaminc-pairs_loss": 2.698580741882324,
"eval_vitaminc-pairs_runtime": 1.4747,
"eval_vitaminc-pairs_samples_per_second": 73.236,
"eval_vitaminc-pairs_steps_per_second": 1.356,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_negation-triplets_loss": 5.142906665802002,
"eval_negation-triplets_runtime": 0.2993,
"eval_negation-triplets_samples_per_second": 213.865,
"eval_negation-triplets_steps_per_second": 3.342,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_scitail-pairs-pos_loss": 1.9216958284378052,
"eval_scitail-pairs-pos_runtime": 0.3834,
"eval_scitail-pairs-pos_samples_per_second": 140.842,
"eval_scitail-pairs-pos_steps_per_second": 2.608,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_xsum-pairs_loss": 6.073049545288086,
"eval_xsum-pairs_runtime": 3.1587,
"eval_xsum-pairs_samples_per_second": 40.523,
"eval_xsum-pairs_steps_per_second": 0.633,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_sciq_pairs_loss": 0.3449864387512207,
"eval_sciq_pairs_runtime": 3.3747,
"eval_sciq_pairs_samples_per_second": 37.93,
"eval_sciq_pairs_steps_per_second": 0.593,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_qasc_pairs_loss": 3.2267842292785645,
"eval_qasc_pairs_runtime": 0.6576,
"eval_qasc_pairs_samples_per_second": 194.646,
"eval_qasc_pairs_steps_per_second": 3.041,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_openbookqa_pairs_loss": 4.405983924865723,
"eval_openbookqa_pairs_runtime": 0.6107,
"eval_openbookqa_pairs_samples_per_second": 209.594,
"eval_openbookqa_pairs_steps_per_second": 3.275,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_msmarco_pairs_loss": 6.937691688537598,
"eval_msmarco_pairs_runtime": 1.3091,
"eval_msmarco_pairs_samples_per_second": 97.779,
"eval_msmarco_pairs_steps_per_second": 1.528,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_nq_pairs_loss": 6.794108867645264,
"eval_nq_pairs_runtime": 2.3968,
"eval_nq_pairs_samples_per_second": 53.404,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_trivia_pairs_loss": 6.3355631828308105,
"eval_trivia_pairs_runtime": 4.4974,
"eval_trivia_pairs_samples_per_second": 28.461,
"eval_trivia_pairs_steps_per_second": 0.445,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_gooaq_pairs_loss": 6.405998706817627,
"eval_gooaq_pairs_runtime": 0.8745,
"eval_gooaq_pairs_samples_per_second": 146.37,
"eval_gooaq_pairs_steps_per_second": 2.287,
"step": 27
},
{
"epoch": 0.1524347212420607,
"eval_paws-pos_loss": 2.2308223247528076,
"eval_paws-pos_runtime": 0.6998,
"eval_paws-pos_samples_per_second": 182.908,
"eval_paws-pos_steps_per_second": 2.858,
"step": 27
},
{
"epoch": 0.16937191249117856,
"grad_norm": 5.885251522064209,
"learning_rate": 6.818181818181818e-06,
"loss": 5.7653,
"step": 30
},
{
"epoch": 0.1863091037402964,
"grad_norm": 7.357480049133301,
"learning_rate": 7.500000000000001e-06,
"loss": 6.1259,
"step": 33
},
{
"epoch": 0.20324629498941427,
"grad_norm": 7.321795463562012,
"learning_rate": 8.181818181818183e-06,
"loss": 5.7539,
"step": 36
},
{
"epoch": 0.22018348623853212,
"grad_norm": 4.239792346954346,
"learning_rate": 8.863636363636365e-06,
"loss": 6.0131,
"step": 39
},
{
"epoch": 0.23712067748764998,
"grad_norm": 3.9554407596588135,
"learning_rate": 9.545454545454547e-06,
"loss": 6.0074,
"step": 42
},
{
"epoch": 0.25405786873676783,
"grad_norm": 4.406026840209961,
"learning_rate": 1.0227272727272729e-05,
"loss": 5.7125,
"step": 45
},
{
"epoch": 0.2709950599858857,
"grad_norm": 7.235893249511719,
"learning_rate": 1.0909090909090909e-05,
"loss": 5.5634,
"step": 48
},
{
"epoch": 0.28793225123500354,
"grad_norm": 5.330288410186768,
"learning_rate": 1.1590909090909093e-05,
"loss": 5.2924,
"step": 51
},
{
"epoch": 0.3048694424841214,
"grad_norm": 7.216403961181641,
"learning_rate": 1.2272727272727274e-05,
"loss": 5.2286,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.046875,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.54296875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855,
"eval_VitaminC_cosine_ap": 0.5212059026196154,
"eval_VitaminC_cosine_f1": 0.6576819407008085,
"eval_VitaminC_cosine_f1_threshold": 0.7373804450035095,
"eval_VitaminC_cosine_precision": 0.49193548387096775,
"eval_VitaminC_cosine_recall": 0.991869918699187,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 418.2774658203125,
"eval_VitaminC_dot_ap": 0.5160594099493883,
"eval_VitaminC_dot_f1": 0.6521739130434782,
"eval_VitaminC_dot_f1_threshold": 291.5081481933594,
"eval_VitaminC_dot_precision": 0.4897959183673469,
"eval_VitaminC_dot_recall": 0.975609756097561,
"eval_VitaminC_euclidean_accuracy": 0.5390625,
"eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465,
"eval_VitaminC_euclidean_ap": 0.5224837623095228,
"eval_VitaminC_euclidean_f1": 0.6576819407008085,
"eval_VitaminC_euclidean_f1_threshold": 14.879999160766602,
"eval_VitaminC_euclidean_precision": 0.49193548387096775,
"eval_VitaminC_euclidean_recall": 0.991869918699187,
"eval_VitaminC_manhattan_accuracy": 0.53515625,
"eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938,
"eval_VitaminC_manhattan_ap": 0.5186382518671783,
"eval_VitaminC_manhattan_f1": 0.6576086956521738,
"eval_VitaminC_manhattan_f1_threshold": 263.32452392578125,
"eval_VitaminC_manhattan_precision": 0.49387755102040815,
"eval_VitaminC_manhattan_recall": 0.983739837398374,
"eval_VitaminC_max_accuracy": 0.55078125,
"eval_VitaminC_max_accuracy_threshold": 418.2774658203125,
"eval_VitaminC_max_ap": 0.5224837623095228,
"eval_VitaminC_max_f1": 0.6576819407008085,
"eval_VitaminC_max_f1_threshold": 291.5081481933594,
"eval_VitaminC_max_precision": 0.49387755102040815,
"eval_VitaminC_max_recall": 0.991869918699187,
"eval_sequential_score": 0.5224837623095228,
"eval_sts-test_pearson_cosine": 0.14377091128453176,
"eval_sts-test_pearson_dot": 0.24728387094758872,
"eval_sts-test_pearson_euclidean": 0.14604155960515372,
"eval_sts-test_pearson_manhattan": 0.1446467532231986,
"eval_sts-test_pearson_max": 0.24728387094758872,
"eval_sts-test_spearman_cosine": 0.1968510434344728,
"eval_sts-test_spearman_dot": 0.29467218283745694,
"eval_sts-test_spearman_euclidean": 0.17218164683969664,
"eval_sts-test_spearman_manhattan": 0.17741843340856742,
"eval_sts-test_spearman_max": 0.29467218283745694,
"eval_vitaminc-pairs_loss": 2.664700746536255,
"eval_vitaminc-pairs_runtime": 1.4487,
"eval_vitaminc-pairs_samples_per_second": 74.551,
"eval_vitaminc-pairs_steps_per_second": 1.381,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_negation-triplets_loss": 4.6218037605285645,
"eval_negation-triplets_runtime": 0.2971,
"eval_negation-triplets_samples_per_second": 215.438,
"eval_negation-triplets_steps_per_second": 3.366,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_scitail-pairs-pos_loss": 1.2413936853408813,
"eval_scitail-pairs-pos_runtime": 0.372,
"eval_scitail-pairs-pos_samples_per_second": 145.175,
"eval_scitail-pairs-pos_steps_per_second": 2.688,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_xsum-pairs_loss": 5.249766826629639,
"eval_xsum-pairs_runtime": 3.1506,
"eval_xsum-pairs_samples_per_second": 40.627,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_sciq_pairs_loss": 0.2961578667163849,
"eval_sciq_pairs_runtime": 3.2909,
"eval_sciq_pairs_samples_per_second": 38.895,
"eval_sciq_pairs_steps_per_second": 0.608,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_qasc_pairs_loss": 2.530872344970703,
"eval_qasc_pairs_runtime": 0.6255,
"eval_qasc_pairs_samples_per_second": 204.63,
"eval_qasc_pairs_steps_per_second": 3.197,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_openbookqa_pairs_loss": 3.8855104446411133,
"eval_openbookqa_pairs_runtime": 0.5742,
"eval_openbookqa_pairs_samples_per_second": 222.914,
"eval_openbookqa_pairs_steps_per_second": 3.483,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_msmarco_pairs_loss": 5.246406555175781,
"eval_msmarco_pairs_runtime": 1.2872,
"eval_msmarco_pairs_samples_per_second": 99.442,
"eval_msmarco_pairs_steps_per_second": 1.554,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_nq_pairs_loss": 5.332630157470703,
"eval_nq_pairs_runtime": 2.3739,
"eval_nq_pairs_samples_per_second": 53.92,
"eval_nq_pairs_steps_per_second": 0.843,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_trivia_pairs_loss": 5.647429943084717,
"eval_trivia_pairs_runtime": 4.4729,
"eval_trivia_pairs_samples_per_second": 28.617,
"eval_trivia_pairs_steps_per_second": 0.447,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_gooaq_pairs_loss": 5.225871562957764,
"eval_gooaq_pairs_runtime": 0.8715,
"eval_gooaq_pairs_samples_per_second": 146.868,
"eval_gooaq_pairs_steps_per_second": 2.295,
"step": 54
},
{
"epoch": 0.3048694424841214,
"eval_paws-pos_loss": 0.8335962891578674,
"eval_paws-pos_runtime": 0.6844,
"eval_paws-pos_samples_per_second": 187.036,
"eval_paws-pos_steps_per_second": 2.922,
"step": 54
},
{
"epoch": 0.32180663373323926,
"grad_norm": 6.847682952880859,
"learning_rate": 1.2954545454545455e-05,
"loss": 4.4811,
"step": 57
},
{
"epoch": 0.3387438249823571,
"grad_norm": 8.383002281188965,
"learning_rate": 1.3636363636363637e-05,
"loss": 4.4239,
"step": 60
},
{
"epoch": 0.35568101623147497,
"grad_norm": 7.014843463897705,
"learning_rate": 1.431818181818182e-05,
"loss": 4.0273,
"step": 63
},
{
"epoch": 0.3726182074805928,
"grad_norm": 5.9739885330200195,
"learning_rate": 1.5000000000000002e-05,
"loss": 3.4508,
"step": 66
},
{
"epoch": 0.3895553987297107,
"grad_norm": 11.202752113342285,
"learning_rate": 1.5681818181818182e-05,
"loss": 3.9702,
"step": 69
},
{
"epoch": 0.40649258997882853,
"grad_norm": 7.064818859100342,
"learning_rate": 1.6363636363636366e-05,
"loss": 3.5295,
"step": 72
},
{
"epoch": 0.4234297812279464,
"grad_norm": 5.912719249725342,
"learning_rate": 1.7045454545454546e-05,
"loss": 3.6395,
"step": 75
},
{
"epoch": 0.44036697247706424,
"grad_norm": 5.033207893371582,
"learning_rate": 1.772727272727273e-05,
"loss": 3.2398,
"step": 78
},
{
"epoch": 0.4573041637261821,
"grad_norm": 5.218384265899658,
"learning_rate": 1.840909090909091e-05,
"loss": 3.116,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468,
"eval_VitaminC_cosine_ap": 0.5292859731465609,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.452939510345459,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5546875,
"eval_VitaminC_dot_accuracy_threshold": 414.42559814453125,
"eval_VitaminC_dot_ap": 0.5222732504955002,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 212.6934814453125,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5546875,
"eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875,
"eval_VitaminC_euclidean_ap": 0.5291787221346742,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 22.683509826660156,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.55859375,
"eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547,
"eval_VitaminC_manhattan_ap": 0.5305698453165033,
"eval_VitaminC_manhattan_f1": 0.6542553191489362,
"eval_VitaminC_manhattan_f1_threshold": 415.5366516113281,
"eval_VitaminC_manhattan_precision": 0.48616600790513836,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.55859375,
"eval_VitaminC_max_accuracy_threshold": 414.42559814453125,
"eval_VitaminC_max_ap": 0.5305698453165033,
"eval_VitaminC_max_f1": 0.6542553191489362,
"eval_VitaminC_max_f1_threshold": 415.5366516113281,
"eval_VitaminC_max_precision": 0.48616600790513836,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5305698453165033,
"eval_sts-test_pearson_cosine": 0.45494716382349193,
"eval_sts-test_pearson_dot": 0.44837123659858896,
"eval_sts-test_pearson_euclidean": 0.4480861256491879,
"eval_sts-test_pearson_manhattan": 0.4417008219313264,
"eval_sts-test_pearson_max": 0.45494716382349193,
"eval_sts-test_spearman_cosine": 0.48921418507251446,
"eval_sts-test_spearman_dot": 0.46707725062744593,
"eval_sts-test_spearman_euclidean": 0.4610824798409968,
"eval_sts-test_spearman_manhattan": 0.46068648052845956,
"eval_sts-test_spearman_max": 0.48921418507251446,
"eval_vitaminc-pairs_loss": 2.5043575763702393,
"eval_vitaminc-pairs_runtime": 1.4778,
"eval_vitaminc-pairs_samples_per_second": 73.079,
"eval_vitaminc-pairs_steps_per_second": 1.353,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_negation-triplets_loss": 3.4229447841644287,
"eval_negation-triplets_runtime": 0.2991,
"eval_negation-triplets_samples_per_second": 213.954,
"eval_negation-triplets_steps_per_second": 3.343,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_scitail-pairs-pos_loss": 0.2784869372844696,
"eval_scitail-pairs-pos_runtime": 0.3633,
"eval_scitail-pairs-pos_samples_per_second": 148.649,
"eval_scitail-pairs-pos_steps_per_second": 2.753,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_xsum-pairs_loss": 2.428964614868164,
"eval_xsum-pairs_runtime": 3.1548,
"eval_xsum-pairs_samples_per_second": 40.573,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_sciq_pairs_loss": 0.15256048738956451,
"eval_sciq_pairs_runtime": 3.2432,
"eval_sciq_pairs_samples_per_second": 39.467,
"eval_sciq_pairs_steps_per_second": 0.617,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_qasc_pairs_loss": 1.2902077436447144,
"eval_qasc_pairs_runtime": 0.6211,
"eval_qasc_pairs_samples_per_second": 206.085,
"eval_qasc_pairs_steps_per_second": 3.22,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_openbookqa_pairs_loss": 2.4784862995147705,
"eval_openbookqa_pairs_runtime": 0.5758,
"eval_openbookqa_pairs_samples_per_second": 222.308,
"eval_openbookqa_pairs_steps_per_second": 3.474,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_msmarco_pairs_loss": 2.967724084854126,
"eval_msmarco_pairs_runtime": 1.2944,
"eval_msmarco_pairs_samples_per_second": 98.885,
"eval_msmarco_pairs_steps_per_second": 1.545,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_nq_pairs_loss": 3.358661413192749,
"eval_nq_pairs_runtime": 2.3827,
"eval_nq_pairs_samples_per_second": 53.722,
"eval_nq_pairs_steps_per_second": 0.839,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_trivia_pairs_loss": 3.1391680240631104,
"eval_trivia_pairs_runtime": 4.4155,
"eval_trivia_pairs_samples_per_second": 28.989,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_gooaq_pairs_loss": 2.8774912357330322,
"eval_gooaq_pairs_runtime": 0.8746,
"eval_gooaq_pairs_samples_per_second": 146.346,
"eval_gooaq_pairs_steps_per_second": 2.287,
"step": 81
},
{
"epoch": 0.4573041637261821,
"eval_paws-pos_loss": 0.19754411280155182,
"eval_paws-pos_runtime": 0.684,
"eval_paws-pos_samples_per_second": 187.141,
"eval_paws-pos_steps_per_second": 2.924,
"step": 81
},
{
"epoch": 0.47424135497529996,
"grad_norm": 5.149569988250732,
"learning_rate": 1.9090909090909094e-05,
"loss": 2.6049,
"step": 84
},
{
"epoch": 0.4911785462244178,
"grad_norm": 5.012928009033203,
"learning_rate": 1.9772727272727274e-05,
"loss": 2.7738,
"step": 87
},
{
"epoch": 0.5081157374735357,
"grad_norm": 4.880725383758545,
"learning_rate": 2.0454545454545457e-05,
"loss": 2.5416,
"step": 90
},
{
"epoch": 0.5250529287226535,
"grad_norm": 5.618528366088867,
"learning_rate": 2.113636363636364e-05,
"loss": 2.3913,
"step": 93
},
{
"epoch": 0.5419901199717714,
"grad_norm": 5.020515441894531,
"learning_rate": 2.1818181818181818e-05,
"loss": 2.3144,
"step": 96
},
{
"epoch": 0.5589273112208892,
"grad_norm": 4.818451404571533,
"learning_rate": 2.25e-05,
"loss": 2.1857,
"step": 99
},
{
"epoch": 0.5758645024700071,
"grad_norm": 5.094771385192871,
"learning_rate": 2.3181818181818185e-05,
"loss": 1.8881,
"step": 102
},
{
"epoch": 0.592801693719125,
"grad_norm": 3.795962333679199,
"learning_rate": 2.3863636363636365e-05,
"loss": 2.2699,
"step": 105
},
{
"epoch": 0.6097388849682428,
"grad_norm": 4.46245813369751,
"learning_rate": 2.454545454545455e-05,
"loss": 2.1425,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.5546875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8830112218856812,
"eval_VitaminC_cosine_ap": 0.5302172957740995,
"eval_VitaminC_cosine_f1": 0.6558265582655827,
"eval_VitaminC_cosine_f1_threshold": 0.5253933668136597,
"eval_VitaminC_cosine_precision": 0.491869918699187,
"eval_VitaminC_cosine_recall": 0.983739837398374,
"eval_VitaminC_dot_accuracy": 0.5390625,
"eval_VitaminC_dot_accuracy_threshold": 427.5576171875,
"eval_VitaminC_dot_ap": 0.517120157327104,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 175.80963134765625,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5625,
"eval_VitaminC_euclidean_accuracy_threshold": 10.817148208618164,
"eval_VitaminC_euclidean_ap": 0.532255112376416,
"eval_VitaminC_euclidean_f1": 0.6558265582655827,
"eval_VitaminC_euclidean_f1_threshold": 21.10729217529297,
"eval_VitaminC_euclidean_precision": 0.491869918699187,
"eval_VitaminC_euclidean_recall": 0.983739837398374,
"eval_VitaminC_manhattan_accuracy": 0.5546875,
"eval_VitaminC_manhattan_accuracy_threshold": 224.70416259765625,
"eval_VitaminC_manhattan_ap": 0.5298930718604624,
"eval_VitaminC_manhattan_f1": 0.6558265582655827,
"eval_VitaminC_manhattan_f1_threshold": 415.3311767578125,
"eval_VitaminC_manhattan_precision": 0.491869918699187,
"eval_VitaminC_manhattan_recall": 0.983739837398374,
"eval_VitaminC_max_accuracy": 0.5625,
"eval_VitaminC_max_accuracy_threshold": 427.5576171875,
"eval_VitaminC_max_ap": 0.532255112376416,
"eval_VitaminC_max_f1": 0.6558265582655827,
"eval_VitaminC_max_f1_threshold": 415.3311767578125,
"eval_VitaminC_max_precision": 0.491869918699187,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.532255112376416,
"eval_sts-test_pearson_cosine": 0.755963151531783,
"eval_sts-test_pearson_dot": 0.7384823091540473,
"eval_sts-test_pearson_euclidean": 0.764089555623164,
"eval_sts-test_pearson_manhattan": 0.7670467479701304,
"eval_sts-test_pearson_max": 0.7670467479701304,
"eval_sts-test_spearman_cosine": 0.7806331583677342,
"eval_sts-test_spearman_dot": 0.7442842883778696,
"eval_sts-test_spearman_euclidean": 0.7674205303105437,
"eval_sts-test_spearman_manhattan": 0.7664974867050092,
"eval_sts-test_spearman_max": 0.7806331583677342,
"eval_vitaminc-pairs_loss": 2.721674919128418,
"eval_vitaminc-pairs_runtime": 1.4468,
"eval_vitaminc-pairs_samples_per_second": 74.65,
"eval_vitaminc-pairs_steps_per_second": 1.382,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_negation-triplets_loss": 2.338909387588501,
"eval_negation-triplets_runtime": 0.3017,
"eval_negation-triplets_samples_per_second": 212.101,
"eval_negation-triplets_steps_per_second": 3.314,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_scitail-pairs-pos_loss": 0.23291125893592834,
"eval_scitail-pairs-pos_runtime": 0.3664,
"eval_scitail-pairs-pos_samples_per_second": 147.385,
"eval_scitail-pairs-pos_steps_per_second": 2.729,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_xsum-pairs_loss": 1.2065516710281372,
"eval_xsum-pairs_runtime": 3.1488,
"eval_xsum-pairs_samples_per_second": 40.65,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_sciq_pairs_loss": 0.09487833082675934,
"eval_sciq_pairs_runtime": 3.2618,
"eval_sciq_pairs_samples_per_second": 39.242,
"eval_sciq_pairs_steps_per_second": 0.613,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_qasc_pairs_loss": 0.8461999297142029,
"eval_qasc_pairs_runtime": 0.6246,
"eval_qasc_pairs_samples_per_second": 204.93,
"eval_qasc_pairs_steps_per_second": 3.202,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_openbookqa_pairs_loss": 1.5739191770553589,
"eval_openbookqa_pairs_runtime": 0.5751,
"eval_openbookqa_pairs_samples_per_second": 222.568,
"eval_openbookqa_pairs_steps_per_second": 3.478,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_msmarco_pairs_loss": 1.6446179151535034,
"eval_msmarco_pairs_runtime": 1.2828,
"eval_msmarco_pairs_samples_per_second": 99.784,
"eval_msmarco_pairs_steps_per_second": 1.559,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_nq_pairs_loss": 2.364896535873413,
"eval_nq_pairs_runtime": 2.3802,
"eval_nq_pairs_samples_per_second": 53.777,
"eval_nq_pairs_steps_per_second": 0.84,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_trivia_pairs_loss": 1.7080069780349731,
"eval_trivia_pairs_runtime": 4.4372,
"eval_trivia_pairs_samples_per_second": 28.847,
"eval_trivia_pairs_steps_per_second": 0.451,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_gooaq_pairs_loss": 1.7924479246139526,
"eval_gooaq_pairs_runtime": 0.8761,
"eval_gooaq_pairs_samples_per_second": 146.094,
"eval_gooaq_pairs_steps_per_second": 2.283,
"step": 108
},
{
"epoch": 0.6097388849682428,
"eval_paws-pos_loss": 0.08000019192695618,
"eval_paws-pos_runtime": 0.6839,
"eval_paws-pos_samples_per_second": 187.168,
"eval_paws-pos_steps_per_second": 2.924,
"step": 108
},
{
"epoch": 0.6266760762173607,
"grad_norm": 4.418070316314697,
"learning_rate": 2.5227272727272732e-05,
"loss": 2.1276,
"step": 111
},
{
"epoch": 0.6436132674664785,
"grad_norm": 4.3495259284973145,
"learning_rate": 2.590909090909091e-05,
"loss": 1.7531,
"step": 114
},
{
"epoch": 0.6605504587155964,
"grad_norm": 4.294332027435303,
"learning_rate": 2.6590909090909093e-05,
"loss": 2.0179,
"step": 117
},
{
"epoch": 0.6774876499647142,
"grad_norm": 3.4215610027313232,
"learning_rate": 2.7272727272727273e-05,
"loss": 1.5305,
"step": 120
},
{
"epoch": 0.6944248412138321,
"grad_norm": 4.37844181060791,
"learning_rate": 2.7954545454545457e-05,
"loss": 1.6925,
"step": 123
},
{
"epoch": 0.7113620324629499,
"grad_norm": 4.019878387451172,
"learning_rate": 2.863636363636364e-05,
"loss": 1.5248,
"step": 126
},
{
"epoch": 0.7282992237120678,
"grad_norm": 4.662445068359375,
"learning_rate": 2.931818181818182e-05,
"loss": 1.523,
"step": 129
},
{
"epoch": 0.7452364149611856,
"grad_norm": 4.6323161125183105,
"learning_rate": 3.0000000000000004e-05,
"loss": 1.5474,
"step": 132
},
{
"epoch": 0.7621736062103035,
"grad_norm": 4.586575984954834,
"learning_rate": 3.068181818181819e-05,
"loss": 1.7221,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.56640625,
"eval_VitaminC_cosine_accuracy_threshold": 0.8478574156761169,
"eval_VitaminC_cosine_ap": 0.5325579595957614,
"eval_VitaminC_cosine_f1": 0.6559999999999999,
"eval_VitaminC_cosine_f1_threshold": 0.35839784145355225,
"eval_VitaminC_cosine_precision": 0.4880952380952381,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 366.9839172363281,
"eval_VitaminC_dot_ap": 0.5326813797607027,
"eval_VitaminC_dot_f1": 0.6559999999999999,
"eval_VitaminC_dot_f1_threshold": 157.35829162597656,
"eval_VitaminC_dot_precision": 0.4880952380952381,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.5625,
"eval_VitaminC_euclidean_accuracy_threshold": 12.044445037841797,
"eval_VitaminC_euclidean_ap": 0.5304103559932005,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 24.461441040039062,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5625,
"eval_VitaminC_manhattan_accuracy_threshold": 239.24815368652344,
"eval_VitaminC_manhattan_ap": 0.5314780667834758,
"eval_VitaminC_manhattan_f1": 0.6575342465753424,
"eval_VitaminC_manhattan_f1_threshold": 400.6834716796875,
"eval_VitaminC_manhattan_precision": 0.49586776859504134,
"eval_VitaminC_manhattan_recall": 0.975609756097561,
"eval_VitaminC_max_accuracy": 0.56640625,
"eval_VitaminC_max_accuracy_threshold": 366.9839172363281,
"eval_VitaminC_max_ap": 0.5326813797607027,
"eval_VitaminC_max_f1": 0.6575342465753424,
"eval_VitaminC_max_f1_threshold": 400.6834716796875,
"eval_VitaminC_max_precision": 0.49586776859504134,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5326813797607027,
"eval_sts-test_pearson_cosine": 0.7919597804368175,
"eval_sts-test_pearson_dot": 0.7994867531185785,
"eval_sts-test_pearson_euclidean": 0.8117960113303863,
"eval_sts-test_pearson_manhattan": 0.8144714466358016,
"eval_sts-test_pearson_max": 0.8144714466358016,
"eval_sts-test_spearman_cosine": 0.831478610786181,
"eval_sts-test_spearman_dot": 0.8192534746855707,
"eval_sts-test_spearman_euclidean": 0.8185577905406703,
"eval_sts-test_spearman_manhattan": 0.8154771593606782,
"eval_sts-test_spearman_max": 0.831478610786181,
"eval_vitaminc-pairs_loss": 2.852091073989868,
"eval_vitaminc-pairs_runtime": 1.4427,
"eval_vitaminc-pairs_samples_per_second": 74.858,
"eval_vitaminc-pairs_steps_per_second": 1.386,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_negation-triplets_loss": 2.074247121810913,
"eval_negation-triplets_runtime": 0.3,
"eval_negation-triplets_samples_per_second": 213.353,
"eval_negation-triplets_steps_per_second": 3.334,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_scitail-pairs-pos_loss": 0.2149849385023117,
"eval_scitail-pairs-pos_runtime": 0.3744,
"eval_scitail-pairs-pos_samples_per_second": 144.219,
"eval_scitail-pairs-pos_steps_per_second": 2.671,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_xsum-pairs_loss": 0.7706837058067322,
"eval_xsum-pairs_runtime": 3.1609,
"eval_xsum-pairs_samples_per_second": 40.495,
"eval_xsum-pairs_steps_per_second": 0.633,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_sciq_pairs_loss": 0.07513368874788284,
"eval_sciq_pairs_runtime": 3.2949,
"eval_sciq_pairs_samples_per_second": 38.848,
"eval_sciq_pairs_steps_per_second": 0.607,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_qasc_pairs_loss": 0.6355602741241455,
"eval_qasc_pairs_runtime": 0.6392,
"eval_qasc_pairs_samples_per_second": 200.246,
"eval_qasc_pairs_steps_per_second": 3.129,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_openbookqa_pairs_loss": 1.4014525413513184,
"eval_openbookqa_pairs_runtime": 0.622,
"eval_openbookqa_pairs_samples_per_second": 205.786,
"eval_openbookqa_pairs_steps_per_second": 3.215,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_msmarco_pairs_loss": 1.1524099111557007,
"eval_msmarco_pairs_runtime": 1.31,
"eval_msmarco_pairs_samples_per_second": 97.709,
"eval_msmarco_pairs_steps_per_second": 1.527,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_nq_pairs_loss": 1.7768574953079224,
"eval_nq_pairs_runtime": 2.3979,
"eval_nq_pairs_samples_per_second": 53.379,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_trivia_pairs_loss": 1.4495295286178589,
"eval_trivia_pairs_runtime": 4.4194,
"eval_trivia_pairs_samples_per_second": 28.964,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_gooaq_pairs_loss": 1.3955378532409668,
"eval_gooaq_pairs_runtime": 0.8788,
"eval_gooaq_pairs_samples_per_second": 145.649,
"eval_gooaq_pairs_steps_per_second": 2.276,
"step": 135
},
{
"epoch": 0.7621736062103035,
"eval_paws-pos_loss": 0.06006813049316406,
"eval_paws-pos_runtime": 0.6896,
"eval_paws-pos_samples_per_second": 185.603,
"eval_paws-pos_steps_per_second": 2.9,
"step": 135
},
{
"epoch": 0.7791107974594214,
"grad_norm": 3.864208936691284,
"learning_rate": 3.1363636363636365e-05,
"loss": 1.5366,
"step": 138
},
{
"epoch": 0.7960479887085392,
"grad_norm": 3.837550640106201,
"learning_rate": 3.204545454545455e-05,
"loss": 1.3045,
"step": 141
},
{
"epoch": 0.8129851799576571,
"grad_norm": 3.5258102416992188,
"learning_rate": 3.272727272727273e-05,
"loss": 1.1999,
"step": 144
},
{
"epoch": 0.8299223712067749,
"grad_norm": 3.4431183338165283,
"learning_rate": 3.340909090909091e-05,
"loss": 1.3483,
"step": 147
},
{
"epoch": 0.8468595624558928,
"grad_norm": 3.6455864906311035,
"learning_rate": 3.409090909090909e-05,
"loss": 1.2009,
"step": 150
},
{
"epoch": 0.8637967537050106,
"grad_norm": 4.508525371551514,
"learning_rate": 3.4772727272727276e-05,
"loss": 1.4495,
"step": 153
},
{
"epoch": 0.8807339449541285,
"grad_norm": 3.0432400703430176,
"learning_rate": 3.545454545454546e-05,
"loss": 1.2329,
"step": 156
},
{
"epoch": 0.8976711362032463,
"grad_norm": 3.0190365314483643,
"learning_rate": 3.613636363636364e-05,
"loss": 1.1905,
"step": 159
},
{
"epoch": 0.9146083274523642,
"grad_norm": 3.74668288230896,
"learning_rate": 3.681818181818182e-05,
"loss": 1.277,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8101799488067627,
"eval_VitaminC_cosine_ap": 0.5298515171639175,
"eval_VitaminC_cosine_f1": 0.6542553191489362,
"eval_VitaminC_cosine_f1_threshold": 0.345889687538147,
"eval_VitaminC_cosine_precision": 0.48616600790513836,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.55078125,
"eval_VitaminC_dot_accuracy_threshold": 373.5804443359375,
"eval_VitaminC_dot_ap": 0.5310954683437364,
"eval_VitaminC_dot_f1": 0.6542553191489362,
"eval_VitaminC_dot_f1_threshold": 155.41326904296875,
"eval_VitaminC_dot_precision": 0.48616600790513836,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.57421875,
"eval_VitaminC_euclidean_accuracy_threshold": 13.60124683380127,
"eval_VitaminC_euclidean_ap": 0.5286057955992807,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 22.904512405395508,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 262.37322998046875,
"eval_VitaminC_manhattan_ap": 0.5253560845853567,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 465.94549560546875,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.57421875,
"eval_VitaminC_max_accuracy_threshold": 373.5804443359375,
"eval_VitaminC_max_ap": 0.5310954683437364,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 465.94549560546875,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5310954683437364,
"eval_sts-test_pearson_cosine": 0.8067612938723231,
"eval_sts-test_pearson_dot": 0.8217874837658639,
"eval_sts-test_pearson_euclidean": 0.827948115812785,
"eval_sts-test_pearson_manhattan": 0.8261527694953693,
"eval_sts-test_pearson_max": 0.827948115812785,
"eval_sts-test_spearman_cosine": 0.8547777638284432,
"eval_sts-test_spearman_dot": 0.8498786150097738,
"eval_sts-test_spearman_euclidean": 0.8373845860667446,
"eval_sts-test_spearman_manhattan": 0.8324507067477893,
"eval_sts-test_spearman_max": 0.8547777638284432,
"eval_vitaminc-pairs_loss": 2.776399612426758,
"eval_vitaminc-pairs_runtime": 1.4503,
"eval_vitaminc-pairs_samples_per_second": 74.467,
"eval_vitaminc-pairs_steps_per_second": 1.379,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_negation-triplets_loss": 2.005451202392578,
"eval_negation-triplets_runtime": 0.2981,
"eval_negation-triplets_samples_per_second": 214.709,
"eval_negation-triplets_steps_per_second": 3.355,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_scitail-pairs-pos_loss": 0.19877880811691284,
"eval_scitail-pairs-pos_runtime": 0.3623,
"eval_scitail-pairs-pos_samples_per_second": 149.043,
"eval_scitail-pairs-pos_steps_per_second": 2.76,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_xsum-pairs_loss": 0.5586928725242615,
"eval_xsum-pairs_runtime": 3.1466,
"eval_xsum-pairs_samples_per_second": 40.679,
"eval_xsum-pairs_steps_per_second": 0.636,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_sciq_pairs_loss": 0.06038254499435425,
"eval_sciq_pairs_runtime": 3.4092,
"eval_sciq_pairs_samples_per_second": 37.545,
"eval_sciq_pairs_steps_per_second": 0.587,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_qasc_pairs_loss": 0.49434012174606323,
"eval_qasc_pairs_runtime": 0.6342,
"eval_qasc_pairs_samples_per_second": 201.832,
"eval_qasc_pairs_steps_per_second": 3.154,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_openbookqa_pairs_loss": 1.1903400421142578,
"eval_openbookqa_pairs_runtime": 0.5754,
"eval_openbookqa_pairs_samples_per_second": 222.449,
"eval_openbookqa_pairs_steps_per_second": 3.476,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_msmarco_pairs_loss": 0.8656420707702637,
"eval_msmarco_pairs_runtime": 1.2858,
"eval_msmarco_pairs_samples_per_second": 99.547,
"eval_msmarco_pairs_steps_per_second": 1.555,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_nq_pairs_loss": 1.1553651094436646,
"eval_nq_pairs_runtime": 2.3754,
"eval_nq_pairs_samples_per_second": 53.885,
"eval_nq_pairs_steps_per_second": 0.842,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_trivia_pairs_loss": 1.2928619384765625,
"eval_trivia_pairs_runtime": 4.4084,
"eval_trivia_pairs_samples_per_second": 29.035,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_gooaq_pairs_loss": 1.1580811738967896,
"eval_gooaq_pairs_runtime": 0.8731,
"eval_gooaq_pairs_samples_per_second": 146.607,
"eval_gooaq_pairs_steps_per_second": 2.291,
"step": 162
},
{
"epoch": 0.9146083274523642,
"eval_paws-pos_loss": 0.052534349262714386,
"eval_paws-pos_runtime": 0.6835,
"eval_paws-pos_samples_per_second": 187.258,
"eval_paws-pos_steps_per_second": 2.926,
"step": 162
},
{
"epoch": 0.9315455187014821,
"grad_norm": 4.7817864418029785,
"learning_rate": 3.7500000000000003e-05,
"loss": 1.339,
"step": 165
},
{
"epoch": 0.9484827099505999,
"grad_norm": 4.000570774078369,
"learning_rate": 3.818181818181819e-05,
"loss": 1.1535,
"step": 168
},
{
"epoch": 0.9654199011997178,
"grad_norm": 3.5971670150756836,
"learning_rate": 3.8863636363636364e-05,
"loss": 1.1643,
"step": 171
},
{
"epoch": 0.9823570924488356,
"grad_norm": 3.6582131385803223,
"learning_rate": 3.954545454545455e-05,
"loss": 1.2221,
"step": 174
},
{
"epoch": 0.9992942836979535,
"grad_norm": 4.0953898429870605,
"learning_rate": 3.9999477905707075e-05,
"loss": 1.0974,
"step": 177
},
{
"epoch": 1.0162314749470713,
"grad_norm": 4.092026233673096,
"learning_rate": 3.999164730903481e-05,
"loss": 1.0984,
"step": 180
},
{
"epoch": 1.0331686661961892,
"grad_norm": 3.6480906009674072,
"learning_rate": 3.997442539262898e-05,
"loss": 1.0543,
"step": 183
},
{
"epoch": 1.050105857445307,
"grad_norm": 3.433056592941284,
"learning_rate": 3.99478242943326e-05,
"loss": 1.0994,
"step": 186
},
{
"epoch": 1.067043048694425,
"grad_norm": 3.507981777191162,
"learning_rate": 3.991186276234698e-05,
"loss": 1.0621,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.578125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7840081453323364,
"eval_VitaminC_cosine_ap": 0.5400770399437144,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.39448243379592896,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 323.20281982421875,
"eval_VitaminC_dot_ap": 0.5420016101916201,
"eval_VitaminC_dot_f1": 0.6575342465753424,
"eval_VitaminC_dot_f1_threshold": 198.04354858398438,
"eval_VitaminC_dot_precision": 0.49586776859504134,
"eval_VitaminC_dot_recall": 0.975609756097561,
"eval_VitaminC_euclidean_accuracy": 0.5859375,
"eval_VitaminC_euclidean_accuracy_threshold": 13.84214973449707,
"eval_VitaminC_euclidean_ap": 0.5392157650683609,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 22.595678329467773,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5703125,
"eval_VitaminC_manhattan_accuracy_threshold": 275.1253356933594,
"eval_VitaminC_manhattan_ap": 0.5341380380767263,
"eval_VitaminC_manhattan_f1": 0.6576819407008085,
"eval_VitaminC_manhattan_f1_threshold": 457.04986572265625,
"eval_VitaminC_manhattan_precision": 0.49193548387096775,
"eval_VitaminC_manhattan_recall": 0.991869918699187,
"eval_VitaminC_max_accuracy": 0.5859375,
"eval_VitaminC_max_accuracy_threshold": 323.20281982421875,
"eval_VitaminC_max_ap": 0.5420016101916201,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 457.04986572265625,
"eval_VitaminC_max_precision": 0.49586776859504134,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5420016101916201,
"eval_sts-test_pearson_cosine": 0.8193410747427454,
"eval_sts-test_pearson_dot": 0.8275444476338831,
"eval_sts-test_pearson_euclidean": 0.8464528142983967,
"eval_sts-test_pearson_manhattan": 0.8440476980962159,
"eval_sts-test_pearson_max": 0.8464528142983967,
"eval_sts-test_spearman_cosine": 0.8680272706642878,
"eval_sts-test_spearman_dot": 0.8555529342729671,
"eval_sts-test_spearman_euclidean": 0.8542457068859202,
"eval_sts-test_spearman_manhattan": 0.8510265117511795,
"eval_sts-test_spearman_max": 0.8680272706642878,
"eval_vitaminc-pairs_loss": 2.6755428314208984,
"eval_vitaminc-pairs_runtime": 1.4509,
"eval_vitaminc-pairs_samples_per_second": 74.437,
"eval_vitaminc-pairs_steps_per_second": 1.378,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_negation-triplets_loss": 1.9071491956710815,
"eval_negation-triplets_runtime": 0.3051,
"eval_negation-triplets_samples_per_second": 209.756,
"eval_negation-triplets_steps_per_second": 3.277,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_scitail-pairs-pos_loss": 0.18539850413799286,
"eval_scitail-pairs-pos_runtime": 0.4199,
"eval_scitail-pairs-pos_samples_per_second": 128.604,
"eval_scitail-pairs-pos_steps_per_second": 2.382,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_xsum-pairs_loss": 0.38365328311920166,
"eval_xsum-pairs_runtime": 3.1907,
"eval_xsum-pairs_samples_per_second": 40.116,
"eval_xsum-pairs_steps_per_second": 0.627,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_sciq_pairs_loss": 0.05558515340089798,
"eval_sciq_pairs_runtime": 3.2891,
"eval_sciq_pairs_samples_per_second": 38.917,
"eval_sciq_pairs_steps_per_second": 0.608,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_qasc_pairs_loss": 0.40469691157341003,
"eval_qasc_pairs_runtime": 0.6267,
"eval_qasc_pairs_samples_per_second": 204.245,
"eval_qasc_pairs_steps_per_second": 3.191,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_openbookqa_pairs_loss": 1.0837312936782837,
"eval_openbookqa_pairs_runtime": 0.5765,
"eval_openbookqa_pairs_samples_per_second": 222.02,
"eval_openbookqa_pairs_steps_per_second": 3.469,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_msmarco_pairs_loss": 0.6897398233413696,
"eval_msmarco_pairs_runtime": 1.2918,
"eval_msmarco_pairs_samples_per_second": 99.089,
"eval_msmarco_pairs_steps_per_second": 1.548,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_nq_pairs_loss": 0.9603796601295471,
"eval_nq_pairs_runtime": 2.3975,
"eval_nq_pairs_samples_per_second": 53.39,
"eval_nq_pairs_steps_per_second": 0.834,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_trivia_pairs_loss": 1.200446605682373,
"eval_trivia_pairs_runtime": 4.4582,
"eval_trivia_pairs_samples_per_second": 28.711,
"eval_trivia_pairs_steps_per_second": 0.449,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_gooaq_pairs_loss": 1.0353316068649292,
"eval_gooaq_pairs_runtime": 0.8765,
"eval_gooaq_pairs_samples_per_second": 146.042,
"eval_gooaq_pairs_steps_per_second": 2.282,
"step": 189
},
{
"epoch": 1.067043048694425,
"eval_paws-pos_loss": 0.042069558054208755,
"eval_paws-pos_runtime": 0.6909,
"eval_paws-pos_samples_per_second": 185.263,
"eval_paws-pos_steps_per_second": 2.895,
"step": 189
},
{
"epoch": 1.0839802399435428,
"grad_norm": 2.979419469833374,
"learning_rate": 3.986656614201813e-05,
"loss": 0.8724,
"step": 192
},
{
"epoch": 1.1009174311926606,
"grad_norm": 2.835219144821167,
"learning_rate": 3.981196635797361e-05,
"loss": 0.9381,
"step": 195
},
{
"epoch": 1.1178546224417785,
"grad_norm": 3.6650869846343994,
"learning_rate": 3.974810189162238e-05,
"loss": 0.9617,
"step": 198
},
{
"epoch": 1.1347918136908963,
"grad_norm": 4.188896656036377,
"learning_rate": 3.967501775403343e-05,
"loss": 1.0139,
"step": 201
},
{
"epoch": 1.1517290049400142,
"grad_norm": 3.1624915599823,
"learning_rate": 3.959276545421244e-05,
"loss": 1.1073,
"step": 204
},
{
"epoch": 1.168666196189132,
"grad_norm": 3.245002508163452,
"learning_rate": 3.950140296279871e-05,
"loss": 0.8365,
"step": 207
},
{
"epoch": 1.18560338743825,
"grad_norm": 4.376185894012451,
"learning_rate": 3.9400994671208e-05,
"loss": 1.1012,
"step": 210
},
{
"epoch": 1.2025405786873677,
"grad_norm": 3.236583948135376,
"learning_rate": 3.9291611346250066e-05,
"loss": 1.0016,
"step": 213
},
{
"epoch": 1.2194777699364856,
"grad_norm": 3.7601733207702637,
"learning_rate": 3.9173330080252904e-05,
"loss": 1.0957,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7863086462020874,
"eval_VitaminC_cosine_ap": 0.538511783260847,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.4006580412387848,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.57421875,
"eval_VitaminC_dot_accuracy_threshold": 323.53277587890625,
"eval_VitaminC_dot_ap": 0.5304994537787167,
"eval_VitaminC_dot_f1": 0.6577540106951871,
"eval_VitaminC_dot_f1_threshold": 166.45921325683594,
"eval_VitaminC_dot_precision": 0.4900398406374502,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 13.631423950195312,
"eval_VitaminC_euclidean_ap": 0.5363284984763951,
"eval_VitaminC_euclidean_f1": 0.6542553191489362,
"eval_VitaminC_euclidean_f1_threshold": 25.392715454101562,
"eval_VitaminC_euclidean_precision": 0.48616600790513836,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 283.5897216796875,
"eval_VitaminC_manhattan_ap": 0.5327191155331534,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 491.0370178222656,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 323.53277587890625,
"eval_VitaminC_max_ap": 0.538511783260847,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 491.0370178222656,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.538511783260847,
"eval_sts-test_pearson_cosine": 0.8156684919084325,
"eval_sts-test_pearson_dot": 0.8230786053133633,
"eval_sts-test_pearson_euclidean": 0.845348828865422,
"eval_sts-test_pearson_manhattan": 0.8432655375716184,
"eval_sts-test_pearson_max": 0.845348828865422,
"eval_sts-test_spearman_cosine": 0.8655524539841267,
"eval_sts-test_spearman_dot": 0.8507196659909223,
"eval_sts-test_spearman_euclidean": 0.8547050804103192,
"eval_sts-test_spearman_manhattan": 0.8508668230591436,
"eval_sts-test_spearman_max": 0.8655524539841267,
"eval_vitaminc-pairs_loss": 2.5465524196624756,
"eval_vitaminc-pairs_runtime": 1.4425,
"eval_vitaminc-pairs_samples_per_second": 74.869,
"eval_vitaminc-pairs_steps_per_second": 1.386,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_negation-triplets_loss": 1.9161474704742432,
"eval_negation-triplets_runtime": 0.2994,
"eval_negation-triplets_samples_per_second": 213.785,
"eval_negation-triplets_steps_per_second": 3.34,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_scitail-pairs-pos_loss": 0.19009728729724884,
"eval_scitail-pairs-pos_runtime": 0.3745,
"eval_scitail-pairs-pos_samples_per_second": 144.203,
"eval_scitail-pairs-pos_steps_per_second": 2.67,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_xsum-pairs_loss": 0.35912859439849854,
"eval_xsum-pairs_runtime": 3.1543,
"eval_xsum-pairs_samples_per_second": 40.58,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_sciq_pairs_loss": 0.05168920382857323,
"eval_sciq_pairs_runtime": 3.2561,
"eval_sciq_pairs_samples_per_second": 39.31,
"eval_sciq_pairs_steps_per_second": 0.614,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_qasc_pairs_loss": 0.30753791332244873,
"eval_qasc_pairs_runtime": 0.6201,
"eval_qasc_pairs_samples_per_second": 206.418,
"eval_qasc_pairs_steps_per_second": 3.225,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_openbookqa_pairs_loss": 0.9365726113319397,
"eval_openbookqa_pairs_runtime": 0.5832,
"eval_openbookqa_pairs_samples_per_second": 219.496,
"eval_openbookqa_pairs_steps_per_second": 3.43,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_msmarco_pairs_loss": 0.5819053053855896,
"eval_msmarco_pairs_runtime": 1.2858,
"eval_msmarco_pairs_samples_per_second": 99.551,
"eval_msmarco_pairs_steps_per_second": 1.555,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_nq_pairs_loss": 0.8172401785850525,
"eval_nq_pairs_runtime": 2.3809,
"eval_nq_pairs_samples_per_second": 53.761,
"eval_nq_pairs_steps_per_second": 0.84,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_trivia_pairs_loss": 1.1411677598953247,
"eval_trivia_pairs_runtime": 4.4162,
"eval_trivia_pairs_samples_per_second": 28.984,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_gooaq_pairs_loss": 0.9686058759689331,
"eval_gooaq_pairs_runtime": 0.8788,
"eval_gooaq_pairs_samples_per_second": 145.645,
"eval_gooaq_pairs_steps_per_second": 2.276,
"step": 216
},
{
"epoch": 1.2194777699364856,
"eval_paws-pos_loss": 0.03953952714800835,
"eval_paws-pos_runtime": 0.708,
"eval_paws-pos_samples_per_second": 180.782,
"eval_paws-pos_steps_per_second": 2.825,
"step": 216
},
{
"epoch": 1.2364149611856035,
"grad_norm": 3.566471576690674,
"learning_rate": 3.904623423672881e-05,
"loss": 1.1273,
"step": 219
},
{
"epoch": 1.2533521524347213,
"grad_norm": 4.086460590362549,
"learning_rate": 3.891041339162053e-05,
"loss": 1.2568,
"step": 222
},
{
"epoch": 1.2702893436838392,
"grad_norm": 3.2877376079559326,
"learning_rate": 3.876596327016904e-05,
"loss": 0.873,
"step": 225
},
{
"epoch": 1.287226534932957,
"grad_norm": 3.383211851119995,
"learning_rate": 3.861298567944728e-05,
"loss": 1.0003,
"step": 228
},
{
"epoch": 1.3041637261820749,
"grad_norm": 3.8474605083465576,
"learning_rate": 3.8451588436607487e-05,
"loss": 1.142,
"step": 231
},
{
"epoch": 1.3211009174311927,
"grad_norm": 3.027008533477783,
"learning_rate": 3.8281885292892706e-05,
"loss": 0.807,
"step": 234
},
{
"epoch": 1.3380381086803106,
"grad_norm": 2.9607250690460205,
"learning_rate": 3.810399585346599e-05,
"loss": 1.0231,
"step": 237
},
{
"epoch": 1.3549752999294284,
"grad_norm": 2.511488676071167,
"learning_rate": 3.791804549311382e-05,
"loss": 0.797,
"step": 240
},
{
"epoch": 1.3719124911785463,
"grad_norm": 2.603672504425049,
"learning_rate": 3.7724165267883146e-05,
"loss": 0.8473,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.578125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7651997804641724,
"eval_VitaminC_cosine_ap": 0.5427753322056709,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.38563254475593567,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5703125,
"eval_VitaminC_dot_accuracy_threshold": 330.23577880859375,
"eval_VitaminC_dot_ap": 0.5507967714924796,
"eval_VitaminC_dot_f1": 0.6595174262734584,
"eval_VitaminC_dot_f1_threshold": 160.55694580078125,
"eval_VitaminC_dot_precision": 0.492,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 13.033781051635742,
"eval_VitaminC_euclidean_ap": 0.5406935655135654,
"eval_VitaminC_euclidean_f1": 0.6576819407008085,
"eval_VitaminC_euclidean_f1_threshold": 22.224994659423828,
"eval_VitaminC_euclidean_precision": 0.49193548387096775,
"eval_VitaminC_euclidean_recall": 0.991869918699187,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 274.7045593261719,
"eval_VitaminC_manhattan_ap": 0.5366045405118165,
"eval_VitaminC_manhattan_f1": 0.6577540106951871,
"eval_VitaminC_manhattan_f1_threshold": 475.4096374511719,
"eval_VitaminC_manhattan_precision": 0.4900398406374502,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 330.23577880859375,
"eval_VitaminC_max_ap": 0.5507967714924796,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 475.4096374511719,
"eval_VitaminC_max_precision": 0.492,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5507967714924796,
"eval_sts-test_pearson_cosine": 0.8204982036322743,
"eval_sts-test_pearson_dot": 0.8243481169631539,
"eval_sts-test_pearson_euclidean": 0.8495098083065487,
"eval_sts-test_pearson_manhattan": 0.8491539225772841,
"eval_sts-test_pearson_max": 0.8495098083065487,
"eval_sts-test_spearman_cosine": 0.8687444375928703,
"eval_sts-test_spearman_dot": 0.8509044179305871,
"eval_sts-test_spearman_euclidean": 0.8563313271350431,
"eval_sts-test_spearman_manhattan": 0.8563900467437737,
"eval_sts-test_spearman_max": 0.8687444375928703,
"eval_vitaminc-pairs_loss": 2.5139691829681396,
"eval_vitaminc-pairs_runtime": 1.449,
"eval_vitaminc-pairs_samples_per_second": 74.533,
"eval_vitaminc-pairs_steps_per_second": 1.38,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_negation-triplets_loss": 1.8629425764083862,
"eval_negation-triplets_runtime": 0.3014,
"eval_negation-triplets_samples_per_second": 212.31,
"eval_negation-triplets_steps_per_second": 3.317,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_scitail-pairs-pos_loss": 0.17502877116203308,
"eval_scitail-pairs-pos_runtime": 0.3707,
"eval_scitail-pairs-pos_samples_per_second": 145.673,
"eval_scitail-pairs-pos_steps_per_second": 2.698,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_xsum-pairs_loss": 0.2802315950393677,
"eval_xsum-pairs_runtime": 3.1565,
"eval_xsum-pairs_samples_per_second": 40.551,
"eval_xsum-pairs_steps_per_second": 0.634,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_sciq_pairs_loss": 0.046695925295352936,
"eval_sciq_pairs_runtime": 3.2866,
"eval_sciq_pairs_samples_per_second": 38.946,
"eval_sciq_pairs_steps_per_second": 0.609,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_qasc_pairs_loss": 0.2354799211025238,
"eval_qasc_pairs_runtime": 0.6228,
"eval_qasc_pairs_samples_per_second": 205.533,
"eval_qasc_pairs_steps_per_second": 3.211,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_openbookqa_pairs_loss": 0.8562020659446716,
"eval_openbookqa_pairs_runtime": 0.5764,
"eval_openbookqa_pairs_samples_per_second": 222.058,
"eval_openbookqa_pairs_steps_per_second": 3.47,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_msmarco_pairs_loss": 0.5559017658233643,
"eval_msmarco_pairs_runtime": 1.2826,
"eval_msmarco_pairs_samples_per_second": 99.801,
"eval_msmarco_pairs_steps_per_second": 1.559,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_nq_pairs_loss": 0.743526041507721,
"eval_nq_pairs_runtime": 2.3784,
"eval_nq_pairs_samples_per_second": 53.817,
"eval_nq_pairs_steps_per_second": 0.841,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_trivia_pairs_loss": 1.106662392616272,
"eval_trivia_pairs_runtime": 4.4193,
"eval_trivia_pairs_samples_per_second": 28.964,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_gooaq_pairs_loss": 0.8928955793380737,
"eval_gooaq_pairs_runtime": 0.8831,
"eval_gooaq_pairs_samples_per_second": 144.944,
"eval_gooaq_pairs_steps_per_second": 2.265,
"step": 243
},
{
"epoch": 1.3719124911785463,
"eval_paws-pos_loss": 0.03428014740347862,
"eval_paws-pos_runtime": 0.6872,
"eval_paws-pos_samples_per_second": 186.261,
"eval_paws-pos_steps_per_second": 2.91,
"step": 243
},
{
"epoch": 1.3888496824276642,
"grad_norm": 4.478828430175781,
"learning_rate": 3.752249182271433e-05,
"loss": 0.9531,
"step": 246
},
{
"epoch": 1.405786873676782,
"grad_norm": 3.3206863403320312,
"learning_rate": 3.731316729513507e-05,
"loss": 0.9023,
"step": 249
},
{
"epoch": 1.4227240649258999,
"grad_norm": 3.4713878631591797,
"learning_rate": 3.7096339215083274e-05,
"loss": 0.8922,
"step": 252
},
{
"epoch": 1.4396612561750177,
"grad_norm": 3.4212491512298584,
"learning_rate": 3.687216040092931e-05,
"loss": 0.9874,
"step": 255
},
{
"epoch": 1.4565984474241356,
"grad_norm": 3.398963689804077,
"learning_rate": 3.6640788851771084e-05,
"loss": 0.8508,
"step": 258
},
{
"epoch": 1.4735356386732534,
"grad_norm": 3.350128650665283,
"learning_rate": 3.64023876360778e-05,
"loss": 0.7149,
"step": 261
},
{
"epoch": 1.4904728299223713,
"grad_norm": 3.438978433609009,
"learning_rate": 3.615712477676081e-05,
"loss": 0.894,
"step": 264
},
{
"epoch": 1.5074100211714891,
"grad_norm": 3.1700806617736816,
"learning_rate": 3.5905173132752725e-05,
"loss": 0.867,
"step": 267
},
{
"epoch": 1.524347212420607,
"grad_norm": 3.1567916870117188,
"learning_rate": 3.5646710277178006e-05,
"loss": 0.7493,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.58203125,
"eval_VitaminC_cosine_accuracy_threshold": 0.7990785241127014,
"eval_VitaminC_cosine_ap": 0.5489113961762149,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.3687684237957001,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.57421875,
"eval_VitaminC_dot_accuracy_threshold": 328.30560302734375,
"eval_VitaminC_dot_ap": 0.5498735151014204,
"eval_VitaminC_dot_f1": 0.6595174262734584,
"eval_VitaminC_dot_f1_threshold": 153.01849365234375,
"eval_VitaminC_dot_precision": 0.492,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 12.773557662963867,
"eval_VitaminC_euclidean_ap": 0.5426159894851803,
"eval_VitaminC_euclidean_f1": 0.6559999999999999,
"eval_VitaminC_euclidean_f1_threshold": 23.71053123474121,
"eval_VitaminC_euclidean_precision": 0.4880952380952381,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.58203125,
"eval_VitaminC_manhattan_accuracy_threshold": 272.04931640625,
"eval_VitaminC_manhattan_ap": 0.5396432749419082,
"eval_VitaminC_manhattan_f1": 0.6577540106951871,
"eval_VitaminC_manhattan_f1_threshold": 494.33001708984375,
"eval_VitaminC_manhattan_precision": 0.4900398406374502,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.58203125,
"eval_VitaminC_max_accuracy_threshold": 328.30560302734375,
"eval_VitaminC_max_ap": 0.5498735151014204,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 494.33001708984375,
"eval_VitaminC_max_precision": 0.492,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5498735151014204,
"eval_sts-test_pearson_cosine": 0.8213785723785002,
"eval_sts-test_pearson_dot": 0.8169840312248031,
"eval_sts-test_pearson_euclidean": 0.8549065829936804,
"eval_sts-test_pearson_manhattan": 0.8559014033008101,
"eval_sts-test_pearson_max": 0.8559014033008101,
"eval_sts-test_spearman_cosine": 0.871560114440785,
"eval_sts-test_spearman_dot": 0.8412461164335756,
"eval_sts-test_spearman_euclidean": 0.8616554770242205,
"eval_sts-test_spearman_manhattan": 0.86344749922969,
"eval_sts-test_spearman_max": 0.871560114440785,
"eval_vitaminc-pairs_loss": 2.5574047565460205,
"eval_vitaminc-pairs_runtime": 1.4466,
"eval_vitaminc-pairs_samples_per_second": 74.658,
"eval_vitaminc-pairs_steps_per_second": 1.383,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_negation-triplets_loss": 1.853515386581421,
"eval_negation-triplets_runtime": 0.2992,
"eval_negation-triplets_samples_per_second": 213.896,
"eval_negation-triplets_steps_per_second": 3.342,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_scitail-pairs-pos_loss": 0.1692524254322052,
"eval_scitail-pairs-pos_runtime": 0.3739,
"eval_scitail-pairs-pos_samples_per_second": 144.426,
"eval_scitail-pairs-pos_steps_per_second": 2.675,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_xsum-pairs_loss": 0.22170975804328918,
"eval_xsum-pairs_runtime": 3.1517,
"eval_xsum-pairs_samples_per_second": 40.613,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_sciq_pairs_loss": 0.04346679896116257,
"eval_sciq_pairs_runtime": 3.2686,
"eval_sciq_pairs_samples_per_second": 39.16,
"eval_sciq_pairs_steps_per_second": 0.612,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_qasc_pairs_loss": 0.24427936971187592,
"eval_qasc_pairs_runtime": 0.6217,
"eval_qasc_pairs_samples_per_second": 205.897,
"eval_qasc_pairs_steps_per_second": 3.217,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_openbookqa_pairs_loss": 0.7998915910720825,
"eval_openbookqa_pairs_runtime": 0.576,
"eval_openbookqa_pairs_samples_per_second": 222.206,
"eval_openbookqa_pairs_steps_per_second": 3.472,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_msmarco_pairs_loss": 0.5027381777763367,
"eval_msmarco_pairs_runtime": 1.2901,
"eval_msmarco_pairs_samples_per_second": 99.216,
"eval_msmarco_pairs_steps_per_second": 1.55,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_nq_pairs_loss": 0.6529555916786194,
"eval_nq_pairs_runtime": 2.3842,
"eval_nq_pairs_samples_per_second": 53.687,
"eval_nq_pairs_steps_per_second": 0.839,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_trivia_pairs_loss": 1.0634211301803589,
"eval_trivia_pairs_runtime": 4.4089,
"eval_trivia_pairs_samples_per_second": 29.032,
"eval_trivia_pairs_steps_per_second": 0.454,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_gooaq_pairs_loss": 0.800453245639801,
"eval_gooaq_pairs_runtime": 0.8705,
"eval_gooaq_pairs_samples_per_second": 147.034,
"eval_gooaq_pairs_steps_per_second": 2.297,
"step": 270
},
{
"epoch": 1.524347212420607,
"eval_paws-pos_loss": 0.031901415437459946,
"eval_paws-pos_runtime": 0.6828,
"eval_paws-pos_samples_per_second": 187.456,
"eval_paws-pos_steps_per_second": 2.929,
"step": 270
},
{
"epoch": 1.5412844036697249,
"grad_norm": 3.258525848388672,
"learning_rate": 3.5381918372201175e-05,
"loss": 0.7974,
"step": 273
},
{
"epoch": 1.5582215949188427,
"grad_norm": 2.9689552783966064,
"learning_rate": 3.5110984040640627e-05,
"loss": 0.797,
"step": 276
},
{
"epoch": 1.5751587861679606,
"grad_norm": 3.50411057472229,
"learning_rate": 3.483409823443864e-05,
"loss": 0.6749,
"step": 279
},
{
"epoch": 1.5920959774170784,
"grad_norm": 2.840614080429077,
"learning_rate": 3.4551456100080266e-05,
"loss": 0.9325,
"step": 282
},
{
"epoch": 1.6090331686661963,
"grad_norm": 2.934267044067383,
"learning_rate": 3.426325684105594e-05,
"loss": 0.8418,
"step": 285
},
{
"epoch": 1.6259703599153141,
"grad_norm": 3.5037455558776855,
"learning_rate": 3.396970357746474e-05,
"loss": 1.0135,
"step": 288
},
{
"epoch": 1.642907551164432,
"grad_norm": 3.349975109100342,
"learning_rate": 3.3671003202857315e-05,
"loss": 0.6961,
"step": 291
},
{
"epoch": 1.6598447424135498,
"grad_norm": 3.207557439804077,
"learning_rate": 3.336736623841924e-05,
"loss": 0.9361,
"step": 294
},
{
"epoch": 1.6767819336626677,
"grad_norm": 2.0259296894073486,
"learning_rate": 3.305900668459766e-05,
"loss": 0.6747,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7887165546417236,
"eval_VitaminC_cosine_ap": 0.5443802154749287,
"eval_VitaminC_cosine_f1": 0.6595174262734584,
"eval_VitaminC_cosine_f1_threshold": 0.35189926624298096,
"eval_VitaminC_cosine_precision": 0.492,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 335.016357421875,
"eval_VitaminC_dot_ap": 0.5460930199557891,
"eval_VitaminC_dot_f1": 0.6594594594594595,
"eval_VitaminC_dot_f1_threshold": 158.6214599609375,
"eval_VitaminC_dot_precision": 0.4939271255060729,
"eval_VitaminC_dot_recall": 0.991869918699187,
"eval_VitaminC_euclidean_accuracy": 0.57421875,
"eval_VitaminC_euclidean_accuracy_threshold": 13.359209060668945,
"eval_VitaminC_euclidean_ap": 0.5420558119789205,
"eval_VitaminC_euclidean_f1": 0.6577540106951871,
"eval_VitaminC_euclidean_f1_threshold": 23.44475746154785,
"eval_VitaminC_euclidean_precision": 0.4900398406374502,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.578125,
"eval_VitaminC_manhattan_accuracy_threshold": 309.7850646972656,
"eval_VitaminC_manhattan_ap": 0.5398712022586767,
"eval_VitaminC_manhattan_f1": 0.6595174262734584,
"eval_VitaminC_manhattan_f1_threshold": 486.6765441894531,
"eval_VitaminC_manhattan_precision": 0.492,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 335.016357421875,
"eval_VitaminC_max_ap": 0.5460930199557891,
"eval_VitaminC_max_f1": 0.6595174262734584,
"eval_VitaminC_max_f1_threshold": 486.6765441894531,
"eval_VitaminC_max_precision": 0.4939271255060729,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5460930199557891,
"eval_sts-test_pearson_cosine": 0.8332392980969607,
"eval_sts-test_pearson_dot": 0.8346600863241642,
"eval_sts-test_pearson_euclidean": 0.8653211336269704,
"eval_sts-test_pearson_manhattan": 0.8653335270474869,
"eval_sts-test_pearson_max": 0.8653335270474869,
"eval_sts-test_spearman_cosine": 0.8786841635561152,
"eval_sts-test_spearman_dot": 0.8596876540389535,
"eval_sts-test_spearman_euclidean": 0.8687344122938186,
"eval_sts-test_spearman_manhattan": 0.8687734393508408,
"eval_sts-test_spearman_max": 0.8786841635561152,
"eval_vitaminc-pairs_loss": 2.4870808124542236,
"eval_vitaminc-pairs_runtime": 1.4506,
"eval_vitaminc-pairs_samples_per_second": 74.451,
"eval_vitaminc-pairs_steps_per_second": 1.379,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_negation-triplets_loss": 1.7349412441253662,
"eval_negation-triplets_runtime": 0.2993,
"eval_negation-triplets_samples_per_second": 213.838,
"eval_negation-triplets_steps_per_second": 3.341,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_scitail-pairs-pos_loss": 0.15961770713329315,
"eval_scitail-pairs-pos_runtime": 0.3704,
"eval_scitail-pairs-pos_samples_per_second": 145.808,
"eval_scitail-pairs-pos_steps_per_second": 2.7,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_xsum-pairs_loss": 0.22417353093624115,
"eval_xsum-pairs_runtime": 3.1629,
"eval_xsum-pairs_samples_per_second": 40.469,
"eval_xsum-pairs_steps_per_second": 0.632,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_sciq_pairs_loss": 0.03957323729991913,
"eval_sciq_pairs_runtime": 3.2788,
"eval_sciq_pairs_samples_per_second": 39.039,
"eval_sciq_pairs_steps_per_second": 0.61,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_qasc_pairs_loss": 0.19627788662910461,
"eval_qasc_pairs_runtime": 0.6246,
"eval_qasc_pairs_samples_per_second": 204.945,
"eval_qasc_pairs_steps_per_second": 3.202,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_openbookqa_pairs_loss": 0.7668256163597107,
"eval_openbookqa_pairs_runtime": 0.5769,
"eval_openbookqa_pairs_samples_per_second": 221.888,
"eval_openbookqa_pairs_steps_per_second": 3.467,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_msmarco_pairs_loss": 0.5024800300598145,
"eval_msmarco_pairs_runtime": 1.287,
"eval_msmarco_pairs_samples_per_second": 99.457,
"eval_msmarco_pairs_steps_per_second": 1.554,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_nq_pairs_loss": 0.6426529288291931,
"eval_nq_pairs_runtime": 2.3694,
"eval_nq_pairs_samples_per_second": 54.023,
"eval_nq_pairs_steps_per_second": 0.844,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_trivia_pairs_loss": 0.9762344360351562,
"eval_trivia_pairs_runtime": 4.4202,
"eval_trivia_pairs_samples_per_second": 28.958,
"eval_trivia_pairs_steps_per_second": 0.452,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_gooaq_pairs_loss": 0.7546207904815674,
"eval_gooaq_pairs_runtime": 0.8779,
"eval_gooaq_pairs_samples_per_second": 145.803,
"eval_gooaq_pairs_steps_per_second": 2.278,
"step": 297
},
{
"epoch": 1.6767819336626677,
"eval_paws-pos_loss": 0.029145879670977592,
"eval_paws-pos_runtime": 0.6938,
"eval_paws-pos_samples_per_second": 184.484,
"eval_paws-pos_steps_per_second": 2.883,
"step": 297
},
{
"epoch": 1.6937191249117856,
"grad_norm": 2.766063928604126,
"learning_rate": 3.274614187027587e-05,
"loss": 0.7786,
"step": 300
},
{
"epoch": 1.7106563161609034,
"grad_norm": 3.1933176517486572,
"learning_rate": 3.2428992299601946e-05,
"loss": 0.7171,
"step": 303
},
{
"epoch": 1.7275935074100213,
"grad_norm": 3.0088443756103516,
"learning_rate": 3.2107781496579536e-05,
"loss": 0.6627,
"step": 306
},
{
"epoch": 1.7445306986591391,
"grad_norm": 3.13895845413208,
"learning_rate": 3.178273584753023e-05,
"loss": 0.6711,
"step": 309
},
{
"epoch": 1.761467889908257,
"grad_norm": 3.34114933013916,
"learning_rate": 3.145408444153868e-05,
"loss": 0.9076,
"step": 312
},
{
"epoch": 1.7784050811573748,
"grad_norm": 2.5035502910614014,
"learning_rate": 3.1122058908992746e-05,
"loss": 0.7414,
"step": 315
},
{
"epoch": 1.7953422724064927,
"grad_norm": 2.284698247909546,
"learning_rate": 3.078689325833264e-05,
"loss": 0.582,
"step": 318
},
{
"epoch": 1.8122794636556105,
"grad_norm": 2.643444538116455,
"learning_rate": 3.044882371112396e-05,
"loss": 0.6068,
"step": 321
},
{
"epoch": 1.8292166549047284,
"grad_norm": 2.37386155128479,
"learning_rate": 3.0108088535571016e-05,
"loss": 0.6219,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7700088024139404,
"eval_VitaminC_cosine_ap": 0.5429726522180547,
"eval_VitaminC_cosine_f1": 0.6576819407008085,
"eval_VitaminC_cosine_f1_threshold": 0.33469462394714355,
"eval_VitaminC_cosine_precision": 0.49193548387096775,
"eval_VitaminC_cosine_recall": 0.991869918699187,
"eval_VitaminC_dot_accuracy": 0.57421875,
"eval_VitaminC_dot_accuracy_threshold": 309.1703186035156,
"eval_VitaminC_dot_ap": 0.5531704143247085,
"eval_VitaminC_dot_f1": 0.6594594594594595,
"eval_VitaminC_dot_f1_threshold": 145.4818115234375,
"eval_VitaminC_dot_precision": 0.4939271255060729,
"eval_VitaminC_dot_recall": 0.991869918699187,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 13.186519622802734,
"eval_VitaminC_euclidean_ap": 0.5414693053767123,
"eval_VitaminC_euclidean_f1": 0.6559999999999999,
"eval_VitaminC_euclidean_f1_threshold": 23.980300903320312,
"eval_VitaminC_euclidean_precision": 0.4880952380952381,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.5703125,
"eval_VitaminC_manhattan_accuracy_threshold": 248.66549682617188,
"eval_VitaminC_manhattan_ap": 0.5403554311371019,
"eval_VitaminC_manhattan_f1": 0.6577540106951871,
"eval_VitaminC_manhattan_f1_threshold": 493.069580078125,
"eval_VitaminC_manhattan_precision": 0.4900398406374502,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 309.1703186035156,
"eval_VitaminC_max_ap": 0.5531704143247085,
"eval_VitaminC_max_f1": 0.6594594594594595,
"eval_VitaminC_max_f1_threshold": 493.069580078125,
"eval_VitaminC_max_precision": 0.4939271255060729,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5531704143247085,
"eval_sts-test_pearson_cosine": 0.8353147289025764,
"eval_sts-test_pearson_dot": 0.8340239073162183,
"eval_sts-test_pearson_euclidean": 0.8678198295516475,
"eval_sts-test_pearson_manhattan": 0.8679311647036958,
"eval_sts-test_pearson_max": 0.8679311647036958,
"eval_sts-test_spearman_cosine": 0.8807142866140599,
"eval_sts-test_spearman_dot": 0.8548662012879339,
"eval_sts-test_spearman_euclidean": 0.8730904047317294,
"eval_sts-test_spearman_manhattan": 0.8734591925182695,
"eval_sts-test_spearman_max": 0.8807142866140599,
"eval_vitaminc-pairs_loss": 2.519745111465454,
"eval_vitaminc-pairs_runtime": 1.5111,
"eval_vitaminc-pairs_samples_per_second": 71.47,
"eval_vitaminc-pairs_steps_per_second": 1.324,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_negation-triplets_loss": 1.701598882675171,
"eval_negation-triplets_runtime": 0.3083,
"eval_negation-triplets_samples_per_second": 207.571,
"eval_negation-triplets_steps_per_second": 3.243,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_scitail-pairs-pos_loss": 0.1535351276397705,
"eval_scitail-pairs-pos_runtime": 0.4139,
"eval_scitail-pairs-pos_samples_per_second": 130.461,
"eval_scitail-pairs-pos_steps_per_second": 2.416,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_xsum-pairs_loss": 0.16304434835910797,
"eval_xsum-pairs_runtime": 3.173,
"eval_xsum-pairs_samples_per_second": 40.34,
"eval_xsum-pairs_steps_per_second": 0.63,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_sciq_pairs_loss": 0.03826402127742767,
"eval_sciq_pairs_runtime": 3.2871,
"eval_sciq_pairs_samples_per_second": 38.94,
"eval_sciq_pairs_steps_per_second": 0.608,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_qasc_pairs_loss": 0.20441913604736328,
"eval_qasc_pairs_runtime": 0.6223,
"eval_qasc_pairs_samples_per_second": 205.692,
"eval_qasc_pairs_steps_per_second": 3.214,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_openbookqa_pairs_loss": 0.7109480500221252,
"eval_openbookqa_pairs_runtime": 0.5785,
"eval_openbookqa_pairs_samples_per_second": 221.25,
"eval_openbookqa_pairs_steps_per_second": 3.457,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_msmarco_pairs_loss": 0.48586779832839966,
"eval_msmarco_pairs_runtime": 1.2912,
"eval_msmarco_pairs_samples_per_second": 99.129,
"eval_msmarco_pairs_steps_per_second": 1.549,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_nq_pairs_loss": 0.5532824397087097,
"eval_nq_pairs_runtime": 2.3796,
"eval_nq_pairs_samples_per_second": 53.791,
"eval_nq_pairs_steps_per_second": 0.84,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_trivia_pairs_loss": 1.0205955505371094,
"eval_trivia_pairs_runtime": 4.4187,
"eval_trivia_pairs_samples_per_second": 28.968,
"eval_trivia_pairs_steps_per_second": 0.453,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_gooaq_pairs_loss": 0.7736483812332153,
"eval_gooaq_pairs_runtime": 0.8757,
"eval_gooaq_pairs_samples_per_second": 146.171,
"eval_gooaq_pairs_steps_per_second": 2.284,
"step": 324
},
{
"epoch": 1.8292166549047284,
"eval_paws-pos_loss": 0.0273247379809618,
"eval_paws-pos_runtime": 0.6877,
"eval_paws-pos_samples_per_second": 186.121,
"eval_paws-pos_steps_per_second": 2.908,
"step": 324
},
{
"epoch": 1.8461538461538463,
"grad_norm": 2.632672071456909,
"learning_rate": 2.9764927878587643e-05,
"loss": 0.5862,
"step": 327
},
{
"epoch": 1.8630910374029641,
"grad_norm": 2.9056813716888428,
"learning_rate": 2.9419583596543924e-05,
"loss": 0.678,
"step": 330
},
{
"epoch": 1.880028228652082,
"grad_norm": 2.693070411682129,
"learning_rate": 2.907229908480814e-05,
"loss": 0.6272,
"step": 333
},
{
"epoch": 1.8969654199011998,
"grad_norm": 2.2290945053100586,
"learning_rate": 2.8723319106204032e-05,
"loss": 0.5048,
"step": 336
},
{
"epoch": 1.9139026111503177,
"grad_norm": 2.5947606563568115,
"learning_rate": 2.8372889618504275e-05,
"loss": 0.7653,
"step": 339
},
{
"epoch": 1.9308398023994355,
"grad_norm": 3.1747825145721436,
"learning_rate": 2.8021257601081767e-05,
"loss": 0.6613,
"step": 342
},
{
"epoch": 1.9477769936485534,
"grad_norm": 2.438523054122925,
"learning_rate": 2.766867088084095e-05,
"loss": 0.6122,
"step": 345
},
{
"epoch": 1.9647141848976712,
"grad_norm": 2.645747423171997,
"learning_rate": 2.7315377957551712e-05,
"loss": 0.5939,
"step": 348
},
{
"epoch": 1.981651376146789,
"grad_norm": 3.985382556915283,
"learning_rate": 2.696162782870916e-05,
"loss": 0.6923,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.7852457165718079,
"eval_VitaminC_cosine_ap": 0.5489275869827654,
"eval_VitaminC_cosine_f1": 0.6612466124661246,
"eval_VitaminC_cosine_f1_threshold": 0.3671841323375702,
"eval_VitaminC_cosine_precision": 0.4959349593495935,
"eval_VitaminC_cosine_recall": 0.991869918699187,
"eval_VitaminC_dot_accuracy": 0.5703125,
"eval_VitaminC_dot_accuracy_threshold": 312.1104736328125,
"eval_VitaminC_dot_ap": 0.5559525201108009,
"eval_VitaminC_dot_f1": 0.6612466124661246,
"eval_VitaminC_dot_f1_threshold": 150.29818725585938,
"eval_VitaminC_dot_precision": 0.4959349593495935,
"eval_VitaminC_dot_recall": 0.991869918699187,
"eval_VitaminC_euclidean_accuracy": 0.58203125,
"eval_VitaminC_euclidean_accuracy_threshold": 14.372268676757812,
"eval_VitaminC_euclidean_ap": 0.544755914591283,
"eval_VitaminC_euclidean_f1": 0.6576819407008085,
"eval_VitaminC_euclidean_f1_threshold": 23.06924819946289,
"eval_VitaminC_euclidean_precision": 0.49193548387096775,
"eval_VitaminC_euclidean_recall": 0.991869918699187,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 263.9018859863281,
"eval_VitaminC_manhattan_ap": 0.541522211031207,
"eval_VitaminC_manhattan_f1": 0.6595174262734584,
"eval_VitaminC_manhattan_f1_threshold": 502.340576171875,
"eval_VitaminC_manhattan_precision": 0.492,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.58203125,
"eval_VitaminC_max_accuracy_threshold": 312.1104736328125,
"eval_VitaminC_max_ap": 0.5559525201108009,
"eval_VitaminC_max_f1": 0.6612466124661246,
"eval_VitaminC_max_f1_threshold": 502.340576171875,
"eval_VitaminC_max_precision": 0.4959349593495935,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5559525201108009,
"eval_sts-test_pearson_cosine": 0.8362775201898809,
"eval_sts-test_pearson_dot": 0.8352671053392853,
"eval_sts-test_pearson_euclidean": 0.8700319618710969,
"eval_sts-test_pearson_manhattan": 0.8715864724519946,
"eval_sts-test_pearson_max": 0.8715864724519946,
"eval_sts-test_spearman_cosine": 0.8836928745715628,
"eval_sts-test_spearman_dot": 0.857968315251608,
"eval_sts-test_spearman_euclidean": 0.8761363054114356,
"eval_sts-test_spearman_manhattan": 0.8777002520634819,
"eval_sts-test_spearman_max": 0.8836928745715628,
"eval_vitaminc-pairs_loss": 2.437910795211792,
"eval_vitaminc-pairs_runtime": 1.4456,
"eval_vitaminc-pairs_samples_per_second": 74.71,
"eval_vitaminc-pairs_steps_per_second": 1.384,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_negation-triplets_loss": 1.7345324754714966,
"eval_negation-triplets_runtime": 0.2986,
"eval_negation-triplets_samples_per_second": 214.362,
"eval_negation-triplets_steps_per_second": 3.349,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_scitail-pairs-pos_loss": 0.14812646806240082,
"eval_scitail-pairs-pos_runtime": 0.3719,
"eval_scitail-pairs-pos_samples_per_second": 145.183,
"eval_scitail-pairs-pos_steps_per_second": 2.689,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_xsum-pairs_loss": 0.1463930606842041,
"eval_xsum-pairs_runtime": 3.152,
"eval_xsum-pairs_samples_per_second": 40.609,
"eval_xsum-pairs_steps_per_second": 0.635,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_sciq_pairs_loss": 0.03820851817727089,
"eval_sciq_pairs_runtime": 3.2627,
"eval_sciq_pairs_samples_per_second": 39.231,
"eval_sciq_pairs_steps_per_second": 0.613,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_qasc_pairs_loss": 0.16403906047344208,
"eval_qasc_pairs_runtime": 0.6219,
"eval_qasc_pairs_samples_per_second": 205.822,
"eval_qasc_pairs_steps_per_second": 3.216,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_openbookqa_pairs_loss": 0.755411684513092,
"eval_openbookqa_pairs_runtime": 0.5745,
"eval_openbookqa_pairs_samples_per_second": 222.788,
"eval_openbookqa_pairs_steps_per_second": 3.481,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_msmarco_pairs_loss": 0.43477028608322144,
"eval_msmarco_pairs_runtime": 1.2879,
"eval_msmarco_pairs_samples_per_second": 99.389,
"eval_msmarco_pairs_steps_per_second": 1.553,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_nq_pairs_loss": 0.5431913733482361,
"eval_nq_pairs_runtime": 2.372,
"eval_nq_pairs_samples_per_second": 53.962,
"eval_nq_pairs_steps_per_second": 0.843,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_trivia_pairs_loss": 0.9581867456436157,
"eval_trivia_pairs_runtime": 4.4272,
"eval_trivia_pairs_samples_per_second": 28.912,
"eval_trivia_pairs_steps_per_second": 0.452,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_gooaq_pairs_loss": 0.7219691872596741,
"eval_gooaq_pairs_runtime": 0.8764,
"eval_gooaq_pairs_samples_per_second": 146.055,
"eval_gooaq_pairs_steps_per_second": 2.282,
"step": 351
},
{
"epoch": 1.981651376146789,
"eval_paws-pos_loss": 0.026377690955996513,
"eval_paws-pos_runtime": 0.6874,
"eval_paws-pos_samples_per_second": 186.22,
"eval_paws-pos_steps_per_second": 2.91,
"step": 351
},
{
"epoch": 1.998588567395907,
"grad_norm": 2.807307481765747,
"learning_rate": 2.660766981404253e-05,
"loss": 0.5712,
"step": 354
},
{
"epoch": 2.015525758645025,
"grad_norm": 3.1135761737823486,
"learning_rate": 2.6253753379797e-05,
"loss": 0.5969,
"step": 357
},
{
"epoch": 2.0324629498941427,
"grad_norm": 2.701498508453369,
"learning_rate": 2.5900127962912265e-05,
"loss": 0.5881,
"step": 360
},
{
"epoch": 2.0494001411432605,
"grad_norm": 2.1898539066314697,
"learning_rate": 2.554704279522176e-05,
"loss": 0.6005,
"step": 363
},
{
"epoch": 2.0663373323923784,
"grad_norm": 2.3954033851623535,
"learning_rate": 2.5194746727796408e-05,
"loss": 0.6066,
"step": 366
},
{
"epoch": 2.0832745236414962,
"grad_norm": 1.9525569677352905,
"learning_rate": 2.4843488055556773e-05,
"loss": 0.4921,
"step": 369
},
{
"epoch": 2.100211714890614,
"grad_norm": 2.005103588104248,
"learning_rate": 2.449351434227714e-05,
"loss": 0.5354,
"step": 372
},
{
"epoch": 2.117148906139732,
"grad_norm": 2.4554927349090576,
"learning_rate": 2.414507224610495e-05,
"loss": 0.5602,
"step": 375
},
{
"epoch": 2.13408609738885,
"grad_norm": 2.926708698272705,
"learning_rate": 2.3798407345718434e-05,
"loss": 0.5686,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.57421875,
"eval_VitaminC_cosine_accuracy_threshold": 0.8050106763839722,
"eval_VitaminC_cosine_ap": 0.544869760591425,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.2933539152145386,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.5625,
"eval_VitaminC_dot_accuracy_threshold": 350.54046630859375,
"eval_VitaminC_dot_ap": 0.5538743151996848,
"eval_VitaminC_dot_f1": 0.6577540106951871,
"eval_VitaminC_dot_f1_threshold": 122.50220489501953,
"eval_VitaminC_dot_precision": 0.4900398406374502,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.578125,
"eval_VitaminC_euclidean_accuracy_threshold": 12.29859447479248,
"eval_VitaminC_euclidean_ap": 0.5417581979676633,
"eval_VitaminC_euclidean_f1": 0.6559999999999999,
"eval_VitaminC_euclidean_f1_threshold": 24.298545837402344,
"eval_VitaminC_euclidean_precision": 0.4880952380952381,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.57421875,
"eval_VitaminC_manhattan_accuracy_threshold": 261.48309326171875,
"eval_VitaminC_manhattan_ap": 0.5389765713900105,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 519.0216064453125,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.578125,
"eval_VitaminC_max_accuracy_threshold": 350.54046630859375,
"eval_VitaminC_max_ap": 0.5538743151996848,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 519.0216064453125,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5538743151996848,
"eval_sts-test_pearson_cosine": 0.8323841899935347,
"eval_sts-test_pearson_dot": 0.8319981087315044,
"eval_sts-test_pearson_euclidean": 0.8653758499419844,
"eval_sts-test_pearson_manhattan": 0.8666968424133361,
"eval_sts-test_pearson_max": 0.8666968424133361,
"eval_sts-test_spearman_cosine": 0.8804171081064596,
"eval_sts-test_spearman_dot": 0.8574101209222718,
"eval_sts-test_spearman_euclidean": 0.8715185810589999,
"eval_sts-test_spearman_manhattan": 0.8731244191392259,
"eval_sts-test_spearman_max": 0.8804171081064596,
"eval_vitaminc-pairs_loss": 2.390805959701538,
"eval_vitaminc-pairs_runtime": 1.4923,
"eval_vitaminc-pairs_samples_per_second": 72.374,
"eval_vitaminc-pairs_steps_per_second": 1.34,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_negation-triplets_loss": 1.7677762508392334,
"eval_negation-triplets_runtime": 0.3036,
"eval_negation-triplets_samples_per_second": 210.833,
"eval_negation-triplets_steps_per_second": 3.294,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_scitail-pairs-pos_loss": 0.14010007679462433,
"eval_scitail-pairs-pos_runtime": 0.3847,
"eval_scitail-pairs-pos_samples_per_second": 140.379,
"eval_scitail-pairs-pos_steps_per_second": 2.6,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_xsum-pairs_loss": 0.1453721672296524,
"eval_xsum-pairs_runtime": 3.1712,
"eval_xsum-pairs_samples_per_second": 40.363,
"eval_xsum-pairs_steps_per_second": 0.631,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_sciq_pairs_loss": 0.03739440068602562,
"eval_sciq_pairs_runtime": 3.3277,
"eval_sciq_pairs_samples_per_second": 38.466,
"eval_sciq_pairs_steps_per_second": 0.601,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_qasc_pairs_loss": 0.1603582501411438,
"eval_qasc_pairs_runtime": 0.632,
"eval_qasc_pairs_samples_per_second": 202.534,
"eval_qasc_pairs_steps_per_second": 3.165,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_openbookqa_pairs_loss": 0.7796258330345154,
"eval_openbookqa_pairs_runtime": 0.5856,
"eval_openbookqa_pairs_samples_per_second": 218.585,
"eval_openbookqa_pairs_steps_per_second": 3.415,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_msmarco_pairs_loss": 0.4246203303337097,
"eval_msmarco_pairs_runtime": 1.297,
"eval_msmarco_pairs_samples_per_second": 98.689,
"eval_msmarco_pairs_steps_per_second": 1.542,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_nq_pairs_loss": 0.5298404097557068,
"eval_nq_pairs_runtime": 2.3877,
"eval_nq_pairs_samples_per_second": 53.609,
"eval_nq_pairs_steps_per_second": 0.838,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_trivia_pairs_loss": 0.9613967537879944,
"eval_trivia_pairs_runtime": 4.4311,
"eval_trivia_pairs_samples_per_second": 28.887,
"eval_trivia_pairs_steps_per_second": 0.451,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_gooaq_pairs_loss": 0.6964626908302307,
"eval_gooaq_pairs_runtime": 0.8843,
"eval_gooaq_pairs_samples_per_second": 144.755,
"eval_gooaq_pairs_steps_per_second": 2.262,
"step": 378
},
{
"epoch": 2.13408609738885,
"eval_paws-pos_loss": 0.02705618366599083,
"eval_paws-pos_runtime": 0.6932,
"eval_paws-pos_samples_per_second": 184.655,
"eval_paws-pos_steps_per_second": 2.885,
"step": 378
},
{
"epoch": 2.1510232886379677,
"grad_norm": 2.418947458267212,
"learning_rate": 2.345376396724515e-05,
"loss": 0.6496,
"step": 381
},
{
"epoch": 2.1679604798870855,
"grad_norm": 2.5201969146728516,
"learning_rate": 2.311138501206319e-05,
"loss": 0.4713,
"step": 384
},
{
"epoch": 2.1848976711362034,
"grad_norm": 3.0134377479553223,
"learning_rate": 2.277151178560665e-05,
"loss": 0.6345,
"step": 387
},
{
"epoch": 2.2018348623853212,
"grad_norm": 2.368422031402588,
"learning_rate": 2.2434383827295833e-05,
"loss": 0.5994,
"step": 390
},
{
"epoch": 2.218772053634439,
"grad_norm": 3.164980411529541,
"learning_rate": 2.210023874171213e-05,
"loss": 0.6763,
"step": 393
},
{
"epoch": 2.235709244883557,
"grad_norm": 2.83431077003479,
"learning_rate": 2.1769312031136583e-05,
"loss": 0.7254,
"step": 396
},
{
"epoch": 2.252646436132675,
"grad_norm": 3.2441203594207764,
"learning_rate": 2.14418369295701e-05,
"loss": 0.8032,
"step": 399
},
{
"epoch": 2.2695836273817926,
"grad_norm": 2.215298652648926,
"learning_rate": 2.1118044238352392e-05,
"loss": 0.4914,
"step": 402
},
{
"epoch": 2.2865208186309105,
"grad_norm": 2.700486183166504,
"learning_rate": 2.0798162163495322e-05,
"loss": 0.6307,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_NLI-v2_cosine_accuracy": 1.0,
"eval_NLI-v2_dot_accuracy": 0.0,
"eval_NLI-v2_euclidean_accuracy": 1.0,
"eval_NLI-v2_manhattan_accuracy": 1.0,
"eval_NLI-v2_max_accuracy": 1.0,
"eval_VitaminC_cosine_accuracy": 0.578125,
"eval_VitaminC_cosine_accuracy_threshold": 0.8052636384963989,
"eval_VitaminC_cosine_ap": 0.5479388360307975,
"eval_VitaminC_cosine_f1": 0.6577540106951871,
"eval_VitaminC_cosine_f1_threshold": 0.3108493387699127,
"eval_VitaminC_cosine_precision": 0.4900398406374502,
"eval_VitaminC_cosine_recall": 1.0,
"eval_VitaminC_dot_accuracy": 0.58203125,
"eval_VitaminC_dot_accuracy_threshold": 318.633056640625,
"eval_VitaminC_dot_ap": 0.5533499611019033,
"eval_VitaminC_dot_f1": 0.6577540106951871,
"eval_VitaminC_dot_f1_threshold": 125.5129165649414,
"eval_VitaminC_dot_precision": 0.4900398406374502,
"eval_VitaminC_dot_recall": 1.0,
"eval_VitaminC_euclidean_accuracy": 0.58203125,
"eval_VitaminC_euclidean_accuracy_threshold": 12.9645357131958,
"eval_VitaminC_euclidean_ap": 0.541753017593475,
"eval_VitaminC_euclidean_f1": 0.6559999999999999,
"eval_VitaminC_euclidean_f1_threshold": 23.908817291259766,
"eval_VitaminC_euclidean_precision": 0.4880952380952381,
"eval_VitaminC_euclidean_recall": 1.0,
"eval_VitaminC_manhattan_accuracy": 0.578125,
"eval_VitaminC_manhattan_accuracy_threshold": 266.60528564453125,
"eval_VitaminC_manhattan_ap": 0.5411403083150335,
"eval_VitaminC_manhattan_f1": 0.6559999999999999,
"eval_VitaminC_manhattan_f1_threshold": 512.4686279296875,
"eval_VitaminC_manhattan_precision": 0.4880952380952381,
"eval_VitaminC_manhattan_recall": 1.0,
"eval_VitaminC_max_accuracy": 0.58203125,
"eval_VitaminC_max_accuracy_threshold": 318.633056640625,
"eval_VitaminC_max_ap": 0.5533499611019033,
"eval_VitaminC_max_f1": 0.6577540106951871,
"eval_VitaminC_max_f1_threshold": 512.4686279296875,
"eval_VitaminC_max_precision": 0.4900398406374502,
"eval_VitaminC_max_recall": 1.0,
"eval_sequential_score": 0.5533499611019033,
"eval_sts-test_pearson_cosine": 0.8404451477820003,
"eval_sts-test_pearson_dot": 0.8376741383364052,
"eval_sts-test_pearson_euclidean": 0.873696402540065,
"eval_sts-test_pearson_manhattan": 0.8739146310077538,
"eval_sts-test_pearson_max": 0.8739146310077538,
"eval_sts-test_spearman_cosine": 0.8859238616569335,
"eval_sts-test_spearman_dot": 0.8626544264654313,
"eval_sts-test_spearman_euclidean": 0.8767156244780591,
"eval_sts-test_spearman_manhattan": 0.8785835525192047,
"eval_sts-test_spearman_max": 0.8859238616569335,
"eval_vitaminc-pairs_loss": 2.438774347305298,
"eval_vitaminc-pairs_runtime": 1.4716,
"eval_vitaminc-pairs_samples_per_second": 73.39,
"eval_vitaminc-pairs_steps_per_second": 1.359,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_negation-triplets_loss": 1.7093145847320557,
"eval_negation-triplets_runtime": 0.3027,
"eval_negation-triplets_samples_per_second": 211.422,
"eval_negation-triplets_steps_per_second": 3.303,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_scitail-pairs-pos_loss": 0.11918405443429947,
"eval_scitail-pairs-pos_runtime": 0.3806,
"eval_scitail-pairs-pos_samples_per_second": 141.888,
"eval_scitail-pairs-pos_steps_per_second": 2.628,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_xsum-pairs_loss": 0.13078594207763672,
"eval_xsum-pairs_runtime": 3.1593,
"eval_xsum-pairs_samples_per_second": 40.515,
"eval_xsum-pairs_steps_per_second": 0.633,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_sciq_pairs_loss": 0.03792291879653931,
"eval_sciq_pairs_runtime": 3.3679,
"eval_sciq_pairs_samples_per_second": 38.006,
"eval_sciq_pairs_steps_per_second": 0.594,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_qasc_pairs_loss": 0.1465962529182434,
"eval_qasc_pairs_runtime": 0.6708,
"eval_qasc_pairs_samples_per_second": 190.809,
"eval_qasc_pairs_steps_per_second": 2.981,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_openbookqa_pairs_loss": 0.74336838722229,
"eval_openbookqa_pairs_runtime": 0.6017,
"eval_openbookqa_pairs_samples_per_second": 212.742,
"eval_openbookqa_pairs_steps_per_second": 3.324,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_msmarco_pairs_loss": 0.3927748501300812,
"eval_msmarco_pairs_runtime": 1.3092,
"eval_msmarco_pairs_samples_per_second": 97.767,
"eval_msmarco_pairs_steps_per_second": 1.528,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_nq_pairs_loss": 0.4998345375061035,
"eval_nq_pairs_runtime": 2.4116,
"eval_nq_pairs_samples_per_second": 53.077,
"eval_nq_pairs_steps_per_second": 0.829,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_trivia_pairs_loss": 0.9862285852432251,
"eval_trivia_pairs_runtime": 4.4317,
"eval_trivia_pairs_samples_per_second": 28.883,
"eval_trivia_pairs_steps_per_second": 0.451,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_gooaq_pairs_loss": 0.697635293006897,
"eval_gooaq_pairs_runtime": 0.8801,
"eval_gooaq_pairs_samples_per_second": 145.443,
"eval_gooaq_pairs_steps_per_second": 2.273,
"step": 405
},
{
"epoch": 2.2865208186309105,
"eval_paws-pos_loss": 0.02622571960091591,
"eval_paws-pos_runtime": 0.6966,
"eval_paws-pos_samples_per_second": 183.756,
"eval_paws-pos_steps_per_second": 2.871,
"step": 405
},
{
"epoch": 2.3034580098800284,
"grad_norm": 2.7358224391937256,
"learning_rate": 2.0482416154845496e-05,
"loss": 0.7493,
"step": 408
},
{
"epoch": 2.320395201129146,
"grad_norm": 2.2785451412200928,
"learning_rate": 2.0171028747189386e-05,
"loss": 0.5139,
"step": 411
},
{
"epoch": 2.337332392378264,
"grad_norm": 2.1454882621765137,
"learning_rate": 1.9864219403412882e-05,
"loss": 0.6364,
"step": 414
},
{
"epoch": 2.354269583627382,
"grad_norm": 2.206393003463745,
"learning_rate": 1.9562204359825967e-05,
"loss": 0.4763,
"step": 417
},
{
"epoch": 2.3712067748765,
"grad_norm": 2.2492825984954834,
"learning_rate": 1.92651964737614e-05,
"loss": 0.583,
"step": 420
},
{
"epoch": 2.3881439661256176,
"grad_norm": 3.24066162109375,
"learning_rate": 1.8973405073554915e-05,
"loss": 0.5912,
"step": 423
},
{
"epoch": 2.4050811573747355,
"grad_norm": 2.6232211589813232,
"learning_rate": 1.868703581101257e-05,
"loss": 0.5936,
"step": 426
}
],
"logging_steps": 3,
"max_steps": 531,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 107,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 160,
"trial_name": null,
"trial_params": null
}