{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.4163726182074807, "eval_steps": 27, "global_step": 428, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016937191249117856, "grad_norm": 34.22002029418945, "learning_rate": 6.818181818181818e-07, "loss": 7.2372, "step": 3 }, { "epoch": 0.03387438249823571, "grad_norm": 21.76839828491211, "learning_rate": 1.3636363636363636e-06, "loss": 6.855, "step": 6 }, { "epoch": 0.05081157374735357, "grad_norm": 21.260774612426758, "learning_rate": 2.0454545454545457e-06, "loss": 7.4707, "step": 9 }, { "epoch": 0.06774876499647142, "grad_norm": 16.885921478271484, "learning_rate": 2.7272727272727272e-06, "loss": 7.0187, "step": 12 }, { "epoch": 0.08468595624558928, "grad_norm": 19.509899139404297, "learning_rate": 3.409090909090909e-06, "loss": 6.6756, "step": 15 }, { "epoch": 0.10162314749470713, "grad_norm": 7.9427289962768555, "learning_rate": 4.0909090909090915e-06, "loss": 6.0155, "step": 18 }, { "epoch": 0.11856033874382499, "grad_norm": 7.325345039367676, "learning_rate": 4.772727272727273e-06, "loss": 6.1644, "step": 21 }, { "epoch": 0.13549752999294284, "grad_norm": 7.544689655303955, "learning_rate": 5.4545454545454545e-06, "loss": 6.2158, "step": 24 }, { "epoch": 0.1524347212420607, "grad_norm": 5.141758918762207, "learning_rate": 6.136363636363637e-06, "loss": 6.1369, "step": 27 }, { "epoch": 0.1524347212420607, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.109375, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.5546875, "eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373, "eval_VitaminC_cosine_ap": 0.5356492030729136, "eval_VitaminC_cosine_f1": 0.6542553191489362, "eval_VitaminC_cosine_f1_threshold": 0.7148199081420898, "eval_VitaminC_cosine_precision": 0.48616600790513836, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 414.4264831542969, "eval_VitaminC_dot_ap": 0.5108219546857565, "eval_VitaminC_dot_f1": 0.6507936507936508, "eval_VitaminC_dot_f1_threshold": 271.6522521972656, "eval_VitaminC_dot_precision": 0.4823529411764706, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.55078125, "eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387, "eval_VitaminC_euclidean_ap": 0.5226419655984281, "eval_VitaminC_euclidean_f1": 0.6505376344086021, "eval_VitaminC_euclidean_f1_threshold": 15.194067001342773, "eval_VitaminC_euclidean_precision": 0.4859437751004016, "eval_VitaminC_euclidean_recall": 0.983739837398374, "eval_VitaminC_manhattan_accuracy": 0.546875, "eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188, "eval_VitaminC_manhattan_ap": 0.5237451656134715, "eval_VitaminC_manhattan_f1": 0.6542553191489362, "eval_VitaminC_manhattan_f1_threshold": 259.007080078125, "eval_VitaminC_manhattan_precision": 0.48616600790513836, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.5546875, "eval_VitaminC_max_accuracy_threshold": 414.4264831542969, "eval_VitaminC_max_ap": 0.5356492030729136, "eval_VitaminC_max_f1": 0.6542553191489362, "eval_VitaminC_max_f1_threshold": 271.6522521972656, "eval_VitaminC_max_precision": 0.48616600790513836, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5356492030729136, "eval_sts-test_pearson_cosine": 0.056062031998983373, "eval_sts-test_pearson_dot": 0.2979259445723872, "eval_sts-test_pearson_euclidean": 0.0498319208592713, "eval_sts-test_pearson_manhattan": 0.07381429239121526, "eval_sts-test_pearson_max": 0.2979259445723872, "eval_sts-test_spearman_cosine": 0.1066788491614481, "eval_sts-test_spearman_dot": 0.315952670306405, "eval_sts-test_spearman_euclidean": 0.07303394554435191, "eval_sts-test_spearman_manhattan": 0.09039525717692232, "eval_sts-test_spearman_max": 0.315952670306405, "eval_vitaminc-pairs_loss": 2.698580741882324, "eval_vitaminc-pairs_runtime": 1.4747, "eval_vitaminc-pairs_samples_per_second": 73.236, "eval_vitaminc-pairs_steps_per_second": 1.356, "step": 27 }, { "epoch": 0.1524347212420607, "eval_negation-triplets_loss": 5.142906665802002, "eval_negation-triplets_runtime": 0.2993, "eval_negation-triplets_samples_per_second": 213.865, "eval_negation-triplets_steps_per_second": 3.342, "step": 27 }, { "epoch": 0.1524347212420607, "eval_scitail-pairs-pos_loss": 1.9216958284378052, "eval_scitail-pairs-pos_runtime": 0.3834, "eval_scitail-pairs-pos_samples_per_second": 140.842, "eval_scitail-pairs-pos_steps_per_second": 2.608, "step": 27 }, { "epoch": 0.1524347212420607, "eval_xsum-pairs_loss": 6.073049545288086, "eval_xsum-pairs_runtime": 3.1587, "eval_xsum-pairs_samples_per_second": 40.523, "eval_xsum-pairs_steps_per_second": 0.633, "step": 27 }, { "epoch": 0.1524347212420607, "eval_sciq_pairs_loss": 0.3449864387512207, "eval_sciq_pairs_runtime": 3.3747, "eval_sciq_pairs_samples_per_second": 37.93, "eval_sciq_pairs_steps_per_second": 0.593, "step": 27 }, { "epoch": 0.1524347212420607, "eval_qasc_pairs_loss": 3.2267842292785645, "eval_qasc_pairs_runtime": 0.6576, "eval_qasc_pairs_samples_per_second": 194.646, "eval_qasc_pairs_steps_per_second": 3.041, "step": 27 }, { "epoch": 0.1524347212420607, "eval_openbookqa_pairs_loss": 4.405983924865723, "eval_openbookqa_pairs_runtime": 0.6107, "eval_openbookqa_pairs_samples_per_second": 209.594, "eval_openbookqa_pairs_steps_per_second": 3.275, "step": 27 }, { "epoch": 0.1524347212420607, "eval_msmarco_pairs_loss": 6.937691688537598, "eval_msmarco_pairs_runtime": 1.3091, "eval_msmarco_pairs_samples_per_second": 97.779, "eval_msmarco_pairs_steps_per_second": 1.528, "step": 27 }, { "epoch": 0.1524347212420607, "eval_nq_pairs_loss": 6.794108867645264, "eval_nq_pairs_runtime": 2.3968, "eval_nq_pairs_samples_per_second": 53.404, "eval_nq_pairs_steps_per_second": 0.834, "step": 27 }, { "epoch": 0.1524347212420607, "eval_trivia_pairs_loss": 6.3355631828308105, "eval_trivia_pairs_runtime": 4.4974, "eval_trivia_pairs_samples_per_second": 28.461, "eval_trivia_pairs_steps_per_second": 0.445, "step": 27 }, { "epoch": 0.1524347212420607, "eval_gooaq_pairs_loss": 6.405998706817627, "eval_gooaq_pairs_runtime": 0.8745, "eval_gooaq_pairs_samples_per_second": 146.37, "eval_gooaq_pairs_steps_per_second": 2.287, "step": 27 }, { "epoch": 0.1524347212420607, "eval_paws-pos_loss": 2.2308223247528076, "eval_paws-pos_runtime": 0.6998, "eval_paws-pos_samples_per_second": 182.908, "eval_paws-pos_steps_per_second": 2.858, "step": 27 }, { "epoch": 0.16937191249117856, "grad_norm": 5.885251522064209, "learning_rate": 6.818181818181818e-06, "loss": 5.7653, "step": 30 }, { "epoch": 0.1863091037402964, "grad_norm": 7.357480049133301, "learning_rate": 7.500000000000001e-06, "loss": 6.1259, "step": 33 }, { "epoch": 0.20324629498941427, "grad_norm": 7.321795463562012, "learning_rate": 8.181818181818183e-06, "loss": 5.7539, "step": 36 }, { "epoch": 0.22018348623853212, "grad_norm": 4.239792346954346, "learning_rate": 8.863636363636365e-06, "loss": 6.0131, "step": 39 }, { "epoch": 0.23712067748764998, "grad_norm": 3.9554407596588135, "learning_rate": 9.545454545454547e-06, "loss": 6.0074, "step": 42 }, { "epoch": 0.25405786873676783, "grad_norm": 4.406026840209961, "learning_rate": 1.0227272727272729e-05, "loss": 5.7125, "step": 45 }, { "epoch": 0.2709950599858857, "grad_norm": 7.235893249511719, "learning_rate": 1.0909090909090909e-05, "loss": 5.5634, "step": 48 }, { "epoch": 0.28793225123500354, "grad_norm": 5.330288410186768, "learning_rate": 1.1590909090909093e-05, "loss": 5.2924, "step": 51 }, { "epoch": 0.3048694424841214, "grad_norm": 7.216403961181641, "learning_rate": 1.2272727272727274e-05, "loss": 5.2286, "step": 54 }, { "epoch": 0.3048694424841214, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.046875, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.54296875, "eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855, "eval_VitaminC_cosine_ap": 0.5212059026196154, "eval_VitaminC_cosine_f1": 0.6576819407008085, "eval_VitaminC_cosine_f1_threshold": 0.7373804450035095, "eval_VitaminC_cosine_precision": 0.49193548387096775, "eval_VitaminC_cosine_recall": 0.991869918699187, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 418.2774658203125, "eval_VitaminC_dot_ap": 0.5160594099493883, "eval_VitaminC_dot_f1": 0.6521739130434782, "eval_VitaminC_dot_f1_threshold": 291.5081481933594, "eval_VitaminC_dot_precision": 0.4897959183673469, "eval_VitaminC_dot_recall": 0.975609756097561, "eval_VitaminC_euclidean_accuracy": 0.5390625, "eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465, "eval_VitaminC_euclidean_ap": 0.5224837623095228, "eval_VitaminC_euclidean_f1": 0.6576819407008085, "eval_VitaminC_euclidean_f1_threshold": 14.879999160766602, "eval_VitaminC_euclidean_precision": 0.49193548387096775, "eval_VitaminC_euclidean_recall": 0.991869918699187, "eval_VitaminC_manhattan_accuracy": 0.53515625, "eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938, "eval_VitaminC_manhattan_ap": 0.5186382518671783, "eval_VitaminC_manhattan_f1": 0.6576086956521738, "eval_VitaminC_manhattan_f1_threshold": 263.32452392578125, "eval_VitaminC_manhattan_precision": 0.49387755102040815, "eval_VitaminC_manhattan_recall": 0.983739837398374, "eval_VitaminC_max_accuracy": 0.55078125, "eval_VitaminC_max_accuracy_threshold": 418.2774658203125, "eval_VitaminC_max_ap": 0.5224837623095228, "eval_VitaminC_max_f1": 0.6576819407008085, "eval_VitaminC_max_f1_threshold": 291.5081481933594, "eval_VitaminC_max_precision": 0.49387755102040815, "eval_VitaminC_max_recall": 0.991869918699187, "eval_sequential_score": 0.5224837623095228, "eval_sts-test_pearson_cosine": 0.14377091128453176, "eval_sts-test_pearson_dot": 0.24728387094758872, "eval_sts-test_pearson_euclidean": 0.14604155960515372, "eval_sts-test_pearson_manhattan": 0.1446467532231986, "eval_sts-test_pearson_max": 0.24728387094758872, "eval_sts-test_spearman_cosine": 0.1968510434344728, "eval_sts-test_spearman_dot": 0.29467218283745694, "eval_sts-test_spearman_euclidean": 0.17218164683969664, "eval_sts-test_spearman_manhattan": 0.17741843340856742, "eval_sts-test_spearman_max": 0.29467218283745694, "eval_vitaminc-pairs_loss": 2.664700746536255, "eval_vitaminc-pairs_runtime": 1.4487, "eval_vitaminc-pairs_samples_per_second": 74.551, "eval_vitaminc-pairs_steps_per_second": 1.381, "step": 54 }, { "epoch": 0.3048694424841214, "eval_negation-triplets_loss": 4.6218037605285645, "eval_negation-triplets_runtime": 0.2971, "eval_negation-triplets_samples_per_second": 215.438, "eval_negation-triplets_steps_per_second": 3.366, "step": 54 }, { "epoch": 0.3048694424841214, "eval_scitail-pairs-pos_loss": 1.2413936853408813, "eval_scitail-pairs-pos_runtime": 0.372, "eval_scitail-pairs-pos_samples_per_second": 145.175, "eval_scitail-pairs-pos_steps_per_second": 2.688, "step": 54 }, { "epoch": 0.3048694424841214, "eval_xsum-pairs_loss": 5.249766826629639, "eval_xsum-pairs_runtime": 3.1506, "eval_xsum-pairs_samples_per_second": 40.627, "eval_xsum-pairs_steps_per_second": 0.635, "step": 54 }, { "epoch": 0.3048694424841214, "eval_sciq_pairs_loss": 0.2961578667163849, "eval_sciq_pairs_runtime": 3.2909, "eval_sciq_pairs_samples_per_second": 38.895, "eval_sciq_pairs_steps_per_second": 0.608, "step": 54 }, { "epoch": 0.3048694424841214, "eval_qasc_pairs_loss": 2.530872344970703, "eval_qasc_pairs_runtime": 0.6255, "eval_qasc_pairs_samples_per_second": 204.63, "eval_qasc_pairs_steps_per_second": 3.197, "step": 54 }, { "epoch": 0.3048694424841214, "eval_openbookqa_pairs_loss": 3.8855104446411133, "eval_openbookqa_pairs_runtime": 0.5742, "eval_openbookqa_pairs_samples_per_second": 222.914, "eval_openbookqa_pairs_steps_per_second": 3.483, "step": 54 }, { "epoch": 0.3048694424841214, "eval_msmarco_pairs_loss": 5.246406555175781, "eval_msmarco_pairs_runtime": 1.2872, "eval_msmarco_pairs_samples_per_second": 99.442, "eval_msmarco_pairs_steps_per_second": 1.554, "step": 54 }, { "epoch": 0.3048694424841214, "eval_nq_pairs_loss": 5.332630157470703, "eval_nq_pairs_runtime": 2.3739, "eval_nq_pairs_samples_per_second": 53.92, "eval_nq_pairs_steps_per_second": 0.843, "step": 54 }, { "epoch": 0.3048694424841214, "eval_trivia_pairs_loss": 5.647429943084717, "eval_trivia_pairs_runtime": 4.4729, "eval_trivia_pairs_samples_per_second": 28.617, "eval_trivia_pairs_steps_per_second": 0.447, "step": 54 }, { "epoch": 0.3048694424841214, "eval_gooaq_pairs_loss": 5.225871562957764, "eval_gooaq_pairs_runtime": 0.8715, "eval_gooaq_pairs_samples_per_second": 146.868, "eval_gooaq_pairs_steps_per_second": 2.295, "step": 54 }, { "epoch": 0.3048694424841214, "eval_paws-pos_loss": 0.8335962891578674, "eval_paws-pos_runtime": 0.6844, "eval_paws-pos_samples_per_second": 187.036, "eval_paws-pos_steps_per_second": 2.922, "step": 54 }, { "epoch": 0.32180663373323926, "grad_norm": 6.847682952880859, "learning_rate": 1.2954545454545455e-05, "loss": 4.4811, "step": 57 }, { "epoch": 0.3387438249823571, "grad_norm": 8.383002281188965, "learning_rate": 1.3636363636363637e-05, "loss": 4.4239, "step": 60 }, { "epoch": 0.35568101623147497, "grad_norm": 7.014843463897705, "learning_rate": 1.431818181818182e-05, "loss": 4.0273, "step": 63 }, { "epoch": 0.3726182074805928, "grad_norm": 5.9739885330200195, "learning_rate": 1.5000000000000002e-05, "loss": 3.4508, "step": 66 }, { "epoch": 0.3895553987297107, "grad_norm": 11.202752113342285, "learning_rate": 1.5681818181818182e-05, "loss": 3.9702, "step": 69 }, { "epoch": 0.40649258997882853, "grad_norm": 7.064818859100342, "learning_rate": 1.6363636363636366e-05, "loss": 3.5295, "step": 72 }, { "epoch": 0.4234297812279464, "grad_norm": 5.912719249725342, "learning_rate": 1.7045454545454546e-05, "loss": 3.6395, "step": 75 }, { "epoch": 0.44036697247706424, "grad_norm": 5.033207893371582, "learning_rate": 1.772727272727273e-05, "loss": 3.2398, "step": 78 }, { "epoch": 0.4573041637261821, "grad_norm": 5.218384265899658, "learning_rate": 1.840909090909091e-05, "loss": 3.116, "step": 81 }, { "epoch": 0.4573041637261821, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.5546875, "eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468, "eval_VitaminC_cosine_ap": 0.5292859731465609, "eval_VitaminC_cosine_f1": 0.6542553191489362, "eval_VitaminC_cosine_f1_threshold": 0.452939510345459, "eval_VitaminC_cosine_precision": 0.48616600790513836, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5546875, "eval_VitaminC_dot_accuracy_threshold": 414.42559814453125, "eval_VitaminC_dot_ap": 0.5222732504955002, "eval_VitaminC_dot_f1": 0.6542553191489362, "eval_VitaminC_dot_f1_threshold": 212.6934814453125, "eval_VitaminC_dot_precision": 0.48616600790513836, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.5546875, "eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875, "eval_VitaminC_euclidean_ap": 0.5291787221346742, "eval_VitaminC_euclidean_f1": 0.6542553191489362, "eval_VitaminC_euclidean_f1_threshold": 22.683509826660156, "eval_VitaminC_euclidean_precision": 0.48616600790513836, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.55859375, "eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547, "eval_VitaminC_manhattan_ap": 0.5305698453165033, "eval_VitaminC_manhattan_f1": 0.6542553191489362, "eval_VitaminC_manhattan_f1_threshold": 415.5366516113281, "eval_VitaminC_manhattan_precision": 0.48616600790513836, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.55859375, "eval_VitaminC_max_accuracy_threshold": 414.42559814453125, "eval_VitaminC_max_ap": 0.5305698453165033, "eval_VitaminC_max_f1": 0.6542553191489362, "eval_VitaminC_max_f1_threshold": 415.5366516113281, "eval_VitaminC_max_precision": 0.48616600790513836, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5305698453165033, "eval_sts-test_pearson_cosine": 0.45494716382349193, "eval_sts-test_pearson_dot": 0.44837123659858896, "eval_sts-test_pearson_euclidean": 0.4480861256491879, "eval_sts-test_pearson_manhattan": 0.4417008219313264, "eval_sts-test_pearson_max": 0.45494716382349193, "eval_sts-test_spearman_cosine": 0.48921418507251446, "eval_sts-test_spearman_dot": 0.46707725062744593, "eval_sts-test_spearman_euclidean": 0.4610824798409968, "eval_sts-test_spearman_manhattan": 0.46068648052845956, "eval_sts-test_spearman_max": 0.48921418507251446, "eval_vitaminc-pairs_loss": 2.5043575763702393, "eval_vitaminc-pairs_runtime": 1.4778, "eval_vitaminc-pairs_samples_per_second": 73.079, "eval_vitaminc-pairs_steps_per_second": 1.353, "step": 81 }, { "epoch": 0.4573041637261821, "eval_negation-triplets_loss": 3.4229447841644287, "eval_negation-triplets_runtime": 0.2991, "eval_negation-triplets_samples_per_second": 213.954, "eval_negation-triplets_steps_per_second": 3.343, "step": 81 }, { "epoch": 0.4573041637261821, "eval_scitail-pairs-pos_loss": 0.2784869372844696, "eval_scitail-pairs-pos_runtime": 0.3633, "eval_scitail-pairs-pos_samples_per_second": 148.649, "eval_scitail-pairs-pos_steps_per_second": 2.753, "step": 81 }, { "epoch": 0.4573041637261821, "eval_xsum-pairs_loss": 2.428964614868164, "eval_xsum-pairs_runtime": 3.1548, "eval_xsum-pairs_samples_per_second": 40.573, "eval_xsum-pairs_steps_per_second": 0.634, "step": 81 }, { "epoch": 0.4573041637261821, "eval_sciq_pairs_loss": 0.15256048738956451, "eval_sciq_pairs_runtime": 3.2432, "eval_sciq_pairs_samples_per_second": 39.467, "eval_sciq_pairs_steps_per_second": 0.617, "step": 81 }, { "epoch": 0.4573041637261821, "eval_qasc_pairs_loss": 1.2902077436447144, "eval_qasc_pairs_runtime": 0.6211, "eval_qasc_pairs_samples_per_second": 206.085, "eval_qasc_pairs_steps_per_second": 3.22, "step": 81 }, { "epoch": 0.4573041637261821, "eval_openbookqa_pairs_loss": 2.4784862995147705, "eval_openbookqa_pairs_runtime": 0.5758, "eval_openbookqa_pairs_samples_per_second": 222.308, "eval_openbookqa_pairs_steps_per_second": 3.474, "step": 81 }, { "epoch": 0.4573041637261821, "eval_msmarco_pairs_loss": 2.967724084854126, "eval_msmarco_pairs_runtime": 1.2944, "eval_msmarco_pairs_samples_per_second": 98.885, "eval_msmarco_pairs_steps_per_second": 1.545, "step": 81 }, { "epoch": 0.4573041637261821, "eval_nq_pairs_loss": 3.358661413192749, "eval_nq_pairs_runtime": 2.3827, "eval_nq_pairs_samples_per_second": 53.722, "eval_nq_pairs_steps_per_second": 0.839, "step": 81 }, { "epoch": 0.4573041637261821, "eval_trivia_pairs_loss": 3.1391680240631104, "eval_trivia_pairs_runtime": 4.4155, "eval_trivia_pairs_samples_per_second": 28.989, "eval_trivia_pairs_steps_per_second": 0.453, "step": 81 }, { "epoch": 0.4573041637261821, "eval_gooaq_pairs_loss": 2.8774912357330322, "eval_gooaq_pairs_runtime": 0.8746, "eval_gooaq_pairs_samples_per_second": 146.346, "eval_gooaq_pairs_steps_per_second": 2.287, "step": 81 }, { "epoch": 0.4573041637261821, "eval_paws-pos_loss": 0.19754411280155182, "eval_paws-pos_runtime": 0.684, "eval_paws-pos_samples_per_second": 187.141, "eval_paws-pos_steps_per_second": 2.924, "step": 81 }, { "epoch": 0.47424135497529996, "grad_norm": 5.149569988250732, "learning_rate": 1.9090909090909094e-05, "loss": 2.6049, "step": 84 }, { "epoch": 0.4911785462244178, "grad_norm": 5.012928009033203, "learning_rate": 1.9772727272727274e-05, "loss": 2.7738, "step": 87 }, { "epoch": 0.5081157374735357, "grad_norm": 4.880725383758545, "learning_rate": 2.0454545454545457e-05, "loss": 2.5416, "step": 90 }, { "epoch": 0.5250529287226535, "grad_norm": 5.618528366088867, "learning_rate": 2.113636363636364e-05, "loss": 2.3913, "step": 93 }, { "epoch": 0.5419901199717714, "grad_norm": 5.020515441894531, "learning_rate": 2.1818181818181818e-05, "loss": 2.3144, "step": 96 }, { "epoch": 0.5589273112208892, "grad_norm": 4.818451404571533, "learning_rate": 2.25e-05, "loss": 2.1857, "step": 99 }, { "epoch": 0.5758645024700071, "grad_norm": 5.094771385192871, "learning_rate": 2.3181818181818185e-05, "loss": 1.8881, "step": 102 }, { "epoch": 0.592801693719125, "grad_norm": 3.795962333679199, "learning_rate": 2.3863636363636365e-05, "loss": 2.2699, "step": 105 }, { "epoch": 0.6097388849682428, "grad_norm": 4.46245813369751, "learning_rate": 2.454545454545455e-05, "loss": 2.1425, "step": 108 }, { "epoch": 0.6097388849682428, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.5546875, "eval_VitaminC_cosine_accuracy_threshold": 0.8830112218856812, "eval_VitaminC_cosine_ap": 0.5302172957740995, "eval_VitaminC_cosine_f1": 0.6558265582655827, "eval_VitaminC_cosine_f1_threshold": 0.5253933668136597, "eval_VitaminC_cosine_precision": 0.491869918699187, "eval_VitaminC_cosine_recall": 0.983739837398374, "eval_VitaminC_dot_accuracy": 0.5390625, "eval_VitaminC_dot_accuracy_threshold": 427.5576171875, "eval_VitaminC_dot_ap": 0.517120157327104, "eval_VitaminC_dot_f1": 0.6542553191489362, "eval_VitaminC_dot_f1_threshold": 175.80963134765625, "eval_VitaminC_dot_precision": 0.48616600790513836, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.5625, "eval_VitaminC_euclidean_accuracy_threshold": 10.817148208618164, "eval_VitaminC_euclidean_ap": 0.532255112376416, "eval_VitaminC_euclidean_f1": 0.6558265582655827, "eval_VitaminC_euclidean_f1_threshold": 21.10729217529297, "eval_VitaminC_euclidean_precision": 0.491869918699187, "eval_VitaminC_euclidean_recall": 0.983739837398374, "eval_VitaminC_manhattan_accuracy": 0.5546875, "eval_VitaminC_manhattan_accuracy_threshold": 224.70416259765625, "eval_VitaminC_manhattan_ap": 0.5298930718604624, "eval_VitaminC_manhattan_f1": 0.6558265582655827, "eval_VitaminC_manhattan_f1_threshold": 415.3311767578125, "eval_VitaminC_manhattan_precision": 0.491869918699187, "eval_VitaminC_manhattan_recall": 0.983739837398374, "eval_VitaminC_max_accuracy": 0.5625, "eval_VitaminC_max_accuracy_threshold": 427.5576171875, "eval_VitaminC_max_ap": 0.532255112376416, "eval_VitaminC_max_f1": 0.6558265582655827, "eval_VitaminC_max_f1_threshold": 415.3311767578125, "eval_VitaminC_max_precision": 0.491869918699187, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.532255112376416, "eval_sts-test_pearson_cosine": 0.755963151531783, "eval_sts-test_pearson_dot": 0.7384823091540473, "eval_sts-test_pearson_euclidean": 0.764089555623164, "eval_sts-test_pearson_manhattan": 0.7670467479701304, "eval_sts-test_pearson_max": 0.7670467479701304, "eval_sts-test_spearman_cosine": 0.7806331583677342, "eval_sts-test_spearman_dot": 0.7442842883778696, "eval_sts-test_spearman_euclidean": 0.7674205303105437, "eval_sts-test_spearman_manhattan": 0.7664974867050092, "eval_sts-test_spearman_max": 0.7806331583677342, "eval_vitaminc-pairs_loss": 2.721674919128418, "eval_vitaminc-pairs_runtime": 1.4468, "eval_vitaminc-pairs_samples_per_second": 74.65, "eval_vitaminc-pairs_steps_per_second": 1.382, "step": 108 }, { "epoch": 0.6097388849682428, "eval_negation-triplets_loss": 2.338909387588501, "eval_negation-triplets_runtime": 0.3017, "eval_negation-triplets_samples_per_second": 212.101, "eval_negation-triplets_steps_per_second": 3.314, "step": 108 }, { "epoch": 0.6097388849682428, "eval_scitail-pairs-pos_loss": 0.23291125893592834, "eval_scitail-pairs-pos_runtime": 0.3664, "eval_scitail-pairs-pos_samples_per_second": 147.385, "eval_scitail-pairs-pos_steps_per_second": 2.729, "step": 108 }, { "epoch": 0.6097388849682428, "eval_xsum-pairs_loss": 1.2065516710281372, "eval_xsum-pairs_runtime": 3.1488, "eval_xsum-pairs_samples_per_second": 40.65, "eval_xsum-pairs_steps_per_second": 0.635, "step": 108 }, { "epoch": 0.6097388849682428, "eval_sciq_pairs_loss": 0.09487833082675934, "eval_sciq_pairs_runtime": 3.2618, "eval_sciq_pairs_samples_per_second": 39.242, "eval_sciq_pairs_steps_per_second": 0.613, "step": 108 }, { "epoch": 0.6097388849682428, "eval_qasc_pairs_loss": 0.8461999297142029, "eval_qasc_pairs_runtime": 0.6246, "eval_qasc_pairs_samples_per_second": 204.93, "eval_qasc_pairs_steps_per_second": 3.202, "step": 108 }, { "epoch": 0.6097388849682428, "eval_openbookqa_pairs_loss": 1.5739191770553589, "eval_openbookqa_pairs_runtime": 0.5751, "eval_openbookqa_pairs_samples_per_second": 222.568, "eval_openbookqa_pairs_steps_per_second": 3.478, "step": 108 }, { "epoch": 0.6097388849682428, "eval_msmarco_pairs_loss": 1.6446179151535034, "eval_msmarco_pairs_runtime": 1.2828, "eval_msmarco_pairs_samples_per_second": 99.784, "eval_msmarco_pairs_steps_per_second": 1.559, "step": 108 }, { "epoch": 0.6097388849682428, "eval_nq_pairs_loss": 2.364896535873413, "eval_nq_pairs_runtime": 2.3802, "eval_nq_pairs_samples_per_second": 53.777, "eval_nq_pairs_steps_per_second": 0.84, "step": 108 }, { "epoch": 0.6097388849682428, "eval_trivia_pairs_loss": 1.7080069780349731, "eval_trivia_pairs_runtime": 4.4372, "eval_trivia_pairs_samples_per_second": 28.847, "eval_trivia_pairs_steps_per_second": 0.451, "step": 108 }, { "epoch": 0.6097388849682428, "eval_gooaq_pairs_loss": 1.7924479246139526, "eval_gooaq_pairs_runtime": 0.8761, "eval_gooaq_pairs_samples_per_second": 146.094, "eval_gooaq_pairs_steps_per_second": 2.283, "step": 108 }, { "epoch": 0.6097388849682428, "eval_paws-pos_loss": 0.08000019192695618, "eval_paws-pos_runtime": 0.6839, "eval_paws-pos_samples_per_second": 187.168, "eval_paws-pos_steps_per_second": 2.924, "step": 108 }, { "epoch": 0.6266760762173607, "grad_norm": 4.418070316314697, "learning_rate": 2.5227272727272732e-05, "loss": 2.1276, "step": 111 }, { "epoch": 0.6436132674664785, "grad_norm": 4.3495259284973145, "learning_rate": 2.590909090909091e-05, "loss": 1.7531, "step": 114 }, { "epoch": 0.6605504587155964, "grad_norm": 4.294332027435303, "learning_rate": 2.6590909090909093e-05, "loss": 2.0179, "step": 117 }, { "epoch": 0.6774876499647142, "grad_norm": 3.4215610027313232, "learning_rate": 2.7272727272727273e-05, "loss": 1.5305, "step": 120 }, { "epoch": 0.6944248412138321, "grad_norm": 4.37844181060791, "learning_rate": 2.7954545454545457e-05, "loss": 1.6925, "step": 123 }, { "epoch": 0.7113620324629499, "grad_norm": 4.019878387451172, "learning_rate": 2.863636363636364e-05, "loss": 1.5248, "step": 126 }, { "epoch": 0.7282992237120678, "grad_norm": 4.662445068359375, "learning_rate": 2.931818181818182e-05, "loss": 1.523, "step": 129 }, { "epoch": 0.7452364149611856, "grad_norm": 4.6323161125183105, "learning_rate": 3.0000000000000004e-05, "loss": 1.5474, "step": 132 }, { "epoch": 0.7621736062103035, "grad_norm": 4.586575984954834, "learning_rate": 3.068181818181819e-05, "loss": 1.7221, "step": 135 }, { "epoch": 0.7621736062103035, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.56640625, "eval_VitaminC_cosine_accuracy_threshold": 0.8478574156761169, "eval_VitaminC_cosine_ap": 0.5325579595957614, "eval_VitaminC_cosine_f1": 0.6559999999999999, "eval_VitaminC_cosine_f1_threshold": 0.35839784145355225, "eval_VitaminC_cosine_precision": 0.4880952380952381, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5625, "eval_VitaminC_dot_accuracy_threshold": 366.9839172363281, "eval_VitaminC_dot_ap": 0.5326813797607027, "eval_VitaminC_dot_f1": 0.6559999999999999, "eval_VitaminC_dot_f1_threshold": 157.35829162597656, "eval_VitaminC_dot_precision": 0.4880952380952381, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.5625, "eval_VitaminC_euclidean_accuracy_threshold": 12.044445037841797, "eval_VitaminC_euclidean_ap": 0.5304103559932005, "eval_VitaminC_euclidean_f1": 0.6542553191489362, "eval_VitaminC_euclidean_f1_threshold": 24.461441040039062, "eval_VitaminC_euclidean_precision": 0.48616600790513836, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5625, "eval_VitaminC_manhattan_accuracy_threshold": 239.24815368652344, "eval_VitaminC_manhattan_ap": 0.5314780667834758, "eval_VitaminC_manhattan_f1": 0.6575342465753424, "eval_VitaminC_manhattan_f1_threshold": 400.6834716796875, "eval_VitaminC_manhattan_precision": 0.49586776859504134, "eval_VitaminC_manhattan_recall": 0.975609756097561, "eval_VitaminC_max_accuracy": 0.56640625, "eval_VitaminC_max_accuracy_threshold": 366.9839172363281, "eval_VitaminC_max_ap": 0.5326813797607027, "eval_VitaminC_max_f1": 0.6575342465753424, "eval_VitaminC_max_f1_threshold": 400.6834716796875, "eval_VitaminC_max_precision": 0.49586776859504134, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5326813797607027, "eval_sts-test_pearson_cosine": 0.7919597804368175, "eval_sts-test_pearson_dot": 0.7994867531185785, "eval_sts-test_pearson_euclidean": 0.8117960113303863, "eval_sts-test_pearson_manhattan": 0.8144714466358016, "eval_sts-test_pearson_max": 0.8144714466358016, "eval_sts-test_spearman_cosine": 0.831478610786181, "eval_sts-test_spearman_dot": 0.8192534746855707, "eval_sts-test_spearman_euclidean": 0.8185577905406703, "eval_sts-test_spearman_manhattan": 0.8154771593606782, "eval_sts-test_spearman_max": 0.831478610786181, "eval_vitaminc-pairs_loss": 2.852091073989868, "eval_vitaminc-pairs_runtime": 1.4427, "eval_vitaminc-pairs_samples_per_second": 74.858, "eval_vitaminc-pairs_steps_per_second": 1.386, "step": 135 }, { "epoch": 0.7621736062103035, "eval_negation-triplets_loss": 2.074247121810913, "eval_negation-triplets_runtime": 0.3, "eval_negation-triplets_samples_per_second": 213.353, "eval_negation-triplets_steps_per_second": 3.334, "step": 135 }, { "epoch": 0.7621736062103035, "eval_scitail-pairs-pos_loss": 0.2149849385023117, "eval_scitail-pairs-pos_runtime": 0.3744, "eval_scitail-pairs-pos_samples_per_second": 144.219, "eval_scitail-pairs-pos_steps_per_second": 2.671, "step": 135 }, { "epoch": 0.7621736062103035, "eval_xsum-pairs_loss": 0.7706837058067322, "eval_xsum-pairs_runtime": 3.1609, "eval_xsum-pairs_samples_per_second": 40.495, "eval_xsum-pairs_steps_per_second": 0.633, "step": 135 }, { "epoch": 0.7621736062103035, "eval_sciq_pairs_loss": 0.07513368874788284, "eval_sciq_pairs_runtime": 3.2949, "eval_sciq_pairs_samples_per_second": 38.848, "eval_sciq_pairs_steps_per_second": 0.607, "step": 135 }, { "epoch": 0.7621736062103035, "eval_qasc_pairs_loss": 0.6355602741241455, "eval_qasc_pairs_runtime": 0.6392, "eval_qasc_pairs_samples_per_second": 200.246, "eval_qasc_pairs_steps_per_second": 3.129, "step": 135 }, { "epoch": 0.7621736062103035, "eval_openbookqa_pairs_loss": 1.4014525413513184, "eval_openbookqa_pairs_runtime": 0.622, "eval_openbookqa_pairs_samples_per_second": 205.786, "eval_openbookqa_pairs_steps_per_second": 3.215, "step": 135 }, { "epoch": 0.7621736062103035, "eval_msmarco_pairs_loss": 1.1524099111557007, "eval_msmarco_pairs_runtime": 1.31, "eval_msmarco_pairs_samples_per_second": 97.709, "eval_msmarco_pairs_steps_per_second": 1.527, "step": 135 }, { "epoch": 0.7621736062103035, "eval_nq_pairs_loss": 1.7768574953079224, "eval_nq_pairs_runtime": 2.3979, "eval_nq_pairs_samples_per_second": 53.379, "eval_nq_pairs_steps_per_second": 0.834, "step": 135 }, { "epoch": 0.7621736062103035, "eval_trivia_pairs_loss": 1.4495295286178589, "eval_trivia_pairs_runtime": 4.4194, "eval_trivia_pairs_samples_per_second": 28.964, "eval_trivia_pairs_steps_per_second": 0.453, "step": 135 }, { "epoch": 0.7621736062103035, "eval_gooaq_pairs_loss": 1.3955378532409668, "eval_gooaq_pairs_runtime": 0.8788, "eval_gooaq_pairs_samples_per_second": 145.649, "eval_gooaq_pairs_steps_per_second": 2.276, "step": 135 }, { "epoch": 0.7621736062103035, "eval_paws-pos_loss": 0.06006813049316406, "eval_paws-pos_runtime": 0.6896, "eval_paws-pos_samples_per_second": 185.603, "eval_paws-pos_steps_per_second": 2.9, "step": 135 }, { "epoch": 0.7791107974594214, "grad_norm": 3.864208936691284, "learning_rate": 3.1363636363636365e-05, "loss": 1.5366, "step": 138 }, { "epoch": 0.7960479887085392, "grad_norm": 3.837550640106201, "learning_rate": 3.204545454545455e-05, "loss": 1.3045, "step": 141 }, { "epoch": 0.8129851799576571, "grad_norm": 3.5258102416992188, "learning_rate": 3.272727272727273e-05, "loss": 1.1999, "step": 144 }, { "epoch": 0.8299223712067749, "grad_norm": 3.4431183338165283, "learning_rate": 3.340909090909091e-05, "loss": 1.3483, "step": 147 }, { "epoch": 0.8468595624558928, "grad_norm": 3.6455864906311035, "learning_rate": 3.409090909090909e-05, "loss": 1.2009, "step": 150 }, { "epoch": 0.8637967537050106, "grad_norm": 4.508525371551514, "learning_rate": 3.4772727272727276e-05, "loss": 1.4495, "step": 153 }, { "epoch": 0.8807339449541285, "grad_norm": 3.0432400703430176, "learning_rate": 3.545454545454546e-05, "loss": 1.2329, "step": 156 }, { "epoch": 0.8976711362032463, "grad_norm": 3.0190365314483643, "learning_rate": 3.613636363636364e-05, "loss": 1.1905, "step": 159 }, { "epoch": 0.9146083274523642, "grad_norm": 3.74668288230896, "learning_rate": 3.681818181818182e-05, "loss": 1.277, "step": 162 }, { "epoch": 0.9146083274523642, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.8101799488067627, "eval_VitaminC_cosine_ap": 0.5298515171639175, "eval_VitaminC_cosine_f1": 0.6542553191489362, "eval_VitaminC_cosine_f1_threshold": 0.345889687538147, "eval_VitaminC_cosine_precision": 0.48616600790513836, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.55078125, "eval_VitaminC_dot_accuracy_threshold": 373.5804443359375, "eval_VitaminC_dot_ap": 0.5310954683437364, "eval_VitaminC_dot_f1": 0.6542553191489362, "eval_VitaminC_dot_f1_threshold": 155.41326904296875, "eval_VitaminC_dot_precision": 0.48616600790513836, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.57421875, "eval_VitaminC_euclidean_accuracy_threshold": 13.60124683380127, "eval_VitaminC_euclidean_ap": 0.5286057955992807, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 22.904512405395508, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 262.37322998046875, "eval_VitaminC_manhattan_ap": 0.5253560845853567, "eval_VitaminC_manhattan_f1": 0.6559999999999999, "eval_VitaminC_manhattan_f1_threshold": 465.94549560546875, "eval_VitaminC_manhattan_precision": 0.4880952380952381, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.57421875, "eval_VitaminC_max_accuracy_threshold": 373.5804443359375, "eval_VitaminC_max_ap": 0.5310954683437364, "eval_VitaminC_max_f1": 0.6577540106951871, "eval_VitaminC_max_f1_threshold": 465.94549560546875, "eval_VitaminC_max_precision": 0.4900398406374502, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5310954683437364, "eval_sts-test_pearson_cosine": 0.8067612938723231, "eval_sts-test_pearson_dot": 0.8217874837658639, "eval_sts-test_pearson_euclidean": 0.827948115812785, "eval_sts-test_pearson_manhattan": 0.8261527694953693, "eval_sts-test_pearson_max": 0.827948115812785, "eval_sts-test_spearman_cosine": 0.8547777638284432, "eval_sts-test_spearman_dot": 0.8498786150097738, "eval_sts-test_spearman_euclidean": 0.8373845860667446, "eval_sts-test_spearman_manhattan": 0.8324507067477893, "eval_sts-test_spearman_max": 0.8547777638284432, "eval_vitaminc-pairs_loss": 2.776399612426758, "eval_vitaminc-pairs_runtime": 1.4503, "eval_vitaminc-pairs_samples_per_second": 74.467, "eval_vitaminc-pairs_steps_per_second": 1.379, "step": 162 }, { "epoch": 0.9146083274523642, "eval_negation-triplets_loss": 2.005451202392578, "eval_negation-triplets_runtime": 0.2981, "eval_negation-triplets_samples_per_second": 214.709, "eval_negation-triplets_steps_per_second": 3.355, "step": 162 }, { "epoch": 0.9146083274523642, "eval_scitail-pairs-pos_loss": 0.19877880811691284, "eval_scitail-pairs-pos_runtime": 0.3623, "eval_scitail-pairs-pos_samples_per_second": 149.043, "eval_scitail-pairs-pos_steps_per_second": 2.76, "step": 162 }, { "epoch": 0.9146083274523642, "eval_xsum-pairs_loss": 0.5586928725242615, "eval_xsum-pairs_runtime": 3.1466, "eval_xsum-pairs_samples_per_second": 40.679, "eval_xsum-pairs_steps_per_second": 0.636, "step": 162 }, { "epoch": 0.9146083274523642, "eval_sciq_pairs_loss": 0.06038254499435425, "eval_sciq_pairs_runtime": 3.4092, "eval_sciq_pairs_samples_per_second": 37.545, "eval_sciq_pairs_steps_per_second": 0.587, "step": 162 }, { "epoch": 0.9146083274523642, "eval_qasc_pairs_loss": 0.49434012174606323, "eval_qasc_pairs_runtime": 0.6342, "eval_qasc_pairs_samples_per_second": 201.832, "eval_qasc_pairs_steps_per_second": 3.154, "step": 162 }, { "epoch": 0.9146083274523642, "eval_openbookqa_pairs_loss": 1.1903400421142578, "eval_openbookqa_pairs_runtime": 0.5754, "eval_openbookqa_pairs_samples_per_second": 222.449, "eval_openbookqa_pairs_steps_per_second": 3.476, "step": 162 }, { "epoch": 0.9146083274523642, "eval_msmarco_pairs_loss": 0.8656420707702637, "eval_msmarco_pairs_runtime": 1.2858, "eval_msmarco_pairs_samples_per_second": 99.547, "eval_msmarco_pairs_steps_per_second": 1.555, "step": 162 }, { "epoch": 0.9146083274523642, "eval_nq_pairs_loss": 1.1553651094436646, "eval_nq_pairs_runtime": 2.3754, "eval_nq_pairs_samples_per_second": 53.885, "eval_nq_pairs_steps_per_second": 0.842, "step": 162 }, { "epoch": 0.9146083274523642, "eval_trivia_pairs_loss": 1.2928619384765625, "eval_trivia_pairs_runtime": 4.4084, "eval_trivia_pairs_samples_per_second": 29.035, "eval_trivia_pairs_steps_per_second": 0.454, "step": 162 }, { "epoch": 0.9146083274523642, "eval_gooaq_pairs_loss": 1.1580811738967896, "eval_gooaq_pairs_runtime": 0.8731, "eval_gooaq_pairs_samples_per_second": 146.607, "eval_gooaq_pairs_steps_per_second": 2.291, "step": 162 }, { "epoch": 0.9146083274523642, "eval_paws-pos_loss": 0.052534349262714386, "eval_paws-pos_runtime": 0.6835, "eval_paws-pos_samples_per_second": 187.258, "eval_paws-pos_steps_per_second": 2.926, "step": 162 }, { "epoch": 0.9315455187014821, "grad_norm": 4.7817864418029785, "learning_rate": 3.7500000000000003e-05, "loss": 1.339, "step": 165 }, { "epoch": 0.9484827099505999, "grad_norm": 4.000570774078369, "learning_rate": 3.818181818181819e-05, "loss": 1.1535, "step": 168 }, { "epoch": 0.9654199011997178, "grad_norm": 3.5971670150756836, "learning_rate": 3.8863636363636364e-05, "loss": 1.1643, "step": 171 }, { "epoch": 0.9823570924488356, "grad_norm": 3.6582131385803223, "learning_rate": 3.954545454545455e-05, "loss": 1.2221, "step": 174 }, { "epoch": 0.9992942836979535, "grad_norm": 4.0953898429870605, "learning_rate": 3.9999477905707075e-05, "loss": 1.0974, "step": 177 }, { "epoch": 1.0162314749470713, "grad_norm": 4.092026233673096, "learning_rate": 3.999164730903481e-05, "loss": 1.0984, "step": 180 }, { "epoch": 1.0331686661961892, "grad_norm": 3.6480906009674072, "learning_rate": 3.997442539262898e-05, "loss": 1.0543, "step": 183 }, { "epoch": 1.050105857445307, "grad_norm": 3.433056592941284, "learning_rate": 3.99478242943326e-05, "loss": 1.0994, "step": 186 }, { "epoch": 1.067043048694425, "grad_norm": 3.507981777191162, "learning_rate": 3.991186276234698e-05, "loss": 1.0621, "step": 189 }, { "epoch": 1.067043048694425, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.578125, "eval_VitaminC_cosine_accuracy_threshold": 0.7840081453323364, "eval_VitaminC_cosine_ap": 0.5400770399437144, "eval_VitaminC_cosine_f1": 0.6577540106951871, "eval_VitaminC_cosine_f1_threshold": 0.39448243379592896, "eval_VitaminC_cosine_precision": 0.4900398406374502, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5625, "eval_VitaminC_dot_accuracy_threshold": 323.20281982421875, "eval_VitaminC_dot_ap": 0.5420016101916201, "eval_VitaminC_dot_f1": 0.6575342465753424, "eval_VitaminC_dot_f1_threshold": 198.04354858398438, "eval_VitaminC_dot_precision": 0.49586776859504134, "eval_VitaminC_dot_recall": 0.975609756097561, "eval_VitaminC_euclidean_accuracy": 0.5859375, "eval_VitaminC_euclidean_accuracy_threshold": 13.84214973449707, "eval_VitaminC_euclidean_ap": 0.5392157650683609, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 22.595678329467773, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5703125, "eval_VitaminC_manhattan_accuracy_threshold": 275.1253356933594, "eval_VitaminC_manhattan_ap": 0.5341380380767263, "eval_VitaminC_manhattan_f1": 0.6576819407008085, "eval_VitaminC_manhattan_f1_threshold": 457.04986572265625, "eval_VitaminC_manhattan_precision": 0.49193548387096775, "eval_VitaminC_manhattan_recall": 0.991869918699187, "eval_VitaminC_max_accuracy": 0.5859375, "eval_VitaminC_max_accuracy_threshold": 323.20281982421875, "eval_VitaminC_max_ap": 0.5420016101916201, "eval_VitaminC_max_f1": 0.6577540106951871, "eval_VitaminC_max_f1_threshold": 457.04986572265625, "eval_VitaminC_max_precision": 0.49586776859504134, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5420016101916201, "eval_sts-test_pearson_cosine": 0.8193410747427454, "eval_sts-test_pearson_dot": 0.8275444476338831, "eval_sts-test_pearson_euclidean": 0.8464528142983967, "eval_sts-test_pearson_manhattan": 0.8440476980962159, "eval_sts-test_pearson_max": 0.8464528142983967, "eval_sts-test_spearman_cosine": 0.8680272706642878, "eval_sts-test_spearman_dot": 0.8555529342729671, "eval_sts-test_spearman_euclidean": 0.8542457068859202, "eval_sts-test_spearman_manhattan": 0.8510265117511795, "eval_sts-test_spearman_max": 0.8680272706642878, "eval_vitaminc-pairs_loss": 2.6755428314208984, "eval_vitaminc-pairs_runtime": 1.4509, "eval_vitaminc-pairs_samples_per_second": 74.437, "eval_vitaminc-pairs_steps_per_second": 1.378, "step": 189 }, { "epoch": 1.067043048694425, "eval_negation-triplets_loss": 1.9071491956710815, "eval_negation-triplets_runtime": 0.3051, "eval_negation-triplets_samples_per_second": 209.756, "eval_negation-triplets_steps_per_second": 3.277, "step": 189 }, { "epoch": 1.067043048694425, "eval_scitail-pairs-pos_loss": 0.18539850413799286, "eval_scitail-pairs-pos_runtime": 0.4199, "eval_scitail-pairs-pos_samples_per_second": 128.604, "eval_scitail-pairs-pos_steps_per_second": 2.382, "step": 189 }, { "epoch": 1.067043048694425, "eval_xsum-pairs_loss": 0.38365328311920166, "eval_xsum-pairs_runtime": 3.1907, "eval_xsum-pairs_samples_per_second": 40.116, "eval_xsum-pairs_steps_per_second": 0.627, "step": 189 }, { "epoch": 1.067043048694425, "eval_sciq_pairs_loss": 0.05558515340089798, "eval_sciq_pairs_runtime": 3.2891, "eval_sciq_pairs_samples_per_second": 38.917, "eval_sciq_pairs_steps_per_second": 0.608, "step": 189 }, { "epoch": 1.067043048694425, "eval_qasc_pairs_loss": 0.40469691157341003, "eval_qasc_pairs_runtime": 0.6267, "eval_qasc_pairs_samples_per_second": 204.245, "eval_qasc_pairs_steps_per_second": 3.191, "step": 189 }, { "epoch": 1.067043048694425, "eval_openbookqa_pairs_loss": 1.0837312936782837, "eval_openbookqa_pairs_runtime": 0.5765, "eval_openbookqa_pairs_samples_per_second": 222.02, "eval_openbookqa_pairs_steps_per_second": 3.469, "step": 189 }, { "epoch": 1.067043048694425, "eval_msmarco_pairs_loss": 0.6897398233413696, "eval_msmarco_pairs_runtime": 1.2918, "eval_msmarco_pairs_samples_per_second": 99.089, "eval_msmarco_pairs_steps_per_second": 1.548, "step": 189 }, { "epoch": 1.067043048694425, "eval_nq_pairs_loss": 0.9603796601295471, "eval_nq_pairs_runtime": 2.3975, "eval_nq_pairs_samples_per_second": 53.39, "eval_nq_pairs_steps_per_second": 0.834, "step": 189 }, { "epoch": 1.067043048694425, "eval_trivia_pairs_loss": 1.200446605682373, "eval_trivia_pairs_runtime": 4.4582, "eval_trivia_pairs_samples_per_second": 28.711, "eval_trivia_pairs_steps_per_second": 0.449, "step": 189 }, { "epoch": 1.067043048694425, "eval_gooaq_pairs_loss": 1.0353316068649292, "eval_gooaq_pairs_runtime": 0.8765, "eval_gooaq_pairs_samples_per_second": 146.042, "eval_gooaq_pairs_steps_per_second": 2.282, "step": 189 }, { "epoch": 1.067043048694425, "eval_paws-pos_loss": 0.042069558054208755, "eval_paws-pos_runtime": 0.6909, "eval_paws-pos_samples_per_second": 185.263, "eval_paws-pos_steps_per_second": 2.895, "step": 189 }, { "epoch": 1.0839802399435428, "grad_norm": 2.979419469833374, "learning_rate": 3.986656614201813e-05, "loss": 0.8724, "step": 192 }, { "epoch": 1.1009174311926606, "grad_norm": 2.835219144821167, "learning_rate": 3.981196635797361e-05, "loss": 0.9381, "step": 195 }, { "epoch": 1.1178546224417785, "grad_norm": 3.6650869846343994, "learning_rate": 3.974810189162238e-05, "loss": 0.9617, "step": 198 }, { "epoch": 1.1347918136908963, "grad_norm": 4.188896656036377, "learning_rate": 3.967501775403343e-05, "loss": 1.0139, "step": 201 }, { "epoch": 1.1517290049400142, "grad_norm": 3.1624915599823, "learning_rate": 3.959276545421244e-05, "loss": 1.1073, "step": 204 }, { "epoch": 1.168666196189132, "grad_norm": 3.245002508163452, "learning_rate": 3.950140296279871e-05, "loss": 0.8365, "step": 207 }, { "epoch": 1.18560338743825, "grad_norm": 4.376185894012451, "learning_rate": 3.9400994671208e-05, "loss": 1.1012, "step": 210 }, { "epoch": 1.2025405786873677, "grad_norm": 3.236583948135376, "learning_rate": 3.9291611346250066e-05, "loss": 1.0016, "step": 213 }, { "epoch": 1.2194777699364856, "grad_norm": 3.7601733207702637, "learning_rate": 3.9173330080252904e-05, "loss": 1.0957, "step": 216 }, { "epoch": 1.2194777699364856, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.7863086462020874, "eval_VitaminC_cosine_ap": 0.538511783260847, "eval_VitaminC_cosine_f1": 0.6577540106951871, "eval_VitaminC_cosine_f1_threshold": 0.4006580412387848, "eval_VitaminC_cosine_precision": 0.4900398406374502, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.57421875, "eval_VitaminC_dot_accuracy_threshold": 323.53277587890625, "eval_VitaminC_dot_ap": 0.5304994537787167, "eval_VitaminC_dot_f1": 0.6577540106951871, "eval_VitaminC_dot_f1_threshold": 166.45921325683594, "eval_VitaminC_dot_precision": 0.4900398406374502, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 13.631423950195312, "eval_VitaminC_euclidean_ap": 0.5363284984763951, "eval_VitaminC_euclidean_f1": 0.6542553191489362, "eval_VitaminC_euclidean_f1_threshold": 25.392715454101562, "eval_VitaminC_euclidean_precision": 0.48616600790513836, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 283.5897216796875, "eval_VitaminC_manhattan_ap": 0.5327191155331534, "eval_VitaminC_manhattan_f1": 0.6559999999999999, "eval_VitaminC_manhattan_f1_threshold": 491.0370178222656, "eval_VitaminC_manhattan_precision": 0.4880952380952381, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 323.53277587890625, "eval_VitaminC_max_ap": 0.538511783260847, "eval_VitaminC_max_f1": 0.6577540106951871, "eval_VitaminC_max_f1_threshold": 491.0370178222656, "eval_VitaminC_max_precision": 0.4900398406374502, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.538511783260847, "eval_sts-test_pearson_cosine": 0.8156684919084325, "eval_sts-test_pearson_dot": 0.8230786053133633, "eval_sts-test_pearson_euclidean": 0.845348828865422, "eval_sts-test_pearson_manhattan": 0.8432655375716184, "eval_sts-test_pearson_max": 0.845348828865422, "eval_sts-test_spearman_cosine": 0.8655524539841267, "eval_sts-test_spearman_dot": 0.8507196659909223, "eval_sts-test_spearman_euclidean": 0.8547050804103192, "eval_sts-test_spearman_manhattan": 0.8508668230591436, "eval_sts-test_spearman_max": 0.8655524539841267, "eval_vitaminc-pairs_loss": 2.5465524196624756, "eval_vitaminc-pairs_runtime": 1.4425, "eval_vitaminc-pairs_samples_per_second": 74.869, "eval_vitaminc-pairs_steps_per_second": 1.386, "step": 216 }, { "epoch": 1.2194777699364856, "eval_negation-triplets_loss": 1.9161474704742432, "eval_negation-triplets_runtime": 0.2994, "eval_negation-triplets_samples_per_second": 213.785, "eval_negation-triplets_steps_per_second": 3.34, "step": 216 }, { "epoch": 1.2194777699364856, "eval_scitail-pairs-pos_loss": 0.19009728729724884, "eval_scitail-pairs-pos_runtime": 0.3745, "eval_scitail-pairs-pos_samples_per_second": 144.203, "eval_scitail-pairs-pos_steps_per_second": 2.67, "step": 216 }, { "epoch": 1.2194777699364856, "eval_xsum-pairs_loss": 0.35912859439849854, "eval_xsum-pairs_runtime": 3.1543, "eval_xsum-pairs_samples_per_second": 40.58, "eval_xsum-pairs_steps_per_second": 0.634, "step": 216 }, { "epoch": 1.2194777699364856, "eval_sciq_pairs_loss": 0.05168920382857323, "eval_sciq_pairs_runtime": 3.2561, "eval_sciq_pairs_samples_per_second": 39.31, "eval_sciq_pairs_steps_per_second": 0.614, "step": 216 }, { "epoch": 1.2194777699364856, "eval_qasc_pairs_loss": 0.30753791332244873, "eval_qasc_pairs_runtime": 0.6201, "eval_qasc_pairs_samples_per_second": 206.418, "eval_qasc_pairs_steps_per_second": 3.225, "step": 216 }, { "epoch": 1.2194777699364856, "eval_openbookqa_pairs_loss": 0.9365726113319397, "eval_openbookqa_pairs_runtime": 0.5832, "eval_openbookqa_pairs_samples_per_second": 219.496, "eval_openbookqa_pairs_steps_per_second": 3.43, "step": 216 }, { "epoch": 1.2194777699364856, "eval_msmarco_pairs_loss": 0.5819053053855896, "eval_msmarco_pairs_runtime": 1.2858, "eval_msmarco_pairs_samples_per_second": 99.551, "eval_msmarco_pairs_steps_per_second": 1.555, "step": 216 }, { "epoch": 1.2194777699364856, "eval_nq_pairs_loss": 0.8172401785850525, "eval_nq_pairs_runtime": 2.3809, "eval_nq_pairs_samples_per_second": 53.761, "eval_nq_pairs_steps_per_second": 0.84, "step": 216 }, { "epoch": 1.2194777699364856, "eval_trivia_pairs_loss": 1.1411677598953247, "eval_trivia_pairs_runtime": 4.4162, "eval_trivia_pairs_samples_per_second": 28.984, "eval_trivia_pairs_steps_per_second": 0.453, "step": 216 }, { "epoch": 1.2194777699364856, "eval_gooaq_pairs_loss": 0.9686058759689331, "eval_gooaq_pairs_runtime": 0.8788, "eval_gooaq_pairs_samples_per_second": 145.645, "eval_gooaq_pairs_steps_per_second": 2.276, "step": 216 }, { "epoch": 1.2194777699364856, "eval_paws-pos_loss": 0.03953952714800835, "eval_paws-pos_runtime": 0.708, "eval_paws-pos_samples_per_second": 180.782, "eval_paws-pos_steps_per_second": 2.825, "step": 216 }, { "epoch": 1.2364149611856035, "grad_norm": 3.566471576690674, "learning_rate": 3.904623423672881e-05, "loss": 1.1273, "step": 219 }, { "epoch": 1.2533521524347213, "grad_norm": 4.086460590362549, "learning_rate": 3.891041339162053e-05, "loss": 1.2568, "step": 222 }, { "epoch": 1.2702893436838392, "grad_norm": 3.2877376079559326, "learning_rate": 3.876596327016904e-05, "loss": 0.873, "step": 225 }, { "epoch": 1.287226534932957, "grad_norm": 3.383211851119995, "learning_rate": 3.861298567944728e-05, "loss": 1.0003, "step": 228 }, { "epoch": 1.3041637261820749, "grad_norm": 3.8474605083465576, "learning_rate": 3.8451588436607487e-05, "loss": 1.142, "step": 231 }, { "epoch": 1.3211009174311927, "grad_norm": 3.027008533477783, "learning_rate": 3.8281885292892706e-05, "loss": 0.807, "step": 234 }, { "epoch": 1.3380381086803106, "grad_norm": 2.9607250690460205, "learning_rate": 3.810399585346599e-05, "loss": 1.0231, "step": 237 }, { "epoch": 1.3549752999294284, "grad_norm": 2.511488676071167, "learning_rate": 3.791804549311382e-05, "loss": 0.797, "step": 240 }, { "epoch": 1.3719124911785463, "grad_norm": 2.603672504425049, "learning_rate": 3.7724165267883146e-05, "loss": 0.8473, "step": 243 }, { "epoch": 1.3719124911785463, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.578125, "eval_VitaminC_cosine_accuracy_threshold": 0.7651997804641724, "eval_VitaminC_cosine_ap": 0.5427753322056709, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.38563254475593567, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5703125, "eval_VitaminC_dot_accuracy_threshold": 330.23577880859375, "eval_VitaminC_dot_ap": 0.5507967714924796, "eval_VitaminC_dot_f1": 0.6595174262734584, "eval_VitaminC_dot_f1_threshold": 160.55694580078125, "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 13.033781051635742, "eval_VitaminC_euclidean_ap": 0.5406935655135654, "eval_VitaminC_euclidean_f1": 0.6576819407008085, "eval_VitaminC_euclidean_f1_threshold": 22.224994659423828, "eval_VitaminC_euclidean_precision": 0.49193548387096775, "eval_VitaminC_euclidean_recall": 0.991869918699187, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 274.7045593261719, "eval_VitaminC_manhattan_ap": 0.5366045405118165, "eval_VitaminC_manhattan_f1": 0.6577540106951871, "eval_VitaminC_manhattan_f1_threshold": 475.4096374511719, "eval_VitaminC_manhattan_precision": 0.4900398406374502, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 330.23577880859375, "eval_VitaminC_max_ap": 0.5507967714924796, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 475.4096374511719, "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5507967714924796, "eval_sts-test_pearson_cosine": 0.8204982036322743, "eval_sts-test_pearson_dot": 0.8243481169631539, "eval_sts-test_pearson_euclidean": 0.8495098083065487, "eval_sts-test_pearson_manhattan": 0.8491539225772841, "eval_sts-test_pearson_max": 0.8495098083065487, "eval_sts-test_spearman_cosine": 0.8687444375928703, "eval_sts-test_spearman_dot": 0.8509044179305871, "eval_sts-test_spearman_euclidean": 0.8563313271350431, "eval_sts-test_spearman_manhattan": 0.8563900467437737, "eval_sts-test_spearman_max": 0.8687444375928703, "eval_vitaminc-pairs_loss": 2.5139691829681396, "eval_vitaminc-pairs_runtime": 1.449, "eval_vitaminc-pairs_samples_per_second": 74.533, "eval_vitaminc-pairs_steps_per_second": 1.38, "step": 243 }, { "epoch": 1.3719124911785463, "eval_negation-triplets_loss": 1.8629425764083862, "eval_negation-triplets_runtime": 0.3014, "eval_negation-triplets_samples_per_second": 212.31, "eval_negation-triplets_steps_per_second": 3.317, "step": 243 }, { "epoch": 1.3719124911785463, "eval_scitail-pairs-pos_loss": 0.17502877116203308, "eval_scitail-pairs-pos_runtime": 0.3707, "eval_scitail-pairs-pos_samples_per_second": 145.673, "eval_scitail-pairs-pos_steps_per_second": 2.698, "step": 243 }, { "epoch": 1.3719124911785463, "eval_xsum-pairs_loss": 0.2802315950393677, "eval_xsum-pairs_runtime": 3.1565, "eval_xsum-pairs_samples_per_second": 40.551, "eval_xsum-pairs_steps_per_second": 0.634, "step": 243 }, { "epoch": 1.3719124911785463, "eval_sciq_pairs_loss": 0.046695925295352936, "eval_sciq_pairs_runtime": 3.2866, "eval_sciq_pairs_samples_per_second": 38.946, "eval_sciq_pairs_steps_per_second": 0.609, "step": 243 }, { "epoch": 1.3719124911785463, "eval_qasc_pairs_loss": 0.2354799211025238, "eval_qasc_pairs_runtime": 0.6228, "eval_qasc_pairs_samples_per_second": 205.533, "eval_qasc_pairs_steps_per_second": 3.211, "step": 243 }, { "epoch": 1.3719124911785463, "eval_openbookqa_pairs_loss": 0.8562020659446716, "eval_openbookqa_pairs_runtime": 0.5764, "eval_openbookqa_pairs_samples_per_second": 222.058, "eval_openbookqa_pairs_steps_per_second": 3.47, "step": 243 }, { "epoch": 1.3719124911785463, "eval_msmarco_pairs_loss": 0.5559017658233643, "eval_msmarco_pairs_runtime": 1.2826, "eval_msmarco_pairs_samples_per_second": 99.801, "eval_msmarco_pairs_steps_per_second": 1.559, "step": 243 }, { "epoch": 1.3719124911785463, "eval_nq_pairs_loss": 0.743526041507721, "eval_nq_pairs_runtime": 2.3784, "eval_nq_pairs_samples_per_second": 53.817, "eval_nq_pairs_steps_per_second": 0.841, "step": 243 }, { "epoch": 1.3719124911785463, "eval_trivia_pairs_loss": 1.106662392616272, "eval_trivia_pairs_runtime": 4.4193, "eval_trivia_pairs_samples_per_second": 28.964, "eval_trivia_pairs_steps_per_second": 0.453, "step": 243 }, { "epoch": 1.3719124911785463, "eval_gooaq_pairs_loss": 0.8928955793380737, "eval_gooaq_pairs_runtime": 0.8831, "eval_gooaq_pairs_samples_per_second": 144.944, "eval_gooaq_pairs_steps_per_second": 2.265, "step": 243 }, { "epoch": 1.3719124911785463, "eval_paws-pos_loss": 0.03428014740347862, "eval_paws-pos_runtime": 0.6872, "eval_paws-pos_samples_per_second": 186.261, "eval_paws-pos_steps_per_second": 2.91, "step": 243 }, { "epoch": 1.3888496824276642, "grad_norm": 4.478828430175781, "learning_rate": 3.752249182271433e-05, "loss": 0.9531, "step": 246 }, { "epoch": 1.405786873676782, "grad_norm": 3.3206863403320312, "learning_rate": 3.731316729513507e-05, "loss": 0.9023, "step": 249 }, { "epoch": 1.4227240649258999, "grad_norm": 3.4713878631591797, "learning_rate": 3.7096339215083274e-05, "loss": 0.8922, "step": 252 }, { "epoch": 1.4396612561750177, "grad_norm": 3.4212491512298584, "learning_rate": 3.687216040092931e-05, "loss": 0.9874, "step": 255 }, { "epoch": 1.4565984474241356, "grad_norm": 3.398963689804077, "learning_rate": 3.6640788851771084e-05, "loss": 0.8508, "step": 258 }, { "epoch": 1.4735356386732534, "grad_norm": 3.350128650665283, "learning_rate": 3.64023876360778e-05, "loss": 0.7149, "step": 261 }, { "epoch": 1.4904728299223713, "grad_norm": 3.438978433609009, "learning_rate": 3.615712477676081e-05, "loss": 0.894, "step": 264 }, { "epoch": 1.5074100211714891, "grad_norm": 3.1700806617736816, "learning_rate": 3.5905173132752725e-05, "loss": 0.867, "step": 267 }, { "epoch": 1.524347212420607, "grad_norm": 3.1567916870117188, "learning_rate": 3.5646710277178006e-05, "loss": 0.7493, "step": 270 }, { "epoch": 1.524347212420607, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.58203125, "eval_VitaminC_cosine_accuracy_threshold": 0.7990785241127014, "eval_VitaminC_cosine_ap": 0.5489113961762149, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.3687684237957001, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.57421875, "eval_VitaminC_dot_accuracy_threshold": 328.30560302734375, "eval_VitaminC_dot_ap": 0.5498735151014204, "eval_VitaminC_dot_f1": 0.6595174262734584, "eval_VitaminC_dot_f1_threshold": 153.01849365234375, "eval_VitaminC_dot_precision": 0.492, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 12.773557662963867, "eval_VitaminC_euclidean_ap": 0.5426159894851803, "eval_VitaminC_euclidean_f1": 0.6559999999999999, "eval_VitaminC_euclidean_f1_threshold": 23.71053123474121, "eval_VitaminC_euclidean_precision": 0.4880952380952381, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.58203125, "eval_VitaminC_manhattan_accuracy_threshold": 272.04931640625, "eval_VitaminC_manhattan_ap": 0.5396432749419082, "eval_VitaminC_manhattan_f1": 0.6577540106951871, "eval_VitaminC_manhattan_f1_threshold": 494.33001708984375, "eval_VitaminC_manhattan_precision": 0.4900398406374502, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.58203125, "eval_VitaminC_max_accuracy_threshold": 328.30560302734375, "eval_VitaminC_max_ap": 0.5498735151014204, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 494.33001708984375, "eval_VitaminC_max_precision": 0.492, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5498735151014204, "eval_sts-test_pearson_cosine": 0.8213785723785002, "eval_sts-test_pearson_dot": 0.8169840312248031, "eval_sts-test_pearson_euclidean": 0.8549065829936804, "eval_sts-test_pearson_manhattan": 0.8559014033008101, "eval_sts-test_pearson_max": 0.8559014033008101, "eval_sts-test_spearman_cosine": 0.871560114440785, "eval_sts-test_spearman_dot": 0.8412461164335756, "eval_sts-test_spearman_euclidean": 0.8616554770242205, "eval_sts-test_spearman_manhattan": 0.86344749922969, "eval_sts-test_spearman_max": 0.871560114440785, "eval_vitaminc-pairs_loss": 2.5574047565460205, "eval_vitaminc-pairs_runtime": 1.4466, "eval_vitaminc-pairs_samples_per_second": 74.658, "eval_vitaminc-pairs_steps_per_second": 1.383, "step": 270 }, { "epoch": 1.524347212420607, "eval_negation-triplets_loss": 1.853515386581421, "eval_negation-triplets_runtime": 0.2992, "eval_negation-triplets_samples_per_second": 213.896, "eval_negation-triplets_steps_per_second": 3.342, "step": 270 }, { "epoch": 1.524347212420607, "eval_scitail-pairs-pos_loss": 0.1692524254322052, "eval_scitail-pairs-pos_runtime": 0.3739, "eval_scitail-pairs-pos_samples_per_second": 144.426, "eval_scitail-pairs-pos_steps_per_second": 2.675, "step": 270 }, { "epoch": 1.524347212420607, "eval_xsum-pairs_loss": 0.22170975804328918, "eval_xsum-pairs_runtime": 3.1517, "eval_xsum-pairs_samples_per_second": 40.613, "eval_xsum-pairs_steps_per_second": 0.635, "step": 270 }, { "epoch": 1.524347212420607, "eval_sciq_pairs_loss": 0.04346679896116257, "eval_sciq_pairs_runtime": 3.2686, "eval_sciq_pairs_samples_per_second": 39.16, "eval_sciq_pairs_steps_per_second": 0.612, "step": 270 }, { "epoch": 1.524347212420607, "eval_qasc_pairs_loss": 0.24427936971187592, "eval_qasc_pairs_runtime": 0.6217, "eval_qasc_pairs_samples_per_second": 205.897, "eval_qasc_pairs_steps_per_second": 3.217, "step": 270 }, { "epoch": 1.524347212420607, "eval_openbookqa_pairs_loss": 0.7998915910720825, "eval_openbookqa_pairs_runtime": 0.576, "eval_openbookqa_pairs_samples_per_second": 222.206, "eval_openbookqa_pairs_steps_per_second": 3.472, "step": 270 }, { "epoch": 1.524347212420607, "eval_msmarco_pairs_loss": 0.5027381777763367, "eval_msmarco_pairs_runtime": 1.2901, "eval_msmarco_pairs_samples_per_second": 99.216, "eval_msmarco_pairs_steps_per_second": 1.55, "step": 270 }, { "epoch": 1.524347212420607, "eval_nq_pairs_loss": 0.6529555916786194, "eval_nq_pairs_runtime": 2.3842, "eval_nq_pairs_samples_per_second": 53.687, "eval_nq_pairs_steps_per_second": 0.839, "step": 270 }, { "epoch": 1.524347212420607, "eval_trivia_pairs_loss": 1.0634211301803589, "eval_trivia_pairs_runtime": 4.4089, "eval_trivia_pairs_samples_per_second": 29.032, "eval_trivia_pairs_steps_per_second": 0.454, "step": 270 }, { "epoch": 1.524347212420607, "eval_gooaq_pairs_loss": 0.800453245639801, "eval_gooaq_pairs_runtime": 0.8705, "eval_gooaq_pairs_samples_per_second": 147.034, "eval_gooaq_pairs_steps_per_second": 2.297, "step": 270 }, { "epoch": 1.524347212420607, "eval_paws-pos_loss": 0.031901415437459946, "eval_paws-pos_runtime": 0.6828, "eval_paws-pos_samples_per_second": 187.456, "eval_paws-pos_steps_per_second": 2.929, "step": 270 }, { "epoch": 1.5412844036697249, "grad_norm": 3.258525848388672, "learning_rate": 3.5381918372201175e-05, "loss": 0.7974, "step": 273 }, { "epoch": 1.5582215949188427, "grad_norm": 2.9689552783966064, "learning_rate": 3.5110984040640627e-05, "loss": 0.797, "step": 276 }, { "epoch": 1.5751587861679606, "grad_norm": 3.50411057472229, "learning_rate": 3.483409823443864e-05, "loss": 0.6749, "step": 279 }, { "epoch": 1.5920959774170784, "grad_norm": 2.840614080429077, "learning_rate": 3.4551456100080266e-05, "loss": 0.9325, "step": 282 }, { "epoch": 1.6090331686661963, "grad_norm": 2.934267044067383, "learning_rate": 3.426325684105594e-05, "loss": 0.8418, "step": 285 }, { "epoch": 1.6259703599153141, "grad_norm": 3.5037455558776855, "learning_rate": 3.396970357746474e-05, "loss": 1.0135, "step": 288 }, { "epoch": 1.642907551164432, "grad_norm": 3.349975109100342, "learning_rate": 3.3671003202857315e-05, "loss": 0.6961, "step": 291 }, { "epoch": 1.6598447424135498, "grad_norm": 3.207557439804077, "learning_rate": 3.336736623841924e-05, "loss": 0.9361, "step": 294 }, { "epoch": 1.6767819336626677, "grad_norm": 2.0259296894073486, "learning_rate": 3.305900668459766e-05, "loss": 0.6747, "step": 297 }, { "epoch": 1.6767819336626677, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.7887165546417236, "eval_VitaminC_cosine_ap": 0.5443802154749287, "eval_VitaminC_cosine_f1": 0.6595174262734584, "eval_VitaminC_cosine_f1_threshold": 0.35189926624298096, "eval_VitaminC_cosine_precision": 0.492, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5625, "eval_VitaminC_dot_accuracy_threshold": 335.016357421875, "eval_VitaminC_dot_ap": 0.5460930199557891, "eval_VitaminC_dot_f1": 0.6594594594594595, "eval_VitaminC_dot_f1_threshold": 158.6214599609375, "eval_VitaminC_dot_precision": 0.4939271255060729, "eval_VitaminC_dot_recall": 0.991869918699187, "eval_VitaminC_euclidean_accuracy": 0.57421875, "eval_VitaminC_euclidean_accuracy_threshold": 13.359209060668945, "eval_VitaminC_euclidean_ap": 0.5420558119789205, "eval_VitaminC_euclidean_f1": 0.6577540106951871, "eval_VitaminC_euclidean_f1_threshold": 23.44475746154785, "eval_VitaminC_euclidean_precision": 0.4900398406374502, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.578125, "eval_VitaminC_manhattan_accuracy_threshold": 309.7850646972656, "eval_VitaminC_manhattan_ap": 0.5398712022586767, "eval_VitaminC_manhattan_f1": 0.6595174262734584, "eval_VitaminC_manhattan_f1_threshold": 486.6765441894531, "eval_VitaminC_manhattan_precision": 0.492, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 335.016357421875, "eval_VitaminC_max_ap": 0.5460930199557891, "eval_VitaminC_max_f1": 0.6595174262734584, "eval_VitaminC_max_f1_threshold": 486.6765441894531, "eval_VitaminC_max_precision": 0.4939271255060729, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5460930199557891, "eval_sts-test_pearson_cosine": 0.8332392980969607, "eval_sts-test_pearson_dot": 0.8346600863241642, "eval_sts-test_pearson_euclidean": 0.8653211336269704, "eval_sts-test_pearson_manhattan": 0.8653335270474869, "eval_sts-test_pearson_max": 0.8653335270474869, "eval_sts-test_spearman_cosine": 0.8786841635561152, "eval_sts-test_spearman_dot": 0.8596876540389535, "eval_sts-test_spearman_euclidean": 0.8687344122938186, "eval_sts-test_spearman_manhattan": 0.8687734393508408, "eval_sts-test_spearman_max": 0.8786841635561152, "eval_vitaminc-pairs_loss": 2.4870808124542236, "eval_vitaminc-pairs_runtime": 1.4506, "eval_vitaminc-pairs_samples_per_second": 74.451, "eval_vitaminc-pairs_steps_per_second": 1.379, "step": 297 }, { "epoch": 1.6767819336626677, "eval_negation-triplets_loss": 1.7349412441253662, "eval_negation-triplets_runtime": 0.2993, "eval_negation-triplets_samples_per_second": 213.838, "eval_negation-triplets_steps_per_second": 3.341, "step": 297 }, { "epoch": 1.6767819336626677, "eval_scitail-pairs-pos_loss": 0.15961770713329315, "eval_scitail-pairs-pos_runtime": 0.3704, "eval_scitail-pairs-pos_samples_per_second": 145.808, "eval_scitail-pairs-pos_steps_per_second": 2.7, "step": 297 }, { "epoch": 1.6767819336626677, "eval_xsum-pairs_loss": 0.22417353093624115, "eval_xsum-pairs_runtime": 3.1629, "eval_xsum-pairs_samples_per_second": 40.469, "eval_xsum-pairs_steps_per_second": 0.632, "step": 297 }, { "epoch": 1.6767819336626677, "eval_sciq_pairs_loss": 0.03957323729991913, "eval_sciq_pairs_runtime": 3.2788, "eval_sciq_pairs_samples_per_second": 39.039, "eval_sciq_pairs_steps_per_second": 0.61, "step": 297 }, { "epoch": 1.6767819336626677, "eval_qasc_pairs_loss": 0.19627788662910461, "eval_qasc_pairs_runtime": 0.6246, "eval_qasc_pairs_samples_per_second": 204.945, "eval_qasc_pairs_steps_per_second": 3.202, "step": 297 }, { "epoch": 1.6767819336626677, "eval_openbookqa_pairs_loss": 0.7668256163597107, "eval_openbookqa_pairs_runtime": 0.5769, "eval_openbookqa_pairs_samples_per_second": 221.888, "eval_openbookqa_pairs_steps_per_second": 3.467, "step": 297 }, { "epoch": 1.6767819336626677, "eval_msmarco_pairs_loss": 0.5024800300598145, "eval_msmarco_pairs_runtime": 1.287, "eval_msmarco_pairs_samples_per_second": 99.457, "eval_msmarco_pairs_steps_per_second": 1.554, "step": 297 }, { "epoch": 1.6767819336626677, "eval_nq_pairs_loss": 0.6426529288291931, "eval_nq_pairs_runtime": 2.3694, "eval_nq_pairs_samples_per_second": 54.023, "eval_nq_pairs_steps_per_second": 0.844, "step": 297 }, { "epoch": 1.6767819336626677, "eval_trivia_pairs_loss": 0.9762344360351562, "eval_trivia_pairs_runtime": 4.4202, "eval_trivia_pairs_samples_per_second": 28.958, "eval_trivia_pairs_steps_per_second": 0.452, "step": 297 }, { "epoch": 1.6767819336626677, "eval_gooaq_pairs_loss": 0.7546207904815674, "eval_gooaq_pairs_runtime": 0.8779, "eval_gooaq_pairs_samples_per_second": 145.803, "eval_gooaq_pairs_steps_per_second": 2.278, "step": 297 }, { "epoch": 1.6767819336626677, "eval_paws-pos_loss": 0.029145879670977592, "eval_paws-pos_runtime": 0.6938, "eval_paws-pos_samples_per_second": 184.484, "eval_paws-pos_steps_per_second": 2.883, "step": 297 }, { "epoch": 1.6937191249117856, "grad_norm": 2.766063928604126, "learning_rate": 3.274614187027587e-05, "loss": 0.7786, "step": 300 }, { "epoch": 1.7106563161609034, "grad_norm": 3.1933176517486572, "learning_rate": 3.2428992299601946e-05, "loss": 0.7171, "step": 303 }, { "epoch": 1.7275935074100213, "grad_norm": 3.0088443756103516, "learning_rate": 3.2107781496579536e-05, "loss": 0.6627, "step": 306 }, { "epoch": 1.7445306986591391, "grad_norm": 3.13895845413208, "learning_rate": 3.178273584753023e-05, "loss": 0.6711, "step": 309 }, { "epoch": 1.761467889908257, "grad_norm": 3.34114933013916, "learning_rate": 3.145408444153868e-05, "loss": 0.9076, "step": 312 }, { "epoch": 1.7784050811573748, "grad_norm": 2.5035502910614014, "learning_rate": 3.1122058908992746e-05, "loss": 0.7414, "step": 315 }, { "epoch": 1.7953422724064927, "grad_norm": 2.284698247909546, "learning_rate": 3.078689325833264e-05, "loss": 0.582, "step": 318 }, { "epoch": 1.8122794636556105, "grad_norm": 2.643444538116455, "learning_rate": 3.044882371112396e-05, "loss": 0.6068, "step": 321 }, { "epoch": 1.8292166549047284, "grad_norm": 2.37386155128479, "learning_rate": 3.0108088535571016e-05, "loss": 0.6219, "step": 324 }, { "epoch": 1.8292166549047284, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.7700088024139404, "eval_VitaminC_cosine_ap": 0.5429726522180547, "eval_VitaminC_cosine_f1": 0.6576819407008085, "eval_VitaminC_cosine_f1_threshold": 0.33469462394714355, "eval_VitaminC_cosine_precision": 0.49193548387096775, "eval_VitaminC_cosine_recall": 0.991869918699187, "eval_VitaminC_dot_accuracy": 0.57421875, "eval_VitaminC_dot_accuracy_threshold": 309.1703186035156, "eval_VitaminC_dot_ap": 0.5531704143247085, "eval_VitaminC_dot_f1": 0.6594594594594595, "eval_VitaminC_dot_f1_threshold": 145.4818115234375, "eval_VitaminC_dot_precision": 0.4939271255060729, "eval_VitaminC_dot_recall": 0.991869918699187, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 13.186519622802734, "eval_VitaminC_euclidean_ap": 0.5414693053767123, "eval_VitaminC_euclidean_f1": 0.6559999999999999, "eval_VitaminC_euclidean_f1_threshold": 23.980300903320312, "eval_VitaminC_euclidean_precision": 0.4880952380952381, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.5703125, "eval_VitaminC_manhattan_accuracy_threshold": 248.66549682617188, "eval_VitaminC_manhattan_ap": 0.5403554311371019, "eval_VitaminC_manhattan_f1": 0.6577540106951871, "eval_VitaminC_manhattan_f1_threshold": 493.069580078125, "eval_VitaminC_manhattan_precision": 0.4900398406374502, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 309.1703186035156, "eval_VitaminC_max_ap": 0.5531704143247085, "eval_VitaminC_max_f1": 0.6594594594594595, "eval_VitaminC_max_f1_threshold": 493.069580078125, "eval_VitaminC_max_precision": 0.4939271255060729, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5531704143247085, "eval_sts-test_pearson_cosine": 0.8353147289025764, "eval_sts-test_pearson_dot": 0.8340239073162183, "eval_sts-test_pearson_euclidean": 0.8678198295516475, "eval_sts-test_pearson_manhattan": 0.8679311647036958, "eval_sts-test_pearson_max": 0.8679311647036958, "eval_sts-test_spearman_cosine": 0.8807142866140599, "eval_sts-test_spearman_dot": 0.8548662012879339, "eval_sts-test_spearman_euclidean": 0.8730904047317294, "eval_sts-test_spearman_manhattan": 0.8734591925182695, "eval_sts-test_spearman_max": 0.8807142866140599, "eval_vitaminc-pairs_loss": 2.519745111465454, "eval_vitaminc-pairs_runtime": 1.5111, "eval_vitaminc-pairs_samples_per_second": 71.47, "eval_vitaminc-pairs_steps_per_second": 1.324, "step": 324 }, { "epoch": 1.8292166549047284, "eval_negation-triplets_loss": 1.701598882675171, "eval_negation-triplets_runtime": 0.3083, "eval_negation-triplets_samples_per_second": 207.571, "eval_negation-triplets_steps_per_second": 3.243, "step": 324 }, { "epoch": 1.8292166549047284, "eval_scitail-pairs-pos_loss": 0.1535351276397705, "eval_scitail-pairs-pos_runtime": 0.4139, "eval_scitail-pairs-pos_samples_per_second": 130.461, "eval_scitail-pairs-pos_steps_per_second": 2.416, "step": 324 }, { "epoch": 1.8292166549047284, "eval_xsum-pairs_loss": 0.16304434835910797, "eval_xsum-pairs_runtime": 3.173, "eval_xsum-pairs_samples_per_second": 40.34, "eval_xsum-pairs_steps_per_second": 0.63, "step": 324 }, { "epoch": 1.8292166549047284, "eval_sciq_pairs_loss": 0.03826402127742767, "eval_sciq_pairs_runtime": 3.2871, "eval_sciq_pairs_samples_per_second": 38.94, "eval_sciq_pairs_steps_per_second": 0.608, "step": 324 }, { "epoch": 1.8292166549047284, "eval_qasc_pairs_loss": 0.20441913604736328, "eval_qasc_pairs_runtime": 0.6223, "eval_qasc_pairs_samples_per_second": 205.692, "eval_qasc_pairs_steps_per_second": 3.214, "step": 324 }, { "epoch": 1.8292166549047284, "eval_openbookqa_pairs_loss": 0.7109480500221252, "eval_openbookqa_pairs_runtime": 0.5785, "eval_openbookqa_pairs_samples_per_second": 221.25, "eval_openbookqa_pairs_steps_per_second": 3.457, "step": 324 }, { "epoch": 1.8292166549047284, "eval_msmarco_pairs_loss": 0.48586779832839966, "eval_msmarco_pairs_runtime": 1.2912, "eval_msmarco_pairs_samples_per_second": 99.129, "eval_msmarco_pairs_steps_per_second": 1.549, "step": 324 }, { "epoch": 1.8292166549047284, "eval_nq_pairs_loss": 0.5532824397087097, "eval_nq_pairs_runtime": 2.3796, "eval_nq_pairs_samples_per_second": 53.791, "eval_nq_pairs_steps_per_second": 0.84, "step": 324 }, { "epoch": 1.8292166549047284, "eval_trivia_pairs_loss": 1.0205955505371094, "eval_trivia_pairs_runtime": 4.4187, "eval_trivia_pairs_samples_per_second": 28.968, "eval_trivia_pairs_steps_per_second": 0.453, "step": 324 }, { "epoch": 1.8292166549047284, "eval_gooaq_pairs_loss": 0.7736483812332153, "eval_gooaq_pairs_runtime": 0.8757, "eval_gooaq_pairs_samples_per_second": 146.171, "eval_gooaq_pairs_steps_per_second": 2.284, "step": 324 }, { "epoch": 1.8292166549047284, "eval_paws-pos_loss": 0.0273247379809618, "eval_paws-pos_runtime": 0.6877, "eval_paws-pos_samples_per_second": 186.121, "eval_paws-pos_steps_per_second": 2.908, "step": 324 }, { "epoch": 1.8461538461538463, "grad_norm": 2.632672071456909, "learning_rate": 2.9764927878587643e-05, "loss": 0.5862, "step": 327 }, { "epoch": 1.8630910374029641, "grad_norm": 2.9056813716888428, "learning_rate": 2.9419583596543924e-05, "loss": 0.678, "step": 330 }, { "epoch": 1.880028228652082, "grad_norm": 2.693070411682129, "learning_rate": 2.907229908480814e-05, "loss": 0.6272, "step": 333 }, { "epoch": 1.8969654199011998, "grad_norm": 2.2290945053100586, "learning_rate": 2.8723319106204032e-05, "loss": 0.5048, "step": 336 }, { "epoch": 1.9139026111503177, "grad_norm": 2.5947606563568115, "learning_rate": 2.8372889618504275e-05, "loss": 0.7653, "step": 339 }, { "epoch": 1.9308398023994355, "grad_norm": 3.1747825145721436, "learning_rate": 2.8021257601081767e-05, "loss": 0.6613, "step": 342 }, { "epoch": 1.9477769936485534, "grad_norm": 2.438523054122925, "learning_rate": 2.766867088084095e-05, "loss": 0.6122, "step": 345 }, { "epoch": 1.9647141848976712, "grad_norm": 2.645747423171997, "learning_rate": 2.7315377957551712e-05, "loss": 0.5939, "step": 348 }, { "epoch": 1.981651376146789, "grad_norm": 3.985382556915283, "learning_rate": 2.696162782870916e-05, "loss": 0.6923, "step": 351 }, { "epoch": 1.981651376146789, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.7852457165718079, "eval_VitaminC_cosine_ap": 0.5489275869827654, "eval_VitaminC_cosine_f1": 0.6612466124661246, "eval_VitaminC_cosine_f1_threshold": 0.3671841323375702, "eval_VitaminC_cosine_precision": 0.4959349593495935, "eval_VitaminC_cosine_recall": 0.991869918699187, "eval_VitaminC_dot_accuracy": 0.5703125, "eval_VitaminC_dot_accuracy_threshold": 312.1104736328125, "eval_VitaminC_dot_ap": 0.5559525201108009, "eval_VitaminC_dot_f1": 0.6612466124661246, "eval_VitaminC_dot_f1_threshold": 150.29818725585938, "eval_VitaminC_dot_precision": 0.4959349593495935, "eval_VitaminC_dot_recall": 0.991869918699187, "eval_VitaminC_euclidean_accuracy": 0.58203125, "eval_VitaminC_euclidean_accuracy_threshold": 14.372268676757812, "eval_VitaminC_euclidean_ap": 0.544755914591283, "eval_VitaminC_euclidean_f1": 0.6576819407008085, "eval_VitaminC_euclidean_f1_threshold": 23.06924819946289, "eval_VitaminC_euclidean_precision": 0.49193548387096775, "eval_VitaminC_euclidean_recall": 0.991869918699187, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 263.9018859863281, "eval_VitaminC_manhattan_ap": 0.541522211031207, "eval_VitaminC_manhattan_f1": 0.6595174262734584, "eval_VitaminC_manhattan_f1_threshold": 502.340576171875, "eval_VitaminC_manhattan_precision": 0.492, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.58203125, "eval_VitaminC_max_accuracy_threshold": 312.1104736328125, "eval_VitaminC_max_ap": 0.5559525201108009, "eval_VitaminC_max_f1": 0.6612466124661246, "eval_VitaminC_max_f1_threshold": 502.340576171875, "eval_VitaminC_max_precision": 0.4959349593495935, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5559525201108009, "eval_sts-test_pearson_cosine": 0.8362775201898809, "eval_sts-test_pearson_dot": 0.8352671053392853, "eval_sts-test_pearson_euclidean": 0.8700319618710969, "eval_sts-test_pearson_manhattan": 0.8715864724519946, "eval_sts-test_pearson_max": 0.8715864724519946, "eval_sts-test_spearman_cosine": 0.8836928745715628, "eval_sts-test_spearman_dot": 0.857968315251608, "eval_sts-test_spearman_euclidean": 0.8761363054114356, "eval_sts-test_spearman_manhattan": 0.8777002520634819, "eval_sts-test_spearman_max": 0.8836928745715628, "eval_vitaminc-pairs_loss": 2.437910795211792, "eval_vitaminc-pairs_runtime": 1.4456, "eval_vitaminc-pairs_samples_per_second": 74.71, "eval_vitaminc-pairs_steps_per_second": 1.384, "step": 351 }, { "epoch": 1.981651376146789, "eval_negation-triplets_loss": 1.7345324754714966, "eval_negation-triplets_runtime": 0.2986, "eval_negation-triplets_samples_per_second": 214.362, "eval_negation-triplets_steps_per_second": 3.349, "step": 351 }, { "epoch": 1.981651376146789, "eval_scitail-pairs-pos_loss": 0.14812646806240082, "eval_scitail-pairs-pos_runtime": 0.3719, "eval_scitail-pairs-pos_samples_per_second": 145.183, "eval_scitail-pairs-pos_steps_per_second": 2.689, "step": 351 }, { "epoch": 1.981651376146789, "eval_xsum-pairs_loss": 0.1463930606842041, "eval_xsum-pairs_runtime": 3.152, "eval_xsum-pairs_samples_per_second": 40.609, "eval_xsum-pairs_steps_per_second": 0.635, "step": 351 }, { "epoch": 1.981651376146789, "eval_sciq_pairs_loss": 0.03820851817727089, "eval_sciq_pairs_runtime": 3.2627, "eval_sciq_pairs_samples_per_second": 39.231, "eval_sciq_pairs_steps_per_second": 0.613, "step": 351 }, { "epoch": 1.981651376146789, "eval_qasc_pairs_loss": 0.16403906047344208, "eval_qasc_pairs_runtime": 0.6219, "eval_qasc_pairs_samples_per_second": 205.822, "eval_qasc_pairs_steps_per_second": 3.216, "step": 351 }, { "epoch": 1.981651376146789, "eval_openbookqa_pairs_loss": 0.755411684513092, "eval_openbookqa_pairs_runtime": 0.5745, "eval_openbookqa_pairs_samples_per_second": 222.788, "eval_openbookqa_pairs_steps_per_second": 3.481, "step": 351 }, { "epoch": 1.981651376146789, "eval_msmarco_pairs_loss": 0.43477028608322144, "eval_msmarco_pairs_runtime": 1.2879, "eval_msmarco_pairs_samples_per_second": 99.389, "eval_msmarco_pairs_steps_per_second": 1.553, "step": 351 }, { "epoch": 1.981651376146789, "eval_nq_pairs_loss": 0.5431913733482361, "eval_nq_pairs_runtime": 2.372, "eval_nq_pairs_samples_per_second": 53.962, "eval_nq_pairs_steps_per_second": 0.843, "step": 351 }, { "epoch": 1.981651376146789, "eval_trivia_pairs_loss": 0.9581867456436157, "eval_trivia_pairs_runtime": 4.4272, "eval_trivia_pairs_samples_per_second": 28.912, "eval_trivia_pairs_steps_per_second": 0.452, "step": 351 }, { "epoch": 1.981651376146789, "eval_gooaq_pairs_loss": 0.7219691872596741, "eval_gooaq_pairs_runtime": 0.8764, "eval_gooaq_pairs_samples_per_second": 146.055, "eval_gooaq_pairs_steps_per_second": 2.282, "step": 351 }, { "epoch": 1.981651376146789, "eval_paws-pos_loss": 0.026377690955996513, "eval_paws-pos_runtime": 0.6874, "eval_paws-pos_samples_per_second": 186.22, "eval_paws-pos_steps_per_second": 2.91, "step": 351 }, { "epoch": 1.998588567395907, "grad_norm": 2.807307481765747, "learning_rate": 2.660766981404253e-05, "loss": 0.5712, "step": 354 }, { "epoch": 2.015525758645025, "grad_norm": 3.1135761737823486, "learning_rate": 2.6253753379797e-05, "loss": 0.5969, "step": 357 }, { "epoch": 2.0324629498941427, "grad_norm": 2.701498508453369, "learning_rate": 2.5900127962912265e-05, "loss": 0.5881, "step": 360 }, { "epoch": 2.0494001411432605, "grad_norm": 2.1898539066314697, "learning_rate": 2.554704279522176e-05, "loss": 0.6005, "step": 363 }, { "epoch": 2.0663373323923784, "grad_norm": 2.3954033851623535, "learning_rate": 2.5194746727796408e-05, "loss": 0.6066, "step": 366 }, { "epoch": 2.0832745236414962, "grad_norm": 1.9525569677352905, "learning_rate": 2.4843488055556773e-05, "loss": 0.4921, "step": 369 }, { "epoch": 2.100211714890614, "grad_norm": 2.005103588104248, "learning_rate": 2.449351434227714e-05, "loss": 0.5354, "step": 372 }, { "epoch": 2.117148906139732, "grad_norm": 2.4554927349090576, "learning_rate": 2.414507224610495e-05, "loss": 0.5602, "step": 375 }, { "epoch": 2.13408609738885, "grad_norm": 2.926708698272705, "learning_rate": 2.3798407345718434e-05, "loss": 0.5686, "step": 378 }, { "epoch": 2.13408609738885, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.57421875, "eval_VitaminC_cosine_accuracy_threshold": 0.8050106763839722, "eval_VitaminC_cosine_ap": 0.544869760591425, "eval_VitaminC_cosine_f1": 0.6577540106951871, "eval_VitaminC_cosine_f1_threshold": 0.2933539152145386, "eval_VitaminC_cosine_precision": 0.4900398406374502, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.5625, "eval_VitaminC_dot_accuracy_threshold": 350.54046630859375, "eval_VitaminC_dot_ap": 0.5538743151996848, "eval_VitaminC_dot_f1": 0.6577540106951871, "eval_VitaminC_dot_f1_threshold": 122.50220489501953, "eval_VitaminC_dot_precision": 0.4900398406374502, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.578125, "eval_VitaminC_euclidean_accuracy_threshold": 12.29859447479248, "eval_VitaminC_euclidean_ap": 0.5417581979676633, "eval_VitaminC_euclidean_f1": 0.6559999999999999, "eval_VitaminC_euclidean_f1_threshold": 24.298545837402344, "eval_VitaminC_euclidean_precision": 0.4880952380952381, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.57421875, "eval_VitaminC_manhattan_accuracy_threshold": 261.48309326171875, "eval_VitaminC_manhattan_ap": 0.5389765713900105, "eval_VitaminC_manhattan_f1": 0.6559999999999999, "eval_VitaminC_manhattan_f1_threshold": 519.0216064453125, "eval_VitaminC_manhattan_precision": 0.4880952380952381, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.578125, "eval_VitaminC_max_accuracy_threshold": 350.54046630859375, "eval_VitaminC_max_ap": 0.5538743151996848, "eval_VitaminC_max_f1": 0.6577540106951871, "eval_VitaminC_max_f1_threshold": 519.0216064453125, "eval_VitaminC_max_precision": 0.4900398406374502, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5538743151996848, "eval_sts-test_pearson_cosine": 0.8323841899935347, "eval_sts-test_pearson_dot": 0.8319981087315044, "eval_sts-test_pearson_euclidean": 0.8653758499419844, "eval_sts-test_pearson_manhattan": 0.8666968424133361, "eval_sts-test_pearson_max": 0.8666968424133361, "eval_sts-test_spearman_cosine": 0.8804171081064596, "eval_sts-test_spearman_dot": 0.8574101209222718, "eval_sts-test_spearman_euclidean": 0.8715185810589999, "eval_sts-test_spearman_manhattan": 0.8731244191392259, "eval_sts-test_spearman_max": 0.8804171081064596, "eval_vitaminc-pairs_loss": 2.390805959701538, "eval_vitaminc-pairs_runtime": 1.4923, "eval_vitaminc-pairs_samples_per_second": 72.374, "eval_vitaminc-pairs_steps_per_second": 1.34, "step": 378 }, { "epoch": 2.13408609738885, "eval_negation-triplets_loss": 1.7677762508392334, "eval_negation-triplets_runtime": 0.3036, "eval_negation-triplets_samples_per_second": 210.833, "eval_negation-triplets_steps_per_second": 3.294, "step": 378 }, { "epoch": 2.13408609738885, "eval_scitail-pairs-pos_loss": 0.14010007679462433, "eval_scitail-pairs-pos_runtime": 0.3847, "eval_scitail-pairs-pos_samples_per_second": 140.379, "eval_scitail-pairs-pos_steps_per_second": 2.6, "step": 378 }, { "epoch": 2.13408609738885, "eval_xsum-pairs_loss": 0.1453721672296524, "eval_xsum-pairs_runtime": 3.1712, "eval_xsum-pairs_samples_per_second": 40.363, "eval_xsum-pairs_steps_per_second": 0.631, "step": 378 }, { "epoch": 2.13408609738885, "eval_sciq_pairs_loss": 0.03739440068602562, "eval_sciq_pairs_runtime": 3.3277, "eval_sciq_pairs_samples_per_second": 38.466, "eval_sciq_pairs_steps_per_second": 0.601, "step": 378 }, { "epoch": 2.13408609738885, "eval_qasc_pairs_loss": 0.1603582501411438, "eval_qasc_pairs_runtime": 0.632, "eval_qasc_pairs_samples_per_second": 202.534, "eval_qasc_pairs_steps_per_second": 3.165, "step": 378 }, { "epoch": 2.13408609738885, "eval_openbookqa_pairs_loss": 0.7796258330345154, "eval_openbookqa_pairs_runtime": 0.5856, "eval_openbookqa_pairs_samples_per_second": 218.585, "eval_openbookqa_pairs_steps_per_second": 3.415, "step": 378 }, { "epoch": 2.13408609738885, "eval_msmarco_pairs_loss": 0.4246203303337097, "eval_msmarco_pairs_runtime": 1.297, "eval_msmarco_pairs_samples_per_second": 98.689, "eval_msmarco_pairs_steps_per_second": 1.542, "step": 378 }, { "epoch": 2.13408609738885, "eval_nq_pairs_loss": 0.5298404097557068, "eval_nq_pairs_runtime": 2.3877, "eval_nq_pairs_samples_per_second": 53.609, "eval_nq_pairs_steps_per_second": 0.838, "step": 378 }, { "epoch": 2.13408609738885, "eval_trivia_pairs_loss": 0.9613967537879944, "eval_trivia_pairs_runtime": 4.4311, "eval_trivia_pairs_samples_per_second": 28.887, "eval_trivia_pairs_steps_per_second": 0.451, "step": 378 }, { "epoch": 2.13408609738885, "eval_gooaq_pairs_loss": 0.6964626908302307, "eval_gooaq_pairs_runtime": 0.8843, "eval_gooaq_pairs_samples_per_second": 144.755, "eval_gooaq_pairs_steps_per_second": 2.262, "step": 378 }, { "epoch": 2.13408609738885, "eval_paws-pos_loss": 0.02705618366599083, "eval_paws-pos_runtime": 0.6932, "eval_paws-pos_samples_per_second": 184.655, "eval_paws-pos_steps_per_second": 2.885, "step": 378 }, { "epoch": 2.1510232886379677, "grad_norm": 2.418947458267212, "learning_rate": 2.345376396724515e-05, "loss": 0.6496, "step": 381 }, { "epoch": 2.1679604798870855, "grad_norm": 2.5201969146728516, "learning_rate": 2.311138501206319e-05, "loss": 0.4713, "step": 384 }, { "epoch": 2.1848976711362034, "grad_norm": 3.0134377479553223, "learning_rate": 2.277151178560665e-05, "loss": 0.6345, "step": 387 }, { "epoch": 2.2018348623853212, "grad_norm": 2.368422031402588, "learning_rate": 2.2434383827295833e-05, "loss": 0.5994, "step": 390 }, { "epoch": 2.218772053634439, "grad_norm": 3.164980411529541, "learning_rate": 2.210023874171213e-05, "loss": 0.6763, "step": 393 }, { "epoch": 2.235709244883557, "grad_norm": 2.83431077003479, "learning_rate": 2.1769312031136583e-05, "loss": 0.7254, "step": 396 }, { "epoch": 2.252646436132675, "grad_norm": 3.2441203594207764, "learning_rate": 2.14418369295701e-05, "loss": 0.8032, "step": 399 }, { "epoch": 2.2695836273817926, "grad_norm": 2.215298652648926, "learning_rate": 2.1118044238352392e-05, "loss": 0.4914, "step": 402 }, { "epoch": 2.2865208186309105, "grad_norm": 2.700486183166504, "learning_rate": 2.0798162163495322e-05, "loss": 0.6307, "step": 405 }, { "epoch": 2.2865208186309105, "eval_NLI-v2_cosine_accuracy": 1.0, "eval_NLI-v2_dot_accuracy": 0.0, "eval_NLI-v2_euclidean_accuracy": 1.0, "eval_NLI-v2_manhattan_accuracy": 1.0, "eval_NLI-v2_max_accuracy": 1.0, "eval_VitaminC_cosine_accuracy": 0.578125, "eval_VitaminC_cosine_accuracy_threshold": 0.8052636384963989, "eval_VitaminC_cosine_ap": 0.5479388360307975, "eval_VitaminC_cosine_f1": 0.6577540106951871, "eval_VitaminC_cosine_f1_threshold": 0.3108493387699127, "eval_VitaminC_cosine_precision": 0.4900398406374502, "eval_VitaminC_cosine_recall": 1.0, "eval_VitaminC_dot_accuracy": 0.58203125, "eval_VitaminC_dot_accuracy_threshold": 318.633056640625, "eval_VitaminC_dot_ap": 0.5533499611019033, "eval_VitaminC_dot_f1": 0.6577540106951871, "eval_VitaminC_dot_f1_threshold": 125.5129165649414, "eval_VitaminC_dot_precision": 0.4900398406374502, "eval_VitaminC_dot_recall": 1.0, "eval_VitaminC_euclidean_accuracy": 0.58203125, "eval_VitaminC_euclidean_accuracy_threshold": 12.9645357131958, "eval_VitaminC_euclidean_ap": 0.541753017593475, "eval_VitaminC_euclidean_f1": 0.6559999999999999, "eval_VitaminC_euclidean_f1_threshold": 23.908817291259766, "eval_VitaminC_euclidean_precision": 0.4880952380952381, "eval_VitaminC_euclidean_recall": 1.0, "eval_VitaminC_manhattan_accuracy": 0.578125, "eval_VitaminC_manhattan_accuracy_threshold": 266.60528564453125, "eval_VitaminC_manhattan_ap": 0.5411403083150335, "eval_VitaminC_manhattan_f1": 0.6559999999999999, "eval_VitaminC_manhattan_f1_threshold": 512.4686279296875, "eval_VitaminC_manhattan_precision": 0.4880952380952381, "eval_VitaminC_manhattan_recall": 1.0, "eval_VitaminC_max_accuracy": 0.58203125, "eval_VitaminC_max_accuracy_threshold": 318.633056640625, "eval_VitaminC_max_ap": 0.5533499611019033, "eval_VitaminC_max_f1": 0.6577540106951871, "eval_VitaminC_max_f1_threshold": 512.4686279296875, "eval_VitaminC_max_precision": 0.4900398406374502, "eval_VitaminC_max_recall": 1.0, "eval_sequential_score": 0.5533499611019033, "eval_sts-test_pearson_cosine": 0.8404451477820003, "eval_sts-test_pearson_dot": 0.8376741383364052, "eval_sts-test_pearson_euclidean": 0.873696402540065, "eval_sts-test_pearson_manhattan": 0.8739146310077538, "eval_sts-test_pearson_max": 0.8739146310077538, "eval_sts-test_spearman_cosine": 0.8859238616569335, "eval_sts-test_spearman_dot": 0.8626544264654313, "eval_sts-test_spearman_euclidean": 0.8767156244780591, "eval_sts-test_spearman_manhattan": 0.8785835525192047, "eval_sts-test_spearman_max": 0.8859238616569335, "eval_vitaminc-pairs_loss": 2.438774347305298, "eval_vitaminc-pairs_runtime": 1.4716, "eval_vitaminc-pairs_samples_per_second": 73.39, "eval_vitaminc-pairs_steps_per_second": 1.359, "step": 405 }, { "epoch": 2.2865208186309105, "eval_negation-triplets_loss": 1.7093145847320557, "eval_negation-triplets_runtime": 0.3027, "eval_negation-triplets_samples_per_second": 211.422, "eval_negation-triplets_steps_per_second": 3.303, "step": 405 }, { "epoch": 2.2865208186309105, "eval_scitail-pairs-pos_loss": 0.11918405443429947, "eval_scitail-pairs-pos_runtime": 0.3806, "eval_scitail-pairs-pos_samples_per_second": 141.888, "eval_scitail-pairs-pos_steps_per_second": 2.628, "step": 405 }, { "epoch": 2.2865208186309105, "eval_xsum-pairs_loss": 0.13078594207763672, "eval_xsum-pairs_runtime": 3.1593, "eval_xsum-pairs_samples_per_second": 40.515, "eval_xsum-pairs_steps_per_second": 0.633, "step": 405 }, { "epoch": 2.2865208186309105, "eval_sciq_pairs_loss": 0.03792291879653931, "eval_sciq_pairs_runtime": 3.3679, "eval_sciq_pairs_samples_per_second": 38.006, "eval_sciq_pairs_steps_per_second": 0.594, "step": 405 }, { "epoch": 2.2865208186309105, "eval_qasc_pairs_loss": 0.1465962529182434, "eval_qasc_pairs_runtime": 0.6708, "eval_qasc_pairs_samples_per_second": 190.809, "eval_qasc_pairs_steps_per_second": 2.981, "step": 405 }, { "epoch": 2.2865208186309105, "eval_openbookqa_pairs_loss": 0.74336838722229, "eval_openbookqa_pairs_runtime": 0.6017, "eval_openbookqa_pairs_samples_per_second": 212.742, "eval_openbookqa_pairs_steps_per_second": 3.324, "step": 405 }, { "epoch": 2.2865208186309105, "eval_msmarco_pairs_loss": 0.3927748501300812, "eval_msmarco_pairs_runtime": 1.3092, "eval_msmarco_pairs_samples_per_second": 97.767, "eval_msmarco_pairs_steps_per_second": 1.528, "step": 405 }, { "epoch": 2.2865208186309105, "eval_nq_pairs_loss": 0.4998345375061035, "eval_nq_pairs_runtime": 2.4116, "eval_nq_pairs_samples_per_second": 53.077, "eval_nq_pairs_steps_per_second": 0.829, "step": 405 }, { "epoch": 2.2865208186309105, "eval_trivia_pairs_loss": 0.9862285852432251, "eval_trivia_pairs_runtime": 4.4317, "eval_trivia_pairs_samples_per_second": 28.883, "eval_trivia_pairs_steps_per_second": 0.451, "step": 405 }, { "epoch": 2.2865208186309105, "eval_gooaq_pairs_loss": 0.697635293006897, "eval_gooaq_pairs_runtime": 0.8801, "eval_gooaq_pairs_samples_per_second": 145.443, "eval_gooaq_pairs_steps_per_second": 2.273, "step": 405 }, { "epoch": 2.2865208186309105, "eval_paws-pos_loss": 0.02622571960091591, "eval_paws-pos_runtime": 0.6966, "eval_paws-pos_samples_per_second": 183.756, "eval_paws-pos_steps_per_second": 2.871, "step": 405 }, { "epoch": 2.3034580098800284, "grad_norm": 2.7358224391937256, "learning_rate": 2.0482416154845496e-05, "loss": 0.7493, "step": 408 }, { "epoch": 2.320395201129146, "grad_norm": 2.2785451412200928, "learning_rate": 2.0171028747189386e-05, "loss": 0.5139, "step": 411 }, { "epoch": 2.337332392378264, "grad_norm": 2.1454882621765137, "learning_rate": 1.9864219403412882e-05, "loss": 0.6364, "step": 414 }, { "epoch": 2.354269583627382, "grad_norm": 2.206393003463745, "learning_rate": 1.9562204359825967e-05, "loss": 0.4763, "step": 417 }, { "epoch": 2.3712067748765, "grad_norm": 2.2492825984954834, "learning_rate": 1.92651964737614e-05, "loss": 0.583, "step": 420 }, { "epoch": 2.3881439661256176, "grad_norm": 3.24066162109375, "learning_rate": 1.8973405073554915e-05, "loss": 0.5912, "step": 423 }, { "epoch": 2.4050811573747355, "grad_norm": 2.6232211589813232, "learning_rate": 1.868703581101257e-05, "loss": 0.5936, "step": 426 } ], "logging_steps": 3, "max_steps": 531, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 107, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 160, "trial_name": null, "trial_params": null }