|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.4163726182074807, |
|
"eval_steps": 27, |
|
"global_step": 428, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.016937191249117856, |
|
"grad_norm": 34.22002029418945, |
|
"learning_rate": 6.818181818181818e-07, |
|
"loss": 7.2372, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.03387438249823571, |
|
"grad_norm": 21.76839828491211, |
|
"learning_rate": 1.3636363636363636e-06, |
|
"loss": 6.855, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.05081157374735357, |
|
"grad_norm": 21.260774612426758, |
|
"learning_rate": 2.0454545454545457e-06, |
|
"loss": 7.4707, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.06774876499647142, |
|
"grad_norm": 16.885921478271484, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 7.0187, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.08468595624558928, |
|
"grad_norm": 19.509899139404297, |
|
"learning_rate": 3.409090909090909e-06, |
|
"loss": 6.6756, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.10162314749470713, |
|
"grad_norm": 7.9427289962768555, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"loss": 6.0155, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.11856033874382499, |
|
"grad_norm": 7.325345039367676, |
|
"learning_rate": 4.772727272727273e-06, |
|
"loss": 6.1644, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.13549752999294284, |
|
"grad_norm": 7.544689655303955, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 6.2158, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"grad_norm": 5.141758918762207, |
|
"learning_rate": 6.136363636363637e-06, |
|
"loss": 6.1369, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.109375, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.9544724822044373, |
|
"eval_VitaminC_cosine_ap": 0.5356492030729136, |
|
"eval_VitaminC_cosine_f1": 0.6542553191489362, |
|
"eval_VitaminC_cosine_f1_threshold": 0.7148199081420898, |
|
"eval_VitaminC_cosine_precision": 0.48616600790513836, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 414.4264831542969, |
|
"eval_VitaminC_dot_ap": 0.5108219546857565, |
|
"eval_VitaminC_dot_f1": 0.6507936507936508, |
|
"eval_VitaminC_dot_f1_threshold": 271.6522521972656, |
|
"eval_VitaminC_dot_precision": 0.4823529411764706, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.55078125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 6.519885063171387, |
|
"eval_VitaminC_euclidean_ap": 0.5226419655984281, |
|
"eval_VitaminC_euclidean_f1": 0.6505376344086021, |
|
"eval_VitaminC_euclidean_f1_threshold": 15.194067001342773, |
|
"eval_VitaminC_euclidean_precision": 0.4859437751004016, |
|
"eval_VitaminC_euclidean_recall": 0.983739837398374, |
|
"eval_VitaminC_manhattan_accuracy": 0.546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 149.20114135742188, |
|
"eval_VitaminC_manhattan_ap": 0.5237451656134715, |
|
"eval_VitaminC_manhattan_f1": 0.6542553191489362, |
|
"eval_VitaminC_manhattan_f1_threshold": 259.007080078125, |
|
"eval_VitaminC_manhattan_precision": 0.48616600790513836, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.5546875, |
|
"eval_VitaminC_max_accuracy_threshold": 414.4264831542969, |
|
"eval_VitaminC_max_ap": 0.5356492030729136, |
|
"eval_VitaminC_max_f1": 0.6542553191489362, |
|
"eval_VitaminC_max_f1_threshold": 271.6522521972656, |
|
"eval_VitaminC_max_precision": 0.48616600790513836, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5356492030729136, |
|
"eval_sts-test_pearson_cosine": 0.056062031998983373, |
|
"eval_sts-test_pearson_dot": 0.2979259445723872, |
|
"eval_sts-test_pearson_euclidean": 0.0498319208592713, |
|
"eval_sts-test_pearson_manhattan": 0.07381429239121526, |
|
"eval_sts-test_pearson_max": 0.2979259445723872, |
|
"eval_sts-test_spearman_cosine": 0.1066788491614481, |
|
"eval_sts-test_spearman_dot": 0.315952670306405, |
|
"eval_sts-test_spearman_euclidean": 0.07303394554435191, |
|
"eval_sts-test_spearman_manhattan": 0.09039525717692232, |
|
"eval_sts-test_spearman_max": 0.315952670306405, |
|
"eval_vitaminc-pairs_loss": 2.698580741882324, |
|
"eval_vitaminc-pairs_runtime": 1.4747, |
|
"eval_vitaminc-pairs_samples_per_second": 73.236, |
|
"eval_vitaminc-pairs_steps_per_second": 1.356, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_negation-triplets_loss": 5.142906665802002, |
|
"eval_negation-triplets_runtime": 0.2993, |
|
"eval_negation-triplets_samples_per_second": 213.865, |
|
"eval_negation-triplets_steps_per_second": 3.342, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_scitail-pairs-pos_loss": 1.9216958284378052, |
|
"eval_scitail-pairs-pos_runtime": 0.3834, |
|
"eval_scitail-pairs-pos_samples_per_second": 140.842, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.608, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_xsum-pairs_loss": 6.073049545288086, |
|
"eval_xsum-pairs_runtime": 3.1587, |
|
"eval_xsum-pairs_samples_per_second": 40.523, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_sciq_pairs_loss": 0.3449864387512207, |
|
"eval_sciq_pairs_runtime": 3.3747, |
|
"eval_sciq_pairs_samples_per_second": 37.93, |
|
"eval_sciq_pairs_steps_per_second": 0.593, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_qasc_pairs_loss": 3.2267842292785645, |
|
"eval_qasc_pairs_runtime": 0.6576, |
|
"eval_qasc_pairs_samples_per_second": 194.646, |
|
"eval_qasc_pairs_steps_per_second": 3.041, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_openbookqa_pairs_loss": 4.405983924865723, |
|
"eval_openbookqa_pairs_runtime": 0.6107, |
|
"eval_openbookqa_pairs_samples_per_second": 209.594, |
|
"eval_openbookqa_pairs_steps_per_second": 3.275, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_msmarco_pairs_loss": 6.937691688537598, |
|
"eval_msmarco_pairs_runtime": 1.3091, |
|
"eval_msmarco_pairs_samples_per_second": 97.779, |
|
"eval_msmarco_pairs_steps_per_second": 1.528, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_nq_pairs_loss": 6.794108867645264, |
|
"eval_nq_pairs_runtime": 2.3968, |
|
"eval_nq_pairs_samples_per_second": 53.404, |
|
"eval_nq_pairs_steps_per_second": 0.834, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_trivia_pairs_loss": 6.3355631828308105, |
|
"eval_trivia_pairs_runtime": 4.4974, |
|
"eval_trivia_pairs_samples_per_second": 28.461, |
|
"eval_trivia_pairs_steps_per_second": 0.445, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_gooaq_pairs_loss": 6.405998706817627, |
|
"eval_gooaq_pairs_runtime": 0.8745, |
|
"eval_gooaq_pairs_samples_per_second": 146.37, |
|
"eval_gooaq_pairs_steps_per_second": 2.287, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.1524347212420607, |
|
"eval_paws-pos_loss": 2.2308223247528076, |
|
"eval_paws-pos_runtime": 0.6998, |
|
"eval_paws-pos_samples_per_second": 182.908, |
|
"eval_paws-pos_steps_per_second": 2.858, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.16937191249117856, |
|
"grad_norm": 5.885251522064209, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 5.7653, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1863091037402964, |
|
"grad_norm": 7.357480049133301, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 6.1259, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.20324629498941427, |
|
"grad_norm": 7.321795463562012, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 5.7539, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.22018348623853212, |
|
"grad_norm": 4.239792346954346, |
|
"learning_rate": 8.863636363636365e-06, |
|
"loss": 6.0131, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.23712067748764998, |
|
"grad_norm": 3.9554407596588135, |
|
"learning_rate": 9.545454545454547e-06, |
|
"loss": 6.0074, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.25405786873676783, |
|
"grad_norm": 4.406026840209961, |
|
"learning_rate": 1.0227272727272729e-05, |
|
"loss": 5.7125, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.2709950599858857, |
|
"grad_norm": 7.235893249511719, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 5.5634, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.28793225123500354, |
|
"grad_norm": 5.330288410186768, |
|
"learning_rate": 1.1590909090909093e-05, |
|
"loss": 5.2924, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"grad_norm": 7.216403961181641, |
|
"learning_rate": 1.2272727272727274e-05, |
|
"loss": 5.2286, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.046875, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.54296875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.9328227043151855, |
|
"eval_VitaminC_cosine_ap": 0.5212059026196154, |
|
"eval_VitaminC_cosine_f1": 0.6576819407008085, |
|
"eval_VitaminC_cosine_f1_threshold": 0.7373804450035095, |
|
"eval_VitaminC_cosine_precision": 0.49193548387096775, |
|
"eval_VitaminC_cosine_recall": 0.991869918699187, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 418.2774658203125, |
|
"eval_VitaminC_dot_ap": 0.5160594099493883, |
|
"eval_VitaminC_dot_f1": 0.6521739130434782, |
|
"eval_VitaminC_dot_f1_threshold": 291.5081481933594, |
|
"eval_VitaminC_dot_precision": 0.4897959183673469, |
|
"eval_VitaminC_dot_recall": 0.975609756097561, |
|
"eval_VitaminC_euclidean_accuracy": 0.5390625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 8.120429039001465, |
|
"eval_VitaminC_euclidean_ap": 0.5224837623095228, |
|
"eval_VitaminC_euclidean_f1": 0.6576819407008085, |
|
"eval_VitaminC_euclidean_f1_threshold": 14.879999160766602, |
|
"eval_VitaminC_euclidean_precision": 0.49193548387096775, |
|
"eval_VitaminC_euclidean_recall": 0.991869918699187, |
|
"eval_VitaminC_manhattan_accuracy": 0.53515625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 137.40658569335938, |
|
"eval_VitaminC_manhattan_ap": 0.5186382518671783, |
|
"eval_VitaminC_manhattan_f1": 0.6576086956521738, |
|
"eval_VitaminC_manhattan_f1_threshold": 263.32452392578125, |
|
"eval_VitaminC_manhattan_precision": 0.49387755102040815, |
|
"eval_VitaminC_manhattan_recall": 0.983739837398374, |
|
"eval_VitaminC_max_accuracy": 0.55078125, |
|
"eval_VitaminC_max_accuracy_threshold": 418.2774658203125, |
|
"eval_VitaminC_max_ap": 0.5224837623095228, |
|
"eval_VitaminC_max_f1": 0.6576819407008085, |
|
"eval_VitaminC_max_f1_threshold": 291.5081481933594, |
|
"eval_VitaminC_max_precision": 0.49387755102040815, |
|
"eval_VitaminC_max_recall": 0.991869918699187, |
|
"eval_sequential_score": 0.5224837623095228, |
|
"eval_sts-test_pearson_cosine": 0.14377091128453176, |
|
"eval_sts-test_pearson_dot": 0.24728387094758872, |
|
"eval_sts-test_pearson_euclidean": 0.14604155960515372, |
|
"eval_sts-test_pearson_manhattan": 0.1446467532231986, |
|
"eval_sts-test_pearson_max": 0.24728387094758872, |
|
"eval_sts-test_spearman_cosine": 0.1968510434344728, |
|
"eval_sts-test_spearman_dot": 0.29467218283745694, |
|
"eval_sts-test_spearman_euclidean": 0.17218164683969664, |
|
"eval_sts-test_spearman_manhattan": 0.17741843340856742, |
|
"eval_sts-test_spearman_max": 0.29467218283745694, |
|
"eval_vitaminc-pairs_loss": 2.664700746536255, |
|
"eval_vitaminc-pairs_runtime": 1.4487, |
|
"eval_vitaminc-pairs_samples_per_second": 74.551, |
|
"eval_vitaminc-pairs_steps_per_second": 1.381, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_negation-triplets_loss": 4.6218037605285645, |
|
"eval_negation-triplets_runtime": 0.2971, |
|
"eval_negation-triplets_samples_per_second": 215.438, |
|
"eval_negation-triplets_steps_per_second": 3.366, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_scitail-pairs-pos_loss": 1.2413936853408813, |
|
"eval_scitail-pairs-pos_runtime": 0.372, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.175, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.688, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_xsum-pairs_loss": 5.249766826629639, |
|
"eval_xsum-pairs_runtime": 3.1506, |
|
"eval_xsum-pairs_samples_per_second": 40.627, |
|
"eval_xsum-pairs_steps_per_second": 0.635, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_sciq_pairs_loss": 0.2961578667163849, |
|
"eval_sciq_pairs_runtime": 3.2909, |
|
"eval_sciq_pairs_samples_per_second": 38.895, |
|
"eval_sciq_pairs_steps_per_second": 0.608, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_qasc_pairs_loss": 2.530872344970703, |
|
"eval_qasc_pairs_runtime": 0.6255, |
|
"eval_qasc_pairs_samples_per_second": 204.63, |
|
"eval_qasc_pairs_steps_per_second": 3.197, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_openbookqa_pairs_loss": 3.8855104446411133, |
|
"eval_openbookqa_pairs_runtime": 0.5742, |
|
"eval_openbookqa_pairs_samples_per_second": 222.914, |
|
"eval_openbookqa_pairs_steps_per_second": 3.483, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_msmarco_pairs_loss": 5.246406555175781, |
|
"eval_msmarco_pairs_runtime": 1.2872, |
|
"eval_msmarco_pairs_samples_per_second": 99.442, |
|
"eval_msmarco_pairs_steps_per_second": 1.554, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_nq_pairs_loss": 5.332630157470703, |
|
"eval_nq_pairs_runtime": 2.3739, |
|
"eval_nq_pairs_samples_per_second": 53.92, |
|
"eval_nq_pairs_steps_per_second": 0.843, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_trivia_pairs_loss": 5.647429943084717, |
|
"eval_trivia_pairs_runtime": 4.4729, |
|
"eval_trivia_pairs_samples_per_second": 28.617, |
|
"eval_trivia_pairs_steps_per_second": 0.447, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_gooaq_pairs_loss": 5.225871562957764, |
|
"eval_gooaq_pairs_runtime": 0.8715, |
|
"eval_gooaq_pairs_samples_per_second": 146.868, |
|
"eval_gooaq_pairs_steps_per_second": 2.295, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3048694424841214, |
|
"eval_paws-pos_loss": 0.8335962891578674, |
|
"eval_paws-pos_runtime": 0.6844, |
|
"eval_paws-pos_samples_per_second": 187.036, |
|
"eval_paws-pos_steps_per_second": 2.922, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.32180663373323926, |
|
"grad_norm": 6.847682952880859, |
|
"learning_rate": 1.2954545454545455e-05, |
|
"loss": 4.4811, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.3387438249823571, |
|
"grad_norm": 8.383002281188965, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 4.4239, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35568101623147497, |
|
"grad_norm": 7.014843463897705, |
|
"learning_rate": 1.431818181818182e-05, |
|
"loss": 4.0273, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.3726182074805928, |
|
"grad_norm": 5.9739885330200195, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 3.4508, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.3895553987297107, |
|
"grad_norm": 11.202752113342285, |
|
"learning_rate": 1.5681818181818182e-05, |
|
"loss": 3.9702, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.40649258997882853, |
|
"grad_norm": 7.064818859100342, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 3.5295, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4234297812279464, |
|
"grad_norm": 5.912719249725342, |
|
"learning_rate": 1.7045454545454546e-05, |
|
"loss": 3.6395, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.44036697247706424, |
|
"grad_norm": 5.033207893371582, |
|
"learning_rate": 1.772727272727273e-05, |
|
"loss": 3.2398, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"grad_norm": 5.218384265899658, |
|
"learning_rate": 1.840909090909091e-05, |
|
"loss": 3.116, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.9041332006454468, |
|
"eval_VitaminC_cosine_ap": 0.5292859731465609, |
|
"eval_VitaminC_cosine_f1": 0.6542553191489362, |
|
"eval_VitaminC_cosine_f1_threshold": 0.452939510345459, |
|
"eval_VitaminC_cosine_precision": 0.48616600790513836, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5546875, |
|
"eval_VitaminC_dot_accuracy_threshold": 414.42559814453125, |
|
"eval_VitaminC_dot_ap": 0.5222732504955002, |
|
"eval_VitaminC_dot_f1": 0.6542553191489362, |
|
"eval_VitaminC_dot_f1_threshold": 212.6934814453125, |
|
"eval_VitaminC_dot_precision": 0.48616600790513836, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.5546875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 9.18377685546875, |
|
"eval_VitaminC_euclidean_ap": 0.5291787221346742, |
|
"eval_VitaminC_euclidean_f1": 0.6542553191489362, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.683509826660156, |
|
"eval_VitaminC_euclidean_precision": 0.48616600790513836, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.55859375, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 173.8212127685547, |
|
"eval_VitaminC_manhattan_ap": 0.5305698453165033, |
|
"eval_VitaminC_manhattan_f1": 0.6542553191489362, |
|
"eval_VitaminC_manhattan_f1_threshold": 415.5366516113281, |
|
"eval_VitaminC_manhattan_precision": 0.48616600790513836, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.55859375, |
|
"eval_VitaminC_max_accuracy_threshold": 414.42559814453125, |
|
"eval_VitaminC_max_ap": 0.5305698453165033, |
|
"eval_VitaminC_max_f1": 0.6542553191489362, |
|
"eval_VitaminC_max_f1_threshold": 415.5366516113281, |
|
"eval_VitaminC_max_precision": 0.48616600790513836, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5305698453165033, |
|
"eval_sts-test_pearson_cosine": 0.45494716382349193, |
|
"eval_sts-test_pearson_dot": 0.44837123659858896, |
|
"eval_sts-test_pearson_euclidean": 0.4480861256491879, |
|
"eval_sts-test_pearson_manhattan": 0.4417008219313264, |
|
"eval_sts-test_pearson_max": 0.45494716382349193, |
|
"eval_sts-test_spearman_cosine": 0.48921418507251446, |
|
"eval_sts-test_spearman_dot": 0.46707725062744593, |
|
"eval_sts-test_spearman_euclidean": 0.4610824798409968, |
|
"eval_sts-test_spearman_manhattan": 0.46068648052845956, |
|
"eval_sts-test_spearman_max": 0.48921418507251446, |
|
"eval_vitaminc-pairs_loss": 2.5043575763702393, |
|
"eval_vitaminc-pairs_runtime": 1.4778, |
|
"eval_vitaminc-pairs_samples_per_second": 73.079, |
|
"eval_vitaminc-pairs_steps_per_second": 1.353, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_negation-triplets_loss": 3.4229447841644287, |
|
"eval_negation-triplets_runtime": 0.2991, |
|
"eval_negation-triplets_samples_per_second": 213.954, |
|
"eval_negation-triplets_steps_per_second": 3.343, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_scitail-pairs-pos_loss": 0.2784869372844696, |
|
"eval_scitail-pairs-pos_runtime": 0.3633, |
|
"eval_scitail-pairs-pos_samples_per_second": 148.649, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.753, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_xsum-pairs_loss": 2.428964614868164, |
|
"eval_xsum-pairs_runtime": 3.1548, |
|
"eval_xsum-pairs_samples_per_second": 40.573, |
|
"eval_xsum-pairs_steps_per_second": 0.634, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_sciq_pairs_loss": 0.15256048738956451, |
|
"eval_sciq_pairs_runtime": 3.2432, |
|
"eval_sciq_pairs_samples_per_second": 39.467, |
|
"eval_sciq_pairs_steps_per_second": 0.617, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_qasc_pairs_loss": 1.2902077436447144, |
|
"eval_qasc_pairs_runtime": 0.6211, |
|
"eval_qasc_pairs_samples_per_second": 206.085, |
|
"eval_qasc_pairs_steps_per_second": 3.22, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_openbookqa_pairs_loss": 2.4784862995147705, |
|
"eval_openbookqa_pairs_runtime": 0.5758, |
|
"eval_openbookqa_pairs_samples_per_second": 222.308, |
|
"eval_openbookqa_pairs_steps_per_second": 3.474, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_msmarco_pairs_loss": 2.967724084854126, |
|
"eval_msmarco_pairs_runtime": 1.2944, |
|
"eval_msmarco_pairs_samples_per_second": 98.885, |
|
"eval_msmarco_pairs_steps_per_second": 1.545, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_nq_pairs_loss": 3.358661413192749, |
|
"eval_nq_pairs_runtime": 2.3827, |
|
"eval_nq_pairs_samples_per_second": 53.722, |
|
"eval_nq_pairs_steps_per_second": 0.839, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_trivia_pairs_loss": 3.1391680240631104, |
|
"eval_trivia_pairs_runtime": 4.4155, |
|
"eval_trivia_pairs_samples_per_second": 28.989, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_gooaq_pairs_loss": 2.8774912357330322, |
|
"eval_gooaq_pairs_runtime": 0.8746, |
|
"eval_gooaq_pairs_samples_per_second": 146.346, |
|
"eval_gooaq_pairs_steps_per_second": 2.287, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4573041637261821, |
|
"eval_paws-pos_loss": 0.19754411280155182, |
|
"eval_paws-pos_runtime": 0.684, |
|
"eval_paws-pos_samples_per_second": 187.141, |
|
"eval_paws-pos_steps_per_second": 2.924, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.47424135497529996, |
|
"grad_norm": 5.149569988250732, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 2.6049, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.4911785462244178, |
|
"grad_norm": 5.012928009033203, |
|
"learning_rate": 1.9772727272727274e-05, |
|
"loss": 2.7738, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.5081157374735357, |
|
"grad_norm": 4.880725383758545, |
|
"learning_rate": 2.0454545454545457e-05, |
|
"loss": 2.5416, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5250529287226535, |
|
"grad_norm": 5.618528366088867, |
|
"learning_rate": 2.113636363636364e-05, |
|
"loss": 2.3913, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.5419901199717714, |
|
"grad_norm": 5.020515441894531, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 2.3144, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.5589273112208892, |
|
"grad_norm": 4.818451404571533, |
|
"learning_rate": 2.25e-05, |
|
"loss": 2.1857, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.5758645024700071, |
|
"grad_norm": 5.094771385192871, |
|
"learning_rate": 2.3181818181818185e-05, |
|
"loss": 1.8881, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.592801693719125, |
|
"grad_norm": 3.795962333679199, |
|
"learning_rate": 2.3863636363636365e-05, |
|
"loss": 2.2699, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"grad_norm": 4.46245813369751, |
|
"learning_rate": 2.454545454545455e-05, |
|
"loss": 2.1425, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.5546875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8830112218856812, |
|
"eval_VitaminC_cosine_ap": 0.5302172957740995, |
|
"eval_VitaminC_cosine_f1": 0.6558265582655827, |
|
"eval_VitaminC_cosine_f1_threshold": 0.5253933668136597, |
|
"eval_VitaminC_cosine_precision": 0.491869918699187, |
|
"eval_VitaminC_cosine_recall": 0.983739837398374, |
|
"eval_VitaminC_dot_accuracy": 0.5390625, |
|
"eval_VitaminC_dot_accuracy_threshold": 427.5576171875, |
|
"eval_VitaminC_dot_ap": 0.517120157327104, |
|
"eval_VitaminC_dot_f1": 0.6542553191489362, |
|
"eval_VitaminC_dot_f1_threshold": 175.80963134765625, |
|
"eval_VitaminC_dot_precision": 0.48616600790513836, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.5625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 10.817148208618164, |
|
"eval_VitaminC_euclidean_ap": 0.532255112376416, |
|
"eval_VitaminC_euclidean_f1": 0.6558265582655827, |
|
"eval_VitaminC_euclidean_f1_threshold": 21.10729217529297, |
|
"eval_VitaminC_euclidean_precision": 0.491869918699187, |
|
"eval_VitaminC_euclidean_recall": 0.983739837398374, |
|
"eval_VitaminC_manhattan_accuracy": 0.5546875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 224.70416259765625, |
|
"eval_VitaminC_manhattan_ap": 0.5298930718604624, |
|
"eval_VitaminC_manhattan_f1": 0.6558265582655827, |
|
"eval_VitaminC_manhattan_f1_threshold": 415.3311767578125, |
|
"eval_VitaminC_manhattan_precision": 0.491869918699187, |
|
"eval_VitaminC_manhattan_recall": 0.983739837398374, |
|
"eval_VitaminC_max_accuracy": 0.5625, |
|
"eval_VitaminC_max_accuracy_threshold": 427.5576171875, |
|
"eval_VitaminC_max_ap": 0.532255112376416, |
|
"eval_VitaminC_max_f1": 0.6558265582655827, |
|
"eval_VitaminC_max_f1_threshold": 415.3311767578125, |
|
"eval_VitaminC_max_precision": 0.491869918699187, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.532255112376416, |
|
"eval_sts-test_pearson_cosine": 0.755963151531783, |
|
"eval_sts-test_pearson_dot": 0.7384823091540473, |
|
"eval_sts-test_pearson_euclidean": 0.764089555623164, |
|
"eval_sts-test_pearson_manhattan": 0.7670467479701304, |
|
"eval_sts-test_pearson_max": 0.7670467479701304, |
|
"eval_sts-test_spearman_cosine": 0.7806331583677342, |
|
"eval_sts-test_spearman_dot": 0.7442842883778696, |
|
"eval_sts-test_spearman_euclidean": 0.7674205303105437, |
|
"eval_sts-test_spearman_manhattan": 0.7664974867050092, |
|
"eval_sts-test_spearman_max": 0.7806331583677342, |
|
"eval_vitaminc-pairs_loss": 2.721674919128418, |
|
"eval_vitaminc-pairs_runtime": 1.4468, |
|
"eval_vitaminc-pairs_samples_per_second": 74.65, |
|
"eval_vitaminc-pairs_steps_per_second": 1.382, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_negation-triplets_loss": 2.338909387588501, |
|
"eval_negation-triplets_runtime": 0.3017, |
|
"eval_negation-triplets_samples_per_second": 212.101, |
|
"eval_negation-triplets_steps_per_second": 3.314, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_scitail-pairs-pos_loss": 0.23291125893592834, |
|
"eval_scitail-pairs-pos_runtime": 0.3664, |
|
"eval_scitail-pairs-pos_samples_per_second": 147.385, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.729, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_xsum-pairs_loss": 1.2065516710281372, |
|
"eval_xsum-pairs_runtime": 3.1488, |
|
"eval_xsum-pairs_samples_per_second": 40.65, |
|
"eval_xsum-pairs_steps_per_second": 0.635, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_sciq_pairs_loss": 0.09487833082675934, |
|
"eval_sciq_pairs_runtime": 3.2618, |
|
"eval_sciq_pairs_samples_per_second": 39.242, |
|
"eval_sciq_pairs_steps_per_second": 0.613, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_qasc_pairs_loss": 0.8461999297142029, |
|
"eval_qasc_pairs_runtime": 0.6246, |
|
"eval_qasc_pairs_samples_per_second": 204.93, |
|
"eval_qasc_pairs_steps_per_second": 3.202, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_openbookqa_pairs_loss": 1.5739191770553589, |
|
"eval_openbookqa_pairs_runtime": 0.5751, |
|
"eval_openbookqa_pairs_samples_per_second": 222.568, |
|
"eval_openbookqa_pairs_steps_per_second": 3.478, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_msmarco_pairs_loss": 1.6446179151535034, |
|
"eval_msmarco_pairs_runtime": 1.2828, |
|
"eval_msmarco_pairs_samples_per_second": 99.784, |
|
"eval_msmarco_pairs_steps_per_second": 1.559, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_nq_pairs_loss": 2.364896535873413, |
|
"eval_nq_pairs_runtime": 2.3802, |
|
"eval_nq_pairs_samples_per_second": 53.777, |
|
"eval_nq_pairs_steps_per_second": 0.84, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_trivia_pairs_loss": 1.7080069780349731, |
|
"eval_trivia_pairs_runtime": 4.4372, |
|
"eval_trivia_pairs_samples_per_second": 28.847, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_gooaq_pairs_loss": 1.7924479246139526, |
|
"eval_gooaq_pairs_runtime": 0.8761, |
|
"eval_gooaq_pairs_samples_per_second": 146.094, |
|
"eval_gooaq_pairs_steps_per_second": 2.283, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6097388849682428, |
|
"eval_paws-pos_loss": 0.08000019192695618, |
|
"eval_paws-pos_runtime": 0.6839, |
|
"eval_paws-pos_samples_per_second": 187.168, |
|
"eval_paws-pos_steps_per_second": 2.924, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.6266760762173607, |
|
"grad_norm": 4.418070316314697, |
|
"learning_rate": 2.5227272727272732e-05, |
|
"loss": 2.1276, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.6436132674664785, |
|
"grad_norm": 4.3495259284973145, |
|
"learning_rate": 2.590909090909091e-05, |
|
"loss": 1.7531, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.6605504587155964, |
|
"grad_norm": 4.294332027435303, |
|
"learning_rate": 2.6590909090909093e-05, |
|
"loss": 2.0179, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.6774876499647142, |
|
"grad_norm": 3.4215610027313232, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 1.5305, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6944248412138321, |
|
"grad_norm": 4.37844181060791, |
|
"learning_rate": 2.7954545454545457e-05, |
|
"loss": 1.6925, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.7113620324629499, |
|
"grad_norm": 4.019878387451172, |
|
"learning_rate": 2.863636363636364e-05, |
|
"loss": 1.5248, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.7282992237120678, |
|
"grad_norm": 4.662445068359375, |
|
"learning_rate": 2.931818181818182e-05, |
|
"loss": 1.523, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.7452364149611856, |
|
"grad_norm": 4.6323161125183105, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 1.5474, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"grad_norm": 4.586575984954834, |
|
"learning_rate": 3.068181818181819e-05, |
|
"loss": 1.7221, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.56640625, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8478574156761169, |
|
"eval_VitaminC_cosine_ap": 0.5325579595957614, |
|
"eval_VitaminC_cosine_f1": 0.6559999999999999, |
|
"eval_VitaminC_cosine_f1_threshold": 0.35839784145355225, |
|
"eval_VitaminC_cosine_precision": 0.4880952380952381, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5625, |
|
"eval_VitaminC_dot_accuracy_threshold": 366.9839172363281, |
|
"eval_VitaminC_dot_ap": 0.5326813797607027, |
|
"eval_VitaminC_dot_f1": 0.6559999999999999, |
|
"eval_VitaminC_dot_f1_threshold": 157.35829162597656, |
|
"eval_VitaminC_dot_precision": 0.4880952380952381, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.5625, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.044445037841797, |
|
"eval_VitaminC_euclidean_ap": 0.5304103559932005, |
|
"eval_VitaminC_euclidean_f1": 0.6542553191489362, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.461441040039062, |
|
"eval_VitaminC_euclidean_precision": 0.48616600790513836, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5625, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 239.24815368652344, |
|
"eval_VitaminC_manhattan_ap": 0.5314780667834758, |
|
"eval_VitaminC_manhattan_f1": 0.6575342465753424, |
|
"eval_VitaminC_manhattan_f1_threshold": 400.6834716796875, |
|
"eval_VitaminC_manhattan_precision": 0.49586776859504134, |
|
"eval_VitaminC_manhattan_recall": 0.975609756097561, |
|
"eval_VitaminC_max_accuracy": 0.56640625, |
|
"eval_VitaminC_max_accuracy_threshold": 366.9839172363281, |
|
"eval_VitaminC_max_ap": 0.5326813797607027, |
|
"eval_VitaminC_max_f1": 0.6575342465753424, |
|
"eval_VitaminC_max_f1_threshold": 400.6834716796875, |
|
"eval_VitaminC_max_precision": 0.49586776859504134, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5326813797607027, |
|
"eval_sts-test_pearson_cosine": 0.7919597804368175, |
|
"eval_sts-test_pearson_dot": 0.7994867531185785, |
|
"eval_sts-test_pearson_euclidean": 0.8117960113303863, |
|
"eval_sts-test_pearson_manhattan": 0.8144714466358016, |
|
"eval_sts-test_pearson_max": 0.8144714466358016, |
|
"eval_sts-test_spearman_cosine": 0.831478610786181, |
|
"eval_sts-test_spearman_dot": 0.8192534746855707, |
|
"eval_sts-test_spearman_euclidean": 0.8185577905406703, |
|
"eval_sts-test_spearman_manhattan": 0.8154771593606782, |
|
"eval_sts-test_spearman_max": 0.831478610786181, |
|
"eval_vitaminc-pairs_loss": 2.852091073989868, |
|
"eval_vitaminc-pairs_runtime": 1.4427, |
|
"eval_vitaminc-pairs_samples_per_second": 74.858, |
|
"eval_vitaminc-pairs_steps_per_second": 1.386, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_negation-triplets_loss": 2.074247121810913, |
|
"eval_negation-triplets_runtime": 0.3, |
|
"eval_negation-triplets_samples_per_second": 213.353, |
|
"eval_negation-triplets_steps_per_second": 3.334, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_scitail-pairs-pos_loss": 0.2149849385023117, |
|
"eval_scitail-pairs-pos_runtime": 0.3744, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.219, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.671, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_xsum-pairs_loss": 0.7706837058067322, |
|
"eval_xsum-pairs_runtime": 3.1609, |
|
"eval_xsum-pairs_samples_per_second": 40.495, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_sciq_pairs_loss": 0.07513368874788284, |
|
"eval_sciq_pairs_runtime": 3.2949, |
|
"eval_sciq_pairs_samples_per_second": 38.848, |
|
"eval_sciq_pairs_steps_per_second": 0.607, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_qasc_pairs_loss": 0.6355602741241455, |
|
"eval_qasc_pairs_runtime": 0.6392, |
|
"eval_qasc_pairs_samples_per_second": 200.246, |
|
"eval_qasc_pairs_steps_per_second": 3.129, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_openbookqa_pairs_loss": 1.4014525413513184, |
|
"eval_openbookqa_pairs_runtime": 0.622, |
|
"eval_openbookqa_pairs_samples_per_second": 205.786, |
|
"eval_openbookqa_pairs_steps_per_second": 3.215, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_msmarco_pairs_loss": 1.1524099111557007, |
|
"eval_msmarco_pairs_runtime": 1.31, |
|
"eval_msmarco_pairs_samples_per_second": 97.709, |
|
"eval_msmarco_pairs_steps_per_second": 1.527, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_nq_pairs_loss": 1.7768574953079224, |
|
"eval_nq_pairs_runtime": 2.3979, |
|
"eval_nq_pairs_samples_per_second": 53.379, |
|
"eval_nq_pairs_steps_per_second": 0.834, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_trivia_pairs_loss": 1.4495295286178589, |
|
"eval_trivia_pairs_runtime": 4.4194, |
|
"eval_trivia_pairs_samples_per_second": 28.964, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_gooaq_pairs_loss": 1.3955378532409668, |
|
"eval_gooaq_pairs_runtime": 0.8788, |
|
"eval_gooaq_pairs_samples_per_second": 145.649, |
|
"eval_gooaq_pairs_steps_per_second": 2.276, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7621736062103035, |
|
"eval_paws-pos_loss": 0.06006813049316406, |
|
"eval_paws-pos_runtime": 0.6896, |
|
"eval_paws-pos_samples_per_second": 185.603, |
|
"eval_paws-pos_steps_per_second": 2.9, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.7791107974594214, |
|
"grad_norm": 3.864208936691284, |
|
"learning_rate": 3.1363636363636365e-05, |
|
"loss": 1.5366, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.7960479887085392, |
|
"grad_norm": 3.837550640106201, |
|
"learning_rate": 3.204545454545455e-05, |
|
"loss": 1.3045, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.8129851799576571, |
|
"grad_norm": 3.5258102416992188, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 1.1999, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.8299223712067749, |
|
"grad_norm": 3.4431183338165283, |
|
"learning_rate": 3.340909090909091e-05, |
|
"loss": 1.3483, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.8468595624558928, |
|
"grad_norm": 3.6455864906311035, |
|
"learning_rate": 3.409090909090909e-05, |
|
"loss": 1.2009, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8637967537050106, |
|
"grad_norm": 4.508525371551514, |
|
"learning_rate": 3.4772727272727276e-05, |
|
"loss": 1.4495, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.8807339449541285, |
|
"grad_norm": 3.0432400703430176, |
|
"learning_rate": 3.545454545454546e-05, |
|
"loss": 1.2329, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.8976711362032463, |
|
"grad_norm": 3.0190365314483643, |
|
"learning_rate": 3.613636363636364e-05, |
|
"loss": 1.1905, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"grad_norm": 3.74668288230896, |
|
"learning_rate": 3.681818181818182e-05, |
|
"loss": 1.277, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8101799488067627, |
|
"eval_VitaminC_cosine_ap": 0.5298515171639175, |
|
"eval_VitaminC_cosine_f1": 0.6542553191489362, |
|
"eval_VitaminC_cosine_f1_threshold": 0.345889687538147, |
|
"eval_VitaminC_cosine_precision": 0.48616600790513836, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.55078125, |
|
"eval_VitaminC_dot_accuracy_threshold": 373.5804443359375, |
|
"eval_VitaminC_dot_ap": 0.5310954683437364, |
|
"eval_VitaminC_dot_f1": 0.6542553191489362, |
|
"eval_VitaminC_dot_f1_threshold": 155.41326904296875, |
|
"eval_VitaminC_dot_precision": 0.48616600790513836, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.57421875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.60124683380127, |
|
"eval_VitaminC_euclidean_ap": 0.5286057955992807, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.904512405395508, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 262.37322998046875, |
|
"eval_VitaminC_manhattan_ap": 0.5253560845853567, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 465.94549560546875, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.57421875, |
|
"eval_VitaminC_max_accuracy_threshold": 373.5804443359375, |
|
"eval_VitaminC_max_ap": 0.5310954683437364, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 465.94549560546875, |
|
"eval_VitaminC_max_precision": 0.4900398406374502, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5310954683437364, |
|
"eval_sts-test_pearson_cosine": 0.8067612938723231, |
|
"eval_sts-test_pearson_dot": 0.8217874837658639, |
|
"eval_sts-test_pearson_euclidean": 0.827948115812785, |
|
"eval_sts-test_pearson_manhattan": 0.8261527694953693, |
|
"eval_sts-test_pearson_max": 0.827948115812785, |
|
"eval_sts-test_spearman_cosine": 0.8547777638284432, |
|
"eval_sts-test_spearman_dot": 0.8498786150097738, |
|
"eval_sts-test_spearman_euclidean": 0.8373845860667446, |
|
"eval_sts-test_spearman_manhattan": 0.8324507067477893, |
|
"eval_sts-test_spearman_max": 0.8547777638284432, |
|
"eval_vitaminc-pairs_loss": 2.776399612426758, |
|
"eval_vitaminc-pairs_runtime": 1.4503, |
|
"eval_vitaminc-pairs_samples_per_second": 74.467, |
|
"eval_vitaminc-pairs_steps_per_second": 1.379, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_negation-triplets_loss": 2.005451202392578, |
|
"eval_negation-triplets_runtime": 0.2981, |
|
"eval_negation-triplets_samples_per_second": 214.709, |
|
"eval_negation-triplets_steps_per_second": 3.355, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_scitail-pairs-pos_loss": 0.19877880811691284, |
|
"eval_scitail-pairs-pos_runtime": 0.3623, |
|
"eval_scitail-pairs-pos_samples_per_second": 149.043, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.76, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_xsum-pairs_loss": 0.5586928725242615, |
|
"eval_xsum-pairs_runtime": 3.1466, |
|
"eval_xsum-pairs_samples_per_second": 40.679, |
|
"eval_xsum-pairs_steps_per_second": 0.636, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_sciq_pairs_loss": 0.06038254499435425, |
|
"eval_sciq_pairs_runtime": 3.4092, |
|
"eval_sciq_pairs_samples_per_second": 37.545, |
|
"eval_sciq_pairs_steps_per_second": 0.587, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_qasc_pairs_loss": 0.49434012174606323, |
|
"eval_qasc_pairs_runtime": 0.6342, |
|
"eval_qasc_pairs_samples_per_second": 201.832, |
|
"eval_qasc_pairs_steps_per_second": 3.154, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_openbookqa_pairs_loss": 1.1903400421142578, |
|
"eval_openbookqa_pairs_runtime": 0.5754, |
|
"eval_openbookqa_pairs_samples_per_second": 222.449, |
|
"eval_openbookqa_pairs_steps_per_second": 3.476, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_msmarco_pairs_loss": 0.8656420707702637, |
|
"eval_msmarco_pairs_runtime": 1.2858, |
|
"eval_msmarco_pairs_samples_per_second": 99.547, |
|
"eval_msmarco_pairs_steps_per_second": 1.555, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_nq_pairs_loss": 1.1553651094436646, |
|
"eval_nq_pairs_runtime": 2.3754, |
|
"eval_nq_pairs_samples_per_second": 53.885, |
|
"eval_nq_pairs_steps_per_second": 0.842, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_trivia_pairs_loss": 1.2928619384765625, |
|
"eval_trivia_pairs_runtime": 4.4084, |
|
"eval_trivia_pairs_samples_per_second": 29.035, |
|
"eval_trivia_pairs_steps_per_second": 0.454, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_gooaq_pairs_loss": 1.1580811738967896, |
|
"eval_gooaq_pairs_runtime": 0.8731, |
|
"eval_gooaq_pairs_samples_per_second": 146.607, |
|
"eval_gooaq_pairs_steps_per_second": 2.291, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9146083274523642, |
|
"eval_paws-pos_loss": 0.052534349262714386, |
|
"eval_paws-pos_runtime": 0.6835, |
|
"eval_paws-pos_samples_per_second": 187.258, |
|
"eval_paws-pos_steps_per_second": 2.926, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.9315455187014821, |
|
"grad_norm": 4.7817864418029785, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.339, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9484827099505999, |
|
"grad_norm": 4.000570774078369, |
|
"learning_rate": 3.818181818181819e-05, |
|
"loss": 1.1535, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.9654199011997178, |
|
"grad_norm": 3.5971670150756836, |
|
"learning_rate": 3.8863636363636364e-05, |
|
"loss": 1.1643, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.9823570924488356, |
|
"grad_norm": 3.6582131385803223, |
|
"learning_rate": 3.954545454545455e-05, |
|
"loss": 1.2221, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.9992942836979535, |
|
"grad_norm": 4.0953898429870605, |
|
"learning_rate": 3.9999477905707075e-05, |
|
"loss": 1.0974, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.0162314749470713, |
|
"grad_norm": 4.092026233673096, |
|
"learning_rate": 3.999164730903481e-05, |
|
"loss": 1.0984, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0331686661961892, |
|
"grad_norm": 3.6480906009674072, |
|
"learning_rate": 3.997442539262898e-05, |
|
"loss": 1.0543, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.050105857445307, |
|
"grad_norm": 3.433056592941284, |
|
"learning_rate": 3.99478242943326e-05, |
|
"loss": 1.0994, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"grad_norm": 3.507981777191162, |
|
"learning_rate": 3.991186276234698e-05, |
|
"loss": 1.0621, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7840081453323364, |
|
"eval_VitaminC_cosine_ap": 0.5400770399437144, |
|
"eval_VitaminC_cosine_f1": 0.6577540106951871, |
|
"eval_VitaminC_cosine_f1_threshold": 0.39448243379592896, |
|
"eval_VitaminC_cosine_precision": 0.4900398406374502, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5625, |
|
"eval_VitaminC_dot_accuracy_threshold": 323.20281982421875, |
|
"eval_VitaminC_dot_ap": 0.5420016101916201, |
|
"eval_VitaminC_dot_f1": 0.6575342465753424, |
|
"eval_VitaminC_dot_f1_threshold": 198.04354858398438, |
|
"eval_VitaminC_dot_precision": 0.49586776859504134, |
|
"eval_VitaminC_dot_recall": 0.975609756097561, |
|
"eval_VitaminC_euclidean_accuracy": 0.5859375, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.84214973449707, |
|
"eval_VitaminC_euclidean_ap": 0.5392157650683609, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.595678329467773, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5703125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 275.1253356933594, |
|
"eval_VitaminC_manhattan_ap": 0.5341380380767263, |
|
"eval_VitaminC_manhattan_f1": 0.6576819407008085, |
|
"eval_VitaminC_manhattan_f1_threshold": 457.04986572265625, |
|
"eval_VitaminC_manhattan_precision": 0.49193548387096775, |
|
"eval_VitaminC_manhattan_recall": 0.991869918699187, |
|
"eval_VitaminC_max_accuracy": 0.5859375, |
|
"eval_VitaminC_max_accuracy_threshold": 323.20281982421875, |
|
"eval_VitaminC_max_ap": 0.5420016101916201, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 457.04986572265625, |
|
"eval_VitaminC_max_precision": 0.49586776859504134, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5420016101916201, |
|
"eval_sts-test_pearson_cosine": 0.8193410747427454, |
|
"eval_sts-test_pearson_dot": 0.8275444476338831, |
|
"eval_sts-test_pearson_euclidean": 0.8464528142983967, |
|
"eval_sts-test_pearson_manhattan": 0.8440476980962159, |
|
"eval_sts-test_pearson_max": 0.8464528142983967, |
|
"eval_sts-test_spearman_cosine": 0.8680272706642878, |
|
"eval_sts-test_spearman_dot": 0.8555529342729671, |
|
"eval_sts-test_spearman_euclidean": 0.8542457068859202, |
|
"eval_sts-test_spearman_manhattan": 0.8510265117511795, |
|
"eval_sts-test_spearman_max": 0.8680272706642878, |
|
"eval_vitaminc-pairs_loss": 2.6755428314208984, |
|
"eval_vitaminc-pairs_runtime": 1.4509, |
|
"eval_vitaminc-pairs_samples_per_second": 74.437, |
|
"eval_vitaminc-pairs_steps_per_second": 1.378, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_negation-triplets_loss": 1.9071491956710815, |
|
"eval_negation-triplets_runtime": 0.3051, |
|
"eval_negation-triplets_samples_per_second": 209.756, |
|
"eval_negation-triplets_steps_per_second": 3.277, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_scitail-pairs-pos_loss": 0.18539850413799286, |
|
"eval_scitail-pairs-pos_runtime": 0.4199, |
|
"eval_scitail-pairs-pos_samples_per_second": 128.604, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.382, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_xsum-pairs_loss": 0.38365328311920166, |
|
"eval_xsum-pairs_runtime": 3.1907, |
|
"eval_xsum-pairs_samples_per_second": 40.116, |
|
"eval_xsum-pairs_steps_per_second": 0.627, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_sciq_pairs_loss": 0.05558515340089798, |
|
"eval_sciq_pairs_runtime": 3.2891, |
|
"eval_sciq_pairs_samples_per_second": 38.917, |
|
"eval_sciq_pairs_steps_per_second": 0.608, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_qasc_pairs_loss": 0.40469691157341003, |
|
"eval_qasc_pairs_runtime": 0.6267, |
|
"eval_qasc_pairs_samples_per_second": 204.245, |
|
"eval_qasc_pairs_steps_per_second": 3.191, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_openbookqa_pairs_loss": 1.0837312936782837, |
|
"eval_openbookqa_pairs_runtime": 0.5765, |
|
"eval_openbookqa_pairs_samples_per_second": 222.02, |
|
"eval_openbookqa_pairs_steps_per_second": 3.469, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_msmarco_pairs_loss": 0.6897398233413696, |
|
"eval_msmarco_pairs_runtime": 1.2918, |
|
"eval_msmarco_pairs_samples_per_second": 99.089, |
|
"eval_msmarco_pairs_steps_per_second": 1.548, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_nq_pairs_loss": 0.9603796601295471, |
|
"eval_nq_pairs_runtime": 2.3975, |
|
"eval_nq_pairs_samples_per_second": 53.39, |
|
"eval_nq_pairs_steps_per_second": 0.834, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_trivia_pairs_loss": 1.200446605682373, |
|
"eval_trivia_pairs_runtime": 4.4582, |
|
"eval_trivia_pairs_samples_per_second": 28.711, |
|
"eval_trivia_pairs_steps_per_second": 0.449, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_gooaq_pairs_loss": 1.0353316068649292, |
|
"eval_gooaq_pairs_runtime": 0.8765, |
|
"eval_gooaq_pairs_samples_per_second": 146.042, |
|
"eval_gooaq_pairs_steps_per_second": 2.282, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.067043048694425, |
|
"eval_paws-pos_loss": 0.042069558054208755, |
|
"eval_paws-pos_runtime": 0.6909, |
|
"eval_paws-pos_samples_per_second": 185.263, |
|
"eval_paws-pos_steps_per_second": 2.895, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.0839802399435428, |
|
"grad_norm": 2.979419469833374, |
|
"learning_rate": 3.986656614201813e-05, |
|
"loss": 0.8724, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.1009174311926606, |
|
"grad_norm": 2.835219144821167, |
|
"learning_rate": 3.981196635797361e-05, |
|
"loss": 0.9381, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1178546224417785, |
|
"grad_norm": 3.6650869846343994, |
|
"learning_rate": 3.974810189162238e-05, |
|
"loss": 0.9617, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.1347918136908963, |
|
"grad_norm": 4.188896656036377, |
|
"learning_rate": 3.967501775403343e-05, |
|
"loss": 1.0139, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 1.1517290049400142, |
|
"grad_norm": 3.1624915599823, |
|
"learning_rate": 3.959276545421244e-05, |
|
"loss": 1.1073, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.168666196189132, |
|
"grad_norm": 3.245002508163452, |
|
"learning_rate": 3.950140296279871e-05, |
|
"loss": 0.8365, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 1.18560338743825, |
|
"grad_norm": 4.376185894012451, |
|
"learning_rate": 3.9400994671208e-05, |
|
"loss": 1.1012, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2025405786873677, |
|
"grad_norm": 3.236583948135376, |
|
"learning_rate": 3.9291611346250066e-05, |
|
"loss": 1.0016, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"grad_norm": 3.7601733207702637, |
|
"learning_rate": 3.9173330080252904e-05, |
|
"loss": 1.0957, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7863086462020874, |
|
"eval_VitaminC_cosine_ap": 0.538511783260847, |
|
"eval_VitaminC_cosine_f1": 0.6577540106951871, |
|
"eval_VitaminC_cosine_f1_threshold": 0.4006580412387848, |
|
"eval_VitaminC_cosine_precision": 0.4900398406374502, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.57421875, |
|
"eval_VitaminC_dot_accuracy_threshold": 323.53277587890625, |
|
"eval_VitaminC_dot_ap": 0.5304994537787167, |
|
"eval_VitaminC_dot_f1": 0.6577540106951871, |
|
"eval_VitaminC_dot_f1_threshold": 166.45921325683594, |
|
"eval_VitaminC_dot_precision": 0.4900398406374502, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.631423950195312, |
|
"eval_VitaminC_euclidean_ap": 0.5363284984763951, |
|
"eval_VitaminC_euclidean_f1": 0.6542553191489362, |
|
"eval_VitaminC_euclidean_f1_threshold": 25.392715454101562, |
|
"eval_VitaminC_euclidean_precision": 0.48616600790513836, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 283.5897216796875, |
|
"eval_VitaminC_manhattan_ap": 0.5327191155331534, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 491.0370178222656, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 323.53277587890625, |
|
"eval_VitaminC_max_ap": 0.538511783260847, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 491.0370178222656, |
|
"eval_VitaminC_max_precision": 0.4900398406374502, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.538511783260847, |
|
"eval_sts-test_pearson_cosine": 0.8156684919084325, |
|
"eval_sts-test_pearson_dot": 0.8230786053133633, |
|
"eval_sts-test_pearson_euclidean": 0.845348828865422, |
|
"eval_sts-test_pearson_manhattan": 0.8432655375716184, |
|
"eval_sts-test_pearson_max": 0.845348828865422, |
|
"eval_sts-test_spearman_cosine": 0.8655524539841267, |
|
"eval_sts-test_spearman_dot": 0.8507196659909223, |
|
"eval_sts-test_spearman_euclidean": 0.8547050804103192, |
|
"eval_sts-test_spearman_manhattan": 0.8508668230591436, |
|
"eval_sts-test_spearman_max": 0.8655524539841267, |
|
"eval_vitaminc-pairs_loss": 2.5465524196624756, |
|
"eval_vitaminc-pairs_runtime": 1.4425, |
|
"eval_vitaminc-pairs_samples_per_second": 74.869, |
|
"eval_vitaminc-pairs_steps_per_second": 1.386, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_negation-triplets_loss": 1.9161474704742432, |
|
"eval_negation-triplets_runtime": 0.2994, |
|
"eval_negation-triplets_samples_per_second": 213.785, |
|
"eval_negation-triplets_steps_per_second": 3.34, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_scitail-pairs-pos_loss": 0.19009728729724884, |
|
"eval_scitail-pairs-pos_runtime": 0.3745, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.203, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.67, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_xsum-pairs_loss": 0.35912859439849854, |
|
"eval_xsum-pairs_runtime": 3.1543, |
|
"eval_xsum-pairs_samples_per_second": 40.58, |
|
"eval_xsum-pairs_steps_per_second": 0.634, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_sciq_pairs_loss": 0.05168920382857323, |
|
"eval_sciq_pairs_runtime": 3.2561, |
|
"eval_sciq_pairs_samples_per_second": 39.31, |
|
"eval_sciq_pairs_steps_per_second": 0.614, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_qasc_pairs_loss": 0.30753791332244873, |
|
"eval_qasc_pairs_runtime": 0.6201, |
|
"eval_qasc_pairs_samples_per_second": 206.418, |
|
"eval_qasc_pairs_steps_per_second": 3.225, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_openbookqa_pairs_loss": 0.9365726113319397, |
|
"eval_openbookqa_pairs_runtime": 0.5832, |
|
"eval_openbookqa_pairs_samples_per_second": 219.496, |
|
"eval_openbookqa_pairs_steps_per_second": 3.43, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_msmarco_pairs_loss": 0.5819053053855896, |
|
"eval_msmarco_pairs_runtime": 1.2858, |
|
"eval_msmarco_pairs_samples_per_second": 99.551, |
|
"eval_msmarco_pairs_steps_per_second": 1.555, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_nq_pairs_loss": 0.8172401785850525, |
|
"eval_nq_pairs_runtime": 2.3809, |
|
"eval_nq_pairs_samples_per_second": 53.761, |
|
"eval_nq_pairs_steps_per_second": 0.84, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_trivia_pairs_loss": 1.1411677598953247, |
|
"eval_trivia_pairs_runtime": 4.4162, |
|
"eval_trivia_pairs_samples_per_second": 28.984, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_gooaq_pairs_loss": 0.9686058759689331, |
|
"eval_gooaq_pairs_runtime": 0.8788, |
|
"eval_gooaq_pairs_samples_per_second": 145.645, |
|
"eval_gooaq_pairs_steps_per_second": 2.276, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2194777699364856, |
|
"eval_paws-pos_loss": 0.03953952714800835, |
|
"eval_paws-pos_runtime": 0.708, |
|
"eval_paws-pos_samples_per_second": 180.782, |
|
"eval_paws-pos_steps_per_second": 2.825, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.2364149611856035, |
|
"grad_norm": 3.566471576690674, |
|
"learning_rate": 3.904623423672881e-05, |
|
"loss": 1.1273, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 1.2533521524347213, |
|
"grad_norm": 4.086460590362549, |
|
"learning_rate": 3.891041339162053e-05, |
|
"loss": 1.2568, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.2702893436838392, |
|
"grad_norm": 3.2877376079559326, |
|
"learning_rate": 3.876596327016904e-05, |
|
"loss": 0.873, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.287226534932957, |
|
"grad_norm": 3.383211851119995, |
|
"learning_rate": 3.861298567944728e-05, |
|
"loss": 1.0003, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.3041637261820749, |
|
"grad_norm": 3.8474605083465576, |
|
"learning_rate": 3.8451588436607487e-05, |
|
"loss": 1.142, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.3211009174311927, |
|
"grad_norm": 3.027008533477783, |
|
"learning_rate": 3.8281885292892706e-05, |
|
"loss": 0.807, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.3380381086803106, |
|
"grad_norm": 2.9607250690460205, |
|
"learning_rate": 3.810399585346599e-05, |
|
"loss": 1.0231, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.3549752999294284, |
|
"grad_norm": 2.511488676071167, |
|
"learning_rate": 3.791804549311382e-05, |
|
"loss": 0.797, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"grad_norm": 2.603672504425049, |
|
"learning_rate": 3.7724165267883146e-05, |
|
"loss": 0.8473, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7651997804641724, |
|
"eval_VitaminC_cosine_ap": 0.5427753322056709, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.38563254475593567, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5703125, |
|
"eval_VitaminC_dot_accuracy_threshold": 330.23577880859375, |
|
"eval_VitaminC_dot_ap": 0.5507967714924796, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 160.55694580078125, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.033781051635742, |
|
"eval_VitaminC_euclidean_ap": 0.5406935655135654, |
|
"eval_VitaminC_euclidean_f1": 0.6576819407008085, |
|
"eval_VitaminC_euclidean_f1_threshold": 22.224994659423828, |
|
"eval_VitaminC_euclidean_precision": 0.49193548387096775, |
|
"eval_VitaminC_euclidean_recall": 0.991869918699187, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 274.7045593261719, |
|
"eval_VitaminC_manhattan_ap": 0.5366045405118165, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 475.4096374511719, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 330.23577880859375, |
|
"eval_VitaminC_max_ap": 0.5507967714924796, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 475.4096374511719, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5507967714924796, |
|
"eval_sts-test_pearson_cosine": 0.8204982036322743, |
|
"eval_sts-test_pearson_dot": 0.8243481169631539, |
|
"eval_sts-test_pearson_euclidean": 0.8495098083065487, |
|
"eval_sts-test_pearson_manhattan": 0.8491539225772841, |
|
"eval_sts-test_pearson_max": 0.8495098083065487, |
|
"eval_sts-test_spearman_cosine": 0.8687444375928703, |
|
"eval_sts-test_spearman_dot": 0.8509044179305871, |
|
"eval_sts-test_spearman_euclidean": 0.8563313271350431, |
|
"eval_sts-test_spearman_manhattan": 0.8563900467437737, |
|
"eval_sts-test_spearman_max": 0.8687444375928703, |
|
"eval_vitaminc-pairs_loss": 2.5139691829681396, |
|
"eval_vitaminc-pairs_runtime": 1.449, |
|
"eval_vitaminc-pairs_samples_per_second": 74.533, |
|
"eval_vitaminc-pairs_steps_per_second": 1.38, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_negation-triplets_loss": 1.8629425764083862, |
|
"eval_negation-triplets_runtime": 0.3014, |
|
"eval_negation-triplets_samples_per_second": 212.31, |
|
"eval_negation-triplets_steps_per_second": 3.317, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_scitail-pairs-pos_loss": 0.17502877116203308, |
|
"eval_scitail-pairs-pos_runtime": 0.3707, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.673, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.698, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_xsum-pairs_loss": 0.2802315950393677, |
|
"eval_xsum-pairs_runtime": 3.1565, |
|
"eval_xsum-pairs_samples_per_second": 40.551, |
|
"eval_xsum-pairs_steps_per_second": 0.634, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_sciq_pairs_loss": 0.046695925295352936, |
|
"eval_sciq_pairs_runtime": 3.2866, |
|
"eval_sciq_pairs_samples_per_second": 38.946, |
|
"eval_sciq_pairs_steps_per_second": 0.609, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_qasc_pairs_loss": 0.2354799211025238, |
|
"eval_qasc_pairs_runtime": 0.6228, |
|
"eval_qasc_pairs_samples_per_second": 205.533, |
|
"eval_qasc_pairs_steps_per_second": 3.211, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_openbookqa_pairs_loss": 0.8562020659446716, |
|
"eval_openbookqa_pairs_runtime": 0.5764, |
|
"eval_openbookqa_pairs_samples_per_second": 222.058, |
|
"eval_openbookqa_pairs_steps_per_second": 3.47, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_msmarco_pairs_loss": 0.5559017658233643, |
|
"eval_msmarco_pairs_runtime": 1.2826, |
|
"eval_msmarco_pairs_samples_per_second": 99.801, |
|
"eval_msmarco_pairs_steps_per_second": 1.559, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_nq_pairs_loss": 0.743526041507721, |
|
"eval_nq_pairs_runtime": 2.3784, |
|
"eval_nq_pairs_samples_per_second": 53.817, |
|
"eval_nq_pairs_steps_per_second": 0.841, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_trivia_pairs_loss": 1.106662392616272, |
|
"eval_trivia_pairs_runtime": 4.4193, |
|
"eval_trivia_pairs_samples_per_second": 28.964, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_gooaq_pairs_loss": 0.8928955793380737, |
|
"eval_gooaq_pairs_runtime": 0.8831, |
|
"eval_gooaq_pairs_samples_per_second": 144.944, |
|
"eval_gooaq_pairs_steps_per_second": 2.265, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3719124911785463, |
|
"eval_paws-pos_loss": 0.03428014740347862, |
|
"eval_paws-pos_runtime": 0.6872, |
|
"eval_paws-pos_samples_per_second": 186.261, |
|
"eval_paws-pos_steps_per_second": 2.91, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 1.3888496824276642, |
|
"grad_norm": 4.478828430175781, |
|
"learning_rate": 3.752249182271433e-05, |
|
"loss": 0.9531, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.405786873676782, |
|
"grad_norm": 3.3206863403320312, |
|
"learning_rate": 3.731316729513507e-05, |
|
"loss": 0.9023, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 1.4227240649258999, |
|
"grad_norm": 3.4713878631591797, |
|
"learning_rate": 3.7096339215083274e-05, |
|
"loss": 0.8922, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.4396612561750177, |
|
"grad_norm": 3.4212491512298584, |
|
"learning_rate": 3.687216040092931e-05, |
|
"loss": 0.9874, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.4565984474241356, |
|
"grad_norm": 3.398963689804077, |
|
"learning_rate": 3.6640788851771084e-05, |
|
"loss": 0.8508, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.4735356386732534, |
|
"grad_norm": 3.350128650665283, |
|
"learning_rate": 3.64023876360778e-05, |
|
"loss": 0.7149, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 1.4904728299223713, |
|
"grad_norm": 3.438978433609009, |
|
"learning_rate": 3.615712477676081e-05, |
|
"loss": 0.894, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.5074100211714891, |
|
"grad_norm": 3.1700806617736816, |
|
"learning_rate": 3.5905173132752725e-05, |
|
"loss": 0.867, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"grad_norm": 3.1567916870117188, |
|
"learning_rate": 3.5646710277178006e-05, |
|
"loss": 0.7493, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.58203125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7990785241127014, |
|
"eval_VitaminC_cosine_ap": 0.5489113961762149, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3687684237957001, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.57421875, |
|
"eval_VitaminC_dot_accuracy_threshold": 328.30560302734375, |
|
"eval_VitaminC_dot_ap": 0.5498735151014204, |
|
"eval_VitaminC_dot_f1": 0.6595174262734584, |
|
"eval_VitaminC_dot_f1_threshold": 153.01849365234375, |
|
"eval_VitaminC_dot_precision": 0.492, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.773557662963867, |
|
"eval_VitaminC_euclidean_ap": 0.5426159894851803, |
|
"eval_VitaminC_euclidean_f1": 0.6559999999999999, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.71053123474121, |
|
"eval_VitaminC_euclidean_precision": 0.4880952380952381, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.58203125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 272.04931640625, |
|
"eval_VitaminC_manhattan_ap": 0.5396432749419082, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 494.33001708984375, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 328.30560302734375, |
|
"eval_VitaminC_max_ap": 0.5498735151014204, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 494.33001708984375, |
|
"eval_VitaminC_max_precision": 0.492, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5498735151014204, |
|
"eval_sts-test_pearson_cosine": 0.8213785723785002, |
|
"eval_sts-test_pearson_dot": 0.8169840312248031, |
|
"eval_sts-test_pearson_euclidean": 0.8549065829936804, |
|
"eval_sts-test_pearson_manhattan": 0.8559014033008101, |
|
"eval_sts-test_pearson_max": 0.8559014033008101, |
|
"eval_sts-test_spearman_cosine": 0.871560114440785, |
|
"eval_sts-test_spearman_dot": 0.8412461164335756, |
|
"eval_sts-test_spearman_euclidean": 0.8616554770242205, |
|
"eval_sts-test_spearman_manhattan": 0.86344749922969, |
|
"eval_sts-test_spearman_max": 0.871560114440785, |
|
"eval_vitaminc-pairs_loss": 2.5574047565460205, |
|
"eval_vitaminc-pairs_runtime": 1.4466, |
|
"eval_vitaminc-pairs_samples_per_second": 74.658, |
|
"eval_vitaminc-pairs_steps_per_second": 1.383, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_negation-triplets_loss": 1.853515386581421, |
|
"eval_negation-triplets_runtime": 0.2992, |
|
"eval_negation-triplets_samples_per_second": 213.896, |
|
"eval_negation-triplets_steps_per_second": 3.342, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_scitail-pairs-pos_loss": 0.1692524254322052, |
|
"eval_scitail-pairs-pos_runtime": 0.3739, |
|
"eval_scitail-pairs-pos_samples_per_second": 144.426, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.675, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_xsum-pairs_loss": 0.22170975804328918, |
|
"eval_xsum-pairs_runtime": 3.1517, |
|
"eval_xsum-pairs_samples_per_second": 40.613, |
|
"eval_xsum-pairs_steps_per_second": 0.635, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_sciq_pairs_loss": 0.04346679896116257, |
|
"eval_sciq_pairs_runtime": 3.2686, |
|
"eval_sciq_pairs_samples_per_second": 39.16, |
|
"eval_sciq_pairs_steps_per_second": 0.612, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_qasc_pairs_loss": 0.24427936971187592, |
|
"eval_qasc_pairs_runtime": 0.6217, |
|
"eval_qasc_pairs_samples_per_second": 205.897, |
|
"eval_qasc_pairs_steps_per_second": 3.217, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_openbookqa_pairs_loss": 0.7998915910720825, |
|
"eval_openbookqa_pairs_runtime": 0.576, |
|
"eval_openbookqa_pairs_samples_per_second": 222.206, |
|
"eval_openbookqa_pairs_steps_per_second": 3.472, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_msmarco_pairs_loss": 0.5027381777763367, |
|
"eval_msmarco_pairs_runtime": 1.2901, |
|
"eval_msmarco_pairs_samples_per_second": 99.216, |
|
"eval_msmarco_pairs_steps_per_second": 1.55, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_nq_pairs_loss": 0.6529555916786194, |
|
"eval_nq_pairs_runtime": 2.3842, |
|
"eval_nq_pairs_samples_per_second": 53.687, |
|
"eval_nq_pairs_steps_per_second": 0.839, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_trivia_pairs_loss": 1.0634211301803589, |
|
"eval_trivia_pairs_runtime": 4.4089, |
|
"eval_trivia_pairs_samples_per_second": 29.032, |
|
"eval_trivia_pairs_steps_per_second": 0.454, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_gooaq_pairs_loss": 0.800453245639801, |
|
"eval_gooaq_pairs_runtime": 0.8705, |
|
"eval_gooaq_pairs_samples_per_second": 147.034, |
|
"eval_gooaq_pairs_steps_per_second": 2.297, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.524347212420607, |
|
"eval_paws-pos_loss": 0.031901415437459946, |
|
"eval_paws-pos_runtime": 0.6828, |
|
"eval_paws-pos_samples_per_second": 187.456, |
|
"eval_paws-pos_steps_per_second": 2.929, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.5412844036697249, |
|
"grad_norm": 3.258525848388672, |
|
"learning_rate": 3.5381918372201175e-05, |
|
"loss": 0.7974, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.5582215949188427, |
|
"grad_norm": 2.9689552783966064, |
|
"learning_rate": 3.5110984040640627e-05, |
|
"loss": 0.797, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.5751587861679606, |
|
"grad_norm": 3.50411057472229, |
|
"learning_rate": 3.483409823443864e-05, |
|
"loss": 0.6749, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 1.5920959774170784, |
|
"grad_norm": 2.840614080429077, |
|
"learning_rate": 3.4551456100080266e-05, |
|
"loss": 0.9325, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.6090331686661963, |
|
"grad_norm": 2.934267044067383, |
|
"learning_rate": 3.426325684105594e-05, |
|
"loss": 0.8418, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.6259703599153141, |
|
"grad_norm": 3.5037455558776855, |
|
"learning_rate": 3.396970357746474e-05, |
|
"loss": 1.0135, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.642907551164432, |
|
"grad_norm": 3.349975109100342, |
|
"learning_rate": 3.3671003202857315e-05, |
|
"loss": 0.6961, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 1.6598447424135498, |
|
"grad_norm": 3.207557439804077, |
|
"learning_rate": 3.336736623841924e-05, |
|
"loss": 0.9361, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"grad_norm": 2.0259296894073486, |
|
"learning_rate": 3.305900668459766e-05, |
|
"loss": 0.6747, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7887165546417236, |
|
"eval_VitaminC_cosine_ap": 0.5443802154749287, |
|
"eval_VitaminC_cosine_f1": 0.6595174262734584, |
|
"eval_VitaminC_cosine_f1_threshold": 0.35189926624298096, |
|
"eval_VitaminC_cosine_precision": 0.492, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5625, |
|
"eval_VitaminC_dot_accuracy_threshold": 335.016357421875, |
|
"eval_VitaminC_dot_ap": 0.5460930199557891, |
|
"eval_VitaminC_dot_f1": 0.6594594594594595, |
|
"eval_VitaminC_dot_f1_threshold": 158.6214599609375, |
|
"eval_VitaminC_dot_precision": 0.4939271255060729, |
|
"eval_VitaminC_dot_recall": 0.991869918699187, |
|
"eval_VitaminC_euclidean_accuracy": 0.57421875, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.359209060668945, |
|
"eval_VitaminC_euclidean_ap": 0.5420558119789205, |
|
"eval_VitaminC_euclidean_f1": 0.6577540106951871, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.44475746154785, |
|
"eval_VitaminC_euclidean_precision": 0.4900398406374502, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 309.7850646972656, |
|
"eval_VitaminC_manhattan_ap": 0.5398712022586767, |
|
"eval_VitaminC_manhattan_f1": 0.6595174262734584, |
|
"eval_VitaminC_manhattan_f1_threshold": 486.6765441894531, |
|
"eval_VitaminC_manhattan_precision": 0.492, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 335.016357421875, |
|
"eval_VitaminC_max_ap": 0.5460930199557891, |
|
"eval_VitaminC_max_f1": 0.6595174262734584, |
|
"eval_VitaminC_max_f1_threshold": 486.6765441894531, |
|
"eval_VitaminC_max_precision": 0.4939271255060729, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5460930199557891, |
|
"eval_sts-test_pearson_cosine": 0.8332392980969607, |
|
"eval_sts-test_pearson_dot": 0.8346600863241642, |
|
"eval_sts-test_pearson_euclidean": 0.8653211336269704, |
|
"eval_sts-test_pearson_manhattan": 0.8653335270474869, |
|
"eval_sts-test_pearson_max": 0.8653335270474869, |
|
"eval_sts-test_spearman_cosine": 0.8786841635561152, |
|
"eval_sts-test_spearman_dot": 0.8596876540389535, |
|
"eval_sts-test_spearman_euclidean": 0.8687344122938186, |
|
"eval_sts-test_spearman_manhattan": 0.8687734393508408, |
|
"eval_sts-test_spearman_max": 0.8786841635561152, |
|
"eval_vitaminc-pairs_loss": 2.4870808124542236, |
|
"eval_vitaminc-pairs_runtime": 1.4506, |
|
"eval_vitaminc-pairs_samples_per_second": 74.451, |
|
"eval_vitaminc-pairs_steps_per_second": 1.379, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_negation-triplets_loss": 1.7349412441253662, |
|
"eval_negation-triplets_runtime": 0.2993, |
|
"eval_negation-triplets_samples_per_second": 213.838, |
|
"eval_negation-triplets_steps_per_second": 3.341, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_scitail-pairs-pos_loss": 0.15961770713329315, |
|
"eval_scitail-pairs-pos_runtime": 0.3704, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.808, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.7, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_xsum-pairs_loss": 0.22417353093624115, |
|
"eval_xsum-pairs_runtime": 3.1629, |
|
"eval_xsum-pairs_samples_per_second": 40.469, |
|
"eval_xsum-pairs_steps_per_second": 0.632, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_sciq_pairs_loss": 0.03957323729991913, |
|
"eval_sciq_pairs_runtime": 3.2788, |
|
"eval_sciq_pairs_samples_per_second": 39.039, |
|
"eval_sciq_pairs_steps_per_second": 0.61, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_qasc_pairs_loss": 0.19627788662910461, |
|
"eval_qasc_pairs_runtime": 0.6246, |
|
"eval_qasc_pairs_samples_per_second": 204.945, |
|
"eval_qasc_pairs_steps_per_second": 3.202, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_openbookqa_pairs_loss": 0.7668256163597107, |
|
"eval_openbookqa_pairs_runtime": 0.5769, |
|
"eval_openbookqa_pairs_samples_per_second": 221.888, |
|
"eval_openbookqa_pairs_steps_per_second": 3.467, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_msmarco_pairs_loss": 0.5024800300598145, |
|
"eval_msmarco_pairs_runtime": 1.287, |
|
"eval_msmarco_pairs_samples_per_second": 99.457, |
|
"eval_msmarco_pairs_steps_per_second": 1.554, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_nq_pairs_loss": 0.6426529288291931, |
|
"eval_nq_pairs_runtime": 2.3694, |
|
"eval_nq_pairs_samples_per_second": 54.023, |
|
"eval_nq_pairs_steps_per_second": 0.844, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_trivia_pairs_loss": 0.9762344360351562, |
|
"eval_trivia_pairs_runtime": 4.4202, |
|
"eval_trivia_pairs_samples_per_second": 28.958, |
|
"eval_trivia_pairs_steps_per_second": 0.452, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_gooaq_pairs_loss": 0.7546207904815674, |
|
"eval_gooaq_pairs_runtime": 0.8779, |
|
"eval_gooaq_pairs_samples_per_second": 145.803, |
|
"eval_gooaq_pairs_steps_per_second": 2.278, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6767819336626677, |
|
"eval_paws-pos_loss": 0.029145879670977592, |
|
"eval_paws-pos_runtime": 0.6938, |
|
"eval_paws-pos_samples_per_second": 184.484, |
|
"eval_paws-pos_steps_per_second": 2.883, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 1.6937191249117856, |
|
"grad_norm": 2.766063928604126, |
|
"learning_rate": 3.274614187027587e-05, |
|
"loss": 0.7786, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.7106563161609034, |
|
"grad_norm": 3.1933176517486572, |
|
"learning_rate": 3.2428992299601946e-05, |
|
"loss": 0.7171, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 1.7275935074100213, |
|
"grad_norm": 3.0088443756103516, |
|
"learning_rate": 3.2107781496579536e-05, |
|
"loss": 0.6627, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 1.7445306986591391, |
|
"grad_norm": 3.13895845413208, |
|
"learning_rate": 3.178273584753023e-05, |
|
"loss": 0.6711, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 1.761467889908257, |
|
"grad_norm": 3.34114933013916, |
|
"learning_rate": 3.145408444153868e-05, |
|
"loss": 0.9076, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 1.7784050811573748, |
|
"grad_norm": 2.5035502910614014, |
|
"learning_rate": 3.1122058908992746e-05, |
|
"loss": 0.7414, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.7953422724064927, |
|
"grad_norm": 2.284698247909546, |
|
"learning_rate": 3.078689325833264e-05, |
|
"loss": 0.582, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.8122794636556105, |
|
"grad_norm": 2.643444538116455, |
|
"learning_rate": 3.044882371112396e-05, |
|
"loss": 0.6068, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"grad_norm": 2.37386155128479, |
|
"learning_rate": 3.0108088535571016e-05, |
|
"loss": 0.6219, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7700088024139404, |
|
"eval_VitaminC_cosine_ap": 0.5429726522180547, |
|
"eval_VitaminC_cosine_f1": 0.6576819407008085, |
|
"eval_VitaminC_cosine_f1_threshold": 0.33469462394714355, |
|
"eval_VitaminC_cosine_precision": 0.49193548387096775, |
|
"eval_VitaminC_cosine_recall": 0.991869918699187, |
|
"eval_VitaminC_dot_accuracy": 0.57421875, |
|
"eval_VitaminC_dot_accuracy_threshold": 309.1703186035156, |
|
"eval_VitaminC_dot_ap": 0.5531704143247085, |
|
"eval_VitaminC_dot_f1": 0.6594594594594595, |
|
"eval_VitaminC_dot_f1_threshold": 145.4818115234375, |
|
"eval_VitaminC_dot_precision": 0.4939271255060729, |
|
"eval_VitaminC_dot_recall": 0.991869918699187, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 13.186519622802734, |
|
"eval_VitaminC_euclidean_ap": 0.5414693053767123, |
|
"eval_VitaminC_euclidean_f1": 0.6559999999999999, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.980300903320312, |
|
"eval_VitaminC_euclidean_precision": 0.4880952380952381, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.5703125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 248.66549682617188, |
|
"eval_VitaminC_manhattan_ap": 0.5403554311371019, |
|
"eval_VitaminC_manhattan_f1": 0.6577540106951871, |
|
"eval_VitaminC_manhattan_f1_threshold": 493.069580078125, |
|
"eval_VitaminC_manhattan_precision": 0.4900398406374502, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 309.1703186035156, |
|
"eval_VitaminC_max_ap": 0.5531704143247085, |
|
"eval_VitaminC_max_f1": 0.6594594594594595, |
|
"eval_VitaminC_max_f1_threshold": 493.069580078125, |
|
"eval_VitaminC_max_precision": 0.4939271255060729, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5531704143247085, |
|
"eval_sts-test_pearson_cosine": 0.8353147289025764, |
|
"eval_sts-test_pearson_dot": 0.8340239073162183, |
|
"eval_sts-test_pearson_euclidean": 0.8678198295516475, |
|
"eval_sts-test_pearson_manhattan": 0.8679311647036958, |
|
"eval_sts-test_pearson_max": 0.8679311647036958, |
|
"eval_sts-test_spearman_cosine": 0.8807142866140599, |
|
"eval_sts-test_spearman_dot": 0.8548662012879339, |
|
"eval_sts-test_spearman_euclidean": 0.8730904047317294, |
|
"eval_sts-test_spearman_manhattan": 0.8734591925182695, |
|
"eval_sts-test_spearman_max": 0.8807142866140599, |
|
"eval_vitaminc-pairs_loss": 2.519745111465454, |
|
"eval_vitaminc-pairs_runtime": 1.5111, |
|
"eval_vitaminc-pairs_samples_per_second": 71.47, |
|
"eval_vitaminc-pairs_steps_per_second": 1.324, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_negation-triplets_loss": 1.701598882675171, |
|
"eval_negation-triplets_runtime": 0.3083, |
|
"eval_negation-triplets_samples_per_second": 207.571, |
|
"eval_negation-triplets_steps_per_second": 3.243, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_scitail-pairs-pos_loss": 0.1535351276397705, |
|
"eval_scitail-pairs-pos_runtime": 0.4139, |
|
"eval_scitail-pairs-pos_samples_per_second": 130.461, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.416, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_xsum-pairs_loss": 0.16304434835910797, |
|
"eval_xsum-pairs_runtime": 3.173, |
|
"eval_xsum-pairs_samples_per_second": 40.34, |
|
"eval_xsum-pairs_steps_per_second": 0.63, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_sciq_pairs_loss": 0.03826402127742767, |
|
"eval_sciq_pairs_runtime": 3.2871, |
|
"eval_sciq_pairs_samples_per_second": 38.94, |
|
"eval_sciq_pairs_steps_per_second": 0.608, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_qasc_pairs_loss": 0.20441913604736328, |
|
"eval_qasc_pairs_runtime": 0.6223, |
|
"eval_qasc_pairs_samples_per_second": 205.692, |
|
"eval_qasc_pairs_steps_per_second": 3.214, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_openbookqa_pairs_loss": 0.7109480500221252, |
|
"eval_openbookqa_pairs_runtime": 0.5785, |
|
"eval_openbookqa_pairs_samples_per_second": 221.25, |
|
"eval_openbookqa_pairs_steps_per_second": 3.457, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_msmarco_pairs_loss": 0.48586779832839966, |
|
"eval_msmarco_pairs_runtime": 1.2912, |
|
"eval_msmarco_pairs_samples_per_second": 99.129, |
|
"eval_msmarco_pairs_steps_per_second": 1.549, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_nq_pairs_loss": 0.5532824397087097, |
|
"eval_nq_pairs_runtime": 2.3796, |
|
"eval_nq_pairs_samples_per_second": 53.791, |
|
"eval_nq_pairs_steps_per_second": 0.84, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_trivia_pairs_loss": 1.0205955505371094, |
|
"eval_trivia_pairs_runtime": 4.4187, |
|
"eval_trivia_pairs_samples_per_second": 28.968, |
|
"eval_trivia_pairs_steps_per_second": 0.453, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_gooaq_pairs_loss": 0.7736483812332153, |
|
"eval_gooaq_pairs_runtime": 0.8757, |
|
"eval_gooaq_pairs_samples_per_second": 146.171, |
|
"eval_gooaq_pairs_steps_per_second": 2.284, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8292166549047284, |
|
"eval_paws-pos_loss": 0.0273247379809618, |
|
"eval_paws-pos_runtime": 0.6877, |
|
"eval_paws-pos_samples_per_second": 186.121, |
|
"eval_paws-pos_steps_per_second": 2.908, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 2.632672071456909, |
|
"learning_rate": 2.9764927878587643e-05, |
|
"loss": 0.5862, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 1.8630910374029641, |
|
"grad_norm": 2.9056813716888428, |
|
"learning_rate": 2.9419583596543924e-05, |
|
"loss": 0.678, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.880028228652082, |
|
"grad_norm": 2.693070411682129, |
|
"learning_rate": 2.907229908480814e-05, |
|
"loss": 0.6272, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 1.8969654199011998, |
|
"grad_norm": 2.2290945053100586, |
|
"learning_rate": 2.8723319106204032e-05, |
|
"loss": 0.5048, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 1.9139026111503177, |
|
"grad_norm": 2.5947606563568115, |
|
"learning_rate": 2.8372889618504275e-05, |
|
"loss": 0.7653, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 1.9308398023994355, |
|
"grad_norm": 3.1747825145721436, |
|
"learning_rate": 2.8021257601081767e-05, |
|
"loss": 0.6613, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 1.9477769936485534, |
|
"grad_norm": 2.438523054122925, |
|
"learning_rate": 2.766867088084095e-05, |
|
"loss": 0.6122, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.9647141848976712, |
|
"grad_norm": 2.645747423171997, |
|
"learning_rate": 2.7315377957551712e-05, |
|
"loss": 0.5939, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"grad_norm": 3.985382556915283, |
|
"learning_rate": 2.696162782870916e-05, |
|
"loss": 0.6923, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.7852457165718079, |
|
"eval_VitaminC_cosine_ap": 0.5489275869827654, |
|
"eval_VitaminC_cosine_f1": 0.6612466124661246, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3671841323375702, |
|
"eval_VitaminC_cosine_precision": 0.4959349593495935, |
|
"eval_VitaminC_cosine_recall": 0.991869918699187, |
|
"eval_VitaminC_dot_accuracy": 0.5703125, |
|
"eval_VitaminC_dot_accuracy_threshold": 312.1104736328125, |
|
"eval_VitaminC_dot_ap": 0.5559525201108009, |
|
"eval_VitaminC_dot_f1": 0.6612466124661246, |
|
"eval_VitaminC_dot_f1_threshold": 150.29818725585938, |
|
"eval_VitaminC_dot_precision": 0.4959349593495935, |
|
"eval_VitaminC_dot_recall": 0.991869918699187, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 14.372268676757812, |
|
"eval_VitaminC_euclidean_ap": 0.544755914591283, |
|
"eval_VitaminC_euclidean_f1": 0.6576819407008085, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.06924819946289, |
|
"eval_VitaminC_euclidean_precision": 0.49193548387096775, |
|
"eval_VitaminC_euclidean_recall": 0.991869918699187, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 263.9018859863281, |
|
"eval_VitaminC_manhattan_ap": 0.541522211031207, |
|
"eval_VitaminC_manhattan_f1": 0.6595174262734584, |
|
"eval_VitaminC_manhattan_f1_threshold": 502.340576171875, |
|
"eval_VitaminC_manhattan_precision": 0.492, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 312.1104736328125, |
|
"eval_VitaminC_max_ap": 0.5559525201108009, |
|
"eval_VitaminC_max_f1": 0.6612466124661246, |
|
"eval_VitaminC_max_f1_threshold": 502.340576171875, |
|
"eval_VitaminC_max_precision": 0.4959349593495935, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5559525201108009, |
|
"eval_sts-test_pearson_cosine": 0.8362775201898809, |
|
"eval_sts-test_pearson_dot": 0.8352671053392853, |
|
"eval_sts-test_pearson_euclidean": 0.8700319618710969, |
|
"eval_sts-test_pearson_manhattan": 0.8715864724519946, |
|
"eval_sts-test_pearson_max": 0.8715864724519946, |
|
"eval_sts-test_spearman_cosine": 0.8836928745715628, |
|
"eval_sts-test_spearman_dot": 0.857968315251608, |
|
"eval_sts-test_spearman_euclidean": 0.8761363054114356, |
|
"eval_sts-test_spearman_manhattan": 0.8777002520634819, |
|
"eval_sts-test_spearman_max": 0.8836928745715628, |
|
"eval_vitaminc-pairs_loss": 2.437910795211792, |
|
"eval_vitaminc-pairs_runtime": 1.4456, |
|
"eval_vitaminc-pairs_samples_per_second": 74.71, |
|
"eval_vitaminc-pairs_steps_per_second": 1.384, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_negation-triplets_loss": 1.7345324754714966, |
|
"eval_negation-triplets_runtime": 0.2986, |
|
"eval_negation-triplets_samples_per_second": 214.362, |
|
"eval_negation-triplets_steps_per_second": 3.349, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_scitail-pairs-pos_loss": 0.14812646806240082, |
|
"eval_scitail-pairs-pos_runtime": 0.3719, |
|
"eval_scitail-pairs-pos_samples_per_second": 145.183, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.689, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_xsum-pairs_loss": 0.1463930606842041, |
|
"eval_xsum-pairs_runtime": 3.152, |
|
"eval_xsum-pairs_samples_per_second": 40.609, |
|
"eval_xsum-pairs_steps_per_second": 0.635, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_sciq_pairs_loss": 0.03820851817727089, |
|
"eval_sciq_pairs_runtime": 3.2627, |
|
"eval_sciq_pairs_samples_per_second": 39.231, |
|
"eval_sciq_pairs_steps_per_second": 0.613, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_qasc_pairs_loss": 0.16403906047344208, |
|
"eval_qasc_pairs_runtime": 0.6219, |
|
"eval_qasc_pairs_samples_per_second": 205.822, |
|
"eval_qasc_pairs_steps_per_second": 3.216, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_openbookqa_pairs_loss": 0.755411684513092, |
|
"eval_openbookqa_pairs_runtime": 0.5745, |
|
"eval_openbookqa_pairs_samples_per_second": 222.788, |
|
"eval_openbookqa_pairs_steps_per_second": 3.481, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_msmarco_pairs_loss": 0.43477028608322144, |
|
"eval_msmarco_pairs_runtime": 1.2879, |
|
"eval_msmarco_pairs_samples_per_second": 99.389, |
|
"eval_msmarco_pairs_steps_per_second": 1.553, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_nq_pairs_loss": 0.5431913733482361, |
|
"eval_nq_pairs_runtime": 2.372, |
|
"eval_nq_pairs_samples_per_second": 53.962, |
|
"eval_nq_pairs_steps_per_second": 0.843, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_trivia_pairs_loss": 0.9581867456436157, |
|
"eval_trivia_pairs_runtime": 4.4272, |
|
"eval_trivia_pairs_samples_per_second": 28.912, |
|
"eval_trivia_pairs_steps_per_second": 0.452, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_gooaq_pairs_loss": 0.7219691872596741, |
|
"eval_gooaq_pairs_runtime": 0.8764, |
|
"eval_gooaq_pairs_samples_per_second": 146.055, |
|
"eval_gooaq_pairs_steps_per_second": 2.282, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.981651376146789, |
|
"eval_paws-pos_loss": 0.026377690955996513, |
|
"eval_paws-pos_runtime": 0.6874, |
|
"eval_paws-pos_samples_per_second": 186.22, |
|
"eval_paws-pos_steps_per_second": 2.91, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 1.998588567395907, |
|
"grad_norm": 2.807307481765747, |
|
"learning_rate": 2.660766981404253e-05, |
|
"loss": 0.5712, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.015525758645025, |
|
"grad_norm": 3.1135761737823486, |
|
"learning_rate": 2.6253753379797e-05, |
|
"loss": 0.5969, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.0324629498941427, |
|
"grad_norm": 2.701498508453369, |
|
"learning_rate": 2.5900127962912265e-05, |
|
"loss": 0.5881, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.0494001411432605, |
|
"grad_norm": 2.1898539066314697, |
|
"learning_rate": 2.554704279522176e-05, |
|
"loss": 0.6005, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 2.0663373323923784, |
|
"grad_norm": 2.3954033851623535, |
|
"learning_rate": 2.5194746727796408e-05, |
|
"loss": 0.6066, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.0832745236414962, |
|
"grad_norm": 1.9525569677352905, |
|
"learning_rate": 2.4843488055556773e-05, |
|
"loss": 0.4921, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 2.100211714890614, |
|
"grad_norm": 2.005103588104248, |
|
"learning_rate": 2.449351434227714e-05, |
|
"loss": 0.5354, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.117148906139732, |
|
"grad_norm": 2.4554927349090576, |
|
"learning_rate": 2.414507224610495e-05, |
|
"loss": 0.5602, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"grad_norm": 2.926708698272705, |
|
"learning_rate": 2.3798407345718434e-05, |
|
"loss": 0.5686, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.57421875, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8050106763839722, |
|
"eval_VitaminC_cosine_ap": 0.544869760591425, |
|
"eval_VitaminC_cosine_f1": 0.6577540106951871, |
|
"eval_VitaminC_cosine_f1_threshold": 0.2933539152145386, |
|
"eval_VitaminC_cosine_precision": 0.4900398406374502, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.5625, |
|
"eval_VitaminC_dot_accuracy_threshold": 350.54046630859375, |
|
"eval_VitaminC_dot_ap": 0.5538743151996848, |
|
"eval_VitaminC_dot_f1": 0.6577540106951871, |
|
"eval_VitaminC_dot_f1_threshold": 122.50220489501953, |
|
"eval_VitaminC_dot_precision": 0.4900398406374502, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.578125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.29859447479248, |
|
"eval_VitaminC_euclidean_ap": 0.5417581979676633, |
|
"eval_VitaminC_euclidean_f1": 0.6559999999999999, |
|
"eval_VitaminC_euclidean_f1_threshold": 24.298545837402344, |
|
"eval_VitaminC_euclidean_precision": 0.4880952380952381, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.57421875, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 261.48309326171875, |
|
"eval_VitaminC_manhattan_ap": 0.5389765713900105, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 519.0216064453125, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.578125, |
|
"eval_VitaminC_max_accuracy_threshold": 350.54046630859375, |
|
"eval_VitaminC_max_ap": 0.5538743151996848, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 519.0216064453125, |
|
"eval_VitaminC_max_precision": 0.4900398406374502, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5538743151996848, |
|
"eval_sts-test_pearson_cosine": 0.8323841899935347, |
|
"eval_sts-test_pearson_dot": 0.8319981087315044, |
|
"eval_sts-test_pearson_euclidean": 0.8653758499419844, |
|
"eval_sts-test_pearson_manhattan": 0.8666968424133361, |
|
"eval_sts-test_pearson_max": 0.8666968424133361, |
|
"eval_sts-test_spearman_cosine": 0.8804171081064596, |
|
"eval_sts-test_spearman_dot": 0.8574101209222718, |
|
"eval_sts-test_spearman_euclidean": 0.8715185810589999, |
|
"eval_sts-test_spearman_manhattan": 0.8731244191392259, |
|
"eval_sts-test_spearman_max": 0.8804171081064596, |
|
"eval_vitaminc-pairs_loss": 2.390805959701538, |
|
"eval_vitaminc-pairs_runtime": 1.4923, |
|
"eval_vitaminc-pairs_samples_per_second": 72.374, |
|
"eval_vitaminc-pairs_steps_per_second": 1.34, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_negation-triplets_loss": 1.7677762508392334, |
|
"eval_negation-triplets_runtime": 0.3036, |
|
"eval_negation-triplets_samples_per_second": 210.833, |
|
"eval_negation-triplets_steps_per_second": 3.294, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_scitail-pairs-pos_loss": 0.14010007679462433, |
|
"eval_scitail-pairs-pos_runtime": 0.3847, |
|
"eval_scitail-pairs-pos_samples_per_second": 140.379, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.6, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_xsum-pairs_loss": 0.1453721672296524, |
|
"eval_xsum-pairs_runtime": 3.1712, |
|
"eval_xsum-pairs_samples_per_second": 40.363, |
|
"eval_xsum-pairs_steps_per_second": 0.631, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_sciq_pairs_loss": 0.03739440068602562, |
|
"eval_sciq_pairs_runtime": 3.3277, |
|
"eval_sciq_pairs_samples_per_second": 38.466, |
|
"eval_sciq_pairs_steps_per_second": 0.601, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_qasc_pairs_loss": 0.1603582501411438, |
|
"eval_qasc_pairs_runtime": 0.632, |
|
"eval_qasc_pairs_samples_per_second": 202.534, |
|
"eval_qasc_pairs_steps_per_second": 3.165, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_openbookqa_pairs_loss": 0.7796258330345154, |
|
"eval_openbookqa_pairs_runtime": 0.5856, |
|
"eval_openbookqa_pairs_samples_per_second": 218.585, |
|
"eval_openbookqa_pairs_steps_per_second": 3.415, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_msmarco_pairs_loss": 0.4246203303337097, |
|
"eval_msmarco_pairs_runtime": 1.297, |
|
"eval_msmarco_pairs_samples_per_second": 98.689, |
|
"eval_msmarco_pairs_steps_per_second": 1.542, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_nq_pairs_loss": 0.5298404097557068, |
|
"eval_nq_pairs_runtime": 2.3877, |
|
"eval_nq_pairs_samples_per_second": 53.609, |
|
"eval_nq_pairs_steps_per_second": 0.838, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_trivia_pairs_loss": 0.9613967537879944, |
|
"eval_trivia_pairs_runtime": 4.4311, |
|
"eval_trivia_pairs_samples_per_second": 28.887, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_gooaq_pairs_loss": 0.6964626908302307, |
|
"eval_gooaq_pairs_runtime": 0.8843, |
|
"eval_gooaq_pairs_samples_per_second": 144.755, |
|
"eval_gooaq_pairs_steps_per_second": 2.262, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.13408609738885, |
|
"eval_paws-pos_loss": 0.02705618366599083, |
|
"eval_paws-pos_runtime": 0.6932, |
|
"eval_paws-pos_samples_per_second": 184.655, |
|
"eval_paws-pos_steps_per_second": 2.885, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.1510232886379677, |
|
"grad_norm": 2.418947458267212, |
|
"learning_rate": 2.345376396724515e-05, |
|
"loss": 0.6496, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 2.1679604798870855, |
|
"grad_norm": 2.5201969146728516, |
|
"learning_rate": 2.311138501206319e-05, |
|
"loss": 0.4713, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.1848976711362034, |
|
"grad_norm": 3.0134377479553223, |
|
"learning_rate": 2.277151178560665e-05, |
|
"loss": 0.6345, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 2.2018348623853212, |
|
"grad_norm": 2.368422031402588, |
|
"learning_rate": 2.2434383827295833e-05, |
|
"loss": 0.5994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.218772053634439, |
|
"grad_norm": 3.164980411529541, |
|
"learning_rate": 2.210023874171213e-05, |
|
"loss": 0.6763, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 2.235709244883557, |
|
"grad_norm": 2.83431077003479, |
|
"learning_rate": 2.1769312031136583e-05, |
|
"loss": 0.7254, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.252646436132675, |
|
"grad_norm": 3.2441203594207764, |
|
"learning_rate": 2.14418369295701e-05, |
|
"loss": 0.8032, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.2695836273817926, |
|
"grad_norm": 2.215298652648926, |
|
"learning_rate": 2.1118044238352392e-05, |
|
"loss": 0.4914, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"grad_norm": 2.700486183166504, |
|
"learning_rate": 2.0798162163495322e-05, |
|
"loss": 0.6307, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_NLI-v2_cosine_accuracy": 1.0, |
|
"eval_NLI-v2_dot_accuracy": 0.0, |
|
"eval_NLI-v2_euclidean_accuracy": 1.0, |
|
"eval_NLI-v2_manhattan_accuracy": 1.0, |
|
"eval_NLI-v2_max_accuracy": 1.0, |
|
"eval_VitaminC_cosine_accuracy": 0.578125, |
|
"eval_VitaminC_cosine_accuracy_threshold": 0.8052636384963989, |
|
"eval_VitaminC_cosine_ap": 0.5479388360307975, |
|
"eval_VitaminC_cosine_f1": 0.6577540106951871, |
|
"eval_VitaminC_cosine_f1_threshold": 0.3108493387699127, |
|
"eval_VitaminC_cosine_precision": 0.4900398406374502, |
|
"eval_VitaminC_cosine_recall": 1.0, |
|
"eval_VitaminC_dot_accuracy": 0.58203125, |
|
"eval_VitaminC_dot_accuracy_threshold": 318.633056640625, |
|
"eval_VitaminC_dot_ap": 0.5533499611019033, |
|
"eval_VitaminC_dot_f1": 0.6577540106951871, |
|
"eval_VitaminC_dot_f1_threshold": 125.5129165649414, |
|
"eval_VitaminC_dot_precision": 0.4900398406374502, |
|
"eval_VitaminC_dot_recall": 1.0, |
|
"eval_VitaminC_euclidean_accuracy": 0.58203125, |
|
"eval_VitaminC_euclidean_accuracy_threshold": 12.9645357131958, |
|
"eval_VitaminC_euclidean_ap": 0.541753017593475, |
|
"eval_VitaminC_euclidean_f1": 0.6559999999999999, |
|
"eval_VitaminC_euclidean_f1_threshold": 23.908817291259766, |
|
"eval_VitaminC_euclidean_precision": 0.4880952380952381, |
|
"eval_VitaminC_euclidean_recall": 1.0, |
|
"eval_VitaminC_manhattan_accuracy": 0.578125, |
|
"eval_VitaminC_manhattan_accuracy_threshold": 266.60528564453125, |
|
"eval_VitaminC_manhattan_ap": 0.5411403083150335, |
|
"eval_VitaminC_manhattan_f1": 0.6559999999999999, |
|
"eval_VitaminC_manhattan_f1_threshold": 512.4686279296875, |
|
"eval_VitaminC_manhattan_precision": 0.4880952380952381, |
|
"eval_VitaminC_manhattan_recall": 1.0, |
|
"eval_VitaminC_max_accuracy": 0.58203125, |
|
"eval_VitaminC_max_accuracy_threshold": 318.633056640625, |
|
"eval_VitaminC_max_ap": 0.5533499611019033, |
|
"eval_VitaminC_max_f1": 0.6577540106951871, |
|
"eval_VitaminC_max_f1_threshold": 512.4686279296875, |
|
"eval_VitaminC_max_precision": 0.4900398406374502, |
|
"eval_VitaminC_max_recall": 1.0, |
|
"eval_sequential_score": 0.5533499611019033, |
|
"eval_sts-test_pearson_cosine": 0.8404451477820003, |
|
"eval_sts-test_pearson_dot": 0.8376741383364052, |
|
"eval_sts-test_pearson_euclidean": 0.873696402540065, |
|
"eval_sts-test_pearson_manhattan": 0.8739146310077538, |
|
"eval_sts-test_pearson_max": 0.8739146310077538, |
|
"eval_sts-test_spearman_cosine": 0.8859238616569335, |
|
"eval_sts-test_spearman_dot": 0.8626544264654313, |
|
"eval_sts-test_spearman_euclidean": 0.8767156244780591, |
|
"eval_sts-test_spearman_manhattan": 0.8785835525192047, |
|
"eval_sts-test_spearman_max": 0.8859238616569335, |
|
"eval_vitaminc-pairs_loss": 2.438774347305298, |
|
"eval_vitaminc-pairs_runtime": 1.4716, |
|
"eval_vitaminc-pairs_samples_per_second": 73.39, |
|
"eval_vitaminc-pairs_steps_per_second": 1.359, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_negation-triplets_loss": 1.7093145847320557, |
|
"eval_negation-triplets_runtime": 0.3027, |
|
"eval_negation-triplets_samples_per_second": 211.422, |
|
"eval_negation-triplets_steps_per_second": 3.303, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_scitail-pairs-pos_loss": 0.11918405443429947, |
|
"eval_scitail-pairs-pos_runtime": 0.3806, |
|
"eval_scitail-pairs-pos_samples_per_second": 141.888, |
|
"eval_scitail-pairs-pos_steps_per_second": 2.628, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_xsum-pairs_loss": 0.13078594207763672, |
|
"eval_xsum-pairs_runtime": 3.1593, |
|
"eval_xsum-pairs_samples_per_second": 40.515, |
|
"eval_xsum-pairs_steps_per_second": 0.633, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_sciq_pairs_loss": 0.03792291879653931, |
|
"eval_sciq_pairs_runtime": 3.3679, |
|
"eval_sciq_pairs_samples_per_second": 38.006, |
|
"eval_sciq_pairs_steps_per_second": 0.594, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_qasc_pairs_loss": 0.1465962529182434, |
|
"eval_qasc_pairs_runtime": 0.6708, |
|
"eval_qasc_pairs_samples_per_second": 190.809, |
|
"eval_qasc_pairs_steps_per_second": 2.981, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_openbookqa_pairs_loss": 0.74336838722229, |
|
"eval_openbookqa_pairs_runtime": 0.6017, |
|
"eval_openbookqa_pairs_samples_per_second": 212.742, |
|
"eval_openbookqa_pairs_steps_per_second": 3.324, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_msmarco_pairs_loss": 0.3927748501300812, |
|
"eval_msmarco_pairs_runtime": 1.3092, |
|
"eval_msmarco_pairs_samples_per_second": 97.767, |
|
"eval_msmarco_pairs_steps_per_second": 1.528, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_nq_pairs_loss": 0.4998345375061035, |
|
"eval_nq_pairs_runtime": 2.4116, |
|
"eval_nq_pairs_samples_per_second": 53.077, |
|
"eval_nq_pairs_steps_per_second": 0.829, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_trivia_pairs_loss": 0.9862285852432251, |
|
"eval_trivia_pairs_runtime": 4.4317, |
|
"eval_trivia_pairs_samples_per_second": 28.883, |
|
"eval_trivia_pairs_steps_per_second": 0.451, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_gooaq_pairs_loss": 0.697635293006897, |
|
"eval_gooaq_pairs_runtime": 0.8801, |
|
"eval_gooaq_pairs_samples_per_second": 145.443, |
|
"eval_gooaq_pairs_steps_per_second": 2.273, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.2865208186309105, |
|
"eval_paws-pos_loss": 0.02622571960091591, |
|
"eval_paws-pos_runtime": 0.6966, |
|
"eval_paws-pos_samples_per_second": 183.756, |
|
"eval_paws-pos_steps_per_second": 2.871, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.3034580098800284, |
|
"grad_norm": 2.7358224391937256, |
|
"learning_rate": 2.0482416154845496e-05, |
|
"loss": 0.7493, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.320395201129146, |
|
"grad_norm": 2.2785451412200928, |
|
"learning_rate": 2.0171028747189386e-05, |
|
"loss": 0.5139, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 2.337332392378264, |
|
"grad_norm": 2.1454882621765137, |
|
"learning_rate": 1.9864219403412882e-05, |
|
"loss": 0.6364, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.354269583627382, |
|
"grad_norm": 2.206393003463745, |
|
"learning_rate": 1.9562204359825967e-05, |
|
"loss": 0.4763, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 2.3712067748765, |
|
"grad_norm": 2.2492825984954834, |
|
"learning_rate": 1.92651964737614e-05, |
|
"loss": 0.583, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.3881439661256176, |
|
"grad_norm": 3.24066162109375, |
|
"learning_rate": 1.8973405073554915e-05, |
|
"loss": 0.5912, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 2.4050811573747355, |
|
"grad_norm": 2.6232211589813232, |
|
"learning_rate": 1.868703581101257e-05, |
|
"loss": 0.5936, |
|
"step": 426 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 531, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 107, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 160, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|