bobox's picture
Training in progress, step 17217, checkpoint
4859fef verified
raw
history blame contribute delete
No virus
244 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.7002823086574654,
"eval_steps": 479,
"global_step": 17217,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0075282308657465494,
"grad_norm": 87.12931823730469,
"learning_rate": 3.147218736930155e-07,
"loss": 12.3074,
"step": 48
},
{
"epoch": 0.015056461731493099,
"grad_norm": 64.15751647949219,
"learning_rate": 6.660393140945211e-07,
"loss": 15.7221,
"step": 96
},
{
"epoch": 0.02258469259723965,
"grad_norm": 81.39651489257812,
"learning_rate": 1.0173567544960265e-06,
"loss": 10.8027,
"step": 144
},
{
"epoch": 0.030112923462986198,
"grad_norm": 22.048904418945312,
"learning_rate": 1.3686741948975323e-06,
"loss": 8.9559,
"step": 192
},
{
"epoch": 0.037641154328732745,
"grad_norm": 11.542724609375,
"learning_rate": 1.7126725219573398e-06,
"loss": 8.8511,
"step": 240
},
{
"epoch": 0.0451693851944793,
"grad_norm": 10.625059127807617,
"learning_rate": 2.063989962358846e-06,
"loss": 9.3478,
"step": 288
},
{
"epoch": 0.05269761606022585,
"grad_norm": 14.21434211730957,
"learning_rate": 2.415307402760351e-06,
"loss": 8.8892,
"step": 336
},
{
"epoch": 0.060225846925972396,
"grad_norm": 13.216053009033203,
"learning_rate": 2.7666248431618565e-06,
"loss": 8.3008,
"step": 384
},
{
"epoch": 0.06775407779171895,
"grad_norm": 22.503334045410156,
"learning_rate": 3.117942283563362e-06,
"loss": 7.3455,
"step": 432
},
{
"epoch": 0.07512547051442911,
"eval_nli-pairs_loss": 6.591032028198242,
"eval_nli-pairs_runtime": 4.3469,
"eval_nli-pairs_samples_per_second": 46.01,
"eval_nli-pairs_steps_per_second": 1.15,
"eval_sts-test_pearson_cosine": 0.39488461174644296,
"eval_sts-test_pearson_dot": 0.15593446481859455,
"eval_sts-test_pearson_euclidean": 0.39975070029693277,
"eval_sts-test_pearson_manhattan": 0.4314268556737928,
"eval_sts-test_pearson_max": 0.4314268556737928,
"eval_sts-test_spearman_cosine": 0.3997824055251076,
"eval_sts-test_spearman_dot": 0.14324216739430146,
"eval_sts-test_spearman_euclidean": 0.40262274612650517,
"eval_sts-test_spearman_manhattan": 0.42925492969387746,
"eval_sts-test_spearman_max": 0.42925492969387746,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_vitaminc-pairs_loss": 6.247874736785889,
"eval_vitaminc-pairs_runtime": 1.4382,
"eval_vitaminc-pairs_samples_per_second": 115.426,
"eval_vitaminc-pairs_steps_per_second": 2.781,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_sts-label_loss": 3.371708869934082,
"eval_sts-label_runtime": 0.3925,
"eval_sts-label_samples_per_second": 509.603,
"eval_sts-label_steps_per_second": 12.74,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_qnli-contrastive_loss": 3.5311310291290283,
"eval_qnli-contrastive_runtime": 0.2814,
"eval_qnli-contrastive_samples_per_second": 710.798,
"eval_qnli-contrastive_steps_per_second": 17.77,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_scitail-pairs-qa_loss": 5.4017333984375,
"eval_scitail-pairs-qa_runtime": 1.1087,
"eval_scitail-pairs-qa_samples_per_second": 180.387,
"eval_scitail-pairs-qa_steps_per_second": 4.51,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_scitail-pairs-pos_loss": 4.29502534866333,
"eval_scitail-pairs-pos_runtime": 2.4269,
"eval_scitail-pairs-pos_samples_per_second": 82.41,
"eval_scitail-pairs-pos_steps_per_second": 2.06,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_xsum-pairs_loss": 3.540722370147705,
"eval_xsum-pairs_runtime": 1.0447,
"eval_xsum-pairs_samples_per_second": 191.444,
"eval_xsum-pairs_steps_per_second": 4.786,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_compression-pairs_loss": 2.8060033321380615,
"eval_compression-pairs_runtime": 0.2399,
"eval_compression-pairs_samples_per_second": 833.621,
"eval_compression-pairs_steps_per_second": 20.841,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_sciq_pairs_loss": 10.621454238891602,
"eval_sciq_pairs_runtime": 9.0638,
"eval_sciq_pairs_samples_per_second": 22.066,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_qasc_pairs_loss": 7.7197771072387695,
"eval_qasc_pairs_runtime": 1.2078,
"eval_qasc_pairs_samples_per_second": 165.595,
"eval_qasc_pairs_steps_per_second": 4.14,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_openbookqa_pairs_loss": 7.620975494384766,
"eval_openbookqa_pairs_runtime": 1.053,
"eval_openbookqa_pairs_samples_per_second": 189.941,
"eval_openbookqa_pairs_steps_per_second": 4.749,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_msmarco_pairs_loss": 8.353594779968262,
"eval_msmarco_pairs_runtime": 2.5338,
"eval_msmarco_pairs_samples_per_second": 78.932,
"eval_msmarco_pairs_steps_per_second": 1.973,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_nq_pairs_loss": 7.995354652404785,
"eval_nq_pairs_runtime": 5.7107,
"eval_nq_pairs_samples_per_second": 35.022,
"eval_nq_pairs_steps_per_second": 0.876,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_trivia_pairs_loss": 8.177907943725586,
"eval_trivia_pairs_runtime": 9.1824,
"eval_trivia_pairs_samples_per_second": 21.781,
"eval_trivia_pairs_steps_per_second": 0.545,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_quora_pairs_loss": 1.1983369588851929,
"eval_quora_pairs_runtime": 0.659,
"eval_quora_pairs_samples_per_second": 303.48,
"eval_quora_pairs_steps_per_second": 7.587,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_gooaq_pairs_loss": 7.4573974609375,
"eval_gooaq_pairs_runtime": 1.5917,
"eval_gooaq_pairs_samples_per_second": 125.65,
"eval_gooaq_pairs_steps_per_second": 3.141,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_mrpc_pairs_loss": 2.1152825355529785,
"eval_mrpc_pairs_runtime": 0.241,
"eval_mrpc_pairs_samples_per_second": 829.751,
"eval_mrpc_pairs_steps_per_second": 20.744,
"step": 479
},
{
"epoch": 0.07528230865746549,
"grad_norm": 17.753456115722656,
"learning_rate": 3.469259723964868e-06,
"loss": 8.0369,
"step": 480
},
{
"epoch": 0.08281053952321205,
"grad_norm": 12.012594223022461,
"learning_rate": 3.820577164366374e-06,
"loss": 6.2732,
"step": 528
},
{
"epoch": 0.0903387703889586,
"grad_norm": 34.92698287963867,
"learning_rate": 4.1718946047678796e-06,
"loss": 7.8529,
"step": 576
},
{
"epoch": 0.09786700125470514,
"grad_norm": 23.564632415771484,
"learning_rate": 4.523212045169385e-06,
"loss": 5.8643,
"step": 624
},
{
"epoch": 0.1053952321204517,
"grad_norm": 22.126293182373047,
"learning_rate": 4.874529485570891e-06,
"loss": 6.3179,
"step": 672
},
{
"epoch": 0.11292346298619825,
"grad_norm": 25.067686080932617,
"learning_rate": 5.225846925972396e-06,
"loss": 6.1175,
"step": 720
},
{
"epoch": 0.12045169385194479,
"grad_norm": 29.170730590820312,
"learning_rate": 5.577164366373902e-06,
"loss": 5.2392,
"step": 768
},
{
"epoch": 0.12797992471769135,
"grad_norm": 29.377540588378906,
"learning_rate": 5.928481806775407e-06,
"loss": 5.8324,
"step": 816
},
{
"epoch": 0.1355081555834379,
"grad_norm": 33.512088775634766,
"learning_rate": 6.279799247176913e-06,
"loss": 5.1523,
"step": 864
},
{
"epoch": 0.14303638644918445,
"grad_norm": 32.54931640625,
"learning_rate": 6.6311166875784185e-06,
"loss": 6.0303,
"step": 912
},
{
"epoch": 0.15025094102885822,
"eval_nli-pairs_loss": 4.317643165588379,
"eval_nli-pairs_runtime": 4.0158,
"eval_nli-pairs_samples_per_second": 49.803,
"eval_nli-pairs_steps_per_second": 1.245,
"eval_sts-test_pearson_cosine": 0.6751726661173544,
"eval_sts-test_pearson_dot": 0.5308173325280101,
"eval_sts-test_pearson_euclidean": 0.6660519042507951,
"eval_sts-test_pearson_manhattan": 0.6723411683739887,
"eval_sts-test_pearson_max": 0.6751726661173544,
"eval_sts-test_spearman_cosine": 0.6458966208807124,
"eval_sts-test_spearman_dot": 0.5040208096497271,
"eval_sts-test_spearman_euclidean": 0.6447205374312966,
"eval_sts-test_spearman_manhattan": 0.6498560301461127,
"eval_sts-test_spearman_max": 0.6498560301461127,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_vitaminc-pairs_loss": 6.306981563568115,
"eval_vitaminc-pairs_runtime": 1.4737,
"eval_vitaminc-pairs_samples_per_second": 112.643,
"eval_vitaminc-pairs_steps_per_second": 2.714,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_sts-label_loss": 3.7677345275878906,
"eval_sts-label_runtime": 0.4136,
"eval_sts-label_samples_per_second": 483.59,
"eval_sts-label_steps_per_second": 12.09,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_qnli-contrastive_loss": 2.8633975982666016,
"eval_qnli-contrastive_runtime": 0.2816,
"eval_qnli-contrastive_samples_per_second": 710.123,
"eval_qnli-contrastive_steps_per_second": 17.753,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_scitail-pairs-qa_loss": 1.4301409721374512,
"eval_scitail-pairs-qa_runtime": 1.0526,
"eval_scitail-pairs-qa_samples_per_second": 190.003,
"eval_scitail-pairs-qa_steps_per_second": 4.75,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_scitail-pairs-pos_loss": 2.468087673187256,
"eval_scitail-pairs-pos_runtime": 2.3275,
"eval_scitail-pairs-pos_samples_per_second": 85.928,
"eval_scitail-pairs-pos_steps_per_second": 2.148,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_xsum-pairs_loss": 2.153658151626587,
"eval_xsum-pairs_runtime": 1.0409,
"eval_xsum-pairs_samples_per_second": 192.151,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_compression-pairs_loss": 1.6288033723831177,
"eval_compression-pairs_runtime": 0.2383,
"eval_compression-pairs_samples_per_second": 839.253,
"eval_compression-pairs_steps_per_second": 20.981,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_sciq_pairs_loss": 9.78779411315918,
"eval_sciq_pairs_runtime": 9.0233,
"eval_sciq_pairs_samples_per_second": 22.165,
"eval_sciq_pairs_steps_per_second": 0.554,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_qasc_pairs_loss": 3.7814972400665283,
"eval_qasc_pairs_runtime": 1.2108,
"eval_qasc_pairs_samples_per_second": 165.179,
"eval_qasc_pairs_steps_per_second": 4.129,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_openbookqa_pairs_loss": 5.005772590637207,
"eval_openbookqa_pairs_runtime": 1.0415,
"eval_openbookqa_pairs_samples_per_second": 192.037,
"eval_openbookqa_pairs_steps_per_second": 4.801,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_msmarco_pairs_loss": 4.574879169464111,
"eval_msmarco_pairs_runtime": 2.527,
"eval_msmarco_pairs_samples_per_second": 79.146,
"eval_msmarco_pairs_steps_per_second": 1.979,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_nq_pairs_loss": 5.281248569488525,
"eval_nq_pairs_runtime": 5.6503,
"eval_nq_pairs_samples_per_second": 35.397,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_trivia_pairs_loss": 4.913428783416748,
"eval_trivia_pairs_runtime": 9.0564,
"eval_trivia_pairs_samples_per_second": 22.084,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_quora_pairs_loss": 0.9212128520011902,
"eval_quora_pairs_runtime": 0.6293,
"eval_quora_pairs_samples_per_second": 317.839,
"eval_quora_pairs_steps_per_second": 7.946,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_gooaq_pairs_loss": 3.932173490524292,
"eval_gooaq_pairs_runtime": 1.6066,
"eval_gooaq_pairs_samples_per_second": 124.483,
"eval_gooaq_pairs_steps_per_second": 3.112,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_mrpc_pairs_loss": 1.0853501558303833,
"eval_mrpc_pairs_runtime": 0.2402,
"eval_mrpc_pairs_samples_per_second": 832.56,
"eval_mrpc_pairs_steps_per_second": 20.814,
"step": 958
},
{
"epoch": 0.15056461731493098,
"grad_norm": 28.37123680114746,
"learning_rate": 6.982434127979924e-06,
"loss": 5.7748,
"step": 960
},
{
"epoch": 0.15809284818067754,
"grad_norm": 26.708221435546875,
"learning_rate": 7.33375156838143e-06,
"loss": 4.8728,
"step": 1008
},
{
"epoch": 0.1656210790464241,
"grad_norm": 26.786447525024414,
"learning_rate": 7.685069008782934e-06,
"loss": 4.7375,
"step": 1056
},
{
"epoch": 0.17314930991217065,
"grad_norm": 26.215879440307617,
"learning_rate": 8.03638644918444e-06,
"loss": 4.6766,
"step": 1104
},
{
"epoch": 0.1806775407779172,
"grad_norm": 35.618831634521484,
"learning_rate": 8.387703889585947e-06,
"loss": 4.3209,
"step": 1152
},
{
"epoch": 0.18820577164366373,
"grad_norm": 37.166072845458984,
"learning_rate": 8.739021329987453e-06,
"loss": 3.7761,
"step": 1200
},
{
"epoch": 0.19573400250941028,
"grad_norm": 35.78367233276367,
"learning_rate": 9.090338770388957e-06,
"loss": 4.2161,
"step": 1248
},
{
"epoch": 0.20326223337515684,
"grad_norm": 36.299678802490234,
"learning_rate": 9.441656210790464e-06,
"loss": 4.9089,
"step": 1296
},
{
"epoch": 0.2107904642409034,
"grad_norm": 18.610933303833008,
"learning_rate": 9.792973651191968e-06,
"loss": 4.3406,
"step": 1344
},
{
"epoch": 0.21831869510664995,
"grad_norm": 9.592538833618164,
"learning_rate": 1.0144291091593475e-05,
"loss": 3.5664,
"step": 1392
},
{
"epoch": 0.22537641154328733,
"eval_nli-pairs_loss": 3.2245519161224365,
"eval_nli-pairs_runtime": 4.0436,
"eval_nli-pairs_samples_per_second": 49.461,
"eval_nli-pairs_steps_per_second": 1.237,
"eval_sts-test_pearson_cosine": 0.7002978854888552,
"eval_sts-test_pearson_dot": 0.5685392445320393,
"eval_sts-test_pearson_euclidean": 0.6963744527231541,
"eval_sts-test_pearson_manhattan": 0.7050517306003169,
"eval_sts-test_pearson_max": 0.7050517306003169,
"eval_sts-test_spearman_cosine": 0.6718756239728468,
"eval_sts-test_spearman_dot": 0.5416448961602434,
"eval_sts-test_spearman_euclidean": 0.6742379556154348,
"eval_sts-test_spearman_manhattan": 0.6824201536078427,
"eval_sts-test_spearman_max": 0.6824201536078427,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_vitaminc-pairs_loss": 6.715206623077393,
"eval_vitaminc-pairs_runtime": 1.4251,
"eval_vitaminc-pairs_samples_per_second": 116.481,
"eval_vitaminc-pairs_steps_per_second": 2.807,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_sts-label_loss": 4.016364097595215,
"eval_sts-label_runtime": 0.4049,
"eval_sts-label_samples_per_second": 493.95,
"eval_sts-label_steps_per_second": 12.349,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_qnli-contrastive_loss": 1.999517560005188,
"eval_qnli-contrastive_runtime": 0.2804,
"eval_qnli-contrastive_samples_per_second": 713.282,
"eval_qnli-contrastive_steps_per_second": 17.832,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_scitail-pairs-qa_loss": 1.0403239727020264,
"eval_scitail-pairs-qa_runtime": 1.0483,
"eval_scitail-pairs-qa_samples_per_second": 190.793,
"eval_scitail-pairs-qa_steps_per_second": 4.77,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_scitail-pairs-pos_loss": 1.9232473373413086,
"eval_scitail-pairs-pos_runtime": 2.3447,
"eval_scitail-pairs-pos_samples_per_second": 85.298,
"eval_scitail-pairs-pos_steps_per_second": 2.132,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_xsum-pairs_loss": 1.6821197271347046,
"eval_xsum-pairs_runtime": 1.0422,
"eval_xsum-pairs_samples_per_second": 191.901,
"eval_xsum-pairs_steps_per_second": 4.798,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_compression-pairs_loss": 1.1713249683380127,
"eval_compression-pairs_runtime": 0.2392,
"eval_compression-pairs_samples_per_second": 836.05,
"eval_compression-pairs_steps_per_second": 20.901,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_sciq_pairs_loss": 9.443825721740723,
"eval_sciq_pairs_runtime": 8.9916,
"eval_sciq_pairs_samples_per_second": 22.243,
"eval_sciq_pairs_steps_per_second": 0.556,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_qasc_pairs_loss": 2.9044029712677,
"eval_qasc_pairs_runtime": 1.2182,
"eval_qasc_pairs_samples_per_second": 164.182,
"eval_qasc_pairs_steps_per_second": 4.105,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_openbookqa_pairs_loss": 4.360418796539307,
"eval_openbookqa_pairs_runtime": 1.0522,
"eval_openbookqa_pairs_samples_per_second": 190.077,
"eval_openbookqa_pairs_steps_per_second": 4.752,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_msmarco_pairs_loss": 3.516049861907959,
"eval_msmarco_pairs_runtime": 2.5595,
"eval_msmarco_pairs_samples_per_second": 78.139,
"eval_msmarco_pairs_steps_per_second": 1.953,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_nq_pairs_loss": 4.016308784484863,
"eval_nq_pairs_runtime": 5.6561,
"eval_nq_pairs_samples_per_second": 35.36,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_trivia_pairs_loss": 3.781872272491455,
"eval_trivia_pairs_runtime": 9.0801,
"eval_trivia_pairs_samples_per_second": 22.026,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_quora_pairs_loss": 0.8747495412826538,
"eval_quora_pairs_runtime": 0.6229,
"eval_quora_pairs_samples_per_second": 321.076,
"eval_quora_pairs_steps_per_second": 8.027,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_gooaq_pairs_loss": 3.0769765377044678,
"eval_gooaq_pairs_runtime": 1.552,
"eval_gooaq_pairs_samples_per_second": 128.863,
"eval_gooaq_pairs_steps_per_second": 3.222,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_mrpc_pairs_loss": 0.7370794415473938,
"eval_mrpc_pairs_runtime": 0.2401,
"eval_mrpc_pairs_samples_per_second": 832.867,
"eval_mrpc_pairs_steps_per_second": 20.822,
"step": 1437
},
{
"epoch": 0.2258469259723965,
"grad_norm": 29.73522186279297,
"learning_rate": 1.049560853199498e-05,
"loss": 4.7194,
"step": 1440
},
{
"epoch": 0.23337515683814303,
"grad_norm": 30.467117309570312,
"learning_rate": 1.0846925972396486e-05,
"loss": 3.6345,
"step": 1488
},
{
"epoch": 0.24090338770388958,
"grad_norm": 24.454021453857422,
"learning_rate": 1.1198243412797992e-05,
"loss": 3.5947,
"step": 1536
},
{
"epoch": 0.24843161856963614,
"grad_norm": 20.165475845336914,
"learning_rate": 1.1549560853199497e-05,
"loss": 4.0526,
"step": 1584
},
{
"epoch": 0.2559598494353827,
"grad_norm": 34.79319381713867,
"learning_rate": 1.1900878293601003e-05,
"loss": 3.7962,
"step": 1632
},
{
"epoch": 0.26348808030112925,
"grad_norm": 127.97925567626953,
"learning_rate": 1.2252195734002508e-05,
"loss": 4.1927,
"step": 1680
},
{
"epoch": 0.2710163111668758,
"grad_norm": 27.80243682861328,
"learning_rate": 1.2603513174404014e-05,
"loss": 3.6351,
"step": 1728
},
{
"epoch": 0.27854454203262236,
"grad_norm": 31.81105613708496,
"learning_rate": 1.295483061480552e-05,
"loss": 3.4256,
"step": 1776
},
{
"epoch": 0.2860727728983689,
"grad_norm": 32.932865142822266,
"learning_rate": 1.3306148055207025e-05,
"loss": 3.3175,
"step": 1824
},
{
"epoch": 0.2936010037641154,
"grad_norm": 31.197385787963867,
"learning_rate": 1.365746549560853e-05,
"loss": 3.4984,
"step": 1872
},
{
"epoch": 0.30050188205771644,
"eval_nli-pairs_loss": 2.852742910385132,
"eval_nli-pairs_runtime": 4.1529,
"eval_nli-pairs_samples_per_second": 48.16,
"eval_nli-pairs_steps_per_second": 1.204,
"eval_sts-test_pearson_cosine": 0.7132313507241694,
"eval_sts-test_pearson_dot": 0.559846529627866,
"eval_sts-test_pearson_euclidean": 0.7145939583366395,
"eval_sts-test_pearson_manhattan": 0.724552982808093,
"eval_sts-test_pearson_max": 0.724552982808093,
"eval_sts-test_spearman_cosine": 0.6912239915389706,
"eval_sts-test_spearman_dot": 0.5394217029355446,
"eval_sts-test_spearman_euclidean": 0.6946616748545426,
"eval_sts-test_spearman_manhattan": 0.70491424059339,
"eval_sts-test_spearman_max": 0.70491424059339,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_vitaminc-pairs_loss": 6.633151054382324,
"eval_vitaminc-pairs_runtime": 1.4454,
"eval_vitaminc-pairs_samples_per_second": 114.849,
"eval_vitaminc-pairs_steps_per_second": 2.767,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_sts-label_loss": 3.8717281818389893,
"eval_sts-label_runtime": 0.4016,
"eval_sts-label_samples_per_second": 498.049,
"eval_sts-label_steps_per_second": 12.451,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_qnli-contrastive_loss": 1.4170150756835938,
"eval_qnli-contrastive_runtime": 0.2814,
"eval_qnli-contrastive_samples_per_second": 710.85,
"eval_qnli-contrastive_steps_per_second": 17.771,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_scitail-pairs-qa_loss": 0.6900365948677063,
"eval_scitail-pairs-qa_runtime": 1.0611,
"eval_scitail-pairs-qa_samples_per_second": 188.492,
"eval_scitail-pairs-qa_steps_per_second": 4.712,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_scitail-pairs-pos_loss": 1.352358102798462,
"eval_scitail-pairs-pos_runtime": 2.3596,
"eval_scitail-pairs-pos_samples_per_second": 84.761,
"eval_scitail-pairs-pos_steps_per_second": 2.119,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_xsum-pairs_loss": 1.376610517501831,
"eval_xsum-pairs_runtime": 1.0393,
"eval_xsum-pairs_samples_per_second": 192.443,
"eval_xsum-pairs_steps_per_second": 4.811,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_compression-pairs_loss": 0.871735692024231,
"eval_compression-pairs_runtime": 0.2351,
"eval_compression-pairs_samples_per_second": 850.579,
"eval_compression-pairs_steps_per_second": 21.264,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_sciq_pairs_loss": 9.191713333129883,
"eval_sciq_pairs_runtime": 9.1572,
"eval_sciq_pairs_samples_per_second": 21.841,
"eval_sciq_pairs_steps_per_second": 0.546,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_qasc_pairs_loss": 2.369694709777832,
"eval_qasc_pairs_runtime": 1.2239,
"eval_qasc_pairs_samples_per_second": 163.415,
"eval_qasc_pairs_steps_per_second": 4.085,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_openbookqa_pairs_loss": 3.9601967334747314,
"eval_openbookqa_pairs_runtime": 1.0681,
"eval_openbookqa_pairs_samples_per_second": 187.247,
"eval_openbookqa_pairs_steps_per_second": 4.681,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_msmarco_pairs_loss": 3.0808801651000977,
"eval_msmarco_pairs_runtime": 2.5507,
"eval_msmarco_pairs_samples_per_second": 78.409,
"eval_msmarco_pairs_steps_per_second": 1.96,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_nq_pairs_loss": 3.4922549724578857,
"eval_nq_pairs_runtime": 5.7154,
"eval_nq_pairs_samples_per_second": 34.993,
"eval_nq_pairs_steps_per_second": 0.875,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_trivia_pairs_loss": 3.4910638332366943,
"eval_trivia_pairs_runtime": 9.1195,
"eval_trivia_pairs_samples_per_second": 21.931,
"eval_trivia_pairs_steps_per_second": 0.548,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_quora_pairs_loss": 0.833874523639679,
"eval_quora_pairs_runtime": 0.6419,
"eval_quora_pairs_samples_per_second": 311.554,
"eval_quora_pairs_steps_per_second": 7.789,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_gooaq_pairs_loss": 2.622526168823242,
"eval_gooaq_pairs_runtime": 1.5751,
"eval_gooaq_pairs_samples_per_second": 126.977,
"eval_gooaq_pairs_steps_per_second": 3.174,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_mrpc_pairs_loss": 0.4888114929199219,
"eval_mrpc_pairs_runtime": 0.2398,
"eval_mrpc_pairs_samples_per_second": 833.994,
"eval_mrpc_pairs_steps_per_second": 20.85,
"step": 1916
},
{
"epoch": 0.30112923462986196,
"grad_norm": 6.916851997375488,
"learning_rate": 1.4008782936010036e-05,
"loss": 2.7233,
"step": 1920
},
{
"epoch": 0.3086574654956085,
"grad_norm": 17.678085327148438,
"learning_rate": 1.4360100376411543e-05,
"loss": 3.6816,
"step": 1968
},
{
"epoch": 0.3161856963613551,
"grad_norm": 92.62138366699219,
"learning_rate": 1.4711417816813047e-05,
"loss": 3.3232,
"step": 2016
},
{
"epoch": 0.3237139272271016,
"grad_norm": 27.2542781829834,
"learning_rate": 1.5062735257214554e-05,
"loss": 3.3469,
"step": 2064
},
{
"epoch": 0.3312421580928482,
"grad_norm": 6.005978584289551,
"learning_rate": 1.5414052697616058e-05,
"loss": 3.7509,
"step": 2112
},
{
"epoch": 0.33877038895859474,
"grad_norm": 16.488624572753906,
"learning_rate": 1.5765370138017566e-05,
"loss": 3.1811,
"step": 2160
},
{
"epoch": 0.3462986198243413,
"grad_norm": 11.462204933166504,
"learning_rate": 1.611668757841907e-05,
"loss": 3.3341,
"step": 2208
},
{
"epoch": 0.35382685069008785,
"grad_norm": 33.485206604003906,
"learning_rate": 1.6468005018820577e-05,
"loss": 2.764,
"step": 2256
},
{
"epoch": 0.3613550815558344,
"grad_norm": 25.066240310668945,
"learning_rate": 1.681932245922208e-05,
"loss": 3.6488,
"step": 2304
},
{
"epoch": 0.36888331242158096,
"grad_norm": 28.305265426635742,
"learning_rate": 1.7170639899623588e-05,
"loss": 2.721,
"step": 2352
},
{
"epoch": 0.3756273525721455,
"eval_nli-pairs_loss": 2.527458667755127,
"eval_nli-pairs_runtime": 4.1153,
"eval_nli-pairs_samples_per_second": 48.599,
"eval_nli-pairs_steps_per_second": 1.215,
"eval_sts-test_pearson_cosine": 0.7258900302408404,
"eval_sts-test_pearson_dot": 0.5655223839113195,
"eval_sts-test_pearson_euclidean": 0.7228747263710285,
"eval_sts-test_pearson_manhattan": 0.732591374373909,
"eval_sts-test_pearson_max": 0.732591374373909,
"eval_sts-test_spearman_cosine": 0.707910346125958,
"eval_sts-test_spearman_dot": 0.5482635095738919,
"eval_sts-test_spearman_euclidean": 0.7064759533156177,
"eval_sts-test_spearman_manhattan": 0.7166423493246757,
"eval_sts-test_spearman_max": 0.7166423493246757,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_vitaminc-pairs_loss": 6.437549114227295,
"eval_vitaminc-pairs_runtime": 1.4278,
"eval_vitaminc-pairs_samples_per_second": 116.261,
"eval_vitaminc-pairs_steps_per_second": 2.801,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_sts-label_loss": 4.1980671882629395,
"eval_sts-label_runtime": 0.3956,
"eval_sts-label_samples_per_second": 505.555,
"eval_sts-label_steps_per_second": 12.639,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_qnli-contrastive_loss": 1.0682133436203003,
"eval_qnli-contrastive_runtime": 0.2789,
"eval_qnli-contrastive_samples_per_second": 717.152,
"eval_qnli-contrastive_steps_per_second": 17.929,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_scitail-pairs-qa_loss": 0.5046552419662476,
"eval_scitail-pairs-qa_runtime": 1.0451,
"eval_scitail-pairs-qa_samples_per_second": 191.365,
"eval_scitail-pairs-qa_steps_per_second": 4.784,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_scitail-pairs-pos_loss": 1.1998459100723267,
"eval_scitail-pairs-pos_runtime": 2.3442,
"eval_scitail-pairs-pos_samples_per_second": 85.316,
"eval_scitail-pairs-pos_steps_per_second": 2.133,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_xsum-pairs_loss": 1.1817097663879395,
"eval_xsum-pairs_runtime": 1.0372,
"eval_xsum-pairs_samples_per_second": 192.835,
"eval_xsum-pairs_steps_per_second": 4.821,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_compression-pairs_loss": 0.6974765062332153,
"eval_compression-pairs_runtime": 0.2369,
"eval_compression-pairs_samples_per_second": 844.401,
"eval_compression-pairs_steps_per_second": 21.11,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_sciq_pairs_loss": 8.970888137817383,
"eval_sciq_pairs_runtime": 9.0441,
"eval_sciq_pairs_samples_per_second": 22.114,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_qasc_pairs_loss": 1.9235339164733887,
"eval_qasc_pairs_runtime": 1.2061,
"eval_qasc_pairs_samples_per_second": 165.828,
"eval_qasc_pairs_steps_per_second": 4.146,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_openbookqa_pairs_loss": 3.6225194931030273,
"eval_openbookqa_pairs_runtime": 1.0455,
"eval_openbookqa_pairs_samples_per_second": 191.296,
"eval_openbookqa_pairs_steps_per_second": 4.782,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_msmarco_pairs_loss": 2.664341926574707,
"eval_msmarco_pairs_runtime": 2.5305,
"eval_msmarco_pairs_samples_per_second": 79.036,
"eval_msmarco_pairs_steps_per_second": 1.976,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_nq_pairs_loss": 3.055206298828125,
"eval_nq_pairs_runtime": 5.6527,
"eval_nq_pairs_samples_per_second": 35.381,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_trivia_pairs_loss": 2.9497525691986084,
"eval_trivia_pairs_runtime": 9.0334,
"eval_trivia_pairs_samples_per_second": 22.14,
"eval_trivia_pairs_steps_per_second": 0.554,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_quora_pairs_loss": 0.7771684527397156,
"eval_quora_pairs_runtime": 0.626,
"eval_quora_pairs_samples_per_second": 319.495,
"eval_quora_pairs_steps_per_second": 7.987,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_gooaq_pairs_loss": 2.266879081726074,
"eval_gooaq_pairs_runtime": 1.5425,
"eval_gooaq_pairs_samples_per_second": 129.664,
"eval_gooaq_pairs_steps_per_second": 3.242,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_mrpc_pairs_loss": 0.36913084983825684,
"eval_mrpc_pairs_runtime": 0.2383,
"eval_mrpc_pairs_samples_per_second": 839.153,
"eval_mrpc_pairs_steps_per_second": 20.979,
"step": 2395
},
{
"epoch": 0.37641154328732745,
"grad_norm": 36.60768127441406,
"learning_rate": 1.752195734002509e-05,
"loss": 3.3609,
"step": 2400
},
{
"epoch": 0.383939774153074,
"grad_norm": 24.15782928466797,
"learning_rate": 1.7873274780426595e-05,
"loss": 2.6252,
"step": 2448
},
{
"epoch": 0.39146800501882056,
"grad_norm": 7.509932041168213,
"learning_rate": 1.8224592220828106e-05,
"loss": 3.5142,
"step": 2496
},
{
"epoch": 0.3989962358845671,
"grad_norm": 29.380950927734375,
"learning_rate": 1.857590966122961e-05,
"loss": 4.0597,
"step": 2544
},
{
"epoch": 0.4065244667503137,
"grad_norm": 28.593975067138672,
"learning_rate": 1.8927227101631114e-05,
"loss": 2.8512,
"step": 2592
},
{
"epoch": 0.41405269761606023,
"grad_norm": 21.228628158569336,
"learning_rate": 1.927854454203262e-05,
"loss": 2.717,
"step": 2640
},
{
"epoch": 0.4215809284818068,
"grad_norm": 43.00386047363281,
"learning_rate": 1.962986198243413e-05,
"loss": 3.4717,
"step": 2688
},
{
"epoch": 0.42910915934755334,
"grad_norm": 25.004785537719727,
"learning_rate": 1.9981179422835632e-05,
"loss": 3.1105,
"step": 2736
},
{
"epoch": 0.4366373902132999,
"grad_norm": 7.555154323577881,
"learning_rate": 2.0332496863237136e-05,
"loss": 2.7798,
"step": 2784
},
{
"epoch": 0.44416562107904645,
"grad_norm": 30.839733123779297,
"learning_rate": 2.0683814303638643e-05,
"loss": 3.3606,
"step": 2832
},
{
"epoch": 0.45075282308657466,
"eval_nli-pairs_loss": 2.284590721130371,
"eval_nli-pairs_runtime": 4.0714,
"eval_nli-pairs_samples_per_second": 49.123,
"eval_nli-pairs_steps_per_second": 1.228,
"eval_sts-test_pearson_cosine": 0.7382507781851606,
"eval_sts-test_pearson_dot": 0.5710221319397019,
"eval_sts-test_pearson_euclidean": 0.7307583601561211,
"eval_sts-test_pearson_manhattan": 0.7394202696141936,
"eval_sts-test_pearson_max": 0.7394202696141936,
"eval_sts-test_spearman_cosine": 0.7211579109789371,
"eval_sts-test_spearman_dot": 0.5515579746967598,
"eval_sts-test_spearman_euclidean": 0.7142073811971875,
"eval_sts-test_spearman_manhattan": 0.7240537218564107,
"eval_sts-test_spearman_max": 0.7240537218564107,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_vitaminc-pairs_loss": 6.35264253616333,
"eval_vitaminc-pairs_runtime": 1.4349,
"eval_vitaminc-pairs_samples_per_second": 115.687,
"eval_vitaminc-pairs_steps_per_second": 2.788,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_sts-label_loss": 4.186042308807373,
"eval_sts-label_runtime": 0.3983,
"eval_sts-label_samples_per_second": 502.129,
"eval_sts-label_steps_per_second": 12.553,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_qnli-contrastive_loss": 0.781445324420929,
"eval_qnli-contrastive_runtime": 0.2765,
"eval_qnli-contrastive_samples_per_second": 723.448,
"eval_qnli-contrastive_steps_per_second": 18.086,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_scitail-pairs-qa_loss": 0.4217279851436615,
"eval_scitail-pairs-qa_runtime": 1.0438,
"eval_scitail-pairs-qa_samples_per_second": 191.612,
"eval_scitail-pairs-qa_steps_per_second": 4.79,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_scitail-pairs-pos_loss": 1.051362156867981,
"eval_scitail-pairs-pos_runtime": 2.3425,
"eval_scitail-pairs-pos_samples_per_second": 85.379,
"eval_scitail-pairs-pos_steps_per_second": 2.134,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_xsum-pairs_loss": 1.0554753541946411,
"eval_xsum-pairs_runtime": 1.044,
"eval_xsum-pairs_samples_per_second": 191.573,
"eval_xsum-pairs_steps_per_second": 4.789,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_compression-pairs_loss": 0.6035106778144836,
"eval_compression-pairs_runtime": 0.241,
"eval_compression-pairs_samples_per_second": 830.038,
"eval_compression-pairs_steps_per_second": 20.751,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_sciq_pairs_loss": 8.811105728149414,
"eval_sciq_pairs_runtime": 9.0357,
"eval_sciq_pairs_samples_per_second": 22.134,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_qasc_pairs_loss": 1.615903377532959,
"eval_qasc_pairs_runtime": 1.214,
"eval_qasc_pairs_samples_per_second": 164.746,
"eval_qasc_pairs_steps_per_second": 4.119,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_openbookqa_pairs_loss": 3.4049320220947266,
"eval_openbookqa_pairs_runtime": 1.0554,
"eval_openbookqa_pairs_samples_per_second": 189.509,
"eval_openbookqa_pairs_steps_per_second": 4.738,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_msmarco_pairs_loss": 2.3909060955047607,
"eval_msmarco_pairs_runtime": 2.5301,
"eval_msmarco_pairs_samples_per_second": 79.048,
"eval_msmarco_pairs_steps_per_second": 1.976,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_nq_pairs_loss": 2.794445753097534,
"eval_nq_pairs_runtime": 5.6752,
"eval_nq_pairs_samples_per_second": 35.241,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_trivia_pairs_loss": 2.753361701965332,
"eval_trivia_pairs_runtime": 9.0766,
"eval_trivia_pairs_samples_per_second": 22.035,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_quora_pairs_loss": 0.205492302775383,
"eval_quora_pairs_runtime": 0.6182,
"eval_quora_pairs_samples_per_second": 323.536,
"eval_quora_pairs_steps_per_second": 8.088,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_gooaq_pairs_loss": 2.038878917694092,
"eval_gooaq_pairs_runtime": 1.5488,
"eval_gooaq_pairs_samples_per_second": 129.134,
"eval_gooaq_pairs_steps_per_second": 3.228,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_mrpc_pairs_loss": 0.3230588436126709,
"eval_mrpc_pairs_runtime": 0.2358,
"eval_mrpc_pairs_samples_per_second": 848.229,
"eval_mrpc_pairs_steps_per_second": 21.206,
"step": 2874
},
{
"epoch": 0.451693851944793,
"grad_norm": 5.271574020385742,
"learning_rate": 2.103513174404015e-05,
"loss": 2.6918,
"step": 2880
},
{
"epoch": 0.4592220828105395,
"grad_norm": 21.954103469848633,
"learning_rate": 2.1386449184441654e-05,
"loss": 2.8354,
"step": 2928
},
{
"epoch": 0.46675031367628605,
"grad_norm": 28.671293258666992,
"learning_rate": 2.173776662484316e-05,
"loss": 2.9499,
"step": 2976
},
{
"epoch": 0.4742785445420326,
"grad_norm": 26.562397003173828,
"learning_rate": 2.2089084065244666e-05,
"loss": 2.6211,
"step": 3024
},
{
"epoch": 0.48180677540777916,
"grad_norm": 48.511756896972656,
"learning_rate": 2.2440401505646173e-05,
"loss": 3.3356,
"step": 3072
},
{
"epoch": 0.4893350062735257,
"grad_norm": 46.71563720703125,
"learning_rate": 2.2791718946047677e-05,
"loss": 2.846,
"step": 3120
},
{
"epoch": 0.4968632371392723,
"grad_norm": 24.524322509765625,
"learning_rate": 2.3143036386449184e-05,
"loss": 2.4866,
"step": 3168
},
{
"epoch": 0.5043914680050188,
"grad_norm": 85.22843933105469,
"learning_rate": 2.3494353826850688e-05,
"loss": 2.6334,
"step": 3216
},
{
"epoch": 0.5119196988707654,
"grad_norm": 28.435443878173828,
"learning_rate": 2.384567126725219e-05,
"loss": 2.6118,
"step": 3264
},
{
"epoch": 0.5194479297365119,
"grad_norm": 21.590103149414062,
"learning_rate": 2.41969887076537e-05,
"loss": 2.5833,
"step": 3312
},
{
"epoch": 0.5258782936010038,
"eval_nli-pairs_loss": 2.0752949714660645,
"eval_nli-pairs_runtime": 4.0304,
"eval_nli-pairs_samples_per_second": 49.623,
"eval_nli-pairs_steps_per_second": 1.241,
"eval_sts-test_pearson_cosine": 0.7401847199967786,
"eval_sts-test_pearson_dot": 0.5441501995975192,
"eval_sts-test_pearson_euclidean": 0.7344996320188322,
"eval_sts-test_pearson_manhattan": 0.7394640598472787,
"eval_sts-test_pearson_max": 0.7401847199967786,
"eval_sts-test_spearman_cosine": 0.7300085598018916,
"eval_sts-test_spearman_dot": 0.5241747185593542,
"eval_sts-test_spearman_euclidean": 0.7194131601167465,
"eval_sts-test_spearman_manhattan": 0.726961581928453,
"eval_sts-test_spearman_max": 0.7300085598018916,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_vitaminc-pairs_loss": 6.441956996917725,
"eval_vitaminc-pairs_runtime": 1.4416,
"eval_vitaminc-pairs_samples_per_second": 115.149,
"eval_vitaminc-pairs_steps_per_second": 2.775,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_sts-label_loss": 4.200085639953613,
"eval_sts-label_runtime": 0.3949,
"eval_sts-label_samples_per_second": 506.436,
"eval_sts-label_steps_per_second": 12.661,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_qnli-contrastive_loss": 0.5195684432983398,
"eval_qnli-contrastive_runtime": 0.2809,
"eval_qnli-contrastive_samples_per_second": 712.107,
"eval_qnli-contrastive_steps_per_second": 17.803,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_scitail-pairs-qa_loss": 0.35189124941825867,
"eval_scitail-pairs-qa_runtime": 1.0578,
"eval_scitail-pairs-qa_samples_per_second": 189.064,
"eval_scitail-pairs-qa_steps_per_second": 4.727,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_scitail-pairs-pos_loss": 0.8873756527900696,
"eval_scitail-pairs-pos_runtime": 2.4029,
"eval_scitail-pairs-pos_samples_per_second": 83.232,
"eval_scitail-pairs-pos_steps_per_second": 2.081,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_xsum-pairs_loss": 0.939339280128479,
"eval_xsum-pairs_runtime": 1.041,
"eval_xsum-pairs_samples_per_second": 192.121,
"eval_xsum-pairs_steps_per_second": 4.803,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_compression-pairs_loss": 0.5007131695747375,
"eval_compression-pairs_runtime": 0.2338,
"eval_compression-pairs_samples_per_second": 855.479,
"eval_compression-pairs_steps_per_second": 21.387,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_sciq_pairs_loss": 8.558987617492676,
"eval_sciq_pairs_runtime": 9.0984,
"eval_sciq_pairs_samples_per_second": 21.982,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_qasc_pairs_loss": 1.4318852424621582,
"eval_qasc_pairs_runtime": 1.2286,
"eval_qasc_pairs_samples_per_second": 162.79,
"eval_qasc_pairs_steps_per_second": 4.07,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_openbookqa_pairs_loss": 3.1973114013671875,
"eval_openbookqa_pairs_runtime": 1.0491,
"eval_openbookqa_pairs_samples_per_second": 190.633,
"eval_openbookqa_pairs_steps_per_second": 4.766,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_msmarco_pairs_loss": 2.2080254554748535,
"eval_msmarco_pairs_runtime": 2.5223,
"eval_msmarco_pairs_samples_per_second": 79.294,
"eval_msmarco_pairs_steps_per_second": 1.982,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_nq_pairs_loss": 2.5810558795928955,
"eval_nq_pairs_runtime": 5.6341,
"eval_nq_pairs_samples_per_second": 35.498,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_trivia_pairs_loss": 2.655771255493164,
"eval_trivia_pairs_runtime": 9.0716,
"eval_trivia_pairs_samples_per_second": 22.047,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_quora_pairs_loss": 0.5028819441795349,
"eval_quora_pairs_runtime": 0.6144,
"eval_quora_pairs_samples_per_second": 325.522,
"eval_quora_pairs_steps_per_second": 8.138,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_gooaq_pairs_loss": 1.8867437839508057,
"eval_gooaq_pairs_runtime": 1.5505,
"eval_gooaq_pairs_samples_per_second": 128.994,
"eval_gooaq_pairs_steps_per_second": 3.225,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_mrpc_pairs_loss": 0.2580638825893402,
"eval_mrpc_pairs_runtime": 0.2364,
"eval_mrpc_pairs_samples_per_second": 846.008,
"eval_mrpc_pairs_steps_per_second": 21.15,
"step": 3353
},
{
"epoch": 0.5269761606022585,
"grad_norm": 2.6962711811065674,
"learning_rate": 2.4548306148055206e-05,
"loss": 2.3251,
"step": 3360
},
{
"epoch": 0.534504391468005,
"grad_norm": 35.47948455810547,
"learning_rate": 2.489962358845671e-05,
"loss": 2.8494,
"step": 3408
},
{
"epoch": 0.5420326223337516,
"grad_norm": 8.13453483581543,
"learning_rate": 2.5250941028858214e-05,
"loss": 2.4009,
"step": 3456
},
{
"epoch": 0.5495608531994981,
"grad_norm": 20.041057586669922,
"learning_rate": 2.560225846925972e-05,
"loss": 2.5952,
"step": 3504
},
{
"epoch": 0.5570890840652447,
"grad_norm": 23.942073822021484,
"learning_rate": 2.595357590966123e-05,
"loss": 2.2798,
"step": 3552
},
{
"epoch": 0.5646173149309912,
"grad_norm": 17.675006866455078,
"learning_rate": 2.6304893350062732e-05,
"loss": 2.308,
"step": 3600
},
{
"epoch": 0.5721455457967378,
"grad_norm": 24.20000457763672,
"learning_rate": 2.6656210790464236e-05,
"loss": 2.122,
"step": 3648
},
{
"epoch": 0.5796737766624843,
"grad_norm": 30.06256866455078,
"learning_rate": 2.7007528230865747e-05,
"loss": 2.7901,
"step": 3696
},
{
"epoch": 0.5872020075282308,
"grad_norm": 22.547115325927734,
"learning_rate": 2.735884567126725e-05,
"loss": 2.0671,
"step": 3744
},
{
"epoch": 0.5947302383939774,
"grad_norm": 34.11716079711914,
"learning_rate": 2.7710163111668754e-05,
"loss": 2.366,
"step": 3792
},
{
"epoch": 0.6010037641154329,
"eval_nli-pairs_loss": 1.8900150060653687,
"eval_nli-pairs_runtime": 4.0481,
"eval_nli-pairs_samples_per_second": 49.406,
"eval_nli-pairs_steps_per_second": 1.235,
"eval_sts-test_pearson_cosine": 0.752143976340549,
"eval_sts-test_pearson_dot": 0.5694102087200895,
"eval_sts-test_pearson_euclidean": 0.7457585181878474,
"eval_sts-test_pearson_manhattan": 0.7525316002813096,
"eval_sts-test_pearson_max": 0.7525316002813096,
"eval_sts-test_spearman_cosine": 0.7404216272264129,
"eval_sts-test_spearman_dot": 0.5485789739808921,
"eval_sts-test_spearman_euclidean": 0.728675089641457,
"eval_sts-test_spearman_manhattan": 0.7367562035227414,
"eval_sts-test_spearman_max": 0.7404216272264129,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_vitaminc-pairs_loss": 6.0831098556518555,
"eval_vitaminc-pairs_runtime": 1.4528,
"eval_vitaminc-pairs_samples_per_second": 114.264,
"eval_vitaminc-pairs_steps_per_second": 2.753,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_sts-label_loss": 4.197264671325684,
"eval_sts-label_runtime": 0.4176,
"eval_sts-label_samples_per_second": 478.893,
"eval_sts-label_steps_per_second": 11.972,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_qnli-contrastive_loss": 0.5115653872489929,
"eval_qnli-contrastive_runtime": 0.3027,
"eval_qnli-contrastive_samples_per_second": 660.784,
"eval_qnli-contrastive_steps_per_second": 16.52,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_scitail-pairs-qa_loss": 0.29788386821746826,
"eval_scitail-pairs-qa_runtime": 1.0654,
"eval_scitail-pairs-qa_samples_per_second": 187.719,
"eval_scitail-pairs-qa_steps_per_second": 4.693,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_scitail-pairs-pos_loss": 0.8727617859840393,
"eval_scitail-pairs-pos_runtime": 2.3677,
"eval_scitail-pairs-pos_samples_per_second": 84.471,
"eval_scitail-pairs-pos_steps_per_second": 2.112,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_xsum-pairs_loss": 0.8608022928237915,
"eval_xsum-pairs_runtime": 1.0435,
"eval_xsum-pairs_samples_per_second": 191.671,
"eval_xsum-pairs_steps_per_second": 4.792,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_compression-pairs_loss": 0.4411359429359436,
"eval_compression-pairs_runtime": 0.2345,
"eval_compression-pairs_samples_per_second": 852.821,
"eval_compression-pairs_steps_per_second": 21.321,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_sciq_pairs_loss": 8.294719696044922,
"eval_sciq_pairs_runtime": 9.2141,
"eval_sciq_pairs_samples_per_second": 21.706,
"eval_sciq_pairs_steps_per_second": 0.543,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_qasc_pairs_loss": 1.1894803047180176,
"eval_qasc_pairs_runtime": 1.2518,
"eval_qasc_pairs_samples_per_second": 159.774,
"eval_qasc_pairs_steps_per_second": 3.994,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_openbookqa_pairs_loss": 2.8579885959625244,
"eval_openbookqa_pairs_runtime": 1.0874,
"eval_openbookqa_pairs_samples_per_second": 183.92,
"eval_openbookqa_pairs_steps_per_second": 4.598,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_msmarco_pairs_loss": 1.9733755588531494,
"eval_msmarco_pairs_runtime": 2.5486,
"eval_msmarco_pairs_samples_per_second": 78.476,
"eval_msmarco_pairs_steps_per_second": 1.962,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_nq_pairs_loss": 2.206907033920288,
"eval_nq_pairs_runtime": 5.7528,
"eval_nq_pairs_samples_per_second": 34.766,
"eval_nq_pairs_steps_per_second": 0.869,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_trivia_pairs_loss": 2.332620620727539,
"eval_trivia_pairs_runtime": 9.1703,
"eval_trivia_pairs_samples_per_second": 21.809,
"eval_trivia_pairs_steps_per_second": 0.545,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_quora_pairs_loss": 0.48870089650154114,
"eval_quora_pairs_runtime": 0.6491,
"eval_quora_pairs_samples_per_second": 308.142,
"eval_quora_pairs_steps_per_second": 7.704,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_gooaq_pairs_loss": 1.598087191581726,
"eval_gooaq_pairs_runtime": 1.5759,
"eval_gooaq_pairs_samples_per_second": 126.912,
"eval_gooaq_pairs_steps_per_second": 3.173,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_mrpc_pairs_loss": 0.2343733161687851,
"eval_mrpc_pairs_runtime": 0.2484,
"eval_mrpc_pairs_samples_per_second": 805.097,
"eval_mrpc_pairs_steps_per_second": 20.127,
"step": 3832
},
{
"epoch": 0.6022584692597239,
"grad_norm": 1.486786127090454,
"learning_rate": 2.806148055207026e-05,
"loss": 1.9614,
"step": 3840
},
{
"epoch": 0.6097867001254705,
"grad_norm": 23.297300338745117,
"learning_rate": 2.841279799247177e-05,
"loss": 2.3589,
"step": 3888
},
{
"epoch": 0.617314930991217,
"grad_norm": 16.00516700744629,
"learning_rate": 2.8764115432873273e-05,
"loss": 2.1475,
"step": 3936
},
{
"epoch": 0.6248431618569636,
"grad_norm": 24.357616424560547,
"learning_rate": 2.9115432873274777e-05,
"loss": 2.1312,
"step": 3984
},
{
"epoch": 0.6323713927227101,
"grad_norm": 28.798917770385742,
"learning_rate": 2.946675031367628e-05,
"loss": 2.5716,
"step": 4032
},
{
"epoch": 0.6398996235884568,
"grad_norm": 18.239490509033203,
"learning_rate": 2.981806775407779e-05,
"loss": 2.2249,
"step": 4080
},
{
"epoch": 0.6474278544542033,
"grad_norm": 19.50409507751465,
"learning_rate": 3.0169385194479295e-05,
"loss": 2.6331,
"step": 4128
},
{
"epoch": 0.6549560853199499,
"grad_norm": 12.110575675964355,
"learning_rate": 3.05207026348808e-05,
"loss": 2.7637,
"step": 4176
},
{
"epoch": 0.6624843161856964,
"grad_norm": 6.904999256134033,
"learning_rate": 3.087202007528231e-05,
"loss": 1.8973,
"step": 4224
},
{
"epoch": 0.6700125470514429,
"grad_norm": 9.007365226745605,
"learning_rate": 3.1223337515683813e-05,
"loss": 2.3181,
"step": 4272
},
{
"epoch": 0.676129234629862,
"eval_nli-pairs_loss": 1.7111084461212158,
"eval_nli-pairs_runtime": 4.0305,
"eval_nli-pairs_samples_per_second": 49.622,
"eval_nli-pairs_steps_per_second": 1.241,
"eval_sts-test_pearson_cosine": 0.7375865838793885,
"eval_sts-test_pearson_dot": 0.5355907015359193,
"eval_sts-test_pearson_euclidean": 0.7266850031847317,
"eval_sts-test_pearson_manhattan": 0.7357621558005936,
"eval_sts-test_pearson_max": 0.7375865838793885,
"eval_sts-test_spearman_cosine": 0.7273524041973777,
"eval_sts-test_spearman_dot": 0.5084902224306463,
"eval_sts-test_spearman_euclidean": 0.7071419579928555,
"eval_sts-test_spearman_manhattan": 0.7177664681655631,
"eval_sts-test_spearman_max": 0.7273524041973777,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_vitaminc-pairs_loss": 6.216845989227295,
"eval_vitaminc-pairs_runtime": 1.4703,
"eval_vitaminc-pairs_samples_per_second": 112.902,
"eval_vitaminc-pairs_steps_per_second": 2.721,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_sts-label_loss": 4.384557723999023,
"eval_sts-label_runtime": 0.3912,
"eval_sts-label_samples_per_second": 511.22,
"eval_sts-label_steps_per_second": 12.78,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_qnli-contrastive_loss": 0.40437957644462585,
"eval_qnli-contrastive_runtime": 0.281,
"eval_qnli-contrastive_samples_per_second": 711.864,
"eval_qnli-contrastive_steps_per_second": 17.797,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_scitail-pairs-qa_loss": 0.2210184931755066,
"eval_scitail-pairs-qa_runtime": 1.0575,
"eval_scitail-pairs-qa_samples_per_second": 189.117,
"eval_scitail-pairs-qa_steps_per_second": 4.728,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_scitail-pairs-pos_loss": 0.9065079689025879,
"eval_scitail-pairs-pos_runtime": 2.3488,
"eval_scitail-pairs-pos_samples_per_second": 85.151,
"eval_scitail-pairs-pos_steps_per_second": 2.129,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_xsum-pairs_loss": 0.8169436454772949,
"eval_xsum-pairs_runtime": 1.0409,
"eval_xsum-pairs_samples_per_second": 192.145,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_compression-pairs_loss": 0.391815721988678,
"eval_compression-pairs_runtime": 0.2361,
"eval_compression-pairs_samples_per_second": 847.066,
"eval_compression-pairs_steps_per_second": 21.177,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_sciq_pairs_loss": 0.6230970025062561,
"eval_sciq_pairs_runtime": 9.0874,
"eval_sciq_pairs_samples_per_second": 22.008,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_qasc_pairs_loss": 1.1559942960739136,
"eval_qasc_pairs_runtime": 1.2507,
"eval_qasc_pairs_samples_per_second": 159.907,
"eval_qasc_pairs_steps_per_second": 3.998,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_openbookqa_pairs_loss": 2.8303356170654297,
"eval_openbookqa_pairs_runtime": 1.0524,
"eval_openbookqa_pairs_samples_per_second": 190.041,
"eval_openbookqa_pairs_steps_per_second": 4.751,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_msmarco_pairs_loss": 1.9672399759292603,
"eval_msmarco_pairs_runtime": 2.518,
"eval_msmarco_pairs_samples_per_second": 79.428,
"eval_msmarco_pairs_steps_per_second": 1.986,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_nq_pairs_loss": 2.169950008392334,
"eval_nq_pairs_runtime": 5.6541,
"eval_nq_pairs_samples_per_second": 35.372,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_trivia_pairs_loss": 2.198312520980835,
"eval_trivia_pairs_runtime": 9.0535,
"eval_trivia_pairs_samples_per_second": 22.091,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_quora_pairs_loss": 0.3780948519706726,
"eval_quora_pairs_runtime": 0.6375,
"eval_quora_pairs_samples_per_second": 313.737,
"eval_quora_pairs_steps_per_second": 7.843,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_gooaq_pairs_loss": 1.5646275281906128,
"eval_gooaq_pairs_runtime": 1.5531,
"eval_gooaq_pairs_samples_per_second": 128.772,
"eval_gooaq_pairs_steps_per_second": 3.219,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_mrpc_pairs_loss": 0.18545588850975037,
"eval_mrpc_pairs_runtime": 0.2382,
"eval_mrpc_pairs_samples_per_second": 839.569,
"eval_mrpc_pairs_steps_per_second": 20.989,
"step": 4311
},
{
"epoch": 0.6775407779171895,
"grad_norm": 15.639892578125,
"learning_rate": 3.157465495608532e-05,
"loss": 2.2329,
"step": 4320
},
{
"epoch": 0.685069008782936,
"grad_norm": 7.185269832611084,
"learning_rate": 3.192597239648682e-05,
"loss": 2.7864,
"step": 4368
},
{
"epoch": 0.6925972396486826,
"grad_norm": 10.165898323059082,
"learning_rate": 3.227728983688833e-05,
"loss": 2.5277,
"step": 4416
},
{
"epoch": 0.7001254705144291,
"grad_norm": 20.132612228393555,
"learning_rate": 3.2628607277289836e-05,
"loss": 2.526,
"step": 4464
},
{
"epoch": 0.7076537013801757,
"grad_norm": 6.147126197814941,
"learning_rate": 3.297992471769134e-05,
"loss": 1.5993,
"step": 4512
},
{
"epoch": 0.7151819322459222,
"grad_norm": 9.857342720031738,
"learning_rate": 3.3331242158092843e-05,
"loss": 2.0452,
"step": 4560
},
{
"epoch": 0.7227101631116688,
"grad_norm": 22.434364318847656,
"learning_rate": 3.3682559598494354e-05,
"loss": 2.0458,
"step": 4608
},
{
"epoch": 0.7302383939774153,
"grad_norm": 22.420066833496094,
"learning_rate": 3.403387703889586e-05,
"loss": 1.854,
"step": 4656
},
{
"epoch": 0.7377666248431619,
"grad_norm": 24.213205337524414,
"learning_rate": 3.438519447929736e-05,
"loss": 2.31,
"step": 4704
},
{
"epoch": 0.7452948557089084,
"grad_norm": 2.1184492111206055,
"learning_rate": 3.473651191969887e-05,
"loss": 1.7484,
"step": 4752
},
{
"epoch": 0.751254705144291,
"eval_nli-pairs_loss": 1.5204579830169678,
"eval_nli-pairs_runtime": 4.3227,
"eval_nli-pairs_samples_per_second": 46.267,
"eval_nli-pairs_steps_per_second": 1.157,
"eval_sts-test_pearson_cosine": 0.753550468294361,
"eval_sts-test_pearson_dot": 0.576164453162354,
"eval_sts-test_pearson_euclidean": 0.7433413992355353,
"eval_sts-test_pearson_manhattan": 0.7483177470711824,
"eval_sts-test_pearson_max": 0.753550468294361,
"eval_sts-test_spearman_cosine": 0.7510075785449373,
"eval_sts-test_spearman_dot": 0.5438417987754244,
"eval_sts-test_spearman_euclidean": 0.7271758422639625,
"eval_sts-test_spearman_manhattan": 0.7334567781451864,
"eval_sts-test_spearman_max": 0.7510075785449373,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_vitaminc-pairs_loss": 5.992164134979248,
"eval_vitaminc-pairs_runtime": 1.4716,
"eval_vitaminc-pairs_samples_per_second": 112.802,
"eval_vitaminc-pairs_steps_per_second": 2.718,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_sts-label_loss": 4.175446033477783,
"eval_sts-label_runtime": 0.3991,
"eval_sts-label_samples_per_second": 501.105,
"eval_sts-label_steps_per_second": 12.528,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_qnli-contrastive_loss": 0.4543713629245758,
"eval_qnli-contrastive_runtime": 0.2765,
"eval_qnli-contrastive_samples_per_second": 723.231,
"eval_qnli-contrastive_steps_per_second": 18.081,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_scitail-pairs-qa_loss": 0.20784999430179596,
"eval_scitail-pairs-qa_runtime": 1.0567,
"eval_scitail-pairs-qa_samples_per_second": 189.26,
"eval_scitail-pairs-qa_steps_per_second": 4.732,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_scitail-pairs-pos_loss": 0.8679056167602539,
"eval_scitail-pairs-pos_runtime": 2.3456,
"eval_scitail-pairs-pos_samples_per_second": 85.268,
"eval_scitail-pairs-pos_steps_per_second": 2.132,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_xsum-pairs_loss": 0.6944636702537537,
"eval_xsum-pairs_runtime": 1.0379,
"eval_xsum-pairs_samples_per_second": 192.694,
"eval_xsum-pairs_steps_per_second": 4.817,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_compression-pairs_loss": 0.34194332361221313,
"eval_compression-pairs_runtime": 0.2354,
"eval_compression-pairs_samples_per_second": 849.798,
"eval_compression-pairs_steps_per_second": 21.245,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_sciq_pairs_loss": 7.352969646453857,
"eval_sciq_pairs_runtime": 9.0502,
"eval_sciq_pairs_samples_per_second": 22.099,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_qasc_pairs_loss": 0.9425787329673767,
"eval_qasc_pairs_runtime": 1.2081,
"eval_qasc_pairs_samples_per_second": 165.543,
"eval_qasc_pairs_steps_per_second": 4.139,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_openbookqa_pairs_loss": 2.5082011222839355,
"eval_openbookqa_pairs_runtime": 1.0492,
"eval_openbookqa_pairs_samples_per_second": 190.618,
"eval_openbookqa_pairs_steps_per_second": 4.765,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_msmarco_pairs_loss": 1.696744441986084,
"eval_msmarco_pairs_runtime": 2.5162,
"eval_msmarco_pairs_samples_per_second": 79.484,
"eval_msmarco_pairs_steps_per_second": 1.987,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_nq_pairs_loss": 1.8095602989196777,
"eval_nq_pairs_runtime": 5.6348,
"eval_nq_pairs_samples_per_second": 35.494,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_trivia_pairs_loss": 2.0272486209869385,
"eval_trivia_pairs_runtime": 9.0671,
"eval_trivia_pairs_samples_per_second": 22.058,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_quora_pairs_loss": 0.15705542266368866,
"eval_quora_pairs_runtime": 0.6326,
"eval_quora_pairs_samples_per_second": 316.138,
"eval_quora_pairs_steps_per_second": 7.903,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_gooaq_pairs_loss": 1.374332308769226,
"eval_gooaq_pairs_runtime": 1.5484,
"eval_gooaq_pairs_samples_per_second": 129.164,
"eval_gooaq_pairs_steps_per_second": 3.229,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_mrpc_pairs_loss": 0.17204828560352325,
"eval_mrpc_pairs_runtime": 0.2358,
"eval_mrpc_pairs_samples_per_second": 848.332,
"eval_mrpc_pairs_steps_per_second": 21.208,
"step": 4790
},
{
"epoch": 0.7528230865746549,
"grad_norm": 1.4021190404891968,
"learning_rate": 3.4999758305020584e-05,
"loss": 1.7113,
"step": 4800
},
{
"epoch": 0.7603513174404015,
"grad_norm": 19.776817321777344,
"learning_rate": 3.499395795931671e-05,
"loss": 2.3696,
"step": 4848
},
{
"epoch": 0.767879548306148,
"grad_norm": 28.693845748901367,
"learning_rate": 3.49804263115427e-05,
"loss": 2.2947,
"step": 4896
},
{
"epoch": 0.7754077791718946,
"grad_norm": 1.3631008863449097,
"learning_rate": 3.495916934189221e-05,
"loss": 1.8841,
"step": 4944
},
{
"epoch": 0.7829360100376411,
"grad_norm": 40.640262603759766,
"learning_rate": 3.4930196444697477e-05,
"loss": 2.084,
"step": 4992
},
{
"epoch": 0.7904642409033877,
"grad_norm": 20.45759391784668,
"learning_rate": 3.489352042427762e-05,
"loss": 1.9297,
"step": 5040
},
{
"epoch": 0.7979924717691342,
"grad_norm": 24.276058197021484,
"learning_rate": 3.484915748927982e-05,
"loss": 2.0521,
"step": 5088
},
{
"epoch": 0.8055207026348808,
"grad_norm": 24.93791389465332,
"learning_rate": 3.4797127245516105e-05,
"loss": 1.7092,
"step": 5136
},
{
"epoch": 0.8130489335006273,
"grad_norm": 25.131153106689453,
"learning_rate": 3.4737452687298694e-05,
"loss": 1.7394,
"step": 5184
},
{
"epoch": 0.820577164366374,
"grad_norm": 3.779459238052368,
"learning_rate": 3.467016018727788e-05,
"loss": 2.567,
"step": 5232
},
{
"epoch": 0.8263801756587202,
"eval_nli-pairs_loss": 1.4584167003631592,
"eval_nli-pairs_runtime": 3.9955,
"eval_nli-pairs_samples_per_second": 50.056,
"eval_nli-pairs_steps_per_second": 1.251,
"eval_sts-test_pearson_cosine": 0.7484577894142428,
"eval_sts-test_pearson_dot": 0.5289676422936789,
"eval_sts-test_pearson_euclidean": 0.743677607180833,
"eval_sts-test_pearson_manhattan": 0.7474581577502462,
"eval_sts-test_pearson_max": 0.7484577894142428,
"eval_sts-test_spearman_cosine": 0.7507798204197761,
"eval_sts-test_spearman_dot": 0.5016451185199292,
"eval_sts-test_spearman_euclidean": 0.7307379850546868,
"eval_sts-test_spearman_manhattan": 0.7367432097081014,
"eval_sts-test_spearman_max": 0.7507798204197761,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_vitaminc-pairs_loss": 5.56383752822876,
"eval_vitaminc-pairs_runtime": 1.4339,
"eval_vitaminc-pairs_samples_per_second": 115.772,
"eval_vitaminc-pairs_steps_per_second": 2.79,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_sts-label_loss": 4.355674743652344,
"eval_sts-label_runtime": 0.401,
"eval_sts-label_samples_per_second": 498.723,
"eval_sts-label_steps_per_second": 12.468,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_qnli-contrastive_loss": 0.2912294566631317,
"eval_qnli-contrastive_runtime": 0.2801,
"eval_qnli-contrastive_samples_per_second": 714.044,
"eval_qnli-contrastive_steps_per_second": 17.851,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_scitail-pairs-qa_loss": 0.19145721197128296,
"eval_scitail-pairs-qa_runtime": 1.0728,
"eval_scitail-pairs-qa_samples_per_second": 186.429,
"eval_scitail-pairs-qa_steps_per_second": 4.661,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_scitail-pairs-pos_loss": 0.7433645725250244,
"eval_scitail-pairs-pos_runtime": 2.3785,
"eval_scitail-pairs-pos_samples_per_second": 84.086,
"eval_scitail-pairs-pos_steps_per_second": 2.102,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_xsum-pairs_loss": 0.6145637631416321,
"eval_xsum-pairs_runtime": 1.0393,
"eval_xsum-pairs_samples_per_second": 192.428,
"eval_xsum-pairs_steps_per_second": 4.811,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_compression-pairs_loss": 0.29557526111602783,
"eval_compression-pairs_runtime": 0.2372,
"eval_compression-pairs_samples_per_second": 843.266,
"eval_compression-pairs_steps_per_second": 21.082,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_sciq_pairs_loss": 0.5607883930206299,
"eval_sciq_pairs_runtime": 9.0381,
"eval_sciq_pairs_samples_per_second": 22.128,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_qasc_pairs_loss": 0.8776007294654846,
"eval_qasc_pairs_runtime": 1.2111,
"eval_qasc_pairs_samples_per_second": 165.141,
"eval_qasc_pairs_steps_per_second": 4.129,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_openbookqa_pairs_loss": 2.414658784866333,
"eval_openbookqa_pairs_runtime": 1.052,
"eval_openbookqa_pairs_samples_per_second": 190.106,
"eval_openbookqa_pairs_steps_per_second": 4.753,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_msmarco_pairs_loss": 1.615893840789795,
"eval_msmarco_pairs_runtime": 2.5183,
"eval_msmarco_pairs_samples_per_second": 79.419,
"eval_msmarco_pairs_steps_per_second": 1.985,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_nq_pairs_loss": 1.5788501501083374,
"eval_nq_pairs_runtime": 5.6429,
"eval_nq_pairs_samples_per_second": 35.443,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_trivia_pairs_loss": 1.8346160650253296,
"eval_trivia_pairs_runtime": 9.0522,
"eval_trivia_pairs_samples_per_second": 22.094,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_quora_pairs_loss": 0.23968417942523956,
"eval_quora_pairs_runtime": 0.6179,
"eval_quora_pairs_samples_per_second": 323.693,
"eval_quora_pairs_steps_per_second": 8.092,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_gooaq_pairs_loss": 1.3264899253845215,
"eval_gooaq_pairs_runtime": 1.549,
"eval_gooaq_pairs_samples_per_second": 129.112,
"eval_gooaq_pairs_steps_per_second": 3.228,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_mrpc_pairs_loss": 0.14705294370651245,
"eval_mrpc_pairs_runtime": 0.2494,
"eval_mrpc_pairs_samples_per_second": 801.787,
"eval_mrpc_pairs_steps_per_second": 20.045,
"step": 5269
},
{
"epoch": 0.8281053952321205,
"grad_norm": 17.636714935302734,
"learning_rate": 3.459527948478686e-05,
"loss": 2.3021,
"step": 5280
},
{
"epoch": 0.835633626097867,
"grad_norm": 23.402650833129883,
"learning_rate": 3.4512843672698696e-05,
"loss": 1.5502,
"step": 5328
},
{
"epoch": 0.8431618569636136,
"grad_norm": 13.210539817810059,
"learning_rate": 3.4422889182801225e-05,
"loss": 1.7324,
"step": 5376
},
{
"epoch": 0.8506900878293601,
"grad_norm": 14.795612335205078,
"learning_rate": 3.4325455769696324e-05,
"loss": 1.8119,
"step": 5424
},
{
"epoch": 0.8582183186951067,
"grad_norm": 14.047534942626953,
"learning_rate": 3.422058649323072e-05,
"loss": 1.8507,
"step": 5472
},
{
"epoch": 0.8657465495608532,
"grad_norm": 0.7366377711296082,
"learning_rate": 3.4108327699466066e-05,
"loss": 1.7362,
"step": 5520
},
{
"epoch": 0.8732747804265998,
"grad_norm": 16.555519104003906,
"learning_rate": 3.398872900019673e-05,
"loss": 2.082,
"step": 5568
},
{
"epoch": 0.8808030112923463,
"grad_norm": 16.52071189880371,
"learning_rate": 3.386184325102423e-05,
"loss": 2.1483,
"step": 5616
},
{
"epoch": 0.8883312421580929,
"grad_norm": 16.03848648071289,
"learning_rate": 3.372772652799824e-05,
"loss": 1.3961,
"step": 5664
},
{
"epoch": 0.8958594730238394,
"grad_norm": 15.494946479797363,
"learning_rate": 3.358643810283421e-05,
"loss": 1.6331,
"step": 5712
},
{
"epoch": 0.9015056461731493,
"eval_nli-pairs_loss": 1.4454108476638794,
"eval_nli-pairs_runtime": 4.0041,
"eval_nli-pairs_samples_per_second": 49.949,
"eval_nli-pairs_steps_per_second": 1.249,
"eval_sts-test_pearson_cosine": 0.7644735043371528,
"eval_sts-test_pearson_dot": 0.5461512421131185,
"eval_sts-test_pearson_euclidean": 0.7520132891230207,
"eval_sts-test_pearson_manhattan": 0.7535418655995262,
"eval_sts-test_pearson_max": 0.7644735043371528,
"eval_sts-test_spearman_cosine": 0.7630644782411757,
"eval_sts-test_spearman_dot": 0.5239487411838791,
"eval_sts-test_spearman_euclidean": 0.7392793315112096,
"eval_sts-test_spearman_manhattan": 0.7426354353655322,
"eval_sts-test_spearman_max": 0.7630644782411757,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_vitaminc-pairs_loss": 5.328937530517578,
"eval_vitaminc-pairs_runtime": 1.4411,
"eval_vitaminc-pairs_samples_per_second": 115.19,
"eval_vitaminc-pairs_steps_per_second": 2.776,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_sts-label_loss": 4.186919212341309,
"eval_sts-label_runtime": 0.4046,
"eval_sts-label_samples_per_second": 494.357,
"eval_sts-label_steps_per_second": 12.359,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_qnli-contrastive_loss": 0.35569697618484497,
"eval_qnli-contrastive_runtime": 0.2807,
"eval_qnli-contrastive_samples_per_second": 712.531,
"eval_qnli-contrastive_steps_per_second": 17.813,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_scitail-pairs-qa_loss": 0.18789875507354736,
"eval_scitail-pairs-qa_runtime": 1.1135,
"eval_scitail-pairs-qa_samples_per_second": 179.622,
"eval_scitail-pairs-qa_steps_per_second": 4.491,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_scitail-pairs-pos_loss": 0.715129017829895,
"eval_scitail-pairs-pos_runtime": 2.3885,
"eval_scitail-pairs-pos_samples_per_second": 83.736,
"eval_scitail-pairs-pos_steps_per_second": 2.093,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_xsum-pairs_loss": 0.624691367149353,
"eval_xsum-pairs_runtime": 1.0408,
"eval_xsum-pairs_samples_per_second": 192.169,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_compression-pairs_loss": 0.30371707677841187,
"eval_compression-pairs_runtime": 0.2581,
"eval_compression-pairs_samples_per_second": 774.8,
"eval_compression-pairs_steps_per_second": 19.37,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_sciq_pairs_loss": 0.5236299633979797,
"eval_sciq_pairs_runtime": 9.205,
"eval_sciq_pairs_samples_per_second": 21.727,
"eval_sciq_pairs_steps_per_second": 0.543,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_qasc_pairs_loss": 0.8543006777763367,
"eval_qasc_pairs_runtime": 1.238,
"eval_qasc_pairs_samples_per_second": 161.556,
"eval_qasc_pairs_steps_per_second": 4.039,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_openbookqa_pairs_loss": 2.3740031719207764,
"eval_openbookqa_pairs_runtime": 1.1145,
"eval_openbookqa_pairs_samples_per_second": 179.452,
"eval_openbookqa_pairs_steps_per_second": 4.486,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_msmarco_pairs_loss": 1.4328840970993042,
"eval_msmarco_pairs_runtime": 2.5725,
"eval_msmarco_pairs_samples_per_second": 77.746,
"eval_msmarco_pairs_steps_per_second": 1.944,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_nq_pairs_loss": 1.5343101024627686,
"eval_nq_pairs_runtime": 5.7416,
"eval_nq_pairs_samples_per_second": 34.833,
"eval_nq_pairs_steps_per_second": 0.871,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_trivia_pairs_loss": 1.7511711120605469,
"eval_trivia_pairs_runtime": 9.1035,
"eval_trivia_pairs_samples_per_second": 21.97,
"eval_trivia_pairs_steps_per_second": 0.549,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_quora_pairs_loss": 0.2826410233974457,
"eval_quora_pairs_runtime": 0.6444,
"eval_quora_pairs_samples_per_second": 310.354,
"eval_quora_pairs_steps_per_second": 7.759,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_gooaq_pairs_loss": 1.2128998041152954,
"eval_gooaq_pairs_runtime": 1.5919,
"eval_gooaq_pairs_samples_per_second": 125.634,
"eval_gooaq_pairs_steps_per_second": 3.141,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_mrpc_pairs_loss": 0.16468097269535065,
"eval_mrpc_pairs_runtime": 0.2496,
"eval_mrpc_pairs_samples_per_second": 801.385,
"eval_mrpc_pairs_steps_per_second": 20.035,
"step": 5748
},
{
"epoch": 0.903387703889586,
"grad_norm": 2.899136543273926,
"learning_rate": 3.3438040416718773e-05,
"loss": 1.9863,
"step": 5760
},
{
"epoch": 0.9109159347553325,
"grad_norm": 14.919694900512695,
"learning_rate": 3.3282599052714414e-05,
"loss": 1.6917,
"step": 5808
},
{
"epoch": 0.918444165621079,
"grad_norm": 1.626105785369873,
"learning_rate": 3.312018270677559e-05,
"loss": 1.7409,
"step": 5856
},
{
"epoch": 0.9259723964868256,
"grad_norm": 15.8577299118042,
"learning_rate": 3.295086315738918e-05,
"loss": 1.4397,
"step": 5904
},
{
"epoch": 0.9335006273525721,
"grad_norm": 10.255402565002441,
"learning_rate": 3.277471523385255e-05,
"loss": 1.2977,
"step": 5952
},
{
"epoch": 0.9410288582183187,
"grad_norm": 31.09028434753418,
"learning_rate": 3.259181678320349e-05,
"loss": 1.5429,
"step": 6000
},
{
"epoch": 0.9485570890840652,
"grad_norm": 15.244925498962402,
"learning_rate": 3.2402248635816294e-05,
"loss": 1.6939,
"step": 6048
},
{
"epoch": 0.9560853199498118,
"grad_norm": 19.404956817626953,
"learning_rate": 3.2206094569679564e-05,
"loss": 1.5573,
"step": 6096
},
{
"epoch": 0.9636135508155583,
"grad_norm": 16.607341766357422,
"learning_rate": 3.200344127337121e-05,
"loss": 1.5873,
"step": 6144
},
{
"epoch": 0.9711417816813049,
"grad_norm": 1.1432667970657349,
"learning_rate": 3.179437830774722e-05,
"loss": 1.8802,
"step": 6192
},
{
"epoch": 0.9766311166875784,
"eval_nli-pairs_loss": 1.3268091678619385,
"eval_nli-pairs_runtime": 4.0761,
"eval_nli-pairs_samples_per_second": 49.066,
"eval_nli-pairs_steps_per_second": 1.227,
"eval_sts-test_pearson_cosine": 0.7536875808596679,
"eval_sts-test_pearson_dot": 0.5240503602011544,
"eval_sts-test_pearson_euclidean": 0.7440772899082505,
"eval_sts-test_pearson_manhattan": 0.7465814532083983,
"eval_sts-test_pearson_max": 0.7536875808596679,
"eval_sts-test_spearman_cosine": 0.7584726286921011,
"eval_sts-test_spearman_dot": 0.49490205708473545,
"eval_sts-test_spearman_euclidean": 0.7307844981527315,
"eval_sts-test_spearman_manhattan": 0.7362283105144983,
"eval_sts-test_spearman_max": 0.7584726286921011,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_vitaminc-pairs_loss": 5.498671054840088,
"eval_vitaminc-pairs_runtime": 1.4379,
"eval_vitaminc-pairs_samples_per_second": 115.449,
"eval_vitaminc-pairs_steps_per_second": 2.782,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_sts-label_loss": 4.3516716957092285,
"eval_sts-label_runtime": 0.3976,
"eval_sts-label_samples_per_second": 503.027,
"eval_sts-label_steps_per_second": 12.576,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_qnli-contrastive_loss": 0.30157506465911865,
"eval_qnli-contrastive_runtime": 0.2787,
"eval_qnli-contrastive_samples_per_second": 717.742,
"eval_qnli-contrastive_steps_per_second": 17.944,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_scitail-pairs-qa_loss": 0.14389516413211823,
"eval_scitail-pairs-qa_runtime": 1.0465,
"eval_scitail-pairs-qa_samples_per_second": 191.121,
"eval_scitail-pairs-qa_steps_per_second": 4.778,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_scitail-pairs-pos_loss": 0.6142529249191284,
"eval_scitail-pairs-pos_runtime": 2.3525,
"eval_scitail-pairs-pos_samples_per_second": 85.018,
"eval_scitail-pairs-pos_steps_per_second": 2.125,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_xsum-pairs_loss": 0.5579215288162231,
"eval_xsum-pairs_runtime": 1.0381,
"eval_xsum-pairs_samples_per_second": 192.655,
"eval_xsum-pairs_steps_per_second": 4.816,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_compression-pairs_loss": 0.2538767158985138,
"eval_compression-pairs_runtime": 0.2367,
"eval_compression-pairs_samples_per_second": 844.99,
"eval_compression-pairs_steps_per_second": 21.125,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_sciq_pairs_loss": 0.43967145681381226,
"eval_sciq_pairs_runtime": 9.0489,
"eval_sciq_pairs_samples_per_second": 22.102,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_qasc_pairs_loss": 0.7632485032081604,
"eval_qasc_pairs_runtime": 1.2216,
"eval_qasc_pairs_samples_per_second": 163.713,
"eval_qasc_pairs_steps_per_second": 4.093,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_openbookqa_pairs_loss": 2.370297908782959,
"eval_openbookqa_pairs_runtime": 1.051,
"eval_openbookqa_pairs_samples_per_second": 190.297,
"eval_openbookqa_pairs_steps_per_second": 4.757,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_msmarco_pairs_loss": 1.4191588163375854,
"eval_msmarco_pairs_runtime": 2.5294,
"eval_msmarco_pairs_samples_per_second": 79.071,
"eval_msmarco_pairs_steps_per_second": 1.977,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_nq_pairs_loss": 1.3847519159317017,
"eval_nq_pairs_runtime": 5.6464,
"eval_nq_pairs_samples_per_second": 35.421,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_trivia_pairs_loss": 1.6301060914993286,
"eval_trivia_pairs_runtime": 9.06,
"eval_trivia_pairs_samples_per_second": 22.075,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_quora_pairs_loss": 0.25898078083992004,
"eval_quora_pairs_runtime": 0.6161,
"eval_quora_pairs_samples_per_second": 324.63,
"eval_quora_pairs_steps_per_second": 8.116,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_gooaq_pairs_loss": 1.1585972309112549,
"eval_gooaq_pairs_runtime": 1.547,
"eval_gooaq_pairs_samples_per_second": 129.286,
"eval_gooaq_pairs_steps_per_second": 3.232,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_mrpc_pairs_loss": 0.12749388813972473,
"eval_mrpc_pairs_runtime": 0.243,
"eval_mrpc_pairs_samples_per_second": 822.939,
"eval_mrpc_pairs_steps_per_second": 20.573,
"step": 6227
},
{
"epoch": 0.9786700125470514,
"grad_norm": 20.923500061035156,
"learning_rate": 3.157899806636098e-05,
"loss": 1.9813,
"step": 6240
},
{
"epoch": 0.986198243412798,
"grad_norm": 24.244462966918945,
"learning_rate": 3.13573957346308e-05,
"loss": 2.2932,
"step": 6288
},
{
"epoch": 0.9937264742785445,
"grad_norm": 16.324562072753906,
"learning_rate": 3.112966924777352e-05,
"loss": 1.6308,
"step": 6336
},
{
"epoch": 1.001254705144291,
"grad_norm": 14.824076652526855,
"learning_rate": 3.0895919247522884e-05,
"loss": 1.497,
"step": 6384
},
{
"epoch": 1.0087829360100375,
"grad_norm": 18.46307373046875,
"learning_rate": 3.065624903765184e-05,
"loss": 1.758,
"step": 6432
},
{
"epoch": 1.0163111668757843,
"grad_norm": 16.654727935791016,
"learning_rate": 3.0410764538318303e-05,
"loss": 1.6188,
"step": 6480
},
{
"epoch": 1.0238393977415308,
"grad_norm": 2.175520896911621,
"learning_rate": 3.0159574239254692e-05,
"loss": 2.1126,
"step": 6528
},
{
"epoch": 1.0313676286072773,
"grad_norm": 20.126880645751953,
"learning_rate": 2.990278915182182e-05,
"loss": 1.6129,
"step": 6576
},
{
"epoch": 1.0388958594730238,
"grad_norm": 24.355375289916992,
"learning_rate": 2.964052275994841e-05,
"loss": 1.4584,
"step": 6624
},
{
"epoch": 1.0464240903387705,
"grad_norm": 11.541900634765625,
"learning_rate": 2.9372890969977852e-05,
"loss": 1.5975,
"step": 6672
},
{
"epoch": 1.0517565872020076,
"eval_nli-pairs_loss": 1.3095624446868896,
"eval_nli-pairs_runtime": 4.0202,
"eval_nli-pairs_samples_per_second": 49.749,
"eval_nli-pairs_steps_per_second": 1.244,
"eval_sts-test_pearson_cosine": 0.7778731779702309,
"eval_sts-test_pearson_dot": 0.5593371858287987,
"eval_sts-test_pearson_euclidean": 0.7587435793478869,
"eval_sts-test_pearson_manhattan": 0.7610239287161901,
"eval_sts-test_pearson_max": 0.7778731779702309,
"eval_sts-test_spearman_cosine": 0.7782906514134392,
"eval_sts-test_spearman_dot": 0.5371251012528334,
"eval_sts-test_spearman_euclidean": 0.7452348039865185,
"eval_sts-test_spearman_manhattan": 0.7492626260972672,
"eval_sts-test_spearman_max": 0.7782906514134392,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_vitaminc-pairs_loss": 5.074347496032715,
"eval_vitaminc-pairs_runtime": 1.4418,
"eval_vitaminc-pairs_samples_per_second": 115.134,
"eval_vitaminc-pairs_steps_per_second": 2.774,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_sts-label_loss": 4.242542266845703,
"eval_sts-label_runtime": 0.414,
"eval_sts-label_samples_per_second": 483.143,
"eval_sts-label_steps_per_second": 12.079,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_qnli-contrastive_loss": 0.26250946521759033,
"eval_qnli-contrastive_runtime": 0.2857,
"eval_qnli-contrastive_samples_per_second": 700.035,
"eval_qnli-contrastive_steps_per_second": 17.501,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_scitail-pairs-qa_loss": 0.15288515388965607,
"eval_scitail-pairs-qa_runtime": 1.1069,
"eval_scitail-pairs-qa_samples_per_second": 180.69,
"eval_scitail-pairs-qa_steps_per_second": 4.517,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_scitail-pairs-pos_loss": 0.5382486581802368,
"eval_scitail-pairs-pos_runtime": 2.3648,
"eval_scitail-pairs-pos_samples_per_second": 84.574,
"eval_scitail-pairs-pos_steps_per_second": 2.114,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_xsum-pairs_loss": 0.48308631777763367,
"eval_xsum-pairs_runtime": 1.0411,
"eval_xsum-pairs_samples_per_second": 192.11,
"eval_xsum-pairs_steps_per_second": 4.803,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_compression-pairs_loss": 0.23988038301467896,
"eval_compression-pairs_runtime": 0.2389,
"eval_compression-pairs_samples_per_second": 837.159,
"eval_compression-pairs_steps_per_second": 20.929,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_sciq_pairs_loss": 0.3883107304573059,
"eval_sciq_pairs_runtime": 9.1094,
"eval_sciq_pairs_samples_per_second": 21.955,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_qasc_pairs_loss": 0.6684954762458801,
"eval_qasc_pairs_runtime": 1.2249,
"eval_qasc_pairs_samples_per_second": 163.273,
"eval_qasc_pairs_steps_per_second": 4.082,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_openbookqa_pairs_loss": 2.076920986175537,
"eval_openbookqa_pairs_runtime": 1.0566,
"eval_openbookqa_pairs_samples_per_second": 189.291,
"eval_openbookqa_pairs_steps_per_second": 4.732,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_msmarco_pairs_loss": 1.3436123132705688,
"eval_msmarco_pairs_runtime": 2.5384,
"eval_msmarco_pairs_samples_per_second": 78.791,
"eval_msmarco_pairs_steps_per_second": 1.97,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_nq_pairs_loss": 1.2744060754776,
"eval_nq_pairs_runtime": 5.6694,
"eval_nq_pairs_samples_per_second": 35.277,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_trivia_pairs_loss": 1.356447458267212,
"eval_trivia_pairs_runtime": 9.0814,
"eval_trivia_pairs_samples_per_second": 22.023,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_quora_pairs_loss": 0.20534881949424744,
"eval_quora_pairs_runtime": 0.6243,
"eval_quora_pairs_samples_per_second": 320.363,
"eval_quora_pairs_steps_per_second": 8.009,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_gooaq_pairs_loss": 1.131415843963623,
"eval_gooaq_pairs_runtime": 1.6004,
"eval_gooaq_pairs_samples_per_second": 124.966,
"eval_gooaq_pairs_steps_per_second": 3.124,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_mrpc_pairs_loss": 0.12877897918224335,
"eval_mrpc_pairs_runtime": 0.2425,
"eval_mrpc_pairs_samples_per_second": 824.742,
"eval_mrpc_pairs_steps_per_second": 20.619,
"step": 6706
},
{
"epoch": 1.053952321204517,
"grad_norm": 12.653462409973145,
"learning_rate": 2.9100012059444395e-05,
"loss": 1.6933,
"step": 6720
},
{
"epoch": 1.0614805520702635,
"grad_norm": 13.93374252319336,
"learning_rate": 2.8822006624801445e-05,
"loss": 1.2931,
"step": 6768
},
{
"epoch": 1.06900878293601,
"grad_norm": 17.680423736572266,
"learning_rate": 2.8538997528125016e-05,
"loss": 1.8077,
"step": 6816
},
{
"epoch": 1.0765370138017567,
"grad_norm": 14.7294921875,
"learning_rate": 2.8251109842815857e-05,
"loss": 1.4748,
"step": 6864
},
{
"epoch": 1.0840652446675032,
"grad_norm": 26.121625900268555,
"learning_rate": 2.795847079832445e-05,
"loss": 1.225,
"step": 6912
},
{
"epoch": 1.0915934755332497,
"grad_norm": 5.665552616119385,
"learning_rate": 2.7661209723922966e-05,
"loss": 1.7627,
"step": 6960
},
{
"epoch": 1.0991217063989962,
"grad_norm": 1.2186944484710693,
"learning_rate": 2.7359457991549365e-05,
"loss": 1.5613,
"step": 7008
},
{
"epoch": 1.1066499372647427,
"grad_norm": 4.569064617156982,
"learning_rate": 2.7053348957748744e-05,
"loss": 1.6548,
"step": 7056
},
{
"epoch": 1.1141781681304894,
"grad_norm": 11.330352783203125,
"learning_rate": 2.6743017904737505e-05,
"loss": 1.4033,
"step": 7104
},
{
"epoch": 1.121706398996236,
"grad_norm": 27.65351676940918,
"learning_rate": 2.6428601980616635e-05,
"loss": 1.3999,
"step": 7152
},
{
"epoch": 1.1268820577164367,
"eval_nli-pairs_loss": 1.212507724761963,
"eval_nli-pairs_runtime": 4.0093,
"eval_nli-pairs_samples_per_second": 49.884,
"eval_nli-pairs_steps_per_second": 1.247,
"eval_sts-test_pearson_cosine": 0.771726417661154,
"eval_sts-test_pearson_dot": 0.5582118135503709,
"eval_sts-test_pearson_euclidean": 0.7582436737188462,
"eval_sts-test_pearson_manhattan": 0.7602002352027541,
"eval_sts-test_pearson_max": 0.771726417661154,
"eval_sts-test_spearman_cosine": 0.7760137659698252,
"eval_sts-test_spearman_dot": 0.5314151368436631,
"eval_sts-test_spearman_euclidean": 0.7447358001087161,
"eval_sts-test_spearman_manhattan": 0.7487681272112986,
"eval_sts-test_spearman_max": 0.7760137659698252,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_vitaminc-pairs_loss": 4.9936699867248535,
"eval_vitaminc-pairs_runtime": 1.4575,
"eval_vitaminc-pairs_samples_per_second": 113.89,
"eval_vitaminc-pairs_steps_per_second": 2.744,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_sts-label_loss": 4.1703057289123535,
"eval_sts-label_runtime": 0.4168,
"eval_sts-label_samples_per_second": 479.874,
"eval_sts-label_steps_per_second": 11.997,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_qnli-contrastive_loss": 0.31846657395362854,
"eval_qnli-contrastive_runtime": 0.2838,
"eval_qnli-contrastive_samples_per_second": 704.663,
"eval_qnli-contrastive_steps_per_second": 17.617,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_scitail-pairs-qa_loss": 0.1417759656906128,
"eval_scitail-pairs-qa_runtime": 1.0581,
"eval_scitail-pairs-qa_samples_per_second": 189.022,
"eval_scitail-pairs-qa_steps_per_second": 4.726,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_scitail-pairs-pos_loss": 0.5202088356018066,
"eval_scitail-pairs-pos_runtime": 2.4081,
"eval_scitail-pairs-pos_samples_per_second": 83.054,
"eval_scitail-pairs-pos_steps_per_second": 2.076,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_xsum-pairs_loss": 0.44685080647468567,
"eval_xsum-pairs_runtime": 1.0416,
"eval_xsum-pairs_samples_per_second": 192.009,
"eval_xsum-pairs_steps_per_second": 4.8,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_compression-pairs_loss": 0.22411338984966278,
"eval_compression-pairs_runtime": 0.2438,
"eval_compression-pairs_samples_per_second": 820.201,
"eval_compression-pairs_steps_per_second": 20.505,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_sciq_pairs_loss": 0.37388285994529724,
"eval_sciq_pairs_runtime": 9.0875,
"eval_sciq_pairs_samples_per_second": 22.008,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_qasc_pairs_loss": 0.6410768032073975,
"eval_qasc_pairs_runtime": 1.2169,
"eval_qasc_pairs_samples_per_second": 164.348,
"eval_qasc_pairs_steps_per_second": 4.109,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_openbookqa_pairs_loss": 2.043297052383423,
"eval_openbookqa_pairs_runtime": 1.0552,
"eval_openbookqa_pairs_samples_per_second": 189.539,
"eval_openbookqa_pairs_steps_per_second": 4.738,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_msmarco_pairs_loss": 1.2795078754425049,
"eval_msmarco_pairs_runtime": 2.5343,
"eval_msmarco_pairs_samples_per_second": 78.919,
"eval_msmarco_pairs_steps_per_second": 1.973,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_nq_pairs_loss": 1.1771754026412964,
"eval_nq_pairs_runtime": 5.6589,
"eval_nq_pairs_samples_per_second": 35.343,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_trivia_pairs_loss": 1.305577039718628,
"eval_trivia_pairs_runtime": 9.0872,
"eval_trivia_pairs_samples_per_second": 22.009,
"eval_trivia_pairs_steps_per_second": 0.55,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_quora_pairs_loss": 0.2645386755466461,
"eval_quora_pairs_runtime": 0.6429,
"eval_quora_pairs_samples_per_second": 311.086,
"eval_quora_pairs_steps_per_second": 7.777,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_gooaq_pairs_loss": 1.0562756061553955,
"eval_gooaq_pairs_runtime": 1.5772,
"eval_gooaq_pairs_samples_per_second": 126.809,
"eval_gooaq_pairs_steps_per_second": 3.17,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_mrpc_pairs_loss": 0.1197453960776329,
"eval_mrpc_pairs_runtime": 0.2393,
"eval_mrpc_pairs_samples_per_second": 835.714,
"eval_mrpc_pairs_steps_per_second": 20.893,
"step": 7185
},
{
"epoch": 1.1292346298619824,
"grad_norm": 26.328977584838867,
"learning_rate": 2.611024013876024e-05,
"loss": 1.707,
"step": 7200
},
{
"epoch": 1.136762860727729,
"grad_norm": 14.730170249938965,
"learning_rate": 2.578807307640633e-05,
"loss": 1.2812,
"step": 7248
},
{
"epoch": 1.1442910915934754,
"grad_norm": 16.989944458007812,
"learning_rate": 2.546224317247695e-05,
"loss": 2.0209,
"step": 7296
},
{
"epoch": 1.1518193224592221,
"grad_norm": 15.942720413208008,
"learning_rate": 2.5132894424654982e-05,
"loss": 1.465,
"step": 7344
},
{
"epoch": 1.1593475533249686,
"grad_norm": 13.42844009399414,
"learning_rate": 2.4800172385745665e-05,
"loss": 1.6181,
"step": 7392
},
{
"epoch": 1.1668757841907151,
"grad_norm": 2.6255691051483154,
"learning_rate": 2.446422409935082e-05,
"loss": 1.3386,
"step": 7440
},
{
"epoch": 1.1744040150564616,
"grad_norm": 11.230644226074219,
"learning_rate": 2.412519803488417e-05,
"loss": 1.4015,
"step": 7488
},
{
"epoch": 1.1819322459222084,
"grad_norm": 17.679582595825195,
"learning_rate": 2.3783244021956605e-05,
"loss": 1.5057,
"step": 7536
},
{
"epoch": 1.1894604767879549,
"grad_norm": 12.440255165100098,
"learning_rate": 2.3438513184160302e-05,
"loss": 1.2441,
"step": 7584
},
{
"epoch": 1.1969887076537014,
"grad_norm": 17.67916488647461,
"learning_rate": 2.309115787228094e-05,
"loss": 1.1718,
"step": 7632
},
{
"epoch": 1.2020075282308658,
"eval_nli-pairs_loss": 1.1914833784103394,
"eval_nli-pairs_runtime": 4.1213,
"eval_nli-pairs_samples_per_second": 48.528,
"eval_nli-pairs_steps_per_second": 1.213,
"eval_sts-test_pearson_cosine": 0.7800804097388158,
"eval_sts-test_pearson_dot": 0.5541152451346555,
"eval_sts-test_pearson_euclidean": 0.7555650256629914,
"eval_sts-test_pearson_manhattan": 0.7570661501424889,
"eval_sts-test_pearson_max": 0.7800804097388158,
"eval_sts-test_spearman_cosine": 0.7837125392806782,
"eval_sts-test_spearman_dot": 0.5299991823870257,
"eval_sts-test_spearman_euclidean": 0.7420646288254193,
"eval_sts-test_spearman_manhattan": 0.7454823370503758,
"eval_sts-test_spearman_max": 0.7837125392806782,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_vitaminc-pairs_loss": 4.815649509429932,
"eval_vitaminc-pairs_runtime": 1.4965,
"eval_vitaminc-pairs_samples_per_second": 110.928,
"eval_vitaminc-pairs_steps_per_second": 2.673,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_sts-label_loss": 4.200761795043945,
"eval_sts-label_runtime": 0.4104,
"eval_sts-label_samples_per_second": 487.308,
"eval_sts-label_steps_per_second": 12.183,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_qnli-contrastive_loss": 0.21287916600704193,
"eval_qnli-contrastive_runtime": 0.2849,
"eval_qnli-contrastive_samples_per_second": 702.086,
"eval_qnli-contrastive_steps_per_second": 17.552,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_scitail-pairs-qa_loss": 0.12968133389949799,
"eval_scitail-pairs-qa_runtime": 1.0998,
"eval_scitail-pairs-qa_samples_per_second": 181.845,
"eval_scitail-pairs-qa_steps_per_second": 4.546,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_scitail-pairs-pos_loss": 0.4889526665210724,
"eval_scitail-pairs-pos_runtime": 2.3956,
"eval_scitail-pairs-pos_samples_per_second": 83.486,
"eval_scitail-pairs-pos_steps_per_second": 2.087,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_xsum-pairs_loss": 0.42897719144821167,
"eval_xsum-pairs_runtime": 1.0492,
"eval_xsum-pairs_samples_per_second": 190.623,
"eval_xsum-pairs_steps_per_second": 4.766,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_compression-pairs_loss": 0.20111607015132904,
"eval_compression-pairs_runtime": 0.2549,
"eval_compression-pairs_samples_per_second": 784.74,
"eval_compression-pairs_steps_per_second": 19.618,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_sciq_pairs_loss": 0.36706623435020447,
"eval_sciq_pairs_runtime": 9.1959,
"eval_sciq_pairs_samples_per_second": 21.749,
"eval_sciq_pairs_steps_per_second": 0.544,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_qasc_pairs_loss": 0.5133731961250305,
"eval_qasc_pairs_runtime": 1.2209,
"eval_qasc_pairs_samples_per_second": 163.813,
"eval_qasc_pairs_steps_per_second": 4.095,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_openbookqa_pairs_loss": 1.9764889478683472,
"eval_openbookqa_pairs_runtime": 1.1177,
"eval_openbookqa_pairs_samples_per_second": 178.939,
"eval_openbookqa_pairs_steps_per_second": 4.473,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_msmarco_pairs_loss": 1.1972062587738037,
"eval_msmarco_pairs_runtime": 2.5812,
"eval_msmarco_pairs_samples_per_second": 77.485,
"eval_msmarco_pairs_steps_per_second": 1.937,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_nq_pairs_loss": 1.1358052492141724,
"eval_nq_pairs_runtime": 5.7952,
"eval_nq_pairs_samples_per_second": 34.511,
"eval_nq_pairs_steps_per_second": 0.863,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_trivia_pairs_loss": 1.3059653043746948,
"eval_trivia_pairs_runtime": 9.1651,
"eval_trivia_pairs_samples_per_second": 21.822,
"eval_trivia_pairs_steps_per_second": 0.546,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_quora_pairs_loss": 0.23538345098495483,
"eval_quora_pairs_runtime": 0.6553,
"eval_quora_pairs_samples_per_second": 305.181,
"eval_quora_pairs_steps_per_second": 7.63,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_gooaq_pairs_loss": 1.0195775032043457,
"eval_gooaq_pairs_runtime": 1.5615,
"eval_gooaq_pairs_samples_per_second": 128.081,
"eval_gooaq_pairs_steps_per_second": 3.202,
"step": 7664
},
{
"epoch": 1.2020075282308658,
"eval_mrpc_pairs_loss": 0.10825151205062866,
"eval_mrpc_pairs_runtime": 0.2544,
"eval_mrpc_pairs_samples_per_second": 786.113,
"eval_mrpc_pairs_steps_per_second": 19.653,
"step": 7664
},
{
"epoch": 1.2045169385194479,
"grad_norm": 11.216833114624023,
"learning_rate": 2.2741331596967632e-05,
"loss": 1.7562,
"step": 7680
},
{
"epoch": 1.2120451693851946,
"grad_norm": 0.7245489358901978,
"learning_rate": 2.238918896089022e-05,
"loss": 1.2954,
"step": 7728
},
{
"epoch": 1.219573400250941,
"grad_norm": 18.958086013793945,
"learning_rate": 2.2034885590413966e-05,
"loss": 1.7821,
"step": 7776
},
{
"epoch": 1.2271016311166876,
"grad_norm": 1.1195874214172363,
"learning_rate": 2.1678578066821842e-05,
"loss": 1.4494,
"step": 7824
},
{
"epoch": 1.234629861982434,
"grad_norm": 10.63464641571045,
"learning_rate": 2.1320423857114807e-05,
"loss": 1.1674,
"step": 7872
},
{
"epoch": 1.2421580928481806,
"grad_norm": 20.8350830078125,
"learning_rate": 2.0960581244420636e-05,
"loss": 1.1511,
"step": 7920
},
{
"epoch": 1.2496863237139273,
"grad_norm": 20.231365203857422,
"learning_rate": 2.0599209258042067e-05,
"loss": 1.1629,
"step": 7968
},
{
"epoch": 1.2572145545796738,
"grad_norm": 2.3765740394592285,
"learning_rate": 2.023646760317521e-05,
"loss": 1.167,
"step": 8016
},
{
"epoch": 1.2647427854454203,
"grad_norm": 0.45109543204307556,
"learning_rate": 1.9872516590329177e-05,
"loss": 1.5169,
"step": 8064
},
{
"epoch": 1.272271016311167,
"grad_norm": 19.704111099243164,
"learning_rate": 1.9507517064478322e-05,
"loss": 1.1595,
"step": 8112
},
{
"epoch": 1.2771329987452948,
"eval_nli-pairs_loss": 1.117584466934204,
"eval_nli-pairs_runtime": 4.0331,
"eval_nli-pairs_samples_per_second": 49.59,
"eval_nli-pairs_steps_per_second": 1.24,
"eval_sts-test_pearson_cosine": 0.7735358515290777,
"eval_sts-test_pearson_dot": 0.560523750110175,
"eval_sts-test_pearson_euclidean": 0.7469529952792658,
"eval_sts-test_pearson_manhattan": 0.7479614812850265,
"eval_sts-test_pearson_max": 0.7735358515290777,
"eval_sts-test_spearman_cosine": 0.7789103785997893,
"eval_sts-test_spearman_dot": 0.534930979573433,
"eval_sts-test_spearman_euclidean": 0.7320475561187462,
"eval_sts-test_spearman_manhattan": 0.7359660045883268,
"eval_sts-test_spearman_max": 0.7789103785997893,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_vitaminc-pairs_loss": 4.841164588928223,
"eval_vitaminc-pairs_runtime": 1.4299,
"eval_vitaminc-pairs_samples_per_second": 116.092,
"eval_vitaminc-pairs_steps_per_second": 2.797,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_sts-label_loss": 4.0971784591674805,
"eval_sts-label_runtime": 0.4026,
"eval_sts-label_samples_per_second": 496.749,
"eval_sts-label_steps_per_second": 12.419,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_qnli-contrastive_loss": 0.2050691843032837,
"eval_qnli-contrastive_runtime": 0.2817,
"eval_qnli-contrastive_samples_per_second": 710.023,
"eval_qnli-contrastive_steps_per_second": 17.751,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_scitail-pairs-qa_loss": 0.11168017983436584,
"eval_scitail-pairs-qa_runtime": 1.056,
"eval_scitail-pairs-qa_samples_per_second": 189.398,
"eval_scitail-pairs-qa_steps_per_second": 4.735,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_scitail-pairs-pos_loss": 0.495766818523407,
"eval_scitail-pairs-pos_runtime": 2.4272,
"eval_scitail-pairs-pos_samples_per_second": 82.4,
"eval_scitail-pairs-pos_steps_per_second": 2.06,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_xsum-pairs_loss": 0.43334144353866577,
"eval_xsum-pairs_runtime": 1.0522,
"eval_xsum-pairs_samples_per_second": 190.086,
"eval_xsum-pairs_steps_per_second": 4.752,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_compression-pairs_loss": 0.1837506741285324,
"eval_compression-pairs_runtime": 0.2435,
"eval_compression-pairs_samples_per_second": 821.487,
"eval_compression-pairs_steps_per_second": 20.537,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_sciq_pairs_loss": 0.35108524560928345,
"eval_sciq_pairs_runtime": 9.102,
"eval_sciq_pairs_samples_per_second": 21.973,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_qasc_pairs_loss": 0.486527681350708,
"eval_qasc_pairs_runtime": 1.2312,
"eval_qasc_pairs_samples_per_second": 162.45,
"eval_qasc_pairs_steps_per_second": 4.061,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_openbookqa_pairs_loss": 1.9944250583648682,
"eval_openbookqa_pairs_runtime": 1.0997,
"eval_openbookqa_pairs_samples_per_second": 181.872,
"eval_openbookqa_pairs_steps_per_second": 4.547,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_msmarco_pairs_loss": 1.1277974843978882,
"eval_msmarco_pairs_runtime": 2.5357,
"eval_msmarco_pairs_samples_per_second": 78.874,
"eval_msmarco_pairs_steps_per_second": 1.972,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_nq_pairs_loss": 1.092708945274353,
"eval_nq_pairs_runtime": 5.6524,
"eval_nq_pairs_samples_per_second": 35.383,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_trivia_pairs_loss": 1.3160641193389893,
"eval_trivia_pairs_runtime": 9.0422,
"eval_trivia_pairs_samples_per_second": 22.118,
"eval_trivia_pairs_steps_per_second": 0.553,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_quora_pairs_loss": 0.1377909630537033,
"eval_quora_pairs_runtime": 0.6252,
"eval_quora_pairs_samples_per_second": 319.913,
"eval_quora_pairs_steps_per_second": 7.998,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_gooaq_pairs_loss": 0.9788944721221924,
"eval_gooaq_pairs_runtime": 1.5536,
"eval_gooaq_pairs_samples_per_second": 128.731,
"eval_gooaq_pairs_steps_per_second": 3.218,
"step": 8143
},
{
"epoch": 1.2771329987452948,
"eval_mrpc_pairs_loss": 0.09496909379959106,
"eval_mrpc_pairs_runtime": 0.2386,
"eval_mrpc_pairs_samples_per_second": 838.384,
"eval_mrpc_pairs_steps_per_second": 20.96,
"step": 8143
},
{
"epoch": 1.2797992471769133,
"grad_norm": 10.626935958862305,
"learning_rate": 1.914163033397814e-05,
"loss": 1.4598,
"step": 8160
},
{
"epoch": 1.28732747804266,
"grad_norm": 70.21989440917969,
"learning_rate": 1.8775018099276515e-05,
"loss": 1.2038,
"step": 8208
},
{
"epoch": 1.2948557089084065,
"grad_norm": 13.136975288391113,
"learning_rate": 1.8407842381451562e-05,
"loss": 1.2381,
"step": 8256
},
{
"epoch": 1.302383939774153,
"grad_norm": 14.311907768249512,
"learning_rate": 1.804026545060783e-05,
"loss": 0.9591,
"step": 8304
},
{
"epoch": 1.3099121706398997,
"grad_norm": 11.372726440429688,
"learning_rate": 1.767244975416249e-05,
"loss": 1.3451,
"step": 8352
},
{
"epoch": 1.3174404015056462,
"grad_norm": 12.1253080368042,
"learning_rate": 1.7304557845053075e-05,
"loss": 1.2669,
"step": 8400
},
{
"epoch": 1.3249686323713927,
"grad_norm": 10.53577995300293,
"learning_rate": 1.693675230989863e-05,
"loss": 1.4754,
"step": 8448
},
{
"epoch": 1.3324968632371392,
"grad_norm": 16.669851303100586,
"learning_rate": 1.6576849494355775e-05,
"loss": 1.2094,
"step": 8496
},
{
"epoch": 1.3400250941028857,
"grad_norm": 0.5680477023124695,
"learning_rate": 1.6209694016709658e-05,
"loss": 1.4406,
"step": 8544
},
{
"epoch": 1.3475533249686324,
"grad_norm": 13.915863990783691,
"learning_rate": 1.5843108778533816e-05,
"loss": 1.3063,
"step": 8592
},
{
"epoch": 1.352258469259724,
"eval_nli-pairs_loss": 1.1088637113571167,
"eval_nli-pairs_runtime": 4.0091,
"eval_nli-pairs_samples_per_second": 49.887,
"eval_nli-pairs_steps_per_second": 1.247,
"eval_sts-test_pearson_cosine": 0.7694392469514715,
"eval_sts-test_pearson_dot": 0.5341434503970798,
"eval_sts-test_pearson_euclidean": 0.7443691249920426,
"eval_sts-test_pearson_manhattan": 0.7461066741203818,
"eval_sts-test_pearson_max": 0.7694392469514715,
"eval_sts-test_spearman_cosine": 0.7756586193959978,
"eval_sts-test_spearman_dot": 0.5051721749088769,
"eval_sts-test_spearman_euclidean": 0.73008171628055,
"eval_sts-test_spearman_manhattan": 0.7347429927090328,
"eval_sts-test_spearman_max": 0.7756586193959978,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_vitaminc-pairs_loss": 4.769205093383789,
"eval_vitaminc-pairs_runtime": 1.4375,
"eval_vitaminc-pairs_samples_per_second": 115.477,
"eval_vitaminc-pairs_steps_per_second": 2.783,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_sts-label_loss": 4.07755184173584,
"eval_sts-label_runtime": 0.4049,
"eval_sts-label_samples_per_second": 493.895,
"eval_sts-label_steps_per_second": 12.347,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_qnli-contrastive_loss": 0.17923730611801147,
"eval_qnli-contrastive_runtime": 0.2806,
"eval_qnli-contrastive_samples_per_second": 712.745,
"eval_qnli-contrastive_steps_per_second": 17.819,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_scitail-pairs-qa_loss": 0.10800071805715561,
"eval_scitail-pairs-qa_runtime": 1.1053,
"eval_scitail-pairs-qa_samples_per_second": 180.952,
"eval_scitail-pairs-qa_steps_per_second": 4.524,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_scitail-pairs-pos_loss": 0.48035934567451477,
"eval_scitail-pairs-pos_runtime": 2.3848,
"eval_scitail-pairs-pos_samples_per_second": 83.864,
"eval_scitail-pairs-pos_steps_per_second": 2.097,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_xsum-pairs_loss": 0.408091276884079,
"eval_xsum-pairs_runtime": 1.043,
"eval_xsum-pairs_samples_per_second": 191.746,
"eval_xsum-pairs_steps_per_second": 4.794,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_compression-pairs_loss": 0.17393065989017487,
"eval_compression-pairs_runtime": 0.2388,
"eval_compression-pairs_samples_per_second": 837.593,
"eval_compression-pairs_steps_per_second": 20.94,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_sciq_pairs_loss": 0.3497403562068939,
"eval_sciq_pairs_runtime": 9.1128,
"eval_sciq_pairs_samples_per_second": 21.947,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_qasc_pairs_loss": 0.4620342552661896,
"eval_qasc_pairs_runtime": 1.2208,
"eval_qasc_pairs_samples_per_second": 163.822,
"eval_qasc_pairs_steps_per_second": 4.096,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_openbookqa_pairs_loss": 1.9002811908721924,
"eval_openbookqa_pairs_runtime": 1.0593,
"eval_openbookqa_pairs_samples_per_second": 188.81,
"eval_openbookqa_pairs_steps_per_second": 4.72,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_msmarco_pairs_loss": 1.0928370952606201,
"eval_msmarco_pairs_runtime": 2.5227,
"eval_msmarco_pairs_samples_per_second": 79.281,
"eval_msmarco_pairs_steps_per_second": 1.982,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_nq_pairs_loss": 1.0306727886199951,
"eval_nq_pairs_runtime": 5.6685,
"eval_nq_pairs_samples_per_second": 35.283,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_trivia_pairs_loss": 1.3031796216964722,
"eval_trivia_pairs_runtime": 9.0827,
"eval_trivia_pairs_samples_per_second": 22.02,
"eval_trivia_pairs_steps_per_second": 0.55,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_quora_pairs_loss": 0.16622459888458252,
"eval_quora_pairs_runtime": 0.6233,
"eval_quora_pairs_samples_per_second": 320.863,
"eval_quora_pairs_steps_per_second": 8.022,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_gooaq_pairs_loss": 0.9280920624732971,
"eval_gooaq_pairs_runtime": 1.5586,
"eval_gooaq_pairs_samples_per_second": 128.317,
"eval_gooaq_pairs_steps_per_second": 3.208,
"step": 8622
},
{
"epoch": 1.352258469259724,
"eval_mrpc_pairs_loss": 0.09042897820472717,
"eval_mrpc_pairs_runtime": 0.2477,
"eval_mrpc_pairs_samples_per_second": 807.373,
"eval_mrpc_pairs_steps_per_second": 20.184,
"step": 8622
},
{
"epoch": 1.355081555834379,
"grad_norm": 10.317242622375488,
"learning_rate": 1.5477255788975485e-05,
"loss": 1.1662,
"step": 8640
},
{
"epoch": 1.3626097867001254,
"grad_norm": 14.411606788635254,
"learning_rate": 1.5112296733571015e-05,
"loss": 1.1592,
"step": 8688
},
{
"epoch": 1.370138017565872,
"grad_norm": 14.328046798706055,
"learning_rate": 1.474839290279036e-05,
"loss": 1.1742,
"step": 8736
},
{
"epoch": 1.3776662484316184,
"grad_norm": 15.689784049987793,
"learning_rate": 1.4385705120756155e-05,
"loss": 1.2304,
"step": 8784
},
{
"epoch": 1.3851944792973652,
"grad_norm": 1.1622222661972046,
"learning_rate": 1.4024393674168905e-05,
"loss": 1.0637,
"step": 8832
},
{
"epoch": 1.3927227101631117,
"grad_norm": 55.88763427734375,
"learning_rate": 1.3664618241469671e-05,
"loss": 1.8133,
"step": 8880
},
{
"epoch": 1.4002509410288582,
"grad_norm": 11.148764610290527,
"learning_rate": 1.3306537822271584e-05,
"loss": 1.682,
"step": 8928
},
{
"epoch": 1.4077791718946049,
"grad_norm": 34.30131149291992,
"learning_rate": 1.2950310667091337e-05,
"loss": 1.1823,
"step": 8976
},
{
"epoch": 1.4153074027603514,
"grad_norm": 1.3407506942749023,
"learning_rate": 1.2596094207411785e-05,
"loss": 1.2953,
"step": 9024
},
{
"epoch": 1.4228356336260979,
"grad_norm": 0.3961174190044403,
"learning_rate": 1.2244044986106427e-05,
"loss": 0.9769,
"step": 9072
},
{
"epoch": 1.427383939774153,
"eval_nli-pairs_loss": 1.065342664718628,
"eval_nli-pairs_runtime": 4.0707,
"eval_nli-pairs_samples_per_second": 49.132,
"eval_nli-pairs_steps_per_second": 1.228,
"eval_sts-test_pearson_cosine": 0.7731513422471518,
"eval_sts-test_pearson_dot": 0.5353916353979121,
"eval_sts-test_pearson_euclidean": 0.7469853367274143,
"eval_sts-test_pearson_manhattan": 0.7481039749789392,
"eval_sts-test_pearson_max": 0.7731513422471518,
"eval_sts-test_spearman_cosine": 0.7792255681202213,
"eval_sts-test_spearman_dot": 0.5083392134944579,
"eval_sts-test_spearman_euclidean": 0.7335576317363027,
"eval_sts-test_spearman_manhattan": 0.7370337922992453,
"eval_sts-test_spearman_max": 0.7792255681202213,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_vitaminc-pairs_loss": 4.8438720703125,
"eval_vitaminc-pairs_runtime": 1.4283,
"eval_vitaminc-pairs_samples_per_second": 116.218,
"eval_vitaminc-pairs_steps_per_second": 2.8,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_sts-label_loss": 4.092858791351318,
"eval_sts-label_runtime": 0.407,
"eval_sts-label_samples_per_second": 491.445,
"eval_sts-label_steps_per_second": 12.286,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_qnli-contrastive_loss": 0.15653812885284424,
"eval_qnli-contrastive_runtime": 0.2813,
"eval_qnli-contrastive_samples_per_second": 711.049,
"eval_qnli-contrastive_steps_per_second": 17.776,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_scitail-pairs-qa_loss": 0.10110773146152496,
"eval_scitail-pairs-qa_runtime": 1.0598,
"eval_scitail-pairs-qa_samples_per_second": 188.718,
"eval_scitail-pairs-qa_steps_per_second": 4.718,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_scitail-pairs-pos_loss": 0.46982836723327637,
"eval_scitail-pairs-pos_runtime": 2.3471,
"eval_scitail-pairs-pos_samples_per_second": 85.213,
"eval_scitail-pairs-pos_steps_per_second": 2.13,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_xsum-pairs_loss": 0.3937150537967682,
"eval_xsum-pairs_runtime": 1.0402,
"eval_xsum-pairs_samples_per_second": 192.278,
"eval_xsum-pairs_steps_per_second": 4.807,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_compression-pairs_loss": 0.16437682509422302,
"eval_compression-pairs_runtime": 0.2405,
"eval_compression-pairs_samples_per_second": 831.709,
"eval_compression-pairs_steps_per_second": 20.793,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_sciq_pairs_loss": 0.3407261073589325,
"eval_sciq_pairs_runtime": 9.0549,
"eval_sciq_pairs_samples_per_second": 22.088,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_qasc_pairs_loss": 0.4374461770057678,
"eval_qasc_pairs_runtime": 1.2131,
"eval_qasc_pairs_samples_per_second": 164.864,
"eval_qasc_pairs_steps_per_second": 4.122,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_openbookqa_pairs_loss": 1.935410737991333,
"eval_openbookqa_pairs_runtime": 1.0551,
"eval_openbookqa_pairs_samples_per_second": 189.556,
"eval_openbookqa_pairs_steps_per_second": 4.739,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_msmarco_pairs_loss": 1.043725609779358,
"eval_msmarco_pairs_runtime": 2.5259,
"eval_msmarco_pairs_samples_per_second": 79.179,
"eval_msmarco_pairs_steps_per_second": 1.979,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_nq_pairs_loss": 0.9651347398757935,
"eval_nq_pairs_runtime": 5.6774,
"eval_nq_pairs_samples_per_second": 35.227,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_trivia_pairs_loss": 1.2430963516235352,
"eval_trivia_pairs_runtime": 9.0564,
"eval_trivia_pairs_samples_per_second": 22.084,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_quora_pairs_loss": 0.21621538698673248,
"eval_quora_pairs_runtime": 0.6252,
"eval_quora_pairs_samples_per_second": 319.908,
"eval_quora_pairs_steps_per_second": 7.998,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_gooaq_pairs_loss": 0.8927494287490845,
"eval_gooaq_pairs_runtime": 1.5775,
"eval_gooaq_pairs_samples_per_second": 126.779,
"eval_gooaq_pairs_steps_per_second": 3.169,
"step": 9101
},
{
"epoch": 1.427383939774153,
"eval_mrpc_pairs_loss": 0.08728782832622528,
"eval_mrpc_pairs_runtime": 0.2422,
"eval_mrpc_pairs_samples_per_second": 825.773,
"eval_mrpc_pairs_steps_per_second": 20.644,
"step": 9101
},
{
"epoch": 1.4303638644918444,
"grad_norm": 10.624916076660156,
"learning_rate": 1.1894318588256689e-05,
"loss": 1.2641,
"step": 9120
},
{
"epoch": 1.4378920953575909,
"grad_norm": 16.24142074584961,
"learning_rate": 1.1547069572392431e-05,
"loss": 1.4688,
"step": 9168
},
{
"epoch": 1.4454203262233376,
"grad_norm": 18.161853790283203,
"learning_rate": 1.120245140218616e-05,
"loss": 1.2061,
"step": 9216
},
{
"epoch": 1.452948557089084,
"grad_norm": 9.601255416870117,
"learning_rate": 1.0860616378631118e-05,
"loss": 1.0768,
"step": 9264
},
{
"epoch": 1.4604767879548306,
"grad_norm": 10.885648727416992,
"learning_rate": 1.0521715572733149e-05,
"loss": 0.9741,
"step": 9312
},
{
"epoch": 1.468005018820577,
"grad_norm": 4.2853007316589355,
"learning_rate": 1.018589875874622e-05,
"loss": 0.9977,
"step": 9360
},
{
"epoch": 1.4755332496863236,
"grad_norm": 14.765129089355469,
"learning_rate": 9.853314347980975e-06,
"loss": 1.308,
"step": 9408
},
{
"epoch": 1.4830614805520703,
"grad_norm": 0.6372986435890198,
"learning_rate": 9.524109323215664e-06,
"loss": 1.3859,
"step": 9456
},
{
"epoch": 1.4905897114178168,
"grad_norm": 5.864046573638916,
"learning_rate": 9.19842917373838e-06,
"loss": 0.9261,
"step": 9504
},
{
"epoch": 1.4981179422835633,
"grad_norm": 2.619722366333008,
"learning_rate": 8.87641783104935e-06,
"loss": 1.0473,
"step": 9552
},
{
"epoch": 1.502509410288582,
"eval_nli-pairs_loss": 1.0287954807281494,
"eval_nli-pairs_runtime": 4.1819,
"eval_nli-pairs_samples_per_second": 47.825,
"eval_nli-pairs_steps_per_second": 1.196,
"eval_sts-test_pearson_cosine": 0.772936269755451,
"eval_sts-test_pearson_dot": 0.5408568837341375,
"eval_sts-test_pearson_euclidean": 0.7431310400735589,
"eval_sts-test_pearson_manhattan": 0.7441695396815142,
"eval_sts-test_pearson_max": 0.772936269755451,
"eval_sts-test_spearman_cosine": 0.7808770614234607,
"eval_sts-test_spearman_dot": 0.5146216887404633,
"eval_sts-test_spearman_euclidean": 0.7291900713010305,
"eval_sts-test_spearman_manhattan": 0.732990460571053,
"eval_sts-test_spearman_max": 0.7808770614234607,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_vitaminc-pairs_loss": 4.766480445861816,
"eval_vitaminc-pairs_runtime": 1.5156,
"eval_vitaminc-pairs_samples_per_second": 109.528,
"eval_vitaminc-pairs_steps_per_second": 2.639,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_sts-label_loss": 4.150259494781494,
"eval_sts-label_runtime": 0.4493,
"eval_sts-label_samples_per_second": 445.153,
"eval_sts-label_steps_per_second": 11.129,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_qnli-contrastive_loss": 0.1541556417942047,
"eval_qnli-contrastive_runtime": 0.296,
"eval_qnli-contrastive_samples_per_second": 675.668,
"eval_qnli-contrastive_steps_per_second": 16.892,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_scitail-pairs-qa_loss": 0.1008567065000534,
"eval_scitail-pairs-qa_runtime": 1.0997,
"eval_scitail-pairs-qa_samples_per_second": 181.866,
"eval_scitail-pairs-qa_steps_per_second": 4.547,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_scitail-pairs-pos_loss": 0.47411486506462097,
"eval_scitail-pairs-pos_runtime": 2.4599,
"eval_scitail-pairs-pos_samples_per_second": 81.304,
"eval_scitail-pairs-pos_steps_per_second": 2.033,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_xsum-pairs_loss": 0.3825688064098358,
"eval_xsum-pairs_runtime": 1.0466,
"eval_xsum-pairs_samples_per_second": 191.088,
"eval_xsum-pairs_steps_per_second": 4.777,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_compression-pairs_loss": 0.16390098631381989,
"eval_compression-pairs_runtime": 0.2542,
"eval_compression-pairs_samples_per_second": 786.663,
"eval_compression-pairs_steps_per_second": 19.667,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_sciq_pairs_loss": 0.3327814042568207,
"eval_sciq_pairs_runtime": 9.1285,
"eval_sciq_pairs_samples_per_second": 21.909,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_qasc_pairs_loss": 0.42448753118515015,
"eval_qasc_pairs_runtime": 1.238,
"eval_qasc_pairs_samples_per_second": 161.554,
"eval_qasc_pairs_steps_per_second": 4.039,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_openbookqa_pairs_loss": 1.9015610218048096,
"eval_openbookqa_pairs_runtime": 1.0684,
"eval_openbookqa_pairs_samples_per_second": 187.196,
"eval_openbookqa_pairs_steps_per_second": 4.68,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_msmarco_pairs_loss": 1.049147605895996,
"eval_msmarco_pairs_runtime": 2.5566,
"eval_msmarco_pairs_samples_per_second": 78.23,
"eval_msmarco_pairs_steps_per_second": 1.956,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_nq_pairs_loss": 0.938487708568573,
"eval_nq_pairs_runtime": 5.7298,
"eval_nq_pairs_samples_per_second": 34.905,
"eval_nq_pairs_steps_per_second": 0.873,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_trivia_pairs_loss": 1.1729891300201416,
"eval_trivia_pairs_runtime": 9.1105,
"eval_trivia_pairs_samples_per_second": 21.953,
"eval_trivia_pairs_steps_per_second": 0.549,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_quora_pairs_loss": 0.24502086639404297,
"eval_quora_pairs_runtime": 0.6539,
"eval_quora_pairs_samples_per_second": 305.842,
"eval_quora_pairs_steps_per_second": 7.646,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_gooaq_pairs_loss": 0.8770759105682373,
"eval_gooaq_pairs_runtime": 1.6351,
"eval_gooaq_pairs_samples_per_second": 122.318,
"eval_gooaq_pairs_steps_per_second": 3.058,
"step": 9580
},
{
"epoch": 1.502509410288582,
"eval_mrpc_pairs_loss": 0.08623871207237244,
"eval_mrpc_pairs_runtime": 0.2404,
"eval_mrpc_pairs_samples_per_second": 832.077,
"eval_mrpc_pairs_steps_per_second": 20.802,
"step": 9580
},
{
"epoch": 1.50564617314931,
"grad_norm": 11.285247802734375,
"learning_rate": 8.558217605251645e-06,
"loss": 1.1806,
"step": 9600
},
{
"epoch": 1.5131744040150563,
"grad_norm": 10.830084800720215,
"learning_rate": 8.24396912215848e-06,
"loss": 1.1989,
"step": 9648
},
{
"epoch": 1.520702634880803,
"grad_norm": 9.796110153198242,
"learning_rate": 7.933811261144869e-06,
"loss": 1.0403,
"step": 9696
},
{
"epoch": 1.5282308657465495,
"grad_norm": 0.9950674176216125,
"learning_rate": 7.627881093771088e-06,
"loss": 1.0811,
"step": 9744
},
{
"epoch": 1.535759096612296,
"grad_norm": 11.353304862976074,
"learning_rate": 7.326313823205104e-06,
"loss": 1.3524,
"step": 9792
},
{
"epoch": 1.5432873274780428,
"grad_norm": 12.985937118530273,
"learning_rate": 7.029242724470705e-06,
"loss": 0.9578,
"step": 9840
},
{
"epoch": 1.5508155583437893,
"grad_norm": 8.885215759277344,
"learning_rate": 6.736799085547775e-06,
"loss": 1.2745,
"step": 9888
},
{
"epoch": 1.5583437892095358,
"grad_norm": 0.3098331391811371,
"learning_rate": 6.4491121493507095e-06,
"loss": 1.0615,
"step": 9936
},
{
"epoch": 1.5658720200752823,
"grad_norm": 14.668235778808594,
"learning_rate": 6.1663090566106425e-06,
"loss": 0.9778,
"step": 9984
},
{
"epoch": 1.5734002509410288,
"grad_norm": 14.634328842163086,
"learning_rate": 5.888514789686705e-06,
"loss": 1.017,
"step": 10032
},
{
"epoch": 1.5776348808030112,
"eval_nli-pairs_loss": 1.018608808517456,
"eval_nli-pairs_runtime": 3.9852,
"eval_nli-pairs_samples_per_second": 50.186,
"eval_nli-pairs_steps_per_second": 1.255,
"eval_sts-test_pearson_cosine": 0.7763527186202379,
"eval_sts-test_pearson_dot": 0.5518277089774203,
"eval_sts-test_pearson_euclidean": 0.7484152647439328,
"eval_sts-test_pearson_manhattan": 0.7486588645110376,
"eval_sts-test_pearson_max": 0.7763527186202379,
"eval_sts-test_spearman_cosine": 0.7862296389332735,
"eval_sts-test_spearman_dot": 0.5246898972209683,
"eval_sts-test_spearman_euclidean": 0.7340067918071436,
"eval_sts-test_spearman_manhattan": 0.7368940903638915,
"eval_sts-test_spearman_max": 0.7862296389332735,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_vitaminc-pairs_loss": 4.802022457122803,
"eval_vitaminc-pairs_runtime": 1.4418,
"eval_vitaminc-pairs_samples_per_second": 115.137,
"eval_vitaminc-pairs_steps_per_second": 2.774,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_sts-label_loss": 4.154313087463379,
"eval_sts-label_runtime": 0.4056,
"eval_sts-label_samples_per_second": 493.08,
"eval_sts-label_steps_per_second": 12.327,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_qnli-contrastive_loss": 0.1918843388557434,
"eval_qnli-contrastive_runtime": 0.2808,
"eval_qnli-contrastive_samples_per_second": 712.295,
"eval_qnli-contrastive_steps_per_second": 17.807,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_scitail-pairs-qa_loss": 0.10166393220424652,
"eval_scitail-pairs-qa_runtime": 1.0544,
"eval_scitail-pairs-qa_samples_per_second": 189.685,
"eval_scitail-pairs-qa_steps_per_second": 4.742,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_scitail-pairs-pos_loss": 0.4371533691883087,
"eval_scitail-pairs-pos_runtime": 2.4374,
"eval_scitail-pairs-pos_samples_per_second": 82.054,
"eval_scitail-pairs-pos_steps_per_second": 2.051,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_xsum-pairs_loss": 0.3739396035671234,
"eval_xsum-pairs_runtime": 1.0423,
"eval_xsum-pairs_samples_per_second": 191.881,
"eval_xsum-pairs_steps_per_second": 4.797,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_compression-pairs_loss": 0.16310901939868927,
"eval_compression-pairs_runtime": 0.2376,
"eval_compression-pairs_samples_per_second": 841.702,
"eval_compression-pairs_steps_per_second": 21.043,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_sciq_pairs_loss": 0.3224416971206665,
"eval_sciq_pairs_runtime": 9.0722,
"eval_sciq_pairs_samples_per_second": 22.045,
"eval_sciq_pairs_steps_per_second": 0.551,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_qasc_pairs_loss": 0.4264788031578064,
"eval_qasc_pairs_runtime": 1.2568,
"eval_qasc_pairs_samples_per_second": 159.131,
"eval_qasc_pairs_steps_per_second": 3.978,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_openbookqa_pairs_loss": 1.824275016784668,
"eval_openbookqa_pairs_runtime": 1.0491,
"eval_openbookqa_pairs_samples_per_second": 190.648,
"eval_openbookqa_pairs_steps_per_second": 4.766,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_msmarco_pairs_loss": 1.0266730785369873,
"eval_msmarco_pairs_runtime": 2.5174,
"eval_msmarco_pairs_samples_per_second": 79.447,
"eval_msmarco_pairs_steps_per_second": 1.986,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_nq_pairs_loss": 0.9473356604576111,
"eval_nq_pairs_runtime": 5.6496,
"eval_nq_pairs_samples_per_second": 35.401,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_trivia_pairs_loss": 1.104791522026062,
"eval_trivia_pairs_runtime": 9.073,
"eval_trivia_pairs_samples_per_second": 22.044,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_quora_pairs_loss": 0.2634066939353943,
"eval_quora_pairs_runtime": 0.6209,
"eval_quora_pairs_samples_per_second": 322.127,
"eval_quora_pairs_steps_per_second": 8.053,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_gooaq_pairs_loss": 0.8539897203445435,
"eval_gooaq_pairs_runtime": 1.5537,
"eval_gooaq_pairs_samples_per_second": 128.725,
"eval_gooaq_pairs_steps_per_second": 3.218,
"step": 10059
},
{
"epoch": 1.5776348808030112,
"eval_mrpc_pairs_loss": 0.08671045303344727,
"eval_mrpc_pairs_runtime": 0.237,
"eval_mrpc_pairs_samples_per_second": 843.81,
"eval_mrpc_pairs_steps_per_second": 21.095,
"step": 10059
},
{
"epoch": 1.5809284818067755,
"grad_norm": 4.454440593719482,
"learning_rate": 5.615852117331175e-06,
"loss": 1.3413,
"step": 10080
},
{
"epoch": 1.588456712672522,
"grad_norm": 0.9061813950538635,
"learning_rate": 5.348441540432878e-06,
"loss": 1.1091,
"step": 10128
},
{
"epoch": 1.5959849435382685,
"grad_norm": 5.169127464294434,
"learning_rate": 5.086401238762887e-06,
"loss": 1.3101,
"step": 10176
},
{
"epoch": 1.6035131744040152,
"grad_norm": 4.494983196258545,
"learning_rate": 4.8298470187459895e-06,
"loss": 1.1126,
"step": 10224
},
{
"epoch": 1.6110414052697615,
"grad_norm": 0.4734310507774353,
"learning_rate": 4.578892262281069e-06,
"loss": 0.7667,
"step": 10272
},
{
"epoch": 1.6185696361355082,
"grad_norm": 0.4003587067127228,
"learning_rate": 4.333647876632947e-06,
"loss": 1.0281,
"step": 10320
},
{
"epoch": 1.6260978670012547,
"grad_norm": 1.0558795928955078,
"learning_rate": 4.094222245417886e-06,
"loss": 1.2234,
"step": 10368
},
{
"epoch": 1.6336260978670012,
"grad_norm": 1.4687271118164062,
"learning_rate": 3.860721180704409e-06,
"loss": 0.9402,
"step": 10416
},
{
"epoch": 1.641154328732748,
"grad_norm": 16.27718734741211,
"learning_rate": 3.6332478762505653e-06,
"loss": 0.9698,
"step": 10464
},
{
"epoch": 1.6486825595984942,
"grad_norm": 12.507079124450684,
"learning_rate": 3.411902861898354e-06,
"loss": 1.1373,
"step": 10512
},
{
"epoch": 1.6527603513174403,
"eval_nli-pairs_loss": 1.0013196468353271,
"eval_nli-pairs_runtime": 4.0021,
"eval_nli-pairs_samples_per_second": 49.974,
"eval_nli-pairs_steps_per_second": 1.249,
"eval_sts-test_pearson_cosine": 0.7756730383098025,
"eval_sts-test_pearson_dot": 0.547730552868467,
"eval_sts-test_pearson_euclidean": 0.7481152916902115,
"eval_sts-test_pearson_manhattan": 0.7486290345837316,
"eval_sts-test_pearson_max": 0.7756730383098025,
"eval_sts-test_spearman_cosine": 0.7844179279938138,
"eval_sts-test_spearman_dot": 0.5204811914426826,
"eval_sts-test_spearman_euclidean": 0.7335913694810947,
"eval_sts-test_spearman_manhattan": 0.7368797089125716,
"eval_sts-test_spearman_max": 0.7844179279938138,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_vitaminc-pairs_loss": 4.780285835266113,
"eval_vitaminc-pairs_runtime": 1.4992,
"eval_vitaminc-pairs_samples_per_second": 110.723,
"eval_vitaminc-pairs_steps_per_second": 2.668,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_sts-label_loss": 4.135310173034668,
"eval_sts-label_runtime": 0.4056,
"eval_sts-label_samples_per_second": 493.077,
"eval_sts-label_steps_per_second": 12.327,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_qnli-contrastive_loss": 0.18042831122875214,
"eval_qnli-contrastive_runtime": 0.2833,
"eval_qnli-contrastive_samples_per_second": 706.004,
"eval_qnli-contrastive_steps_per_second": 17.65,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_scitail-pairs-qa_loss": 0.09958843886852264,
"eval_scitail-pairs-qa_runtime": 1.0443,
"eval_scitail-pairs-qa_samples_per_second": 191.524,
"eval_scitail-pairs-qa_steps_per_second": 4.788,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_scitail-pairs-pos_loss": 0.4427280128002167,
"eval_scitail-pairs-pos_runtime": 2.3552,
"eval_scitail-pairs-pos_samples_per_second": 84.918,
"eval_scitail-pairs-pos_steps_per_second": 2.123,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_xsum-pairs_loss": 0.3688097298145294,
"eval_xsum-pairs_runtime": 1.041,
"eval_xsum-pairs_samples_per_second": 192.117,
"eval_xsum-pairs_steps_per_second": 4.803,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_compression-pairs_loss": 0.15796488523483276,
"eval_compression-pairs_runtime": 0.2407,
"eval_compression-pairs_samples_per_second": 830.774,
"eval_compression-pairs_steps_per_second": 20.769,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_sciq_pairs_loss": 0.3223775029182434,
"eval_sciq_pairs_runtime": 9.094,
"eval_sciq_pairs_samples_per_second": 21.992,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_qasc_pairs_loss": 0.4077293872833252,
"eval_qasc_pairs_runtime": 1.2194,
"eval_qasc_pairs_samples_per_second": 164.013,
"eval_qasc_pairs_steps_per_second": 4.1,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_openbookqa_pairs_loss": 1.8293620347976685,
"eval_openbookqa_pairs_runtime": 1.0475,
"eval_openbookqa_pairs_samples_per_second": 190.939,
"eval_openbookqa_pairs_steps_per_second": 4.773,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_msmarco_pairs_loss": 1.0322593450546265,
"eval_msmarco_pairs_runtime": 2.5279,
"eval_msmarco_pairs_samples_per_second": 79.117,
"eval_msmarco_pairs_steps_per_second": 1.978,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_nq_pairs_loss": 0.9389599561691284,
"eval_nq_pairs_runtime": 5.6466,
"eval_nq_pairs_samples_per_second": 35.42,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_trivia_pairs_loss": 1.1042495965957642,
"eval_trivia_pairs_runtime": 9.0489,
"eval_trivia_pairs_samples_per_second": 22.102,
"eval_trivia_pairs_steps_per_second": 0.553,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_quora_pairs_loss": 0.17337936162948608,
"eval_quora_pairs_runtime": 0.6556,
"eval_quora_pairs_samples_per_second": 305.082,
"eval_quora_pairs_steps_per_second": 7.627,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_gooaq_pairs_loss": 0.8494808673858643,
"eval_gooaq_pairs_runtime": 1.5517,
"eval_gooaq_pairs_samples_per_second": 128.894,
"eval_gooaq_pairs_steps_per_second": 3.222,
"step": 10538
},
{
"epoch": 1.6527603513174403,
"eval_mrpc_pairs_loss": 0.08429060131311417,
"eval_mrpc_pairs_runtime": 0.2427,
"eval_mrpc_pairs_samples_per_second": 824.181,
"eval_mrpc_pairs_steps_per_second": 20.605,
"step": 10538
},
{
"epoch": 1.656210790464241,
"grad_norm": 9.579873085021973,
"learning_rate": 3.196783959145439e-06,
"loss": 1.433,
"step": 10560
},
{
"epoch": 1.6637390213299874,
"grad_norm": 20.199016571044922,
"learning_rate": 2.9879862379138003e-06,
"loss": 0.7482,
"step": 10608
},
{
"epoch": 1.671267252195734,
"grad_norm": 0.24710102379322052,
"learning_rate": 2.7897522915539458e-06,
"loss": 1.1968,
"step": 10656
},
{
"epoch": 1.6787954830614806,
"grad_norm": 9.337010383605957,
"learning_rate": 2.5937345579013605e-06,
"loss": 1.2424,
"step": 10704
},
{
"epoch": 1.6863237139272271,
"grad_norm": 13.432022094726562,
"learning_rate": 2.4043045181907913e-06,
"loss": 1.379,
"step": 10752
},
{
"epoch": 1.6938519447929736,
"grad_norm": 8.947489738464355,
"learning_rate": 2.2215458893753594e-06,
"loss": 1.4127,
"step": 10800
},
{
"epoch": 1.7013801756587204,
"grad_norm": 16.45635414123535,
"learning_rate": 2.045539440036334e-06,
"loss": 1.203,
"step": 10848
},
{
"epoch": 1.7089084065244666,
"grad_norm": 15.3703031539917,
"learning_rate": 1.8763629546881908e-06,
"loss": 0.7367,
"step": 10896
},
{
"epoch": 1.7164366373902133,
"grad_norm": 4.174514293670654,
"learning_rate": 1.714091199402421e-06,
"loss": 0.9207,
"step": 10944
},
{
"epoch": 1.7239648682559598,
"grad_norm": 2.0591108798980713,
"learning_rate": 1.558795888765306e-06,
"loss": 1.0067,
"step": 10992
},
{
"epoch": 1.7278858218318696,
"eval_nli-pairs_loss": 0.9888688325881958,
"eval_nli-pairs_runtime": 3.9929,
"eval_nli-pairs_samples_per_second": 50.089,
"eval_nli-pairs_steps_per_second": 1.252,
"eval_sts-test_pearson_cosine": 0.7760455578346633,
"eval_sts-test_pearson_dot": 0.5470527605127045,
"eval_sts-test_pearson_euclidean": 0.7462734713816128,
"eval_sts-test_pearson_manhattan": 0.7469300157882987,
"eval_sts-test_pearson_max": 0.7760455578346633,
"eval_sts-test_spearman_cosine": 0.7843247414472263,
"eval_sts-test_spearman_dot": 0.5195687627478929,
"eval_sts-test_spearman_euclidean": 0.7314459897716622,
"eval_sts-test_spearman_manhattan": 0.7349029838686436,
"eval_sts-test_spearman_max": 0.7843247414472263,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_vitaminc-pairs_loss": 4.805673122406006,
"eval_vitaminc-pairs_runtime": 1.4386,
"eval_vitaminc-pairs_samples_per_second": 115.388,
"eval_vitaminc-pairs_steps_per_second": 2.78,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_sts-label_loss": 4.089193344116211,
"eval_sts-label_runtime": 0.4049,
"eval_sts-label_samples_per_second": 493.975,
"eval_sts-label_steps_per_second": 12.349,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_qnli-contrastive_loss": 0.16640476882457733,
"eval_qnli-contrastive_runtime": 0.2871,
"eval_qnli-contrastive_samples_per_second": 696.519,
"eval_qnli-contrastive_steps_per_second": 17.413,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_scitail-pairs-qa_loss": 0.09589708596467972,
"eval_scitail-pairs-qa_runtime": 1.0968,
"eval_scitail-pairs-qa_samples_per_second": 182.341,
"eval_scitail-pairs-qa_steps_per_second": 4.559,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_scitail-pairs-pos_loss": 0.44937801361083984,
"eval_scitail-pairs-pos_runtime": 2.4011,
"eval_scitail-pairs-pos_samples_per_second": 83.296,
"eval_scitail-pairs-pos_steps_per_second": 2.082,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_xsum-pairs_loss": 0.3645179867744446,
"eval_xsum-pairs_runtime": 1.0406,
"eval_xsum-pairs_samples_per_second": 192.196,
"eval_xsum-pairs_steps_per_second": 4.805,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_compression-pairs_loss": 0.15408411622047424,
"eval_compression-pairs_runtime": 0.2368,
"eval_compression-pairs_samples_per_second": 844.756,
"eval_compression-pairs_steps_per_second": 21.119,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_sciq_pairs_loss": 0.3195297122001648,
"eval_sciq_pairs_runtime": 9.0709,
"eval_sciq_pairs_samples_per_second": 22.049,
"eval_sciq_pairs_steps_per_second": 0.551,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_qasc_pairs_loss": 0.40569430589675903,
"eval_qasc_pairs_runtime": 1.2166,
"eval_qasc_pairs_samples_per_second": 164.395,
"eval_qasc_pairs_steps_per_second": 4.11,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_openbookqa_pairs_loss": 1.8376811742782593,
"eval_openbookqa_pairs_runtime": 1.0469,
"eval_openbookqa_pairs_samples_per_second": 191.033,
"eval_openbookqa_pairs_steps_per_second": 4.776,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_msmarco_pairs_loss": 1.0271800756454468,
"eval_msmarco_pairs_runtime": 2.5127,
"eval_msmarco_pairs_samples_per_second": 79.597,
"eval_msmarco_pairs_steps_per_second": 1.99,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_nq_pairs_loss": 0.9162300229072571,
"eval_nq_pairs_runtime": 5.647,
"eval_nq_pairs_samples_per_second": 35.417,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_trivia_pairs_loss": 1.0903488397598267,
"eval_trivia_pairs_runtime": 9.0283,
"eval_trivia_pairs_samples_per_second": 22.153,
"eval_trivia_pairs_steps_per_second": 0.554,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_quora_pairs_loss": 0.19430270791053772,
"eval_quora_pairs_runtime": 0.6239,
"eval_quora_pairs_samples_per_second": 320.54,
"eval_quora_pairs_steps_per_second": 8.013,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_gooaq_pairs_loss": 0.8347020149230957,
"eval_gooaq_pairs_runtime": 1.5623,
"eval_gooaq_pairs_samples_per_second": 128.013,
"eval_gooaq_pairs_steps_per_second": 3.2,
"step": 11017
},
{
"epoch": 1.7278858218318696,
"eval_mrpc_pairs_loss": 0.08172078430652618,
"eval_mrpc_pairs_runtime": 0.2448,
"eval_mrpc_pairs_samples_per_second": 816.98,
"eval_mrpc_pairs_steps_per_second": 20.425,
"step": 11017
},
{
"epoch": 1.7314930991217063,
"grad_norm": 16.512481689453125,
"learning_rate": 1.410545654184303e-06,
"loss": 0.8782,
"step": 11040
},
{
"epoch": 1.739021329987453,
"grad_norm": 15.815868377685547,
"learning_rate": 1.2694060135569684e-06,
"loss": 1.1617,
"step": 11088
},
{
"epoch": 1.7465495608531993,
"grad_norm": 0.39903897047042847,
"learning_rate": 1.135439342315913e-06,
"loss": 1.0122,
"step": 11136
},
{
"epoch": 1.754077791718946,
"grad_norm": 7.398995876312256,
"learning_rate": 1.0087048458625138e-06,
"loss": 0.6694,
"step": 11184
},
{
"epoch": 1.7616060225846926,
"grad_norm": 10.535834312438965,
"learning_rate": 8.892585334016114e-07,
"loss": 1.1209,
"step": 11232
},
{
"epoch": 1.769134253450439,
"grad_norm": 14.994894027709961,
"learning_rate": 7.771531931887249e-07,
"loss": 1.1,
"step": 11280
},
{
"epoch": 1.7766624843161858,
"grad_norm": 29.789087295532227,
"learning_rate": 6.724383692007457e-07,
"loss": 0.9796,
"step": 11328
},
{
"epoch": 1.7841907151819323,
"grad_norm": 16.97585105895996,
"learning_rate": 5.751603392404059e-07,
"loss": 0.9533,
"step": 11376
},
{
"epoch": 1.7917189460476788,
"grad_norm": 36.554412841796875,
"learning_rate": 4.853620944842135e-07,
"loss": 0.9207,
"step": 11424
},
{
"epoch": 1.7992471769134255,
"grad_norm": 12.20507526397705,
"learning_rate": 4.030833204828687e-07,
"loss": 1.0267,
"step": 11472
},
{
"epoch": 1.8030112923462986,
"eval_nli-pairs_loss": 0.9834117293357849,
"eval_nli-pairs_runtime": 4.454,
"eval_nli-pairs_samples_per_second": 44.904,
"eval_nli-pairs_steps_per_second": 1.123,
"eval_sts-test_pearson_cosine": 0.7757193524880511,
"eval_sts-test_pearson_dot": 0.5461517523115149,
"eval_sts-test_pearson_euclidean": 0.7464325603201095,
"eval_sts-test_pearson_manhattan": 0.7470492681562052,
"eval_sts-test_pearson_max": 0.7757193524880511,
"eval_sts-test_spearman_cosine": 0.7841895178722113,
"eval_sts-test_spearman_dot": 0.5190518591730641,
"eval_sts-test_spearman_euclidean": 0.7317065051907755,
"eval_sts-test_spearman_manhattan": 0.7348866670601053,
"eval_sts-test_spearman_max": 0.7841895178722113,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_vitaminc-pairs_loss": 4.804275989532471,
"eval_vitaminc-pairs_runtime": 1.4573,
"eval_vitaminc-pairs_samples_per_second": 113.908,
"eval_vitaminc-pairs_steps_per_second": 2.745,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_sts-label_loss": 4.131558418273926,
"eval_sts-label_runtime": 0.4133,
"eval_sts-label_samples_per_second": 483.878,
"eval_sts-label_steps_per_second": 12.097,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_qnli-contrastive_loss": 0.16553626954555511,
"eval_qnli-contrastive_runtime": 0.2817,
"eval_qnli-contrastive_samples_per_second": 709.973,
"eval_qnli-contrastive_steps_per_second": 17.749,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_scitail-pairs-qa_loss": 0.09527866542339325,
"eval_scitail-pairs-qa_runtime": 1.0708,
"eval_scitail-pairs-qa_samples_per_second": 186.771,
"eval_scitail-pairs-qa_steps_per_second": 4.669,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_scitail-pairs-pos_loss": 0.446532279253006,
"eval_scitail-pairs-pos_runtime": 2.3976,
"eval_scitail-pairs-pos_samples_per_second": 83.418,
"eval_scitail-pairs-pos_steps_per_second": 2.085,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_xsum-pairs_loss": 0.36277005076408386,
"eval_xsum-pairs_runtime": 1.0497,
"eval_xsum-pairs_samples_per_second": 190.534,
"eval_xsum-pairs_steps_per_second": 4.763,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_compression-pairs_loss": 0.15356417000293732,
"eval_compression-pairs_runtime": 0.2565,
"eval_compression-pairs_samples_per_second": 779.676,
"eval_compression-pairs_steps_per_second": 19.492,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_sciq_pairs_loss": 0.3197508752346039,
"eval_sciq_pairs_runtime": 9.2253,
"eval_sciq_pairs_samples_per_second": 21.679,
"eval_sciq_pairs_steps_per_second": 0.542,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_qasc_pairs_loss": 0.40177223086357117,
"eval_qasc_pairs_runtime": 1.2313,
"eval_qasc_pairs_samples_per_second": 162.427,
"eval_qasc_pairs_steps_per_second": 4.061,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_openbookqa_pairs_loss": 1.8343558311462402,
"eval_openbookqa_pairs_runtime": 1.0933,
"eval_openbookqa_pairs_samples_per_second": 182.939,
"eval_openbookqa_pairs_steps_per_second": 4.573,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_msmarco_pairs_loss": 1.0230004787445068,
"eval_msmarco_pairs_runtime": 2.5513,
"eval_msmarco_pairs_samples_per_second": 78.392,
"eval_msmarco_pairs_steps_per_second": 1.96,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_nq_pairs_loss": 0.9057186841964722,
"eval_nq_pairs_runtime": 5.7628,
"eval_nq_pairs_samples_per_second": 34.705,
"eval_nq_pairs_steps_per_second": 0.868,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_trivia_pairs_loss": 1.0895284414291382,
"eval_trivia_pairs_runtime": 9.1178,
"eval_trivia_pairs_samples_per_second": 21.935,
"eval_trivia_pairs_steps_per_second": 0.548,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_quora_pairs_loss": 0.1568813920021057,
"eval_quora_pairs_runtime": 0.6394,
"eval_quora_pairs_samples_per_second": 312.772,
"eval_quora_pairs_steps_per_second": 7.819,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_gooaq_pairs_loss": 0.8383786082267761,
"eval_gooaq_pairs_runtime": 1.5566,
"eval_gooaq_pairs_samples_per_second": 128.488,
"eval_gooaq_pairs_steps_per_second": 3.212,
"step": 11496
},
{
"epoch": 1.8030112923462986,
"eval_mrpc_pairs_loss": 0.08112096786499023,
"eval_mrpc_pairs_runtime": 0.2439,
"eval_mrpc_pairs_samples_per_second": 819.882,
"eval_mrpc_pairs_steps_per_second": 20.497,
"step": 11496
},
{
"epoch": 1.8067754077791718,
"grad_norm": 24.458271026611328,
"learning_rate": 3.2836037962258505e-07,
"loss": 0.8635,
"step": 11520
},
{
"epoch": 1.8143036386449185,
"grad_norm": 6.1133222579956055,
"learning_rate": 2.612262950550589e-07,
"loss": 0.9252,
"step": 11568
},
{
"epoch": 1.821831869510665,
"grad_norm": 10.278335571289062,
"learning_rate": 2.0171073610316003e-07,
"loss": 1.32,
"step": 11616
},
{
"epoch": 1.8293601003764115,
"grad_norm": 14.742138862609863,
"learning_rate": 1.4984000514884242e-07,
"loss": 1.3209,
"step": 11664
},
{
"epoch": 1.8368883312421582,
"grad_norm": 7.676764011383057,
"learning_rate": 1.056370260090303e-07,
"loss": 0.8251,
"step": 11712
},
{
"epoch": 1.8444165621079045,
"grad_norm": 1.2442536354064941,
"learning_rate": 6.912133380464951e-08,
"loss": 0.8179,
"step": 11760
},
{
"epoch": 1.8519447929736512,
"grad_norm": 15.847275733947754,
"learning_rate": 4.030906632723735e-08,
"loss": 0.8666,
"step": 11808
},
{
"epoch": 1.8594730238393977,
"grad_norm": 0.36177679896354675,
"learning_rate": 1.9212956906992372e-08,
"loss": 0.9244,
"step": 11856
},
{
"epoch": 1.8670012547051442,
"grad_norm": 8.72006607055664,
"learning_rate": 5.842328785392869e-09,
"loss": 0.9468,
"step": 11904
},
{
"epoch": 1.874529485570891,
"grad_norm": 15.35616397857666,
"learning_rate": 2.0309099486470748e-10,
"loss": 1.2313,
"step": 11952
},
{
"epoch": 1.8781367628607277,
"eval_nli-pairs_loss": 1.0213488340377808,
"eval_nli-pairs_runtime": 3.9888,
"eval_nli-pairs_samples_per_second": 50.14,
"eval_nli-pairs_steps_per_second": 1.254,
"eval_sts-test_pearson_cosine": 0.7751861574983592,
"eval_sts-test_pearson_dot": 0.5406109395741264,
"eval_sts-test_pearson_euclidean": 0.7432969762219094,
"eval_sts-test_pearson_manhattan": 0.7450333218883929,
"eval_sts-test_pearson_max": 0.7751861574983592,
"eval_sts-test_spearman_cosine": 0.7794274058735736,
"eval_sts-test_spearman_dot": 0.5144540377132975,
"eval_sts-test_spearman_euclidean": 0.727450852531023,
"eval_sts-test_spearman_manhattan": 0.7318298848990707,
"eval_sts-test_spearman_max": 0.7794274058735736,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_vitaminc-pairs_loss": 4.819454193115234,
"eval_vitaminc-pairs_runtime": 1.4385,
"eval_vitaminc-pairs_samples_per_second": 115.395,
"eval_vitaminc-pairs_steps_per_second": 2.781,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_sts-label_loss": 4.0922932624816895,
"eval_sts-label_runtime": 0.403,
"eval_sts-label_samples_per_second": 496.282,
"eval_sts-label_steps_per_second": 12.407,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_qnli-contrastive_loss": 0.14450308680534363,
"eval_qnli-contrastive_runtime": 0.2805,
"eval_qnli-contrastive_samples_per_second": 712.965,
"eval_qnli-contrastive_steps_per_second": 17.824,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_scitail-pairs-qa_loss": 0.0972411260008812,
"eval_scitail-pairs-qa_runtime": 1.0535,
"eval_scitail-pairs-qa_samples_per_second": 189.845,
"eval_scitail-pairs-qa_steps_per_second": 4.746,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_scitail-pairs-pos_loss": 0.47073617577552795,
"eval_scitail-pairs-pos_runtime": 2.4056,
"eval_scitail-pairs-pos_samples_per_second": 83.141,
"eval_scitail-pairs-pos_steps_per_second": 2.079,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_xsum-pairs_loss": 0.3865247964859009,
"eval_xsum-pairs_runtime": 1.0467,
"eval_xsum-pairs_samples_per_second": 191.083,
"eval_xsum-pairs_steps_per_second": 4.777,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_compression-pairs_loss": 0.15423807501792908,
"eval_compression-pairs_runtime": 0.2378,
"eval_compression-pairs_samples_per_second": 841.003,
"eval_compression-pairs_steps_per_second": 21.025,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_sciq_pairs_loss": 0.3360276520252228,
"eval_sciq_pairs_runtime": 9.0552,
"eval_sciq_pairs_samples_per_second": 22.087,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_qasc_pairs_loss": 0.40982764959335327,
"eval_qasc_pairs_runtime": 1.2436,
"eval_qasc_pairs_samples_per_second": 160.828,
"eval_qasc_pairs_steps_per_second": 4.021,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_openbookqa_pairs_loss": 1.8729889392852783,
"eval_openbookqa_pairs_runtime": 1.106,
"eval_openbookqa_pairs_samples_per_second": 180.839,
"eval_openbookqa_pairs_steps_per_second": 4.521,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_msmarco_pairs_loss": 1.0765292644500732,
"eval_msmarco_pairs_runtime": 2.5194,
"eval_msmarco_pairs_samples_per_second": 79.385,
"eval_msmarco_pairs_steps_per_second": 1.985,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_nq_pairs_loss": 0.9723155498504639,
"eval_nq_pairs_runtime": 5.6285,
"eval_nq_pairs_samples_per_second": 35.534,
"eval_nq_pairs_steps_per_second": 0.888,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_trivia_pairs_loss": 1.2097830772399902,
"eval_trivia_pairs_runtime": 9.0412,
"eval_trivia_pairs_samples_per_second": 22.121,
"eval_trivia_pairs_steps_per_second": 0.553,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_quora_pairs_loss": 0.1777983158826828,
"eval_quora_pairs_runtime": 0.6175,
"eval_quora_pairs_samples_per_second": 323.895,
"eval_quora_pairs_steps_per_second": 8.097,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_gooaq_pairs_loss": 0.8855485320091248,
"eval_gooaq_pairs_runtime": 1.5417,
"eval_gooaq_pairs_samples_per_second": 129.724,
"eval_gooaq_pairs_steps_per_second": 3.243,
"step": 11975
},
{
"epoch": 1.8781367628607277,
"eval_mrpc_pairs_loss": 0.0837549939751625,
"eval_mrpc_pairs_runtime": 0.2368,
"eval_mrpc_pairs_samples_per_second": 844.455,
"eval_mrpc_pairs_steps_per_second": 21.111,
"step": 11975
},
{
"epoch": 1.8820577164366374,
"grad_norm": 6.167259216308594,
"learning_rate": 3.4997702264252654e-05,
"loss": 1.0019,
"step": 12000
},
{
"epoch": 1.889585947302384,
"grad_norm": 0.49681809544563293,
"learning_rate": 3.498787466266714e-05,
"loss": 0.8415,
"step": 12048
},
{
"epoch": 1.8971141781681304,
"grad_norm": 11.792595863342285,
"learning_rate": 3.4970318447471354e-05,
"loss": 1.0103,
"step": 12096
},
{
"epoch": 1.904642409033877,
"grad_norm": 0.8563029766082764,
"learning_rate": 3.494504137748141e-05,
"loss": 1.1249,
"step": 12144
},
{
"epoch": 1.9121706398996237,
"grad_norm": 36.67128372192383,
"learning_rate": 3.4912054623677835e-05,
"loss": 1.2655,
"step": 12192
},
{
"epoch": 1.9196988707653702,
"grad_norm": 15.591288566589355,
"learning_rate": 3.4871372764268616e-05,
"loss": 0.8125,
"step": 12240
},
{
"epoch": 1.9272271016311167,
"grad_norm": 0.8200180530548096,
"learning_rate": 3.482301377824655e-05,
"loss": 0.8559,
"step": 12288
},
{
"epoch": 1.9347553324968634,
"grad_norm": 0.6030488014221191,
"learning_rate": 3.476699903744353e-05,
"loss": 0.8416,
"step": 12336
},
{
"epoch": 1.9422835633626097,
"grad_norm": 17.357330322265625,
"learning_rate": 3.470335329708547e-05,
"loss": 1.0393,
"step": 12384
},
{
"epoch": 1.9498117942283564,
"grad_norm": 15.783160209655762,
"learning_rate": 3.463210468485197e-05,
"loss": 0.946,
"step": 12432
},
{
"epoch": 1.9532622333751568,
"eval_nli-pairs_loss": 1.0603748559951782,
"eval_nli-pairs_runtime": 4.0941,
"eval_nli-pairs_samples_per_second": 48.85,
"eval_nli-pairs_steps_per_second": 1.221,
"eval_sts-test_pearson_cosine": 0.7721603126678054,
"eval_sts-test_pearson_dot": 0.5349835988482088,
"eval_sts-test_pearson_euclidean": 0.742188046420877,
"eval_sts-test_pearson_manhattan": 0.740692747387156,
"eval_sts-test_pearson_max": 0.7721603126678054,
"eval_sts-test_spearman_cosine": 0.7864643989345994,
"eval_sts-test_spearman_dot": 0.5098554969670107,
"eval_sts-test_spearman_euclidean": 0.7318205135578197,
"eval_sts-test_spearman_manhattan": 0.7339408946246045,
"eval_sts-test_spearman_max": 0.7864643989345994,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_vitaminc-pairs_loss": 4.802134990692139,
"eval_vitaminc-pairs_runtime": 1.4328,
"eval_vitaminc-pairs_samples_per_second": 115.859,
"eval_vitaminc-pairs_steps_per_second": 2.792,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_sts-label_loss": 4.164713382720947,
"eval_sts-label_runtime": 0.4062,
"eval_sts-label_samples_per_second": 492.426,
"eval_sts-label_steps_per_second": 12.311,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_qnli-contrastive_loss": 0.1906559020280838,
"eval_qnli-contrastive_runtime": 0.285,
"eval_qnli-contrastive_samples_per_second": 701.782,
"eval_qnli-contrastive_steps_per_second": 17.545,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_scitail-pairs-qa_loss": 0.09206719696521759,
"eval_scitail-pairs-qa_runtime": 1.0605,
"eval_scitail-pairs-qa_samples_per_second": 188.59,
"eval_scitail-pairs-qa_steps_per_second": 4.715,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_scitail-pairs-pos_loss": 0.4270685613155365,
"eval_scitail-pairs-pos_runtime": 2.3609,
"eval_scitail-pairs-pos_samples_per_second": 84.714,
"eval_scitail-pairs-pos_steps_per_second": 2.118,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_xsum-pairs_loss": 0.386574923992157,
"eval_xsum-pairs_runtime": 1.0761,
"eval_xsum-pairs_samples_per_second": 185.848,
"eval_xsum-pairs_steps_per_second": 4.646,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_compression-pairs_loss": 0.15844617784023285,
"eval_compression-pairs_runtime": 0.2411,
"eval_compression-pairs_samples_per_second": 829.616,
"eval_compression-pairs_steps_per_second": 20.74,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_sciq_pairs_loss": 0.32742640376091003,
"eval_sciq_pairs_runtime": 9.0687,
"eval_sciq_pairs_samples_per_second": 22.054,
"eval_sciq_pairs_steps_per_second": 0.551,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_qasc_pairs_loss": 0.44627976417541504,
"eval_qasc_pairs_runtime": 1.2115,
"eval_qasc_pairs_samples_per_second": 165.083,
"eval_qasc_pairs_steps_per_second": 4.127,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_openbookqa_pairs_loss": 1.8450264930725098,
"eval_openbookqa_pairs_runtime": 1.0468,
"eval_openbookqa_pairs_samples_per_second": 191.053,
"eval_openbookqa_pairs_steps_per_second": 4.776,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_msmarco_pairs_loss": 1.0895458459854126,
"eval_msmarco_pairs_runtime": 2.5207,
"eval_msmarco_pairs_samples_per_second": 79.343,
"eval_msmarco_pairs_steps_per_second": 1.984,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_nq_pairs_loss": 0.9783583283424377,
"eval_nq_pairs_runtime": 5.6317,
"eval_nq_pairs_samples_per_second": 35.513,
"eval_nq_pairs_steps_per_second": 0.888,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_trivia_pairs_loss": 1.1956011056900024,
"eval_trivia_pairs_runtime": 9.0547,
"eval_trivia_pairs_samples_per_second": 22.088,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_quora_pairs_loss": 0.2143821269273758,
"eval_quora_pairs_runtime": 0.6206,
"eval_quora_pairs_samples_per_second": 322.293,
"eval_quora_pairs_steps_per_second": 8.057,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_gooaq_pairs_loss": 0.890216052532196,
"eval_gooaq_pairs_runtime": 1.545,
"eval_gooaq_pairs_samples_per_second": 129.45,
"eval_gooaq_pairs_steps_per_second": 3.236,
"step": 12454
},
{
"epoch": 1.9532622333751568,
"eval_mrpc_pairs_loss": 0.08200729638338089,
"eval_mrpc_pairs_runtime": 0.2402,
"eval_mrpc_pairs_samples_per_second": 832.672,
"eval_mrpc_pairs_steps_per_second": 20.817,
"step": 12454
},
{
"epoch": 1.9573400250941029,
"grad_norm": 9.203104972839355,
"learning_rate": 3.455328468844549e-05,
"loss": 1.0748,
"step": 12480
},
{
"epoch": 1.9648682559598494,
"grad_norm": 1.351884126663208,
"learning_rate": 3.4466928141675676e-05,
"loss": 0.8413,
"step": 12528
},
{
"epoch": 1.972396486825596,
"grad_norm": 13.26588249206543,
"learning_rate": 3.4375104727660065e-05,
"loss": 1.2138,
"step": 12576
},
{
"epoch": 1.9799247176913424,
"grad_norm": 7.214226245880127,
"learning_rate": 3.427394779454246e-05,
"loss": 1.2554,
"step": 12624
},
{
"epoch": 1.987452948557089,
"grad_norm": 9.377580642700195,
"learning_rate": 3.416537776156432e-05,
"loss": 1.4695,
"step": 12672
},
{
"epoch": 1.9949811794228356,
"grad_norm": 14.431234359741211,
"learning_rate": 3.4049442610304357e-05,
"loss": 0.9206,
"step": 12720
},
{
"epoch": 2.002509410288582,
"grad_norm": 39.02935028076172,
"learning_rate": 3.392619357729103e-05,
"loss": 0.9354,
"step": 12768
},
{
"epoch": 2.010037641154329,
"grad_norm": 0.6344715356826782,
"learning_rate": 3.379568513135902e-05,
"loss": 0.8492,
"step": 12816
},
{
"epoch": 2.017565872020075,
"grad_norm": 12.117080688476562,
"learning_rate": 3.365797494957723e-05,
"loss": 0.9937,
"step": 12864
},
{
"epoch": 2.025094102885822,
"grad_norm": 8.92581844329834,
"learning_rate": 3.351312389175882e-05,
"loss": 1.423,
"step": 12912
},
{
"epoch": 2.028387703889586,
"eval_nli-pairs_loss": 1.1507288217544556,
"eval_nli-pairs_runtime": 3.9823,
"eval_nli-pairs_samples_per_second": 50.222,
"eval_nli-pairs_steps_per_second": 1.256,
"eval_sts-test_pearson_cosine": 0.7722624362390709,
"eval_sts-test_pearson_dot": 0.5303764984983903,
"eval_sts-test_pearson_euclidean": 0.7411643880467264,
"eval_sts-test_pearson_manhattan": 0.7391579789846456,
"eval_sts-test_pearson_max": 0.7722624362390709,
"eval_sts-test_spearman_cosine": 0.7821947805835431,
"eval_sts-test_spearman_dot": 0.5068928313854526,
"eval_sts-test_spearman_euclidean": 0.7301088773642002,
"eval_sts-test_spearman_manhattan": 0.7326196614978934,
"eval_sts-test_spearman_max": 0.7821947805835431,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_vitaminc-pairs_loss": 4.279026985168457,
"eval_vitaminc-pairs_runtime": 1.4397,
"eval_vitaminc-pairs_samples_per_second": 115.303,
"eval_vitaminc-pairs_steps_per_second": 2.778,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_sts-label_loss": 4.452165603637695,
"eval_sts-label_runtime": 0.4088,
"eval_sts-label_samples_per_second": 489.236,
"eval_sts-label_steps_per_second": 12.231,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_qnli-contrastive_loss": 0.17014659941196442,
"eval_qnli-contrastive_runtime": 0.2882,
"eval_qnli-contrastive_samples_per_second": 693.965,
"eval_qnli-contrastive_steps_per_second": 17.349,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_scitail-pairs-qa_loss": 0.09296510368585587,
"eval_scitail-pairs-qa_runtime": 1.0809,
"eval_scitail-pairs-qa_samples_per_second": 185.023,
"eval_scitail-pairs-qa_steps_per_second": 4.626,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_scitail-pairs-pos_loss": 0.4562944173812866,
"eval_scitail-pairs-pos_runtime": 2.376,
"eval_scitail-pairs-pos_samples_per_second": 84.175,
"eval_scitail-pairs-pos_steps_per_second": 2.104,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_xsum-pairs_loss": 0.3622417151927948,
"eval_xsum-pairs_runtime": 1.0416,
"eval_xsum-pairs_samples_per_second": 192.012,
"eval_xsum-pairs_steps_per_second": 4.8,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_compression-pairs_loss": 0.15833701193332672,
"eval_compression-pairs_runtime": 0.2407,
"eval_compression-pairs_samples_per_second": 831.007,
"eval_compression-pairs_steps_per_second": 20.775,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_sciq_pairs_loss": 0.3300960958003998,
"eval_sciq_pairs_runtime": 9.1169,
"eval_sciq_pairs_samples_per_second": 21.937,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_qasc_pairs_loss": 0.4440248906612396,
"eval_qasc_pairs_runtime": 1.2186,
"eval_qasc_pairs_samples_per_second": 164.116,
"eval_qasc_pairs_steps_per_second": 4.103,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_openbookqa_pairs_loss": 1.9062250852584839,
"eval_openbookqa_pairs_runtime": 1.054,
"eval_openbookqa_pairs_samples_per_second": 189.748,
"eval_openbookqa_pairs_steps_per_second": 4.744,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_msmarco_pairs_loss": 1.0610954761505127,
"eval_msmarco_pairs_runtime": 2.5303,
"eval_msmarco_pairs_samples_per_second": 79.042,
"eval_msmarco_pairs_steps_per_second": 1.976,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_nq_pairs_loss": 0.9248062968254089,
"eval_nq_pairs_runtime": 5.6477,
"eval_nq_pairs_samples_per_second": 35.413,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_trivia_pairs_loss": 1.1339099407196045,
"eval_trivia_pairs_runtime": 9.046,
"eval_trivia_pairs_samples_per_second": 22.109,
"eval_trivia_pairs_steps_per_second": 0.553,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_quora_pairs_loss": 0.197435200214386,
"eval_quora_pairs_runtime": 0.6181,
"eval_quora_pairs_samples_per_second": 323.589,
"eval_quora_pairs_steps_per_second": 8.09,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_gooaq_pairs_loss": 0.8682229518890381,
"eval_gooaq_pairs_runtime": 1.5695,
"eval_gooaq_pairs_samples_per_second": 127.427,
"eval_gooaq_pairs_steps_per_second": 3.186,
"step": 12933
},
{
"epoch": 2.028387703889586,
"eval_mrpc_pairs_loss": 0.08277301490306854,
"eval_mrpc_pairs_runtime": 0.2472,
"eval_mrpc_pairs_samples_per_second": 809.045,
"eval_mrpc_pairs_steps_per_second": 20.226,
"step": 12933
},
{
"epoch": 2.0326223337515685,
"grad_norm": 8.625104904174805,
"learning_rate": 3.3361195973564865e-05,
"loss": 0.9107,
"step": 12960
},
{
"epoch": 2.040150564617315,
"grad_norm": 11.012100219726562,
"learning_rate": 3.320225833821311e-05,
"loss": 0.8694,
"step": 13008
},
{
"epoch": 2.0476787954830615,
"grad_norm": 18.39250946044922,
"learning_rate": 3.303638122680463e-05,
"loss": 1.2955,
"step": 13056
},
{
"epoch": 2.055207026348808,
"grad_norm": 0.33739256858825684,
"learning_rate": 3.286363794728137e-05,
"loss": 0.7578,
"step": 13104
},
{
"epoch": 2.0627352572145545,
"grad_norm": 11.637800216674805,
"learning_rate": 3.268410484202841e-05,
"loss": 0.8352,
"step": 13152
},
{
"epoch": 2.0702634880803013,
"grad_norm": 3.6323413848876953,
"learning_rate": 3.249786125413502e-05,
"loss": 1.1223,
"step": 13200
},
{
"epoch": 2.0777917189460475,
"grad_norm": 2.320704936981201,
"learning_rate": 3.230498949232972e-05,
"loss": 0.8278,
"step": 13248
},
{
"epoch": 2.0853199498117942,
"grad_norm": 19.040969848632812,
"learning_rate": 3.2105574794604686e-05,
"loss": 0.9415,
"step": 13296
},
{
"epoch": 2.092848180677541,
"grad_norm": 0.5109882950782776,
"learning_rate": 3.18997052905455e-05,
"loss": 1.017,
"step": 13344
},
{
"epoch": 2.1003764115432872,
"grad_norm": 15.141865730285645,
"learning_rate": 3.1687471962383085e-05,
"loss": 1.0765,
"step": 13392
},
{
"epoch": 2.103513174404015,
"eval_nli-pairs_loss": 1.051226019859314,
"eval_nli-pairs_runtime": 4.0486,
"eval_nli-pairs_samples_per_second": 49.4,
"eval_nli-pairs_steps_per_second": 1.235,
"eval_sts-test_pearson_cosine": 0.7812575172994014,
"eval_sts-test_pearson_dot": 0.5319105750599413,
"eval_sts-test_pearson_euclidean": 0.750747218773846,
"eval_sts-test_pearson_manhattan": 0.7509656763912702,
"eval_sts-test_pearson_max": 0.7812575172994014,
"eval_sts-test_spearman_cosine": 0.7910115404740932,
"eval_sts-test_spearman_dot": 0.510004063994186,
"eval_sts-test_spearman_euclidean": 0.7384195619199833,
"eval_sts-test_spearman_manhattan": 0.7411514762626671,
"eval_sts-test_spearman_max": 0.7910115404740932,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_vitaminc-pairs_loss": 4.327915668487549,
"eval_vitaminc-pairs_runtime": 1.4479,
"eval_vitaminc-pairs_samples_per_second": 114.649,
"eval_vitaminc-pairs_steps_per_second": 2.763,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_sts-label_loss": 4.202516078948975,
"eval_sts-label_runtime": 0.4274,
"eval_sts-label_samples_per_second": 467.911,
"eval_sts-label_steps_per_second": 11.698,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_qnli-contrastive_loss": 0.17492428421974182,
"eval_qnli-contrastive_runtime": 0.2825,
"eval_qnli-contrastive_samples_per_second": 707.881,
"eval_qnli-contrastive_steps_per_second": 17.697,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_scitail-pairs-qa_loss": 0.09003904461860657,
"eval_scitail-pairs-qa_runtime": 1.0939,
"eval_scitail-pairs-qa_samples_per_second": 182.836,
"eval_scitail-pairs-qa_steps_per_second": 4.571,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_scitail-pairs-pos_loss": 0.46629810333251953,
"eval_scitail-pairs-pos_runtime": 2.4148,
"eval_scitail-pairs-pos_samples_per_second": 82.823,
"eval_scitail-pairs-pos_steps_per_second": 2.071,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_xsum-pairs_loss": 0.34408459067344666,
"eval_xsum-pairs_runtime": 1.0742,
"eval_xsum-pairs_samples_per_second": 186.18,
"eval_xsum-pairs_steps_per_second": 4.655,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_compression-pairs_loss": 0.1467471718788147,
"eval_compression-pairs_runtime": 0.2637,
"eval_compression-pairs_samples_per_second": 758.571,
"eval_compression-pairs_steps_per_second": 18.964,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_sciq_pairs_loss": 0.31176942586898804,
"eval_sciq_pairs_runtime": 9.1255,
"eval_sciq_pairs_samples_per_second": 21.917,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_qasc_pairs_loss": 0.41609370708465576,
"eval_qasc_pairs_runtime": 1.2726,
"eval_qasc_pairs_samples_per_second": 157.157,
"eval_qasc_pairs_steps_per_second": 3.929,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_openbookqa_pairs_loss": 1.8419994115829468,
"eval_openbookqa_pairs_runtime": 1.1105,
"eval_openbookqa_pairs_samples_per_second": 180.091,
"eval_openbookqa_pairs_steps_per_second": 4.502,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_msmarco_pairs_loss": 1.1004538536071777,
"eval_msmarco_pairs_runtime": 2.5657,
"eval_msmarco_pairs_samples_per_second": 77.95,
"eval_msmarco_pairs_steps_per_second": 1.949,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_nq_pairs_loss": 0.9002810716629028,
"eval_nq_pairs_runtime": 5.6938,
"eval_nq_pairs_samples_per_second": 35.126,
"eval_nq_pairs_steps_per_second": 0.878,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_trivia_pairs_loss": 1.086058259010315,
"eval_trivia_pairs_runtime": 9.1706,
"eval_trivia_pairs_samples_per_second": 21.809,
"eval_trivia_pairs_steps_per_second": 0.545,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_quora_pairs_loss": 0.1935713142156601,
"eval_quora_pairs_runtime": 0.6501,
"eval_quora_pairs_samples_per_second": 307.657,
"eval_quora_pairs_steps_per_second": 7.691,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_gooaq_pairs_loss": 0.883912980556488,
"eval_gooaq_pairs_runtime": 1.5812,
"eval_gooaq_pairs_samples_per_second": 126.486,
"eval_gooaq_pairs_steps_per_second": 3.162,
"step": 13412
},
{
"epoch": 2.103513174404015,
"eval_mrpc_pairs_loss": 0.08029545843601227,
"eval_mrpc_pairs_runtime": 0.2872,
"eval_mrpc_pairs_samples_per_second": 696.382,
"eval_mrpc_pairs_steps_per_second": 17.41,
"step": 13412
},
{
"epoch": 2.107904642409034,
"grad_norm": 0.8797653317451477,
"learning_rate": 3.146896860478492e-05,
"loss": 1.0399,
"step": 13440
},
{
"epoch": 2.1154328732747802,
"grad_norm": 0.8809025287628174,
"learning_rate": 3.1244291783403247e-05,
"loss": 0.9078,
"step": 13488
},
{
"epoch": 2.122961104140527,
"grad_norm": 12.174256324768066,
"learning_rate": 3.101354079219879e-05,
"loss": 0.9414,
"step": 13536
},
{
"epoch": 2.1304893350062737,
"grad_norm": 7.465297698974609,
"learning_rate": 3.07768176095586e-05,
"loss": 0.9909,
"step": 13584
},
{
"epoch": 2.13801756587202,
"grad_norm": 13.727421760559082,
"learning_rate": 3.053422685322763e-05,
"loss": 1.1089,
"step": 13632
},
{
"epoch": 2.1455457967377667,
"grad_norm": 15.126276969909668,
"learning_rate": 3.0285875734073832e-05,
"loss": 1.315,
"step": 13680
},
{
"epoch": 2.1530740276035134,
"grad_norm": 7.151478290557861,
"learning_rate": 3.0031874008707226e-05,
"loss": 0.8123,
"step": 13728
},
{
"epoch": 2.1606022584692597,
"grad_norm": 9.124503135681152,
"learning_rate": 2.977233393097396e-05,
"loss": 1.2184,
"step": 13776
},
{
"epoch": 2.1681304893350064,
"grad_norm": 14.512338638305664,
"learning_rate": 2.9507370202346677e-05,
"loss": 0.7468,
"step": 13824
},
{
"epoch": 2.1756587202007527,
"grad_norm": 7.921672344207764,
"learning_rate": 2.9237099921233195e-05,
"loss": 0.794,
"step": 13872
},
{
"epoch": 2.1786386449184443,
"eval_nli-pairs_loss": 0.9923555254936218,
"eval_nli-pairs_runtime": 4.0616,
"eval_nli-pairs_samples_per_second": 49.242,
"eval_nli-pairs_steps_per_second": 1.231,
"eval_sts-test_pearson_cosine": 0.7764138225504212,
"eval_sts-test_pearson_dot": 0.5280954278502723,
"eval_sts-test_pearson_euclidean": 0.7424878084852355,
"eval_sts-test_pearson_manhattan": 0.7418329788550313,
"eval_sts-test_pearson_max": 0.7764138225504212,
"eval_sts-test_spearman_cosine": 0.7855803224985884,
"eval_sts-test_spearman_dot": 0.5061382013250837,
"eval_sts-test_spearman_euclidean": 0.7273293251792637,
"eval_sts-test_spearman_manhattan": 0.7299928706510171,
"eval_sts-test_spearman_max": 0.7855803224985884,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_vitaminc-pairs_loss": 4.433829307556152,
"eval_vitaminc-pairs_runtime": 1.4393,
"eval_vitaminc-pairs_samples_per_second": 115.33,
"eval_vitaminc-pairs_steps_per_second": 2.779,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_sts-label_loss": 4.203037738800049,
"eval_sts-label_runtime": 0.4039,
"eval_sts-label_samples_per_second": 495.17,
"eval_sts-label_steps_per_second": 12.379,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_qnli-contrastive_loss": 0.14975911378860474,
"eval_qnli-contrastive_runtime": 0.2846,
"eval_qnli-contrastive_samples_per_second": 702.864,
"eval_qnli-contrastive_steps_per_second": 17.572,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_scitail-pairs-qa_loss": 0.08012403547763824,
"eval_scitail-pairs-qa_runtime": 1.0534,
"eval_scitail-pairs-qa_samples_per_second": 189.868,
"eval_scitail-pairs-qa_steps_per_second": 4.747,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_scitail-pairs-pos_loss": 0.42602407932281494,
"eval_scitail-pairs-pos_runtime": 2.3645,
"eval_scitail-pairs-pos_samples_per_second": 84.586,
"eval_scitail-pairs-pos_steps_per_second": 2.115,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_xsum-pairs_loss": 0.3360922336578369,
"eval_xsum-pairs_runtime": 1.0464,
"eval_xsum-pairs_samples_per_second": 191.13,
"eval_xsum-pairs_steps_per_second": 4.778,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_compression-pairs_loss": 0.1300394982099533,
"eval_compression-pairs_runtime": 0.2447,
"eval_compression-pairs_samples_per_second": 817.251,
"eval_compression-pairs_steps_per_second": 20.431,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_sciq_pairs_loss": 0.3116128444671631,
"eval_sciq_pairs_runtime": 9.0757,
"eval_sciq_pairs_samples_per_second": 22.037,
"eval_sciq_pairs_steps_per_second": 0.551,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_qasc_pairs_loss": 0.361092746257782,
"eval_qasc_pairs_runtime": 1.2089,
"eval_qasc_pairs_samples_per_second": 165.434,
"eval_qasc_pairs_steps_per_second": 4.136,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_openbookqa_pairs_loss": 1.8503968715667725,
"eval_openbookqa_pairs_runtime": 1.0605,
"eval_openbookqa_pairs_samples_per_second": 188.596,
"eval_openbookqa_pairs_steps_per_second": 4.715,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_msmarco_pairs_loss": 1.0155786275863647,
"eval_msmarco_pairs_runtime": 2.5332,
"eval_msmarco_pairs_samples_per_second": 78.953,
"eval_msmarco_pairs_steps_per_second": 1.974,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_nq_pairs_loss": 0.8908740878105164,
"eval_nq_pairs_runtime": 5.6668,
"eval_nq_pairs_samples_per_second": 35.294,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_trivia_pairs_loss": 1.1117126941680908,
"eval_trivia_pairs_runtime": 9.1174,
"eval_trivia_pairs_samples_per_second": 21.936,
"eval_trivia_pairs_steps_per_second": 0.548,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_quora_pairs_loss": 0.10165992379188538,
"eval_quora_pairs_runtime": 0.6259,
"eval_quora_pairs_samples_per_second": 319.533,
"eval_quora_pairs_steps_per_second": 7.988,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_gooaq_pairs_loss": 0.8515159487724304,
"eval_gooaq_pairs_runtime": 1.555,
"eval_gooaq_pairs_samples_per_second": 128.615,
"eval_gooaq_pairs_steps_per_second": 3.215,
"step": 13891
},
{
"epoch": 2.1786386449184443,
"eval_mrpc_pairs_loss": 0.0670856311917305,
"eval_mrpc_pairs_runtime": 0.2435,
"eval_mrpc_pairs_samples_per_second": 821.504,
"eval_mrpc_pairs_steps_per_second": 20.538,
"step": 13891
},
{
"epoch": 2.1831869510664994,
"grad_norm": 7.595925807952881,
"learning_rate": 2.896164253122592e-05,
"loss": 0.8892,
"step": 13920
},
{
"epoch": 2.190715181932246,
"grad_norm": 0.5041866898536682,
"learning_rate": 2.8681119768314744e-05,
"loss": 0.7443,
"step": 13968
},
{
"epoch": 2.1982434127979924,
"grad_norm": 10.744353294372559,
"learning_rate": 2.8395655607086955e-05,
"loss": 0.6776,
"step": 14016
},
{
"epoch": 2.205771643663739,
"grad_norm": 13.672101974487305,
"learning_rate": 2.810537620593767e-05,
"loss": 1.2239,
"step": 14064
},
{
"epoch": 2.2132998745294854,
"grad_norm": 3.225550413131714,
"learning_rate": 2.7810409851315294e-05,
"loss": 0.8225,
"step": 14112
},
{
"epoch": 2.220828105395232,
"grad_norm": 3.210339069366455,
"learning_rate": 2.7510886901026408e-05,
"loss": 1.2052,
"step": 14160
},
{
"epoch": 2.228356336260979,
"grad_norm": 6.894575119018555,
"learning_rate": 2.7206939726625295e-05,
"loss": 0.9522,
"step": 14208
},
{
"epoch": 2.235884567126725,
"grad_norm": 11.719073295593262,
"learning_rate": 2.6898702654913484e-05,
"loss": 0.7615,
"step": 14256
},
{
"epoch": 2.243412797992472,
"grad_norm": 15.548043251037598,
"learning_rate": 2.6586311908575173e-05,
"loss": 0.8145,
"step": 14304
},
{
"epoch": 2.250941028858218,
"grad_norm": 17.0067081451416,
"learning_rate": 2.626990554597484e-05,
"loss": 0.6474,
"step": 14352
},
{
"epoch": 2.2537641154328734,
"eval_nli-pairs_loss": 0.9902753829956055,
"eval_nli-pairs_runtime": 3.9871,
"eval_nli-pairs_samples_per_second": 50.162,
"eval_nli-pairs_steps_per_second": 1.254,
"eval_sts-test_pearson_cosine": 0.7808536106776984,
"eval_sts-test_pearson_dot": 0.5357771815123735,
"eval_sts-test_pearson_euclidean": 0.7468127931633377,
"eval_sts-test_pearson_manhattan": 0.7458146588384492,
"eval_sts-test_pearson_max": 0.7808536106776984,
"eval_sts-test_spearman_cosine": 0.7912070672899862,
"eval_sts-test_spearman_dot": 0.5140789847704963,
"eval_sts-test_spearman_euclidean": 0.734115394908759,
"eval_sts-test_spearman_manhattan": 0.7357129898281574,
"eval_sts-test_spearman_max": 0.7912070672899862,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_vitaminc-pairs_loss": 4.326738357543945,
"eval_vitaminc-pairs_runtime": 1.4474,
"eval_vitaminc-pairs_samples_per_second": 114.69,
"eval_vitaminc-pairs_steps_per_second": 2.764,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_sts-label_loss": 4.177000522613525,
"eval_sts-label_runtime": 0.418,
"eval_sts-label_samples_per_second": 478.456,
"eval_sts-label_steps_per_second": 11.961,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_qnli-contrastive_loss": 0.17705124616622925,
"eval_qnli-contrastive_runtime": 0.2935,
"eval_qnli-contrastive_samples_per_second": 681.421,
"eval_qnli-contrastive_steps_per_second": 17.036,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_scitail-pairs-qa_loss": 0.08105171471834183,
"eval_scitail-pairs-qa_runtime": 1.0726,
"eval_scitail-pairs-qa_samples_per_second": 186.462,
"eval_scitail-pairs-qa_steps_per_second": 4.662,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_scitail-pairs-pos_loss": 0.413583904504776,
"eval_scitail-pairs-pos_runtime": 2.3897,
"eval_scitail-pairs-pos_samples_per_second": 83.693,
"eval_scitail-pairs-pos_steps_per_second": 2.092,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_xsum-pairs_loss": 0.30970945954322815,
"eval_xsum-pairs_runtime": 1.0443,
"eval_xsum-pairs_samples_per_second": 191.522,
"eval_xsum-pairs_steps_per_second": 4.788,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_compression-pairs_loss": 0.13123837113380432,
"eval_compression-pairs_runtime": 0.2457,
"eval_compression-pairs_samples_per_second": 813.839,
"eval_compression-pairs_steps_per_second": 20.346,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_sciq_pairs_loss": 0.30473247170448303,
"eval_sciq_pairs_runtime": 9.12,
"eval_sciq_pairs_samples_per_second": 21.93,
"eval_sciq_pairs_steps_per_second": 0.548,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_qasc_pairs_loss": 0.35160771012306213,
"eval_qasc_pairs_runtime": 1.2422,
"eval_qasc_pairs_samples_per_second": 160.998,
"eval_qasc_pairs_steps_per_second": 4.025,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_openbookqa_pairs_loss": 1.7501661777496338,
"eval_openbookqa_pairs_runtime": 1.0571,
"eval_openbookqa_pairs_samples_per_second": 189.201,
"eval_openbookqa_pairs_steps_per_second": 4.73,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_msmarco_pairs_loss": 0.9768362641334534,
"eval_msmarco_pairs_runtime": 2.529,
"eval_msmarco_pairs_samples_per_second": 79.083,
"eval_msmarco_pairs_steps_per_second": 1.977,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_nq_pairs_loss": 0.8456315398216248,
"eval_nq_pairs_runtime": 5.6662,
"eval_nq_pairs_samples_per_second": 35.297,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_trivia_pairs_loss": 1.0666593313217163,
"eval_trivia_pairs_runtime": 9.0944,
"eval_trivia_pairs_samples_per_second": 21.992,
"eval_trivia_pairs_steps_per_second": 0.55,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_quora_pairs_loss": 0.21501043438911438,
"eval_quora_pairs_runtime": 0.6293,
"eval_quora_pairs_samples_per_second": 317.812,
"eval_quora_pairs_steps_per_second": 7.945,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_gooaq_pairs_loss": 0.8077111840248108,
"eval_gooaq_pairs_runtime": 1.5722,
"eval_gooaq_pairs_samples_per_second": 127.211,
"eval_gooaq_pairs_steps_per_second": 3.18,
"step": 14370
},
{
"epoch": 2.2537641154328734,
"eval_mrpc_pairs_loss": 0.07021843641996384,
"eval_mrpc_pairs_runtime": 0.2501,
"eval_mrpc_pairs_samples_per_second": 799.6,
"eval_mrpc_pairs_steps_per_second": 19.99,
"step": 14370
},
{
"epoch": 2.258469259723965,
"grad_norm": 5.989051342010498,
"learning_rate": 2.59496234001435e-05,
"loss": 0.7813,
"step": 14400
},
{
"epoch": 2.2659974905897116,
"grad_norm": 15.973469734191895,
"learning_rate": 2.5625607016980774e-05,
"loss": 0.9862,
"step": 14448
},
{
"epoch": 2.273525721455458,
"grad_norm": 16.60488510131836,
"learning_rate": 2.5297999592699854e-05,
"loss": 0.8707,
"step": 14496
},
{
"epoch": 2.2810539523212046,
"grad_norm": 13.824111938476562,
"learning_rate": 2.496694591054328e-05,
"loss": 0.9914,
"step": 14544
},
{
"epoch": 2.288582183186951,
"grad_norm": 0.27927935123443604,
"learning_rate": 2.4632592276797123e-05,
"loss": 1.0876,
"step": 14592
},
{
"epoch": 2.2961104140526976,
"grad_norm": 26.05866050720215,
"learning_rate": 2.4295086456132303e-05,
"loss": 0.792,
"step": 14640
},
{
"epoch": 2.3036386449184443,
"grad_norm": 13.912291526794434,
"learning_rate": 2.3954577606301227e-05,
"loss": 0.6299,
"step": 14688
},
{
"epoch": 2.3111668757841906,
"grad_norm": 16.938758850097656,
"learning_rate": 2.3611216212218842e-05,
"loss": 0.9561,
"step": 14736
},
{
"epoch": 2.3186951066499373,
"grad_norm": 9.285428047180176,
"learning_rate": 2.3265154019457132e-05,
"loss": 0.8283,
"step": 14784
},
{
"epoch": 2.326223337515684,
"grad_norm": 1.6328561305999756,
"learning_rate": 2.29165439671825e-05,
"loss": 1.0737,
"step": 14832
},
{
"epoch": 2.3288895859473024,
"eval_nli-pairs_loss": 0.9611924290657043,
"eval_nli-pairs_runtime": 3.9911,
"eval_nli-pairs_samples_per_second": 50.111,
"eval_nli-pairs_steps_per_second": 1.253,
"eval_sts-test_pearson_cosine": 0.7845481404056492,
"eval_sts-test_pearson_dot": 0.5385486676413874,
"eval_sts-test_pearson_euclidean": 0.7401100148283691,
"eval_sts-test_pearson_manhattan": 0.7381113824263572,
"eval_sts-test_pearson_max": 0.7845481404056492,
"eval_sts-test_spearman_cosine": 0.794691588986042,
"eval_sts-test_spearman_dot": 0.5190592879825671,
"eval_sts-test_spearman_euclidean": 0.7278460500778233,
"eval_sts-test_spearman_manhattan": 0.7287580068840045,
"eval_sts-test_spearman_max": 0.794691588986042,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_vitaminc-pairs_loss": 4.501389503479004,
"eval_vitaminc-pairs_runtime": 1.4538,
"eval_vitaminc-pairs_samples_per_second": 114.183,
"eval_vitaminc-pairs_steps_per_second": 2.751,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_sts-label_loss": 4.10113000869751,
"eval_sts-label_runtime": 0.4117,
"eval_sts-label_samples_per_second": 485.82,
"eval_sts-label_steps_per_second": 12.146,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_qnli-contrastive_loss": 0.11201102286577225,
"eval_qnli-contrastive_runtime": 0.293,
"eval_qnli-contrastive_samples_per_second": 682.573,
"eval_qnli-contrastive_steps_per_second": 17.064,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_scitail-pairs-qa_loss": 0.0739838182926178,
"eval_scitail-pairs-qa_runtime": 1.0651,
"eval_scitail-pairs-qa_samples_per_second": 187.778,
"eval_scitail-pairs-qa_steps_per_second": 4.694,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_scitail-pairs-pos_loss": 0.42078059911727905,
"eval_scitail-pairs-pos_runtime": 2.4326,
"eval_scitail-pairs-pos_samples_per_second": 82.217,
"eval_scitail-pairs-pos_steps_per_second": 2.055,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_xsum-pairs_loss": 0.3173944354057312,
"eval_xsum-pairs_runtime": 1.0505,
"eval_xsum-pairs_samples_per_second": 190.393,
"eval_xsum-pairs_steps_per_second": 4.76,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_compression-pairs_loss": 0.12480150908231735,
"eval_compression-pairs_runtime": 0.2424,
"eval_compression-pairs_samples_per_second": 825.217,
"eval_compression-pairs_steps_per_second": 20.63,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_sciq_pairs_loss": 0.32480019330978394,
"eval_sciq_pairs_runtime": 9.0956,
"eval_sciq_pairs_samples_per_second": 21.989,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_qasc_pairs_loss": 0.3498300313949585,
"eval_qasc_pairs_runtime": 1.2463,
"eval_qasc_pairs_samples_per_second": 160.475,
"eval_qasc_pairs_steps_per_second": 4.012,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_openbookqa_pairs_loss": 1.8068655729293823,
"eval_openbookqa_pairs_runtime": 1.0596,
"eval_openbookqa_pairs_samples_per_second": 188.748,
"eval_openbookqa_pairs_steps_per_second": 4.719,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_msmarco_pairs_loss": 0.8900260925292969,
"eval_msmarco_pairs_runtime": 2.5231,
"eval_msmarco_pairs_samples_per_second": 79.268,
"eval_msmarco_pairs_steps_per_second": 1.982,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_nq_pairs_loss": 0.8023759722709656,
"eval_nq_pairs_runtime": 5.6432,
"eval_nq_pairs_samples_per_second": 35.441,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_trivia_pairs_loss": 1.0682170391082764,
"eval_trivia_pairs_runtime": 9.0648,
"eval_trivia_pairs_samples_per_second": 22.063,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_quora_pairs_loss": 0.20286118984222412,
"eval_quora_pairs_runtime": 0.623,
"eval_quora_pairs_samples_per_second": 321.01,
"eval_quora_pairs_steps_per_second": 8.025,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_gooaq_pairs_loss": 0.7672585844993591,
"eval_gooaq_pairs_runtime": 1.552,
"eval_gooaq_pairs_samples_per_second": 128.866,
"eval_gooaq_pairs_steps_per_second": 3.222,
"step": 14849
},
{
"epoch": 2.3288895859473024,
"eval_mrpc_pairs_loss": 0.06376803666353226,
"eval_mrpc_pairs_runtime": 0.2417,
"eval_mrpc_pairs_samples_per_second": 827.633,
"eval_mrpc_pairs_steps_per_second": 20.691,
"step": 14849
},
{
"epoch": 2.3337515683814303,
"grad_norm": 7.665156364440918,
"learning_rate": 2.2565540120565625e-05,
"loss": 0.9617,
"step": 14880
},
{
"epoch": 2.341279799247177,
"grad_norm": 3.3739352226257324,
"learning_rate": 2.2212297602693703e-05,
"loss": 0.9771,
"step": 14928
},
{
"epoch": 2.3488080301129233,
"grad_norm": 0.46657243371009827,
"learning_rate": 2.185697252601516e-05,
"loss": 0.8833,
"step": 14976
},
{
"epoch": 2.35633626097867,
"grad_norm": 7.030867099761963,
"learning_rate": 2.1499721923347103e-05,
"loss": 0.8536,
"step": 15024
},
{
"epoch": 2.3638644918444167,
"grad_norm": 0.3481261730194092,
"learning_rate": 2.114070367847608e-05,
"loss": 0.8807,
"step": 15072
},
{
"epoch": 2.371392722710163,
"grad_norm": 0.4611968696117401,
"learning_rate": 2.0780076456382662e-05,
"loss": 0.8687,
"step": 15120
},
{
"epoch": 2.3789209535759097,
"grad_norm": 1.0353537797927856,
"learning_rate": 2.041799963312086e-05,
"loss": 1.087,
"step": 15168
},
{
"epoch": 2.3864491844416564,
"grad_norm": 13.026145935058594,
"learning_rate": 2.0054633225383283e-05,
"loss": 0.753,
"step": 15216
},
{
"epoch": 2.3939774153074027,
"grad_norm": 0.20962007343769073,
"learning_rate": 1.9690137819783136e-05,
"loss": 1.294,
"step": 15264
},
{
"epoch": 2.4015056461731494,
"grad_norm": 16.079498291015625,
"learning_rate": 1.9324674501884393e-05,
"loss": 1.3153,
"step": 15312
},
{
"epoch": 2.4040150564617315,
"eval_nli-pairs_loss": 0.9047927260398865,
"eval_nli-pairs_runtime": 4.0211,
"eval_nli-pairs_samples_per_second": 49.738,
"eval_nli-pairs_steps_per_second": 1.243,
"eval_sts-test_pearson_cosine": 0.7849694302161491,
"eval_sts-test_pearson_dot": 0.55042785805372,
"eval_sts-test_pearson_euclidean": 0.7503053391287348,
"eval_sts-test_pearson_manhattan": 0.7490474553550158,
"eval_sts-test_pearson_max": 0.7849694302161491,
"eval_sts-test_spearman_cosine": 0.8010535728811503,
"eval_sts-test_spearman_dot": 0.5277973822094897,
"eval_sts-test_spearman_euclidean": 0.7388703506937401,
"eval_sts-test_spearman_manhattan": 0.7404567760555878,
"eval_sts-test_spearman_max": 0.8010535728811503,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_vitaminc-pairs_loss": 4.314908027648926,
"eval_vitaminc-pairs_runtime": 1.4844,
"eval_vitaminc-pairs_samples_per_second": 111.829,
"eval_vitaminc-pairs_steps_per_second": 2.695,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_sts-label_loss": 4.023238658905029,
"eval_sts-label_runtime": 0.4747,
"eval_sts-label_samples_per_second": 421.302,
"eval_sts-label_steps_per_second": 10.533,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_qnli-contrastive_loss": 0.12602539360523224,
"eval_qnli-contrastive_runtime": 0.2925,
"eval_qnli-contrastive_samples_per_second": 683.776,
"eval_qnli-contrastive_steps_per_second": 17.094,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_scitail-pairs-qa_loss": 0.07097765803337097,
"eval_scitail-pairs-qa_runtime": 1.0894,
"eval_scitail-pairs-qa_samples_per_second": 183.587,
"eval_scitail-pairs-qa_steps_per_second": 4.59,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_scitail-pairs-pos_loss": 0.4015200138092041,
"eval_scitail-pairs-pos_runtime": 2.3966,
"eval_scitail-pairs-pos_samples_per_second": 83.452,
"eval_scitail-pairs-pos_steps_per_second": 2.086,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_xsum-pairs_loss": 0.292790025472641,
"eval_xsum-pairs_runtime": 1.0499,
"eval_xsum-pairs_samples_per_second": 190.498,
"eval_xsum-pairs_steps_per_second": 4.762,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_compression-pairs_loss": 0.12370473891496658,
"eval_compression-pairs_runtime": 0.2448,
"eval_compression-pairs_samples_per_second": 817.057,
"eval_compression-pairs_steps_per_second": 20.426,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_sciq_pairs_loss": 0.30308836698532104,
"eval_sciq_pairs_runtime": 9.2872,
"eval_sciq_pairs_samples_per_second": 21.535,
"eval_sciq_pairs_steps_per_second": 0.538,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_qasc_pairs_loss": 0.32082173228263855,
"eval_qasc_pairs_runtime": 1.2381,
"eval_qasc_pairs_samples_per_second": 161.536,
"eval_qasc_pairs_steps_per_second": 4.038,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_openbookqa_pairs_loss": 1.7333112955093384,
"eval_openbookqa_pairs_runtime": 1.0618,
"eval_openbookqa_pairs_samples_per_second": 188.351,
"eval_openbookqa_pairs_steps_per_second": 4.709,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_msmarco_pairs_loss": 0.811015248298645,
"eval_msmarco_pairs_runtime": 2.5594,
"eval_msmarco_pairs_samples_per_second": 78.142,
"eval_msmarco_pairs_steps_per_second": 1.954,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_nq_pairs_loss": 0.748630702495575,
"eval_nq_pairs_runtime": 5.7123,
"eval_nq_pairs_samples_per_second": 35.012,
"eval_nq_pairs_steps_per_second": 0.875,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_trivia_pairs_loss": 1.0478410720825195,
"eval_trivia_pairs_runtime": 9.1253,
"eval_trivia_pairs_samples_per_second": 21.917,
"eval_trivia_pairs_steps_per_second": 0.548,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_quora_pairs_loss": 0.17154474556446075,
"eval_quora_pairs_runtime": 0.6786,
"eval_quora_pairs_samples_per_second": 294.719,
"eval_quora_pairs_steps_per_second": 7.368,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_gooaq_pairs_loss": 0.7102887034416199,
"eval_gooaq_pairs_runtime": 1.5957,
"eval_gooaq_pairs_samples_per_second": 125.337,
"eval_gooaq_pairs_steps_per_second": 3.133,
"step": 15328
},
{
"epoch": 2.4040150564617315,
"eval_mrpc_pairs_loss": 0.06290537863969803,
"eval_mrpc_pairs_runtime": 0.2704,
"eval_mrpc_pairs_samples_per_second": 739.657,
"eval_mrpc_pairs_steps_per_second": 18.491,
"step": 15328
},
{
"epoch": 2.4090338770388957,
"grad_norm": 15.35144329071045,
"learning_rate": 1.895840478501141e-05,
"loss": 1.0366,
"step": 15360
},
{
"epoch": 2.4165621079046424,
"grad_norm": 1.9469166994094849,
"learning_rate": 1.8591490538869543e-05,
"loss": 0.952,
"step": 15408
},
{
"epoch": 2.424090338770389,
"grad_norm": 8.833171844482422,
"learning_rate": 1.8224093918008228e-05,
"loss": 0.7095,
"step": 15456
},
{
"epoch": 2.4316185696361354,
"grad_norm": 15.024059295654297,
"learning_rate": 1.785637729015819e-05,
"loss": 1.0462,
"step": 15504
},
{
"epoch": 2.439146800501882,
"grad_norm": 44.20988464355469,
"learning_rate": 1.748850316447446e-05,
"loss": 1.1215,
"step": 15552
},
{
"epoch": 2.4466750313676284,
"grad_norm": 0.3975805640220642,
"learning_rate": 1.7120634119716885e-05,
"loss": 1.0121,
"step": 15600
},
{
"epoch": 2.454203262233375,
"grad_norm": 3.834451198577881,
"learning_rate": 1.6752932732399802e-05,
"loss": 0.6641,
"step": 15648
},
{
"epoch": 2.461731493099122,
"grad_norm": 10.030485153198242,
"learning_rate": 1.6385561504942826e-05,
"loss": 0.9271,
"step": 15696
},
{
"epoch": 2.469259723964868,
"grad_norm": 12.702008247375488,
"learning_rate": 1.6018682793854295e-05,
"loss": 0.7701,
"step": 15744
},
{
"epoch": 2.476787954830615,
"grad_norm": 1.2372121810913086,
"learning_rate": 1.5652458737979246e-05,
"loss": 1.0475,
"step": 15792
},
{
"epoch": 2.4791405269761606,
"eval_nli-pairs_loss": 0.8603814840316772,
"eval_nli-pairs_runtime": 4.0744,
"eval_nli-pairs_samples_per_second": 49.087,
"eval_nli-pairs_steps_per_second": 1.227,
"eval_sts-test_pearson_cosine": 0.7842440208266159,
"eval_sts-test_pearson_dot": 0.542125912585153,
"eval_sts-test_pearson_euclidean": 0.7463476734859066,
"eval_sts-test_pearson_manhattan": 0.7445002157509107,
"eval_sts-test_pearson_max": 0.7842440208266159,
"eval_sts-test_spearman_cosine": 0.7985494684910621,
"eval_sts-test_spearman_dot": 0.5198453192518991,
"eval_sts-test_spearman_euclidean": 0.734834768476047,
"eval_sts-test_spearman_manhattan": 0.7356720569274451,
"eval_sts-test_spearman_max": 0.7985494684910621,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_vitaminc-pairs_loss": 4.325559616088867,
"eval_vitaminc-pairs_runtime": 1.4419,
"eval_vitaminc-pairs_samples_per_second": 115.123,
"eval_vitaminc-pairs_steps_per_second": 2.774,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_sts-label_loss": 4.104859352111816,
"eval_sts-label_runtime": 0.4166,
"eval_sts-label_samples_per_second": 480.072,
"eval_sts-label_steps_per_second": 12.002,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_qnli-contrastive_loss": 0.12549176812171936,
"eval_qnli-contrastive_runtime": 0.2856,
"eval_qnli-contrastive_samples_per_second": 700.188,
"eval_qnli-contrastive_steps_per_second": 17.505,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_scitail-pairs-qa_loss": 0.07220196723937988,
"eval_scitail-pairs-qa_runtime": 1.0483,
"eval_scitail-pairs-qa_samples_per_second": 190.789,
"eval_scitail-pairs-qa_steps_per_second": 4.77,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_scitail-pairs-pos_loss": 0.4026014804840088,
"eval_scitail-pairs-pos_runtime": 2.3482,
"eval_scitail-pairs-pos_samples_per_second": 85.171,
"eval_scitail-pairs-pos_steps_per_second": 2.129,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_xsum-pairs_loss": 0.2882743179798126,
"eval_xsum-pairs_runtime": 1.0442,
"eval_xsum-pairs_samples_per_second": 191.525,
"eval_xsum-pairs_steps_per_second": 4.788,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_compression-pairs_loss": 0.11977991461753845,
"eval_compression-pairs_runtime": 0.2438,
"eval_compression-pairs_samples_per_second": 820.426,
"eval_compression-pairs_steps_per_second": 20.511,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_sciq_pairs_loss": 0.29596462845802307,
"eval_sciq_pairs_runtime": 9.0342,
"eval_sciq_pairs_samples_per_second": 22.138,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_qasc_pairs_loss": 0.306916743516922,
"eval_qasc_pairs_runtime": 1.2122,
"eval_qasc_pairs_samples_per_second": 164.985,
"eval_qasc_pairs_steps_per_second": 4.125,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_openbookqa_pairs_loss": 1.707059621810913,
"eval_openbookqa_pairs_runtime": 1.0501,
"eval_openbookqa_pairs_samples_per_second": 190.467,
"eval_openbookqa_pairs_steps_per_second": 4.762,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_msmarco_pairs_loss": 0.8327271938323975,
"eval_msmarco_pairs_runtime": 2.5294,
"eval_msmarco_pairs_samples_per_second": 79.07,
"eval_msmarco_pairs_steps_per_second": 1.977,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_nq_pairs_loss": 0.743294358253479,
"eval_nq_pairs_runtime": 5.6376,
"eval_nq_pairs_samples_per_second": 35.476,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_trivia_pairs_loss": 1.0136535167694092,
"eval_trivia_pairs_runtime": 9.0617,
"eval_trivia_pairs_samples_per_second": 22.071,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_quora_pairs_loss": 0.17337770760059357,
"eval_quora_pairs_runtime": 0.6277,
"eval_quora_pairs_samples_per_second": 318.62,
"eval_quora_pairs_steps_per_second": 7.965,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_gooaq_pairs_loss": 0.7018021941184998,
"eval_gooaq_pairs_runtime": 1.5563,
"eval_gooaq_pairs_samples_per_second": 128.507,
"eval_gooaq_pairs_steps_per_second": 3.213,
"step": 15807
},
{
"epoch": 2.4791405269761606,
"eval_mrpc_pairs_loss": 0.06132274493575096,
"eval_mrpc_pairs_runtime": 0.2449,
"eval_mrpc_pairs_samples_per_second": 816.685,
"eval_mrpc_pairs_steps_per_second": 20.417,
"step": 15807
},
{
"epoch": 2.484316185696361,
"grad_norm": 0.8712967038154602,
"learning_rate": 1.5287051186843536e-05,
"loss": 0.9377,
"step": 15840
},
{
"epoch": 2.491844416562108,
"grad_norm": 14.272644996643066,
"learning_rate": 1.4922621629125904e-05,
"loss": 0.6819,
"step": 15888
},
{
"epoch": 2.4993726474278546,
"grad_norm": 15.130535125732422,
"learning_rate": 1.4559331121289415e-05,
"loss": 0.7023,
"step": 15936
},
{
"epoch": 2.506900878293601,
"grad_norm": 35.925716400146484,
"learning_rate": 1.4197340216403982e-05,
"loss": 0.9654,
"step": 15984
},
{
"epoch": 2.5144291091593476,
"grad_norm": 10.591894149780273,
"learning_rate": 1.3836808893191333e-05,
"loss": 1.1008,
"step": 16032
},
{
"epoch": 2.521957340025094,
"grad_norm": 4.002181053161621,
"learning_rate": 1.3477896485323801e-05,
"loss": 0.9646,
"step": 16080
},
{
"epoch": 2.5294855708908406,
"grad_norm": 12.777207374572754,
"learning_rate": 1.3120761611008248e-05,
"loss": 0.7206,
"step": 16128
},
{
"epoch": 2.5370138017565873,
"grad_norm": 2.8435938358306885,
"learning_rate": 1.276556210288605e-05,
"loss": 1.1148,
"step": 16176
},
{
"epoch": 2.544542032622334,
"grad_norm": 10.71541976928711,
"learning_rate": 1.2412454938280398e-05,
"loss": 0.8355,
"step": 16224
},
{
"epoch": 2.5520702634880803,
"grad_norm": 14.534698486328125,
"learning_rate": 1.2061596169821523e-05,
"loss": 0.9936,
"step": 16272
},
{
"epoch": 2.5542659974905897,
"eval_nli-pairs_loss": 0.8486846685409546,
"eval_nli-pairs_runtime": 4.0555,
"eval_nli-pairs_samples_per_second": 49.316,
"eval_nli-pairs_steps_per_second": 1.233,
"eval_sts-test_pearson_cosine": 0.7882956684683076,
"eval_sts-test_pearson_dot": 0.5546506215114156,
"eval_sts-test_pearson_euclidean": 0.7445917011620068,
"eval_sts-test_pearson_manhattan": 0.7422643102793521,
"eval_sts-test_pearson_max": 0.7882956684683076,
"eval_sts-test_spearman_cosine": 0.7999824132283299,
"eval_sts-test_spearman_dot": 0.5341004899831527,
"eval_sts-test_spearman_euclidean": 0.7324801725996299,
"eval_sts-test_spearman_manhattan": 0.7326323200243545,
"eval_sts-test_spearman_max": 0.7999824132283299,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_vitaminc-pairs_loss": 4.4123005867004395,
"eval_vitaminc-pairs_runtime": 1.4366,
"eval_vitaminc-pairs_samples_per_second": 115.548,
"eval_vitaminc-pairs_steps_per_second": 2.784,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_sts-label_loss": 4.199183464050293,
"eval_sts-label_runtime": 0.4205,
"eval_sts-label_samples_per_second": 475.649,
"eval_sts-label_steps_per_second": 11.891,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_qnli-contrastive_loss": 0.12447753548622131,
"eval_qnli-contrastive_runtime": 0.2977,
"eval_qnli-contrastive_samples_per_second": 671.828,
"eval_qnli-contrastive_steps_per_second": 16.796,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_scitail-pairs-qa_loss": 0.07258906215429306,
"eval_scitail-pairs-qa_runtime": 1.0474,
"eval_scitail-pairs-qa_samples_per_second": 190.943,
"eval_scitail-pairs-qa_steps_per_second": 4.774,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_scitail-pairs-pos_loss": 0.39510485529899597,
"eval_scitail-pairs-pos_runtime": 2.3416,
"eval_scitail-pairs-pos_samples_per_second": 85.413,
"eval_scitail-pairs-pos_steps_per_second": 2.135,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_xsum-pairs_loss": 0.2899823486804962,
"eval_xsum-pairs_runtime": 1.0493,
"eval_xsum-pairs_samples_per_second": 190.599,
"eval_xsum-pairs_steps_per_second": 4.765,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_compression-pairs_loss": 0.1180044561624527,
"eval_compression-pairs_runtime": 0.2479,
"eval_compression-pairs_samples_per_second": 806.654,
"eval_compression-pairs_steps_per_second": 20.166,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_sciq_pairs_loss": 0.2835949957370758,
"eval_sciq_pairs_runtime": 9.052,
"eval_sciq_pairs_samples_per_second": 22.095,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_qasc_pairs_loss": 0.2966997027397156,
"eval_qasc_pairs_runtime": 1.213,
"eval_qasc_pairs_samples_per_second": 164.88,
"eval_qasc_pairs_steps_per_second": 4.122,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_openbookqa_pairs_loss": 1.6882672309875488,
"eval_openbookqa_pairs_runtime": 1.0662,
"eval_openbookqa_pairs_samples_per_second": 187.588,
"eval_openbookqa_pairs_steps_per_second": 4.69,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_msmarco_pairs_loss": 0.8505743145942688,
"eval_msmarco_pairs_runtime": 2.532,
"eval_msmarco_pairs_samples_per_second": 78.988,
"eval_msmarco_pairs_steps_per_second": 1.975,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_nq_pairs_loss": 0.7240181565284729,
"eval_nq_pairs_runtime": 5.6548,
"eval_nq_pairs_samples_per_second": 35.368,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_trivia_pairs_loss": 1.0354830026626587,
"eval_trivia_pairs_runtime": 9.0945,
"eval_trivia_pairs_samples_per_second": 21.991,
"eval_trivia_pairs_steps_per_second": 0.55,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_quora_pairs_loss": 0.2037644237279892,
"eval_quora_pairs_runtime": 0.626,
"eval_quora_pairs_samples_per_second": 319.489,
"eval_quora_pairs_steps_per_second": 7.987,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_gooaq_pairs_loss": 0.7034876346588135,
"eval_gooaq_pairs_runtime": 1.5472,
"eval_gooaq_pairs_samples_per_second": 129.268,
"eval_gooaq_pairs_steps_per_second": 3.232,
"step": 16286
},
{
"epoch": 2.5542659974905897,
"eval_mrpc_pairs_loss": 0.06078115478157997,
"eval_mrpc_pairs_runtime": 0.2496,
"eval_mrpc_pairs_samples_per_second": 801.195,
"eval_mrpc_pairs_steps_per_second": 20.03,
"step": 16286
},
{
"epoch": 2.5595984943538266,
"grad_norm": 6.243753433227539,
"learning_rate": 1.17131408564806e-05,
"loss": 0.9521,
"step": 16320
},
{
"epoch": 2.5671267252195733,
"grad_norm": 12.534939765930176,
"learning_rate": 1.1367242995042782e-05,
"loss": 0.732,
"step": 16368
},
{
"epoch": 2.57465495608532,
"grad_norm": 13.955587387084961,
"learning_rate": 1.102405545204967e-05,
"loss": 0.9313,
"step": 16416
},
{
"epoch": 2.5821831869510667,
"grad_norm": 7.020220756530762,
"learning_rate": 1.068372989624129e-05,
"loss": 1.0431,
"step": 16464
},
{
"epoch": 2.589711417816813,
"grad_norm": 15.900407791137695,
"learning_rate": 1.0346416731527386e-05,
"loss": 1.1909,
"step": 16512
},
{
"epoch": 2.5972396486825597,
"grad_norm": 11.426584243774414,
"learning_rate": 1.0012265030517757e-05,
"loss": 0.9152,
"step": 16560
},
{
"epoch": 2.604767879548306,
"grad_norm": 0.15851224958896637,
"learning_rate": 9.681422468640882e-06,
"loss": 0.9003,
"step": 16608
},
{
"epoch": 2.6122961104140527,
"grad_norm": 1.3470371961593628,
"learning_rate": 9.354035258880128e-06,
"loss": 0.5777,
"step": 16656
},
{
"epoch": 2.6198243412797995,
"grad_norm": 14.521584510803223,
"learning_rate": 9.030248087156094e-06,
"loss": 0.8359,
"step": 16704
},
{
"epoch": 2.6273525721455457,
"grad_norm": 15.556090354919434,
"learning_rate": 8.710204048383997e-06,
"loss": 1.1574,
"step": 16752
},
{
"epoch": 2.6293914680050188,
"eval_nli-pairs_loss": 0.8550112247467041,
"eval_nli-pairs_runtime": 4.0484,
"eval_nli-pairs_samples_per_second": 49.402,
"eval_nli-pairs_steps_per_second": 1.235,
"eval_sts-test_pearson_cosine": 0.7815759376810529,
"eval_sts-test_pearson_dot": 0.5413764939243199,
"eval_sts-test_pearson_euclidean": 0.7421239909657232,
"eval_sts-test_pearson_manhattan": 0.7399423671856244,
"eval_sts-test_pearson_max": 0.7815759376810529,
"eval_sts-test_spearman_cosine": 0.7956109012986412,
"eval_sts-test_spearman_dot": 0.518494920660312,
"eval_sts-test_spearman_euclidean": 0.7301949558022809,
"eval_sts-test_spearman_manhattan": 0.7309810878633547,
"eval_sts-test_spearman_max": 0.7956109012986412,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_vitaminc-pairs_loss": 4.490719795227051,
"eval_vitaminc-pairs_runtime": 1.4427,
"eval_vitaminc-pairs_samples_per_second": 115.058,
"eval_vitaminc-pairs_steps_per_second": 2.772,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_sts-label_loss": 4.298334121704102,
"eval_sts-label_runtime": 0.4246,
"eval_sts-label_samples_per_second": 471.002,
"eval_sts-label_steps_per_second": 11.775,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_qnli-contrastive_loss": 0.14397895336151123,
"eval_qnli-contrastive_runtime": 0.2899,
"eval_qnli-contrastive_samples_per_second": 689.966,
"eval_qnli-contrastive_steps_per_second": 17.249,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_scitail-pairs-qa_loss": 0.06725245714187622,
"eval_scitail-pairs-qa_runtime": 1.0627,
"eval_scitail-pairs-qa_samples_per_second": 188.196,
"eval_scitail-pairs-qa_steps_per_second": 4.705,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_scitail-pairs-pos_loss": 0.3813771903514862,
"eval_scitail-pairs-pos_runtime": 2.3735,
"eval_scitail-pairs-pos_samples_per_second": 84.265,
"eval_scitail-pairs-pos_steps_per_second": 2.107,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_xsum-pairs_loss": 0.27351853251457214,
"eval_xsum-pairs_runtime": 1.0447,
"eval_xsum-pairs_samples_per_second": 191.435,
"eval_xsum-pairs_steps_per_second": 4.786,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_compression-pairs_loss": 0.11104587465524673,
"eval_compression-pairs_runtime": 0.2446,
"eval_compression-pairs_samples_per_second": 817.566,
"eval_compression-pairs_steps_per_second": 20.439,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_sciq_pairs_loss": 0.27802807092666626,
"eval_sciq_pairs_runtime": 9.0532,
"eval_sciq_pairs_samples_per_second": 22.092,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_qasc_pairs_loss": 0.28649088740348816,
"eval_qasc_pairs_runtime": 1.2422,
"eval_qasc_pairs_samples_per_second": 161.005,
"eval_qasc_pairs_steps_per_second": 4.025,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_openbookqa_pairs_loss": 1.6384446620941162,
"eval_openbookqa_pairs_runtime": 1.0538,
"eval_openbookqa_pairs_samples_per_second": 189.787,
"eval_openbookqa_pairs_steps_per_second": 4.745,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_msmarco_pairs_loss": 0.8094324469566345,
"eval_msmarco_pairs_runtime": 2.537,
"eval_msmarco_pairs_samples_per_second": 78.833,
"eval_msmarco_pairs_steps_per_second": 1.971,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_nq_pairs_loss": 0.6893200874328613,
"eval_nq_pairs_runtime": 5.6388,
"eval_nq_pairs_samples_per_second": 35.468,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_trivia_pairs_loss": 0.9642792344093323,
"eval_trivia_pairs_runtime": 9.0635,
"eval_trivia_pairs_samples_per_second": 22.066,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_quora_pairs_loss": 0.16321201622486115,
"eval_quora_pairs_runtime": 0.6255,
"eval_quora_pairs_samples_per_second": 319.742,
"eval_quora_pairs_steps_per_second": 7.994,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_gooaq_pairs_loss": 0.677706778049469,
"eval_gooaq_pairs_runtime": 1.5511,
"eval_gooaq_pairs_samples_per_second": 128.938,
"eval_gooaq_pairs_steps_per_second": 3.223,
"step": 16765
},
{
"epoch": 2.6293914680050188,
"eval_mrpc_pairs_loss": 0.0573570542037487,
"eval_mrpc_pairs_runtime": 0.2424,
"eval_mrpc_pairs_samples_per_second": 824.943,
"eval_mrpc_pairs_steps_per_second": 20.624,
"step": 16765
},
{
"epoch": 2.6348808030112925,
"grad_norm": 2.5145435333251953,
"learning_rate": 8.394044583234119e-06,
"loss": 0.8489,
"step": 16800
},
{
"epoch": 2.6424090338770387,
"grad_norm": 3.8153061866760254,
"learning_rate": 8.088370253834692e-06,
"loss": 0.8827,
"step": 16848
},
{
"epoch": 2.6499372647427855,
"grad_norm": 6.136178493499756,
"learning_rate": 7.78030922106024e-06,
"loss": 1.0392,
"step": 16896
},
{
"epoch": 2.657465495608532,
"grad_norm": 1.2771908044815063,
"learning_rate": 7.4765437208127956e-06,
"loss": 1.0452,
"step": 16944
},
{
"epoch": 2.6649937264742785,
"grad_norm": 7.610607147216797,
"learning_rate": 7.177207999604577e-06,
"loss": 0.6671,
"step": 16992
},
{
"epoch": 2.672521957340025,
"grad_norm": 3.370135545730591,
"learning_rate": 6.888528052397871e-06,
"loss": 1.3255,
"step": 17040
},
{
"epoch": 2.6800501882057715,
"grad_norm": 0.2599998116493225,
"learning_rate": 6.598347667558871e-06,
"loss": 0.9193,
"step": 17088
},
{
"epoch": 2.687578419071518,
"grad_norm": 6.205481052398682,
"learning_rate": 6.312985172841809e-06,
"loss": 1.0503,
"step": 17136
},
{
"epoch": 2.695106649937265,
"grad_norm": 1.6039291620254517,
"learning_rate": 6.032566681711187e-06,
"loss": 1.1439,
"step": 17184
}
],
"logging_steps": 48,
"max_steps": 19128,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1913,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 42,
"trial_name": null,
"trial_params": null
}