bobox's picture
Training in progress, step 7652, checkpoint
a9fee8f verified
raw
history blame
106 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.200125470514429,
"eval_steps": 479,
"global_step": 7652,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0075282308657465494,
"grad_norm": 87.12931823730469,
"learning_rate": 3.147218736930155e-07,
"loss": 12.3074,
"step": 48
},
{
"epoch": 0.015056461731493099,
"grad_norm": 64.15751647949219,
"learning_rate": 6.660393140945211e-07,
"loss": 15.7221,
"step": 96
},
{
"epoch": 0.02258469259723965,
"grad_norm": 81.39651489257812,
"learning_rate": 1.0173567544960265e-06,
"loss": 10.8027,
"step": 144
},
{
"epoch": 0.030112923462986198,
"grad_norm": 22.048904418945312,
"learning_rate": 1.3686741948975323e-06,
"loss": 8.9559,
"step": 192
},
{
"epoch": 0.037641154328732745,
"grad_norm": 11.542724609375,
"learning_rate": 1.7126725219573398e-06,
"loss": 8.8511,
"step": 240
},
{
"epoch": 0.0451693851944793,
"grad_norm": 10.625059127807617,
"learning_rate": 2.063989962358846e-06,
"loss": 9.3478,
"step": 288
},
{
"epoch": 0.05269761606022585,
"grad_norm": 14.21434211730957,
"learning_rate": 2.415307402760351e-06,
"loss": 8.8892,
"step": 336
},
{
"epoch": 0.060225846925972396,
"grad_norm": 13.216053009033203,
"learning_rate": 2.7666248431618565e-06,
"loss": 8.3008,
"step": 384
},
{
"epoch": 0.06775407779171895,
"grad_norm": 22.503334045410156,
"learning_rate": 3.117942283563362e-06,
"loss": 7.3455,
"step": 432
},
{
"epoch": 0.07512547051442911,
"eval_nli-pairs_loss": 6.591032028198242,
"eval_nli-pairs_runtime": 4.3469,
"eval_nli-pairs_samples_per_second": 46.01,
"eval_nli-pairs_steps_per_second": 1.15,
"eval_sts-test_pearson_cosine": 0.39488461174644296,
"eval_sts-test_pearson_dot": 0.15593446481859455,
"eval_sts-test_pearson_euclidean": 0.39975070029693277,
"eval_sts-test_pearson_manhattan": 0.4314268556737928,
"eval_sts-test_pearson_max": 0.4314268556737928,
"eval_sts-test_spearman_cosine": 0.3997824055251076,
"eval_sts-test_spearman_dot": 0.14324216739430146,
"eval_sts-test_spearman_euclidean": 0.40262274612650517,
"eval_sts-test_spearman_manhattan": 0.42925492969387746,
"eval_sts-test_spearman_max": 0.42925492969387746,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_vitaminc-pairs_loss": 6.247874736785889,
"eval_vitaminc-pairs_runtime": 1.4382,
"eval_vitaminc-pairs_samples_per_second": 115.426,
"eval_vitaminc-pairs_steps_per_second": 2.781,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_sts-label_loss": 3.371708869934082,
"eval_sts-label_runtime": 0.3925,
"eval_sts-label_samples_per_second": 509.603,
"eval_sts-label_steps_per_second": 12.74,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_qnli-contrastive_loss": 3.5311310291290283,
"eval_qnli-contrastive_runtime": 0.2814,
"eval_qnli-contrastive_samples_per_second": 710.798,
"eval_qnli-contrastive_steps_per_second": 17.77,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_scitail-pairs-qa_loss": 5.4017333984375,
"eval_scitail-pairs-qa_runtime": 1.1087,
"eval_scitail-pairs-qa_samples_per_second": 180.387,
"eval_scitail-pairs-qa_steps_per_second": 4.51,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_scitail-pairs-pos_loss": 4.29502534866333,
"eval_scitail-pairs-pos_runtime": 2.4269,
"eval_scitail-pairs-pos_samples_per_second": 82.41,
"eval_scitail-pairs-pos_steps_per_second": 2.06,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_xsum-pairs_loss": 3.540722370147705,
"eval_xsum-pairs_runtime": 1.0447,
"eval_xsum-pairs_samples_per_second": 191.444,
"eval_xsum-pairs_steps_per_second": 4.786,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_compression-pairs_loss": 2.8060033321380615,
"eval_compression-pairs_runtime": 0.2399,
"eval_compression-pairs_samples_per_second": 833.621,
"eval_compression-pairs_steps_per_second": 20.841,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_sciq_pairs_loss": 10.621454238891602,
"eval_sciq_pairs_runtime": 9.0638,
"eval_sciq_pairs_samples_per_second": 22.066,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_qasc_pairs_loss": 7.7197771072387695,
"eval_qasc_pairs_runtime": 1.2078,
"eval_qasc_pairs_samples_per_second": 165.595,
"eval_qasc_pairs_steps_per_second": 4.14,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_openbookqa_pairs_loss": 7.620975494384766,
"eval_openbookqa_pairs_runtime": 1.053,
"eval_openbookqa_pairs_samples_per_second": 189.941,
"eval_openbookqa_pairs_steps_per_second": 4.749,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_msmarco_pairs_loss": 8.353594779968262,
"eval_msmarco_pairs_runtime": 2.5338,
"eval_msmarco_pairs_samples_per_second": 78.932,
"eval_msmarco_pairs_steps_per_second": 1.973,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_nq_pairs_loss": 7.995354652404785,
"eval_nq_pairs_runtime": 5.7107,
"eval_nq_pairs_samples_per_second": 35.022,
"eval_nq_pairs_steps_per_second": 0.876,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_trivia_pairs_loss": 8.177907943725586,
"eval_trivia_pairs_runtime": 9.1824,
"eval_trivia_pairs_samples_per_second": 21.781,
"eval_trivia_pairs_steps_per_second": 0.545,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_quora_pairs_loss": 1.1983369588851929,
"eval_quora_pairs_runtime": 0.659,
"eval_quora_pairs_samples_per_second": 303.48,
"eval_quora_pairs_steps_per_second": 7.587,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_gooaq_pairs_loss": 7.4573974609375,
"eval_gooaq_pairs_runtime": 1.5917,
"eval_gooaq_pairs_samples_per_second": 125.65,
"eval_gooaq_pairs_steps_per_second": 3.141,
"step": 479
},
{
"epoch": 0.07512547051442911,
"eval_mrpc_pairs_loss": 2.1152825355529785,
"eval_mrpc_pairs_runtime": 0.241,
"eval_mrpc_pairs_samples_per_second": 829.751,
"eval_mrpc_pairs_steps_per_second": 20.744,
"step": 479
},
{
"epoch": 0.07528230865746549,
"grad_norm": 17.753456115722656,
"learning_rate": 3.469259723964868e-06,
"loss": 8.0369,
"step": 480
},
{
"epoch": 0.08281053952321205,
"grad_norm": 12.012594223022461,
"learning_rate": 3.820577164366374e-06,
"loss": 6.2732,
"step": 528
},
{
"epoch": 0.0903387703889586,
"grad_norm": 34.92698287963867,
"learning_rate": 4.1718946047678796e-06,
"loss": 7.8529,
"step": 576
},
{
"epoch": 0.09786700125470514,
"grad_norm": 23.564632415771484,
"learning_rate": 4.523212045169385e-06,
"loss": 5.8643,
"step": 624
},
{
"epoch": 0.1053952321204517,
"grad_norm": 22.126293182373047,
"learning_rate": 4.874529485570891e-06,
"loss": 6.3179,
"step": 672
},
{
"epoch": 0.11292346298619825,
"grad_norm": 25.067686080932617,
"learning_rate": 5.225846925972396e-06,
"loss": 6.1175,
"step": 720
},
{
"epoch": 0.12045169385194479,
"grad_norm": 29.170730590820312,
"learning_rate": 5.577164366373902e-06,
"loss": 5.2392,
"step": 768
},
{
"epoch": 0.12797992471769135,
"grad_norm": 29.377540588378906,
"learning_rate": 5.928481806775407e-06,
"loss": 5.8324,
"step": 816
},
{
"epoch": 0.1355081555834379,
"grad_norm": 33.512088775634766,
"learning_rate": 6.279799247176913e-06,
"loss": 5.1523,
"step": 864
},
{
"epoch": 0.14303638644918445,
"grad_norm": 32.54931640625,
"learning_rate": 6.6311166875784185e-06,
"loss": 6.0303,
"step": 912
},
{
"epoch": 0.15025094102885822,
"eval_nli-pairs_loss": 4.317643165588379,
"eval_nli-pairs_runtime": 4.0158,
"eval_nli-pairs_samples_per_second": 49.803,
"eval_nli-pairs_steps_per_second": 1.245,
"eval_sts-test_pearson_cosine": 0.6751726661173544,
"eval_sts-test_pearson_dot": 0.5308173325280101,
"eval_sts-test_pearson_euclidean": 0.6660519042507951,
"eval_sts-test_pearson_manhattan": 0.6723411683739887,
"eval_sts-test_pearson_max": 0.6751726661173544,
"eval_sts-test_spearman_cosine": 0.6458966208807124,
"eval_sts-test_spearman_dot": 0.5040208096497271,
"eval_sts-test_spearman_euclidean": 0.6447205374312966,
"eval_sts-test_spearman_manhattan": 0.6498560301461127,
"eval_sts-test_spearman_max": 0.6498560301461127,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_vitaminc-pairs_loss": 6.306981563568115,
"eval_vitaminc-pairs_runtime": 1.4737,
"eval_vitaminc-pairs_samples_per_second": 112.643,
"eval_vitaminc-pairs_steps_per_second": 2.714,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_sts-label_loss": 3.7677345275878906,
"eval_sts-label_runtime": 0.4136,
"eval_sts-label_samples_per_second": 483.59,
"eval_sts-label_steps_per_second": 12.09,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_qnli-contrastive_loss": 2.8633975982666016,
"eval_qnli-contrastive_runtime": 0.2816,
"eval_qnli-contrastive_samples_per_second": 710.123,
"eval_qnli-contrastive_steps_per_second": 17.753,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_scitail-pairs-qa_loss": 1.4301409721374512,
"eval_scitail-pairs-qa_runtime": 1.0526,
"eval_scitail-pairs-qa_samples_per_second": 190.003,
"eval_scitail-pairs-qa_steps_per_second": 4.75,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_scitail-pairs-pos_loss": 2.468087673187256,
"eval_scitail-pairs-pos_runtime": 2.3275,
"eval_scitail-pairs-pos_samples_per_second": 85.928,
"eval_scitail-pairs-pos_steps_per_second": 2.148,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_xsum-pairs_loss": 2.153658151626587,
"eval_xsum-pairs_runtime": 1.0409,
"eval_xsum-pairs_samples_per_second": 192.151,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_compression-pairs_loss": 1.6288033723831177,
"eval_compression-pairs_runtime": 0.2383,
"eval_compression-pairs_samples_per_second": 839.253,
"eval_compression-pairs_steps_per_second": 20.981,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_sciq_pairs_loss": 9.78779411315918,
"eval_sciq_pairs_runtime": 9.0233,
"eval_sciq_pairs_samples_per_second": 22.165,
"eval_sciq_pairs_steps_per_second": 0.554,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_qasc_pairs_loss": 3.7814972400665283,
"eval_qasc_pairs_runtime": 1.2108,
"eval_qasc_pairs_samples_per_second": 165.179,
"eval_qasc_pairs_steps_per_second": 4.129,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_openbookqa_pairs_loss": 5.005772590637207,
"eval_openbookqa_pairs_runtime": 1.0415,
"eval_openbookqa_pairs_samples_per_second": 192.037,
"eval_openbookqa_pairs_steps_per_second": 4.801,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_msmarco_pairs_loss": 4.574879169464111,
"eval_msmarco_pairs_runtime": 2.527,
"eval_msmarco_pairs_samples_per_second": 79.146,
"eval_msmarco_pairs_steps_per_second": 1.979,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_nq_pairs_loss": 5.281248569488525,
"eval_nq_pairs_runtime": 5.6503,
"eval_nq_pairs_samples_per_second": 35.397,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_trivia_pairs_loss": 4.913428783416748,
"eval_trivia_pairs_runtime": 9.0564,
"eval_trivia_pairs_samples_per_second": 22.084,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_quora_pairs_loss": 0.9212128520011902,
"eval_quora_pairs_runtime": 0.6293,
"eval_quora_pairs_samples_per_second": 317.839,
"eval_quora_pairs_steps_per_second": 7.946,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_gooaq_pairs_loss": 3.932173490524292,
"eval_gooaq_pairs_runtime": 1.6066,
"eval_gooaq_pairs_samples_per_second": 124.483,
"eval_gooaq_pairs_steps_per_second": 3.112,
"step": 958
},
{
"epoch": 0.15025094102885822,
"eval_mrpc_pairs_loss": 1.0853501558303833,
"eval_mrpc_pairs_runtime": 0.2402,
"eval_mrpc_pairs_samples_per_second": 832.56,
"eval_mrpc_pairs_steps_per_second": 20.814,
"step": 958
},
{
"epoch": 0.15056461731493098,
"grad_norm": 28.37123680114746,
"learning_rate": 6.982434127979924e-06,
"loss": 5.7748,
"step": 960
},
{
"epoch": 0.15809284818067754,
"grad_norm": 26.708221435546875,
"learning_rate": 7.33375156838143e-06,
"loss": 4.8728,
"step": 1008
},
{
"epoch": 0.1656210790464241,
"grad_norm": 26.786447525024414,
"learning_rate": 7.685069008782934e-06,
"loss": 4.7375,
"step": 1056
},
{
"epoch": 0.17314930991217065,
"grad_norm": 26.215879440307617,
"learning_rate": 8.03638644918444e-06,
"loss": 4.6766,
"step": 1104
},
{
"epoch": 0.1806775407779172,
"grad_norm": 35.618831634521484,
"learning_rate": 8.387703889585947e-06,
"loss": 4.3209,
"step": 1152
},
{
"epoch": 0.18820577164366373,
"grad_norm": 37.166072845458984,
"learning_rate": 8.739021329987453e-06,
"loss": 3.7761,
"step": 1200
},
{
"epoch": 0.19573400250941028,
"grad_norm": 35.78367233276367,
"learning_rate": 9.090338770388957e-06,
"loss": 4.2161,
"step": 1248
},
{
"epoch": 0.20326223337515684,
"grad_norm": 36.299678802490234,
"learning_rate": 9.441656210790464e-06,
"loss": 4.9089,
"step": 1296
},
{
"epoch": 0.2107904642409034,
"grad_norm": 18.610933303833008,
"learning_rate": 9.792973651191968e-06,
"loss": 4.3406,
"step": 1344
},
{
"epoch": 0.21831869510664995,
"grad_norm": 9.592538833618164,
"learning_rate": 1.0144291091593475e-05,
"loss": 3.5664,
"step": 1392
},
{
"epoch": 0.22537641154328733,
"eval_nli-pairs_loss": 3.2245519161224365,
"eval_nli-pairs_runtime": 4.0436,
"eval_nli-pairs_samples_per_second": 49.461,
"eval_nli-pairs_steps_per_second": 1.237,
"eval_sts-test_pearson_cosine": 0.7002978854888552,
"eval_sts-test_pearson_dot": 0.5685392445320393,
"eval_sts-test_pearson_euclidean": 0.6963744527231541,
"eval_sts-test_pearson_manhattan": 0.7050517306003169,
"eval_sts-test_pearson_max": 0.7050517306003169,
"eval_sts-test_spearman_cosine": 0.6718756239728468,
"eval_sts-test_spearman_dot": 0.5416448961602434,
"eval_sts-test_spearman_euclidean": 0.6742379556154348,
"eval_sts-test_spearman_manhattan": 0.6824201536078427,
"eval_sts-test_spearman_max": 0.6824201536078427,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_vitaminc-pairs_loss": 6.715206623077393,
"eval_vitaminc-pairs_runtime": 1.4251,
"eval_vitaminc-pairs_samples_per_second": 116.481,
"eval_vitaminc-pairs_steps_per_second": 2.807,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_sts-label_loss": 4.016364097595215,
"eval_sts-label_runtime": 0.4049,
"eval_sts-label_samples_per_second": 493.95,
"eval_sts-label_steps_per_second": 12.349,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_qnli-contrastive_loss": 1.999517560005188,
"eval_qnli-contrastive_runtime": 0.2804,
"eval_qnli-contrastive_samples_per_second": 713.282,
"eval_qnli-contrastive_steps_per_second": 17.832,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_scitail-pairs-qa_loss": 1.0403239727020264,
"eval_scitail-pairs-qa_runtime": 1.0483,
"eval_scitail-pairs-qa_samples_per_second": 190.793,
"eval_scitail-pairs-qa_steps_per_second": 4.77,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_scitail-pairs-pos_loss": 1.9232473373413086,
"eval_scitail-pairs-pos_runtime": 2.3447,
"eval_scitail-pairs-pos_samples_per_second": 85.298,
"eval_scitail-pairs-pos_steps_per_second": 2.132,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_xsum-pairs_loss": 1.6821197271347046,
"eval_xsum-pairs_runtime": 1.0422,
"eval_xsum-pairs_samples_per_second": 191.901,
"eval_xsum-pairs_steps_per_second": 4.798,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_compression-pairs_loss": 1.1713249683380127,
"eval_compression-pairs_runtime": 0.2392,
"eval_compression-pairs_samples_per_second": 836.05,
"eval_compression-pairs_steps_per_second": 20.901,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_sciq_pairs_loss": 9.443825721740723,
"eval_sciq_pairs_runtime": 8.9916,
"eval_sciq_pairs_samples_per_second": 22.243,
"eval_sciq_pairs_steps_per_second": 0.556,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_qasc_pairs_loss": 2.9044029712677,
"eval_qasc_pairs_runtime": 1.2182,
"eval_qasc_pairs_samples_per_second": 164.182,
"eval_qasc_pairs_steps_per_second": 4.105,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_openbookqa_pairs_loss": 4.360418796539307,
"eval_openbookqa_pairs_runtime": 1.0522,
"eval_openbookqa_pairs_samples_per_second": 190.077,
"eval_openbookqa_pairs_steps_per_second": 4.752,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_msmarco_pairs_loss": 3.516049861907959,
"eval_msmarco_pairs_runtime": 2.5595,
"eval_msmarco_pairs_samples_per_second": 78.139,
"eval_msmarco_pairs_steps_per_second": 1.953,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_nq_pairs_loss": 4.016308784484863,
"eval_nq_pairs_runtime": 5.6561,
"eval_nq_pairs_samples_per_second": 35.36,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_trivia_pairs_loss": 3.781872272491455,
"eval_trivia_pairs_runtime": 9.0801,
"eval_trivia_pairs_samples_per_second": 22.026,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_quora_pairs_loss": 0.8747495412826538,
"eval_quora_pairs_runtime": 0.6229,
"eval_quora_pairs_samples_per_second": 321.076,
"eval_quora_pairs_steps_per_second": 8.027,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_gooaq_pairs_loss": 3.0769765377044678,
"eval_gooaq_pairs_runtime": 1.552,
"eval_gooaq_pairs_samples_per_second": 128.863,
"eval_gooaq_pairs_steps_per_second": 3.222,
"step": 1437
},
{
"epoch": 0.22537641154328733,
"eval_mrpc_pairs_loss": 0.7370794415473938,
"eval_mrpc_pairs_runtime": 0.2401,
"eval_mrpc_pairs_samples_per_second": 832.867,
"eval_mrpc_pairs_steps_per_second": 20.822,
"step": 1437
},
{
"epoch": 0.2258469259723965,
"grad_norm": 29.73522186279297,
"learning_rate": 1.049560853199498e-05,
"loss": 4.7194,
"step": 1440
},
{
"epoch": 0.23337515683814303,
"grad_norm": 30.467117309570312,
"learning_rate": 1.0846925972396486e-05,
"loss": 3.6345,
"step": 1488
},
{
"epoch": 0.24090338770388958,
"grad_norm": 24.454021453857422,
"learning_rate": 1.1198243412797992e-05,
"loss": 3.5947,
"step": 1536
},
{
"epoch": 0.24843161856963614,
"grad_norm": 20.165475845336914,
"learning_rate": 1.1549560853199497e-05,
"loss": 4.0526,
"step": 1584
},
{
"epoch": 0.2559598494353827,
"grad_norm": 34.79319381713867,
"learning_rate": 1.1900878293601003e-05,
"loss": 3.7962,
"step": 1632
},
{
"epoch": 0.26348808030112925,
"grad_norm": 127.97925567626953,
"learning_rate": 1.2252195734002508e-05,
"loss": 4.1927,
"step": 1680
},
{
"epoch": 0.2710163111668758,
"grad_norm": 27.80243682861328,
"learning_rate": 1.2603513174404014e-05,
"loss": 3.6351,
"step": 1728
},
{
"epoch": 0.27854454203262236,
"grad_norm": 31.81105613708496,
"learning_rate": 1.295483061480552e-05,
"loss": 3.4256,
"step": 1776
},
{
"epoch": 0.2860727728983689,
"grad_norm": 32.932865142822266,
"learning_rate": 1.3306148055207025e-05,
"loss": 3.3175,
"step": 1824
},
{
"epoch": 0.2936010037641154,
"grad_norm": 31.197385787963867,
"learning_rate": 1.365746549560853e-05,
"loss": 3.4984,
"step": 1872
},
{
"epoch": 0.30050188205771644,
"eval_nli-pairs_loss": 2.852742910385132,
"eval_nli-pairs_runtime": 4.1529,
"eval_nli-pairs_samples_per_second": 48.16,
"eval_nli-pairs_steps_per_second": 1.204,
"eval_sts-test_pearson_cosine": 0.7132313507241694,
"eval_sts-test_pearson_dot": 0.559846529627866,
"eval_sts-test_pearson_euclidean": 0.7145939583366395,
"eval_sts-test_pearson_manhattan": 0.724552982808093,
"eval_sts-test_pearson_max": 0.724552982808093,
"eval_sts-test_spearman_cosine": 0.6912239915389706,
"eval_sts-test_spearman_dot": 0.5394217029355446,
"eval_sts-test_spearman_euclidean": 0.6946616748545426,
"eval_sts-test_spearman_manhattan": 0.70491424059339,
"eval_sts-test_spearman_max": 0.70491424059339,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_vitaminc-pairs_loss": 6.633151054382324,
"eval_vitaminc-pairs_runtime": 1.4454,
"eval_vitaminc-pairs_samples_per_second": 114.849,
"eval_vitaminc-pairs_steps_per_second": 2.767,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_sts-label_loss": 3.8717281818389893,
"eval_sts-label_runtime": 0.4016,
"eval_sts-label_samples_per_second": 498.049,
"eval_sts-label_steps_per_second": 12.451,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_qnli-contrastive_loss": 1.4170150756835938,
"eval_qnli-contrastive_runtime": 0.2814,
"eval_qnli-contrastive_samples_per_second": 710.85,
"eval_qnli-contrastive_steps_per_second": 17.771,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_scitail-pairs-qa_loss": 0.6900365948677063,
"eval_scitail-pairs-qa_runtime": 1.0611,
"eval_scitail-pairs-qa_samples_per_second": 188.492,
"eval_scitail-pairs-qa_steps_per_second": 4.712,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_scitail-pairs-pos_loss": 1.352358102798462,
"eval_scitail-pairs-pos_runtime": 2.3596,
"eval_scitail-pairs-pos_samples_per_second": 84.761,
"eval_scitail-pairs-pos_steps_per_second": 2.119,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_xsum-pairs_loss": 1.376610517501831,
"eval_xsum-pairs_runtime": 1.0393,
"eval_xsum-pairs_samples_per_second": 192.443,
"eval_xsum-pairs_steps_per_second": 4.811,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_compression-pairs_loss": 0.871735692024231,
"eval_compression-pairs_runtime": 0.2351,
"eval_compression-pairs_samples_per_second": 850.579,
"eval_compression-pairs_steps_per_second": 21.264,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_sciq_pairs_loss": 9.191713333129883,
"eval_sciq_pairs_runtime": 9.1572,
"eval_sciq_pairs_samples_per_second": 21.841,
"eval_sciq_pairs_steps_per_second": 0.546,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_qasc_pairs_loss": 2.369694709777832,
"eval_qasc_pairs_runtime": 1.2239,
"eval_qasc_pairs_samples_per_second": 163.415,
"eval_qasc_pairs_steps_per_second": 4.085,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_openbookqa_pairs_loss": 3.9601967334747314,
"eval_openbookqa_pairs_runtime": 1.0681,
"eval_openbookqa_pairs_samples_per_second": 187.247,
"eval_openbookqa_pairs_steps_per_second": 4.681,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_msmarco_pairs_loss": 3.0808801651000977,
"eval_msmarco_pairs_runtime": 2.5507,
"eval_msmarco_pairs_samples_per_second": 78.409,
"eval_msmarco_pairs_steps_per_second": 1.96,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_nq_pairs_loss": 3.4922549724578857,
"eval_nq_pairs_runtime": 5.7154,
"eval_nq_pairs_samples_per_second": 34.993,
"eval_nq_pairs_steps_per_second": 0.875,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_trivia_pairs_loss": 3.4910638332366943,
"eval_trivia_pairs_runtime": 9.1195,
"eval_trivia_pairs_samples_per_second": 21.931,
"eval_trivia_pairs_steps_per_second": 0.548,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_quora_pairs_loss": 0.833874523639679,
"eval_quora_pairs_runtime": 0.6419,
"eval_quora_pairs_samples_per_second": 311.554,
"eval_quora_pairs_steps_per_second": 7.789,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_gooaq_pairs_loss": 2.622526168823242,
"eval_gooaq_pairs_runtime": 1.5751,
"eval_gooaq_pairs_samples_per_second": 126.977,
"eval_gooaq_pairs_steps_per_second": 3.174,
"step": 1916
},
{
"epoch": 0.30050188205771644,
"eval_mrpc_pairs_loss": 0.4888114929199219,
"eval_mrpc_pairs_runtime": 0.2398,
"eval_mrpc_pairs_samples_per_second": 833.994,
"eval_mrpc_pairs_steps_per_second": 20.85,
"step": 1916
},
{
"epoch": 0.30112923462986196,
"grad_norm": 6.916851997375488,
"learning_rate": 1.4008782936010036e-05,
"loss": 2.7233,
"step": 1920
},
{
"epoch": 0.3086574654956085,
"grad_norm": 17.678085327148438,
"learning_rate": 1.4360100376411543e-05,
"loss": 3.6816,
"step": 1968
},
{
"epoch": 0.3161856963613551,
"grad_norm": 92.62138366699219,
"learning_rate": 1.4711417816813047e-05,
"loss": 3.3232,
"step": 2016
},
{
"epoch": 0.3237139272271016,
"grad_norm": 27.2542781829834,
"learning_rate": 1.5062735257214554e-05,
"loss": 3.3469,
"step": 2064
},
{
"epoch": 0.3312421580928482,
"grad_norm": 6.005978584289551,
"learning_rate": 1.5414052697616058e-05,
"loss": 3.7509,
"step": 2112
},
{
"epoch": 0.33877038895859474,
"grad_norm": 16.488624572753906,
"learning_rate": 1.5765370138017566e-05,
"loss": 3.1811,
"step": 2160
},
{
"epoch": 0.3462986198243413,
"grad_norm": 11.462204933166504,
"learning_rate": 1.611668757841907e-05,
"loss": 3.3341,
"step": 2208
},
{
"epoch": 0.35382685069008785,
"grad_norm": 33.485206604003906,
"learning_rate": 1.6468005018820577e-05,
"loss": 2.764,
"step": 2256
},
{
"epoch": 0.3613550815558344,
"grad_norm": 25.066240310668945,
"learning_rate": 1.681932245922208e-05,
"loss": 3.6488,
"step": 2304
},
{
"epoch": 0.36888331242158096,
"grad_norm": 28.305265426635742,
"learning_rate": 1.7170639899623588e-05,
"loss": 2.721,
"step": 2352
},
{
"epoch": 0.3756273525721455,
"eval_nli-pairs_loss": 2.527458667755127,
"eval_nli-pairs_runtime": 4.1153,
"eval_nli-pairs_samples_per_second": 48.599,
"eval_nli-pairs_steps_per_second": 1.215,
"eval_sts-test_pearson_cosine": 0.7258900302408404,
"eval_sts-test_pearson_dot": 0.5655223839113195,
"eval_sts-test_pearson_euclidean": 0.7228747263710285,
"eval_sts-test_pearson_manhattan": 0.732591374373909,
"eval_sts-test_pearson_max": 0.732591374373909,
"eval_sts-test_spearman_cosine": 0.707910346125958,
"eval_sts-test_spearman_dot": 0.5482635095738919,
"eval_sts-test_spearman_euclidean": 0.7064759533156177,
"eval_sts-test_spearman_manhattan": 0.7166423493246757,
"eval_sts-test_spearman_max": 0.7166423493246757,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_vitaminc-pairs_loss": 6.437549114227295,
"eval_vitaminc-pairs_runtime": 1.4278,
"eval_vitaminc-pairs_samples_per_second": 116.261,
"eval_vitaminc-pairs_steps_per_second": 2.801,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_sts-label_loss": 4.1980671882629395,
"eval_sts-label_runtime": 0.3956,
"eval_sts-label_samples_per_second": 505.555,
"eval_sts-label_steps_per_second": 12.639,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_qnli-contrastive_loss": 1.0682133436203003,
"eval_qnli-contrastive_runtime": 0.2789,
"eval_qnli-contrastive_samples_per_second": 717.152,
"eval_qnli-contrastive_steps_per_second": 17.929,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_scitail-pairs-qa_loss": 0.5046552419662476,
"eval_scitail-pairs-qa_runtime": 1.0451,
"eval_scitail-pairs-qa_samples_per_second": 191.365,
"eval_scitail-pairs-qa_steps_per_second": 4.784,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_scitail-pairs-pos_loss": 1.1998459100723267,
"eval_scitail-pairs-pos_runtime": 2.3442,
"eval_scitail-pairs-pos_samples_per_second": 85.316,
"eval_scitail-pairs-pos_steps_per_second": 2.133,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_xsum-pairs_loss": 1.1817097663879395,
"eval_xsum-pairs_runtime": 1.0372,
"eval_xsum-pairs_samples_per_second": 192.835,
"eval_xsum-pairs_steps_per_second": 4.821,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_compression-pairs_loss": 0.6974765062332153,
"eval_compression-pairs_runtime": 0.2369,
"eval_compression-pairs_samples_per_second": 844.401,
"eval_compression-pairs_steps_per_second": 21.11,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_sciq_pairs_loss": 8.970888137817383,
"eval_sciq_pairs_runtime": 9.0441,
"eval_sciq_pairs_samples_per_second": 22.114,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_qasc_pairs_loss": 1.9235339164733887,
"eval_qasc_pairs_runtime": 1.2061,
"eval_qasc_pairs_samples_per_second": 165.828,
"eval_qasc_pairs_steps_per_second": 4.146,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_openbookqa_pairs_loss": 3.6225194931030273,
"eval_openbookqa_pairs_runtime": 1.0455,
"eval_openbookqa_pairs_samples_per_second": 191.296,
"eval_openbookqa_pairs_steps_per_second": 4.782,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_msmarco_pairs_loss": 2.664341926574707,
"eval_msmarco_pairs_runtime": 2.5305,
"eval_msmarco_pairs_samples_per_second": 79.036,
"eval_msmarco_pairs_steps_per_second": 1.976,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_nq_pairs_loss": 3.055206298828125,
"eval_nq_pairs_runtime": 5.6527,
"eval_nq_pairs_samples_per_second": 35.381,
"eval_nq_pairs_steps_per_second": 0.885,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_trivia_pairs_loss": 2.9497525691986084,
"eval_trivia_pairs_runtime": 9.0334,
"eval_trivia_pairs_samples_per_second": 22.14,
"eval_trivia_pairs_steps_per_second": 0.554,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_quora_pairs_loss": 0.7771684527397156,
"eval_quora_pairs_runtime": 0.626,
"eval_quora_pairs_samples_per_second": 319.495,
"eval_quora_pairs_steps_per_second": 7.987,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_gooaq_pairs_loss": 2.266879081726074,
"eval_gooaq_pairs_runtime": 1.5425,
"eval_gooaq_pairs_samples_per_second": 129.664,
"eval_gooaq_pairs_steps_per_second": 3.242,
"step": 2395
},
{
"epoch": 0.3756273525721455,
"eval_mrpc_pairs_loss": 0.36913084983825684,
"eval_mrpc_pairs_runtime": 0.2383,
"eval_mrpc_pairs_samples_per_second": 839.153,
"eval_mrpc_pairs_steps_per_second": 20.979,
"step": 2395
},
{
"epoch": 0.37641154328732745,
"grad_norm": 36.60768127441406,
"learning_rate": 1.752195734002509e-05,
"loss": 3.3609,
"step": 2400
},
{
"epoch": 0.383939774153074,
"grad_norm": 24.15782928466797,
"learning_rate": 1.7873274780426595e-05,
"loss": 2.6252,
"step": 2448
},
{
"epoch": 0.39146800501882056,
"grad_norm": 7.509932041168213,
"learning_rate": 1.8224592220828106e-05,
"loss": 3.5142,
"step": 2496
},
{
"epoch": 0.3989962358845671,
"grad_norm": 29.380950927734375,
"learning_rate": 1.857590966122961e-05,
"loss": 4.0597,
"step": 2544
},
{
"epoch": 0.4065244667503137,
"grad_norm": 28.593975067138672,
"learning_rate": 1.8927227101631114e-05,
"loss": 2.8512,
"step": 2592
},
{
"epoch": 0.41405269761606023,
"grad_norm": 21.228628158569336,
"learning_rate": 1.927854454203262e-05,
"loss": 2.717,
"step": 2640
},
{
"epoch": 0.4215809284818068,
"grad_norm": 43.00386047363281,
"learning_rate": 1.962986198243413e-05,
"loss": 3.4717,
"step": 2688
},
{
"epoch": 0.42910915934755334,
"grad_norm": 25.004785537719727,
"learning_rate": 1.9981179422835632e-05,
"loss": 3.1105,
"step": 2736
},
{
"epoch": 0.4366373902132999,
"grad_norm": 7.555154323577881,
"learning_rate": 2.0332496863237136e-05,
"loss": 2.7798,
"step": 2784
},
{
"epoch": 0.44416562107904645,
"grad_norm": 30.839733123779297,
"learning_rate": 2.0683814303638643e-05,
"loss": 3.3606,
"step": 2832
},
{
"epoch": 0.45075282308657466,
"eval_nli-pairs_loss": 2.284590721130371,
"eval_nli-pairs_runtime": 4.0714,
"eval_nli-pairs_samples_per_second": 49.123,
"eval_nli-pairs_steps_per_second": 1.228,
"eval_sts-test_pearson_cosine": 0.7382507781851606,
"eval_sts-test_pearson_dot": 0.5710221319397019,
"eval_sts-test_pearson_euclidean": 0.7307583601561211,
"eval_sts-test_pearson_manhattan": 0.7394202696141936,
"eval_sts-test_pearson_max": 0.7394202696141936,
"eval_sts-test_spearman_cosine": 0.7211579109789371,
"eval_sts-test_spearman_dot": 0.5515579746967598,
"eval_sts-test_spearman_euclidean": 0.7142073811971875,
"eval_sts-test_spearman_manhattan": 0.7240537218564107,
"eval_sts-test_spearman_max": 0.7240537218564107,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_vitaminc-pairs_loss": 6.35264253616333,
"eval_vitaminc-pairs_runtime": 1.4349,
"eval_vitaminc-pairs_samples_per_second": 115.687,
"eval_vitaminc-pairs_steps_per_second": 2.788,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_sts-label_loss": 4.186042308807373,
"eval_sts-label_runtime": 0.3983,
"eval_sts-label_samples_per_second": 502.129,
"eval_sts-label_steps_per_second": 12.553,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_qnli-contrastive_loss": 0.781445324420929,
"eval_qnli-contrastive_runtime": 0.2765,
"eval_qnli-contrastive_samples_per_second": 723.448,
"eval_qnli-contrastive_steps_per_second": 18.086,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_scitail-pairs-qa_loss": 0.4217279851436615,
"eval_scitail-pairs-qa_runtime": 1.0438,
"eval_scitail-pairs-qa_samples_per_second": 191.612,
"eval_scitail-pairs-qa_steps_per_second": 4.79,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_scitail-pairs-pos_loss": 1.051362156867981,
"eval_scitail-pairs-pos_runtime": 2.3425,
"eval_scitail-pairs-pos_samples_per_second": 85.379,
"eval_scitail-pairs-pos_steps_per_second": 2.134,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_xsum-pairs_loss": 1.0554753541946411,
"eval_xsum-pairs_runtime": 1.044,
"eval_xsum-pairs_samples_per_second": 191.573,
"eval_xsum-pairs_steps_per_second": 4.789,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_compression-pairs_loss": 0.6035106778144836,
"eval_compression-pairs_runtime": 0.241,
"eval_compression-pairs_samples_per_second": 830.038,
"eval_compression-pairs_steps_per_second": 20.751,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_sciq_pairs_loss": 8.811105728149414,
"eval_sciq_pairs_runtime": 9.0357,
"eval_sciq_pairs_samples_per_second": 22.134,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_qasc_pairs_loss": 1.615903377532959,
"eval_qasc_pairs_runtime": 1.214,
"eval_qasc_pairs_samples_per_second": 164.746,
"eval_qasc_pairs_steps_per_second": 4.119,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_openbookqa_pairs_loss": 3.4049320220947266,
"eval_openbookqa_pairs_runtime": 1.0554,
"eval_openbookqa_pairs_samples_per_second": 189.509,
"eval_openbookqa_pairs_steps_per_second": 4.738,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_msmarco_pairs_loss": 2.3909060955047607,
"eval_msmarco_pairs_runtime": 2.5301,
"eval_msmarco_pairs_samples_per_second": 79.048,
"eval_msmarco_pairs_steps_per_second": 1.976,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_nq_pairs_loss": 2.794445753097534,
"eval_nq_pairs_runtime": 5.6752,
"eval_nq_pairs_samples_per_second": 35.241,
"eval_nq_pairs_steps_per_second": 0.881,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_trivia_pairs_loss": 2.753361701965332,
"eval_trivia_pairs_runtime": 9.0766,
"eval_trivia_pairs_samples_per_second": 22.035,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_quora_pairs_loss": 0.205492302775383,
"eval_quora_pairs_runtime": 0.6182,
"eval_quora_pairs_samples_per_second": 323.536,
"eval_quora_pairs_steps_per_second": 8.088,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_gooaq_pairs_loss": 2.038878917694092,
"eval_gooaq_pairs_runtime": 1.5488,
"eval_gooaq_pairs_samples_per_second": 129.134,
"eval_gooaq_pairs_steps_per_second": 3.228,
"step": 2874
},
{
"epoch": 0.45075282308657466,
"eval_mrpc_pairs_loss": 0.3230588436126709,
"eval_mrpc_pairs_runtime": 0.2358,
"eval_mrpc_pairs_samples_per_second": 848.229,
"eval_mrpc_pairs_steps_per_second": 21.206,
"step": 2874
},
{
"epoch": 0.451693851944793,
"grad_norm": 5.271574020385742,
"learning_rate": 2.103513174404015e-05,
"loss": 2.6918,
"step": 2880
},
{
"epoch": 0.4592220828105395,
"grad_norm": 21.954103469848633,
"learning_rate": 2.1386449184441654e-05,
"loss": 2.8354,
"step": 2928
},
{
"epoch": 0.46675031367628605,
"grad_norm": 28.671293258666992,
"learning_rate": 2.173776662484316e-05,
"loss": 2.9499,
"step": 2976
},
{
"epoch": 0.4742785445420326,
"grad_norm": 26.562397003173828,
"learning_rate": 2.2089084065244666e-05,
"loss": 2.6211,
"step": 3024
},
{
"epoch": 0.48180677540777916,
"grad_norm": 48.511756896972656,
"learning_rate": 2.2440401505646173e-05,
"loss": 3.3356,
"step": 3072
},
{
"epoch": 0.4893350062735257,
"grad_norm": 46.71563720703125,
"learning_rate": 2.2791718946047677e-05,
"loss": 2.846,
"step": 3120
},
{
"epoch": 0.4968632371392723,
"grad_norm": 24.524322509765625,
"learning_rate": 2.3143036386449184e-05,
"loss": 2.4866,
"step": 3168
},
{
"epoch": 0.5043914680050188,
"grad_norm": 85.22843933105469,
"learning_rate": 2.3494353826850688e-05,
"loss": 2.6334,
"step": 3216
},
{
"epoch": 0.5119196988707654,
"grad_norm": 28.435443878173828,
"learning_rate": 2.384567126725219e-05,
"loss": 2.6118,
"step": 3264
},
{
"epoch": 0.5194479297365119,
"grad_norm": 21.590103149414062,
"learning_rate": 2.41969887076537e-05,
"loss": 2.5833,
"step": 3312
},
{
"epoch": 0.5258782936010038,
"eval_nli-pairs_loss": 2.0752949714660645,
"eval_nli-pairs_runtime": 4.0304,
"eval_nli-pairs_samples_per_second": 49.623,
"eval_nli-pairs_steps_per_second": 1.241,
"eval_sts-test_pearson_cosine": 0.7401847199967786,
"eval_sts-test_pearson_dot": 0.5441501995975192,
"eval_sts-test_pearson_euclidean": 0.7344996320188322,
"eval_sts-test_pearson_manhattan": 0.7394640598472787,
"eval_sts-test_pearson_max": 0.7401847199967786,
"eval_sts-test_spearman_cosine": 0.7300085598018916,
"eval_sts-test_spearman_dot": 0.5241747185593542,
"eval_sts-test_spearman_euclidean": 0.7194131601167465,
"eval_sts-test_spearman_manhattan": 0.726961581928453,
"eval_sts-test_spearman_max": 0.7300085598018916,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_vitaminc-pairs_loss": 6.441956996917725,
"eval_vitaminc-pairs_runtime": 1.4416,
"eval_vitaminc-pairs_samples_per_second": 115.149,
"eval_vitaminc-pairs_steps_per_second": 2.775,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_sts-label_loss": 4.200085639953613,
"eval_sts-label_runtime": 0.3949,
"eval_sts-label_samples_per_second": 506.436,
"eval_sts-label_steps_per_second": 12.661,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_qnli-contrastive_loss": 0.5195684432983398,
"eval_qnli-contrastive_runtime": 0.2809,
"eval_qnli-contrastive_samples_per_second": 712.107,
"eval_qnli-contrastive_steps_per_second": 17.803,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_scitail-pairs-qa_loss": 0.35189124941825867,
"eval_scitail-pairs-qa_runtime": 1.0578,
"eval_scitail-pairs-qa_samples_per_second": 189.064,
"eval_scitail-pairs-qa_steps_per_second": 4.727,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_scitail-pairs-pos_loss": 0.8873756527900696,
"eval_scitail-pairs-pos_runtime": 2.4029,
"eval_scitail-pairs-pos_samples_per_second": 83.232,
"eval_scitail-pairs-pos_steps_per_second": 2.081,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_xsum-pairs_loss": 0.939339280128479,
"eval_xsum-pairs_runtime": 1.041,
"eval_xsum-pairs_samples_per_second": 192.121,
"eval_xsum-pairs_steps_per_second": 4.803,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_compression-pairs_loss": 0.5007131695747375,
"eval_compression-pairs_runtime": 0.2338,
"eval_compression-pairs_samples_per_second": 855.479,
"eval_compression-pairs_steps_per_second": 21.387,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_sciq_pairs_loss": 8.558987617492676,
"eval_sciq_pairs_runtime": 9.0984,
"eval_sciq_pairs_samples_per_second": 21.982,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_qasc_pairs_loss": 1.4318852424621582,
"eval_qasc_pairs_runtime": 1.2286,
"eval_qasc_pairs_samples_per_second": 162.79,
"eval_qasc_pairs_steps_per_second": 4.07,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_openbookqa_pairs_loss": 3.1973114013671875,
"eval_openbookqa_pairs_runtime": 1.0491,
"eval_openbookqa_pairs_samples_per_second": 190.633,
"eval_openbookqa_pairs_steps_per_second": 4.766,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_msmarco_pairs_loss": 2.2080254554748535,
"eval_msmarco_pairs_runtime": 2.5223,
"eval_msmarco_pairs_samples_per_second": 79.294,
"eval_msmarco_pairs_steps_per_second": 1.982,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_nq_pairs_loss": 2.5810558795928955,
"eval_nq_pairs_runtime": 5.6341,
"eval_nq_pairs_samples_per_second": 35.498,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_trivia_pairs_loss": 2.655771255493164,
"eval_trivia_pairs_runtime": 9.0716,
"eval_trivia_pairs_samples_per_second": 22.047,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_quora_pairs_loss": 0.5028819441795349,
"eval_quora_pairs_runtime": 0.6144,
"eval_quora_pairs_samples_per_second": 325.522,
"eval_quora_pairs_steps_per_second": 8.138,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_gooaq_pairs_loss": 1.8867437839508057,
"eval_gooaq_pairs_runtime": 1.5505,
"eval_gooaq_pairs_samples_per_second": 128.994,
"eval_gooaq_pairs_steps_per_second": 3.225,
"step": 3353
},
{
"epoch": 0.5258782936010038,
"eval_mrpc_pairs_loss": 0.2580638825893402,
"eval_mrpc_pairs_runtime": 0.2364,
"eval_mrpc_pairs_samples_per_second": 846.008,
"eval_mrpc_pairs_steps_per_second": 21.15,
"step": 3353
},
{
"epoch": 0.5269761606022585,
"grad_norm": 2.6962711811065674,
"learning_rate": 2.4548306148055206e-05,
"loss": 2.3251,
"step": 3360
},
{
"epoch": 0.534504391468005,
"grad_norm": 35.47948455810547,
"learning_rate": 2.489962358845671e-05,
"loss": 2.8494,
"step": 3408
},
{
"epoch": 0.5420326223337516,
"grad_norm": 8.13453483581543,
"learning_rate": 2.5250941028858214e-05,
"loss": 2.4009,
"step": 3456
},
{
"epoch": 0.5495608531994981,
"grad_norm": 20.041057586669922,
"learning_rate": 2.560225846925972e-05,
"loss": 2.5952,
"step": 3504
},
{
"epoch": 0.5570890840652447,
"grad_norm": 23.942073822021484,
"learning_rate": 2.595357590966123e-05,
"loss": 2.2798,
"step": 3552
},
{
"epoch": 0.5646173149309912,
"grad_norm": 17.675006866455078,
"learning_rate": 2.6304893350062732e-05,
"loss": 2.308,
"step": 3600
},
{
"epoch": 0.5721455457967378,
"grad_norm": 24.20000457763672,
"learning_rate": 2.6656210790464236e-05,
"loss": 2.122,
"step": 3648
},
{
"epoch": 0.5796737766624843,
"grad_norm": 30.06256866455078,
"learning_rate": 2.7007528230865747e-05,
"loss": 2.7901,
"step": 3696
},
{
"epoch": 0.5872020075282308,
"grad_norm": 22.547115325927734,
"learning_rate": 2.735884567126725e-05,
"loss": 2.0671,
"step": 3744
},
{
"epoch": 0.5947302383939774,
"grad_norm": 34.11716079711914,
"learning_rate": 2.7710163111668754e-05,
"loss": 2.366,
"step": 3792
},
{
"epoch": 0.6010037641154329,
"eval_nli-pairs_loss": 1.8900150060653687,
"eval_nli-pairs_runtime": 4.0481,
"eval_nli-pairs_samples_per_second": 49.406,
"eval_nli-pairs_steps_per_second": 1.235,
"eval_sts-test_pearson_cosine": 0.752143976340549,
"eval_sts-test_pearson_dot": 0.5694102087200895,
"eval_sts-test_pearson_euclidean": 0.7457585181878474,
"eval_sts-test_pearson_manhattan": 0.7525316002813096,
"eval_sts-test_pearson_max": 0.7525316002813096,
"eval_sts-test_spearman_cosine": 0.7404216272264129,
"eval_sts-test_spearman_dot": 0.5485789739808921,
"eval_sts-test_spearman_euclidean": 0.728675089641457,
"eval_sts-test_spearman_manhattan": 0.7367562035227414,
"eval_sts-test_spearman_max": 0.7404216272264129,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_vitaminc-pairs_loss": 6.0831098556518555,
"eval_vitaminc-pairs_runtime": 1.4528,
"eval_vitaminc-pairs_samples_per_second": 114.264,
"eval_vitaminc-pairs_steps_per_second": 2.753,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_sts-label_loss": 4.197264671325684,
"eval_sts-label_runtime": 0.4176,
"eval_sts-label_samples_per_second": 478.893,
"eval_sts-label_steps_per_second": 11.972,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_qnli-contrastive_loss": 0.5115653872489929,
"eval_qnli-contrastive_runtime": 0.3027,
"eval_qnli-contrastive_samples_per_second": 660.784,
"eval_qnli-contrastive_steps_per_second": 16.52,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_scitail-pairs-qa_loss": 0.29788386821746826,
"eval_scitail-pairs-qa_runtime": 1.0654,
"eval_scitail-pairs-qa_samples_per_second": 187.719,
"eval_scitail-pairs-qa_steps_per_second": 4.693,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_scitail-pairs-pos_loss": 0.8727617859840393,
"eval_scitail-pairs-pos_runtime": 2.3677,
"eval_scitail-pairs-pos_samples_per_second": 84.471,
"eval_scitail-pairs-pos_steps_per_second": 2.112,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_xsum-pairs_loss": 0.8608022928237915,
"eval_xsum-pairs_runtime": 1.0435,
"eval_xsum-pairs_samples_per_second": 191.671,
"eval_xsum-pairs_steps_per_second": 4.792,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_compression-pairs_loss": 0.4411359429359436,
"eval_compression-pairs_runtime": 0.2345,
"eval_compression-pairs_samples_per_second": 852.821,
"eval_compression-pairs_steps_per_second": 21.321,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_sciq_pairs_loss": 8.294719696044922,
"eval_sciq_pairs_runtime": 9.2141,
"eval_sciq_pairs_samples_per_second": 21.706,
"eval_sciq_pairs_steps_per_second": 0.543,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_qasc_pairs_loss": 1.1894803047180176,
"eval_qasc_pairs_runtime": 1.2518,
"eval_qasc_pairs_samples_per_second": 159.774,
"eval_qasc_pairs_steps_per_second": 3.994,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_openbookqa_pairs_loss": 2.8579885959625244,
"eval_openbookqa_pairs_runtime": 1.0874,
"eval_openbookqa_pairs_samples_per_second": 183.92,
"eval_openbookqa_pairs_steps_per_second": 4.598,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_msmarco_pairs_loss": 1.9733755588531494,
"eval_msmarco_pairs_runtime": 2.5486,
"eval_msmarco_pairs_samples_per_second": 78.476,
"eval_msmarco_pairs_steps_per_second": 1.962,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_nq_pairs_loss": 2.206907033920288,
"eval_nq_pairs_runtime": 5.7528,
"eval_nq_pairs_samples_per_second": 34.766,
"eval_nq_pairs_steps_per_second": 0.869,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_trivia_pairs_loss": 2.332620620727539,
"eval_trivia_pairs_runtime": 9.1703,
"eval_trivia_pairs_samples_per_second": 21.809,
"eval_trivia_pairs_steps_per_second": 0.545,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_quora_pairs_loss": 0.48870089650154114,
"eval_quora_pairs_runtime": 0.6491,
"eval_quora_pairs_samples_per_second": 308.142,
"eval_quora_pairs_steps_per_second": 7.704,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_gooaq_pairs_loss": 1.598087191581726,
"eval_gooaq_pairs_runtime": 1.5759,
"eval_gooaq_pairs_samples_per_second": 126.912,
"eval_gooaq_pairs_steps_per_second": 3.173,
"step": 3832
},
{
"epoch": 0.6010037641154329,
"eval_mrpc_pairs_loss": 0.2343733161687851,
"eval_mrpc_pairs_runtime": 0.2484,
"eval_mrpc_pairs_samples_per_second": 805.097,
"eval_mrpc_pairs_steps_per_second": 20.127,
"step": 3832
},
{
"epoch": 0.6022584692597239,
"grad_norm": 1.486786127090454,
"learning_rate": 2.806148055207026e-05,
"loss": 1.9614,
"step": 3840
},
{
"epoch": 0.6097867001254705,
"grad_norm": 23.297300338745117,
"learning_rate": 2.841279799247177e-05,
"loss": 2.3589,
"step": 3888
},
{
"epoch": 0.617314930991217,
"grad_norm": 16.00516700744629,
"learning_rate": 2.8764115432873273e-05,
"loss": 2.1475,
"step": 3936
},
{
"epoch": 0.6248431618569636,
"grad_norm": 24.357616424560547,
"learning_rate": 2.9115432873274777e-05,
"loss": 2.1312,
"step": 3984
},
{
"epoch": 0.6323713927227101,
"grad_norm": 28.798917770385742,
"learning_rate": 2.946675031367628e-05,
"loss": 2.5716,
"step": 4032
},
{
"epoch": 0.6398996235884568,
"grad_norm": 18.239490509033203,
"learning_rate": 2.981806775407779e-05,
"loss": 2.2249,
"step": 4080
},
{
"epoch": 0.6474278544542033,
"grad_norm": 19.50409507751465,
"learning_rate": 3.0169385194479295e-05,
"loss": 2.6331,
"step": 4128
},
{
"epoch": 0.6549560853199499,
"grad_norm": 12.110575675964355,
"learning_rate": 3.05207026348808e-05,
"loss": 2.7637,
"step": 4176
},
{
"epoch": 0.6624843161856964,
"grad_norm": 6.904999256134033,
"learning_rate": 3.087202007528231e-05,
"loss": 1.8973,
"step": 4224
},
{
"epoch": 0.6700125470514429,
"grad_norm": 9.007365226745605,
"learning_rate": 3.1223337515683813e-05,
"loss": 2.3181,
"step": 4272
},
{
"epoch": 0.676129234629862,
"eval_nli-pairs_loss": 1.7111084461212158,
"eval_nli-pairs_runtime": 4.0305,
"eval_nli-pairs_samples_per_second": 49.622,
"eval_nli-pairs_steps_per_second": 1.241,
"eval_sts-test_pearson_cosine": 0.7375865838793885,
"eval_sts-test_pearson_dot": 0.5355907015359193,
"eval_sts-test_pearson_euclidean": 0.7266850031847317,
"eval_sts-test_pearson_manhattan": 0.7357621558005936,
"eval_sts-test_pearson_max": 0.7375865838793885,
"eval_sts-test_spearman_cosine": 0.7273524041973777,
"eval_sts-test_spearman_dot": 0.5084902224306463,
"eval_sts-test_spearman_euclidean": 0.7071419579928555,
"eval_sts-test_spearman_manhattan": 0.7177664681655631,
"eval_sts-test_spearman_max": 0.7273524041973777,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_vitaminc-pairs_loss": 6.216845989227295,
"eval_vitaminc-pairs_runtime": 1.4703,
"eval_vitaminc-pairs_samples_per_second": 112.902,
"eval_vitaminc-pairs_steps_per_second": 2.721,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_sts-label_loss": 4.384557723999023,
"eval_sts-label_runtime": 0.3912,
"eval_sts-label_samples_per_second": 511.22,
"eval_sts-label_steps_per_second": 12.78,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_qnli-contrastive_loss": 0.40437957644462585,
"eval_qnli-contrastive_runtime": 0.281,
"eval_qnli-contrastive_samples_per_second": 711.864,
"eval_qnli-contrastive_steps_per_second": 17.797,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_scitail-pairs-qa_loss": 0.2210184931755066,
"eval_scitail-pairs-qa_runtime": 1.0575,
"eval_scitail-pairs-qa_samples_per_second": 189.117,
"eval_scitail-pairs-qa_steps_per_second": 4.728,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_scitail-pairs-pos_loss": 0.9065079689025879,
"eval_scitail-pairs-pos_runtime": 2.3488,
"eval_scitail-pairs-pos_samples_per_second": 85.151,
"eval_scitail-pairs-pos_steps_per_second": 2.129,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_xsum-pairs_loss": 0.8169436454772949,
"eval_xsum-pairs_runtime": 1.0409,
"eval_xsum-pairs_samples_per_second": 192.145,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_compression-pairs_loss": 0.391815721988678,
"eval_compression-pairs_runtime": 0.2361,
"eval_compression-pairs_samples_per_second": 847.066,
"eval_compression-pairs_steps_per_second": 21.177,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_sciq_pairs_loss": 0.6230970025062561,
"eval_sciq_pairs_runtime": 9.0874,
"eval_sciq_pairs_samples_per_second": 22.008,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_qasc_pairs_loss": 1.1559942960739136,
"eval_qasc_pairs_runtime": 1.2507,
"eval_qasc_pairs_samples_per_second": 159.907,
"eval_qasc_pairs_steps_per_second": 3.998,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_openbookqa_pairs_loss": 2.8303356170654297,
"eval_openbookqa_pairs_runtime": 1.0524,
"eval_openbookqa_pairs_samples_per_second": 190.041,
"eval_openbookqa_pairs_steps_per_second": 4.751,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_msmarco_pairs_loss": 1.9672399759292603,
"eval_msmarco_pairs_runtime": 2.518,
"eval_msmarco_pairs_samples_per_second": 79.428,
"eval_msmarco_pairs_steps_per_second": 1.986,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_nq_pairs_loss": 2.169950008392334,
"eval_nq_pairs_runtime": 5.6541,
"eval_nq_pairs_samples_per_second": 35.372,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_trivia_pairs_loss": 2.198312520980835,
"eval_trivia_pairs_runtime": 9.0535,
"eval_trivia_pairs_samples_per_second": 22.091,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_quora_pairs_loss": 0.3780948519706726,
"eval_quora_pairs_runtime": 0.6375,
"eval_quora_pairs_samples_per_second": 313.737,
"eval_quora_pairs_steps_per_second": 7.843,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_gooaq_pairs_loss": 1.5646275281906128,
"eval_gooaq_pairs_runtime": 1.5531,
"eval_gooaq_pairs_samples_per_second": 128.772,
"eval_gooaq_pairs_steps_per_second": 3.219,
"step": 4311
},
{
"epoch": 0.676129234629862,
"eval_mrpc_pairs_loss": 0.18545588850975037,
"eval_mrpc_pairs_runtime": 0.2382,
"eval_mrpc_pairs_samples_per_second": 839.569,
"eval_mrpc_pairs_steps_per_second": 20.989,
"step": 4311
},
{
"epoch": 0.6775407779171895,
"grad_norm": 15.639892578125,
"learning_rate": 3.157465495608532e-05,
"loss": 2.2329,
"step": 4320
},
{
"epoch": 0.685069008782936,
"grad_norm": 7.185269832611084,
"learning_rate": 3.192597239648682e-05,
"loss": 2.7864,
"step": 4368
},
{
"epoch": 0.6925972396486826,
"grad_norm": 10.165898323059082,
"learning_rate": 3.227728983688833e-05,
"loss": 2.5277,
"step": 4416
},
{
"epoch": 0.7001254705144291,
"grad_norm": 20.132612228393555,
"learning_rate": 3.2628607277289836e-05,
"loss": 2.526,
"step": 4464
},
{
"epoch": 0.7076537013801757,
"grad_norm": 6.147126197814941,
"learning_rate": 3.297992471769134e-05,
"loss": 1.5993,
"step": 4512
},
{
"epoch": 0.7151819322459222,
"grad_norm": 9.857342720031738,
"learning_rate": 3.3331242158092843e-05,
"loss": 2.0452,
"step": 4560
},
{
"epoch": 0.7227101631116688,
"grad_norm": 22.434364318847656,
"learning_rate": 3.3682559598494354e-05,
"loss": 2.0458,
"step": 4608
},
{
"epoch": 0.7302383939774153,
"grad_norm": 22.420066833496094,
"learning_rate": 3.403387703889586e-05,
"loss": 1.854,
"step": 4656
},
{
"epoch": 0.7377666248431619,
"grad_norm": 24.213205337524414,
"learning_rate": 3.438519447929736e-05,
"loss": 2.31,
"step": 4704
},
{
"epoch": 0.7452948557089084,
"grad_norm": 2.1184492111206055,
"learning_rate": 3.473651191969887e-05,
"loss": 1.7484,
"step": 4752
},
{
"epoch": 0.751254705144291,
"eval_nli-pairs_loss": 1.5204579830169678,
"eval_nli-pairs_runtime": 4.3227,
"eval_nli-pairs_samples_per_second": 46.267,
"eval_nli-pairs_steps_per_second": 1.157,
"eval_sts-test_pearson_cosine": 0.753550468294361,
"eval_sts-test_pearson_dot": 0.576164453162354,
"eval_sts-test_pearson_euclidean": 0.7433413992355353,
"eval_sts-test_pearson_manhattan": 0.7483177470711824,
"eval_sts-test_pearson_max": 0.753550468294361,
"eval_sts-test_spearman_cosine": 0.7510075785449373,
"eval_sts-test_spearman_dot": 0.5438417987754244,
"eval_sts-test_spearman_euclidean": 0.7271758422639625,
"eval_sts-test_spearman_manhattan": 0.7334567781451864,
"eval_sts-test_spearman_max": 0.7510075785449373,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_vitaminc-pairs_loss": 5.992164134979248,
"eval_vitaminc-pairs_runtime": 1.4716,
"eval_vitaminc-pairs_samples_per_second": 112.802,
"eval_vitaminc-pairs_steps_per_second": 2.718,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_sts-label_loss": 4.175446033477783,
"eval_sts-label_runtime": 0.3991,
"eval_sts-label_samples_per_second": 501.105,
"eval_sts-label_steps_per_second": 12.528,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_qnli-contrastive_loss": 0.4543713629245758,
"eval_qnli-contrastive_runtime": 0.2765,
"eval_qnli-contrastive_samples_per_second": 723.231,
"eval_qnli-contrastive_steps_per_second": 18.081,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_scitail-pairs-qa_loss": 0.20784999430179596,
"eval_scitail-pairs-qa_runtime": 1.0567,
"eval_scitail-pairs-qa_samples_per_second": 189.26,
"eval_scitail-pairs-qa_steps_per_second": 4.732,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_scitail-pairs-pos_loss": 0.8679056167602539,
"eval_scitail-pairs-pos_runtime": 2.3456,
"eval_scitail-pairs-pos_samples_per_second": 85.268,
"eval_scitail-pairs-pos_steps_per_second": 2.132,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_xsum-pairs_loss": 0.6944636702537537,
"eval_xsum-pairs_runtime": 1.0379,
"eval_xsum-pairs_samples_per_second": 192.694,
"eval_xsum-pairs_steps_per_second": 4.817,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_compression-pairs_loss": 0.34194332361221313,
"eval_compression-pairs_runtime": 0.2354,
"eval_compression-pairs_samples_per_second": 849.798,
"eval_compression-pairs_steps_per_second": 21.245,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_sciq_pairs_loss": 7.352969646453857,
"eval_sciq_pairs_runtime": 9.0502,
"eval_sciq_pairs_samples_per_second": 22.099,
"eval_sciq_pairs_steps_per_second": 0.552,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_qasc_pairs_loss": 0.9425787329673767,
"eval_qasc_pairs_runtime": 1.2081,
"eval_qasc_pairs_samples_per_second": 165.543,
"eval_qasc_pairs_steps_per_second": 4.139,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_openbookqa_pairs_loss": 2.5082011222839355,
"eval_openbookqa_pairs_runtime": 1.0492,
"eval_openbookqa_pairs_samples_per_second": 190.618,
"eval_openbookqa_pairs_steps_per_second": 4.765,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_msmarco_pairs_loss": 1.696744441986084,
"eval_msmarco_pairs_runtime": 2.5162,
"eval_msmarco_pairs_samples_per_second": 79.484,
"eval_msmarco_pairs_steps_per_second": 1.987,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_nq_pairs_loss": 1.8095602989196777,
"eval_nq_pairs_runtime": 5.6348,
"eval_nq_pairs_samples_per_second": 35.494,
"eval_nq_pairs_steps_per_second": 0.887,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_trivia_pairs_loss": 2.0272486209869385,
"eval_trivia_pairs_runtime": 9.0671,
"eval_trivia_pairs_samples_per_second": 22.058,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_quora_pairs_loss": 0.15705542266368866,
"eval_quora_pairs_runtime": 0.6326,
"eval_quora_pairs_samples_per_second": 316.138,
"eval_quora_pairs_steps_per_second": 7.903,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_gooaq_pairs_loss": 1.374332308769226,
"eval_gooaq_pairs_runtime": 1.5484,
"eval_gooaq_pairs_samples_per_second": 129.164,
"eval_gooaq_pairs_steps_per_second": 3.229,
"step": 4790
},
{
"epoch": 0.751254705144291,
"eval_mrpc_pairs_loss": 0.17204828560352325,
"eval_mrpc_pairs_runtime": 0.2358,
"eval_mrpc_pairs_samples_per_second": 848.332,
"eval_mrpc_pairs_steps_per_second": 21.208,
"step": 4790
},
{
"epoch": 0.7528230865746549,
"grad_norm": 1.4021190404891968,
"learning_rate": 3.4999758305020584e-05,
"loss": 1.7113,
"step": 4800
},
{
"epoch": 0.7603513174404015,
"grad_norm": 19.776817321777344,
"learning_rate": 3.499395795931671e-05,
"loss": 2.3696,
"step": 4848
},
{
"epoch": 0.767879548306148,
"grad_norm": 28.693845748901367,
"learning_rate": 3.49804263115427e-05,
"loss": 2.2947,
"step": 4896
},
{
"epoch": 0.7754077791718946,
"grad_norm": 1.3631008863449097,
"learning_rate": 3.495916934189221e-05,
"loss": 1.8841,
"step": 4944
},
{
"epoch": 0.7829360100376411,
"grad_norm": 40.640262603759766,
"learning_rate": 3.4930196444697477e-05,
"loss": 2.084,
"step": 4992
},
{
"epoch": 0.7904642409033877,
"grad_norm": 20.45759391784668,
"learning_rate": 3.489352042427762e-05,
"loss": 1.9297,
"step": 5040
},
{
"epoch": 0.7979924717691342,
"grad_norm": 24.276058197021484,
"learning_rate": 3.484915748927982e-05,
"loss": 2.0521,
"step": 5088
},
{
"epoch": 0.8055207026348808,
"grad_norm": 24.93791389465332,
"learning_rate": 3.4797127245516105e-05,
"loss": 1.7092,
"step": 5136
},
{
"epoch": 0.8130489335006273,
"grad_norm": 25.131153106689453,
"learning_rate": 3.4737452687298694e-05,
"loss": 1.7394,
"step": 5184
},
{
"epoch": 0.820577164366374,
"grad_norm": 3.779459238052368,
"learning_rate": 3.467016018727788e-05,
"loss": 2.567,
"step": 5232
},
{
"epoch": 0.8263801756587202,
"eval_nli-pairs_loss": 1.4584167003631592,
"eval_nli-pairs_runtime": 3.9955,
"eval_nli-pairs_samples_per_second": 50.056,
"eval_nli-pairs_steps_per_second": 1.251,
"eval_sts-test_pearson_cosine": 0.7484577894142428,
"eval_sts-test_pearson_dot": 0.5289676422936789,
"eval_sts-test_pearson_euclidean": 0.743677607180833,
"eval_sts-test_pearson_manhattan": 0.7474581577502462,
"eval_sts-test_pearson_max": 0.7484577894142428,
"eval_sts-test_spearman_cosine": 0.7507798204197761,
"eval_sts-test_spearman_dot": 0.5016451185199292,
"eval_sts-test_spearman_euclidean": 0.7307379850546868,
"eval_sts-test_spearman_manhattan": 0.7367432097081014,
"eval_sts-test_spearman_max": 0.7507798204197761,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_vitaminc-pairs_loss": 5.56383752822876,
"eval_vitaminc-pairs_runtime": 1.4339,
"eval_vitaminc-pairs_samples_per_second": 115.772,
"eval_vitaminc-pairs_steps_per_second": 2.79,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_sts-label_loss": 4.355674743652344,
"eval_sts-label_runtime": 0.401,
"eval_sts-label_samples_per_second": 498.723,
"eval_sts-label_steps_per_second": 12.468,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_qnli-contrastive_loss": 0.2912294566631317,
"eval_qnli-contrastive_runtime": 0.2801,
"eval_qnli-contrastive_samples_per_second": 714.044,
"eval_qnli-contrastive_steps_per_second": 17.851,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_scitail-pairs-qa_loss": 0.19145721197128296,
"eval_scitail-pairs-qa_runtime": 1.0728,
"eval_scitail-pairs-qa_samples_per_second": 186.429,
"eval_scitail-pairs-qa_steps_per_second": 4.661,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_scitail-pairs-pos_loss": 0.7433645725250244,
"eval_scitail-pairs-pos_runtime": 2.3785,
"eval_scitail-pairs-pos_samples_per_second": 84.086,
"eval_scitail-pairs-pos_steps_per_second": 2.102,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_xsum-pairs_loss": 0.6145637631416321,
"eval_xsum-pairs_runtime": 1.0393,
"eval_xsum-pairs_samples_per_second": 192.428,
"eval_xsum-pairs_steps_per_second": 4.811,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_compression-pairs_loss": 0.29557526111602783,
"eval_compression-pairs_runtime": 0.2372,
"eval_compression-pairs_samples_per_second": 843.266,
"eval_compression-pairs_steps_per_second": 21.082,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_sciq_pairs_loss": 0.5607883930206299,
"eval_sciq_pairs_runtime": 9.0381,
"eval_sciq_pairs_samples_per_second": 22.128,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_qasc_pairs_loss": 0.8776007294654846,
"eval_qasc_pairs_runtime": 1.2111,
"eval_qasc_pairs_samples_per_second": 165.141,
"eval_qasc_pairs_steps_per_second": 4.129,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_openbookqa_pairs_loss": 2.414658784866333,
"eval_openbookqa_pairs_runtime": 1.052,
"eval_openbookqa_pairs_samples_per_second": 190.106,
"eval_openbookqa_pairs_steps_per_second": 4.753,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_msmarco_pairs_loss": 1.615893840789795,
"eval_msmarco_pairs_runtime": 2.5183,
"eval_msmarco_pairs_samples_per_second": 79.419,
"eval_msmarco_pairs_steps_per_second": 1.985,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_nq_pairs_loss": 1.5788501501083374,
"eval_nq_pairs_runtime": 5.6429,
"eval_nq_pairs_samples_per_second": 35.443,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_trivia_pairs_loss": 1.8346160650253296,
"eval_trivia_pairs_runtime": 9.0522,
"eval_trivia_pairs_samples_per_second": 22.094,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_quora_pairs_loss": 0.23968417942523956,
"eval_quora_pairs_runtime": 0.6179,
"eval_quora_pairs_samples_per_second": 323.693,
"eval_quora_pairs_steps_per_second": 8.092,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_gooaq_pairs_loss": 1.3264899253845215,
"eval_gooaq_pairs_runtime": 1.549,
"eval_gooaq_pairs_samples_per_second": 129.112,
"eval_gooaq_pairs_steps_per_second": 3.228,
"step": 5269
},
{
"epoch": 0.8263801756587202,
"eval_mrpc_pairs_loss": 0.14705294370651245,
"eval_mrpc_pairs_runtime": 0.2494,
"eval_mrpc_pairs_samples_per_second": 801.787,
"eval_mrpc_pairs_steps_per_second": 20.045,
"step": 5269
},
{
"epoch": 0.8281053952321205,
"grad_norm": 17.636714935302734,
"learning_rate": 3.459527948478686e-05,
"loss": 2.3021,
"step": 5280
},
{
"epoch": 0.835633626097867,
"grad_norm": 23.402650833129883,
"learning_rate": 3.4512843672698696e-05,
"loss": 1.5502,
"step": 5328
},
{
"epoch": 0.8431618569636136,
"grad_norm": 13.210539817810059,
"learning_rate": 3.4422889182801225e-05,
"loss": 1.7324,
"step": 5376
},
{
"epoch": 0.8506900878293601,
"grad_norm": 14.795612335205078,
"learning_rate": 3.4325455769696324e-05,
"loss": 1.8119,
"step": 5424
},
{
"epoch": 0.8582183186951067,
"grad_norm": 14.047534942626953,
"learning_rate": 3.422058649323072e-05,
"loss": 1.8507,
"step": 5472
},
{
"epoch": 0.8657465495608532,
"grad_norm": 0.7366377711296082,
"learning_rate": 3.4108327699466066e-05,
"loss": 1.7362,
"step": 5520
},
{
"epoch": 0.8732747804265998,
"grad_norm": 16.555519104003906,
"learning_rate": 3.398872900019673e-05,
"loss": 2.082,
"step": 5568
},
{
"epoch": 0.8808030112923463,
"grad_norm": 16.52071189880371,
"learning_rate": 3.386184325102423e-05,
"loss": 2.1483,
"step": 5616
},
{
"epoch": 0.8883312421580929,
"grad_norm": 16.03848648071289,
"learning_rate": 3.372772652799824e-05,
"loss": 1.3961,
"step": 5664
},
{
"epoch": 0.8958594730238394,
"grad_norm": 15.494946479797363,
"learning_rate": 3.358643810283421e-05,
"loss": 1.6331,
"step": 5712
},
{
"epoch": 0.9015056461731493,
"eval_nli-pairs_loss": 1.4454108476638794,
"eval_nli-pairs_runtime": 4.0041,
"eval_nli-pairs_samples_per_second": 49.949,
"eval_nli-pairs_steps_per_second": 1.249,
"eval_sts-test_pearson_cosine": 0.7644735043371528,
"eval_sts-test_pearson_dot": 0.5461512421131185,
"eval_sts-test_pearson_euclidean": 0.7520132891230207,
"eval_sts-test_pearson_manhattan": 0.7535418655995262,
"eval_sts-test_pearson_max": 0.7644735043371528,
"eval_sts-test_spearman_cosine": 0.7630644782411757,
"eval_sts-test_spearman_dot": 0.5239487411838791,
"eval_sts-test_spearman_euclidean": 0.7392793315112096,
"eval_sts-test_spearman_manhattan": 0.7426354353655322,
"eval_sts-test_spearman_max": 0.7630644782411757,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_vitaminc-pairs_loss": 5.328937530517578,
"eval_vitaminc-pairs_runtime": 1.4411,
"eval_vitaminc-pairs_samples_per_second": 115.19,
"eval_vitaminc-pairs_steps_per_second": 2.776,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_sts-label_loss": 4.186919212341309,
"eval_sts-label_runtime": 0.4046,
"eval_sts-label_samples_per_second": 494.357,
"eval_sts-label_steps_per_second": 12.359,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_qnli-contrastive_loss": 0.35569697618484497,
"eval_qnli-contrastive_runtime": 0.2807,
"eval_qnli-contrastive_samples_per_second": 712.531,
"eval_qnli-contrastive_steps_per_second": 17.813,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_scitail-pairs-qa_loss": 0.18789875507354736,
"eval_scitail-pairs-qa_runtime": 1.1135,
"eval_scitail-pairs-qa_samples_per_second": 179.622,
"eval_scitail-pairs-qa_steps_per_second": 4.491,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_scitail-pairs-pos_loss": 0.715129017829895,
"eval_scitail-pairs-pos_runtime": 2.3885,
"eval_scitail-pairs-pos_samples_per_second": 83.736,
"eval_scitail-pairs-pos_steps_per_second": 2.093,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_xsum-pairs_loss": 0.624691367149353,
"eval_xsum-pairs_runtime": 1.0408,
"eval_xsum-pairs_samples_per_second": 192.169,
"eval_xsum-pairs_steps_per_second": 4.804,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_compression-pairs_loss": 0.30371707677841187,
"eval_compression-pairs_runtime": 0.2581,
"eval_compression-pairs_samples_per_second": 774.8,
"eval_compression-pairs_steps_per_second": 19.37,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_sciq_pairs_loss": 0.5236299633979797,
"eval_sciq_pairs_runtime": 9.205,
"eval_sciq_pairs_samples_per_second": 21.727,
"eval_sciq_pairs_steps_per_second": 0.543,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_qasc_pairs_loss": 0.8543006777763367,
"eval_qasc_pairs_runtime": 1.238,
"eval_qasc_pairs_samples_per_second": 161.556,
"eval_qasc_pairs_steps_per_second": 4.039,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_openbookqa_pairs_loss": 2.3740031719207764,
"eval_openbookqa_pairs_runtime": 1.1145,
"eval_openbookqa_pairs_samples_per_second": 179.452,
"eval_openbookqa_pairs_steps_per_second": 4.486,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_msmarco_pairs_loss": 1.4328840970993042,
"eval_msmarco_pairs_runtime": 2.5725,
"eval_msmarco_pairs_samples_per_second": 77.746,
"eval_msmarco_pairs_steps_per_second": 1.944,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_nq_pairs_loss": 1.5343101024627686,
"eval_nq_pairs_runtime": 5.7416,
"eval_nq_pairs_samples_per_second": 34.833,
"eval_nq_pairs_steps_per_second": 0.871,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_trivia_pairs_loss": 1.7511711120605469,
"eval_trivia_pairs_runtime": 9.1035,
"eval_trivia_pairs_samples_per_second": 21.97,
"eval_trivia_pairs_steps_per_second": 0.549,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_quora_pairs_loss": 0.2826410233974457,
"eval_quora_pairs_runtime": 0.6444,
"eval_quora_pairs_samples_per_second": 310.354,
"eval_quora_pairs_steps_per_second": 7.759,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_gooaq_pairs_loss": 1.2128998041152954,
"eval_gooaq_pairs_runtime": 1.5919,
"eval_gooaq_pairs_samples_per_second": 125.634,
"eval_gooaq_pairs_steps_per_second": 3.141,
"step": 5748
},
{
"epoch": 0.9015056461731493,
"eval_mrpc_pairs_loss": 0.16468097269535065,
"eval_mrpc_pairs_runtime": 0.2496,
"eval_mrpc_pairs_samples_per_second": 801.385,
"eval_mrpc_pairs_steps_per_second": 20.035,
"step": 5748
},
{
"epoch": 0.903387703889586,
"grad_norm": 2.899136543273926,
"learning_rate": 3.3438040416718773e-05,
"loss": 1.9863,
"step": 5760
},
{
"epoch": 0.9109159347553325,
"grad_norm": 14.919694900512695,
"learning_rate": 3.3282599052714414e-05,
"loss": 1.6917,
"step": 5808
},
{
"epoch": 0.918444165621079,
"grad_norm": 1.626105785369873,
"learning_rate": 3.312018270677559e-05,
"loss": 1.7409,
"step": 5856
},
{
"epoch": 0.9259723964868256,
"grad_norm": 15.8577299118042,
"learning_rate": 3.295086315738918e-05,
"loss": 1.4397,
"step": 5904
},
{
"epoch": 0.9335006273525721,
"grad_norm": 10.255402565002441,
"learning_rate": 3.277471523385255e-05,
"loss": 1.2977,
"step": 5952
},
{
"epoch": 0.9410288582183187,
"grad_norm": 31.09028434753418,
"learning_rate": 3.259181678320349e-05,
"loss": 1.5429,
"step": 6000
},
{
"epoch": 0.9485570890840652,
"grad_norm": 15.244925498962402,
"learning_rate": 3.2402248635816294e-05,
"loss": 1.6939,
"step": 6048
},
{
"epoch": 0.9560853199498118,
"grad_norm": 19.404956817626953,
"learning_rate": 3.2206094569679564e-05,
"loss": 1.5573,
"step": 6096
},
{
"epoch": 0.9636135508155583,
"grad_norm": 16.607341766357422,
"learning_rate": 3.200344127337121e-05,
"loss": 1.5873,
"step": 6144
},
{
"epoch": 0.9711417816813049,
"grad_norm": 1.1432667970657349,
"learning_rate": 3.179437830774722e-05,
"loss": 1.8802,
"step": 6192
},
{
"epoch": 0.9766311166875784,
"eval_nli-pairs_loss": 1.3268091678619385,
"eval_nli-pairs_runtime": 4.0761,
"eval_nli-pairs_samples_per_second": 49.066,
"eval_nli-pairs_steps_per_second": 1.227,
"eval_sts-test_pearson_cosine": 0.7536875808596679,
"eval_sts-test_pearson_dot": 0.5240503602011544,
"eval_sts-test_pearson_euclidean": 0.7440772899082505,
"eval_sts-test_pearson_manhattan": 0.7465814532083983,
"eval_sts-test_pearson_max": 0.7536875808596679,
"eval_sts-test_spearman_cosine": 0.7584726286921011,
"eval_sts-test_spearman_dot": 0.49490205708473545,
"eval_sts-test_spearman_euclidean": 0.7307844981527315,
"eval_sts-test_spearman_manhattan": 0.7362283105144983,
"eval_sts-test_spearman_max": 0.7584726286921011,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_vitaminc-pairs_loss": 5.498671054840088,
"eval_vitaminc-pairs_runtime": 1.4379,
"eval_vitaminc-pairs_samples_per_second": 115.449,
"eval_vitaminc-pairs_steps_per_second": 2.782,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_sts-label_loss": 4.3516716957092285,
"eval_sts-label_runtime": 0.3976,
"eval_sts-label_samples_per_second": 503.027,
"eval_sts-label_steps_per_second": 12.576,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_qnli-contrastive_loss": 0.30157506465911865,
"eval_qnli-contrastive_runtime": 0.2787,
"eval_qnli-contrastive_samples_per_second": 717.742,
"eval_qnli-contrastive_steps_per_second": 17.944,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_scitail-pairs-qa_loss": 0.14389516413211823,
"eval_scitail-pairs-qa_runtime": 1.0465,
"eval_scitail-pairs-qa_samples_per_second": 191.121,
"eval_scitail-pairs-qa_steps_per_second": 4.778,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_scitail-pairs-pos_loss": 0.6142529249191284,
"eval_scitail-pairs-pos_runtime": 2.3525,
"eval_scitail-pairs-pos_samples_per_second": 85.018,
"eval_scitail-pairs-pos_steps_per_second": 2.125,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_xsum-pairs_loss": 0.5579215288162231,
"eval_xsum-pairs_runtime": 1.0381,
"eval_xsum-pairs_samples_per_second": 192.655,
"eval_xsum-pairs_steps_per_second": 4.816,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_compression-pairs_loss": 0.2538767158985138,
"eval_compression-pairs_runtime": 0.2367,
"eval_compression-pairs_samples_per_second": 844.99,
"eval_compression-pairs_steps_per_second": 21.125,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_sciq_pairs_loss": 0.43967145681381226,
"eval_sciq_pairs_runtime": 9.0489,
"eval_sciq_pairs_samples_per_second": 22.102,
"eval_sciq_pairs_steps_per_second": 0.553,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_qasc_pairs_loss": 0.7632485032081604,
"eval_qasc_pairs_runtime": 1.2216,
"eval_qasc_pairs_samples_per_second": 163.713,
"eval_qasc_pairs_steps_per_second": 4.093,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_openbookqa_pairs_loss": 2.370297908782959,
"eval_openbookqa_pairs_runtime": 1.051,
"eval_openbookqa_pairs_samples_per_second": 190.297,
"eval_openbookqa_pairs_steps_per_second": 4.757,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_msmarco_pairs_loss": 1.4191588163375854,
"eval_msmarco_pairs_runtime": 2.5294,
"eval_msmarco_pairs_samples_per_second": 79.071,
"eval_msmarco_pairs_steps_per_second": 1.977,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_nq_pairs_loss": 1.3847519159317017,
"eval_nq_pairs_runtime": 5.6464,
"eval_nq_pairs_samples_per_second": 35.421,
"eval_nq_pairs_steps_per_second": 0.886,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_trivia_pairs_loss": 1.6301060914993286,
"eval_trivia_pairs_runtime": 9.06,
"eval_trivia_pairs_samples_per_second": 22.075,
"eval_trivia_pairs_steps_per_second": 0.552,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_quora_pairs_loss": 0.25898078083992004,
"eval_quora_pairs_runtime": 0.6161,
"eval_quora_pairs_samples_per_second": 324.63,
"eval_quora_pairs_steps_per_second": 8.116,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_gooaq_pairs_loss": 1.1585972309112549,
"eval_gooaq_pairs_runtime": 1.547,
"eval_gooaq_pairs_samples_per_second": 129.286,
"eval_gooaq_pairs_steps_per_second": 3.232,
"step": 6227
},
{
"epoch": 0.9766311166875784,
"eval_mrpc_pairs_loss": 0.12749388813972473,
"eval_mrpc_pairs_runtime": 0.243,
"eval_mrpc_pairs_samples_per_second": 822.939,
"eval_mrpc_pairs_steps_per_second": 20.573,
"step": 6227
},
{
"epoch": 0.9786700125470514,
"grad_norm": 20.923500061035156,
"learning_rate": 3.157899806636098e-05,
"loss": 1.9813,
"step": 6240
},
{
"epoch": 0.986198243412798,
"grad_norm": 24.244462966918945,
"learning_rate": 3.13573957346308e-05,
"loss": 2.2932,
"step": 6288
},
{
"epoch": 0.9937264742785445,
"grad_norm": 16.324562072753906,
"learning_rate": 3.112966924777352e-05,
"loss": 1.6308,
"step": 6336
},
{
"epoch": 1.001254705144291,
"grad_norm": 14.824076652526855,
"learning_rate": 3.0895919247522884e-05,
"loss": 1.497,
"step": 6384
},
{
"epoch": 1.0087829360100375,
"grad_norm": 18.46307373046875,
"learning_rate": 3.065624903765184e-05,
"loss": 1.758,
"step": 6432
},
{
"epoch": 1.0163111668757843,
"grad_norm": 16.654727935791016,
"learning_rate": 3.0410764538318303e-05,
"loss": 1.6188,
"step": 6480
},
{
"epoch": 1.0238393977415308,
"grad_norm": 2.175520896911621,
"learning_rate": 3.0159574239254692e-05,
"loss": 2.1126,
"step": 6528
},
{
"epoch": 1.0313676286072773,
"grad_norm": 20.126880645751953,
"learning_rate": 2.990278915182182e-05,
"loss": 1.6129,
"step": 6576
},
{
"epoch": 1.0388958594730238,
"grad_norm": 24.355375289916992,
"learning_rate": 2.964052275994841e-05,
"loss": 1.4584,
"step": 6624
},
{
"epoch": 1.0464240903387705,
"grad_norm": 11.541900634765625,
"learning_rate": 2.9372890969977852e-05,
"loss": 1.5975,
"step": 6672
},
{
"epoch": 1.0517565872020076,
"eval_nli-pairs_loss": 1.3095624446868896,
"eval_nli-pairs_runtime": 4.0202,
"eval_nli-pairs_samples_per_second": 49.749,
"eval_nli-pairs_steps_per_second": 1.244,
"eval_sts-test_pearson_cosine": 0.7778731779702309,
"eval_sts-test_pearson_dot": 0.5593371858287987,
"eval_sts-test_pearson_euclidean": 0.7587435793478869,
"eval_sts-test_pearson_manhattan": 0.7610239287161901,
"eval_sts-test_pearson_max": 0.7778731779702309,
"eval_sts-test_spearman_cosine": 0.7782906514134392,
"eval_sts-test_spearman_dot": 0.5371251012528334,
"eval_sts-test_spearman_euclidean": 0.7452348039865185,
"eval_sts-test_spearman_manhattan": 0.7492626260972672,
"eval_sts-test_spearman_max": 0.7782906514134392,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_vitaminc-pairs_loss": 5.074347496032715,
"eval_vitaminc-pairs_runtime": 1.4418,
"eval_vitaminc-pairs_samples_per_second": 115.134,
"eval_vitaminc-pairs_steps_per_second": 2.774,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_sts-label_loss": 4.242542266845703,
"eval_sts-label_runtime": 0.414,
"eval_sts-label_samples_per_second": 483.143,
"eval_sts-label_steps_per_second": 12.079,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_qnli-contrastive_loss": 0.26250946521759033,
"eval_qnli-contrastive_runtime": 0.2857,
"eval_qnli-contrastive_samples_per_second": 700.035,
"eval_qnli-contrastive_steps_per_second": 17.501,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_scitail-pairs-qa_loss": 0.15288515388965607,
"eval_scitail-pairs-qa_runtime": 1.1069,
"eval_scitail-pairs-qa_samples_per_second": 180.69,
"eval_scitail-pairs-qa_steps_per_second": 4.517,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_scitail-pairs-pos_loss": 0.5382486581802368,
"eval_scitail-pairs-pos_runtime": 2.3648,
"eval_scitail-pairs-pos_samples_per_second": 84.574,
"eval_scitail-pairs-pos_steps_per_second": 2.114,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_xsum-pairs_loss": 0.48308631777763367,
"eval_xsum-pairs_runtime": 1.0411,
"eval_xsum-pairs_samples_per_second": 192.11,
"eval_xsum-pairs_steps_per_second": 4.803,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_compression-pairs_loss": 0.23988038301467896,
"eval_compression-pairs_runtime": 0.2389,
"eval_compression-pairs_samples_per_second": 837.159,
"eval_compression-pairs_steps_per_second": 20.929,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_sciq_pairs_loss": 0.3883107304573059,
"eval_sciq_pairs_runtime": 9.1094,
"eval_sciq_pairs_samples_per_second": 21.955,
"eval_sciq_pairs_steps_per_second": 0.549,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_qasc_pairs_loss": 0.6684954762458801,
"eval_qasc_pairs_runtime": 1.2249,
"eval_qasc_pairs_samples_per_second": 163.273,
"eval_qasc_pairs_steps_per_second": 4.082,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_openbookqa_pairs_loss": 2.076920986175537,
"eval_openbookqa_pairs_runtime": 1.0566,
"eval_openbookqa_pairs_samples_per_second": 189.291,
"eval_openbookqa_pairs_steps_per_second": 4.732,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_msmarco_pairs_loss": 1.3436123132705688,
"eval_msmarco_pairs_runtime": 2.5384,
"eval_msmarco_pairs_samples_per_second": 78.791,
"eval_msmarco_pairs_steps_per_second": 1.97,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_nq_pairs_loss": 1.2744060754776,
"eval_nq_pairs_runtime": 5.6694,
"eval_nq_pairs_samples_per_second": 35.277,
"eval_nq_pairs_steps_per_second": 0.882,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_trivia_pairs_loss": 1.356447458267212,
"eval_trivia_pairs_runtime": 9.0814,
"eval_trivia_pairs_samples_per_second": 22.023,
"eval_trivia_pairs_steps_per_second": 0.551,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_quora_pairs_loss": 0.20534881949424744,
"eval_quora_pairs_runtime": 0.6243,
"eval_quora_pairs_samples_per_second": 320.363,
"eval_quora_pairs_steps_per_second": 8.009,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_gooaq_pairs_loss": 1.131415843963623,
"eval_gooaq_pairs_runtime": 1.6004,
"eval_gooaq_pairs_samples_per_second": 124.966,
"eval_gooaq_pairs_steps_per_second": 3.124,
"step": 6706
},
{
"epoch": 1.0517565872020076,
"eval_mrpc_pairs_loss": 0.12877897918224335,
"eval_mrpc_pairs_runtime": 0.2425,
"eval_mrpc_pairs_samples_per_second": 824.742,
"eval_mrpc_pairs_steps_per_second": 20.619,
"step": 6706
},
{
"epoch": 1.053952321204517,
"grad_norm": 12.653462409973145,
"learning_rate": 2.9100012059444395e-05,
"loss": 1.6933,
"step": 6720
},
{
"epoch": 1.0614805520702635,
"grad_norm": 13.93374252319336,
"learning_rate": 2.8822006624801445e-05,
"loss": 1.2931,
"step": 6768
},
{
"epoch": 1.06900878293601,
"grad_norm": 17.680423736572266,
"learning_rate": 2.8538997528125016e-05,
"loss": 1.8077,
"step": 6816
},
{
"epoch": 1.0765370138017567,
"grad_norm": 14.7294921875,
"learning_rate": 2.8251109842815857e-05,
"loss": 1.4748,
"step": 6864
},
{
"epoch": 1.0840652446675032,
"grad_norm": 26.121625900268555,
"learning_rate": 2.795847079832445e-05,
"loss": 1.225,
"step": 6912
},
{
"epoch": 1.0915934755332497,
"grad_norm": 5.665552616119385,
"learning_rate": 2.7661209723922966e-05,
"loss": 1.7627,
"step": 6960
},
{
"epoch": 1.0991217063989962,
"grad_norm": 1.2186944484710693,
"learning_rate": 2.7359457991549365e-05,
"loss": 1.5613,
"step": 7008
},
{
"epoch": 1.1066499372647427,
"grad_norm": 4.569064617156982,
"learning_rate": 2.7053348957748744e-05,
"loss": 1.6548,
"step": 7056
},
{
"epoch": 1.1141781681304894,
"grad_norm": 11.330352783203125,
"learning_rate": 2.6743017904737505e-05,
"loss": 1.4033,
"step": 7104
},
{
"epoch": 1.121706398996236,
"grad_norm": 27.65351676940918,
"learning_rate": 2.6428601980616635e-05,
"loss": 1.3999,
"step": 7152
},
{
"epoch": 1.1268820577164367,
"eval_nli-pairs_loss": 1.212507724761963,
"eval_nli-pairs_runtime": 4.0093,
"eval_nli-pairs_samples_per_second": 49.884,
"eval_nli-pairs_steps_per_second": 1.247,
"eval_sts-test_pearson_cosine": 0.771726417661154,
"eval_sts-test_pearson_dot": 0.5582118135503709,
"eval_sts-test_pearson_euclidean": 0.7582436737188462,
"eval_sts-test_pearson_manhattan": 0.7602002352027541,
"eval_sts-test_pearson_max": 0.771726417661154,
"eval_sts-test_spearman_cosine": 0.7760137659698252,
"eval_sts-test_spearman_dot": 0.5314151368436631,
"eval_sts-test_spearman_euclidean": 0.7447358001087161,
"eval_sts-test_spearman_manhattan": 0.7487681272112986,
"eval_sts-test_spearman_max": 0.7760137659698252,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_vitaminc-pairs_loss": 4.9936699867248535,
"eval_vitaminc-pairs_runtime": 1.4575,
"eval_vitaminc-pairs_samples_per_second": 113.89,
"eval_vitaminc-pairs_steps_per_second": 2.744,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_sts-label_loss": 4.1703057289123535,
"eval_sts-label_runtime": 0.4168,
"eval_sts-label_samples_per_second": 479.874,
"eval_sts-label_steps_per_second": 11.997,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_qnli-contrastive_loss": 0.31846657395362854,
"eval_qnli-contrastive_runtime": 0.2838,
"eval_qnli-contrastive_samples_per_second": 704.663,
"eval_qnli-contrastive_steps_per_second": 17.617,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_scitail-pairs-qa_loss": 0.1417759656906128,
"eval_scitail-pairs-qa_runtime": 1.0581,
"eval_scitail-pairs-qa_samples_per_second": 189.022,
"eval_scitail-pairs-qa_steps_per_second": 4.726,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_scitail-pairs-pos_loss": 0.5202088356018066,
"eval_scitail-pairs-pos_runtime": 2.4081,
"eval_scitail-pairs-pos_samples_per_second": 83.054,
"eval_scitail-pairs-pos_steps_per_second": 2.076,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_xsum-pairs_loss": 0.44685080647468567,
"eval_xsum-pairs_runtime": 1.0416,
"eval_xsum-pairs_samples_per_second": 192.009,
"eval_xsum-pairs_steps_per_second": 4.8,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_compression-pairs_loss": 0.22411338984966278,
"eval_compression-pairs_runtime": 0.2438,
"eval_compression-pairs_samples_per_second": 820.201,
"eval_compression-pairs_steps_per_second": 20.505,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_sciq_pairs_loss": 0.37388285994529724,
"eval_sciq_pairs_runtime": 9.0875,
"eval_sciq_pairs_samples_per_second": 22.008,
"eval_sciq_pairs_steps_per_second": 0.55,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_qasc_pairs_loss": 0.6410768032073975,
"eval_qasc_pairs_runtime": 1.2169,
"eval_qasc_pairs_samples_per_second": 164.348,
"eval_qasc_pairs_steps_per_second": 4.109,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_openbookqa_pairs_loss": 2.043297052383423,
"eval_openbookqa_pairs_runtime": 1.0552,
"eval_openbookqa_pairs_samples_per_second": 189.539,
"eval_openbookqa_pairs_steps_per_second": 4.738,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_msmarco_pairs_loss": 1.2795078754425049,
"eval_msmarco_pairs_runtime": 2.5343,
"eval_msmarco_pairs_samples_per_second": 78.919,
"eval_msmarco_pairs_steps_per_second": 1.973,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_nq_pairs_loss": 1.1771754026412964,
"eval_nq_pairs_runtime": 5.6589,
"eval_nq_pairs_samples_per_second": 35.343,
"eval_nq_pairs_steps_per_second": 0.884,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_trivia_pairs_loss": 1.305577039718628,
"eval_trivia_pairs_runtime": 9.0872,
"eval_trivia_pairs_samples_per_second": 22.009,
"eval_trivia_pairs_steps_per_second": 0.55,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_quora_pairs_loss": 0.2645386755466461,
"eval_quora_pairs_runtime": 0.6429,
"eval_quora_pairs_samples_per_second": 311.086,
"eval_quora_pairs_steps_per_second": 7.777,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_gooaq_pairs_loss": 1.0562756061553955,
"eval_gooaq_pairs_runtime": 1.5772,
"eval_gooaq_pairs_samples_per_second": 126.809,
"eval_gooaq_pairs_steps_per_second": 3.17,
"step": 7185
},
{
"epoch": 1.1268820577164367,
"eval_mrpc_pairs_loss": 0.1197453960776329,
"eval_mrpc_pairs_runtime": 0.2393,
"eval_mrpc_pairs_samples_per_second": 835.714,
"eval_mrpc_pairs_steps_per_second": 20.893,
"step": 7185
},
{
"epoch": 1.1292346298619824,
"grad_norm": 26.328977584838867,
"learning_rate": 2.611024013876024e-05,
"loss": 1.707,
"step": 7200
},
{
"epoch": 1.136762860727729,
"grad_norm": 14.730170249938965,
"learning_rate": 2.578807307640633e-05,
"loss": 1.2812,
"step": 7248
},
{
"epoch": 1.1442910915934754,
"grad_norm": 16.989944458007812,
"learning_rate": 2.546224317247695e-05,
"loss": 2.0209,
"step": 7296
},
{
"epoch": 1.1518193224592221,
"grad_norm": 15.942720413208008,
"learning_rate": 2.5132894424654982e-05,
"loss": 1.465,
"step": 7344
},
{
"epoch": 1.1593475533249686,
"grad_norm": 13.42844009399414,
"learning_rate": 2.4800172385745665e-05,
"loss": 1.6181,
"step": 7392
},
{
"epoch": 1.1668757841907151,
"grad_norm": 2.6255691051483154,
"learning_rate": 2.446422409935082e-05,
"loss": 1.3386,
"step": 7440
},
{
"epoch": 1.1744040150564616,
"grad_norm": 11.230644226074219,
"learning_rate": 2.412519803488417e-05,
"loss": 1.4015,
"step": 7488
},
{
"epoch": 1.1819322459222084,
"grad_norm": 17.679582595825195,
"learning_rate": 2.3783244021956605e-05,
"loss": 1.5057,
"step": 7536
},
{
"epoch": 1.1894604767879549,
"grad_norm": 12.440255165100098,
"learning_rate": 2.3438513184160302e-05,
"loss": 1.2441,
"step": 7584
},
{
"epoch": 1.1969887076537014,
"grad_norm": 17.67916488647461,
"learning_rate": 2.309115787228094e-05,
"loss": 1.1718,
"step": 7632
}
],
"logging_steps": 48,
"max_steps": 19128,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1913,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 42,
"trial_name": null,
"trial_params": null
}