--- tags: - mteb model-index: - name: IYun-large-zh results: - task: type: STS dataset: type: C-MTEB/AFQMC name: MTEB AFQMC config: default split: validation revision: None metrics: - type: cos_sim_pearson value: 57.37728676415047 - type: cos_sim_spearman value: 60.89131895307699 - type: euclidean_pearson value: 60.056754800315595 - type: euclidean_spearman value: 60.891479787418966 - type: manhattan_pearson value: 60.03850823371572 - type: manhattan_spearman value: 60.8597150048781 - task: type: STS dataset: type: C-MTEB/ATEC name: MTEB ATEC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 57.29704921148904 - type: cos_sim_spearman value: 58.81607331373972 - type: euclidean_pearson value: 63.69251756281332 - type: euclidean_spearman value: 58.81608232068536 - type: manhattan_pearson value: 63.665668138742284 - type: manhattan_spearman value: 58.80224314871406 - task: type: Classification dataset: type: mteb/amazon_reviews_multi name: MTEB AmazonReviewsClassification (zh) config: zh split: test revision: 1399c76144fd37290681b995c656ef9b2e06e26d metrics: - type: accuracy value: 49.672 - type: f1 value: 47.27737512126165 - task: type: STS dataset: type: C-MTEB/BQ name: MTEB BQ config: default split: test revision: None metrics: - type: cos_sim_pearson value: 71.65025725548176 - type: cos_sim_spearman value: 72.53278026251562 - type: euclidean_pearson value: 71.29771814474996 - type: euclidean_spearman value: 72.53241999594584 - type: manhattan_pearson value: 71.29290351258575 - type: manhattan_spearman value: 72.52505531587519 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringP2P name: MTEB CLSClusteringP2P config: default split: test revision: None metrics: - type: v_measure value: 60.19892651814847 - task: type: Clustering dataset: type: C-MTEB/CLSClusteringS2S name: MTEB CLSClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 58.39897986042561 - task: type: Reranking dataset: type: 
C-MTEB/CMedQAv1-reranking name: MTEB CMedQAv1 config: default split: test revision: None metrics: - type: map value: 88.73563192647498 - type: mrr value: 91.00214285714286 - task: type: Reranking dataset: type: C-MTEB/CMedQAv2-reranking name: MTEB CMedQAv2 config: default split: test revision: None metrics: - type: map value: 89.42396184634322 - type: mrr value: 91.90503968253968 - task: type: Retrieval dataset: type: C-MTEB/CmedqaRetrieval name: MTEB CmedqaRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 26.950000000000003 - type: map_at_10 value: 39.982 - type: map_at_100 value: 41.844 - type: map_at_1000 value: 41.948 - type: map_at_3 value: 35.664 - type: map_at_5 value: 38.061 - type: mrr_at_1 value: 41.11 - type: mrr_at_10 value: 49.183 - type: mrr_at_100 value: 50.166999999999994 - type: mrr_at_1000 value: 50.205999999999996 - type: mrr_at_3 value: 46.778 - type: mrr_at_5 value: 48.120000000000005 - type: ndcg_at_1 value: 41.11 - type: ndcg_at_10 value: 46.678 - type: ndcg_at_100 value: 53.876000000000005 - type: ndcg_at_1000 value: 55.627 - type: ndcg_at_3 value: 41.429 - type: ndcg_at_5 value: 43.551 - type: precision_at_1 value: 41.11 - type: precision_at_10 value: 10.325 - type: precision_at_100 value: 1.6119999999999999 - type: precision_at_1000 value: 0.184 - type: precision_at_3 value: 23.498 - type: precision_at_5 value: 16.894000000000002 - type: recall_at_1 value: 26.950000000000003 - type: recall_at_10 value: 57.239 - type: recall_at_100 value: 86.9 - type: recall_at_1000 value: 98.581 - type: recall_at_3 value: 41.221000000000004 - type: recall_at_5 value: 47.976 - task: type: PairClassification dataset: type: C-MTEB/CMNLI name: MTEB Cmnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 86.13968597726043 - type: cos_sim_ap value: 90.86724630443385 - type: cos_sim_f1 value: 86.9653767820774 - type: cos_sim_precision value: 83.9724680432645 - type: cos_sim_recall value: 
90.17951425554382 - type: dot_accuracy value: 86.13968597726043 - type: dot_ap value: 90.85181504536696 - type: dot_f1 value: 86.9653767820774 - type: dot_precision value: 83.9724680432645 - type: dot_recall value: 90.17951425554382 - type: euclidean_accuracy value: 86.13968597726043 - type: euclidean_ap value: 90.86657368513809 - type: euclidean_f1 value: 86.95208970438327 - type: euclidean_precision value: 84.03940886699507 - type: euclidean_recall value: 90.07391763463569 - type: manhattan_accuracy value: 85.97726042230644 - type: manhattan_ap value: 90.85259484237685 - type: manhattan_f1 value: 86.79435483870968 - type: manhattan_precision value: 83.02796528447445 - type: manhattan_recall value: 90.91869060190075 - type: max_accuracy value: 86.13968597726043 - type: max_ap value: 90.86724630443385 - type: max_f1 value: 86.9653767820774 - task: type: Retrieval dataset: type: C-MTEB/CovidRetrieval name: MTEB CovidRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 73.34 - type: map_at_10 value: 81.722 - type: map_at_100 value: 81.916 - type: map_at_1000 value: 81.919 - type: map_at_3 value: 80.25999999999999 - type: map_at_5 value: 81.11699999999999 - type: mrr_at_1 value: 73.551 - type: mrr_at_10 value: 81.727 - type: mrr_at_100 value: 81.911 - type: mrr_at_1000 value: 81.914 - type: mrr_at_3 value: 80.242 - type: mrr_at_5 value: 81.149 - type: ndcg_at_1 value: 73.551 - type: ndcg_at_10 value: 85.244 - type: ndcg_at_100 value: 86.005 - type: ndcg_at_1000 value: 86.084 - type: ndcg_at_3 value: 82.334 - type: ndcg_at_5 value: 83.878 - type: precision_at_1 value: 73.551 - type: precision_at_10 value: 9.705 - type: precision_at_100 value: 1.0030000000000001 - type: precision_at_1000 value: 0.101 - type: precision_at_3 value: 29.645 - type: precision_at_5 value: 18.567 - type: recall_at_1 value: 73.34 - type: recall_at_10 value: 96.048 - type: recall_at_100 value: 99.262 - type: recall_at_1000 value: 99.895 - type: recall_at_3 value: 
88.303 - type: recall_at_5 value: 91.99199999999999 - task: type: Retrieval dataset: type: C-MTEB/DuRetrieval name: MTEB DuRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 26.506 - type: map_at_10 value: 81.29899999999999 - type: map_at_100 value: 83.997 - type: map_at_1000 value: 84.03399999999999 - type: map_at_3 value: 56.69 - type: map_at_5 value: 71.389 - type: mrr_at_1 value: 91.10000000000001 - type: mrr_at_10 value: 93.952 - type: mrr_at_100 value: 94.00500000000001 - type: mrr_at_1000 value: 94.00699999999999 - type: mrr_at_3 value: 93.683 - type: mrr_at_5 value: 93.858 - type: ndcg_at_1 value: 91.10000000000001 - type: ndcg_at_10 value: 88.25699999999999 - type: ndcg_at_100 value: 90.84100000000001 - type: ndcg_at_1000 value: 91.167 - type: ndcg_at_3 value: 87.595 - type: ndcg_at_5 value: 86.346 - type: precision_at_1 value: 91.10000000000001 - type: precision_at_10 value: 42.04 - type: precision_at_100 value: 4.804 - type: precision_at_1000 value: 0.48900000000000005 - type: precision_at_3 value: 78.583 - type: precision_at_5 value: 66.09 - type: recall_at_1 value: 26.506 - type: recall_at_10 value: 89.12299999999999 - type: recall_at_100 value: 97.717 - type: recall_at_1000 value: 99.285 - type: recall_at_3 value: 58.865 - type: recall_at_5 value: 75.753 - task: type: Retrieval dataset: type: C-MTEB/EcomRetrieval name: MTEB EcomRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 52.7 - type: map_at_10 value: 62.239 - type: map_at_100 value: 62.744 - type: map_at_1000 value: 62.755 - type: map_at_3 value: 59.75 - type: map_at_5 value: 61.050000000000004 - type: mrr_at_1 value: 52.7 - type: mrr_at_10 value: 62.239 - type: mrr_at_100 value: 62.744 - type: mrr_at_1000 value: 62.755 - type: mrr_at_3 value: 59.75 - type: mrr_at_5 value: 61.050000000000004 - type: ndcg_at_1 value: 52.7 - type: ndcg_at_10 value: 67.23 - type: ndcg_at_100 value: 69.729 - type: ndcg_at_1000 value: 
70.00999999999999 - type: ndcg_at_3 value: 62.025 - type: ndcg_at_5 value: 64.37 - type: precision_at_1 value: 52.7 - type: precision_at_10 value: 8.309999999999999 - type: precision_at_100 value: 0.9490000000000001 - type: precision_at_1000 value: 0.097 - type: precision_at_3 value: 22.867 - type: precision_at_5 value: 14.860000000000001 - type: recall_at_1 value: 52.7 - type: recall_at_10 value: 83.1 - type: recall_at_100 value: 94.89999999999999 - type: recall_at_1000 value: 97.1 - type: recall_at_3 value: 68.60000000000001 - type: recall_at_5 value: 74.3 - task: type: Classification dataset: type: C-MTEB/IFlyTek-classification name: MTEB IFlyTek config: default split: validation revision: None metrics: - type: accuracy value: 52.64332435552135 - type: f1 value: 42.17147347490132 - task: type: Classification dataset: type: C-MTEB/JDReview-classification name: MTEB JDReview config: default split: test revision: None metrics: - type: accuracy value: 87.5984990619137 - type: ap value: 57.59814850574554 - type: f1 value: 82.62140959655022 - task: type: STS dataset: type: C-MTEB/LCQMC name: MTEB LCQMC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 74.58027418203673 - type: cos_sim_spearman value: 79.19473724464046 - type: euclidean_pearson value: 79.2941422188887 - type: euclidean_spearman value: 79.1944889378359 - type: manhattan_pearson value: 79.26535092062532 - type: manhattan_spearman value: 79.17298822899023 - task: type: Reranking dataset: type: C-MTEB/Mmarco-reranking name: MTEB MMarcoReranking config: default split: dev revision: None metrics: - type: map value: 31.611379937191025 - type: mrr value: 30.88968253968254 - task: type: Retrieval dataset: type: C-MTEB/MMarcoRetrieval name: MTEB MMarcoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 65.603 - type: map_at_10 value: 74.834 - type: map_at_100 value: 75.16199999999999 - type: map_at_1000 value: 75.17399999999999 - type: map_at_3 
value: 72.979 - type: map_at_5 value: 74.154 - type: mrr_at_1 value: 67.837 - type: mrr_at_10 value: 75.46199999999999 - type: mrr_at_100 value: 75.751 - type: mrr_at_1000 value: 75.762 - type: mrr_at_3 value: 73.832 - type: mrr_at_5 value: 74.875 - type: ndcg_at_1 value: 67.837 - type: ndcg_at_10 value: 78.636 - type: ndcg_at_100 value: 80.083 - type: ndcg_at_1000 value: 80.394 - type: ndcg_at_3 value: 75.12 - type: ndcg_at_5 value: 77.12 - type: precision_at_1 value: 67.837 - type: precision_at_10 value: 9.536999999999999 - type: precision_at_100 value: 1.0250000000000001 - type: precision_at_1000 value: 0.105 - type: precision_at_3 value: 28.352 - type: precision_at_5 value: 18.074 - type: recall_at_1 value: 65.603 - type: recall_at_10 value: 89.704 - type: recall_at_100 value: 96.2 - type: recall_at_1000 value: 98.588 - type: recall_at_3 value: 80.444 - type: recall_at_5 value: 85.205 - task: type: Classification dataset: type: mteb/amazon_massive_intent name: MTEB MassiveIntentClassification (zh-CN) config: zh-CN split: test revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 metrics: - type: accuracy value: 77.43106926698049 - type: f1 value: 73.96808004721824 - task: type: Classification dataset: type: mteb/amazon_massive_scenario name: MTEB MassiveScenarioClassification (zh-CN) config: zh-CN split: test revision: 7d571f92784cd94a019292a1f45445077d0ef634 metrics: - type: accuracy value: 83.86684599865501 - type: f1 value: 83.05645257324346 - task: type: Retrieval dataset: type: C-MTEB/MedicalRetrieval name: MTEB MedicalRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 55.00000000000001 - type: map_at_10 value: 61.129 - type: map_at_100 value: 61.61 - type: map_at_1000 value: 61.655 - type: map_at_3 value: 59.533 - type: map_at_5 value: 60.478 - type: mrr_at_1 value: 54.900000000000006 - type: mrr_at_10 value: 61.090999999999994 - type: mrr_at_100 value: 61.562 - type: mrr_at_1000 value: 61.608 - type: mrr_at_3 value: 59.483 
- type: mrr_at_5 value: 60.428000000000004 - type: ndcg_at_1 value: 55.00000000000001 - type: ndcg_at_10 value: 64.288 - type: ndcg_at_100 value: 66.991 - type: ndcg_at_1000 value: 68.27 - type: ndcg_at_3 value: 61.014 - type: ndcg_at_5 value: 62.68899999999999 - type: precision_at_1 value: 55.00000000000001 - type: precision_at_10 value: 7.430000000000001 - type: precision_at_100 value: 0.878 - type: precision_at_1000 value: 0.098 - type: precision_at_3 value: 21.767 - type: precision_at_5 value: 13.86 - type: recall_at_1 value: 55.00000000000001 - type: recall_at_10 value: 74.3 - type: recall_at_100 value: 87.8 - type: recall_at_1000 value: 98.0 - type: recall_at_3 value: 65.3 - type: recall_at_5 value: 69.3 - task: type: Classification dataset: type: C-MTEB/MultilingualSentiment-classification name: MTEB MultilingualSentiment config: default split: validation revision: None metrics: - type: accuracy value: 78.48333333333333 - type: f1 value: 78.36516159631131 - task: type: PairClassification dataset: type: C-MTEB/OCNLI name: MTEB Ocnli config: default split: validation revision: None metrics: - type: cos_sim_accuracy value: 86.13968597726043 - type: cos_sim_ap value: 90.86724630443385 - type: cos_sim_f1 value: 86.9653767820774 - type: cos_sim_precision value: 83.9724680432645 - type: cos_sim_recall value: 90.17951425554382 - type: dot_accuracy value: 86.13968597726043 - type: dot_ap value: 90.85181504536696 - type: dot_f1 value: 86.9653767820774 - type: dot_precision value: 83.9724680432645 - type: dot_recall value: 90.17951425554382 - type: euclidean_accuracy value: 86.13968597726043 - type: euclidean_ap value: 90.86657368513809 - type: euclidean_f1 value: 86.95208970438327 - type: euclidean_precision value: 84.03940886699507 - type: euclidean_recall value: 90.07391763463569 - type: manhattan_accuracy value: 85.97726042230644 - type: manhattan_ap value: 90.85259484237685 - type: manhattan_f1 value: 86.79435483870968 - type: manhattan_precision value: 
83.02796528447445 - type: manhattan_recall value: 90.91869060190075 - type: max_accuracy value: 86.13968597726043 - type: max_ap value: 90.86724630443385 - type: max_f1 value: 86.9653767820774 - task: type: Classification dataset: type: C-MTEB/OnlineShopping-classification name: MTEB OnlineShopping config: default split: test revision: None metrics: - type: accuracy value: 94.33999999999999 - type: ap value: 92.566213965377 - type: f1 value: 94.32981412505542 - task: type: STS dataset: type: C-MTEB/PAWSX name: MTEB PAWSX config: default split: test revision: None metrics: - type: cos_sim_pearson value: 40.59979992480721 - type: cos_sim_spearman value: 45.80272854477526 - type: euclidean_pearson value: 45.51435650601272 - type: euclidean_spearman value: 45.80481880049892 - type: manhattan_pearson value: 45.50783698090448 - type: manhattan_spearman value: 45.7962835896273 - task: type: STS dataset: type: C-MTEB/QBQTC name: MTEB QBQTC config: default split: test revision: None metrics: - type: cos_sim_pearson value: 41.95530336245604 - type: cos_sim_spearman value: 43.94205325290135 - type: euclidean_pearson value: 38.01893281522651 - type: euclidean_spearman value: 43.9411389356089 - type: manhattan_pearson value: 38.158512461951446 - type: manhattan_spearman value: 44.055211140130815 - task: type: STS dataset: type: mteb/sts22-crosslingual-sts name: MTEB STS22 (zh) config: zh split: test revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 metrics: - type: cos_sim_pearson value: 63.64131281514482 - type: cos_sim_spearman value: 65.17753570208333 - type: euclidean_pearson value: 62.72868744500848 - type: euclidean_spearman value: 65.17730738350589 - type: manhattan_pearson value: 62.76099444782981 - type: manhattan_spearman value: 65.2421498595002 - task: type: STS dataset: type: C-MTEB/STSB name: MTEB STSB config: default split: test revision: None metrics: - type: cos_sim_pearson value: 79.15762053490425 - type: cos_sim_spearman value: 79.47824157657848 - type: 
euclidean_pearson value: 79.11217669696227 - type: euclidean_spearman value: 79.47857091559331 - type: manhattan_pearson value: 79.07701011877683 - type: manhattan_spearman value: 79.43942682897884 - task: type: Reranking dataset: type: C-MTEB/T2Reranking name: MTEB T2Reranking config: default split: dev revision: None metrics: - type: map value: 67.45068053105526 - type: mrr value: 77.63560439973777 - task: type: Retrieval dataset: type: C-MTEB/T2Retrieval name: MTEB T2Retrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 27.837 - type: map_at_10 value: 77.803 - type: map_at_100 value: 81.402 - type: map_at_1000 value: 81.464 - type: map_at_3 value: 54.879 - type: map_at_5 value: 67.32900000000001 - type: mrr_at_1 value: 90.584 - type: mrr_at_10 value: 93.059 - type: mrr_at_100 value: 93.135 - type: mrr_at_1000 value: 93.138 - type: mrr_at_3 value: 92.659 - type: mrr_at_5 value: 92.914 - type: ndcg_at_1 value: 90.584 - type: ndcg_at_10 value: 85.29299999999999 - type: ndcg_at_100 value: 88.824 - type: ndcg_at_1000 value: 89.4 - type: ndcg_at_3 value: 86.79599999999999 - type: ndcg_at_5 value: 85.353 - type: precision_at_1 value: 90.584 - type: precision_at_10 value: 42.191 - type: precision_at_100 value: 5.0200000000000005 - type: precision_at_1000 value: 0.516 - type: precision_at_3 value: 75.785 - type: precision_at_5 value: 63.417 - type: recall_at_1 value: 27.837 - type: recall_at_10 value: 84.21600000000001 - type: recall_at_100 value: 95.719 - type: recall_at_1000 value: 98.565 - type: recall_at_3 value: 56.574999999999996 - type: recall_at_5 value: 70.682 - task: type: Classification dataset: type: C-MTEB/TNews-classification name: MTEB TNews config: default split: validation revision: None metrics: - type: accuracy value: 54.37 - type: f1 value: 52.57500124627352 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringP2P name: MTEB ThuNewsClusteringP2P config: default split: test revision: None metrics: - type: 
v_measure value: 76.9781904739968 - task: type: Clustering dataset: type: C-MTEB/ThuNewsClusteringS2S name: MTEB ThuNewsClusteringS2S config: default split: test revision: None metrics: - type: v_measure value: 69.82661181746705 - task: type: Retrieval dataset: type: C-MTEB/VideoRetrieval name: MTEB VideoRetrieval config: default split: dev revision: None metrics: - type: map_at_1 value: 58.699999999999996 - type: map_at_10 value: 68.512 - type: map_at_100 value: 69.018 - type: map_at_1000 value: 69.028 - type: map_at_3 value: 66.51700000000001 - type: map_at_5 value: 67.91199999999999 - type: mrr_at_1 value: 58.599999999999994 - type: mrr_at_10 value: 68.462 - type: mrr_at_100 value: 68.96799999999999 - type: mrr_at_1000 value: 68.978 - type: mrr_at_3 value: 66.467 - type: mrr_at_5 value: 67.862 - type: ndcg_at_1 value: 58.699999999999996 - type: ndcg_at_10 value: 72.88900000000001 - type: ndcg_at_100 value: 75.262 - type: ndcg_at_1000 value: 75.48700000000001 - type: ndcg_at_3 value: 68.96 - type: ndcg_at_5 value: 71.452 - type: precision_at_1 value: 58.699999999999996 - type: precision_at_10 value: 8.64 - type: precision_at_100 value: 0.9730000000000001 - type: precision_at_1000 value: 0.099 - type: precision_at_3 value: 25.333 - type: precision_at_5 value: 16.400000000000002 - type: recall_at_1 value: 58.699999999999996 - type: recall_at_10 value: 86.4 - type: recall_at_100 value: 97.3 - type: recall_at_1000 value: 99.0 - type: recall_at_3 value: 76.0 - type: recall_at_5 value: 82.0 - task: type: Classification dataset: type: C-MTEB/waimai-classification name: MTEB Waimai config: default split: test revision: None metrics: - type: accuracy value: 89.23 - type: ap value: 75.03115536738895 - type: f1 value: 87.71601665295442
---

### 使用方法

```python
from sentence_transformers import SentenceTransformer

sentences = ["sentence1", "sentence2"]
model = SentenceTransformer('IYun-large-zh')
embeddings_1 = model.encode(sentences, normalize_embeddings=True)
embeddings_2 = model.encode(sentences, normalize_embeddings=True)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)
```