# File size: 9,055 Bytes
# e5687b2
# Per-model metadata, keyed by model identifier (for Hugging Face entries the
# key is the `org/name` repo id that also appears in `link`).
# Fields visible on the entries below:
#   link          URL of the model card or provider documentation
#   revision      40-hex string for Hub entries, "1" for API-only entries
#                 (presumably a commit SHA / placeholder - confirm with consumer)
#   desc, license, organization   free-text strings
#   seq_len, size, dim            integers (units are not stated in this file;
#                 presumably max tokens / millions of parameters / embedding
#                 width - TODO confirm)
#   instruction_* task prompt strings, present only on some entries
#   mteb_*        benchmark scores; not every entry carries all of them
model_meta:
  # Entry for the Hub repo given in `link`; no instruction_* prompts are set.
  # String fields are quoted so their type never depends on loader inference.
  sentence-transformers/all-MiniLM-L6-v2:
    link: "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2"
    revision: "8b3219a92973c328a8e22fadcfa821b5dc75636a"
    desc: "all-MiniLM-L6-v2 by Sentence Transformers"
    seq_len: 512
    size: 23
    dim: 384
    license: "Apache 2.0"
    organization: "Sentence Transformers"
    mteb_overall: 56.26
    mteb_retrieval: 41.95
    mteb_sts: 78.90
    mteb_clustering: 42.35
  # Entry for the Hub repo given in `link`; carries scores but no
  # instruction_* prompts. Text values are explicitly quoted strings.
  intfloat/multilingual-e5-small:
    link: "https://huggingface.co/intfloat/multilingual-e5-small"
    revision: "e4ce9877abf3edfe10b0d82785e83bdcb973e22e"
    desc: "multilingual-e5-small by Microsoft"
    seq_len: 512
    size: 44
    dim: 384
    license: "MIT License"
    organization: "Microsoft"
    mteb_overall: 57.87
    mteb_retrieval: 46.64
    mteb_sts: 79.10
    mteb_clustering: 37.08
  # Entry for the Hub repo given in `link`. Defines one instruction prompt per
  # task (three retrieval sources, STS, clustering) alongside the scores.
  intfloat/multilingual-e5-large-instruct:
    link: "https://huggingface.co/intfloat/multilingual-e5-large-instruct"
    # Quoted like the `revision: "1"` entries elsewhere in this file, so the
    # hash is always loaded as a string.
    revision: "baa7be480a7de1539afce709c8f13f833a510e0a"
    desc: "multilingual-e5-large-instruct by Microsoft"
    seq_len: 514
    size: 560
    dim: 1024
    license: "MIT License"
    organization: "Microsoft"
    # Prompts are quoted so each value's exact extent is visible; the
    # Wikipedia prompt previously ended in stray trailing spaces.
    instruction_query_arxiv: "Given a query, retrieve a relevant paper title and abstract from arXiv"
    instruction_query_wikipedia: "Given a query, retrieve a relevant title and passage from Wikipedia"
    instruction_query_stackexchange: "Given a query, retrieve a relevant question and answer from Stack Exchange"
    instruction_sts: "Retrieve semantically similar text"
    instruction_clustering: "Identify the topic/theme/category of the text"
    mteb_overall: 64.41
    mteb_retrieval: 52.47
    mteb_sts: 84.78
    mteb_clustering: 47.10
  # Entry for the Hub repo given in `link`; includes the five instruction_*
  # prompts (arXiv, Wikipedia, Stack Exchange, STS, clustering).
  intfloat/e5-mistral-7b-instruct:
    link: "https://huggingface.co/intfloat/e5-mistral-7b-instruct"
    revision: "07163b72af1488142a360786df853f237b1a3ca1"
    desc: "e5-mistral-7b-instruct by Microsoft"
    seq_len: 32768
    size: 7111
    dim: 4096
    license: "MIT License"
    organization: "Microsoft"
    instruction_query_arxiv: "Given a query, retrieve a relevant paper title and abstract from arXiv"
    instruction_query_wikipedia: "Given a query, retrieve a relevant title and passage from Wikipedia"
    instruction_query_stackexchange: "Given a query, retrieve a relevant question and answer from Stack Exchange"
    instruction_sts: "Retrieve semantically similar text"
    instruction_clustering: "Identify the topic/theme/category of the text"
    mteb_overall: 66.63
    mteb_retrieval: 56.89
    mteb_sts: 84.63
    mteb_clustering: 50.26
  # Entry for the Hub repo given in `link`; `organization` lists three
  # comma-separated names in a single string. Instruction prompts included.
  GritLM/GritLM-7B:
    link: "https://huggingface.co/GritLM/GritLM-7B"
    revision: "13f00a0e36500c80ce12870ea513846a066004af"
    desc: "GritLM-7B by Contextual AI, HKU, Microsoft"
    seq_len: 32768
    size: 7240
    dim: 4096
    license: "Apache 2.0"
    organization: "Contextual AI, HKU, Microsoft"
    instruction_query_arxiv: "Given a query, retrieve a relevant paper title and abstract from arXiv"
    instruction_query_wikipedia: "Given a query, retrieve a relevant title and passage from Wikipedia"
    instruction_query_stackexchange: "Given a query, retrieve a relevant question and answer from Stack Exchange"
    instruction_sts: "Retrieve semantically similar text"
    instruction_clustering: "Identify the topic/theme/category of the text"
    mteb_overall: 66.76
    mteb_retrieval: 57.41
    mteb_sts: 83.35
    mteb_clustering: 50.61
  # Entry for the Hub repo given in `link`; no instruction_* prompts are set.
  # Text values are written as explicitly quoted strings.
  BAAI/bge-large-en-v1.5:
    link: "https://huggingface.co/BAAI/bge-large-en-v1.5"
    revision: "d4aa6901d3a41ba39fb536a557fa166f842b0e09"
    desc: "bge-large-en-v1.5 by BAAI"
    seq_len: 512
    size: 335
    dim: 1024
    license: "MIT"
    organization: "BAAI"
    mteb_overall: 64.23
    mteb_retrieval: 54.29
    mteb_sts: 83.11
    mteb_clustering: 46.08
  # Entry for the Hub repo given in `link`.
  # NOTE(review): no instruction_* keys are set here, while several other
  # entries in this file define them - confirm that is intentional.
  nvidia/NV-Embed-v1:
    link: "https://huggingface.co/nvidia/NV-Embed-v1"
    revision: "77b11725df91ca45663471a0f2ec6c06e04cbadb"
    desc: "NV-Embed-v1 by Nvidia"
    seq_len: 32768
    size: 7851
    dim: 4096
    license: "CC-BY-NC-4.0"
    organization: "Nvidia"
    mteb_overall: 69.32
    mteb_retrieval: 59.36
    mteb_sts: 82.84
    mteb_clustering: 52.8
  # Entry for the Hub repo given in `link`; has the largest seq_len in this
  # file. Instruction prompts are kept in their original key order.
  Alibaba-NLP/gte-Qwen2-7B-instruct:
    link: "https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct"
    revision: "e26182b2122f4435e8b3ebecbf363990f409b45b"
    desc: "gte-Qwen2-7B-instruct by Alibaba"
    seq_len: 131072
    size: 7613
    dim: 3584
    license: "Apache 2.0"
    organization: "Alibaba"
    instruction_query_arxiv: "Given a query, retrieve a relevant paper title and abstract from arXiv"
    instruction_query_wikipedia: "Given a query, retrieve a relevant title and passage from Wikipedia"
    instruction_query_stackexchange: "Given a query, retrieve a relevant question and answer from Stack Exchange"
    instruction_clustering: "Identify the topic/theme/category of the text"
    instruction_sts: "Retrieve semantically similar text"
    mteb_overall: 70.24
    mteb_retrieval: 60.25
    mteb_sts: 83.04
    mteb_clustering: 56.92
  # Entry for the Hub repo given in `link`; includes instruction prompts for
  # the three retrieval sources, clustering and STS.
  Salesforce/SFR-Embedding-2_R:
    link: "https://huggingface.co/Salesforce/SFR-Embedding-2_R"
    revision: "91762139d94ed4371a9fa31db5551272e0b83818"
    desc: "SFR-Embedding-2_R by Salesforce"
    seq_len: 32768
    size: 7111
    dim: 4096
    license: "CC-BY-NC-4.0"
    organization: "Salesforce"
    instruction_query_arxiv: "Given a query, retrieve a relevant paper title and abstract from arXiv"
    instruction_query_wikipedia: "Given a query, retrieve a relevant title and passage from Wikipedia"
    instruction_query_stackexchange: "Given a query, retrieve a relevant question and answer from Stack Exchange"
    instruction_clustering: "Identify the topic/theme/category of the text"
    instruction_sts: "Retrieve semantically similar text"
    mteb_overall: 70.31
    mteb_retrieval: 60.18
    mteb_sts: 81.26
    mteb_clustering: 56.17
  # Entry for the Hub repo given in `link`; no instruction_* prompts are set.
  # Text values are written as explicitly quoted strings.
  jinaai/jina-embeddings-v2-base-en:
    link: "https://huggingface.co/jinaai/jina-embeddings-v2-base-en"
    revision: "31b72fbf354fea65264ec54edf0b189d94b92d39"
    desc: "jina-embeddings-v2-base-en by Jina AI"
    seq_len: 8192
    size: 137
    dim: 768
    license: "Apache 2.0"
    organization: "Jina AI"
    mteb_overall: 60.38
    mteb_retrieval: 47.87
    mteb_sts: 80.70
    mteb_clustering: 41.73
  # Entry for the Hub repo given in `link`; no instruction_* prompts are set.
  mixedbread-ai/mxbai-embed-large-v1:
    link: "https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1"
    # The hash opens with digits followed by `e27`, i.e. it starts out looking
    # like a number in exponent notation; quoting keeps it a string explicitly.
    revision: "990580e27d329c7408b3741ecff85876e128e203"
    desc: "mxbai-embed-large-v1 by mixedbread.ai"
    seq_len: 512
    size: 335
    dim: 1024
    license: "Apache 2.0"
    organization: "mixedbread.ai"
    mteb_overall: 64.68
    mteb_retrieval: 54.39
    mteb_sts: 85.00
    mteb_clustering: 46.71
  # Entry for the Hub repo given in `link`; no instruction_* prompts are set.
  # Text values are written as explicitly quoted strings.
  nomic-ai/nomic-embed-text-v1.5:
    link: "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5"
    revision: "b0753ae76394dd36bcfb912a46018088bca48be0"
    desc: "nomic-embed-text-v1.5 by nomic.ai"
    seq_len: 8192
    size: 137
    dim: 768
    license: "Apache 2.0"
    organization: "nomic.ai"
    mteb_overall: 62.28
    mteb_retrieval: 53.01
    mteb_sts: 81.94
    mteb_clustering: 43.93
  # Entry for the Hub repo given in `link`; `desc` uses the short name
  # "LLM2Vec" rather than the full repo name.
  # NOTE(review): no instruction_* keys are set here, while several other
  # entries in this file define them - confirm that is intentional.
  McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised:
    link: "https://huggingface.co/McGill-NLP/LLM2Vec-Meta-Llama-3-8B-Instruct-mntp-supervised"
    revision: "baa8ebf04a1c2500e61288e7dad65e8ae42601a7"
    desc: "LLM2Vec by McGill"
    seq_len: 8192
    size: 7505
    dim: 4096
    license: "MIT"
    organization: "McGill"
    mteb_overall: 65.01
    mteb_retrieval: 56.63
    mteb_sts: 83.58
    mteb_clustering: 46.45
  # Entry whose `link` points at provider documentation rather than a Hub repo.
  # It has no `size` and no mteb_* scores, so readers of this mapping must
  # tolerate those keys being absent.
  voyage-multilingual-2:
    link: "https://docs.voyageai.com/docs/embeddings"
    revision: "1"
    desc: "voyage-multilingual-2 by Voyage AI"
    seq_len: 32000
    dim: 1024
    license: "Proprietary"
    organization: "Voyage AI"
  # Entry whose `link` points at provider documentation rather than a Hub repo;
  # no `size` is recorded and `revision` is the quoted string "1".
  voyage-large-2-instruct:
    link: "https://docs.voyageai.com/docs/embeddings"
    revision: "1"
    desc: "voyage-large-2-instruct by Voyage AI"
    seq_len: 16000
    dim: 1024
    license: "Proprietary"
    organization: "Voyage AI"
    mteb_overall: 68.28
    mteb_retrieval: 58.28
    mteb_sts: 84.58
    mteb_clustering: 53.35
  # Entry whose `link` points at provider documentation rather than a Hub repo;
  # no `size` is recorded and `revision` is the quoted string "1".
  text-embedding-004:
    link: "https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api"
    revision: "1"
    desc: "text-embedding-004 by Google"
    seq_len: 2048
    dim: 768
    license: "Proprietary"
    organization: "Google"
    mteb_overall: 66.31
    mteb_retrieval: 55.7
    mteb_sts: 85.07
    mteb_clustering: 47.48
  # Entry whose `link` points at provider documentation rather than a Hub repo;
  # no `size` is recorded and `revision` is the quoted string "1".
  text-embedding-3-large:
    link: "https://platform.openai.com/docs/guides/embeddings"
    revision: "1"
    desc: "text-embedding-3-large by OpenAI"
    seq_len: 8191
    dim: 3072
    license: "Proprietary"
    organization: "OpenAI"
    mteb_overall: 64.59
    mteb_retrieval: 55.44
    mteb_sts: 81.73
    mteb_clustering: 49.01
  # Entry whose `link` points at provider documentation rather than a Hub repo;
  # no `size` is recorded and `revision` is the quoted string "1".
  embed-english-v3.0:
    link: "https://docs.cohere.com/docs/cohere-embed"
    revision: "1"
    desc: "embed-english-v3.0 by Cohere"
    seq_len: 512
    dim: 1024
    license: "Proprietary"
    organization: "Cohere"
    mteb_overall: 64.47
    # Written with a decimal point so it loads as a float like every other
    # mteb_* value in this file (a bare `55` loads as an integer).
    mteb_retrieval: 55.0
    mteb_sts: 82.62
    mteb_clustering: 47.43
  # Lexical-search entry: only link, desc, license and a retrieval score are
  # present. revision, seq_len, size, dim, organization and the other mteb_*
  # scores are absent, so readers of this mapping must tolerate missing keys.
  BM25:
    link: "https://github.com/xhluca/bm25s"
    desc: "Fast lexical search via BM25"
    license: "MIT"
    mteb_retrieval: 42.4