Update README.md
README.md (CHANGED)
@@ -1195,7 +1195,7 @@ model-index:
    - type: map_at_5
      value: 15.271
    - type: mrr_at_1
-      value: 69
+      value: 69
    - type: mrr_at_10
      value: 75.304
    - type: mrr_at_100
@@ -1219,9 +1219,9 @@ model-index:
    - type: ndcg_at_5
      value: 42.104
    - type: precision_at_1
-      value: 69
+      value: 69
    - type: precision_at_10
-      value: 33
+      value: 33
    - type: precision_at_100
      value: 10.75
    - type: precision_at_1000
@@ -1815,7 +1815,7 @@ model-index:
    - type: ndcg_at_3
      value: 85.435
    - type: ndcg_at_5
-      value: 87
+      value: 87
    - type: precision_at_1
      value: 81.24
    - type: precision_at_10
@@ -1910,13 +1910,13 @@ model-index:
    - type: precision_at_1
      value: 24.8
    - type: precision_at_10
-      value: 12
+      value: 12
    - type: precision_at_100
      value: 2.5420000000000003
    - type: precision_at_1000
      value: 0.39899999999999997
    - type: precision_at_3
-      value: 20
+      value: 20
    - type: precision_at_5
      value: 17.4
    - type: recall_at_1
@@ -2197,7 +2197,7 @@ model-index:
    - type: recall_at_100
      value: 96.167
    - type: recall_at_1000
-      value: 100
+      value: 100
    - type: recall_at_3
      value: 74.117
    - type: recall_at_5
@@ -2250,7 +2250,7 @@ model-index:
    - type: manhattan_precision
      value: 91.72482552342971
    - type: manhattan_recall
-      value: 92
+      value: 92
    - type: max_accuracy
      value: 99.83861386138614
    - type: max_ap
@@ -2331,7 +2331,7 @@ model-index:
    - type: map_at_5
      value: 1.001
    - type: mrr_at_1
-      value: 76
+      value: 76
    - type: mrr_at_10
      value: 85.667
    - type: mrr_at_100
@@ -2343,7 +2343,7 @@ model-index:
    - type: mrr_at_5
      value: 85.667
    - type: ndcg_at_1
-      value: 72
+      value: 72
    - type: ndcg_at_10
      value: 68.637
    - type: ndcg_at_100
@@ -2355,7 +2355,7 @@ model-index:
    - type: ndcg_at_5
      value: 71.808
    - type: precision_at_1
-      value: 76
+      value: 76
    - type: precision_at_10
      value: 73.8
    - type: precision_at_100
@@ -2365,7 +2365,7 @@ model-index:
    - type: precision_at_3
      value: 74.667
    - type: precision_at_5
-      value: 78
+      value: 78
    - type: recall_at_1
      value: 0.22100000000000003
    - type: recall_at_10
@@ -2596,6 +2596,10 @@ model-index:
      value: 85.53503846009764
    - type: max_f1
      value: 77.68167368965773
+license: apache-2.0
+language:
+- en
+library_name: transformers
---

<br><br>
@@ -2605,7 +2609,7 @@ model-index:
</p>

<p align="center">
-<b>The crispy
+<b>The crispy sentence embedding family from <a href="https://mixedbread.ai"><b>mixedbread ai</b></a>.</b>
</p>

# mxbai-embed-2d-large-v1
@@ -2617,7 +2621,73 @@ model-index:

Currently, the best way to use our models is with the most recent version of sentence-transformers.

-
+```bash
+python -m pip install -U sentence-transformers
+```
+
+
+```python
+from sentence_transformers import models, SentenceTransformer
+from sentence_transformers.util import cos_sim
+
+
+# 1. load model with `cls` pooling
+word_embedding_model = models.Transformer("mixedbread-ai/mxbai-embed-2d-large-v1")
+pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode="cls")
+model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
+
+# 2. set adaptive layer and embedding size.
+# it is recommended to set layers from 20 to 24.
+new_num_layers = 22  # 1d: layer
+model[0].auto_model.encoder.layer = model[0].auto_model.encoder.layer[:new_num_layers]
+new_embedding_size = 768  # 2d: embedding size
+
+
+# 3. encode
+embeddings = model.encode(
+    [
+        'Who is german and likes bread?',
+        'Everybody in Germany.'
+    ]
+)
+
+# similarity of the two sentences, using only the first `new_embedding_size` dimensions
+similarities = cos_sim(embeddings[0, :new_embedding_size], embeddings[1, :new_embedding_size])
+
+print('similarities:', similarities)
+```

### angle-emb

+You can also use the latest `angle-emb` for inference, as follows:
+
+```bash
+python -m pip install -U angle-emb
+```
+
+```python
+from angle_emb import AnglE
+from sentence_transformers.util import cos_sim
+
+# 1. load model
+model = AnglE.from_pretrained("mixedbread-ai/mxbai-embed-2d-large-v1", pooling_strategy='cls').cuda()
+
+
+# 2. set adaptive layer and embedding size.
+# it is recommended to set layers from 20 to 24.
+layer_index = 22  # 1d: layer
+embedding_size = 768  # 2d: embedding size
+
+# 3. encode
+embeddings = model.encode([
+    'Who is german and likes bread?',
+    'Everybody in Germany.'
+], layer_index=layer_index, embedding_size=embedding_size)
+
+similarities = cos_sim(embeddings[0], embeddings[1:])
+print('similarities:', similarities)
+```
+
+### Using API
+You’ll be able to use the models through our API as well. The API is coming soon and will have some exciting features. Stay tuned!
+
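Since the commit also adds `library_name: transformers` to the front matter, a plain `transformers` variant of the sentence-transformers snippet above may be handy. This is a minimal sketch, not taken from the model card: it assumes the checkpoint loads as a standard BERT-style encoder with CLS pooling, and it simply reuses the layer and dimension choices (22 layers, 768 dimensions) from the example in the diff.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

# assumption: the checkpoint is a standard BERT-style encoder that AutoModel can load
model_id = "mixedbread-ai/mxbai-embed-2d-large-v1"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModel.from_pretrained(model_id).eval()

# 1d: keep only the first 22 encoder layers (mirrors the sentence-transformers example)
model.encoder.layer = model.encoder.layer[:22]
# 2d: use only the first 768 embedding dimensions
embedding_size = 768

inputs = tokenizer(
    ["Who is german and likes bread?", "Everybody in Germany."],
    padding=True,
    return_tensors="pt",
)

with torch.no_grad():
    outputs = model(**inputs)

# cls pooling: take the first token's hidden state, then truncate the embedding
embeddings = outputs.last_hidden_state[:, 0, :embedding_size]

print("similarities:", F.cosine_similarity(embeddings[0:1], embeddings[1:2]))
```

As in the snippets from the diff, dropping layers trades a little accuracy for faster inference and truncating dimensions trades a little accuracy for smaller embeddings; the 22-layer / 768-dimension pair here is only illustrative.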