dunzhang
/

stella_en_1.5B_v5

@@ -5471,34 +5471,64 @@ Please refer to the following chapters for specific instructions on how to use t
 # Usage
-You can use `SentenceTransformer` or `transformer` library to encode text.
 ```python
 import os
 import torch
 from transformers import AutoModel, AutoTokenizer
-from sentence_transformers import SentenceTransformer
 from sklearn.preprocessing import normalize
-prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: {query}"
-queries = ["query1", "query2"]
-queries = [prompt.replace("{query}", query) for query in queries]
-# doc do not need any prompts
-docs = ["doc1", "doc2"]
 model_dir = "{Your MODEL_PATH}"
-#### method1: SentenceTransformer
-# ！！！！The default dimension is 8192，if you need other dimensions, please copy the files from the `2_Dense_{dims}` folder to overwrite them. For example, `copy -r ./2_Dense_1024/* ./2_Dense/` ！！！！
-model = SentenceTransformer(model_dir, trust_remote_code=True).cuda()
-vectors = model.encode(queries, convert_to_numpy=True, normalize_embeddings=True)
-print(vectors.shape)
-print(vectors[:, :4])
-#### method2：transformers
-vector_linear_directory = "2_Dense"
-vector_dim = 8192
 model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
 tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
 vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
@@ -5508,17 +5538,34 @@ vector_linear_dict = {
 }
 vector_linear.load_state_dict(vector_linear_dict)
 vector_linear.cuda()
 with torch.no_grad():
     input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
     input_data = {k: v.cuda() for k, v in input_data.items()}
     attention_mask = input_data["attention_mask"]
     last_hidden_state = model(**input_data)[0]
     last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
-    vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
-    vectors = normalize(vector_linear(vectors).cpu().numpy())
-print(vectors.shape)
-print(vectors[:, :4])
 ```
 # FAQ

 # Usage
+You can use the `sentence-transformers` or `transformers` library to encode text.
+## Sentence Transformers
+```python
+from sentence_transformers import SentenceTransformer
+# This model supports two prompts: "s2p_query" and "s2s_query" for sentence-to-passage and sentence-to-sentence tasks, respectively.
+# They are defined in `config_sentence_transformers.json`
+prompt_name = "s2p_query"
+queries = [
+    "What are some ways to reduce stress?",
+    "What are the benefits of drinking green tea?",
+]
+# docs do not need any prompts
+docs = [
+    "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
+    "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
+]
+# NOTE: The default dimension is 1024. If you need another dimension, clone the model and edit `modules.json` to replace `2_Dense_1024` with another folder, e.g. `2_Dense_256` or `2_Dense_8192`.
+model = SentenceTransformer("infgrad/stella_en_1.5B_v5", trust_remote_code=True).cuda()
+query_embeddings = model.encode(queries, prompt_name=prompt_name)
+doc_embeddings = model.encode(docs)
+print(query_embeddings.shape, doc_embeddings.shape)
+# (2, 1024) (2, 1024)
+similarities = model.similarity(query_embeddings, doc_embeddings)
+print(similarities)
+# tensor([[0.8179, 0.2958],
+#         [0.3194, 0.7854]])
+```
+## Transformers
 ```python
 import os
 import torch
 from transformers import AutoModel, AutoTokenizer
 from sklearn.preprocessing import normalize
+query_prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: "
+queries = [
+    "What are some ways to reduce stress?",
+    "What are the benefits of drinking green tea?",
+]
+queries = [query_prompt + query for query in queries]
+# docs do not need any prompts
+docs = [
+    "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
+    "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
+]
+# The path of your model after cloning it
 model_dir = "{Your MODEL_PATH}"
+vector_dim = 1024
+vector_linear_directory = f"2_Dense_{vector_dim}"
 model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
 tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
 vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
 }
 vector_linear.load_state_dict(vector_linear_dict)
 vector_linear.cuda()
+# Embed the queries
 with torch.no_grad():
     input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
     input_data = {k: v.cuda() for k, v in input_data.items()}
     attention_mask = input_data["attention_mask"]
     last_hidden_state = model(**input_data)[0]
     last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
+    query_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
+    query_vectors = normalize(vector_linear(query_vectors).cpu().numpy())
+# Embed the documents
+with torch.no_grad():
+    input_data = tokenizer(docs, padding="longest", truncation=True, max_length=512, return_tensors="pt")
+    input_data = {k: v.cuda() for k, v in input_data.items()}
+    attention_mask = input_data["attention_mask"]
+    last_hidden_state = model(**input_data)[0]
+    last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
+    docs_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
+    docs_vectors = normalize(vector_linear(docs_vectors).cpu().numpy())
+print(query_vectors.shape, docs_vectors.shape)
+# (2, 1024) (2, 1024)
+similarities = query_vectors @ docs_vectors.T
+print(similarities)
+# [[0.8178789  0.2958377 ]
+#  [0.31938642 0.7853526 ]]
 ```
 # FAQ

config_sentence_transformers.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "__version__": {
+    "sentence_transformers": "3.0.1",
+    "transformers": "4.42.3",
+    "pytorch": "2.3.1+cu121"
+  },
+  "prompts": {
+    "s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
+    "s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
+  },
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}

modules.json CHANGED Viewed

@@ -14,7 +14,7 @@
  {
   "idx": 2,
   "name": "2",
-  "path": "2_Dense",
   "type": "sentence_transformers.models.Dense"
  }
 ]

  {
   "idx": 2,
   "name": "2",
+  "path": "2_Dense_1024",
   "type": "sentence_transformers.models.Dense"
  }
 ]