How to use the ONNX model? I need to accelerate the embedding
#6
by
machine1116
- opened
I need Python code showing how to use the ONNX model. Can anybody give me an example? Thanks, my friends.
hi @machine1116 :
from transformers import AutoTokenizer, AutoModel
from onnxruntime import InferenceSession
import torch
from numpy.linalg import norm
def cos_sim(a, b):
    """Return the cosine similarity of ``a`` and ``b``.

    The product ``a @ b.T`` is divided by ``norm(a) * norm(b)``.
    """
    dot_product = a @ b.T
    magnitude = norm(a) * norm(b)
    return dot_product / magnitude
# Tokenizer published with the embedding model.
tokenizer = AutoTokenizer.from_pretrained(
    'jinaai/jina-embeddings-v2-base-zh', trust_remote_code=True
)

# Placeholder: replace with the location of the ONNX file you downloaded.
# (The original bare `your-downloaded-onnx_model_path` parses as a subtraction
# of undefined names and raises NameError.)
onnx_model_path = 'path/to/your/downloaded/model.onnx'
session = InferenceSession(onnx_model_path)

# Sentence pairs whose embeddings are compared with `cos_sim`.
sentence_pairs = [
    ('how is the weather today', 'What is the current weather like today?'),
    ('I went to school today', 'where did you go this morning ?'),
    ('did you buy another iphone', 'question about buying a new apple device'),
    ('what did you learn at schole', 'coding and programming'),
]
def embed_onnx(text):
    """Embed ``text`` with the ONNX session and mean-pool the token embeddings.

    Args:
        text: Input handed to the module-level ``tokenizer``.

    Returns:
        A ``torch.Tensor`` holding the attention-mask-weighted mean of the
        session's ``last_hidden_state`` output.
    """
    # Local import: this snippet's import list does not bind the `np` alias.
    import numpy as np

    # ONNX Runtime expects NumPy arrays as input
    encoded = tokenizer(text, return_tensors="np")
    # The session reports INVALID_ARGUMENT unless inputs are tensor(int64),
    # so cast every tokenizer output explicitly before feeding it.
    feed = {
        name: np.asarray(value, dtype=np.int64) for name, value in encoded.items()
    }
    outputs = session.run(output_names=["last_hidden_state"], input_feed=feed)

    # Mean pooling: average token embeddings, counting only unmasked positions.
    token_embeddings = torch.from_numpy(outputs[0])
    attention_mask = torch.from_numpy(feed["attention_mask"])
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp so an all-zero mask cannot cause a division by zero.
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts
# Score each sentence pair and print the result; the original loop computed
# `score` but never used it, so the example produced no output.
for first, second in sentence_pairs:
    score = float(cos_sim(embed_onnx(first), embed_onnx(second))[0][0])
    print(f'{first!r} <-> {second!r}: {score:.4f}')
If someone encounters this error when using the script from @bwang0911
onnxruntime.capi.onnxruntime_pybind11_state.InvalidArgument: [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Unexpected input data type. Actual: (tensor(string)) , expected: (tensor(int64))
You might need to convert the values in `inputs` to the expected dtype (tensor(int64)):
inputs = self.tokenizer(text, return_tensors="np")
# Add this line: rebuild every tokenizer output as an int64 NumPy array.
inputs = { name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items() }
And here is my example script, which supports both the original model and the ONNX model:
import os
import numpy as np
import torch
from pathlib import Path
from typing import Union
from huggingface_hub import hf_hub_download
from numpy.linalg import norm
from onnxruntime import InferenceSession
from tclogger import logger
from transformers import AutoTokenizer, AutoModel
from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS
# Copy Hugging Face settings from the project config (ENVS) into os.environ.
# HF_ENDPOINT is only written when ENVS["HF_ENDPOINT"] is truthy.
# NOTE(review): indentation was lost in this paste, so it is unclear whether the
# HF_TOKEN assignment belongs inside the `if` - confirm against the original script.
if ENVS["HF_ENDPOINT"]:
os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]
def cosine_similarity(a, b):
    """Return the cosine similarity between ``a`` and ``b``.

    The product ``a @ b.T`` is divided by ``norm(a) * norm(b)``.
    """
    numerator = a @ b.T
    denominator = norm(a) * norm(b)
    return numerator / denominator
class JinaAIOnnxEmbedder:
    """Embed text with the quantized ONNX file of jina-embeddings-v2-base-zh.

    https://huggingface.co/jinaai/jina-embeddings-v2-base-zh/discussions/6#65bc55a854ab5eb7b6300893
    """

    def __init__(self):
        """Set the repository name, then download (if needed) and load the model."""
        self.repo_name = "jinaai/jina-embeddings-v2-base-zh"
        self.download_model()
        self.load_model()

    def download_model(self):
        """Download the ONNX file next to this module unless it already exists."""
        self.onnx_folder = Path(__file__).parent
        self.onnx_filename = "onnx/model_quantized.onnx"
        self.onnx_path = self.onnx_folder / self.onnx_filename
        if not self.onnx_path.exists():
            logger.note("> Downloading ONNX model")
            hf_hub_download(
                repo_id=self.repo_name,
                filename=self.onnx_filename,
                local_dir=self.onnx_folder,
                local_dir_use_symlinks=False,
            )
            logger.success(f"+ ONNX model downloaded: {self.onnx_path}")
        else:
            logger.success(f"+ ONNX model loaded: {self.onnx_path}")

    def load_model(self):
        """Create the tokenizer and the ONNX Runtime inference session."""
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.repo_name, trust_remote_code=True
        )
        # InferenceSession documents its first argument as a filename or bytes;
        # pass a plain str so the call does not rely on Path objects being accepted.
        self.session = InferenceSession(str(self.onnx_path))

    def mean_pooling(self, model_output, attention_mask):
        """Average token embeddings over the positions kept by the mask.

        Args:
            model_output: Tensor of token embeddings.
            attention_mask: Tensor that, after gaining a trailing axis, is
                expanded to the size of ``model_output``.

        Returns:
            The masked sum along dimension 1 divided by the mask count.
        """
        token_embeddings = model_output
        input_mask_expanded = (
            attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        )
        masked_sum = torch.sum(token_embeddings * input_mask_expanded, 1)
        # Clamp so an all-zero mask cannot cause a division by zero.
        mask_count = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
        return masked_sum / mask_count

    def encode(self, text: str):
        """Tokenize ``text``, run the ONNX session and return pooled embeddings."""
        inputs = self.tokenizer(text, return_tensors="np")
        # Cast every tokenizer output to int64 before feeding the session.
        inputs = {
            name: np.array(tensor, dtype=np.int64) for name, tensor in inputs.items()
        }
        # `inputs` is already a plain dict, so it is fed directly.
        outputs = self.session.run(
            output_names=["last_hidden_state"], input_feed=inputs
        )
        embeddings = self.mean_pooling(
            torch.from_numpy(outputs[0]), torch.from_numpy(inputs["attention_mask"])
        )
        return embeddings
class JinaAIEmbedder:
    """Embed text with a Hugging Face model chosen from ``AVAILABLE_MODELS``."""

    def __init__(self, model_name: str = AVAILABLE_MODELS[0]):
        """Remember ``model_name`` and load the corresponding model."""
        self.model_name = model_name
        self.load_model()

    def check_model_name(self):
        """Reset ``model_name`` to the first available model when it is unknown.

        Always returns ``True``.
        """
        is_known = self.model_name in AVAILABLE_MODELS
        if not is_known:
            self.model_name = AVAILABLE_MODELS[0]
        return True

    def load_model(self):
        """Validate the model name, then load the model via ``AutoModel``."""
        self.check_model_name()
        self.model = AutoModel.from_pretrained(self.model_name, trust_remote_code=True)

    def switch_model(self, model_name: str):
        """Load ``model_name`` unless it is already the current model."""
        if model_name == self.model_name:
            return
        self.model_name = model_name
        self.load_model()

    def encode(self, text: Union[str, list[str]]):
        """Encode one string or a list of strings with the loaded model.

        A single string is wrapped in a one-element list first.
        """
        batch = [text] if isinstance(text, str) else text
        return self.model.encode(batch)
if __name__ == "__main__":
    # Swap in JinaAIEmbedder() here to run the original (non-ONNX) model.
    embedder = JinaAIOnnxEmbedder()
    texts = ["How is the weather today?", "今天天气怎么样?"]
    # Embed each sample text in order.
    embeddings = [embedder.encode(sample) for sample in texts]
    logger.success(embeddings)
    first, second = embeddings
    print(cosine_similarity(first, second))
If you would like to use this script, be sure to replace the following part with your own environment configuration, which is meant to set the environment variables and constants:
from configs.envs import ENVS
from configs.constants import AVAILABLE_MODELS
# Project-specific part to replace: copies Hugging Face settings from ENVS into os.environ.
# NOTE(review): indentation was lost in this paste, so it is unclear whether the
# HF_TOKEN assignment belongs inside the `if` - confirm against the original script.
if ENVS["HF_ENDPOINT"]:
os.environ["HF_ENDPOINT"] = ENVS["HF_ENDPOINT"]
os.environ["HF_TOKEN"] = ENVS["HF_TOKEN"]
The format of `AVAILABLE_MODELS` is like:
# Model identifiers the embedder may load.
AVAILABLE_MODELS = [
    "jinaai/jina-embeddings-v2-base-zh",
]
thanks
machine1116
changed discussion status to
closed
machine1116
changed discussion status to
open
machine1116
changed discussion status to
closed
machine1116
changed discussion status to
open