How to export to ONNX format?
#12 by nampham1106 - opened
I'm trying to export to the ONNX format, but Optimum doesn't support the new architecture. Please help me.
Yes, the new GTE model architecture is not yet supported in Optimum. We currently do not have the bandwidth to push for the integration of the new GTE architecture into Optimum, but we will address this later.
Convert to ONNX:
import os

import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer

model_original = AutoModelForTokenClassification.from_pretrained('Alibaba-NLP/gte-multilingual-base', trust_remote_code=True)
tokenizer_original = AutoTokenizer.from_pretrained('Alibaba-NLP/gte-multilingual-base')
model_original.eval()  # switch to inference mode before exporting

model_path = "gte-multilingual-base-onnx"
os.makedirs(model_path, exist_ok=True)  # torch.onnx.export will not create the directory

dummy_model_input = tokenizer_original("This is a test for ONNX Runtime!", return_tensors="pt")

torch.onnx.export(
    model_original,
    tuple(dummy_model_input.values()),
    f"{model_path}/model.onnx",
    input_names=['input_ids', 'attention_mask'],
    # the model returns two outputs (token-weight logits and hidden states);
    # the inference code below indexes them by position, so this name is cosmetic
    output_names=['last_hidden_state'],
    # mark batch and sequence dims as dynamic so any batch size / length is accepted
    dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence'},
                  'attention_mask': {0: 'batch_size', 1: 'sequence'},
                  'last_hidden_state': {0: 'batch_size', 1: 'sequence'}},
    do_constant_folding=True,
    opset_version=14,
)

# let's also save the tokenizer
tokenizer_original.save_pretrained(f"{model_path}/tokenizer")
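Optionally, you can sanity-check the export by running the same dummy input through both the PyTorch model and ONNX Runtime and comparing the first output. This is a minimal sketch; it assumes the tokenizer produces exactly the two exported inputs and that small floating-point differences are acceptable:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession(f"{model_path}/model.onnx", providers=["CPUExecutionProvider"])
onnx_outputs = sess.run(None, {k: v.numpy() for k, v in dummy_model_input.items()})

with torch.no_grad():
    torch_outputs = model_original(**dummy_model_input)

# compare the first output of each backend; loose tolerances absorb fp noise
np.testing.assert_allclose(torch_outputs[0].detach().numpy(), onnx_outputs[0], rtol=1e-3, atol=1e-5)
print("ONNX output matches PyTorch")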
Now for inference:
import os
from typing import Dict, Tuple

import numpy as np
import onnxruntime as ort
from tokenizers import Tokenizer


class GTEOnnxModel:
    def __init__(self, model_path: str):
        """
        ONNX model for GTE.

        Parameters:
            model_path: str - path to the exported model folder
        """
        opt = ort.SessionOptions()
        opt.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED
        opt.log_severity_level = 3
        opt.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
        self.sess = ort.InferenceSession(os.path.join(model_path, "model.onnx"), opt, providers=["CPUExecutionProvider"])
        self.tokenizer = Tokenizer.from_file(os.path.join(model_path, "tokenizer/tokenizer.json"))

    def encode(self, text: str) -> Tuple[np.ndarray, Dict[int, float]]:
        encoded = self.tokenizer.encode(text, add_special_tokens=True)
        encoded_inputs = {'input_ids': encoded.ids, 'attention_mask': encoded.attention_mask}
        # add a batch dimension of 1
        model_input = {name: np.atleast_2d(value) for name, value in encoded_inputs.items()}
        emb = self.sess.run(None, model_input)
        # get the CLS embedding from the hidden-state output and L2-normalize it
        dense = emb[1][0, 0]
        dense = dense / np.linalg.norm(dense)
        # get the per-token weights from the token-classification head,
        # dropping the special tokens at both ends
        tk_weights = emb[0][0, :, 0][1:-1]
        token_weights = {tk_id: tk_weights[i] for i, tk_id in enumerate(encoded_inputs['input_ids'][1:-1])}
        return dense, token_weights
model_onnx = GTEOnnxModel("gte-multilingual-base-onnx")
emb, weights = model_onnx.encode("This is a test for ONNX Runtime!")
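Since the dense vectors come out L2-normalized, cosine similarity between two texts is just a dot product, and the token-weight ids can be mapped back to strings with the tokenizer. A small usage sketch building on the class above (the example texts are arbitrary):

query_dense, query_weights = model_onnx.encode("How do I export a model to ONNX?")
doc_dense, _ = model_onnx.encode("This is a test for ONNX Runtime!")

# cosine similarity reduces to a dot product for unit-norm vectors
print(f"cosine similarity: {float(np.dot(query_dense, doc_dense)):.4f}")

# map token ids back to readable tokens for inspection
readable = {model_onnx.tokenizer.id_to_token(tk_id): float(w) for tk_id, w in query_weights.items()}
print(readable)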