yilunzhang committed on
Commit
31faf00
1 Parent(s): 44b10fe

Upload 2 files

Browse files
Files changed (2) hide show
  1. handler.py +40 -0
  2. requirements.txt +3 -0
handler.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from transformers import AutoTokenizer
6
+ from optimum.onnxruntime import ORTModelForFeatureExtraction
7
+
8
+
9
def mean_pooling(model_output, attention_mask):
    """Return the attention-masked mean of the token embeddings.

    Args:
        model_output: Indexable model output; element ``0`` is taken as the
            per-token embeddings (presumably shaped
            ``(batch, seq_len, hidden)`` -- confirm against the model).
        attention_mask: Mask tensor with one entry per token. It is given a
            trailing axis and expanded to the embeddings' size, so positions
            with mask ``0`` contribute nothing to the sum.

    Returns:
        The embeddings summed over dim 1, weighted by the mask, and divided
        by the mask summed over dim 1.
    """
    token_embeddings = model_output[0]
    # Broadcast the mask across the trailing embedding axis so it can weight
    # every component of each token vector.
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    summed = torch.sum(token_embeddings * mask, 1)
    # Clamp the denominator so a fully masked row yields zeros instead of
    # dividing by zero.
    counts = torch.clamp(mask.sum(1), min=1e-9)
    return summed / counts
13
+
14
+
15
class EndpointHandler:
    """Callable handler that turns input text into L2-normalized embeddings.

    The tokenizer and the ONNX feature-extraction model are loaded once at
    construction time and reused for every call.
    """

    def __init__(self, path=""):
        """Load the tokenizer and the ``model.onnx`` model found at *path*.

        Args:
            path: Location passed to both ``from_pretrained`` calls.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        self.model = ORTModelForFeatureExtraction.from_pretrained(path, file_name="model.onnx")

    def __call__(self, data: Any) -> Dict[str, List[List[float]]]:
        """Embed the inputs carried by *data*.

        Args:
            data: Either a dict whose ``"inputs"`` entry holds the text to
                embed, or the text payload itself. A dict without an
                ``"inputs"`` key is passed to the tokenizer unchanged.

        Returns:
            A dict with a single key ``"embeddings"`` mapping to a nested
            list of floats: one mean-pooled, L2-normalized vector per input.
        """
        # Only dicts have ``.get``; a bare string or list payload is used
        # directly instead of raising AttributeError.
        inputs = data.get("inputs", data) if isinstance(data, dict) else data

        # Tokenize with padding and truncation enabled, returning PyTorch
        # tensors.
        encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors='pt')
        # Run the model.
        outputs = self.model(**encoded_inputs)
        # Pool token embeddings into one vector per input, ignoring positions
        # the attention mask zeroes out.
        sentence_embeddings = mean_pooling(outputs, encoded_inputs['attention_mask'])
        # Scale each vector to unit L2 norm along dim 1.
        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
        # Convert to plain Python lists for the response.
        return {"embeddings": sentence_embeddings.tolist()}
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ optimum[onnxruntime]
2
+ mkl-include
3
+ mkl