from typing import Any, Dict, List

import numpy as np
import torch
from accelerate import Accelerator
from transformers import AutoModelForCausalLM, AutoTokenizer


def softmax(x):
    """Numerically stable softmax over a 1-D array."""
    z = x - np.max(x)  # shift by the max so np.exp cannot overflow
    numerator = np.exp(z)
    denominator = np.sum(numerator)
    return numerator / denominator


class EndpointHandler:
    def __init__(self, path=""):
        self.accelerator = Accelerator()
        self.device = self.accelerator.device
        # device_map="auto" lets transformers place the weights across the
        # available devices at load time.
        self.model = AutoModelForCausalLM.from_pretrained(
            path, trust_remote_code=True, device_map="auto"
        )
        self.model = self.accelerator.prepare(self.model)
        self.tokenizer = AutoTokenizer.from_pretrained(path)
        # Token id for each answer choice; [-1] keeps the last piece in case
        # the tokenizer splits a letter into more than one token.
        self.options_tokens = [self.tokenizer.encode(choice)[-1] for choice in ["A", "B", "C", "D"]]

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data: dict with a "prompt" key holding the question and its
                answer choices as a single string.
        Returns:
            A list with one dict: "pred" is the index of the highest-scoring
            option and "conf" the softmax confidence over the options, both
            converted to plain Python types so they serialize cleanly.
        """
        with torch.no_grad():
            prompt = data.pop("prompt")
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
            # Causal LMs do not accept token_type_ids; drop them if the
            # tokenizer produced any.
            inputs.pop("token_type_ids", None)
            outputs = self.model(**inputs)
            # Logits for the token that would follow the prompt.
            last_token_logits = outputs.logits[:, -1, :]
            # Keep only the logits of the answer-choice tokens.
            options_tokens_logits = last_token_logits[:, self.options_tokens].detach().cpu().numpy()
            conf = softmax(options_tokens_logits[0])
            pred = np.argmax(options_tokens_logits[0])
            return [{"pred": int(pred), "conf": conf.tolist()}]
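

# A minimal local smoke test, assuming a causal LM checkpoint on disk; the
# "my-model" path and the example prompt are placeholders, and a deployed
# endpoint would invoke EndpointHandler through the serving runtime instead.
if __name__ == "__main__":
    handler = EndpointHandler(path="my-model")  # hypothetical local model path
    payload = {
        "prompt": (
            "Question: What is 2 + 2?\n"
            "A. 3\nB. 4\nC. 5\nD. 6\n"
            "Answer:"
        )
    }
    result = handler(payload)
    # e.g. [{"pred": 1, "conf": [...]}], where "pred" indexes ["A", "B", "C", "D"]
    print(result)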