File size: 1,716 Bytes

9a5e8b9
 
 
 
 
 
44ee688
 
 
 
 
9a5e8b9
 
f1ad91f
 
9a5e8b9
f1ad91f
 
 
 
 
e8db095
 
 
f1ad91f
e8db095
9a5e8b9
f1ad91f
 
 
 
9a5e8b9
f1ad91f
9a5e8b9

from typing import  Dict, List, Any
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer
from optimum.pipelines import pipeline


import torch

# Import-time diagnostic: when torch reports cuDNN as available, print its
# version to stdout so it is visible in the process output.
if torch.backends.cudnn.is_available():
    print("cudnn:", torch.backends.cudnn.version())

class EndpointHandler:
    """Callable that runs a text-classification pipeline over an ONNX model.

    The instance is built once from a model directory and then invoked with
    one request payload per call.
    """

    def __init__(self, path=""):
        """Load the model and tokenizer from *path* and build the pipeline.

        Args:
            path: Location passed to both ``from_pretrained`` calls (the
                ONNX model and its tokenizer).
        """
        on_cuda = torch.cuda.is_available()

        # The ONNX Runtime provider and the pipeline device are both derived
        # from the same CUDA check so they can never disagree.
        provider = "CUDAExecutionProvider" if on_cuda else "CPUExecutionProvider"
        device = 0 if on_cuda else -1

        # load the optimized model
        # NOTE(review): export=False presumably means "use the ONNX files
        # already stored at `path`, do not convert" - confirm against optimum.
        model = ORTModelForSequenceClassification.from_pretrained(
            path,
            export=False,
            provider=provider,
        )
        tokenizer = AutoTokenizer.from_pretrained(path)

        # create inference pipeline
        self.pipeline = pipeline(
            "text-classification",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )

    def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
        """Run the pipeline on one request payload and return its output.

        Args:
            data: The request payload. When it is a dict, the value under
                ``"inputs"`` is fed to the pipeline and the mapping under
                ``"parameters"`` (optional, may be ``None``) is forwarded as
                keyword arguments. A dict without an ``"inputs"`` key is
                itself used as the input, minus its ``"parameters"`` entry.
                Any non-dict payload is passed to the pipeline as the input
                with no extra parameters.

        Returns:
            Whatever the pipeline returns, unchanged. For text classification
            this is expected to be a list of entries such as
            ``{"label": "POSITIVE", "score": 0.9939950108528137}``, where
            ``label`` names the class and ``score`` is a confidence between
            0 and 1.
            NOTE(review): the exact nesting depends on the pipeline and the
            parameters supplied - confirm before relying on the annotation.
        """
        if isinstance(data, dict):
            # Read instead of pop so the caller's payload is left untouched.
            # `or {}` also covers an explicit `"parameters": null`, which
            # would otherwise make the ** expansion below raise TypeError.
            parameters = data.get("parameters") or {}
            if "inputs" in data:
                inputs = data["inputs"]
            else:
                # No "inputs" key: the payload itself is the input; only the
                # "parameters" entry is kept out of what the pipeline sees.
                inputs = {
                    key: value
                    for key, value in data.items()
                    if key != "parameters"
                }
        else:
            inputs, parameters = data, {}

        return self.pipeline(inputs, **parameters)