MiniCPM-Llama3-V-2_5-int4 / handler_single.py
import base64
from io import BytesIO
from typing import Any
from PIL import Image
from transformers import AutoTokenizer, AutoModel

class EndpointHandler:
    def __init__(self, path: str = ""):
        # Load the int4-quantized MiniCPM-Llama3-V 2.5 model and its tokenizer;
        # trust_remote_code is required because the repo ships custom modeling code.
        model_name = "SwordElucidator/MiniCPM-Llama3-V-2_5-int4"
        model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model.eval()
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, data: Any) -> str:
        inputs = data.pop("inputs", data)
        image = inputs.pop("image", None)  # base64-encoded image
        question = inputs.pop("question", None)
        msgs = inputs.pop("msgs", None)
        parameters = data.pop("parameters", {})

        # Decode the base64 payload into a PIL image.
        image = Image.open(BytesIO(base64.b64decode(image)))
        if not msgs:
            msgs = [{'role': 'user', 'content': question}]

        res = self.model.chat(
            image=image,
            msgs=msgs,
            tokenizer=self.tokenizer,
            sampling=True,  # if sampling=False, beam search is used by default
            temperature=parameters.get('temperature', 0.7),
            # system_prompt=''  # pass a system prompt if needed
        )
        return res
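

if __name__ == "__main__":
    # Local smoke test: a minimal sketch of how the handler above can be exercised
    # outside of an Inference Endpoint. The file name "example.jpg" and the question
    # text are placeholders, not part of the original handler.
    handler = EndpointHandler()
    with open("example.jpg", "rb") as f:
        encoded_image = base64.b64encode(f.read()).decode("utf-8")
    payload = {
        "inputs": {
            "image": encoded_image,
            "question": "What is shown in this image?",
        },
        "parameters": {"temperature": 0.7},
    }
    print(handler(payload))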