|
from transformers import AutoModelForCausalLM, AutoTokenizer |
|
import torch |
|
import os |
|
import subprocess |
|
import sys |
|
|
|
|
|
from transformers import WhisperForConditionalGeneration, pipeline |
|
|
|
|
|
|
|
def install(package):
    """Install *package* into the current interpreter's environment via pip.

    Raises:
        subprocess.CalledProcessError: if the pip invocation exits non-zero.
    """
    # `sys.executable -m pip` guarantees we install into the Python that is
    # actually running this script, even if no `pip` binary is on PATH.
    cmd = [sys.executable, "-m", "pip", "install", package]
    subprocess.check_call(cmd)
|
|
|
|
|
# Upgrade pip and huggingface_hub at import time so later model downloads use
# a current hub client.  Use `sys.executable -m pip` rather than a bare "pip"
# executable: the bare name may be missing from PATH or bound to a different
# interpreter than the one running this script.
subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"], check=True)
subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "huggingface_hub"], check=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pip |
|
|
|
|
|
|
|
class EndpointHandler:
    """Wraps a causal-LM checkpoint for text-generation inference.

    Loads the model and tokenizer from a Hugging Face repo id (or local
    directory) on construction and exposes a single `predict` method.
    """

    def __init__(self, model_dir):
        # Populated by load_model() below.
        self.model = None
        self.tokenizer = None
        self.model_dir = model_dir
        self.load_model(model_dir)

    def load_model(self, model_dir=None):
        """Load the model and tokenizer.

        Args:
            model_dir: repo id or local path to load from.  Defaults to the
                directory given at construction time.  (The original version
                required this argument but silently ignored it, which made
                the no-argument `load_model()` call in __main__ a TypeError.)
        """
        if model_dir is None:
            model_dir = self.model_dir
        else:
            # Keep the stored path in sync when a caller reloads elsewhere.
            self.model_dir = model_dir

        # Auth token for gated/private repos; None is fine for public ones.
        token = os.getenv("HF_API_TOKEN")

        # NOTE(review): from_tf=True loads TensorFlow (.h5) weights, which is
        # unusual for a Llama checkpoint — confirm the repo actually ships TF
        # weights, otherwise this flag should be dropped.
        self.model = AutoModelForCausalLM.from_pretrained(model_dir, token=token, from_tf=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_dir, token=token)

    def predict(self, inputs):
        """Generate text for *inputs* and return the decoded string.

        Args:
            inputs: prompt text (whatever the tokenizer accepts).

        Returns:
            The decoded generation for the first (only) sequence, with
            special tokens stripped.
        """
        tokens = self.tokenizer(inputs, return_tensors="pt")
        # Inference only — disable autograd bookkeeping.
        with torch.no_grad():
            outputs = self.model.generate(**tokens)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Best-effort install of runtime extras.  The original used pip.main(),
    # which is not a public API and was removed in pip >= 10; go through the
    # pip CLI (via the install() helper / subprocess) instead.
    try:
        install("accelerate")
        # bitsandbytes is pinned to the public PyPI index explicitly.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install",
             "-i", "https://pypi.org/simple/", "bitsandbytes"]
        )
    except Exception as e:
        # Best-effort: the model may still load without these extras.
        print(f"Error installing dependencies: {e}")

    model_dir = "NiCETmtm/Llama3_kw_gen_new"
    # EndpointHandler.__init__ already loads the model and tokenizer; the
    # original's extra handler.load_model() call passed no model_dir and
    # raised TypeError against the original signature, so it is dropped.
    handler = EndpointHandler(model_dir)
|
|
|
|
|
def inference(event, context):
    """Serverless-style entry point: generate text for event["data"].

    Args:
        event: mapping with the raw prompt under the "data" key.
        context: runtime context object (unused).

    Returns:
        dict with the decoded generation under "predictions".
    """
    global handler
    # `handler` is only created under the __main__ guard above, so importing
    # this module (the normal serving path) would otherwise hit a NameError
    # here.  Build the handler lazily on first call instead.
    if globals().get("handler") is None:
        handler = EndpointHandler("NiCETmtm/Llama3_kw_gen_new")
    inputs = event["data"]
    outputs = handler.predict(inputs)
    return {"predictions": outputs}
|
|