from typing import Any, Dict, List

from transformers import AutoModelForCausalLM, AutoTokenizer


class Handler:
    """Callable text-generation handler built on a causal language model.

    The tokenizer and model are both loaded from ``model_path`` when the
    handler is constructed. Calling the instance with a request mapping runs
    generation and returns the decoded texts.
    """

    def __init__(self, model_path: str) -> None:
        """Load the tokenizer and model found at *model_path*.

        Args:
            model_path: Value forwarded to ``from_pretrained`` for both the
                tokenizer and the model.
        """
        self.model_path = model_path
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)
        # The handler only runs inference, so put the model in eval mode.
        self.model.eval()

    def generate_text(
        self,
        prompt: str,
        max_length: int = 100,
        num_return_sequences: int = 1,
        temperature: float = 0.7,
    ) -> List[str]:
        """Generate continuations of *prompt* and return them as strings.

        Args:
            prompt: Text to continue.
            max_length: Forwarded to ``generate`` as ``max_length``.
            num_return_sequences: Number of sequences requested from
                ``generate``.
            temperature: Sampling temperature. A positive value enables
                sampling; ``None`` or a non-positive value leaves the
                decoding strategy at the model's default and omits the
                temperature argument.

        Returns:
            One decoded string per generated sequence, with special tokens
            stripped.
        """
        # Call the tokenizer (rather than ``encode``) so the attention mask
        # is available; it cannot be inferred reliably once the pad token is
        # aliased to EOS below.
        encoded = self.tokenizer(prompt, return_tensors="pt")

        generate_kwargs: Dict[str, Any] = {
            "max_length": max_length,
            "num_return_sequences": num_return_sequences,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        attention_mask = encoded.get("attention_mask")
        if attention_mask is not None:
            generate_kwargs["attention_mask"] = attention_mask

        # Temperature only takes effect when sampling is switched on; without
        # ``do_sample`` the value would be ignored.
        # NOTE(review): without sampling, transformers is expected to reject
        # num_return_sequences > 1 -- confirm against the installed version.
        if temperature is not None and temperature > 0:
            generate_kwargs["do_sample"] = True
            generate_kwargs["temperature"] = temperature

        generated_ids = self.model.generate(encoded["input_ids"], **generate_kwargs)

        return [
            self.tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated_ids
        ]

    def __call__(self, request: Dict[str, Any]) -> Dict[str, List[str]]:
        """Serve one generation request.

        Args:
            request: Mapping with a required ``"prompt"`` key and optional
                ``"max_length"`` (default 100), ``"num_return_sequences"``
                (default 1) and ``"temperature"`` (default 0.7) keys.

        Returns:
            ``{"generated_texts": [...]}`` holding the decoded outputs.

        Raises:
            KeyError: If ``"prompt"`` is missing from *request*.
        """
        prompt = request["prompt"]
        max_length = request.get("max_length", 100)
        num_return_sequences = request.get("num_return_sequences", 1)
        temperature = request.get("temperature", 0.7)

        generated_texts = self.generate_text(
            prompt, max_length, num_return_sequences, temperature
        )
        return {"generated_texts": generated_texts}


# Module-level handler instance, constructed at import time with model_path ".".
handler = Handler(".")