from transformers import AutoModelForCausalLM, AutoTokenizer


class Handler:
    """Callable request handler that generates text with a causal language model.

    The tokenizer and model are both loaded from ``model_path`` at
    construction time, and the model is switched to evaluation mode.
    """

    def __init__(self, model_path):
        """Load the tokenizer and causal LM found at ``model_path``."""
        self.model_path = model_path
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.model = AutoModelForCausalLM.from_pretrained(model_path)
        self.model.eval()

    def generate_text(self, prompt, max_length=100, num_return_sequences=1, temperature=0.7):
        """Generate one or more continuations of ``prompt``.

        Args:
            prompt: Text to condition the generation on.
            max_length: Forwarded to ``model.generate`` as ``max_length``.
            num_return_sequences: Number of sequences requested from
                ``model.generate``.
            temperature: Sampling temperature. A value greater than zero
                enables sampling; ``None`` or a non-positive value falls
                back to deterministic decoding, where the temperature is
                not forwarded at all.

        Returns:
            A list of decoded strings (special tokens stripped), one per
            sequence returned by ``model.generate``.
        """
        # Calling the tokenizer (instead of ``encode``) also yields the
        # attention mask, so ``generate`` does not have to guess it from the
        # pad token -- which is set to the EOS token below.
        encoded = self.tokenizer(prompt, return_tensors="pt")

        generation_kwargs = {
            "attention_mask": encoded.get("attention_mask"),
            "max_length": max_length,
            "num_return_sequences": num_return_sequences,
            "pad_token_id": self.tokenizer.eos_token_id,
        }

        # ``temperature`` only has an effect when sampling is enabled; without
        # ``do_sample=True`` the value was silently ignored and requests for
        # more than one sequence could not be served.
        if temperature is not None and temperature > 0:
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = temperature
        else:
            # NOTE(review): with sampling off, transformers is expected to
            # reject num_return_sequences > 1 -- confirm this is acceptable.
            generation_kwargs["do_sample"] = False

        generated_ids = self.model.generate(encoded["input_ids"], **generation_kwargs)

        return [
            self.tokenizer.decode(ids, skip_special_tokens=True)
            for ids in generated_ids
        ]

    def __call__(self, request):
        """Handle a single generation request.

        Args:
            request: Mapping with a required ``"prompt"`` key and optional
                ``"max_length"`` (default 100), ``"num_return_sequences"``
                (default 1) and ``"temperature"`` (default 0.7) keys.

        Returns:
            ``{"generated_texts": [...]}`` holding the generated strings.

        Raises:
            KeyError: If ``request`` has no ``"prompt"`` entry.
        """
        # Parse the request and extract the necessary information
        prompt = request["prompt"]
        max_length = request.get("max_length", 100)
        num_return_sequences = request.get("num_return_sequences", 1)
        temperature = request.get("temperature", 0.7)

        # Generate text based on the prompt and parameters
        generated_texts = self.generate_text(
            prompt, max_length, num_return_sequences, temperature
        )

        # Prepare the response
        return {"generated_texts": generated_texts}


# Module-level handler instance; the model is loaded from "." at import time.
# NOTE(review): presumably this name is looked up by the serving runtime -- confirm.
handler = Handler(".")