"""Gradio chat demo backed by a Llama 3 instruct text-generation pipeline.

The script loads the model once at import time, runs one sample generation,
and then serves a ``gr.ChatInterface`` whose replies come from that pipeline.
"""

import gradio as gr
import spaces
import time
import os
import transformers
from transformers import pipeline
import torch

# Value of the API_KEY environment variable, forwarded to the pipeline as its
# `token` (presumably a Hugging Face access token for the gated model --
# confirm against the deployment's secrets).
key = os.getenv('API_KEY')

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# NOTE: this assignment rebinds the name `pipeline` imported above from the
# factory function to the constructed pipeline object; the rest of the file
# relies on the object.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
    token=key,
)

messages = [
    {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
    {"role": "user", "content": "Who are you?"},
]

# Stop generation on either the tokenizer's EOS token or the "<|eot_id|>"
# token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

# Generation settings shared by the sample call below and by generate_text(),
# so both produce output under the same configuration.
_generation_kwargs = {
    "max_new_tokens": 256,
    "eos_token_id": terminators,
    "do_sample": True,
    "temperature": 0.6,
    "top_p": 0.9,
}

# NOTE(review): this sample generation runs at import time, before the UI
# starts, and `outputs` is not read anywhere else in this file. It is kept to
# preserve the script's existing start-up behavior.
outputs = pipeline(messages, **_generation_kwargs)


@spaces.GPU(duration=240)
def generate_text(prompt):
    """Generate the assistant's reply to a single user prompt.

    The prompt is sent through the module-level ``pipeline`` as a chat
    conversation made of the system message from ``messages`` followed by
    ``prompt`` as the user turn.

    Previously this function referenced ``tokenizer`` and ``model``, which
    are not defined in this file, so every call raised ``NameError``.

    Args:
        prompt: The user's message text.

    Returns:
        The text content of the reply generated by the model.
    """
    conversation = [
        messages[0],  # reuse the system prompt configured above
        {"role": "user", "content": prompt},
    ]
    result = pipeline(conversation, **_generation_kwargs)
    # For chat-style input the pipeline returns the conversation with the
    # newly generated assistant message appended as the last entry.
    return result[0]["generated_text"][-1]["content"]


def chatbot(message, history):
    """Chat-interface callback: reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier conversation turns supplied by the chat interface.
            It is not used; each reply is generated from ``message`` alone.

    Returns:
        The reply text produced by ``generate_text``.
    """
    return generate_text(message)


gr.ChatInterface(chatbot).launch()