from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import gradio as gr

# Load the model quantized to 8-bit (requires the bitsandbytes package and a
# CUDA GPU: bitsandbytes' LLM.int8() quantization does not run on CPU-only hosts)
tokenizer = AutoTokenizer.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
model = AutoModelForCausalLM.from_pretrained(
    "Hawoly18/llama3.2-3B-Wolof",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # 8-bit quantization
    device_map="auto",  # place layers automatically on the available devices
)
# Llama tokenizers ship without a pad token; fall back to EOS
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
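# Optional: report the memory footprint of the quantized model
# (get_memory_footprint() is a standard transformers helper)
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")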
# Generate an answer for a given question
def generate_response(question, max_new_tokens=512):
    input_text = f"Question: {question}\nRéponse:"
    # Tokenize the prompt and move it to the model's device; tokenizer() also
    # returns the attention mask, so it does not need to be rebuilt by hand
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,  # bounds the generated tokens, not prompt + output
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True,
        )
    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(
        output_ids[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )
    return response.strip()
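# Optional smoke test before wiring up the UI (commented out so the app starts
# quickly; reuses the sample question from the Gradio examples below)
# print(generate_response("yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"))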
# Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Model Q&A Interface",
    description="Ask a question related to BSE and entrepreneurship!",
    examples=[["yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"]],
)

interface.launch(share=True)  # share=True also exposes a temporary public URL
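Once the app is running, the same endpoint can also be queried programmatically. A minimal sketch using the gradio_client package, assuming the default local URL and the default "/predict" endpoint that a single gr.Interface exposes (the public share URL printed at launch works the same way):

from gradio_client import Client

client = Client("http://127.0.0.1:7860")  # or the share URL printed by launch()
answer = client.predict(
    "yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi",
    api_name="/predict",  # default endpoint name for a gr.Interface
)
print(answer)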