from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

# Load the tokenizer and the 8-bit quantized model.
# Assumed dependencies: transformers, torch, bitsandbytes, accelerate, gradio.
tokenizer = AutoTokenizer.from_pretrained("Hawoly18/llama3.2-3B-Wolof")
model = AutoModelForCausalLM.from_pretrained(
    "Hawoly18/llama3.2-3B-Wolof",
    load_in_8bit=True,   # quantize weights to 8-bit at load time (uses bitsandbytes)
    device_map="auto"    # place weights automatically on the available device(s)
)

# The Llama tokenizer has no pad token by default; reuse the EOS token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
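# Note: recent transformers releases deprecate passing load_in_8bit directly to
# from_pretrained; quantization settings go through a BitsAndBytesConfig instead.
# A minimal sketch of the equivalent call, assuming a recent transformers version:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_8bit=True)
#   model = AutoModelForCausalLM.from_pretrained(
#       "Hawoly18/llama3.2-3B-Wolof",
#       quantization_config=quant_config,
#       device_map="auto",
#   )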
# Generate an answer for a given question.
def generate_response(question, max_length=512):
    input_text = f"Question: {question}\nRéponse:"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    # Move inputs to the model's device (device_map="auto" may have placed it on a GPU).
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=5,
            no_repeat_ngram_size=2,
            early_stopping=True
        )

    # Decode and strip the prompt so only the generated answer is returned.
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    response = response.replace(input_text, "").strip()
    return response
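# Optional local sanity check before launching the UI (same Wolof question as in
# the Gradio examples list below):
# print(generate_response("yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"))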
# Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Model Q&A Interface",
    description="Ask a question related to BSE and entrepreneurship!",
    examples=[["yan jumtukaay ci xaral yi BSE moom mën a dimbali ndax moom mën woyal sama liggéey ci entrepreneur yi"]]
)

# share=True requests a temporary public gradio.live link when the script is run
# locally; hosted platforms such as Hugging Face Spaces serve the app directly.
interface.launch(share=True)