# app.py — Gradio Q&A demo for Hawoly18/Adia_Llama3.1
import os
# os.environ['HF_HOME'] = 'E:/huggingface_cache'  # optional: point the HF cache at a local drive
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Hawoly18/Adia_Llama3.1")
model = AutoModelForCausalLM.from_pretrained("Hawoly18/Adia_Llama3.1")
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
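
# Optional sketch: move the model to a GPU when one is available. This assumes the
# host may expose a CUDA device; on a CPU-only host it is effectively a no-op.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()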
# Function to generate responses
def generate_response(question, max_length=512):
    input_text = f"Question: {question}\nRéponse:"
    # Tokenize the prompt; the tokenizer returns both input_ids and attention_mask,
    # which avoids mis-masking EOS tokens when pad_token == eos_token
    inputs = tokenizer(input_text, return_tensors='pt', padding=True, truncation=True)
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=5,              # beam search for better quality
            no_repeat_ngram_size=2,   # prevent n-gram repetition
            early_stopping=True,
        )

    # Decode only the newly generated tokens (everything after the prompt)
    response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
    return response.strip()
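
# Example usage (hypothetical question) — uncomment to sanity-check generation
# outside the Gradio UI, e.g. when running the script locally:
# print(generate_response("What support does the BSE offer to young entrepreneurs?"))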
# Define the Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Model Q&A Interface",
    description="Ask a question related to BSE and entrepreneurship!",
)
# Launch the interface (share=True creates a public link when run locally; Spaces ignores it)
interface.launch(share=True)