import os
# os.environ['HF_HOME'] = 'E:/huggingface_cache'

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Hawoly18/Adia_Llama3.1")
model = AutoModelForCausalLM.from_pretrained("Hawoly18/Adia_Llama3.1")

# Llama tokenizers ship without a pad token; reuse the EOS token so padding works
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Function to generate responses
def generate_response(question, max_length=512):
    input_text = f"Question: {question}\nRéponse:"
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    with torch.no_grad():
        output_ids = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=max_length,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            num_beams=5,              # Beam search for better quality
            no_repeat_ngram_size=2,   # Prevent n-gram repetition
            early_stopping=True,
        )

    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Strip the prompt so only the generated answer is returned
    response = response.replace(input_text, "").strip()
    return response

# Define the Gradio interface
interface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="Model Q&A Interface",
    description="Ask a question related to BSE and entrepreneurship!",
)

# Launch the interface
interface.launch(share=True)