import os

import gradio as gr
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face API token from the environment.
hf_token = os.getenv("HF_TOKEN")
if hf_token is None:
    raise RuntimeError("Set the HF_TOKEN environment variable to a Hugging Face access token.")

# Log in to huggingface_hub so the gated Llama 2 repo can be downloaded.
login(token=hf_token)

# Model name
model_name = "meta-llama/Llama-2-7b-chat-hf"

# Load the tokenizer and model. `token` replaces the deprecated `use_auth_token` argument.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)


# Define the chat function
def chat_with_llama2(input_text):
    inputs = tokenizer(input_text, return_tensors="pt")
    outputs = model.generate(
        **inputs,  # pass the attention mask along with the input IDs
        max_new_tokens=256,
        do_sample=True,
        top_p=0.95,
        top_k=60,
        pad_token_id=tokenizer.eos_token_id,  # Llama has no pad token; this silences the warning
    )
    # Decode only the newly generated tokens so the prompt is not echoed back.
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


# Create the Gradio interface
interface = gr.Interface(
    fn=chat_with_llama2,
    inputs="text",
    outputs="text",
    title="Llama 2 Chat HF",
    description="Chat with the Llama 2 model using Hugging Face Transformers and Gradio.",
    examples=[
        ["Hello, Llama 2! How are you today?"],
        ["Can you tell me a joke?"],
        ["What is the capital of France?"],
    ],
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()
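
# Usage sketch (the filename app.py below is illustrative, not part of this script):
#
#   export HF_TOKEN=hf_...   # a token whose account has been granted access to meta-llama/Llama-2-7b-chat-hf
#   python app.py
#
# Gradio then serves the demo locally, by default at http://127.0.0.1:7860.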