Spaces:

Exched
/

meta-llama-Llama-2-7b-chat-hf

Runtime error

Update app.py

a33cc3d verified 4 months ago

1.23 kB

	import os
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import gradio as gr

	# Read the Hugging Face access token from the environment.
	# os.getenv returns None when HF_TOKEN is not set.
	hf_token = os.getenv("HF_TOKEN")

	# Model identifier passed to from_pretrained below
	model_name = "meta-llama/Llama-2-7b-chat-hf"

	# Load the tokenizer and the model, authenticating with the token.
	# `token` is the current keyword; `use_auth_token` is deprecated in transformers.
	tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
	model = AutoModelForCausalLM.from_pretrained(model_name, token=hf_token)

	# Define the chat function
	def chat_with_llama2(input_text):
	    """Generate a sampled model reply to ``input_text``.

	    The prompt is tokenized with the module-level ``tokenizer`` and fed to
	    the module-level ``model``; only the newly generated tokens are decoded,
	    so the returned text does not repeat the prompt.

	    Args:
	        input_text: The user's message as a plain string.

	    Returns:
	        The decoded continuation with special tokens removed.
	    """
	    inputs = tokenizer(input_text, return_tensors="pt")
	    input_ids = inputs["input_ids"]
	    # Number of prompt tokens, used below to slice the echo off the output.
	    prompt_length = input_ids.shape[-1]

	    outputs = model.generate(
	        input_ids,
	        # Pass the mask explicitly so generate does not have to guess it.
	        attention_mask=inputs["attention_mask"],
	        # max_new_tokens bounds only the reply; max_length would also count
	        # the prompt and leave a long prompt with no room to answer.
	        max_new_tokens=512,
	        do_sample=True,
	        top_p=0.95,
	        top_k=60,
	    )

	    # generate returns prompt + continuation; keep only the continuation.
	    new_tokens = outputs[0][prompt_length:]
	    return tokenizer.decode(new_tokens, skip_special_tokens=True)

	# Build the Gradio UI: a single text box in, a single text box out,
	# with chat_with_llama2 as the handler.
	interface = gr.Interface(
	    title="LLaMa 2 Chat HF",
	    description="Chat with LLaMa 2 model using Hugging Face Transformers and Gradio.",
	    fn=chat_with_llama2,
	    inputs="text",
	    outputs="text",
	    # One inner list per example; each holds the value for the single input.
	    examples=[
	        ["Hello, LLaMa 2! How are you today?"],
	        ["Can you tell me a joke?"],
	        ["What is the capital of France?"],
	    ],
	)

	# Start the Gradio server only when this file is run as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    interface.launch()