import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "qresearch/llama-3.1-8B-vision-378",
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to("cuda")
tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", use_fast=True)
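# Note: the .to("cuda") call above assumes a CUDA-capable GPU is available.
# A possible alternative (not part of the original script) is to select the device dynamically:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)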
# Define the function to process the image and instruction
def describe_image(image, instruction):
    description = model.answer_question(
        image, instruction, tokenizer, max_new_tokens=1000, do_sample=True, temperature=0.3
    )
    return description
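# answer_question() is not part of the standard transformers API; it is provided by the
# model's custom code, which is loaded because trust_remote_code=True is set above.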
# Create the Gradio interface
interface = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="pil"),  # Input image
        gr.Textbox(
            placeholder="Enter your instruction here...",
            label="Instruction",
            lines=10,      # Visible lines for the instruction input
            max_lines=20,  # Maximum lines before the textbox scrolls
        ),
    ],
    outputs=gr.Textbox(
        label="Description",
        lines=10,      # Visible lines for the output
        max_lines=30,  # Maximum lines before the textbox scrolls
    ),
    title="LLaMA 3.1 with Vision",
    description="Upload an image and enter an instruction; the model generates a description that follows the instruction.",
)
# Launch the Gradio app
interface.launch()
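# launch() serves the app locally by default; Gradio also accepts, for example,
# interface.launch(share=True) to create a temporary public link, or
# interface.launch(server_name="0.0.0.0") to listen on all network interfaces.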