# Hugging Face Space (status shown on the page when captured: Runtime error)
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import gradio as gr | |
# Load the model and tokenizer once at import time so every request reuses them.
MODEL_ID = "qresearch/llama-3.1-8B-vision-378"

# Fall back to CPU when no CUDA device is visible instead of crashing at
# startup. Half precision is kept for the GPU path only; float32 is the
# conservative choice on CPU, where float16 kernels are often missing or slow.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,  # permits running the custom code published with the checkpoint
    torch_dtype=DTYPE,
).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
# Define the function to process the image and instruction
def describe_image(image, instruction):
    """Answer ``instruction`` about ``image`` using the loaded vision model.

    Args:
        image: The uploaded picture as delivered by the Gradio image input
            (configured with ``type="pil"``), or ``None`` when the user
            submitted without uploading anything.
        instruction: Free-form text from the "Instruction" textbox; passed to
            the model unchanged.

    Returns:
        The value returned by ``model.answer_question``, which is shown in the
        "Description" textbox.

    Raises:
        gr.Error: If no image was provided, so the UI shows a readable message
            instead of an opaque failure from inside the model code.
    """
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    # Sampling with a low temperature: some variety, but mostly focused output.
    return model.answer_question(
        image,
        instruction,
        tokenizer,
        max_new_tokens=1000,
        do_sample=True,
        temperature=0.3,
    )
# Build the Gradio UI. Components are created in the same order as they appear
# in the form: image input, instruction input, then the description output.
image_input = gr.Image(type="pil")  # hands the upload to the callback as a PIL image

instruction_input = gr.Textbox(
    placeholder="Enter your instruction here...",
    label="Instruction",
    lines=10,      # initial visible height of the prompt box
    max_lines=20,  # the box scrolls once it grows past this many lines
)

description_output = gr.Textbox(
    label="Description",
    lines=10,      # initial visible height of the result box
    max_lines=30,  # the box scrolls once it grows past this many lines
)

interface = gr.Interface(
    fn=describe_image,
    inputs=[image_input, instruction_input],
    outputs=description_output,
    title="LLaMA 3.1 with Vision",
    description="Upload an image and enter an instruction to generate a description based on the provided instruction.",
)

# Start serving the app.
interface.launch()