import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    "qresearch/llama-3.1-8B-vision-378",
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to("cuda")
tokenizer = AutoTokenizer.from_pretrained("qresearch/llama-3.1-8B-vision-378", use_fast=True)
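# Note: the .to("cuda") call above assumes a CUDA-capable GPU is available.
# A possible alternative (not part of the original script) is to select the device dynamically:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model = model.to(device)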
# Define the function to process the image and instruction
def describe_image(image, instruction):
    description = model.answer_question(
        image, instruction, tokenizer, max_new_tokens=1000, do_sample=True, temperature=0.3
    )
    return description
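# answer_question() is not part of the standard transformers API; it is provided by the
# model's custom code, which is loaded because trust_remote_code=True is set above.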
# Create the Gradio interface
interface = gr.Interface(
    fn=describe_image,
    inputs=[
        gr.Image(type="pil"),  # Input image
        gr.Textbox(
            placeholder="Enter your instruction here...",
            label="Instruction",
            lines=10,      # Visible lines for the instruction input
            max_lines=20,  # Maximum lines before the textbox scrolls
        ),
    ],
    outputs=gr.Textbox(
        label="Description",
        lines=10,      # Visible lines for the output
        max_lines=30,  # Maximum lines before the textbox scrolls
    ),
    title="LLaMA 3.1 with Vision",
    description="Upload an image and enter an instruction; the model generates a description that follows the instruction.",
)
# Launch the Gradio app
interface.launch()
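# launch() serves the app locally by default; Gradio also accepts, for example,
# interface.launch(share=True) to create a temporary public link, or
# interface.launch(server_name="0.0.0.0") to listen on all network interfaces.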