# llama3.2 / app.py
import torch
from PIL import Image
from transformers import AutoProcessor, BitsAndBytesConfig, MllamaForConditionalGeneration
import gradio as gr
import json
import os
import re
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"

token = os.getenv("HUGGINGFACE_TOKEN")
if token is None:
    raise RuntimeError("HUGGINGFACE_TOKEN environment variable is not set")
token = token.strip()

processor = AutoProcessor.from_pretrained(model_name, token=token)

# MllamaForConditionalGeneration is the class documented for this checkpoint.
# Loading in 4-bit (bitsandbytes) keeps the 11B model within a single-GPU
# memory budget, and device_map="auto" places the quantized weights on the
# GPU directly: calling .to("cuda") on a 4-bit model raises an error.
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    token=token,
)
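# For reference, the unquantized load documented on the model card would look
# roughly like the sketch below. In bf16 the 11B model needs about 22 GB for
# the weights alone, so this assumes a correspondingly large GPU:
# model = MllamaForConditionalGeneration.from_pretrained(
#     model_name, torch_dtype=torch.bfloat16, device_map="auto", token=token
# )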
def analyze_image(image, prompt):
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt}
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=100)
    full_response = processor.decode(output[0], skip_special_tokens=True)

    try:
        # Find all JSON-like structures in the response. The decoded output
        # echoes the prompt (which itself contains example JSON), so the
        # model's actual answer is the last match.
        json_matches = list(re.finditer(r'\{.*?\}', full_response, re.DOTALL))
        if json_matches:
            last_json_str = json_matches[-1].group(0)
            try:
                processed_json = json.loads(last_json_str)
            except json.JSONDecodeError as e:
                processed_json = {"error": f"Invalid JSON in model output: {e}", "full_response": full_response}
        else:
            processed_json = {"error": "No JSON found in model output", "full_response": full_response}
    except Exception as e:
        processed_json = {"error": str(e), "full_response": full_response}

    return full_response, processed_json
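# The non-greedy r'\{.*?\}' pattern above stops at the first '}', so it would
# truncate JSON containing nested braces. The replies requested here are flat,
# but a more robust alternative (a sketch, not wired into analyze_image) is to
# scan with json.JSONDecoder, which parses complete objects:
# def extract_last_json(text):
#     decoder = json.JSONDecoder()
#     last = None
#     for i, ch in enumerate(text):
#         if ch == "{":
#             try:
#                 last, _ = decoder.raw_decode(text[i:])
#             except json.JSONDecodeError:
#                 continue
#     return last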
default_prompt = """Analyze this image and determine if it contains a data logger. A data logger is typically a small, black electronic device used to monitor and record data over time, such as voltage, temperature, or current, via external sensors.
Carefully examine the image and provide a detailed response. If a data logger is present in the image, respond with:
{"present": true, "reason": "Detailed explanation of why you believe it's a data logger, including specific visual cues you've identified"}
If no data logger is visible, respond with:
{"present": false, "reason": "Detailed explanation of why you believe there's no data logger, describing what you see instead"}
Ensure your response is in valid JSON format."""
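# A minimal local smoke test, assuming a CUDA-capable machine and a valid
# HUGGINGFACE_TOKEN. It uses a synthetic blank image so no asset is required
# (left commented out so the Space only runs the Gradio app):
# test_image = Image.new("RGB", (560, 560), color="black")
# raw, parsed = analyze_image(test_image, default_prompt)
# print(parsed)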
iface = gr.Interface(
    fn=analyze_image,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt", value=default_prompt, lines=10)
    ],
    outputs=[
        gr.Textbox(label="Full Response", lines=10),
        gr.JSON(label="Processed JSON")
    ],
    title="Llama 3.2 Vision",
    cache_examples=False,
    description="Upload an image and customize the prompt to check if it contains a data logger.",
    examples=[
        ["bad.png", default_prompt]
    ]
)

iface.launch()
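# Once the app is up, it can also be queried programmatically from a separate
# process. A sketch using gradio_client; the local URL and api_name below are
# assumptions based on gr.Interface defaults:
# from gradio_client import Client, handle_file
# client = Client("http://127.0.0.1:7860")
# raw, parsed = client.predict(handle_file("bad.png"), default_prompt, api_name="/predict")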