import gradio as gr
import torch
import numpy as np
from PIL import Image

# Load the pretrained YOLOv5s model from the Ultralytics hub
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)


# Function to run inference on an image
def run_inference(image):
    # Convert the PIL image to a NumPy array (RGB) for the model
    image = np.array(image)
    # Run YOLOv5 inference
    results = model(image)
    # Draw the detections onto the image; the array is already RGB,
    # so no color-space conversion is needed before display
    annotated_image = results.render()[0]
    return Image.fromarray(annotated_image)


# Function to generate a summary of the detected objects
def generate_summary(image):
    results = model(image)
    detected_objects = results.pandas().xyxy[0]
    summary = "Detected objects:\n\n"
    for _, obj in detected_objects.iterrows():
        summary += f"- {obj['name']} with confidence {obj['confidence']:.2f}\n"
    return summary


# Function to generate a scene description based on the summary
def generate_scene_description(summary):
    if "person" in summary.lower():
        return "This scene might involve people interacting or a social gathering."
    elif "car" in summary.lower() or "truck" in summary.lower():
        return "This could be a street scene or a transportation-related scenario."
    elif "dog" in summary.lower() or "cat" in summary.lower():
        return "This appears to involve pets or animals, possibly in a domestic or outdoor setting."
    else:
        return "This scene involves various objects. It could be a dynamic or static environment."


# Create the Gradio interface with improved UI
with gr.Blocks(css="""
    body {
        font-family: 'Poppins', sans-serif;
        margin: 0;
        background: linear-gradient(135deg, #3D52A0, #7091E6, #8697C4, #ADBBDA, #EDE8F5);
        background-size: 400% 400%;
        animation: gradient-animation 15s ease infinite;
        color: #FFFFFF;
    }
    @keyframes gradient-animation {
        0% { background-position: 0% 50%; }
        50% { background-position: 100% 50%; }
        100% { background-position: 0% 50%; }
    }
    h1 {
        text-align: center;
        color: #FFFFFF;
        font-size: 2.5em;
        font-weight: bold;
        margin-bottom: 0.5em;
        text-shadow: 2px 2px 5px rgba(0, 0, 0, 0.3);
    }
    footer {
        text-align: center;
        margin-top: 20px;
        padding: 10px;
        font-size: 1em;
        color: #FFFFFF;
        background: rgba(61, 82, 160, 0.8);
        border-radius: 8px;
    }
    .gr-button {
        font-size: 1em;
        padding: 12px 24px;
        background-color: #7091E6;
        color: #FFFFFF;
        border: none;
        border-radius: 5px;
        transition: all 0.3s ease-in-out;
    }
    .gr-button:hover {
        background-color: #8697C4;
        transform: scale(1.05);
        box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2);
    }
    .gr-box {
        background: rgba(255, 255, 255, 0.1);
        border: 1px solid rgba(255, 255, 255, 0.3);
        border-radius: 10px;
        padding: 15px;
        box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3);
        color: #FFFFFF;
    }
""") as demo:
    with gr.Row():
        gr.Markdown("<h1>✨ InsightVision: Detect, Analyze, Summarize ✨</h1>")

    with gr.Row():
        with gr.Column(scale=2):
            image_input = gr.Image(label="Upload Image", type="pil", elem_classes="gr-box")
            detect_button = gr.Button("Run Detection", elem_classes="gr-button")
        with gr.Column(scale=3):
            annotated_image_output = gr.Image(label="Detected Image", type="pil", elem_classes="gr-box")
            summary_output = gr.Textbox(label="Detection Summary", lines=10, interactive=False, elem_classes="gr-box")
            scene_description_output = gr.Textbox(label="Scene Description", lines=5, interactive=False, elem_classes="gr-box")

    # Actions for buttons
    def detect_and_process(image):
        annotated_image = run_inference(image)
        summary = generate_summary(np.array(image))  # runs inference again on the raw array
        scene_description = generate_scene_description(summary)
        return annotated_image, summary, scene_description

    detect_button.click(
        fn=detect_and_process,
        inputs=[image_input],
        outputs=[annotated_image_output, summary_output, scene_description_output]
    )

    gr.Markdown("")  # footer placeholder

# Launch the interface
demo.launch()