|
import cv2 |
|
import numpy as np |
|
import gradio as gr |
|
import requests |
|
|
|
|
|
# Paths to the pre-trained SSD MobileNet v3 (COCO) model files.
config_file = "ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
frozen_model = "frozen_inference_graph.pb"

# Build the detection model and configure preprocessing to match the
# network's training regime: 320x320 input, pixels scaled from [0, 255]
# to roughly [-1, 1], and BGR->RGB channel swap (OpenCV loads BGR).
model = cv2.dnn.DetectionModel(frozen_model, config_file)
model.setInputSize(320, 320)
model.setInputScale(1.0 / 127.5)
model.setInputMean((127.5, 127.5, 127.5))
model.setInputSwapRB(True)

# Prefer CUDA only when OpenCV was built with CUDA support and a GPU is
# actually present; otherwise keep the default (CPU) backend so the
# script still runs on machines without a GPU or on CPU-only builds.
try:
    if cv2.cuda.getCudaEnabledDeviceCount() > 0:
        model.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        model.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
except (cv2.error, AttributeError):
    # cv2.cuda may be missing or unusable in CPU-only builds.
    pass

# Load the COCO class names, one label per line.
with open('labels.txt', 'rt') as f:
    classLabels = f.read().rstrip('\n').split('\n')
|
|
|
def detect_objects(frame):
    """
    Detect objects in a single frame and return their coordinates and names.

    :param frame: Input image/frame (BGR numpy array as produced by OpenCV)
    :return: List of dicts, each with the detected object's "name" and
             pixel "coordinates" (x, y, width, height)
    """
    detections = []

    # confThreshold=0.55 discards low-confidence boxes up front.
    ClassIndex, confidence, bbox = model.detect(frame, confThreshold=0.55)

    # model.detect returns empty results when nothing is found.
    if len(ClassIndex) != 0:
        for ClassInd, conf, boxes in zip(ClassIndex.flatten(), confidence.flatten(), bbox):
            # Class indices are 1-based. Validate against the loaded label
            # list instead of the hard-coded COCO size of 80, and reject
            # indices <= 0 (ClassInd == 0 would wrongly index labels[-1]).
            if 1 <= ClassInd <= len(classLabels):
                x, y, w, h = boxes
                detected_object = {
                    "name": classLabels[ClassInd - 1],
                    "coordinates": {
                        "x": int(x),
                        "y": int(y),
                        "width": int(w),
                        "height": int(h)
                    }
                }
                detections.append(detected_object)

    return detections
|
|
|
def get_image_from_url(url):
    """
    Download an image from *url* and decode it into an OpenCV BGR array.

    :param url: HTTP(S) URL of the image
    :return: Decoded image as a numpy array, or None if the bytes are not
             a decodable image (cv2.imdecode returns None on failure)
    :raises requests.HTTPError: if the server responds with an error status
    :raises requests.Timeout: if the request exceeds the timeout
    """
    # Bound the request so a stalled server cannot hang the app forever,
    # and fail fast on HTTP errors instead of feeding an error page
    # (e.g. 404 HTML) to the image decoder.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    image = np.asarray(bytearray(response.content), dtype="uint8")
    image = cv2.imdecode(image, cv2.IMREAD_COLOR)
    return image
|
|
|
def detect_objects_in_image_url(url):
    """
    Fetch an image from *url* and run object detection on it.

    :param url: HTTP(S) URL of the image
    :return: List of detected objects (see detect_objects)
    :raises ValueError: if the downloaded data cannot be decoded as an image
    """
    frame = get_image_from_url(url)
    if frame is None:
        # Fail with a clear message instead of letting model.detect
        # raise a cryptic cv2 error on a None input.
        raise ValueError(f"Could not decode an image from URL: {url}")
    detected_objects = detect_objects(frame)
    return detected_objects
|
|
|
|
|
# Gradio UI: a single text box for the image URL; detection results are
# rendered as JSON.
iface = gr.Interface(
    fn=detect_objects_in_image_url,
    inputs="text",
    outputs=gr.JSON(),
    title="Object Detection",
    description="Enter an image URL to detect objects. The detected objects will be returned as JSON."
)

# Start the web server only when run as a script, not when this module
# is imported (e.g. by tests or other tooling).
if __name__ == "__main__":
    iface.launch()
|
|