Spaces:
Sleeping
Sleeping
File size: 3,180 Bytes
780389c 8b2cbe6 1e8e71b ccc35d4 082831f 8fbff22 8b2cbe6 385e56e 082831f 1e8e71b 082831f 8fbff22 082831f 790227b ccc35d4 790227b ccc35d4 082831f ccc35d4 1e8e71b 66947f7 385e56e 082831f 385e56e 790227b 082831f 790227b 082831f 385e56e 082831f 8b2cbe6 790227b ccc35d4 8b2cbe6 ccc35d4 8b2cbe6 66947f7 8b2cbe6 790227b 8b2cbe6 66947f7 8b2cbe6 790227b 66947f7 082831f 790227b 66947f7 790227b ccc35d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import spaces
import gradio as gr
import cv2
import tempfile
from PIL import Image, ImageDraw, ImageFont
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
import torch
import requests
# Load the RT-DETR processor and model once at import time. The model is
# placed on the GPU in half precision, so every input fed to it must be
# moved to "cuda" and cast to float16 as well.
image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd", torch_dtype=torch.float16).to("cuda")
model = torch.compile(model, mode="reduce-overhead")
# Compile by running inference: one forward pass on a sample image here, so
# that the first webcam frame does not have to pay for it.
url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
# Bound the download time so an unresponsive host cannot hang start-up, and
# fail with a clear HTTP error instead of an image-decoding error.
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw)
inputs = image_processor(images=image, return_tensors="pt").to("cuda", torch.float16)
with torch.no_grad():
    outputs = model(**inputs)
def draw_bounding_boxes(image, results, model, threshold=0.3):
    """Draw a red box and a "label: score" caption for each confident detection.

    Args:
        image: Image to annotate; it is drawn on in place.
        results: Iterable of per-image detection mappings, each providing
            "scores", "labels" and "boxes" sequences of equal meaning per index.
        model: Object whose ``config.id2label`` maps a label id to its name.
        threshold: Detections whose score is not above this value are skipped.

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    canvas = ImageDraw.Draw(image)
    for detections in results:
        candidates = zip(
            detections["scores"], detections["labels"], detections["boxes"]
        )
        for score, label_id, box in candidates:
            if not (score > threshold):
                continue
            name = model.config.id2label[label_id.item()]
            # Round the box coordinates to whole pixels before drawing.
            corners = list(map(round, box.tolist()))
            canvas.rectangle(corners, outline="red", width=3)
            # The caption is anchored at the first corner of the box.
            canvas.text((corners[0], corners[1]), f"{name}: {score:.2f}", fill="red")
    return image
import time
@spaces.GPU
def inference(image, conf_threshold):
    """Detect objects in one webcam frame and return the annotated frame.

    Args:
        image: The current frame as a PIL image (the Gradio input component
            is declared with ``type="pil"``).
        conf_threshold: Score threshold, used both when post-processing the
            model output and when drawing the boxes.

    Returns:
        The input image with the detections drawn on it. The drawing is done
        in place by ``draw_bounding_boxes``.
    """
    # The model lives on CUDA in float16, so the processed inputs have to be
    # moved and cast the same way as in the module-level warm-up call;
    # feeding the default CPU/float32 tensors fails with a device mismatch.
    inputs = image_processor(images=image, return_tensors="pt").to("cuda", torch.float16)
    start = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    # PIL reports size as (width, height); it is reversed here before being
    # passed as the target size used to rescale the predicted boxes.
    results = image_processor.post_process_object_detection(
        outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=conf_threshold
    )
    end = time.time()
    print("time: ", end - start)
    bbs = draw_bounding_boxes(image, results, model, threshold=conf_threshold)
    print("bbs: ", time.time() - end)
    return bbs
# Custom CSS handed to gr.Blocks: ".my-group" caps the size of the widget
# group and ".my-column" centers its children with flexbox.
# Lengths need a unit ("600" alone is an invalid declaration that browsers
# drop), and the trailing ";" must sit inside the rule's closing brace.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
# Build the page: a title, reference links, and a webcam image whose frames
# are streamed through `inference` and replaced by the annotated result.
with gr.Blocks(css=css) as app:
    # Centered page title.
    gr.HTML(
        "\n"
        "<h1 style='text-align: center'>\n"
        "Near Real-Time Webcam Stream with RT-DETR\n"
        "</h1>\n"
    )
    # Links to the RT-DETR paper and source repository.
    gr.HTML(
        "\n"
        "<h3 style='text-align: center'>\n"
        "<a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>\n"
        "</h3>\n"
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = gr.Image(type="pil", label="Image", sources="webcam")
            conf_threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.85,
                label="Confidence Threshold",
            )
        # The same Image component is both the input and the output, so each
        # processed frame overwrites the displayed one. `stream_every` and
        # `time_limit` are given in seconds per the Gradio streaming docs.
        image.stream(
            inference,
            inputs=[image, conf_threshold],
            outputs=[image],
            stream_every=0.1,
            time_limit=30,
        )
# Launch the Gradio app only when this file is executed directly, not when
# it is imported as a module.
if __name__ == "__main__":
    app.launch()
|