YOLOv10-webcam-stream

Sleeping

App Files Files Community

freddyaboulton HF staff committed on Sep 11

Commit

082831f

•

1 Parent(s): 790227b

Try compiled

Browse files

Files changed (2) hide show

app.py +17 -36
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -2,12 +2,18 @@ import spaces
 import gradio as gr
 import cv2
 import tempfile
-from ultralytics import YOLOv10
 from PIL import Image, ImageDraw, ImageFont
 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
-model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd")
 def draw_bounding_boxes(image, results, model, threshold=0.3):
     draw = ImageDraw.Draw(image)
@@ -22,47 +28,25 @@ def draw_bounding_boxes(image, results, model, threshold=0.3):
                 draw.text((box[0], box[1]), f"{label}: {score:.2f}", fill="red")
     return image
 @spaces.GPU
 def inference(image, conf_threshold):
     inputs = image_processor(images=image, return_tensors="pt")
     with torch.no_grad():
         outputs = model(**inputs)
     results = image_processor.post_process_object_detection(
-        outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=0.3
     )
-    return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
-def app():
-    with gr.Blocks():
-        with gr.Row():
-            with gr.Column():
-                image = gr.Image(
-                    type="pil",
-                    label="Image",
-                    visible=True,
-                    sources="webcam",
-                    height=500,
-                    width=500,
-                )
-                conf_threshold = gr.Slider(
-                    label="Confidence Threshold",
-                    minimum=0.0,
-                    maximum=1.0,
-                    step=0.05,
-                    value=0.25,
-                )
-        image.stream(
-            fn=yolov10_inference,
-            inputs=[image, conf_threshold],
-            outputs=[image],
-            stream_every=0.2,
-            time_limit=30,
-        )
 css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
@@ -88,10 +72,7 @@ with gr.Blocks(css=css) as app:
             image = gr.Image(
                 type="pil",
                 label="Image",
-                visible=True,
                 sources="webcam",
-                height=500,
-                width=500,
             )
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
@@ -104,7 +85,7 @@ with gr.Blocks(css=css) as app:
                 fn=inference,
                 inputs=[image, conf_threshold],
                 outputs=[image],
-                stream_every=0.2,
                 time_limit=30,
             )
 if __name__ == "__main__":

 import gradio as gr
 import cv2
 import tempfile
 from PIL import Image, ImageDraw, ImageFont
+from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
+import torch
 # Load the RT-DETR preprocessor, then the detector in half precision on the GPU.
 image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
 model = RTDetrForObjectDetection.from_pretrained(
     "PekingU/rtdetr_r50vd", torch_dtype=torch.float16
 ).to("cuda")
 model = torch.compile(model, mode="reduce-overhead")
 # Compile by running inference once at import time.
 # The processor takes a decoded image rather than a file path, so the sample
 # file is opened (and normalised to RGB) before being preprocessed.
 with Image.open("bus.png") as warmup_file:
     inputs = image_processor(
         images=warmup_file.convert("RGB"), return_tensors="pt"
     ).to("cuda", torch.float16)
 with torch.no_grad():
     outputs = model(**inputs)
 def draw_bounding_boxes(image, results, model, threshold=0.3):
     draw = ImageDraw.Draw(image)
                 draw.text((box[0], box[1]), f"{label}: {score:.2f}", fill="red")
     return image
+import time
 @spaces.GPU
 def inference(image, conf_threshold):
     """Detect objects in one webcam frame and return the annotated frame.

     Args:
         image: PIL image delivered by the ``gr.Image`` webcam stream.
         conf_threshold: Minimum detection score, taken from the UI slider;
             applied both in post-processing and when drawing boxes.

     Returns:
         Whatever ``draw_bounding_boxes`` returns for this frame (the image
         with the detections drawn on it).
     """
     # The model was moved to CUDA in float16 at load time, so the
     # preprocessed tensors must be moved the same way (as the warm-up call
     # does); otherwise the forward pass fails on a device/dtype mismatch.
     inputs = image_processor(images=image, return_tensors="pt").to(
         "cuda", torch.float16
     )
     start = time.time()
     with torch.no_grad():
         outputs = model(**inputs)
     # PIL reports size as (width, height); post-processing wants (height, width).
     results = image_processor.post_process_object_detection(
         outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=conf_threshold
     )
     end = time.time()
     print("time: ", end - start)
     bbs = draw_bounding_boxes(image, results, model, threshold=conf_threshold)
     print("bbs: ", time.time() - end)
     return bbs
 css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
             image = gr.Image(
                 type="pil",
                 label="Image",
                 sources="webcam",
             )
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 fn=inference,
                 inputs=[image, conf_threshold],
                 outputs=[image],
+                stream_every=0.1,
                 time_limit=30,
             )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-git+https://github.com/THU-MIG/yolov10.git
 safetensors==0.4.3
 gradio-client @ git+https://github.com/gradio-app/gradio@66349fe26827e3a3c15b738a1177e95fec7f5554#subdirectory=client/python
 https://gradio-pypi-previews.s3.amazonaws.com/66349fe26827e3a3c15b738a1177e95fec7f5554/gradio-4.42.0-py3-none-any.whl

 safetensors==0.4.3
+transformers
 gradio-client @ git+https://github.com/gradio-app/gradio@66349fe26827e3a3c15b738a1177e95fec7f5554#subdirectory=client/python
 https://gradio-pypi-previews.s3.amazonaws.com/66349fe26827e3a3c15b738a1177e95fec7f5554/gradio-4.42.0-py3-none-any.whl