YOLOv10-webcam-stream

Sleeping

App Files Files Community

freddyaboulton HF staff committed on Sep 11

Commit

9740995

•

1 Parent(s): 8fbff22

Go back to yolov10

Browse files

Files changed (2) hide show

app.py +25 -63
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -2,81 +2,42 @@ import spaces
 import gradio as gr
 import cv2
 import tempfile
-from PIL import Image, ImageDraw, ImageFont
-from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
-import torch
-import requests
-image_processor = RTDetrImageProcessor.from_pretrained("PekingU/rtdetr_r50vd")
-model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd", torch_dtype=torch.float16).to("cuda")
-model = torch.compile(model, mode="reduce-overhead")
-# Compile by running inference
-url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
-image = Image.open(requests.get(url, stream=True).raw)
-inputs = image_processor(images=image, return_tensors="pt").to("cuda", torch.float16)
-with torch.no_grad():
-    outputs = model(**inputs)
-def draw_bounding_boxes(image, results, model, threshold=0.3):
-    draw = ImageDraw.Draw(image)
-    for result in results:
-        for score, label_id, box in zip(
-            result["scores"], result["labels"], result["boxes"]
-        ):
-            if score > threshold:
-                label = model.config.id2label[label_id.item()]
-                box = [round(i) for i in box.tolist()]
-                draw.rectangle(box, outline="red", width=3)
-                draw.text((box[0], box[1]), f"{label}: {score:.2f}", fill="red")
-    return image
-import time
 @spaces.GPU
-def inference(image, conf_threshold):
-    inputs = image_processor(images=image, return_tensors="pt")
     start = time.time()
-    with torch.no_grad():
-        outputs = model(**inputs)
-    results = image_processor.post_process_object_detection(
-        outputs, target_sizes=torch.tensor([image.size[::-1]]), threshold=conf_threshold
-    )
     end = time.time()
-    print("time: ", end - start)
-    bbs = draw_bounding_boxes(image, results, model, threshold=conf_threshold)
-    print("bbs: ", time.time() - end)
-    return bbs
-css = """.my-group {max-width: 600px !important; max-height: 600 !important;}
                       .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
 with gr.Blocks(css=css) as app:
     gr.HTML(
         """
     <h1 style='text-align: center'>
-    Near Real-Time Webcam Stream with RT-DETR
     </h1>
-    """
-    )
     gr.HTML(
         """
         <h3 style='text-align: center'>
-        <a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
         </h3>
-        """
-    )
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
-            image = gr.Image(
-                type="pil",
-                label="Image",
-                sources="webcam",
-            )
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
@@ -84,12 +45,13 @@ with gr.Blocks(css=css) as app:
                 step=0.05,
                 value=0.85,
             )
-            image.stream(
-                fn=inference,
-                inputs=[image, conf_threshold],
-                outputs=[image],
-                stream_every=0.1,
-                time_limit=30,
-            )
-if __name__ == "__main__":
     app.launch()

 import gradio as gr
 import cv2
 import tempfile
+from ultralytics import YOLOv10
+model = YOLOv10.from_pretrained(f'jameslahm/yolov10n')
 @spaces.GPU
+def yolov10_inference(image, conf_threshold):
+    width, _ = image.size
+    import time
     start = time.time()
+    results = model.predict(source=image, imgsz=width, conf=conf_threshold)
     end = time.time()
+    print("time", end - start)
+    annotated_image = results[0].plot()
+    return annotated_image[:, :, ::-1]
+css=""".my-group {max-width: 600px !important; max-height: 600 !important;}
                       .my-column {display: flex !important; justify-content: center !important; align-items: center !important};"""
 with gr.Blocks(css=css) as app:
     gr.HTML(
         """
     <h1 style='text-align: center'>
+    YOLOv10 Webcam Stream
     </h1>
+    """)
     gr.HTML(
         """
         <h3 style='text-align: center'>
+        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
         </h3>
+        """)
     with gr.Column(elem_classes=["my-column"]):
         with gr.Group(elem_classes=["my-group"]):
+            image = gr.Image(type="pil", label="Image", sources="webcam")
             conf_threshold = gr.Slider(
                 label="Confidence Threshold",
                 minimum=0.0,
                 step=0.05,
                 value=0.85,
             )
+        image.stream(
+            fn=yolov10_inference,
+            inputs=[image, conf_threshold],
+            outputs=[image],
+            stream_every=0.2,
+            time_limit=30
+        )
+if __name__ == '__main__':
     app.launch()

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
 safetensors==0.4.3
-transformers @ git+https://github.com/yonigozlan/transformers@optim-rt-detr
 gradio-client @ git+https://github.com/gradio-app/gradio@66349fe26827e3a3c15b738a1177e95fec7f5554#subdirectory=client/python
 https://gradio-pypi-previews.s3.amazonaws.com/66349fe26827e3a3c15b738a1177e95fec7f5554/gradio-4.42.0-py3-none-any.whl

 safetensors==0.4.3
+git+https://github.com/THU-MIG/yolov10.git
 gradio-client @ git+https://github.com/gradio-app/gradio@66349fe26827e3a3c15b738a1177e95fec7f5554#subdirectory=client/python
 https://gradio-pypi-previews.s3.amazonaws.com/66349fe26827e3a3c15b738a1177e95fec7f5554/gradio-4.42.0-py3-none-any.whl