PierreBrunelle committed on
Commit
18c960c
1 Parent(s): 324df36

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +155 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import gradio.themes as Soft
3
+ import pixeltable as pxt
4
+ from pixeltable.iterators import FrameIterator
5
+ from pixeltable.ext.functions.yolox import yolox
6
+ import PIL.Image
7
+ import PIL.ImageDraw
8
+
9
# UDF that overlays detection rectangles on a frame

@pxt.udf
def draw_boxes(
    img: PIL.Image.Image, boxes: list[list[float]]
) -> PIL.Image.Image:
    """Return a copy of `img` with every box in `boxes` outlined.

    Each box is an [x1, y1, x2, y2] rectangle in pixel coordinates
    (the format produced by the YOLOX detector's `bboxes` output).
    The input image is never mutated.
    """
    canvas = img.copy()  # work on a copy so the source frame stays intact
    painter = PIL.ImageDraw.Draw(canvas)
    for rect in boxes:
        # Outline each detection on the copied frame
        painter.rectangle(rect, width=3)
    return canvas
21
+
22
# Gradio Application
def process_video(video_file, model_id, threshold, progress=gr.Progress()):
    """Run YOLOX object detection over a video using Pixeltable.

    Args:
        video_file: Either an uploaded file object exposing a `.name` path
            (what `gr.File` delivers) or a bare path/URL string (what
            `gr.Examples` passes through).
        model_id: YOLOX variant to run, e.g. 'yolox_tiny', 'yolox_m', 'yolox_x'.
        threshold: Detection confidence threshold.
        progress: Gradio progress reporter (injected automatically).

    Returns:
        Tuple of (path to the annotated output video, list of sample frames
        for the gallery).
    """
    progress(0, desc="Initializing...")

    # Ensure a clean slate for the demo: every run rebuilds the directory
    pxt.drop_dir('video_tutorial', force=True)
    pxt.create_dir('video_tutorial')

    # Create the `videos` table
    videos_table = pxt.create_table(
        'video_tutorial.videos',
        {'video': pxt.VideoType()}
    )

    # Create a view that explodes each video into frames at 5 fps
    frames_view = pxt.create_view(
        'video_tutorial.frames',
        videos_table,
        iterator=FrameIterator.create(video=videos_table.video, fps=5)
    )

    # Accept both an uploaded file object (has `.name`) and a plain
    # path/URL string — `gr.Examples` supplies strings, so calling
    # `.name` unconditionally would raise AttributeError.
    video_path = video_file.name if hasattr(video_file, 'name') else video_file

    # Insert the video; frame extraction happens automatically via the view
    videos_table.insert([
        {
            'video': video_path
        }
    ])

    progress(0.3, desc="Running Model...")

    # Perform object detection by adding a computed column to the view
    frames_view[f'detect_{model_id}'] = yolox(
        frames_view.frame, model_id=model_id, threshold=threshold
    )

    progress(0.6, desc="Object detection completed...")

    # Prepare frame gallery: every other frame, capped at 10 images
    frame_gallery = frames_view.select(frames_view.frame).where(
        frames_view.pos % 2 == 0
    ).limit(10).collect()['frame']

    progress(0.8, desc="Outputs generated, retrieving video...")

    # Re-assemble an output video, drawing each frame's detections on it.
    # `make_video` is an aggregate over frames grouped by source video;
    # its unnamed result column comes back as 'col_0'.
    output_video = frames_view.group_by(videos_table).select(
        pxt.functions.video.make_video(
            frames_view.pos,
            draw_boxes(
                frames_view.frame,
                frames_view[f'detect_{model_id}'].bboxes
            )
        )
    ).collect()['col_0'][0]

    return output_video, frame_gallery
76
+
77
# Gradio interface
#
# NOTE(review): the original passed `theme=Soft` where `Soft` was the
# `gradio.themes` *module* (via `import gradio.themes as Soft`) — gradio
# expects a Theme instance, so we instantiate `gr.themes.Soft()` directly.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        <div max-width: 800px; margin: 0 auto;">
        <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png" alt="Pixeltable" style="max-width: 200px; margin-bottom: 20px;" />
        <h1 style="margin-bottom: 0.5em;">Object Detection in Videos</h1>
        </div>
        """
    )
    gr.HTML(
        """
        <p>
        <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #00A4EF; text-decoration: none; font-weight: bold;">Pixeltable</a> is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
        </p>
        """
    )

    # Add the disclaimer
    gr.HTML(
        """
        <div style="background-color: #000000; border: 1px solid #e9ecef; color: #FFFFFF; border-radius: 8px; padding: 15px; margin-bottom: 20px;">
        <strong style="color: #FFFFFF">Disclaimer:</strong> This app is best run on your own hardware with a GPU for optimal performance. This Hugging Face Space uses the free tier (2vCPU, 16GB RAM), which may result in slower processing times, especially for large video files. If you wish to use this app with your own hardware for improved performance, you can <a href="https://huggingface.co/spaces/Pixeltable/Multi-LLM-RAG-with-Groundtruth-Comparison/duplicate" target="_blank" style="color: #00A4EF; text-decoration: none; font-weight: bold;">duplicate this Hugging Face Space</a> and run it locally, or use Google Colab with the Free limited GPU support.
        </div>
        """
    )

    # BUGFIX: `gr.Examples` below referenced `video_file`, `model_id`,
    # `threshold`, `output_video` and `frame_gallery` before they were
    # defined, raising NameError at startup. Construct every component up
    # front with render=False, then `.render()` each one at its original
    # position so the visual layout is unchanged.
    video_file = gr.File(label="Upload Video", file_count="single", render=False)
    model_id = gr.Dropdown(
        choices=['yolox_tiny', 'yolox_m', 'yolox_x'],
        value='yolox_tiny',
        label="YOLOX Model",
        render=False
    )
    threshold = gr.Slider(minimum=0.1, maximum=0.9, value=0.25, step=0.05, label="Threshold", render=False)
    output_video = gr.Video(label="Processed Video with Detections", render=False)
    frame_gallery = gr.Gallery(label="Frame Gallery", show_label=True, elem_id="gallery", render=False)

    with gr.Row():
        with gr.Column():
            with gr.Accordion("What This Demo Does", open=True):
                gr.Markdown("""
                1. **Ingests Videos**: Uploads your Video.
                2. **Process and Retrieve Data**: Store, version, chunk, and retrieve video and frames.
                3. **Detects Objects**: Leverages Pixeltable's YOLOX integration to produce object detection results.
                4. **Visualizes Output**: Displays the processed video alongside a sample of the original frames.
                """)

        with gr.Column():
            gr.Examples(
                examples=[
                    ["https://raw.github.com/pixeltable/pixeltable/release/docs/source/data/bangkok.mp4", "yolox_tiny", 0.25],
                    ["https://raw.github.com/pixeltable/pixeltable/release/docs/source/data/bangkok.mp4", "yolox_m", 0.3],
                ],
                inputs=[video_file, model_id, threshold],
                outputs=[output_video, frame_gallery],
                fn=process_video,
            )

    # File upload component for the source video
    with gr.Row():
        video_file.render()

    # Controls for model selection and detection threshold
    with gr.Row():
        model_id.render()

    threshold.render()

    # Button to trigger file processing
    process_button = gr.Button("Process Video")

    with gr.Row():
        output_video.render()

    with gr.Row():
        frame_gallery.render()

    process_button.click(process_video,
                         inputs=[video_file,
                                 model_id,
                                 threshold],
                         outputs=[output_video, frame_gallery])

if __name__ == "__main__":
    demo.launch(debug=True)