SkalskiP committed
Commit 5b163f1 • 1 Parent(s): 7b4534e

Revise to support video processing with Supervision


This update overhauls the application, replacing the existing image processing functionality with a custom video processing pipeline built on Supervision.
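At the core of the change is Supervision's video I/O pattern: read frames with get_video_frames_generator, transform each one, and write the result through a VideoSink. Below is a minimal standalone sketch of that loop; the file paths and the pass-through transform are illustrative, not part of this commit.

import supervision as sv

SOURCE = "input.mp4"   # illustrative input path
TARGET = "output.mp4"  # illustrative output path

# VideoInfo carries the source's width, height, and fps so the sink matches it.
video_info = sv.VideoInfo.from_video_path(SOURCE)

with sv.VideoSink(TARGET, video_info=video_info) as sink:
    for frame in sv.get_video_frames_generator(source_path=SOURCE):
        # A real pipeline would run detection/masking here; this copies frames as-is.
        sink.write_frame(frame)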

Files changed (3)
  1. README.md +1 -1
  2. app.py +39 -19
  3. requirements.txt +1 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎨
 colorFrom: pink
 colorTo: purple
 sdk: gradio
-sdk_version: 4.0.2
+sdk_version: 3.50.2
 app_file: app.py
 pinned: false
 ---
app.py CHANGED
@@ -1,33 +1,53 @@
-from typing import Dict
+import time
+import uuid
+from typing import Tuple
 
 import gradio as gr
-import torch
-from PIL import Image
-from transformers import SamModel, SamProcessor
+import supervision as sv
+from tqdm import tqdm
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-MODEL = SamModel.from_pretrained("facebook/sam-vit-large").to(DEVICE)
-PROCESSOR = SamProcessor.from_pretrained("facebook/sam-vit-large")
+START_FRAME = 0
+END_FRAME = 10
+TOTAL = END_FRAME - START_FRAME
 
 
-def inference(masked_image: Dict[str, Image.Image]) -> Image.Image:
-    image = masked_image['image']
-    mask = masked_image['mask'].resize((256, 256), Image.Resampling.LANCZOS)
-    return image
+def process(
+    source_video: str,
+    prompt: str,
+    confidence: float,
+    progress=gr.Progress(track_tqdm=True)
+) -> Tuple[str, str]:
+    name = str(uuid.uuid4())
+    video_info = sv.VideoInfo.from_video_path(source_video)
+    frame_iterator = iter(sv.get_video_frames_generator(
+        source_path=source_video, start=START_FRAME, end=END_FRAME))
+
+    with sv.VideoSink(f"{name}.mp4", video_info=video_info) as sink:
+        for _ in tqdm(range(TOTAL), desc="Masking frames"):
+            frame = next(frame_iterator)
+            sink.write_frame(frame)
+            time.sleep(0.1)
+
+    return f"{name}.mp4", f"{name}.mp4"
 
 
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(
-                image_mode='RGB', type='pil', tool="sketch", interactive=True,
-                brush_radius=20.0, brush_color="#FFFFFF", height=500)
+            source_video_player = gr.Video(
+                label="Source video", source="upload", format="mp4")
+            prompt_text = gr.Textbox(
+                label="Prompt", value="person")
+            confidence_slider = gr.Slider(
+                label="Confidence", minimum=0.5, maximum=1.0, step=0.05, value=0.6)
             submit_button = gr.Button("Submit")
-            output_image = gr.Image(image_mode='RGB', type='pil')
+        with gr.Column():
+            masked_video_player = gr.Video(label="Masked video")
+            painted_video_player = gr.Video(label="Painted video")
 
     submit_button.click(
-        inference,
-        inputs=[input_image],
-        outputs=output_image)
+        process,
+        inputs=[source_video_player, prompt_text, confidence_slider],
+        outputs=[masked_video_player, painted_video_player])
 
-demo.launch(debug=False, show_error=True)
+demo.queue().launch(debug=False, show_error=True)
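Also worth noting in the new app.py: progress=gr.Progress(track_tqdm=True) mirrors the tqdm bar inside the Gradio UI, and progress events are only streamed when the app runs behind a queue, hence demo.queue().launch(...). A minimal sketch of that pattern on its own; the slow_task function and its labels are hypothetical, not from this commit.

import time

import gradio as gr
from tqdm import tqdm


def slow_task(steps: float, progress=gr.Progress(track_tqdm=True)) -> str:
    # Every tqdm update inside this function is mirrored in the Gradio UI.
    for _ in tqdm(range(int(steps)), desc="Working"):
        time.sleep(0.05)
    return f"done after {int(steps)} steps"


with gr.Blocks() as demo:
    steps_slider = gr.Slider(label="Steps", minimum=1, maximum=100, step=1, value=20)
    run_button = gr.Button("Run")
    result_box = gr.Textbox(label="Result")
    run_button.click(slow_task, inputs=[steps_slider], outputs=result_box)

# queue() must be enabled for the progress bar to update.
demo.queue().launch()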
requirements.txt CHANGED
@@ -3,6 +3,7 @@ torch
 torchvision
 
 numpy
+opencv-python
 pillow
 gradio==3.50.2
 transformers