import subprocess import sys def install(package): subprocess.check_call([sys.executable, "-m", "pip", "install", package]) install("torch==2.4.1") install("torchvision==0.19.1") install("pixeltable==0.2.20") install("git+https://github.com/Megvii-BaseDetection/YOLOX.git") import gradio as gr import pixeltable as pxt from pixeltable.iterators import FrameIterator from pixeltable.ext.functions.yolox import yolox import PIL.Image import PIL.ImageDraw # Creating a UDF to draw bounding boxes @pxt.udf def draw_boxes(img: PIL.Image.Image, boxes: list[list[float]]) -> PIL.Image.Image: result = img.copy() # Create a copy of `img` d = PIL.ImageDraw.Draw(result) for box in boxes: # Draw bounding box rectangles on the copied image d.rectangle(box, width=3) return result # Gradio Application def process_video(video_file, model_id, threshold, progress=gr.Progress()): progress(0, desc="Initializing...") # Ensure a clean slate for the demo pxt.drop_dir('video_tutorial', force=True) pxt.create_dir('video_tutorial') # Create the `videos` table videos_table = pxt.create_table( 'video_tutorial.videos', {'video': pxt.VideoType()} ) # Create a view for video frames frames_view = pxt.create_view( 'video_tutorial.frames', videos_table, iterator=FrameIterator.create(video=videos_table.video, fps=5) ) # Insert video into Pixeltable table videos_table.insert([{'video': video_file.name}]) progress(0.3, desc="Running Model...") # Perform object detection frames_view[f'detect_{model_id}'] = yolox( frames_view.frame, model_id=model_id, threshold=threshold ) progress(0.6, desc="Object detection completed...") # Prepare frame gallery frame_gallery = frames_view.select(frames_view.frame).where(frames_view.pos % 2 == 0).limit(10).collect()['frame'] progress(0.8, desc="Outputs generated, retrieving video...") # Generate output video with bounding boxes output_video = frames_view.group_by(videos_table).select( pxt.functions.video.make_video( frames_view.pos, draw_boxes( frames_view.frame, frames_view[f'detect_{model_id}'].bboxes ) ) ).collect()['col_0'][0] return output_video, frame_gallery # Gradio interface with gr.Blocks(theme=gr.themes.Base()) as demo: gr.Markdown( """
Pixeltable is a declarative interface for working with text, images, embeddings, and even video, enabling you to store, transform, index, and iterate on data.
""" ) # Add the disclaimer gr.HTML( """