Depth-Anything-Video

Running

File size: 22,758 Bytes

545659d
 
82e8e66
545659d
123829d
545659d
 
 
98ce923
545659d
722a74e
 
545659d
a0e06d8
6925966
 
545659d
 
 
f6913f3
 
f80dbe2
 
ac2a5c7
6925966
ac2a5c7
 
 
52145da
6925966
 
f6913f3
 
 
 
 
 
545659d
10b0245
 
123829d
10b0245
123829d
2f95367
3f4959e
3bf2d11
2abae72
0c01bdf
5e79f53
 
545659d
 
 
 
98ce923
545659d
 
123829d
0501944
545659d
cbdb616
 
545659d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
645ddd1
 
4696931
 
545659d
 
 
 
 
f4cc18c
 
fdbb8b2
 
 
545659d
fdbb8b2
545659d
fdbb8b2
 
545659d
fdbb8b2
 
f6913f3
 
ac2a5c7
545659d
 
 
 
ac2a5c7
016503f
 
545659d
096998b
545659d
 
82e8e66
98ce923
545659d
 
 
 
 
f28cd01
e3d2855
 
58be8ed
 
 
764046f
58be8ed
 
 
 
 
4065c3c
a52b01a
286dc9d
86036bf
f1e79b7
 
4065c3c
 
 
0d92a2f
86036bf
a52b01a
3bdab06
a4a9efe
286dc9d
86036bf
f1e79b7
86036bf
 
545659d
fdbb8b2
 
545659d
 
 
 
8f7fb40
ad73ab4
047eeea
ceec5fa
047eeea
f6913f3
 
 
ac2a5c7
545659d
fdbb8b2
f6913f3
 
5a94562
722a74e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcce986
722a74e
 
 
 
 
 
 
 
 
fcce986
722a74e
7139a06
 
722a74e
010a3dc
 
722a74e
f0fccf1
 
8ee9d42
 
aa63800
 
8ee9d42
 
394988b
 
3b5d5e3
 
 
394988b
 
 
 
 
 
7cca730
8ee9d42
 
f6ef0c9
9352299
722a74e
893bff0
2e36d14
893bff0
2021e3d
ffb2da9
be9e21f
4fdc6b1
5a94562
47178d9
 
a0e06d8
722a74e
 
 
 
 
 
6ab1556
 
b0005a6
 
696f168
f6ef0c9
 
8ee9d42
df7b977
8ee9d42
f6ef0c9
900284f
c2cdeb8
3ceba42
 
 
900284f
a0e06d8
722a74e
 
77b4467
722a74e
564b181
a0e06d8
722a74e
545659d
ed652cd
 
 
7402387
0267daf
09f2f01
683d0ff
b0bee16
0267daf
ed652cd
545659d
 
 
 
 
 
 
 
 
 
 
5e79f53
545659d
dddfbb8
 
a70a18d
545659d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e79f53
 
 
545659d
3bdabf6
545659d
 
 
 
 
c3b945b
ed652cd
f7824e3
5a7c81b
9af81d7
9756969
b0bee16
9756969
c3b945b
 
be68ce1
6925966
 
ed74e6e
b2120af
bb56c78
a8299bd
 
 
 
 
 
37e5c5e
a8299bd
 
 
b2120af
 
37e5c5e
b2120af
a8299bd
bb56c78
 
 
b2120af
5e139ab
b2120af
 
bb56c78
 
 
 
a8299bd
37e5c5e
586cf48
 
a52b01a
 
 
66ab5da
 
d010386
66ab5da
 
 
586cf48
 
 
ad73ab4
41f0d07
586cf48
4f5a8a5
96ee80f
c900035
 
d010386
c900035
 
 
659dc3a
 
 
96ee80f
 
659dc3a
 
d010386
659dc3a
 
 
 
 
 
96ee80f
201037e
bb61f71
 
 
 
 
 
03af147
 
3bb651f
 
 
 
 
 
94a6927
ba45271
03af147
17828e4
4848311
a88f03a
 
41f0d07
a88f03a
 
 
 
03af147
a88f03a
 
 
 
 
17828e4
a88f03a
 
3bb651f
 
 
 
a88f03a
c596c24
9af81d7
c3b945b
cb5ba84
545659d
 
cb5ba84
545659d
 
 
722a74e
4f5a8a5
545659d
d694431
545659d
d694431
722a74e
545659d

import gradio as gr
import cv2
from PIL import Image
import numpy as np
from transformers import pipeline
import os
import torch
import torch.nn.functional as F
from torchvision import transforms
from torchvision.transforms import Compose
import trimesh
from geometry import create_triangles
import tempfile
from functools import partial
import spaces
from zipfile import ZipFile

from depth_anything.dpt import DepthAnything
from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
from moviepy.editor import *

# Index of the gallery item most recently selected in the UI.
# Written by select_frame() and read by get_mesh() to pick which frame to render.
# NOTE(review): module-level state is shared by every session of the app — confirm that is acceptable.
frame_selected = 0

def zip_files(files_in, files_out):
    """Bundle the frame images and depth-map images into one zip archive.

    Every file is stored flat in the archive under its base name, so any
    directory component of the source paths is dropped.

    Args:
        files_in: Iterable of paths to the processed frame images.
        files_out: Iterable of paths to the depth-map images.

    Returns:
        The archive path, always ``"depth_result.zip"`` (relative to the
        current working directory; an existing archive is overwritten).
    """
    archive_name = "depth_result.zip"
    with ZipFile(archive_name, "w") as archive:
        for group in (files_in, files_out):
            for path in group:
                # os.path.basename honours the platform separator, unlike a
                # hard-coded split on "/".
                archive.write(path, os.path.basename(path))
    return archive_name

def create_video(frames, fps, type):
    """Encode a sequence of frames into ``<type>_result.mp4``.

    Args:
        frames: Frame sequence handed to ``ImageSequenceClip`` (the caller in
            this file passes a list of image file paths).
        fps: Frame rate used both for the clip and for the written file.
        type: Prefix of the output file name.

    Returns:
        The name of the written video file, ``type + "_result.mp4"``.
    """
    print("building video result")
    sequence = ImageSequenceClip(frames, fps=fps)
    target = type + "_result.mp4"
    sequence.write_videofile(target, fps=fps)
    return target

@torch.no_grad()
def predict_depth(model, image):
    """Run ``model`` on ``image`` with autograd disabled and return its ``"depth"`` entry.

    Args:
        model: Callable returning a mapping that contains a ``"depth"`` key.
        image: Input forwarded unchanged to ``model``.

    Returns:
        The value stored under ``"depth"`` in the model output.
    """
    prediction = model(image)
    return prediction["depth"]

def _apply_depth_blur(raw_frame, depth_map):
    """Blend progressively stronger Gaussian blurs into a frame by depth band.

    The depth map is walked in 16 bands of width 16, from value 240 down to 0.
    Each group of four consecutive bands shares one kernel size (1, 3, 5, 7),
    so darker depth values receive a stronger blur. Band limits are inclusive
    on both ends; where two bands overlap, the later (darker) band wins.

    Args:
        raw_frame: Frame as read by OpenCV.
        depth_map: 3-channel uint8 depth image with the same height/width.

    Returns:
        A copy of ``raw_frame`` with the per-band blur applied.
    """
    blurred_frame = raw_frame.copy()
    band_start = 240
    for kernel_size in (1, 3, 5, 7):
        # One blur per kernel size is enough; it is reused for its four bands.
        blurred = cv2.GaussianBlur(raw_frame, (kernel_size, kernel_size), 0)
        for _ in range(4):
            band_lo = np.array([band_start] * 3)
            band_hi = np.array([band_start + 16] * 3)
            band_mask = cv2.inRange(depth_map, band_lo, band_hi)
            blurred_frame[band_mask > 0] = blurred[band_mask > 0]
            band_start -= 16
    return blurred_frame


@spaces.GPU
def make_video(video_path, outdir='./vis_video_depth', encoder='vits'):
    """Estimate per-frame depth for a video and write the demo artefacts.

    For every frame a depth map is predicted, normalised to 0-255, converted to
    a grey 3-channel image (near-white pixels are forced to black), and used to
    drive a depth-dependent blur of the original frame. Both images are written
    as JPEG files named ``f<N>.jpg`` / ``f<N>_dmap.jpg`` in the current working
    directory.

    Args:
        video_path: Path to a video file, to a ``.txt`` file listing one video
            path per line, or to a directory of videos (hidden files skipped).
        outdir: Unused; kept so existing callers keep working.
        encoder: One of ``"vits"``, ``"vitb"``, ``"vitl"``; any other value
            falls back to ``"vits"``.

    Returns:
        A tuple ``(depth_video_path, zip_path, frame_paths, depth_map_paths)``
        for the first resolved input file.

    Raises:
        gr.Error: If no input is given, no input file is found, the file is
            larger than 128 MB, or no frame could be read from it.
    """
    if not video_path:
        raise gr.Error("Please provide an input video.")

    if encoder not in ["vitl", "vitb", "vits"]:
        encoder = "vits"

    mapper = {"vits": "small", "vitb": "base", "vitl": "large"}
    to_tensor_transform = transforms.ToTensor()

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Pass the device explicitly: without it the pipeline stays on the CPU even
    # though this function is decorated to run with a GPU.
    depth_anything = pipeline(
        task="depth-estimation",
        model=f"nielsr/depth-anything-{mapper[encoder]}",
        device=DEVICE,
    )

    if os.path.isfile(video_path):
        if video_path.endswith('txt'):
            # A text file lists one video path per line; blank lines are ignored.
            with open(video_path, 'r') as f:
                filenames = [line for line in f.read().splitlines() if line.strip()]
        else:
            filenames = [video_path]
    else:
        filenames = sorted(
            os.path.join(video_path, name)
            for name in os.listdir(video_path)
            if not name.startswith('.')
        )

    if not filenames:
        raise gr.Error(f'No input video found in {video_path}')

    # NOTE: only the first resolved file is processed. The previous loop
    # returned at the end of its first iteration, and the callers bind a single
    # result to the UI outputs, so that behaviour is kept and made explicit.
    filename = filenames[0]

    file_size = os.path.getsize(filename) / 1024 / 1024
    if file_size > 128.0:
        print(f'File size of {filename} larger than 128Mb, sorry!')
        # Raise instead of returning a lone string: callers expect four outputs.
        raise gr.Error(f'File size of {filename} larger than 128Mb, sorry!')
    print('Progress {:}/{:},'.format(1, len(filenames)), 'Processing', filename)

    raw_video = cv2.VideoCapture(filename)
    count = 0
    depth_frames = []
    orig_frames = []
    try:
        frame_rate = int(raw_video.get(cv2.CAP_PROP_FPS))
        if frame_rate < 1:
            frame_rate = 1
        cframes = int(raw_video.get(cv2.CAP_PROP_FRAME_COUNT))
        print(f'frames: {cframes}, fps: {frame_rate}')

        while raw_video.isOpened():
            ret, raw_frame = raw_video.read()
            if not ret:
                break

            # Use the decoded frame's own size so the depth map always matches
            # the frame it is later used to mask.
            frame_height, frame_width = raw_frame.shape[:2]

            # Convert directly from uint8 BGR to an RGB PIL image; a float
            # round-trip here could truncate pixel values by one.
            frame_pil = Image.fromarray(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2RGB))

            depth = to_tensor_transform(predict_depth(depth_anything, frame_pil))
            depth = F.interpolate(depth[None], (frame_height, frame_width), mode='bilinear', align_corners=False)[0, 0]

            # Normalise to 0..255; the clamp keeps a flat depth map from
            # dividing by zero (it then maps to all zeros).
            depth_min = depth.min()
            depth_range = (depth.max() - depth_min).clamp(min=1e-8)
            depth = (depth - depth_min) / depth_range * 255.0

            depth = depth.cpu().numpy().astype(np.uint8)
            depth_color = cv2.applyColorMap(depth, cv2.COLORMAP_BONE)
            depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGBA2GRAY)
            depth_color = cv2.cvtColor(depth_gray, cv2.COLOR_GRAY2RGB)

            # Remove white border around map: select near-white pixels
            # (all channels in 250..255) and paint them black.
            white_lo = np.array([250, 250, 250])
            white_hi = np.array([255, 255, 255])
            mask = cv2.inRange(depth_color, white_lo, white_hi)
            depth_color[mask > 0] = (0, 0, 0)

            blur_frame = _apply_depth_blur(raw_frame, depth_color)

            cv2.imwrite(f"f{count}.jpg", blur_frame)
            orig_frames.append(f"f{count}.jpg")
            cv2.imwrite(f"f{count}_dmap.jpg", depth_color)
            depth_frames.append(f"f{count}_dmap.jpg")
            count += 1
    finally:
        # Always free the capture handle, even if a frame fails to process.
        raw_video.release()

    if not depth_frames:
        raise gr.Error(f'Could not read any frame from {filename}')

    final_vid = create_video(depth_frames, frame_rate, "depth")
    final_zip = zip_files(orig_frames, depth_frames)
    cv2.destroyAllWindows()

    return final_vid, final_zip, orig_frames, depth_frames

def depth_edges_mask(depth):
    """Flag pixels where the depth map changes sharply.

    Args:
    depth: 2D numpy array of shape (H, W).
    Returns:
    2D boolean numpy array of shape (H, W); True where the gradient
    magnitude of ``depth`` exceeds 0.05.
    """
    # np.gradient yields one derivative array per axis (rows first, then columns).
    grad_rows, grad_cols = np.gradient(depth)
    magnitude = np.sqrt(np.square(grad_rows) + np.square(grad_cols))
    return magnitude > 0.05

def pano_depth_to_world_points(depth, scale):
    """
    360 depth to world points
    The 2D depth map is treated as an equirectangular projection of a sphere:
    each pixel becomes a point whose radius is (255 - depth) * scale.
    longitude : 0 to 2*pi across the columns
    latitude : 0 to pi down the rows

    Returns a list [pts3d, uv]. pts3d has shape (H*W + 1, 3) and uv has shape
    (H*W + 1, 2); row 0 of each is an all-zero origin entry, the remaining
    rows follow the pixels in row-major order. uv holds (longitude, latitude).
    """
    n_rows, n_cols = depth.shape[0], depth.shape[1]

    # Brighter depth values end up closer to the origin.
    radius = (255 - depth.flatten()) * scale

    lon_grid, lat_grid = np.meshgrid(
        np.linspace(0, np.pi*2, n_cols),
        np.linspace(0, np.pi, n_rows),
    )
    lon = lon_grid.flatten()
    lat = lat_grid.flatten()

    # Spherical -> cartesian, with y along the polar axis.
    x = radius * np.cos(lon) * np.sin(lat)
    y = radius * np.cos(lat)
    z = radius * np.sin(lon) * np.sin(lat)

    sphere_pts = np.stack([x, y, z], axis=1)
    sphere_uv = np.stack([lon, lat], axis=1)

    # Prepend the origin point and its (0, 0) uv entry.
    pts3d = np.concatenate(([[0, 0, 0]], sphere_pts), axis=0)
    uv = np.concatenate(([[0, 0]], sphere_uv), axis=0)

    return [pts3d, uv]

def rgb2gray(rgb):
    """Collapse the first three channels of ``rgb`` into one by weighting each with 0.333.

    Any channels beyond the third (e.g. alpha) are ignored.
    """
    channel_weights = [0.333, 0.333, 0.333]
    colour_channels = rgb[..., :3]
    return np.dot(colour_channels, channel_weights)

def get_mesh(image, depth):
    """Build a coloured point cloud from the selected frame and export it as GLB.

    The frame index comes from the module-level ``frame_selected``. The depth
    image is reduced to one channel, projected to 3D points with
    ``pano_depth_to_world_points`` and coloured with the matching frame's pixels.

    Args:
        image: Indexable collection of frames; ``image[i][0]`` is used as the
            pixel array (assumes gallery-style ``(array, caption)`` entries —
            TODO confirm against the Gradio version in use).
        depth: Same layout as ``image``, holding the depth-map images.

    Returns:
        Path of the exported ``.glb`` file (a temporary file that is not
        deleted automatically).
    """
    fnum = frame_selected
    gdepth = rgb2gray(depth[fnum][0])
    print('depth to gray - ok')
    # Only the 3D points are needed here; the uv coordinates are not used.
    pts3d, _uv = pano_depth_to_world_points(gdepth, 1)
    print('radius from depth - ok')

    verts = pts3d.reshape(-1, 3)

    # Normalise the frame to exactly four channels (RGBA), whatever it had.
    rgba = cv2.cvtColor(image[fnum][0], cv2.COLOR_RGBA2RGB)
    rgba = cv2.cvtColor(rgba, cv2.COLOR_RGB2RGBA)
    colors = rgba.reshape(-1, 4)

    # The point list starts with an extra origin point, so prepend one colour
    # (transparent grey) to keep vertices and colours aligned.
    clrs = np.concatenate(([[128, 128, 128, 0]], colors), axis=0)

    mesh = trimesh.PointCloud(verts, colors=clrs)
    scene = trimesh.Scene([mesh])
    print('mesh - ok')

    # Save as glb. Reserve the name and close the handle before exporting, so
    # no descriptor is leaked and the export does not write to an open file.
    with tempfile.NamedTemporaryFile(suffix='.glb', delete=False) as glb_file:
        glb_path = glb_file.name
    scene.export(glb_path)
    print('file - ok')
    return glb_path

def loadurl(url):
    """Return ``url`` unchanged.

    Wired to the URL textbox's change event so the typed value is forwarded
    as-is to the input video component.
    """
    return url

def select_frame(evt: gr.SelectData):
    """Remember the selected gallery index and mirror it to the other gallery.

    Stores ``evt.index`` in the module-level ``frame_selected`` and returns a
    ``gr.Gallery`` update that selects the same index in preview mode.
    """
    global frame_selected
    chosen = evt.index
    # Assigning unconditionally is equivalent to "update only when different".
    frame_selected = chosen
    return gr.Gallery(selected_index=chosen, preview=True)


# Custom stylesheet handed to gr.Blocks(css=css); caps the height of elements
# with the ids below relative to the viewport height.
css = """
#img-display-container {
    max-height: 100vh;
    }
#img-display-input {
    max-height: 80vh;
    }
#img-display-output {
    max-height: 80vh;
    }
"""

# Markdown shown at the top of the page (both are rendered through gr.Markdown).
title = "# Depth Anything Video Demo"
description = """Depth Anything on full video files.  
Please refer to our [paper](https://arxiv.org/abs/2401.10891), [project page](https://depth-anything.github.io), or [github](https://github.com/LiheYoung/Depth-Anything) for more details.  
Mesh rendering from [ZoeDepth](https://huggingface.co/spaces/shariqfarooq/ZoeDepth) ([github](https://github.com/isl-org/ZoeDepth/tree/main/ui))."""

# Preprocessing chain: Resize -> NormalizeImage -> PrepareForNet.
# NOTE(review): no module-level consumer of this object is visible in this
# file — confirm it is still needed.
transform = Compose([
        Resize(
            width=518,
            height=518,
            resize_target=False,
            keep_aspect_ratio=True,
            ensure_multiple_of=14,
            resize_method='lower_bound',
            image_interpolation_method=cv2.INTER_CUBIC,
        ),
        # Presumably the commonly used ImageNet per-channel mean/std — verify.
        NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        PrepareForNet(),
])

# @torch.no_grad()
# def predict_depth(model, image):
#     return model(image)

# Gradio UI: the left column holds the inputs and the two frame galleries, the
# right column holds the model choice, the results and the 3D viewer controls.
with gr.Blocks(css=css) as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    gr.Markdown("### Video Depth Prediction demo")

    with gr.Row():
        with gr.Column():
            input_url = gr.Textbox(value="./examples/streetview.mp4", label="URL")
            input_video = gr.Video(label="Input Video", format="mp4")
            # Typing a path/URL forwards it unchanged to the video component.
            input_url.change(fn=loadurl, inputs=[input_url], outputs=[input_video])
            output_frame = gr.Gallery(label="Frame", type='numpy', preview=True, columns=8192)
            output_depth = gr.Gallery(label="Depth", type='numpy', preview=True, columns=8192, interactive=False)
            # Selecting an item in one gallery selects the same index in the other.
            output_frame.select(fn=select_frame, inputs=None, outputs=[output_depth], show_progress='hidden')
            output_depth.select(fn=select_frame, inputs=None, outputs=[output_frame], show_progress='hidden')
            submit = gr.Button("Submit")
        with gr.Column():
            model_type = gr.Dropdown([("small", "vits"), ("base", "vitb"), ("large", "vitl")], type="value", value="vits", label='Model Type')
            processed_video = gr.Video(label="Output Video", format="mp4")
            processed_zip = gr.File(label="Output Archive")
            result = gr.Model3D(label="3D Mesh", clear_color=[0.5, 0.5, 0.5, 0.0], camera_position=[0, 90, 0])
            # Editable curve widget. The pointer-up handler looks the text
            # inputs up with querySelectorAll: getElementsByTagName only takes
            # a tag name, so the previous selector-style argument never matched.
            svg_in = gr.HTML(value="""
              <svg id='svg_in' height='128' width='256' viewBox='0 0 256 128' xmlns='http://www.w3.org/2000/svg' xmlns:xlink='http://www.w3.org/1999/xlink' style='touch-action:none;'>
                <defs>
                  <linearGradient id="lg" x1="0%" x2="100%" y1="0%" y2="0%">
                    <stop offset="0%" stop-color="white"/>
                    <stop offset="100%" stop-color="black"/>
                  </linearGradient>
                </defs>
                <polyline id='pl' points='0,0 0,127 255,127 255,0' stroke='url(#lg)' fill='none' stroke-width='3'/>
              </svg>
              <script>try{
                var pl = document.getElementById('pl');
                var pts = '';
                for (var i=0; i<256; i++) {
                  pts += i+','+Math.sin(i/256*Math.PI/2)*127+' ';
                }
                pl.setAttribute('points', pts);
                document.getElementById('svg_in').onpointermove = function(event) {
                  var x = event.clientX - event.target.getBoundingClientRect().x;
                  var y = event.clientY - event.target.getBoundingClientRect().y;
                  var pl_a = pl.getAttribute('points').split(' ');
                  pl_a[x] = x+','+y;
                  pl.setAttribute('points', pl_a.join(' '));
                }
                document.getElementById('svg_in').onpointerup = function(event) {
                  document.querySelectorAll('input[type=text]')[1].value = document.getElementById('pl').getAttribute('points');
                }
                }catch(e){alert(e);}
              </script>""")
            txt_in = gr.Textbox(value="")
            # Zoom slider. The camera metadata (rendering pipeline) is created
            # BEFORE it is dereferenced; previously the contrast/exposure lines
            # ran first and threw while metadata was still undefined.
            html = gr.HTML(value="""<label for='zoom'>Zoom</label><input id='zoom' type='range' style='width:256px;height:1em;' min='0.157' max='1.57' step='0.001' oninput='
              if (!BABYLON.Engine.LastCreatedScene.activeCamera.metadata) {
                BABYLON.Engine.LastCreatedScene.activeCamera.metadata = {
                  screenshot: true,
                  pipeline: new BABYLON.DefaultRenderingPipeline(\"default\", true, BABYLON.Engine.LastCreatedScene, [BABYLON.Engine.LastCreatedScene.activeCamera])
                }
              }
              BABYLON.Engine.LastCreatedScene.getNodes()[1].material.pointSize = Math.ceil(Math.log2(Math.PI/this.value));
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.contrast = 2.0;
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.exposure = 0.5;
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.samples = 4;
              BABYLON.Engine.LastCreatedScene.activeCamera.fov = this.value;
              this.parentNode.childNodes[2].innerText = BABYLON.Engine.LastCreatedScene.activeCamera.fov;

              document.getElementsByClassName(\"model3D\")[0].getElementsByTagName(\"canvas\")[0].style.filter = \"blur(\" + BABYLON.Engine.LastCreatedScene.getNodes()[1].material.pointSize/Math.sqrt(2.0)/2.0 + \"px)\";
            '/><span>0.8</span>""")
            camera = gr.HTML(value="<a href='#' onclick=\"BABYLON.Engine.LastCreatedScene.activeCamera.radius=0;\">reset camera</a>")
            contrast = gr.HTML(value="""<label for='contrast'>Contrast</label><input id='contrast' type='range' style='width:256px;height:1em;' value='2.0' min='0' max='2' step='0.001' oninput='
              if (!BABYLON.Engine.LastCreatedScene.activeCamera.metadata) {
                BABYLON.Engine.LastCreatedScene.activeCamera.metadata = {
                  screenshot: true,
                  pipeline: new BABYLON.DefaultRenderingPipeline(\"default\", true, BABYLON.Engine.LastCreatedScene, [BABYLON.Engine.LastCreatedScene.activeCamera])
                }
              }
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.samples = 4;
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.contrast = this.value;
              this.parentNode.childNodes[2].innerText = BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.contrast;
            '/><span>2.0</span>""")
            exposure = gr.HTML(value="""<label for='exposure'>Exposure</label><input id='exposure' type='range' style='width:256px;height:1em;' value='0.5' min='0' max='2' step='0.001' oninput='
              if (!BABYLON.Engine.LastCreatedScene.activeCamera.metadata) {
                BABYLON.Engine.LastCreatedScene.activeCamera.metadata = {
                  screenshot: true,
                  pipeline: new BABYLON.DefaultRenderingPipeline(\"default\", true, BABYLON.Engine.LastCreatedScene, [BABYLON.Engine.LastCreatedScene.activeCamera])
                }
              }
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.samples = 4;
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.exposure = this.value;
              this.parentNode.childNodes[2].innerText = BABYLON.Engine.LastCreatedScene.activeCamera.metadata.pipeline.imageProcessing.exposure;
            '/><span>0.5</span>""")
            # Screenshot link: captures the 3D view, wraps it in an SVG with a
            # Gaussian blur filter and draws the result on the canvas below it.
            canvas = gr.HTML(value="""<a href='#' onclick='
              if (!BABYLON.Engine.LastCreatedScene.activeCamera.metadata) {
                BABYLON.Engine.LastCreatedScene.activeCamera.metadata = {
                  screenshot: true,
                  pipeline: new BABYLON.DefaultRenderingPipeline(\"default\", true, BABYLON.Engine.LastCreatedScene, [BABYLON.Engine.LastCreatedScene.activeCamera])
                }
              }
              //var cntxt = document.getElementsByClassName(\"model3D\")[0].getElementsByTagName(\"canvas\")[0].getContext(\"webgl2\");
              //this.innerText = cntxt;
              BABYLON.Engine.LastCreatedScene.activeCamera.metadata.screenshot = true;

              BABYLON.Engine.LastCreatedScene.getEngine().onEndFrameObservable.add(function() {
                if (BABYLON.Engine.LastCreatedScene.activeCamera.metadata.screenshot === true) {
                  BABYLON.Engine.LastCreatedScene.activeCamera.metadata.screenshot = false;
                  try {
                    BABYLON.Tools.CreateScreenshotUsingRenderTarget(BABYLON.Engine.LastCreatedScene.getEngine(), BABYLON.Engine.LastCreatedScene.activeCamera,
                      { precision: 1.0 }, (durl) => {
                        document.getElementById(\"img_out\").src = durl;
                        document.getElementById(\"img_out\").onload = function() {
                          var svgd = `<svg id=\"svg_out\" viewBox=\"0 0 ` + document.getElementById(\"img_out\").width + ` ` + document.getElementById(\"img_out\").height + `\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">
                            <defs>
                              <filter id=\"blur\" x=\"0\" y=\"0\" xmlns=\"http://www.w3.org/2000/svg\">
                                <feGaussianBlur in=\"SourceGraphic\" stdDeviation=\"` + BABYLON.Engine.LastCreatedScene.getNodes()[1].material.pointSize/Math.sqrt(2.0)/2.0 + `\" />
                              </filter>
                            </defs>
                            <image filter=\"url(#blur)\" id=\"svg_img\" x=\"0\" y=\"0\" width=\"` + document.getElementById(\"img_out\").width + `\" height=\"` + document.getElementById(\"img_out\").height + `\" xlink:href=\"` + durl + `\"/>
                          </svg>`;
                          document.getElementById(\"img_out\").src = \"data:image/svg+xml;base64,\" + btoa(svgd);
                          document.getElementById(\"img_out\").onload = function() {
                            document.getElementById(\"cnv_out\").width = document.getElementById(\"img_out\").width;
                            document.getElementById(\"cnv_out\").height = document.getElementById(\"img_out\").height;
                            document.getElementById(\"cnv_out\").getContext(\"2d\").drawImage(img_out, 0, 0);
                          }
                        }
                      }
                    );
                  } catch(e) { alert(e); }
                  // https://forum.babylonjs.com/t/best-way-to-save-to-jpeg-snapshots-of-scene/17663/11
                }
              });
            '/>webgl2</a><br/><img src='' id='img_out'/><br/>
            <canvas id='cnv_out'/>""")
            render = gr.Button("Render")

    def on_submit(uploaded_video, model_type):
        """Run depth estimation on the uploaded video with the chosen encoder.

        Returns whatever ``make_video`` returns; the click handler below binds
        it to the output video, archive and the two galleries.
        """
        # Process the video and get the paths of the generated outputs
        output_video_path = make_video(uploaded_video, encoder=model_type)

        return output_video_path

    submit.click(on_submit, inputs=[input_video, model_type], outputs=[processed_video, processed_zip, output_frame, output_depth])
    # get_mesh is passed directly; wrapping it in partial() with no bound
    # arguments added nothing.
    render.click(get_mesh, inputs=[output_frame, output_depth], outputs=[result])

    example_files = os.listdir('examples')
    example_files.sort()
    example_files = [os.path.join('examples', filename) for filename in example_files]
    examples = gr.Examples(examples=example_files, inputs=[input_video], outputs=[processed_video, processed_zip, output_frame, output_depth], fn=on_submit, cache_examples=True)
    

# Launch the Gradio app, with its request queue enabled, only when this file
# is executed as a script (not when it is imported).
if __name__ == '__main__':
    demo.queue().launch()