Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
import cv2
|
3 |
import matplotlib
|
4 |
import numpy as np
|
@@ -10,7 +10,11 @@ import tempfile
|
|
10 |
from gradio_imageslider import ImageSlider
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
|
13 |
-
from depth_anything_v2.dpt import DepthAnythingV2
|
|
|
|
|
|
|
|
|
14 |
|
15 |
css = """
|
16 |
#img-display-container {
|
@@ -27,33 +31,62 @@ css = """
|
|
27 |
}
|
28 |
"""
|
29 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
@spaces.GPU
|
55 |
-
def predict_depth(image):
|
56 |
-
|
|
|
|
|
|
|
|
|
57 |
|
58 |
with gr.Blocks(css=css) as demo:
|
59 |
gr.Markdown(title)
|
@@ -70,25 +103,47 @@ with gr.Blocks(css=css) as demo:
|
|
70 |
cmap = matplotlib.colormaps.get_cmap('Spectral_r')
|
71 |
|
72 |
def on_submit(image):
|
73 |
-
original_image = image.copy()
|
74 |
|
75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
-
|
78 |
|
79 |
-
|
80 |
-
tmp_raw_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
|
81 |
-
raw_depth.save(tmp_raw_depth.name)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
|
87 |
-
|
88 |
-
|
89 |
-
|
|
|
|
|
|
|
|
|
90 |
|
91 |
-
return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]
|
92 |
|
93 |
submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])
|
94 |
|
|
|
1 |
+
import gradio as gr
|
2 |
import cv2
|
3 |
import matplotlib
|
4 |
import numpy as np
|
|
|
10 |
from gradio_imageslider import ImageSlider
|
11 |
from huggingface_hub import hf_hub_download
|
12 |
|
13 |
+
# from depth_anything_v2.dpt import DepthAnythingV2
|
14 |
+
from marigold import MarigoldPipeline
|
15 |
+
from diffusers import AutoencoderKL, DDIMScheduler, UNet2DConditionModel
|
16 |
+
from transformers import CLIPTextModel, CLIPTokenizer
|
17 |
+
import xformers
|
18 |
|
19 |
css = """
|
20 |
#img-display-container {
|
|
|
31 |
}
|
32 |
"""
|
33 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
34 |
+
# ---------------------------------------------------------------------------
# Model loading (runs once at import time).
#
# The original code referenced `timestep_spacing`, `variant` and `dtype`
# without ever defining them, which raises NameError as soon as the module is
# imported.  They are defined explicitly here.
# ---------------------------------------------------------------------------
checkpoint_path = "GonzaloMG/marigold-e2e-ft-depth"

# NOTE(review): "trailing" is assumed to match the upstream end-to-end
# fine-tuned Marigold inference defaults -- TODO confirm against the checkpoint.
timestep_spacing = "trailing"
# Full-precision weights: no weight-file variant suffix, float32 tensors.
variant = None
dtype = torch.float32

# Load each sub-model from its sub-folder of the checkpoint repository.
unet = UNet2DConditionModel.from_pretrained(checkpoint_path, subfolder="unet")
vae = AutoencoderKL.from_pretrained(checkpoint_path, subfolder="vae")
text_encoder = CLIPTextModel.from_pretrained(checkpoint_path, subfolder="text_encoder")
tokenizer = CLIPTokenizer.from_pretrained(checkpoint_path, subfolder="tokenizer")
scheduler = DDIMScheduler.from_pretrained(
    checkpoint_path,
    timestep_spacing=timestep_spacing,
    subfolder="scheduler",
)

# Assemble the pipeline from the explicitly loaded components.
pipe = MarigoldPipeline.from_pretrained(
    pretrained_model_name_or_path=checkpoint_path,
    unet=unet,
    vae=vae,
    scheduler=scheduler,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    variant=variant,
    torch_dtype=dtype,
)

# xformers attention is a best-effort optimisation; fall back to the default
# attention implementation when it cannot be enabled.
# NOTE(review): diffusers is expected to raise ModuleNotFoundError (an
# ImportError subclass) when xformers is missing and ValueError when CUDA is
# unavailable -- verify against the installed diffusers version.
# NOTE: the unconditional `import xformers` at the top of this file will still
# fail first if the package is not installed at all.
try:
    pipe.enable_xformers_memory_efficient_attention()
except (ImportError, ValueError) as err:
    print(f"xformers memory-efficient attention not enabled: {err}")

pipe = pipe.to(DEVICE)
pipe.unet.eval()
|
55 |
+
|
56 |
+
# model_configs = {
|
57 |
+
# 'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
|
58 |
+
# 'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
|
59 |
+
# 'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
|
60 |
+
# 'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
|
61 |
+
# }
|
62 |
+
# encoder2name = {
|
63 |
+
# 'vits': 'Small',
|
64 |
+
# 'vitb': 'Base',
|
65 |
+
# 'vitl': 'Large',
|
66 |
+
# 'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
|
67 |
+
# }
|
68 |
+
# encoder = 'vitl'
|
69 |
+
# model_name = encoder2name[encoder]
|
70 |
+
# model = DepthAnythingV2(**model_configs[encoder])
|
71 |
+
# filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
|
72 |
+
# state_dict = torch.load(filepath, map_location="cpu")
|
73 |
+
# model.load_state_dict(state_dict)
|
74 |
+
# model = model.to(DEVICE).eval()
|
75 |
+
|
76 |
+
title = "# ..."
|
77 |
+
description = """... **...**"""
|
78 |
+
|
79 |
+
|
80 |
+
# def predict_depth(image):
|
81 |
+
# return model.infer_image(image)
|
82 |
+
|
83 |
@spaces.GPU
def predict_depth(image):
    """Run the module-level Marigold pipeline on ``image``.

    The pipeline is invoked with gradient tracking disabled and with fixed
    settings: one denoising step, ensemble size 1, zero noise, no normals,
    processing resolution 768, output matched to the input resolution.

    Args:
        image: The input handed straight to ``pipe``; the caller in this file
            passes a PIL image.

    Returns:
        A ``(depth_np, depth_colored)`` tuple taken from the pipeline output.
        # presumably a numeric depth array and a colourised PIL image --
        # verify against MarigoldPipeline's output type.
    """
    inference_kwargs = dict(
        denoising_steps=1,
        ensemble_size=1,
        noise="zeros",
        normals=False,
        processing_res=768,
        match_input_res=True,
    )
    with torch.no_grad():
        result = pipe(image, **inference_kwargs)
        return result.depth_np, result.depth_colored
|
90 |
|
91 |
with gr.Blocks(css=css) as demo:
|
92 |
gr.Markdown(title)
|
|
|
103 |
cmap = matplotlib.colormaps.get_cmap('Spectral_r')
|
104 |
|
105 |
def on_submit(image):
    """Handle the submit button: predict depth and write downloadable PNGs.

    Args:
        image: The uploaded image as an array (it is cast with
            ``astype('uint8')``), or ``None`` when nothing was uploaded.

    Returns:
        A three-element list, one entry per output component wired up in
        ``submit.click``: ``(input image, coloured depth)`` for the slider,
        the path of the 16-bit grayscale depth PNG, and the path of the
        coloured depth PNG.  When ``image`` is ``None`` all three entries
        are ``None``.
    """
    if image is None:
        print("No image uploaded.")
        # The click handler declares three outputs, so return one placeholder
        # per output instead of a single bare None.
        return [None, None, None]

    pil_image = Image.fromarray(image.astype('uint8'))
    depth_npy, depth_colored = predict_depth(pil_image)

    # Save the grayscale depth map as 16-bit PNG.  Clipping guards against
    # uint16 wrap-around should the prediction stray slightly outside [0, 1]
    # (assumes the pipeline emits normalised depth -- TODO confirm).
    depth_gray = (np.clip(depth_npy, 0.0, 1.0) * 65535.0).astype(np.uint16)
    # Reserve a persistent temp path, then close the handle before writing so
    # no file descriptor is leaked on every request.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_gray_depth:
        gray_depth_path = tmp_gray_depth.name
    Image.fromarray(depth_gray).save(gray_depth_path, mode="I;16")

    # Save the colored depth map.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp_colored_depth:
        colored_depth_path = tmp_colored_depth.name
    depth_colored.save(colored_depth_path)

    print("Depth prediction complete, returning results.")
    return [(image, depth_colored), gray_depth_path, colored_depth_path]
|
129 |
|
130 |
+
# h, w = image.shape[:2]
|
131 |
|
132 |
+
# depth = predict_depth(image[:, :, ::-1])
|
|
|
|
|
133 |
|
134 |
+
# raw_depth = Image.fromarray(depth.astype('uint16'))
|
135 |
+
# tmp_raw_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
|
136 |
+
# raw_depth.save(tmp_raw_depth.name)
|
137 |
|
138 |
+
# depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
|
139 |
+
# depth = depth.astype(np.uint8)
|
140 |
+
# colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)
|
141 |
+
|
142 |
+
# gray_depth = Image.fromarray(depth)
|
143 |
+
# tmp_gray_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
|
144 |
+
# gray_depth.save(tmp_gray_depth.name)
|
145 |
|
146 |
+
# return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]
|
147 |
|
148 |
submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])
|
149 |
|