GonzaloMG committed on
Commit e2bd985
1 Parent(s): f71f2b5

Update app.py

Files changed (1)
  1. app.py +96 -41
app.py CHANGED
@@ -1,4 +1,4 @@
- mport gradio as gr
  import cv2
  import matplotlib
  import numpy as np
@@ -10,7 +10,11 @@ import tempfile
  from gradio_imageslider import ImageSlider
  from huggingface_hub import hf_hub_download

- from depth_anything_v2.dpt import DepthAnythingV2

  css = """
  #img-display-container {
@@ -27,33 +31,62 @@ css = """
  }
  """
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
- model_configs = {
-     'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
-     'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
-     'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
-     'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
- }
- encoder2name = {
-     'vits': 'Small',
-     'vitb': 'Base',
-     'vitl': 'Large',
-     'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
- }
- encoder = 'vitl'
- model_name = encoder2name[encoder]
- model = DepthAnythingV2(**model_configs[encoder])
- filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
- state_dict = torch.load(filepath, map_location="cpu")
- model.load_state_dict(state_dict)
- model = model.to(DEVICE).eval()
-
- title = "# Depth Anything V2"
- description = """Official demo for **Depth Anything V2**.
- Please refer to our [paper](https://arxiv.org/abs/2406.09414), [project page](https://depth-anything-v2.github.io), and [github](https://github.com/DepthAnything/Depth-Anything-V2) for more details."""
-
  @spaces.GPU
- def predict_depth(image):
-     return model.infer_image(image)

  with gr.Blocks(css=css) as demo:
      gr.Markdown(title)
@@ -70,25 +103,47 @@ with gr.Blocks(css=css) as demo:
      cmap = matplotlib.colormaps.get_cmap('Spectral_r')

      def on_submit(image):
-         original_image = image.copy()

-         h, w = image.shape[:2]

-         depth = predict_depth(image[:, :, ::-1])

-         raw_depth = Image.fromarray(depth.astype('uint16'))
-         tmp_raw_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
-         raw_depth.save(tmp_raw_depth.name)

-         depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-         depth = depth.astype(np.uint8)
-         colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)

-         gray_depth = Image.fromarray(depth)
-         tmp_gray_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
-         gray_depth.save(tmp_gray_depth.name)

-         return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]

      submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])
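Note (not part of the commit): the removed `on_submit` handed `predict_depth` a channel-flipped array, presumably because `DepthAnythingV2.infer_image` expects a BGR (OpenCV-style) image, whereas the new code below converts the Gradio array to a PIL RGB image instead. A minimal sketch of the old call convention, with a placeholder array standing in for the Gradio input:

```python
import numpy as np

rgb = np.zeros((480, 640, 3), dtype=np.uint8)  # placeholder for the RGB array Gradio provides
bgr = rgb[:, :, ::-1]                          # channel flip used by the removed predict_depth call
# depth = predict_depth(bgr)                   # old path: model.infer_image on the BGR array
```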
 
 
+ import gradio as gr
  import cv2
  import matplotlib
  import numpy as np
 
  from gradio_imageslider import ImageSlider
  from huggingface_hub import hf_hub_download

+ # from depth_anything_v2.dpt import DepthAnythingV2
+ from marigold import MarigoldPipeline
+ from diffusers import AutoencoderKL, DDIMScheduler, UNet2DConditionModel
+ from transformers import CLIPTextModel, CLIPTokenizer
+ import xformers

  css = """
  #img-display-container {
 
  }
  """
  DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+ checkpoint_path = "GonzaloMG/marigold-e2e-ft-depth"
+ unet = UNet2DConditionModel.from_pretrained(checkpoint_path, subfolder="unet")
+ vae = AutoencoderKL.from_pretrained(checkpoint_path, subfolder="vae")
+ text_encoder = CLIPTextModel.from_pretrained(checkpoint_path, subfolder="text_encoder")
+ tokenizer = CLIPTokenizer.from_pretrained(checkpoint_path, subfolder="tokenizer")
+ scheduler = DDIMScheduler.from_pretrained(checkpoint_path, timestep_spacing=timestep_spacing, subfolder="scheduler")
+ pipe = MarigoldPipeline.from_pretrained(pretrained_model_name_or_path=checkpoint_path,
+                                         unet=unet,
+                                         vae=vae,
+                                         scheduler=scheduler,
+                                         text_encoder=text_encoder,
+                                         tokenizer=tokenizer,
+                                         variant=variant,
+                                         torch_dtype=dtype,
+                                         )
+ try:
+     pipe.enable_xformers_memory_efficient_attention()
+ except ImportError:
+     pass  # run without xformers
+ pipe = pipe.to(DEVICE)
+ pipe.unet.eval()
+
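The loading block above references three names that are not defined anywhere in this diff: `timestep_spacing`, `variant`, and `dtype`. They are presumably set earlier in the full file; a minimal sketch of plausible definitions (the specific values are assumptions, not taken from the repository):

```python
import torch

dtype = torch.float32          # assumed inference precision for the pipeline weights
variant = None                 # assumed: load the default (non-fp16) checkpoint variant
timestep_spacing = "trailing"  # assumed DDIM timestep spacing for single-step inference
```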
+ # model_configs = {
+ #     'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
+ #     'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
+ #     'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
+ #     'vitg': {'encoder': 'vitg', 'features': 384, 'out_channels': [1536, 1536, 1536, 1536]}
+ # }
+ # encoder2name = {
+ #     'vits': 'Small',
+ #     'vitb': 'Base',
+ #     'vitl': 'Large',
+ #     'vitg': 'Giant', # we are undergoing company review procedures to release our giant model checkpoint
+ # }
+ # encoder = 'vitl'
+ # model_name = encoder2name[encoder]
+ # model = DepthAnythingV2(**model_configs[encoder])
+ # filepath = hf_hub_download(repo_id=f"depth-anything/Depth-Anything-V2-{model_name}", filename=f"depth_anything_v2_{encoder}.pth", repo_type="model")
+ # state_dict = torch.load(filepath, map_location="cpu")
+ # model.load_state_dict(state_dict)
+ # model = model.to(DEVICE).eval()
+
+ title = "# ..."
+ description = """... **...**"""
+
+
+ # def predict_depth(image):
+ #     return model.infer_image(image)
+
  @spaces.GPU
+ def predict_depth(image): #, processing_res, model_choice, current_model):
+     with torch.no_grad():
+         pipe_out = pipe(image, denoising_steps=1, ensemble_size=1, noise="zeros", normals=False, processing_res=768, match_input_res=True)
+         pred = pipe_out.depth_np
+         pred_colored = pipe_out.depth_colored
+         return pred, pred_colored
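A hypothetical call to the new `predict_depth` (not in the commit), illustrating the convention `on_submit` relies on below: the pipeline takes a PIL image, `pred` is assumed to be a float depth map in [0, 1] (consistent with `on_submit` scaling it by 65535), and `pred_colored` a PIL image:

```python
from PIL import Image

img = Image.open("example.jpg").convert("RGB")   # hypothetical input file
depth_np, depth_colored = predict_depth(img)
print(depth_np.shape, float(depth_np.min()), float(depth_np.max()))
depth_colored.save("depth_colored.png")
```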
 
  with gr.Blocks(css=css) as demo:
      gr.Markdown(title)
 
      cmap = matplotlib.colormaps.get_cmap('Spectral_r')

      def on_submit(image):

+         if image is None:
+             print("No image uploaded.")
+             return None
+
+         pil_image = Image.fromarray(image.astype('uint8'))
+         depth_npy, depth_colored = predict_depth(pil_image)
+
+         # Save the npy data (raw depth map)
+         # tmp_npy_depth = tempfile.NamedTemporaryFile(suffix='.npy', delete=False)
+         # np.save(tmp_npy_depth.name, depth_npy)
+
+         # Save the grayscale depth map
+         depth_gray = (depth_npy * 65535.0).astype(np.uint16)
+         tmp_gray_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+         Image.fromarray(depth_gray).save(tmp_gray_depth.name, mode="I;16")
+
+         # Save the colored depth map
+         tmp_colored_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+         depth_colored.save(tmp_colored_depth.name)
+
+         print("Dummy predictions complete, returning results.")
+         return [(image, depth_colored), tmp_gray_depth.name, tmp_colored_depth.name]
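A small round-trip sketch (not part of the commit) for the 16-bit grayscale PNG written above: the depth map is stored as uint16 in [0, 65535], so a consumer can recover an approximate [0, 1] float map by dividing; the file name is a placeholder:

```python
import numpy as np
from PIL import Image

depth_16bit = np.asarray(Image.open("depth_gray.png"), dtype=np.float32)  # 16-bit grayscale values
depth = depth_16bit / 65535.0                                             # back to an approximate [0, 1] range
```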
 
+         # h, w = image.shape[:2]

+         # depth = predict_depth(image[:, :, ::-1])

+         # raw_depth = Image.fromarray(depth.astype('uint16'))
+         # tmp_raw_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+         # raw_depth.save(tmp_raw_depth.name)

+         # depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
+         # depth = depth.astype(np.uint8)
+         # colored_depth = (cmap(depth)[:, :, :3] * 255).astype(np.uint8)
+
+         # gray_depth = Image.fromarray(depth)
+         # tmp_gray_depth = tempfile.NamedTemporaryFile(suffix='.png', delete=False)
+         # gray_depth.save(tmp_gray_depth.name)

+         # return [(original_image, colored_depth), tmp_gray_depth.name, tmp_raw_depth.name]

      submit.click(on_submit, inputs=[input_image], outputs=[depth_image_slider, gray_depth_file, raw_file])
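The hunks reference Gradio components (`input_image`, `submit`, `depth_image_slider`, `gray_depth_file`, `raw_file`) whose definitions fall outside the diff context. A minimal sketch of a layout consistent with those names, inside the `with gr.Blocks(css=css) as demo:` block; the component choices and labels are assumptions:

```python
import gradio as gr
from gradio_imageslider import ImageSlider

with gr.Blocks() as demo:
    with gr.Row():
        input_image = gr.Image(label="Input Image", type="numpy")  # numpy array passed to on_submit
        depth_image_slider = ImageSlider(label="Depth Map")        # shows the (input, colored depth) pair
    submit = gr.Button("Compute Depth")
    gray_depth_file = gr.File(label="16-bit grayscale depth (PNG)")
    raw_file = gr.File(label="Colored depth (PNG)")  # in this commit it receives the colored PNG path
```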