Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -79,7 +79,7 @@ def predict_depth(image, model):
|
|
79 |
#def predict_depth(model, image):
|
80 |
# return model(image)["depth"]
|
81 |
|
82 |
-
def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg=False, maxc=
|
83 |
if encoder not in ["vitl","vitb","vits","vitg"]:
|
84 |
encoder = "vits"
|
85 |
|
@@ -232,7 +232,29 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
|
|
232 |
|
233 |
diff_d = np.abs(depth_color.astype(np.int16)-depth_color_bg.astype(np.int16))
|
234 |
diff_c = np.abs(raw_frame.astype(np.int16)-raw_frame_bg.astype(np.int16))
|
235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
236 |
#print('-most common')
|
237 |
#c = Counter(diff_d.flatten())
|
238 |
#value, cc = c.most_common()[0]
|
@@ -252,6 +274,9 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
|
|
252 |
print(md_c)
|
253 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([md_d,md_d,md_d]))
|
254 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([md_c,md_c,md_c]))
|
|
|
|
|
|
|
255 |
elif lt == "average":
|
256 |
avg_d = int(np.average(diff_d))
|
257 |
avg_c = int(np.average(diff_c))
|
@@ -260,9 +285,18 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
|
|
260 |
print(avg_c)
|
261 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([avg_d,avg_d,avg_d]))
|
262 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([avg_c,avg_c,avg_c]))
|
|
|
|
|
|
|
263 |
elif lt == "slider":
|
264 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([maxd,maxd,maxd]))
|
265 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([maxc,maxc,maxc]))
|
|
|
|
|
|
|
|
|
|
|
|
|
266 |
#mask_no_shadow = cv2.bitwise_not(mask_shadow)
|
267 |
|
268 |
#stereo = cv2.StereoBM.create(numDisparities=16, blockSize=15)
|
@@ -272,7 +306,7 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
|
|
272 |
raw_frame[m>0] = (239,239,239)
|
273 |
m = cv2.inRange(raw_frame, np.array([0,0,0]), np.array([15,15,15]))
|
274 |
raw_frame[m>0] = (16,16,16)
|
275 |
-
raw_frame[
|
276 |
raw_frame[mask_bg_no_shadow>0] = (255,255,255)
|
277 |
else:
|
278 |
break
|
@@ -1046,6 +1080,10 @@ with gr.Blocks(css=css, js=js) as demo:
|
|
1046 |
with gr.Tab(label="Maximums"):
|
1047 |
max_c = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Color diff")
|
1048 |
max_d = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Depth diff")
|
|
|
|
|
|
|
|
|
1049 |
lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider")
|
1050 |
processed_video = gr.Video(label="Output Video", format="mp4", interactive=False)
|
1051 |
processed_zip = gr.File(label="Output Archive", interactive=False)
|
@@ -1383,7 +1421,7 @@ with gr.Blocks(css=css, js=js) as demo:
|
|
1383 |
render = gr.Button("Render")
|
1384 |
input_json.input(show_json, inputs=[input_json], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1385 |
|
1386 |
-
def on_submit(uploaded_video,model_type,remove_bg,maxc,maxd,lt,coordinates):
|
1387 |
global locations
|
1388 |
locations = []
|
1389 |
avg = [0, 0]
|
@@ -1417,16 +1455,16 @@ with gr.Blocks(css=css, js=js) as demo:
|
|
1417 |
print(locations)
|
1418 |
|
1419 |
# Process the video and get the path of the output video
|
1420 |
-
output_video_path = make_video(uploaded_video,encoder=model_type,remove_bg=remove_bg,maxc=maxc,maxd=maxd,lt=lt)
|
1421 |
|
1422 |
return output_video_path + (json.dumps(locations),)
|
1423 |
|
1424 |
-
submit.click(on_submit, inputs=[input_video, model_type, remove_bg, max_c, max_d, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1425 |
render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
|
1426 |
render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
|
1427 |
|
1428 |
-
example_files = [["./examples/streetview.mp4", "vits", False, 12, 12, "slider", example_coords], ["./examples/man-in-museum-reverse-cut.mp4", "vits", True, 12, 12, "slider", example_coords]]
|
1429 |
-
examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, remove_bg, max_c, max_d, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1430 |
|
1431 |
|
1432 |
if __name__ == '__main__':
|
|
|
79 |
#def predict_depth(model, image):
|
80 |
# return model(image)["depth"]
|
81 |
|
82 |
+
def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg=False, maxc=12, maxd=12, maxs=32, maxl=64, maxv=16, lt="slider"):
|
83 |
if encoder not in ["vitl","vitb","vits","vitg"]:
|
84 |
encoder = "vits"
|
85 |
|
|
|
232 |
|
233 |
diff_d = np.abs(depth_color.astype(np.int16)-depth_color_bg.astype(np.int16))
|
234 |
diff_c = np.abs(raw_frame.astype(np.int16)-raw_frame_bg.astype(np.int16))
|
235 |
+
|
236 |
+
# compensate for lighting: subtract the background's colour cast (its deviation from its own grayscale) from the frame
|
237 |
+
bg_gray = cv2.cvtColor(cv2.cvtColor(raw_frame_bg, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR)
|
238 |
+
bg_diff = (raw_frame_bg-bg_gray).astype(np.int16)
|
239 |
+
frame_c = np.abs(raw_frame.astype(np.int16)-bg_diff).astype(np.uint8)
|
240 |
+
|
241 |
+
hsv_ = cv2.cvtColor(frame_c, cv2.COLOR_BGR2HSV)
|
242 |
+
edges = cv2.Laplacian(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY), cv2.CV_64F)
|
243 |
+
blur_s = np.zeros_like(edges)
|
244 |
+
for i in range(2, frame.shape[0]-2):
|
245 |
+
for j in range(2, frame.shape[1]-2):
|
246 |
+
d = edges[i-2:i+2, j-2:j+2].var()
|
247 |
+
blur_s[i,j] = d.astype(np.uint8)
|
248 |
+
|
249 |
+
print("detail")
|
250 |
+
print(np.average(blur_s))
|
251 |
+
print(np.median(blur_s))
|
252 |
+
print("saturation")
|
253 |
+
print(np.average(hsv_[:,:,1]))
|
254 |
+
print(np.median(hsv_[:,:,1]))
|
255 |
+
print("lightness")
|
256 |
+
print(np.average(hsv_[:,:,2]))
|
257 |
+
print(np.median(hsv_[:,:,2]))
|
258 |
#print('-most common')
|
259 |
#c = Counter(diff_d.flatten())
|
260 |
#value, cc = c.most_common()[0]
|
|
|
274 |
print(md_c)
|
275 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([md_d,md_d,md_d]))
|
276 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([md_c,md_c,md_c]))
|
277 |
+
|
278 |
+
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.median(hsv_[:,:,1])), int(np.median(hsv_[:,:,2]))]))
|
279 |
+
mask = cv2.inRange(blur_s, 0, int(np.median(blur_s)))
|
280 |
elif lt == "average":
|
281 |
avg_d = int(np.average(diff_d))
|
282 |
avg_c = int(np.average(diff_c))
|
|
|
285 |
print(avg_c)
|
286 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([avg_d,avg_d,avg_d]))
|
287 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([avg_c,avg_c,avg_c]))
|
288 |
+
|
289 |
+
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.average(hsv_[:,:,1])), int(np.average(hsv_[:,:,2]))]))
|
290 |
+
mask = cv2.inRange(blur_s, 0, int(np.average(blur_s)))
|
291 |
elif lt == "slider":
|
292 |
mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([maxd,maxd,maxd]))
|
293 |
mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([maxc,maxc,maxc]))
|
294 |
+
|
295 |
+
m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180,maxs,maxl]))
|
296 |
+
mask = cv2.inRange(blur_s, 0, maxv)
|
297 |
+
|
298 |
+
masks = np.bitwise_and(m, mask)
|
299 |
+
masks_shadow = np.bitwise_and(mask_bg_shadow, masks)
|
300 |
#mask_no_shadow = cv2.bitwise_not(mask_shadow)
|
301 |
|
302 |
#stereo = cv2.StereoBM.create(numDisparities=16, blockSize=15)
|
|
|
306 |
raw_frame[m>0] = (239,239,239)
|
307 |
m = cv2.inRange(raw_frame, np.array([0,0,0]), np.array([15,15,15]))
|
308 |
raw_frame[m>0] = (16,16,16)
|
309 |
+
raw_frame[masks_shadow>0] = (raw_frame[masks_shadow>0] / 17).astype(np.uint8)
|
310 |
raw_frame[mask_bg_no_shadow>0] = (255,255,255)
|
311 |
else:
|
312 |
break
|
|
|
1080 |
with gr.Tab(label="Maximums"):
|
1081 |
max_c = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Color diff")
|
1082 |
max_d = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Depth diff")
|
1083 |
+
with gr.Tab(label="Shadow maximums"):
|
1084 |
+
max_s = gr.Slider(minimum=0, maximum=255, step=1, value=32, label="Saturation")
|
1085 |
+
max_l = gr.Slider(minimum=0, maximum=255, step=1, value=64, label="Lightness")
|
1086 |
+
max_v = gr.Slider(minimum=0, maximum=255, step=1, value=16, label="Detail")
|
1087 |
lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider")
|
1088 |
processed_video = gr.Video(label="Output Video", format="mp4", interactive=False)
|
1089 |
processed_zip = gr.File(label="Output Archive", interactive=False)
|
|
|
1421 |
render = gr.Button("Render")
|
1422 |
input_json.input(show_json, inputs=[input_json], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1423 |
|
1424 |
+
def on_submit(uploaded_video,model_type,remove_bg,maxc,maxd,maxs,maxl,maxv,lt,coordinates):
|
1425 |
global locations
|
1426 |
locations = []
|
1427 |
avg = [0, 0]
|
|
|
1455 |
print(locations)
|
1456 |
|
1457 |
# Process the video and get the path of the output video
|
1458 |
+
output_video_path = make_video(uploaded_video,encoder=model_type,remove_bg=remove_bg,maxc=maxc,maxd=maxd,maxs=maxs,maxl=maxl,maxv=maxv,lt=lt)
|
1459 |
|
1460 |
return output_video_path + (json.dumps(locations),)
|
1461 |
|
1462 |
+
submit.click(on_submit, inputs=[input_video, model_type, remove_bg, max_c, max_d, max_s, max_l, max_v, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1463 |
render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
|
1464 |
render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
|
1465 |
|
1466 |
+
example_files = [["./examples/streetview.mp4", "vits", False, 12, 12, 32, 64, 16, "slider", example_coords], ["./examples/man-in-museum-reverse-cut.mp4", "vits", True, 12, 12, 32, 64, 16, "slider", example_coords]]
|
1467 |
+
examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, remove_bg, max_c, max_d, max_s, max_l, max_v, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
|
1468 |
|
1469 |
|
1470 |
if __name__ == '__main__':
|