freealise committed on
Commit
89a1848
1 Parent(s): 8380995

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -8
app.py CHANGED
@@ -79,7 +79,7 @@ def predict_depth(image, model):
79
  #def predict_depth(model, image):
80
  # return model(image)["depth"]
81
 
82
- def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg=False, maxc=16, maxd=16, lt="slider"):
83
  if encoder not in ["vitl","vitb","vits","vitg"]:
84
  encoder = "vits"
85
 
@@ -232,7 +232,29 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
232
 
233
  diff_d = np.abs(depth_color.astype(np.int16)-depth_color_bg.astype(np.int16))
234
  diff_c = np.abs(raw_frame.astype(np.int16)-raw_frame_bg.astype(np.int16))
235
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
  #print('-most common')
237
  #c = Counter(diff_d.flatten())
238
  #value, cc = c.most_common()[0]
@@ -252,6 +274,9 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
252
  print(md_c)
253
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([md_d,md_d,md_d]))
254
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([md_c,md_c,md_c]))
 
 
 
255
  elif lt == "average":
256
  avg_d = int(np.average(diff_d))
257
  avg_c = int(np.average(diff_c))
@@ -260,9 +285,18 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
260
  print(avg_c)
261
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([avg_d,avg_d,avg_d]))
262
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([avg_c,avg_c,avg_c]))
 
 
 
263
  elif lt == "slider":
264
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([maxd,maxd,maxd]))
265
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([maxc,maxc,maxc]))
 
 
 
 
 
 
266
  #mask_no_shadow = cv2.bitwise_not(mask_shadow)
267
 
268
  #stereo = cv2.StereoBM.create(numDisparities=16, blockSize=15)
@@ -272,7 +306,7 @@ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg
272
  raw_frame[m>0] = (239,239,239)
273
  m = cv2.inRange(raw_frame, np.array([0,0,0]), np.array([15,15,15]))
274
  raw_frame[m>0] = (16,16,16)
275
- raw_frame[mask_bg_shadow>0] = (raw_frame[mask_bg_shadow>0] / 17).astype(np.uint8)
276
  raw_frame[mask_bg_no_shadow>0] = (255,255,255)
277
  else:
278
  break
@@ -1046,6 +1080,10 @@ with gr.Blocks(css=css, js=js) as demo:
1046
  with gr.Tab(label="Maximums"):
1047
  max_c = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Color diff")
1048
  max_d = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Depth diff")
 
 
 
 
1049
  lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider")
1050
  processed_video = gr.Video(label="Output Video", format="mp4", interactive=False)
1051
  processed_zip = gr.File(label="Output Archive", interactive=False)
@@ -1383,7 +1421,7 @@ with gr.Blocks(css=css, js=js) as demo:
1383
  render = gr.Button("Render")
1384
  input_json.input(show_json, inputs=[input_json], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1385
 
1386
- def on_submit(uploaded_video,model_type,remove_bg,maxc,maxd,lt,coordinates):
1387
  global locations
1388
  locations = []
1389
  avg = [0, 0]
@@ -1417,16 +1455,16 @@ with gr.Blocks(css=css, js=js) as demo:
1417
  print(locations)
1418
 
1419
  # Process the video and get the path of the output video
1420
- output_video_path = make_video(uploaded_video,encoder=model_type,remove_bg=remove_bg,maxc=maxc,maxd=maxd,lt=lt)
1421
 
1422
  return output_video_path + (json.dumps(locations),)
1423
 
1424
- submit.click(on_submit, inputs=[input_video, model_type, remove_bg, max_c, max_d, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1425
  render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
1426
  render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
1427
 
1428
- example_files = [["./examples/streetview.mp4", "vits", False, 12, 12, "slider", example_coords], ["./examples/man-in-museum-reverse-cut.mp4", "vits", True, 12, 12, "slider", example_coords]]
1429
- examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, remove_bg, max_c, max_d, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1430
 
1431
 
1432
  if __name__ == '__main__':
 
79
  #def predict_depth(model, image):
80
  # return model(image)["depth"]
81
 
82
+ def make_video(video_path, outdir='./vis_video_depth', encoder='vits', remove_bg=False, maxc=12, maxd=12, maxs=32, maxl=64, maxv=16, lt="slider"):
83
  if encoder not in ["vitl","vitb","vits","vitg"]:
84
  encoder = "vits"
85
 
 
232
 
233
  diff_d = np.abs(depth_color.astype(np.int16)-depth_color_bg.astype(np.int16))
234
  diff_c = np.abs(raw_frame.astype(np.int16)-raw_frame_bg.astype(np.int16))
235
+
236
+ #correct hue against light
237
+ bg_gray = cv2.cvtColor(cv2.cvtColor(raw_frame_bg, cv2.COLOR_BGR2GRAY), cv2.COLOR_GRAY2BGR)
238
+ bg_diff = (raw_frame_bg-bg_gray).astype(np.int16)
239
+ frame_c = np.abs(raw_frame.astype(np.int16)-bg_diff).astype(np.uint8)
240
+
241
+ hsv_ = cv2.cvtColor(frame_c, cv2.COLOR_BGR2HSV)
242
+ edges = cv2.Laplacian(cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY), cv2.CV_64F)
243
+ blur_s = np.zeros_like(edges)
244
+ for i in range(2, frame.shape[0]-2):
245
+ for j in range(2, frame.shape[1]-2):
246
+ d = edges[i-2:i+2, j-2:j+2].var()
247
+ blur_s[i,j] = d.astype(np.uint8)
248
+
249
+ print("detail")
250
+ print(np.average(blur_s))
251
+ print(np.median(blur_s))
252
+ print("saturation")
253
+ print(np.average(hsv_[:,:,1]))
254
+ print(np.median(hsv_[:,:,1]))
255
+ print("lightness")
256
+ print(np.average(hsv_[:,:,2]))
257
+ print(np.median(hsv_[:,:,2]))
258
  #print('-most common')
259
  #c = Counter(diff_d.flatten())
260
  #value, cc = c.most_common()[0]
 
274
  print(md_c)
275
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([md_d,md_d,md_d]))
276
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([md_c,md_c,md_c]))
277
+
278
+ m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.median(hsv_[:,:,1])), int(np.median(hsv_[:,:,2]))]))
279
+ mask = cv2.inRange(blur_s, 0, int(np.median(blur_s)))
280
  elif lt == "average":
281
  avg_d = int(np.average(diff_d))
282
  avg_c = int(np.average(diff_c))
 
285
  print(avg_c)
286
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([avg_d,avg_d,avg_d]))
287
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([avg_c,avg_c,avg_c]))
288
+
289
+ m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180, int(np.average(hsv_[:,:,1])), int(np.average(hsv_[:,:,2]))]))
290
+ mask = cv2.inRange(blur_s, 0, int(np.average(blur_s)))
291
  elif lt == "slider":
292
  mask_bg_shadow = cv2.inRange(diff_d, np.array([0,0,0]), np.array([maxd,maxd,maxd]))
293
  mask_bg_no_shadow = cv2.inRange(diff_c, np.array([0,0,0]), np.array([maxc,maxc,maxc]))
294
+
295
+ m = cv2.inRange(hsv_, np.array([0,0,0]), np.array([180,maxs,maxl]))
296
+ mask = cv2.inRange(blur_s, 0, maxv)
297
+
298
+ masks = np.bitwise_and(m, mask)
299
+ masks_shadow = np.bitwise_and(mask_bg_shadow, masks)
300
  #mask_no_shadow = cv2.bitwise_not(mask_shadow)
301
 
302
  #stereo = cv2.StereoBM.create(numDisparities=16, blockSize=15)
 
306
  raw_frame[m>0] = (239,239,239)
307
  m = cv2.inRange(raw_frame, np.array([0,0,0]), np.array([15,15,15]))
308
  raw_frame[m>0] = (16,16,16)
309
+ raw_frame[masks_shadow>0] = (raw_frame[masks_shadow>0] / 17).astype(np.uint8)
310
  raw_frame[mask_bg_no_shadow>0] = (255,255,255)
311
  else:
312
  break
 
1080
  with gr.Tab(label="Maximums"):
1081
  max_c = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Color diff")
1082
  max_d = gr.Slider(minimum=0, maximum=255, step=1, value=12, label="Depth diff")
1083
+ with gr.Tab(label="Shadow maximums"):
1084
+ max_s = gr.Slider(minimum=0, maximum=255, step=1, value=32, label="Saturation")
1085
+ max_l = gr.Slider(minimum=0, maximum=255, step=1, value=64, label="Lightness")
1086
+ max_v = gr.Slider(minimum=0, maximum=255, step=1, value=16, label="Detail")
1087
  lt = gr.Radio(label="Maximum is", choices=["average", "median", "slider"], value="slider")
1088
  processed_video = gr.Video(label="Output Video", format="mp4", interactive=False)
1089
  processed_zip = gr.File(label="Output Archive", interactive=False)
 
1421
  render = gr.Button("Render")
1422
  input_json.input(show_json, inputs=[input_json], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1423
 
1424
+ def on_submit(uploaded_video,model_type,remove_bg,maxc,maxd,maxs,maxl,maxv,lt,coordinates):
1425
  global locations
1426
  locations = []
1427
  avg = [0, 0]
 
1455
  print(locations)
1456
 
1457
  # Process the video and get the path of the output video
1458
+ output_video_path = make_video(uploaded_video,encoder=model_type,remove_bg=remove_bg,maxc=maxc,maxd=maxd,maxs=maxs,maxl=maxl,maxv=maxv,lt=lt)
1459
 
1460
  return output_video_path + (json.dumps(locations),)
1461
 
1462
+ submit.click(on_submit, inputs=[input_video, model_type, remove_bg, max_c, max_d, max_s, max_l, max_v, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1463
  render.click(None, inputs=[coords, mesh_order, bgcolor, output_frame, output_mask, selected, output_depth], outputs=None, js=load_model)
1464
  render.click(partial(get_mesh), inputs=[output_frame, output_mask, blur_in, load_all], outputs=[result, result_file, mesh_order])
1465
 
1466
+ example_files = [["./examples/streetview.mp4", "vits", False, 12, 12, 32, 64, 16, "slider", example_coords], ["./examples/man-in-museum-reverse-cut.mp4", "vits", True, 12, 12, 32, 64, 16, "slider", example_coords]]
1467
+ examples = gr.Examples(examples=example_files, fn=on_submit, cache_examples=True, inputs=[input_video, model_type, remove_bg, max_c, max_d, max_s, max_l, max_v, lt, coords], outputs=[processed_video, processed_zip, output_frame, output_mask, output_depth, coords])
1468
 
1469
 
1470
  if __name__ == '__main__':