rolpotamias committed
Commit 6460264
1 Parent(s): cb6a364

Update app.py

Files changed (1)
  1. app.py +34 -27

app.py CHANGED
@@ -21,7 +21,6 @@ from wilor.utils import recursive_to
 from wilor.datasets.vitdet_dataset import ViTDetDataset, DEFAULT_MEAN, DEFAULT_STD
 from wilor.utils.renderer import Renderer, cam_crop_to_full
 device = torch.device('cpu') if torch.cuda.is_available() else torch.device('cuda')
-print('CUDA AVAILABLE', torch.cuda.is_available())
 
 LIGHT_PURPLE=(0.25098039, 0.274117647, 0.65882353)
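Review note: the device ternary above looks inverted, but it appears deliberate for ZeroGPU: in the main process of a ZeroGPU Space, torch.cuda.is_available() returns False at import time, so device resolves to 'cuda' there, and the spaces runtime allows moving modules to CUDA at startup. On a local CUDA machine, however, the same line would select 'cpu'. If local runs matter, the conventional form would be (a suggestion, not part of this commit):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')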
 
@@ -33,6 +32,31 @@ model.eval()
 
 detector = YOLO('./pretrained_models/detector.pt').to(device)
 
+def render_reconstruction(image, conf, IoU_threshold=0.5):
+    input_img, num_dets, reconstructions = run_wilow_model(image, conf, IoU_threshold=0.5)
+    if num_dets > 0:
+        # Render front view
+
+        misc_args = dict(
+            mesh_base_color=LIGHT_PURPLE,
+            scene_bg_color=(1, 1, 1),
+            focal_length=reconstructions['focal'],
+        )
+
+        cam_view = renderer.render_rgba_multiple(reconstructions['verts'],
+                                                 cam_t=reconstructions['cam_t'],
+                                                 render_res=reconstructions['img_size'],
+                                                 is_right=reconstructions['right'], **misc_args)
+
+        # Overlay image
+
+        input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
+        input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
+
+        return input_img_overlay, f'{num_dets} hands detected'
+    else:
+        return input_img, f'{num_dets} hands detected'
+
 @spaces.GPU()
 def run_wilow_model(image, conf, IoU_threshold=0.5):
     img_cv2 = image[...,::-1]
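Review note: this hunk is the core of the change. Rendering and compositing move into the new render_reconstruction, which runs outside the GPU context, while the @spaces.GPU()-decorated run_wilow_model is left to do detection and model inference only. One nit: render_reconstruction forwards a hard-coded IoU_threshold=0.5 to run_wilow_model instead of its own IoU_threshold argument. A minimal sketch of the split pattern, assuming a ZeroGPU Space; gpu_inference, render_handler, and the Identity model are illustrative stand-ins, not the app's real names:

import numpy as np
import spaces  # Hugging Face ZeroGPU helper
import torch

# Illustrative stand-in for the real WiLoR model (hypothetical).
model = torch.nn.Identity()

@spaces.GPU()  # a GPU is attached only for the duration of this call
def gpu_inference(image: np.ndarray) -> dict:
    # Heavy forward pass only; no rendering in here.
    with torch.no_grad():
        out = model(torch.from_numpy(image).float())
    # Return plain NumPy so the result stays valid after the GPU is released.
    return {'verts': out.cpu().numpy(), 'num_dets': 1}

def render_handler(image: np.ndarray):
    # UI-facing handler: one short GPU call, then CPU-side rendering/overlay.
    recon = gpu_inference(image)
    if recon['num_dets'] == 0:
        return image
    return recon['verts']  # real code would rasterise and composite here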
@@ -75,6 +99,8 @@ def run_wilow_model(image, conf, IoU_threshold=0.5):
 
         with torch.no_grad():
             out = model(batch)
+
+        print('CUDA AVAILABLE', torch.cuda.is_available())
         print(out['pred_vertices'])
         multiplier = (2*batch['right']-1)
         pred_cam = out['pred_cam']
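Moving the CUDA-availability print inside the decorated function is the meaningful place for it: on ZeroGPU, CUDA is only visible while a @spaces.GPU() call is running, so the same check at import time (removed in the first hunk) always reported False. A quick illustration, assuming a ZeroGPU Space:

import torch
import spaces

print('at import:', torch.cuda.is_available())   # False on ZeroGPU

@spaces.GPU()
def check_gpu():
    # True here: the decorated call runs with a GPU attached.
    return torch.cuda.is_available()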
@@ -84,12 +110,7 @@ def run_wilow_model(image, conf, IoU_threshold=0.5):
         img_size = batch["img_size"].float()
         scaled_focal_length = model_cfg.EXTRA.FOCAL_LENGTH / model_cfg.MODEL.IMAGE_SIZE * img_size.max()
         pred_cam_t_full = cam_crop_to_full(pred_cam, box_center, box_size, img_size, scaled_focal_length).detach().cpu().numpy()
-
-        # Render the result
-        all_verts = []
-        all_cam_t = []
-        all_right = []
-        all_joints = []
+
 
         batch_size = batch['img'].shape[0]
         for n in range(batch_size):
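Two notes on this hunk. First, it deletes the all_verts/all_cam_t/all_right/all_joints initialisations, yet the loop below still appends to them, so they must now be initialised somewhere earlier in the function for this to run. Second, for readers following the camera handling: cam_crop_to_full lifts the weak-perspective camera predicted in crop coordinates to a translation in the full image. Its exact body lives in wilor.utils.renderer; HaMeR-style pipelines typically compute it roughly as in this sketch (an assumption, not the verbatim WiLoR code):

import torch

def cam_crop_to_full_sketch(cam_bbox, box_center, box_size, img_size, focal_length):
    # cam_bbox: (B, 3) weak-perspective camera (s, tx, ty) in crop space
    # box_center: (B, 2), box_size: (B,), img_size: (B, 2) as (W, H)
    img_w, img_h = img_size[:, 0], img_size[:, 1]
    cx, cy, b = box_center[:, 0], box_center[:, 1], box_size
    bs = b * cam_bbox[:, 0] + 1e-9               # crop scale in full-image pixels
    tz = 2 * focal_length / bs                   # depth from focal length and scale
    tx = 2 * (cx - img_w / 2) / bs + cam_bbox[:, 1]
    ty = 2 * (cy - img_h / 2) / bs + cam_bbox[:, 2]
    return torch.stack([tx, ty, tz], dim=-1)     # (B, 3) full-image translation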
@@ -107,25 +128,11 @@ def run_wilow_model(image, conf, IoU_threshold=0.5):
             all_cam_t.append(cam_t)
             all_right.append(is_right)
             all_joints.append(joints)
-        # Render front view
-
-        misc_args = dict(
-            mesh_base_color=LIGHT_PURPLE,
-            scene_bg_color=(1, 1, 1),
-            focal_length=scaled_focal_length,
-        )
-        print(all_verts[0])
-        print(all_cam_t[0])
-        cam_view = renderer.render_rgba_multiple(all_verts, cam_t=all_cam_t, render_res=img_size[n], is_right=all_right, **misc_args)
-
-        # Overlay image
-
-        input_img = img_vis.astype(np.float32)/255.0
-        input_img = np.concatenate([input_img, np.ones_like(input_img[:,:,:1])], axis=2) # Add alpha channel
-        input_img_overlay = input_img[:,:,:3] * (1-cam_view[:,:,3:]) + cam_view[:,:,:3] * cam_view[:,:,3:]
-
-        image = img_vis #input_img_overlay
-        return image, f'{len(detections)} hands detected'
+
+        reconstructions = {'verts': all_verts, 'cam_t': all_cam_t, 'right': all_right, 'img_size': img_size[n], 'focal': scaled_focal_length}
+        return img_vis.astype(np.float32)/255.0, len(detections), reconstructions
+    else:
+        return img_vis.astype(np.float32)/255.0, len(detections), None
 
 
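The overlay arithmetic that moved into render_reconstruction is plain alpha compositing: the renderer returns an RGBA image whose alpha channel masks the mesh, and the output is input * (1 - alpha) + render * alpha per pixel. A self-contained illustration:

import numpy as np

def composite(rgb: np.ndarray, rgba_render: np.ndarray) -> np.ndarray:
    # rgb: (H, W, 3) float image in [0, 1]; rgba_render: (H, W, 4) from the renderer.
    alpha = rgba_render[:, :, 3:]           # (H, W, 1), broadcasts over channels
    return rgb * (1 - alpha) + rgba_render[:, :, :3] * alpha

# e.g. a red render at half opacity over a white background gives pink:
white = np.ones((2, 2, 3), dtype=np.float32)
red = np.zeros((2, 2, 4), dtype=np.float32); red[..., 0] = 1.0; red[..., 3] = 0.5
print(composite(white, red)[0, 0])          # -> [1.  0.5 0.5]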
 
@@ -168,7 +175,7 @@ with gr.Blocks(title="WiLoR: End-to-end 3D hand localization and reconstruction
             reconstruction = gr.Image(label="Reconstructions", type="numpy")
             hands_detected = gr.Textbox(label="Hands Detected")
 
-    submit.click(fn=run_wilow_model, inputs=[input_image, threshold], outputs=[reconstruction, hands_detected])
+    submit.click(fn=render_reconstruction, inputs=[input_image, threshold], outputs=[reconstruction, hands_detected])
 
     with gr.Row():
181