xxie committed on
Commit
40d0f76
1 Parent(s): 4ff322d

add instructions

Browse files
app.py CHANGED
@@ -31,10 +31,9 @@ from dataset.demo_dataset import DemoDataset
31
 
32
  md_description="""
33
  # HDM Interaction Reconstruction Demo
34
- ### Official Implementation of the paper \"Template Free Reconstruction of Human Object Interaction\", CVPR'24.
35
  [Project Page](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/)|[Code](https://github.com/xiexh20/HDM)|[Dataset](https://edmond.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.2VUEUS )|[Paper](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/paper-lowreso.pdf)
36
 
37
-
38
  Upload your own human object interaction image and get full 3D reconstruction!
39
 
40
  ## Citation
@@ -49,6 +48,26 @@ Upload your own human object interaction image and get full 3D reconstruction!
49
  ```
50
  """
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def plot_points(colors, coords):
53
  """
54
  use plotly to visualize 3D point with colors
@@ -143,17 +162,11 @@ def main(cfg: ProjectConfig):
143
  # Setup model
144
  runner = DemoRunner(cfg)
145
 
146
- # runner = None # without model initialization, it shows one line of thumbnail
147
- # TODO: add instructions on how to get masks
148
- # TODO: add instructions on how to use the demo, input output, example outputs etc.
149
-
150
  # Setup interface
151
  demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
152
  with demo:
153
  gr.Markdown(md_description)
154
- gr.HTML("""<h1 style="text-align:center; color:#10768c">HDM Demo</h1>""")
155
- gr.HTML("""<p style="text-align:center; color:#10768c">Instruction: Upload RGB, human, object masks and then click reconstruct.</p>""")
156
- gr.HTML("""<p style="text-align:center; color:#10768c">You can use these methods to obtain the masks: </p>""")
157
 
158
  # Input data
159
  with gr.Row():
@@ -162,20 +175,26 @@ def main(cfg: ProjectConfig):
162
  with gr.Row():
163
  input_mask_obj = gr.Image(label='Object mask', type='numpy')
164
  with gr.Column():
165
- # TODO: add hint for this value here
166
- input_std = gr.Number(label='Gaussian std coverage', value=3.5)
167
- input_seed = gr.Number(label='Random seed', value=42)
168
- # TODO: add description outside label
169
- input_cls = gr.Dropdown(label='Object category (we have fine tuned the model for specific categories, '
170
- 'reconstructing with these model should lead to better result '
171
- 'for specific categories.) ',
 
 
 
 
172
  choices=['general', 'backpack', 'ball', 'bottle', 'box',
173
  'chair', 'skateboard', 'suitcase', 'table'],
174
  value='general')
 
 
175
  # Output visualization
176
  with gr.Row():
177
  pc_plot = gr.Plot(label="Reconstructed point cloud")
178
- out_pc_download = gr.File(label="3D reconstruction for download") # this allows downloading
179
  with gr.Row():
180
  out_log = gr.TextArea(label='Output log')
181
 
@@ -193,7 +212,8 @@ def main(cfg: ProjectConfig):
193
  rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
194
  example_images = gr.Examples([
195
  [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
196
- [f"{example_dir}/002446/{rgb}", f"{example_dir}/002446/{ps}", f"{example_dir}/002446/{obj}", 3.0, 42, 'ball'],
 
197
  [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
198
  [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
199
 
@@ -201,12 +221,7 @@ def main(cfg: ProjectConfig):
201
 
202
  # demo.launch(share=True)
203
  # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
204
- demo.queue().launch()
205
 
206
  if __name__ == '__main__':
207
- from argparse import ArgumentParser
208
- # parser = ArgumentParser()
209
- # parser.add_argument('-share', default=False, action='store_true', help='allow a temporal public url')
210
- # args = parser.parse_args()
211
-
212
  main()
 
31
 
32
  md_description="""
33
  # HDM Interaction Reconstruction Demo
34
+ ### Official Demo of the paper \"Template Free Reconstruction of Human Object Interaction\", CVPR'24.
35
  [Project Page](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/)|[Code](https://github.com/xiexh20/HDM)|[Dataset](https://edmond.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.2VUEUS )|[Paper](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/paper-lowreso.pdf)
36
 
 
37
  Upload your own human object interaction image and get full 3D reconstruction!
38
 
39
  ## Citation
 
48
  ```
49
  """
50
 
51
+ html_str = """
52
+ <h2 style="text-align:center; color:#10768c">HDM Demo: Upload you own human object interaction image and get full 3D reconstruction!</h2>
53
+ <p style="text-align:left; color:#10768c">Instruction:
54
+ <ol>
55
+ <li>Upload an RGB image of human object interaction.</li>
56
+ <li>Upload the mask for the human and object that you want to reconstruct. You can use these methods to obtain the masks:
57
+ <a href="https://segment-anything.com/demo" target="_blank">SAM</a>,
58
+ <a href="https://huggingface.co/spaces/sam-hq-team/sam-hq" target="_blank">SAM-HQ</a>,
59
+ <a href="https://huggingface.co/spaces/An-619/FastSAM" target="_blank">FastSAM</a>.</li>
60
+ <li>Click `Start Reconstruction` to start.</li>
61
+ <li>You can view the result at `Reconstructed point cloud` and download the point cloud at `download results`. </li>
62
+ </ol>
63
+ Alternatively, you can click one of the examples below and start reconstruction.
64
+
65
+ Have fun!
66
+ </p>
67
+
68
+
69
+ """
70
+
71
  def plot_points(colors, coords):
72
  """
73
  use plotly to visualize 3D point with colors
 
162
  # Setup model
163
  runner = DemoRunner(cfg)
164
 
 
 
 
 
165
  # Setup interface
166
  demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
167
  with demo:
168
  gr.Markdown(md_description)
169
+ gr.HTML(html_str)
 
 
170
 
171
  # Input data
172
  with gr.Row():
 
175
  with gr.Row():
176
  input_mask_obj = gr.Image(label='Object mask', type='numpy')
177
  with gr.Column():
178
+ input_std = gr.Number(label='Gaussian std coverage', value=3.5,
179
+ info="This value is used to estimate camera translation to project the points."
180
+ "The larger value, the camera is farther away. It is category-dependent."
181
+ "We empirically found these values are suitable: backpack-3.5, ball-3.0, bottle-3.0,"
182
+ "box-3.5, chair-3.8, skateboard-3.0, suitcase-3.2, table-3.5. "
183
+ "If you are not sure, 3.5 is a good start point.")
184
+ input_cls = gr.Dropdown(label='Object category',
185
+ info='We have fine tuned the model for some specific categories. '
186
+ 'Reconstructing using these models should lead to better result '
187
+ 'for these specific categories. Simply select the category that '
188
+ 'fits the object from input image.',
189
  choices=['general', 'backpack', 'ball', 'bottle', 'box',
190
  'chair', 'skateboard', 'suitcase', 'table'],
191
  value='general')
192
+ input_seed = gr.Number(label='Random seed', value=42,
193
+ info='Seed for the reverse diffusion process.')
194
  # Output visualization
195
  with gr.Row():
196
  pc_plot = gr.Plot(label="Reconstructed point cloud")
197
+ out_pc_download = gr.File(label="Download results") # this allows downloading
198
  with gr.Row():
199
  out_log = gr.TextArea(label='Output log')
200
 
 
212
  rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
213
  example_images = gr.Examples([
214
  [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
215
+ [f"{example_dir}/205904/{rgb}", f"{example_dir}/205904/{ps}", f"{example_dir}/205904/{obj}", 3.2, 42, 'suitcase'],
216
+ [f"{example_dir}/066241/{rgb}", f"{example_dir}/066241/{ps}", f"{example_dir}/066241/{obj}", 3.5, 42, 'backpack'],
217
  [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
218
  [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
219
 
 
221
 
222
  # demo.launch(share=True)
223
  # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
224
+ demo.queue().launch(share=cfg.run.share)
225
 
226
  if __name__ == '__main__':
 
 
 
 
 
227
  main()
configs/structured.py CHANGED
@@ -35,6 +35,7 @@ class RunConfig:
35
  stage1_name: str = 'stage1' # experiment name to the stage 1 model
36
  stage2_name: str = 'stage2' # experiment name to the stage 2 model
37
  image_path: str = '' # the path to the images for running demo, can be a single file or a glob pattern
 
38
 
39
  # abs path to working dir
40
  code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))
 
35
  stage1_name: str = 'stage1' # experiment name to the stage 1 model
36
  stage2_name: str = 'stage2' # experiment name to the stage 2 model
37
  image_path: str = '' # the path to the images for running demo, can be a single file or a glob pattern
38
+ share: bool = False # whether to run gradio with a temporary public url or not
39
 
40
  # abs path to working dir
41
  code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))
dataset/img_utils.py CHANGED
@@ -103,37 +103,9 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
103
  fx, fy = 918.457763671875, 918.4373779296875
104
  cx, cy = 956.9661865234375, 555.944580078125
105
 
106
- # construct the matrix
107
- # A = np.array([
108
- # [fx, 0, cx-x0, cx-x0, 0, 0],
109
- # [0, fy, cy-y0, cy-y0, 0, 0],
110
- # [fx, 0, cx-x1, 0, cx-x1, 0],
111
- # [0, fy, cy-y1, 0, cy-y1, 0],
112
- # [fx, 0, cx-x2, 0, 0, cx-x2],
113
- # [0, fy, cy-y2, 0, 0, cy-y2]
114
- # ]) # this matrix is low-rank because columns are linearly dependent: col3 - col4 = col5 + col6
115
- # # find linearly dependent rows
116
- # lambdas, V = np.linalg.eig(A)
117
- # # print()
118
- # # The linearly dependent row vectors
119
- # print(lambdas == 0, np.linalg.det(A), A[lambdas == 0, :]) # some have determinant zero, some don't??
120
- # print(np.linalg.inv(A))
121
-
122
- # A = np.array([
123
- # [fx, 0, cx - x0, cx - x0, 0, 0],
124
- # [0, fy, cy - y0, cy - y0, 0, 0],
125
- # [fx, 0, cx - x1, 0, cx - x1, 0],
126
- # [0, fy, cy - y1, 0, cy - y1, 0],
127
- # [fx, 0, cx - x3, 0, 0, cx - x3],
128
- # [0, fy, cy - y3, 0, 0, cy - y3]
129
- # ]) # this is also low rank!
130
- # b = np.array([0, 0, -3*fx, 0, 0, -3*fy]).reshape((-1, 1))
131
- # print("rank of the coefficient matrix:", np.linalg.matrix_rank(A)) # rank is 5! underconstrained matrix!
132
- # x = np.matmul(np.linalg.inv(A), b)
133
-
134
- # fix z0 as 0, then A is a full-rank matrix
135
- # first two equations: origin (0, 0, 0) is projected to the crop center
136
- # last two equations: edge point (3.5, 0, z) is projected to the edge of crop
137
  A = np.array([
138
  [fx, 0, cx-x0, cx-x0],
139
  [0, fy, cy-y0, cy-y0],
@@ -142,7 +114,7 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
142
  ])
143
  # b = np.array([0, 0, -3.5*fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
144
  b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
145
- x = np.matmul(np.linalg.inv(A), b) # use 4 or 5 does not really matter, same results
146
 
147
  # A is always a full-rank matrix
148
 
 
103
  fx, fy = 918.457763671875, 918.4373779296875
104
  cx, cy = 956.9661865234375, 555.944580078125
105
 
106
+ # Construct the matrix
107
+ # First two equations: origin (0, 0, 0) is projected to the crop center
108
+ # Last two equations: edge point (std_coverage, 0, z) is projected to the edge of crop
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  A = np.array([
110
  [fx, 0, cx-x0, cx-x0],
111
  [0, fy, cy-y0, cy-y0],
 
114
  ])
115
  # b = np.array([0, 0, -3.5*fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
116
  b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
117
+ x = np.matmul(np.linalg.inv(A), b)
118
 
119
  # A is always a full-rank matrix
120
 
examples/002446/k1.color.jpg DELETED
Binary file (875 kB)
 
examples/002446/k1.color.json DELETED
@@ -1,79 +0,0 @@
1
- {
2
- "body_joints": [
3
- 362.91015625,
4
- 159.39576721191406,
5
- 0.9023686647415161,
6
- 373.57745361328125,
7
- 180.60316467285156,
8
- 0.8592674136161804,
9
- 333.528564453125,
10
- 179.45702362060547,
11
- 0.7867028713226318,
12
- 278.2209167480469,
13
- 207.63121032714844,
14
- 0.8840203285217285,
15
- 228.78005981445312,
16
- 234.69793701171875,
17
- 0.8324164152145386,
18
- 417.08209228515625,
19
- 181.77294921875,
20
- 0.7164953947067261,
21
- 477.138427734375,
22
- 199.3846893310547,
23
- 0.7733086347579956,
24
- 539.4710083007812,
25
- 219.44891357421875,
26
- 0.8321817517280579,
27
- 401.8182678222656,
28
- 288.8574676513672,
29
- 0.61277836561203,
30
- 382.9984436035156,
31
- 294.7460632324219,
32
- 0.5884051322937012,
33
- 388.8341979980469,
34
- 377.1164245605469,
35
- 0.8282020092010498,
36
- 488.86529541015625,
37
- 404.145751953125,
38
- 0.6257187724113464,
39
- 420.6218566894531,
40
- 282.9443664550781,
41
- 0.5774698257446289,
42
- 455.9610290527344,
43
- 361.8221130371094,
44
- 0.8058001399040222,
45
- 557.13916015625,
46
- 339.43017578125,
47
- 0.69627445936203,
48
- 352.3575134277344,
49
- 151.14682006835938,
50
- 0.9335765242576599,
51
- 371.185791015625,
52
- 146.48798370361328,
53
- 0.8626495003700256,
54
- 342.9620666503906,
55
- 150.00089263916016,
56
- 0.0641486719250679,
57
- 390.03204345703125,
58
- 135.8568878173828,
59
- 0.8869808316230774,
60
- 595.938720703125,
61
- 338.2825012207031,
62
- 0.25365617871284485,
63
- 594.7731323242188,
64
- 334.75506591796875,
65
- 0.23056654632091522,
66
- 561.8401489257812,
67
- 331.20794677734375,
68
- 0.29395991563796997,
69
- 484.1672058105469,
70
- 435.9705810546875,
71
- 0.6335450410842896,
72
- 479.44921875,
73
- 433.6032409667969,
74
- 0.5307492017745972,
75
- 501.7928466796875,
76
- 398.28533935546875,
77
- 0.5881072878837585
78
- ]
79
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples/002446/k1.obj_rend_mask.png DELETED
Binary file (10 kB)
 
examples/002446/k1.person_mask.png DELETED
Binary file (35.6 kB)
 
examples/066241/k1.color.jpg ADDED
examples/066241/k1.obj_rend_mask.png ADDED
examples/066241/k1.person_mask.png ADDED
examples/205904/k1.color.jpg ADDED
examples/205904/k1.obj_rend_mask.png ADDED
examples/205904/k1.person_mask.png ADDED