xxie committed
Commit 40d0f76
Parent(s): 4ff322d
add instructions
- app.py +39 -24
- configs/structured.py +1 -0
- dataset/img_utils.py +4 -32
- examples/002446/k1.color.jpg +0 -0
- examples/002446/k1.color.json +0 -79
- examples/002446/k1.obj_rend_mask.png +0 -0
- examples/002446/k1.person_mask.png +0 -0
- examples/066241/k1.color.jpg +0 -0
- examples/066241/k1.obj_rend_mask.png +0 -0
- examples/066241/k1.person_mask.png +0 -0
- examples/205904/k1.color.jpg +0 -0
- examples/205904/k1.obj_rend_mask.png +0 -0
- examples/205904/k1.person_mask.png +0 -0
app.py
CHANGED
@@ -31,10 +31,9 @@ from dataset.demo_dataset import DemoDataset
 
 md_description="""
 # HDM Interaction Reconstruction Demo
-### Official
+### Official Demo of the paper "Template Free Reconstruction of Human Object Interaction", CVPR'24.
 [Project Page](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/)|[Code](https://github.com/xiexh20/HDM)|[Dataset](https://edmond.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.2VUEUS )|[Paper](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/paper-lowreso.pdf)
 
-
 Upload your own human object interaction image and get full 3D reconstruction!
 
 ## Citation
@@ -49,6 +48,26 @@ Upload your own human object interaction image and get full 3D reconstruction!
 ```
 """
 
+html_str = """
+<h2 style="text-align:center; color:#10768c">HDM Demo: Upload you own human object interaction image and get full 3D reconstruction!</h2>
+<p style="text-align:left; color:#10768c">Instruction:
+<ol>
+<li>Upload an RGB image of human object interaction.</li>
+<li>Upload the mask for the human and object that you want to reconstruct. You can use these methods to obtain the masks:
+<a href="https://segment-anything.com/demo" target="_blank">SAM</a>,
+<a href="https://huggingface.co/spaces/sam-hq-team/sam-hq" target="_blank">SAM-HQ</a>,
+<a href="https://huggingface.co/spaces/An-619/FastSAM" target="_blank">FastSAM</a>.</li>
+<li>Click `Start Reconstruction` to start.</li>
+<li>You can view the result at `Reconstructed point cloud` and download the point cloud at `download results`. </li>
+</ol>
+Alternatively, you can click one of the examples below and start reconstruction.
+
+Have fun!
+</p>
+
+
+"""
+
 def plot_points(colors, coords):
     """
     use plotly to visualize 3D point with colors
@@ -143,17 +162,11 @@ def main(cfg: ProjectConfig):
     # Setup model
     runner = DemoRunner(cfg)
 
-    # runner = None  # without model initialization, it shows one line of thumbnail
-    # TODO: add instructions on how to get masks
-    # TODO: add instructions on how to use the demo, input output, example outputs etc.
-
     # Setup interface
     demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
    with demo:
         gr.Markdown(md_description)
-        gr.HTML(
-        gr.HTML("""<p style="text-align:center; color:#10768c">Instruction: Upload RGB, human, object masks and then click reconstruct.</p>""")
-        gr.HTML("""<p style="text-align:center; color:#10768c">You can use these methods to obtain the masks: </p>""")
+        gr.HTML(html_str)
 
         # Input data
         with gr.Row():
@@ -162,20 +175,26 @@ def main(cfg: ProjectConfig):
         with gr.Row():
             input_mask_obj = gr.Image(label='Object mask', type='numpy')
         with gr.Column():
-
-
-
-
-
-
-
+            input_std = gr.Number(label='Gaussian std coverage', value=3.5,
+                                  info="This value is used to estimate camera translation to project the points."
+                                       "The larger value, the camera is farther away. It is category-dependent."
+                                       "We empirically found these values are suitable: backpack-3.5, ball-3.0, bottle-3.0,"
+                                       "box-3.5, chair-3.8, skateboard-3.0, suitcase-3.2, table-3.5. "
+                                       "If you are not sure, 3.5 is a good start point.")
+            input_cls = gr.Dropdown(label='Object category',
+                                    info='We have fine tuned the model for some specific categories. '
+                                         'Reconstructing using these models should lead to better result '
+                                         'for these specific categories. Simply select the category that '
+                                         'fits the object from input image.',
                                     choices=['general', 'backpack', 'ball', 'bottle', 'box',
                                              'chair', 'skateboard', 'suitcase', 'table'],
                                     value='general')
+            input_seed = gr.Number(label='Random seed', value=42,
+                                   info='Seed for the reverse diffusion process.')
         # Output visualization
         with gr.Row():
             pc_plot = gr.Plot(label="Reconstructed point cloud")
-            out_pc_download = gr.File(label="
+            out_pc_download = gr.File(label="Download results")  # this allows downloading
         with gr.Row():
             out_log = gr.TextArea(label='Output log')
 
@@ -193,7 +212,8 @@ def main(cfg: ProjectConfig):
         rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
         example_images = gr.Examples([
             [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
-            [f"{example_dir}/
+            [f"{example_dir}/205904/{rgb}", f"{example_dir}/205904/{ps}", f"{example_dir}/205904/{obj}", 3.2, 42, 'suitcase'],
+            [f"{example_dir}/066241/{rgb}", f"{example_dir}/066241/{ps}", f"{example_dir}/066241/{obj}", 3.5, 42, 'backpack'],
             [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
             [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
 
@@ -201,12 +221,7 @@ def main(cfg: ProjectConfig):
 
     # demo.launch(share=True)
     # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
-    demo.queue().launch()
+    demo.queue().launch(share=cfg.run.share)
 
 if __name__ == '__main__':
-    from argparse import ArgumentParser
-    # parser = ArgumentParser()
-    # parser.add_argument('-share', default=False, action='store_true', help='allow a temporal public url')
-    # args = parser.parse_args()
-
     main()
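The launch change above follows the standard Gradio pattern: `queue()` lets jobs run past the default request timeout (the reconstruction takes over 60 s), and `launch(share=...)` toggles a temporary public URL. Below is a minimal runnable sketch of the same wiring; `echo_fn` is a hypothetical stand-in for the Space's actual `DemoRunner`:

```python
import gradio as gr

def echo_fn(std, seed):
    # Hypothetical stand-in for the real reconstruction runner.
    return f"would reconstruct with std={std}, seed={int(seed)}"

with gr.Blocks(title="HDM Interaction Reconstruction Demo") as demo:
    input_std = gr.Number(label='Gaussian std coverage', value=3.5)
    input_seed = gr.Number(label='Random seed', value=42)
    out_log = gr.TextArea(label='Output log')
    button = gr.Button('Start Reconstruction')
    button.click(fn=echo_fn, inputs=[input_std, input_seed], outputs=out_log)

# queue() enables long-running jobs; share=True would request a
# temporary public gradio.live URL, as cfg.run.share does in app.py.
demo.queue().launch(share=False)
```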
configs/structured.py
CHANGED
@@ -35,6 +35,7 @@ class RunConfig:
     stage1_name: str = 'stage1'  # experiment name to the stage 1 model
     stage2_name: str = 'stage2'  # experiment name to the stage 2 model
     image_path: str = ''  # the path to the images for running demo, can be a single file or a glob pattern
+    share: bool = False  # whether to run gradio with a temporal public url or not
 
     # abs path to working dir
     code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))
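Since `RunConfig` appears to be a dataclass-style structured config nested inside `ProjectConfig`, `app.py` reads the new flag as `cfg.run.share`. A rough sketch of that nesting with plain dataclasses; the Hydra-style CLI override in the comment is an assumption about how this repo is configured, not verified:

```python
from dataclasses import dataclass, field

@dataclass
class RunConfig:
    image_path: str = ''   # demo input image(s)
    share: bool = False    # temporary public gradio URL?

@dataclass
class ProjectConfig:
    run: RunConfig = field(default_factory=RunConfig)

def main(cfg: ProjectConfig):
    # app.py reaches the flag through the nested config:
    print('launch with share =', cfg.run.share)

# With Hydra, the flag would likely be overridden from the CLI,
# e.g. `python app.py run.share=True` (assumed, not verified).
main(ProjectConfig(run=RunConfig(share=True)))
```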
dataset/img_utils.py
CHANGED
@@ -103,37 +103,9 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
     fx, fy = 918.457763671875, 918.4373779296875
     cx, cy = 956.9661865234375, 555.944580078125
 
-    #
-    #
-    #
-    # [0, fy, cy-y0, cy-y0, 0, 0],
-    # [fx, 0, cx-x1, 0, cx-x1, 0],
-    # [0, fy, cy-y1, 0, cy-y1, 0],
-    # [fx, 0, cx-x2, 0, 0, cx-x2],
-    # [0, fy, cy-y2, 0, 0, cy-y2]
-    # ])  # this matrix is low-rank because columns are linearly dependent: col3 - col4 = col5 + col6
-    # # find linearly dependent rows
-    # lambdas, V = np.linalg.eig(A)
-    # # print()
-    # # The linearly dependent row vectors
-    # print(lambdas == 0, np.linalg.det(A), A[lambdas == 0, :])  # some have determinant zero, some don't??
-    # print(np.linalg.inv(A))
-
-    # A = np.array([
-    #     [fx, 0, cx - x0, cx - x0, 0, 0],
-    #     [0, fy, cy - y0, cy - y0, 0, 0],
-    #     [fx, 0, cx - x1, 0, cx - x1, 0],
-    #     [0, fy, cy - y1, 0, cy - y1, 0],
-    #     [fx, 0, cx - x3, 0, 0, cx - x3],
-    #     [0, fy, cy - y3, 0, 0, cy - y3]
-    # ])  # this is also low rank!
-    # b = np.array([0, 0, -3*fx, 0, 0, -3*fy]).reshape((-1, 1))
-    # print("rank of the coefficient matrix:", np.linalg.matrix_rank(A))  # rank is 5! underconstrained matrix!
-    # x = np.matmul(np.linalg.inv(A), b)
-
-    # fix z0 as 0, then A is a full-rank matrix
-    # first two equations: origin (0, 0, 0) is projected to the crop center
-    # last two equations: edge point (3.5, 0, z) is projected to the edge of crop
+    # Construct the matrix
+    # First two equations: origin (0, 0, 0) is projected to the crop center
+    # Last two equations: edge point (std_coverage, 0, z) is projected to the edge of crop
     A = np.array([
         [fx, 0, cx-x0, cx-x0],
         [0, fy, cy-y0, cy-y0],
@@ -142,7 +114,7 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
     ])
     # b = np.array([0, 0, -3.5*fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0
     b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0
-    x = np.matmul(np.linalg.inv(A), b)
+    x = np.matmul(np.linalg.inv(A), b)
 
     # A is always a full-rank matrix
 
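The comments kept in this hunk describe the geometry: the solved translation must project the object origin onto the crop center, while a point offset by `std_coverage` along x must land on the crop edge. That is what ties the "Gaussian std coverage" slider in `app.py` to apparent camera distance. A simplified, self-contained sketch of that reasoning, solving the depth in closed form rather than the function's actual 4x4 system; the crop center and size are made-up values:

```python
import numpy as np

# Intrinsics hard-coded in compute_translation above.
fx, fy = 918.457763671875, 918.4373779296875
cx, cy = 956.9661865234375, 555.944580078125

x0, y0 = 900.0, 540.0  # hypothetical crop center (pixels)
crop_size = 800.0      # hypothetical square crop width (pixels)
std_coverage = 3.5     # half-extent of the object in normalized units

# Pinhole projection: a lateral offset of std_coverage at depth z spans
# fx * std_coverage / z pixels. Forcing that span to reach the crop edge
# fixes the depth, i.e. a larger std_coverage pushes the camera farther away.
half_crop = crop_size / 2.0
z = std_coverage * fx / half_crop

# Translation that puts the object origin exactly at the crop center.
tx = (x0 - cx) * z / fx
ty = (y0 - cy) * z / fy
print(f"estimated translation: ({tx:.3f}, {ty:.3f}, {z:.3f})")
```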
examples/002446/k1.color.jpg
DELETED
Binary file (875 kB)
examples/002446/k1.color.json
DELETED
@@ -1,79 +0,0 @@
-{
-    "body_joints": [
-        362.91015625,
-        159.39576721191406,
-        0.9023686647415161,
-        373.57745361328125,
-        180.60316467285156,
-        0.8592674136161804,
-        333.528564453125,
-        179.45702362060547,
-        0.7867028713226318,
-        278.2209167480469,
-        207.63121032714844,
-        0.8840203285217285,
-        228.78005981445312,
-        234.69793701171875,
-        0.8324164152145386,
-        417.08209228515625,
-        181.77294921875,
-        0.7164953947067261,
-        477.138427734375,
-        199.3846893310547,
-        0.7733086347579956,
-        539.4710083007812,
-        219.44891357421875,
-        0.8321817517280579,
-        401.8182678222656,
-        288.8574676513672,
-        0.61277836561203,
-        382.9984436035156,
-        294.7460632324219,
-        0.5884051322937012,
-        388.8341979980469,
-        377.1164245605469,
-        0.8282020092010498,
-        488.86529541015625,
-        404.145751953125,
-        0.6257187724113464,
-        420.6218566894531,
-        282.9443664550781,
-        0.5774698257446289,
-        455.9610290527344,
-        361.8221130371094,
-        0.8058001399040222,
-        557.13916015625,
-        339.43017578125,
-        0.69627445936203,
-        352.3575134277344,
-        151.14682006835938,
-        0.9335765242576599,
-        371.185791015625,
-        146.48798370361328,
-        0.8626495003700256,
-        342.9620666503906,
-        150.00089263916016,
-        0.0641486719250679,
-        390.03204345703125,
-        135.8568878173828,
-        0.8869808316230774,
-        595.938720703125,
-        338.2825012207031,
-        0.25365617871284485,
-        594.7731323242188,
-        334.75506591796875,
-        0.23056654632091522,
-        561.8401489257812,
-        331.20794677734375,
-        0.29395991563796997,
-        484.1672058105469,
-        435.9705810546875,
-        0.6335450410842896,
-        479.44921875,
-        433.6032409667969,
-        0.5307492017745972,
-        501.7928466796875,
-        398.28533935546875,
-        0.5881072878837585
-    ]
-}
examples/002446/k1.obj_rend_mask.png
DELETED
Binary file (10 kB)
examples/002446/k1.person_mask.png
DELETED
Binary file (35.6 kB)
examples/066241/k1.color.jpg
ADDED
examples/066241/k1.obj_rend_mask.png
ADDED
examples/066241/k1.person_mask.png
ADDED
examples/205904/k1.color.jpg
ADDED
examples/205904/k1.obj_rend_mask.png
ADDED
examples/205904/k1.person_mask.png
ADDED