Spaces:

xiexh20
/

HDM-interaction-recon

Sleeping

App Files Files Community

xxie committed on Mar 9

Commit

40d0f76

•

1 Parent(s): 4ff322d

add instructions

Browse files

Files changed (13) hide show

app.py +39 -24
configs/structured.py +1 -0
dataset/img_utils.py +4 -32
examples/002446/k1.color.jpg +0 -0
examples/002446/k1.color.json +0 -79
examples/002446/k1.obj_rend_mask.png +0 -0
examples/002446/k1.person_mask.png +0 -0
examples/066241/k1.color.jpg +0 -0
examples/066241/k1.obj_rend_mask.png +0 -0
examples/066241/k1.person_mask.png +0 -0
examples/205904/k1.color.jpg +0 -0
examples/205904/k1.obj_rend_mask.png +0 -0
examples/205904/k1.person_mask.png +0 -0

app.py CHANGED Viewed

@@ -31,10 +31,9 @@ from dataset.demo_dataset import DemoDataset
 md_description="""
 # HDM Interaction Reconstruction Demo
-### Official Implementation of the paper \"Template Free Reconstruction of Human Object Interaction\", CVPR'24.
 [Project Page](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/)|[Code](https://github.com/xiexh20/HDM)|[Dataset](https://edmond.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.2VUEUS )|[Paper](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/paper-lowreso.pdf)
 Upload your own human object interaction image and get full 3D reconstruction!
 ## Citation
@@ -49,6 +48,26 @@ Upload your own human object interaction image and get full 3D reconstruction!
 ```
 """
 def plot_points(colors, coords):
     """
     use plotly to visualize 3D point with colors
@@ -143,17 +162,11 @@ def main(cfg: ProjectConfig):
     # Setup model
     runner = DemoRunner(cfg)
-    # runner = None # without model initialization, it shows one line of thumbnail
-    # TODO: add instructions on how to get masks
-    # TODO: add instructions on how to use the demo, input output, example outputs etc.
     # Setup interface
     demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
     with demo:
         gr.Markdown(md_description)
-        gr.HTML("""<h1 style="text-align:center; color:#10768c">HDM Demo</h1>""")
-        gr.HTML("""<p style="text-align:center; color:#10768c">Instruction: Upload RGB, human, object masks and then click reconstruct.</p>""")
-        gr.HTML("""<p style="text-align:center; color:#10768c">You can use these methods to obtain the masks: </p>""")
         # Input data
         with gr.Row():
@@ -162,20 +175,26 @@ def main(cfg: ProjectConfig):
         with gr.Row():
             input_mask_obj = gr.Image(label='Object mask', type='numpy')
             with gr.Column():
-                # TODO: add hint for this value here
-                input_std = gr.Number(label='Gaussian std coverage', value=3.5)
-                input_seed = gr.Number(label='Random seed', value=42)
-                # TODO: add description outside label
-                input_cls = gr.Dropdown(label='Object category (we have fine tuned the model for specific categories, '
-                                              'reconstructing with these model should lead to better result '
-                                              'for specific categories.) ',
                                         choices=['general', 'backpack', 'ball', 'bottle', 'box',
                                                  'chair', 'skateboard', 'suitcase', 'table'],
                                         value='general')
         # Output visualization
         with gr.Row():
             pc_plot = gr.Plot(label="Reconstructed point cloud")
-            out_pc_download = gr.File(label="3D reconstruction for download") # this allows downloading
         with gr.Row():
             out_log = gr.TextArea(label='Output log')
@@ -193,7 +212,8 @@ def main(cfg: ProjectConfig):
         rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
         example_images = gr.Examples([
             [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
-            [f"{example_dir}/002446/{rgb}", f"{example_dir}/002446/{ps}", f"{example_dir}/002446/{obj}", 3.0, 42, 'ball'],
             [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
             [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
@@ -201,12 +221,7 @@ def main(cfg: ProjectConfig):
     # demo.launch(share=True)
     # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
-    demo.queue().launch()
 if __name__ == '__main__':
-    from argparse import ArgumentParser
-    # parser = ArgumentParser()
-    # parser.add_argument('-share', default=False, action='store_true', help='allow a temporal public url')
-    # args = parser.parse_args()
     main()

 md_description="""
 # HDM Interaction Reconstruction Demo
+### Official Demo of the paper \"Template Free Reconstruction of Human Object Interaction\", CVPR'24.
 [Project Page](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/)|[Code](https://github.com/xiexh20/HDM)|[Dataset](https://edmond.mpg.de/dataset.xhtml?persistentId=doi:10.17617/3.2VUEUS )|[Paper](https://virtualhumans.mpi-inf.mpg.de/procigen-hdm/paper-lowreso.pdf)
 Upload your own human object interaction image and get full 3D reconstruction!
 ## Citation
 ```
 """
+html_str = """
+<h2 style="text-align:center; color:#10768c">HDM Demo: Upload your own human object interaction image and get full 3D reconstruction!</h2>
+<p style="text-align:left; color:#10768c">Instruction:
+<ol>
+    <li>Upload an RGB image of human object interaction.</li>
+    <li>Upload the mask for the human and object that you want to reconstruct. You can use these methods to obtain the masks:
+                <a href="https://segment-anything.com/demo" target="_blank">SAM</a>,
+                <a href="https://huggingface.co/spaces/sam-hq-team/sam-hq" target="_blank">SAM-HQ</a>,
+                <a href="https://huggingface.co/spaces/An-619/FastSAM" target="_blank">FastSAM</a>.</li>
+    <li>Click `Start Reconstruction` to start.</li>
+    <li>You can view the result at `Reconstructed point cloud` and download the point cloud at `download results`. </li>
+</ol>
+Alternatively, you can click one of the examples below and start reconstruction.
+Have fun!
+</p>
+"""
 def plot_points(colors, coords):
     """
     use plotly to visualize 3D point with colors
     # Setup model
     runner = DemoRunner(cfg)
     # Setup interface
     demo = gr.Blocks(title="HDM Interaction Reconstruction Demo")
     with demo:
         gr.Markdown(md_description)
+        gr.HTML(html_str)
         # Input data
         with gr.Row():
         with gr.Row():
             input_mask_obj = gr.Image(label='Object mask', type='numpy')
             with gr.Column():
+                input_std = gr.Number(label='Gaussian std coverage', value=3.5,
+                                      info="This value is used to estimate camera translation to project the points."
+                                           "The larger value, the camera is farther away. It is category-dependent."
+                                           "We empirically found these values are suitable: backpack-3.5, ball-3.0, bottle-3.0,"
+                                           "box-3.5, chair-3.8, skateboard-3.0, suitcase-3.2, table-3.5. "
+                                           "If you are not sure, 3.5 is a good start point.")
+                input_cls = gr.Dropdown(label='Object category',
+                                        info='We have fine tuned the model for some specific categories. '
+                                             'Reconstructing using these models should lead to better result '
+                                             'for these specific categories. Simply select the category that '
+                                             'fits the object from input image.',
                                         choices=['general', 'backpack', 'ball', 'bottle', 'box',
                                                  'chair', 'skateboard', 'suitcase', 'table'],
                                         value='general')
+                input_seed = gr.Number(label='Random seed', value=42,
+                                       info='Seed for the reverse diffusion process.')
         # Output visualization
         with gr.Row():
             pc_plot = gr.Plot(label="Reconstructed point cloud")
+            out_pc_download = gr.File(label="Download results") # this allows downloading
         with gr.Row():
             out_log = gr.TextArea(label='Output log')
         rgb, ps, obj = 'k1.color.jpg', 'k1.person_mask.png', 'k1.obj_rend_mask.png'
         example_images = gr.Examples([
             [f"{example_dir}/017450/{rgb}", f"{example_dir}/017450/{ps}", f"{example_dir}/017450/{obj}", 3.0, 42, 'skateboard'],
+            [f"{example_dir}/205904/{rgb}", f"{example_dir}/205904/{ps}", f"{example_dir}/205904/{obj}", 3.2, 42, 'suitcase'],
+            [f"{example_dir}/066241/{rgb}", f"{example_dir}/066241/{ps}", f"{example_dir}/066241/{obj}", 3.5, 42, 'backpack'],
             [f"{example_dir}/053431/{rgb}", f"{example_dir}/053431/{ps}", f"{example_dir}/053431/{obj}", 3.8, 42, 'chair'],
             [f"{example_dir}/158107/{rgb}", f"{example_dir}/158107/{ps}", f"{example_dir}/158107/{obj}", 3.8, 42, 'chair'],
     # demo.launch(share=True)
     # Enabling queue for runtime>60s, see: https://github.com/tloen/alpaca-lora/issues/60#issuecomment-1510006062
+    demo.queue().launch(share=cfg.run.share)
 if __name__ == '__main__':
     main()

configs/structured.py CHANGED Viewed

@@ -35,6 +35,7 @@ class RunConfig:
     stage1_name: str = 'stage1'     # experiment name to the stage 1 model
     stage2_name: str = 'stage2'     # experiment name to the stage 2 model
     image_path: str = ''            # the path to the images for running demo, can be a single file or a glob pattern
     # abs path to working dir
     code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))

     stage1_name: str = 'stage1'     # experiment name to the stage 1 model
     stage2_name: str = 'stage2'     # experiment name to the stage 2 model
     image_path: str = ''            # the path to the images for running demo, can be a single file or a glob pattern
+    share: bool = False             # whether to run gradio with a temporary public url or not
     # abs path to working dir
     code_dir_abs: str = osp.dirname(osp.dirname(osp.abspath(__file__)))

dataset/img_utils.py CHANGED Viewed

@@ -103,37 +103,9 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
         fx, fy = 918.457763671875, 918.4373779296875
         cx, cy = 956.9661865234375, 555.944580078125
-    # construct the matrix
-    # A = np.array([
-    #     [fx, 0, cx-x0, cx-x0,  0,  0],
-    #     [0, fy, cy-y0, cy-y0,  0,  0],
-    #     [fx, 0, cx-x1,   0, cx-x1, 0],
-    #     [0, fy, cy-y1,   0, cy-y1, 0],
-    #     [fx, 0, cx-x2,   0,  0,    cx-x2],
-    #     [0, fy, cy-y2,   0,  0,    cy-y2]
-    # ]) # this matrix is low-rank because columns are linearly dependent: col3 - col4 = col5 + col6
-    # # find linearly dependent rows
-    # lambdas, V = np.linalg.eig(A)
-    # # print()
-    # # The linearly dependent row vectors
-    # print(lambdas == 0, np.linalg.det(A), A[lambdas == 0, :]) # some have determinant zero, some don't??
-    # print(np.linalg.inv(A))
-    # A = np.array([
-    #     [fx, 0, cx - x0, cx - x0, 0, 0],
-    #     [0, fy, cy - y0, cy - y0, 0, 0],
-    #     [fx, 0, cx - x1, 0, cx - x1, 0],
-    #     [0, fy, cy - y1, 0, cy - y1, 0],
-    #     [fx, 0, cx - x3, 0, 0, cx - x3],
-    #     [0, fy, cy - y3, 0, 0, cy - y3]
-    # ]) # this is also low rank!
-    # b = np.array([0, 0, -3*fx, 0, 0, -3*fy]).reshape((-1, 1))
-    # print("rank of the coefficient matrix:", np.linalg.matrix_rank(A))  # rank is 5! underconstrained matrix!
-    # x = np.matmul(np.linalg.inv(A), b)
-    # fix z0 as 0, then A is a full-rank matrix
-    # first two equations: origin (0, 0, 0) is projected to the crop center
-    # last two equations: edge point (3.5, 0, z) is projected to the edge of crop
     A = np.array([
         [fx, 0, cx-x0, cx-x0],
         [0, fy, cy-y0, cy-y0],
@@ -142,7 +114,7 @@ def compute_translation(crop_center, crop_size, is_behave=True, std_coverage=3.5
     ])
     # b = np.array([0, 0, -3.5*fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
     b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0
-    x = np.matmul(np.linalg.inv(A), b) # use 4 or 5 does not really matter, same results
     # A is always a full-rank matrix

         fx, fy = 918.457763671875, 918.4373779296875
         cx, cy = 956.9661865234375, 555.944580078125
+    # Construct the matrix
+    # First two equations: origin (0, 0, 0) is projected to the crop center
+    # Last two equations: edge point (std_coverage, 0, z) is projected to the edge of crop
     A = np.array([
         [fx, 0, cx-x0, cx-x0],
         [0, fy, cy-y0, cy-y0],
     ])
     # b = np.array([0, 0, -3.5*fx, 0]).reshape((-1, 1)) # 3.5->half of 7.0
     b = np.array([0, 0, -std_coverage * fx, 0]).reshape((-1, 1))  # 3.5->half of 7.0
+    x = np.matmul(np.linalg.inv(A), b)
     # A is always a full-rank matrix

examples/002446/k1.color.jpg DELETED Viewed

Binary file (875 kB)

examples/002446/k1.color.json DELETED Viewed

@@ -1,79 +0,0 @@
-{
-  "body_joints": [
-    362.91015625,
-    159.39576721191406,
-    0.9023686647415161,
-    373.57745361328125,
-    180.60316467285156,
-    0.8592674136161804,
-    333.528564453125,
-    179.45702362060547,
-    0.7867028713226318,
-    278.2209167480469,
-    207.63121032714844,
-    0.8840203285217285,
-    228.78005981445312,
-    234.69793701171875,
-    0.8324164152145386,
-    417.08209228515625,
-    181.77294921875,
-    0.7164953947067261,
-    477.138427734375,
-    199.3846893310547,
-    0.7733086347579956,
-    539.4710083007812,
-    219.44891357421875,
-    0.8321817517280579,
-    401.8182678222656,
-    288.8574676513672,
-    0.61277836561203,
-    382.9984436035156,
-    294.7460632324219,
-    0.5884051322937012,
-    388.8341979980469,
-    377.1164245605469,
-    0.8282020092010498,
-    488.86529541015625,
-    404.145751953125,
-    0.6257187724113464,
-    420.6218566894531,
-    282.9443664550781,
-    0.5774698257446289,
-    455.9610290527344,
-    361.8221130371094,
-    0.8058001399040222,
-    557.13916015625,
-    339.43017578125,
-    0.69627445936203,
-    352.3575134277344,
-    151.14682006835938,
-    0.9335765242576599,
-    371.185791015625,
-    146.48798370361328,
-    0.8626495003700256,
-    342.9620666503906,
-    150.00089263916016,
-    0.0641486719250679,
-    390.03204345703125,
-    135.8568878173828,
-    0.8869808316230774,
-    595.938720703125,
-    338.2825012207031,
-    0.25365617871284485,
-    594.7731323242188,
-    334.75506591796875,
-    0.23056654632091522,
-    561.8401489257812,
-    331.20794677734375,
-    0.29395991563796997,
-    484.1672058105469,
-    435.9705810546875,
-    0.6335450410842896,
-    479.44921875,
-    433.6032409667969,
-    0.5307492017745972,
-    501.7928466796875,
-    398.28533935546875,
-    0.5881072878837585
-  ]
-}

examples/002446/k1.obj_rend_mask.png DELETED Viewed

Binary file (10 kB)

examples/002446/k1.person_mask.png DELETED Viewed

Binary file (35.6 kB)

examples/066241/k1.color.jpg ADDED Viewed

examples/066241/k1.obj_rend_mask.png ADDED Viewed

examples/066241/k1.person_mask.png ADDED Viewed

examples/205904/k1.color.jpg ADDED Viewed

examples/205904/k1.obj_rend_mask.png ADDED Viewed

examples/205904/k1.person_mask.png ADDED Viewed