File size: 9,733 Bytes
a870321
 
 
 
 
 
 
 
a7620c3
a870321
54148c8
a870321
 
54148c8
 
a870321
54148c8
a870321
 
 
 
 
37f1879
54148c8
71ec6e3
54c2fd6
9def845
 
a870321
 
 
 
 
 
 
 
 
 
 
cb6c3df
 
a870321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f338983
cb6c3df
 
 
ca77995
 
 
 
a870321
 
93147ba
 
 
 
c52acae
 
 
 
 
 
 
54148c8
93147ba
 
 
54148c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a870321
 
 
 
cb6c3df
a870321
 
 
 
 
cb6c3df
a870321
 
 
 
cb6c3df
a870321
 
 
 
 
 
cb6c3df
a870321
cb6c3df
a870321
 
cb6c3df
 
 
a870321
cb6c3df
a870321
 
 
cb6c3df
 
 
a870321
cb6c3df
a870321
 
54148c8
 
 
 
 
 
 
a870321
54148c8
 
 
222abc5
 
54148c8
a870321
c839178
a870321
 
 
 
 
 
54148c8
68ee842
 
 
 
 
 
54148c8
93147ba
cb6c3df
a870321
 
436ca04
 
 
3468720
a870321
54148c8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb6c3df
a870321
93147ba
 
4fb456c
93147ba
a870321
cb6c3df
4fb456c
0c60363
54148c8
93147ba
0c60363
a870321
cb6c3df
a870321
 
cb6c3df
93147ba
a870321
 
eeef62e
a870321
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
import argparse
import os
import glob
import torch
from PIL import Image
from copy import deepcopy
import sys
import tempfile
import subprocess
from huggingface_hub import snapshot_download
from functools import partial

# Runtime switches read from the environment; each is on unless set to
# something other than "1".
# LOCAL_CODE: use the existing ./code directory instead of downloading it.
LOCAL_CODE = os.environ.get("LOCAL_CODE", "1") == "1"
# CACHE_EXAMPLES: forwarded to gr.Examples(cache_examples=...).
CACHE_EXAMPLES = os.environ.get("CACHE_EXAMPLES", "1") == "1"
# SAM_LOCAL: run SAM inside this process instead of through run_sam.py.
SAM_LOCAL = os.environ.get("SAM_LOCAL", "1") == "1"
# Credentials handed to demo.launch(auth=...); None when PASSWD is unset.
AUTH = ("admin", os.environ["PASSWD"]) if "PASSWD" in os.environ else None
# Camera distance used for the examples and as the slider's initial value.
DEFAULT_CAM_DIST = 1.9

if LOCAL_CODE:
    code_dir = "./code"
else:
    # HF_TOKEN is optional: a missing variable yields token=None instead of a
    # KeyError, leaving credential lookup to huggingface_hub's defaults.
    code_dir = snapshot_download(
        "zouzx/TriplaneGaussian",
        local_dir="./code",
        token=os.environ.get("HF_TOKEN"),
    )

sys.path.append(code_dir)

if not LOCAL_CODE:
    # Run pip through the current interpreter so the pinned gradio version is
    # installed into the environment this process imports from.
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "gradio==4.12.0"])

# gradio is imported only at this point: when LOCAL_CODE is off, the pip call
# earlier in this file pins its version first.
import gradio as gr
print("gr version: ", gr.__version__)

# NOTE(review): these project modules are presumably resolved through
# code_dir, which is appended to sys.path earlier in this file -- confirm.
from utils import image_preprocess, pred_bbox, sam_init, sam_out_nosave, todevice
from gradio_splatting.backend.gradio_model3dgs import Model3DGS
import tgs
from tgs.utils.config import ExperimentConfig, load_config
from tgs.systems.infer import TGS

# Checkpoint, config and output locations, relative to the working directory.
SAM_CKPT_PATH = "code/checkpoints/sam_vit_h_4b8939.pth"
MODEL_CKPT_PATH = "code/checkpoints/tgs_lvis_100v_rel.ckpt"
CONFIG = "code/configs/single-rel.yaml"
EXP_ROOT_DIR = "./outputs-gradio"

os.makedirs(EXP_ROOT_DIR, exist_ok=True)

# CUDA_VISIBLE_DEVICES filters and renumbers the GPUs, so inside this process
# the first visible device is always ordinal 0. Using the raw variable value
# (e.g. "2" or "0,1") as a torch device index yields an invalid device, hence
# the fixed "0". GPU selection through CUDA_VISIBLE_DEVICES keeps working
# because the CUDA runtime applies it before torch enumerates devices.
gpu = "0"
device = f"cuda:{gpu}" if torch.cuda.is_available() else "cpu"

print("device: ", device)

# init system
base_cfg: ExperimentConfig = load_config(CONFIG, cli_args=[], n_gpus=1)
base_cfg.system.weights = MODEL_CKPT_PATH
system = TGS(cfg=base_cfg.system).to(device)
print("load model ckpt done.")

# Markdown shown at the top of the demo page (rendered via gr.Markdown in launch()).
HEADER = """
# Triplane Meets Gaussian Splatting: Fast and Generalizable Single-View 3D Reconstruction with Transformers

<div>
<a style="display: inline-block;" href="https://arxiv.org/abs/2312.09147"><img src="https://img.shields.io/badge/arxiv-2312.09147-B31B1B.svg"></a>
</div>

TGS enables fast reconstruction from single-view image in a few seconds based on a hybrid Triplane-Gaussian 3D representation.

This model is trained on Objaverse-LVIS (**~45K** synthetic objects) only. And note that we normalize the input camera pose to a pre-set viewpoint during training stage following LRM, rather than directly using camera pose of input camera as implemented in our original paper.

**Tips:**
1. If you find the result is unsatisfied, please try to change the camera distance. It perhaps improves the results.

**Notes:**
1. Please wait until the completion of the reconstruction of the previous model before proceeding with the next one, otherwise, it may cause bug. We will fix it soon.
2. We currently conduct image segmentation (SAM) by invoking subprocess, which consumes more time as it requires loading SAM checkpoint each time. We have observed that directly running SAM in app.py often leads to queue blocking, but we haven't identified the cause yet. We plan to fix this issue for faster segmentation running time later. 
"""

def assert_input_image(input_image):
    """Reject a missing input image with a Gradio error.

    Args:
        input_image: value of the input image component; ``None`` means
            nothing was selected or uploaded.

    Raises:
        gr.Error: if ``input_image`` is ``None``.
    """
    if input_image is not None:
        return
    raise gr.Error("No image selected or uploaded!")

def resize_image(input_raw, size):
    """Scale an image so its longer side becomes ``size``, keeping the aspect ratio.

    Args:
        input_raw: image exposing ``size`` as ``(width, height)`` and a
            ``resize`` method (a PIL image at the call site in this file).
        size: target length, in pixels, of the longer side.

    Returns:
        The image returned by ``input_raw.resize`` using LANCZOS resampling.
    """
    w, h = input_raw.size
    ratio = size / max(w, h)
    # Clamp to one pixel: on a very elongated image the shorter side would
    # otherwise truncate to 0, which resize() rejects.
    resized_w = max(1, int(w * ratio))
    resized_h = max(1, int(h * ratio))
    return input_raw.resize((resized_w, resized_h), Image.Resampling.LANCZOS)

def preprocess(input_raw, save_path, sam_predictor=None):
    """Segment the input image and store the result as ``seg_rgba.png``.

    With ``SAM_LOCAL`` enabled and a predictor supplied, the image is resized
    to 512 px on its longer side and segmented in this process. Otherwise the
    raw image is written to ``input_raw.png`` and ``run_sam.py`` is executed
    as a subprocess to produce the segmented file.

    Args:
        input_raw: input image (a PIL image at the call sites in this file).
        save_path: directory that receives the generated files.
        sam_predictor: predictor passed to ``sam_out_nosave``; ``None`` forces
            the subprocess path.

    Returns:
        Path of the segmented image inside ``save_path``.

    Raises:
        subprocess.CalledProcessError: if ``run_sam.py`` exits with a
            non-zero status.
    """
    image_path = os.path.join(save_path, "input_raw.png")
    seg_path = os.path.join(save_path, "seg_rgba.png")
    if SAM_LOCAL and sam_predictor is not None:
        input_raw = resize_image(input_raw, 512)
        print("image size:", input_raw.size)
        image_sam = sam_out_nosave(
            sam_predictor, input_raw.convert("RGB"), pred_bbox(input_raw)
        )
        image_preprocess(image_sam, seg_path, lower_contrast=False, rescale=True)
    else:
        input_raw.save(image_path)
        # Pass an argument list (no shell) so paths containing spaces or shell
        # metacharacters are handed over verbatim, and run the script with the
        # current interpreter. check=True surfaces a failed segmentation here
        # instead of returning a path to a file that was never written.
        subprocess.run(
            [
                sys.executable,
                "run_sam.py",
                "--image_path",
                image_path,
                "--save_path",
                seg_path,
            ],
            check=True,
        )

    print("image raw path = ", image_path, "image save path =", seg_path)
    return seg_path

def init_trial_dir():
    """Create a fresh, uniquely named working directory under ``EXP_ROOT_DIR``.

    ``tempfile.mkdtemp`` is used rather than a discarded
    ``TemporaryDirectory`` object: the latter removes its directory as soon as
    the object is finalized, which forced a re-creation of the same path
    afterwards. The directory is not cleaned up automatically.

    Returns:
        Path of the newly created directory.
    """
    return tempfile.mkdtemp(dir=EXP_ROOT_DIR)

@torch.no_grad()
def infer(image_path: str,
          cam_dist: float,
          save_path: str,
          only_3dgs: bool = False):
    """Run the loaded TGS system on one image and write outputs to ``save_path``.

    Args:
        image_path: path of the image placed in the data config's ``image_list``.
        cam_dist: value assigned to both the conditioning and the evaluation
            camera distance of the data config.
        save_path: directory handed to the system's test hooks.
        only_3dgs: forwarded to the data config and to ``test_step`` as
            ``save_3dgs``; when true, ``on_test_epoch_end`` is skipped.
    """
    # Work on a private copy so the shared base config stays untouched.
    cfg = deepcopy(base_cfg.data)
    cfg.only_3dgs = only_3dgs
    cfg.cond_camera_distance = cfg.eval_camera_distance = cam_dist
    cfg.image_list = [image_path]

    datamodule_cls = tgs.find(base_cfg.data_cls)
    datamodule = datamodule_cls(cfg)
    datamodule.setup()

    for step, raw_batch in enumerate(datamodule.test_dataloader()):
        on_device = todevice(raw_batch, device)
        system.test_step(save_path, on_device, step, save_3dgs=only_3dgs)

    if only_3dgs:
        return
    system.on_test_epoch_end(save_path)

def run(image_path: str,
        cam_dist: float,
        save_path: str):
    """Run inference in 3DGS-only mode and return a ``.ply`` file from ``save_path``.

    Args:
        image_path: image forwarded to ``infer``.
        cam_dist: camera distance forwarded to ``infer``.
        save_path: output directory searched for ``*.ply`` afterwards.

    Returns:
        The first path reported by ``glob`` for ``*.ply`` in ``save_path``.
    """
    infer(image_path, cam_dist, save_path, only_3dgs=True)
    ply_pattern = os.path.join(save_path, "*.ply")
    ply_files = glob.glob(ply_pattern)
    return ply_files[0]

def run_video(image_path: str,
            cam_dist: float,
            save_path: str):
    """Run full inference and return an ``.mp4`` file from ``save_path``.

    Args:
        image_path: image forwarded to ``infer``.
        cam_dist: camera distance forwarded to ``infer``.
        save_path: output directory searched for ``*.mp4`` afterwards.

    Returns:
        The first path reported by ``glob`` for ``*.mp4`` in ``save_path``.
    """
    infer(image_path, cam_dist, save_path)
    mp4_pattern = os.path.join(save_path, "*.mp4")
    mp4_files = glob.glob(mp4_pattern)
    return mp4_files[0]

def run_example(image_path, sam_predictor=None):
    """Process one example end to end at the default camera distance.

    Args:
        image_path: example input handed to ``preprocess`` unchanged.
        sam_predictor: optional predictor forwarded to ``preprocess``.

    Returns:
        Tuple ``(segmented image path, 3DGS file path, video path)``.
    """
    trial_dir = init_trial_dir()
    seg_image_path = preprocess(image_path, trial_dir, sam_predictor)
    # The 3DGS export runs before the video rendering, both in the same directory.
    gs_path = run(seg_image_path, DEFAULT_CAM_DIST, trial_dir)
    video_path = run_video(seg_image_path, DEFAULT_CAM_DIST, trial_dir)
    return seg_image_path, gs_path, video_path

def launch(port):
    """Build the Gradio demo and serve it on ``port``.

    When ``SAM_LOCAL`` is set, a SAM predictor is created once with
    ``sam_init`` and bound into the preprocessing callbacks; otherwise
    ``None`` is bound, which makes ``preprocess`` take its subprocess path.

    Args:
        port: passed to ``demo.launch`` as ``server_port``.
    """
    if SAM_LOCAL:
        sam_predictor = sam_init(SAM_CKPT_PATH, gpu)
        print("load sam ckpt done.")
    else:
        sam_predictor = None

    with gr.Blocks(
        title="TGS - Demo"
    ) as demo:
        # Page header.
        with gr.Row(variant='panel'):
            gr.Markdown(HEADER)
    
        with gr.Row(variant='panel'):
            # Left column: image upload, camera distance and the run button.
            with gr.Column(scale=1):
                input_image = gr.Image(value=None, image_mode="RGB", width=512, height=512, type="pil", sources="upload", label="Input Image")
                gr.Markdown(
                    """
                    **Camera distance** denotes the distance between camera center and scene center.
                    If you find the 3D model appears flattened, you can increase it. Conversely, if the 3D model appears thick, you can decrease it.
                    """
                )
                camera_dist_slider = gr.Slider(1.0, 4.0, value=DEFAULT_CAM_DIST, step=0.1, label="Camera Distance")
                # preprocess_ckb = gr.Checkbox(value=True, label="Remove background")
                img_run_btn = gr.Button("Reconstruction", variant="primary")

            # Right column: segmented image, rendered video and the 3D model viewer.
            with gr.Column(scale=1):
                with gr.Row(variant='panel'):
                    seg_image = gr.Image(value=None, width="auto", type="filepath", image_mode="RGBA", label="Segmented Image", interactive=False)
                    output_video = gr.Video(value=None, width="auto", label="Rendered Video", autoplay=True)
                output_3dgs = Model3DGS(value=None, label="3D Model")
        
        # Example gallery; each example goes through run_example with the
        # predictor chosen above.
        with gr.Row(variant="panel"):
            gr.Examples(
                examples=[
                    "example_images/green_parrot.webp",
                    "example_images/rusty_gameboy.webp",
                    "example_images/a_pikachu_with_smily_face.webp",
                    "example_images/an_otter_wearing_sunglasses.webp",
                    "example_images/lumberjack_axe.webp",
                    "example_images/medieval_shield.webp",
                    "example_images/a_cat_dressed_as_the_pope.webp",
                    "example_images/a_cute_little_frog_comicbook_style.webp",
                    "example_images/a_purple_winter_jacket.webp",
                    "example_images/MP5,_high_quality,_ultra_realistic.webp",
                    "example_images/retro_pc_photorealistic_high_detailed.webp",
                    "example_images/stratocaster_guitar_pixar_style.webp"
                ],
                inputs=[input_image],
                outputs=[seg_image, output_3dgs, output_video],
                cache_examples=CACHE_EXAMPLES,
                fn=partial(run_example, sam_predictor=sam_predictor),
                label="Examples",
                examples_per_page=40
            )

        # Holds the working directory created by init_trial_dir for the
        # later steps of the chain below.
        trial_dir = gr.State()
        # Button pipeline, chained with .success():
        # validate input -> create trial dir -> segment -> export 3DGS -> render video.
        img_run_btn.click(
            fn=assert_input_image,
            inputs=[input_image],
            # queue=False
        ).success(
            fn=init_trial_dir,
            outputs=[trial_dir],
            # queue=False
        ).success(
            fn=partial(preprocess, sam_predictor=sam_predictor),
            inputs=[input_image, trial_dir],
            outputs=[seg_image],
        ).success(fn=run,
                inputs=[seg_image, camera_dist_slider, trial_dir],
                outputs=[output_3dgs],
        ).success(fn=run_video,
                inputs=[seg_image, camera_dist_slider, trial_dir],
                outputs=[output_video])

        # Enable the request queue (max_size=10), then start the server with
        # the optional credentials from AUTH.
        launch_args = {"server_port": port}
        demo.queue(max_size=10)
        demo.launch(auth=AUTH, **launch_args)

if __name__ == "__main__":
    # Register every option before parsing. An earlier parse_known_args() call
    # placed ahead of add_argument made "--help" exit with a usage text that
    # did not list --port, and its results were never used.
    parser = argparse.ArgumentParser()
    parser.add_argument("--port", type=int, default=7860)
    args = parser.parse_args()
    launch(args.port)