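"""LGM Mini: a lightweight Gradio demo that turns a single image into 3D Gaussians.

Pipeline: background removal (rembg) -> multi-view generation (ImageDream)
-> Gaussian reconstruction (LGM) -> .ply export.
"""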
import os
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms.functional as TF
from safetensors.torch import load_file
import rembg
import gradio as gr
# download the pretrained LGM checkpoint from the Hugging Face Hub
from huggingface_hub import hf_hub_download
ckpt_path = hf_hub_download(repo_id="ashawkey/LGM", filename="model_fp16.safetensors")
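# build/install the CUDA Gaussian rasterizer from the local copy if it is missing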
try:
    import diff_gaussian_rasterization
except ImportError:
    os.system("pip install ./diff-gaussian-rasterization")
import kiui
from kiui.op import recenter
from core.options import Options
from core.models import LGM
from mvdream.pipeline_mvdream import MVDreamPipeline
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)
TMP_DIR = '/tmp'
os.makedirs(TMP_DIR, exist_ok=True)
# opt = tyro.cli(AllConfigs)  # the full repo parses options from the CLI; hard-coded here
opt = Options(
    input_size=256,
    up_channels=(1024, 1024, 512, 256, 128),  # one more decoder
    up_attention=(True, True, True, False, False),
    splat_size=128,
    output_size=512,  # render & supervise Gaussians at a higher resolution.
    batch_size=8,
    num_views=8,
    gradient_accumulation_steps=1,
    mixed_precision='bf16',
    resume=ckpt_path,
)
# model
model = LGM(opt)
# resume pretrained checkpoint
if opt.resume is not None:
    if opt.resume.endswith('safetensors'):
        ckpt = load_file(opt.resume, device='cpu')
    else:
        ckpt = torch.load(opt.resume, map_location='cpu')
    model.load_state_dict(ckpt, strict=False)
    print(f'[INFO] Loaded checkpoint from {opt.resume}')
else:
    print('[WARN] model randomly initialized, are you sure?')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.half().to(device)
model.eval()
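# perspective projection matrix for Gaussian rendering
# (kept from the full pipeline; not referenced again in this script)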
tan_half_fov = np.tan(0.5 * np.deg2rad(opt.fovy))
proj_matrix = torch.zeros(4, 4, dtype=torch.float32, device=device)
proj_matrix[0, 0] = -1 / tan_half_fov
proj_matrix[1, 1] = -1 / tan_half_fov
proj_matrix[2, 2] = - (opt.zfar + opt.znear) / (opt.zfar - opt.znear)
proj_matrix[3, 2] = - (opt.zfar * opt.znear) / (opt.zfar - opt.znear)
proj_matrix[2, 3] = 1
# load the MVDream (text-to-multi-view) and ImageDream (image-to-multi-view) pipelines
pipe_text = MVDreamPipeline.from_pretrained(
    'ashawkey/mvdream-sd2.1-diffusers',  # remote weights
    torch_dtype=torch.float16,
    trust_remote_code=True,
    # local_files_only=True,
)
pipe_text = pipe_text.to(device)
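# note: pipe_text is kept for parity with the full LGM app; only pipe_image is used below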
pipe_image = MVDreamPipeline.from_pretrained(
    "ashawkey/imagedream-ipmv-diffusers",  # remote weights
    torch_dtype=torch.float16,
    trust_remote_code=True,
    # local_files_only=True,
)
pipe_image = pipe_image.to(device)
# create a reusable rembg session for background removal
bg_remover = rembg.new_session()
# main processing function: image -> 3D Gaussian splat (.ply)
def run(input_image):
    prompt_neg = "ugly, blurry, pixelated obscure, unnatural colors, poor lighting, dull, unclear, cropped, lowres, low quality, artifacts, duplicate"
    # seed
    kiui.seed_everything(42)
    output_ply_path = os.path.join(TMP_DIR, 'output.ply')
    input_image = np.array(input_image)  # uint8
    # bg removal
    carved_image = rembg.remove(input_image, session=bg_remover)  # [H, W, 4]
    mask = carved_image[..., -1] > 0
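    # recenter the object with a border margin, then composite RGBA onto a white background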
    image = recenter(carved_image, mask, border_ratio=0.2)
    image = image.astype(np.float32) / 255.0
    image = image[..., :3] * image[..., 3:4] + (1 - image[..., 3:4])
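    # generate 4 multi-view images with ImageDream (image-conditioned, empty text prompt)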
    mv_image = pipe_image("", image, negative_prompt=prompt_neg, num_inference_steps=30, guidance_scale=5.0, elevation=0)
    # prepare the 4-view input for LGM
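    # reorder the generated views to the camera order LGM expects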
    input_image = np.stack([mv_image[1], mv_image[2], mv_image[3], mv_image[0]], axis=0)  # [4, 256, 256, 3], float32
    input_image = torch.from_numpy(input_image).permute(0, 3, 1, 2).float().to(device)  # [4, 3, 256, 256]
    input_image = F.interpolate(input_image, size=(opt.input_size, opt.input_size), mode='bilinear', align_corners=False)
    input_image = TF.normalize(input_image, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD)
    rays_embeddings = model.prepare_default_rays(device, elevation=0)
    input_image = torch.cat([input_image, rays_embeddings], dim=1).unsqueeze(0)  # [1, 4, 9, H, W]
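    # LGM forward pass (inference only, under fp16 autocast)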
    with torch.no_grad():
        with torch.autocast(device_type=device.type, dtype=torch.float16):
            # generate gaussians
            gaussians = model.forward_gaussians(input_image)
        # save gaussians
        model.gs.save_ply(gaussians, output_ply_path)
    return output_ply_path
# gradio UI
_TITLE = '''LGM Mini'''
_DESCRIPTION = '''
<div>
A lightweight version of <a href="https://huggingface.co/spaces/ashawkey/LGM">LGM: Large Multi-View Gaussian Model for High-Resolution 3D Content Creation</a>.
</div>
'''
css = '''
#duplicate-button {
    margin: auto;
    color: white;
    background: #1565c0;
    border-radius: 100vh;
}
'''
block = gr.Blocks(title=_TITLE, css=css)
with block:
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown('# ' + _TITLE)
            gr.Markdown(_DESCRIPTION)
    with gr.Row(variant='panel'):
        with gr.Column(scale=1):
            # input image
            input_image = gr.Image(label="image", type='pil', height=320)
            # gen button
            button_gen = gr.Button("Generate")
        with gr.Column(scale=1):
            output_splat = gr.Model3D(label="3D Gaussians")
    button_gen.click(fn=run, inputs=[input_image], outputs=[output_splat])
    gr.Examples(
        examples=[
            "data_test/frog_sweater.jpg",
            "data_test/bird.jpg",
            "data_test/boy.jpg",
            "data_test/cat_statue.jpg",
            "data_test/dragontoy.jpg",
            "data_test/gso_rabbit.jpg",
        ],
        inputs=[input_image],
        outputs=[output_splat],
        fn=lambda x: run(input_image=x),
        cache_examples=True,
        label='Image-to-3D Examples',
    )
block.queue().launch(debug=True, share=True)