Spaces:
Paused
Paused
Chao Xu
committed on
Commit
•
3c4eaa2
1
Parent(s):
1d24bdc
add badges, fix rerun bug, pruning
Browse files
- README.md +1 -1
- app.py +37 -54
- pre-requirements.txt +6 -13
- requirements.txt +1 -6
- style.css +13 -0
- unsafe.png +3 -0
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 📸🚀🌟
|
|
4 |
colorFrom: red
|
5 |
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 3.
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
license: mit
|
|
|
4 |
colorFrom: red
|
5 |
colorTo: yellow
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 3.40.0
|
8 |
app_file: app.py
|
9 |
pinned: true
|
10 |
license: mit
|
app.py
CHANGED
@@ -31,7 +31,6 @@ import numpy as np
|
|
31 |
import plotly.graph_objects as go
|
32 |
from functools import partial
|
33 |
|
34 |
-
from lovely_numpy import lo
|
35 |
import cv2
|
36 |
from PIL import Image
|
37 |
import trimesh
|
@@ -46,16 +45,16 @@ _GPU_INDEX = 0
|
|
46 |
|
47 |
_TITLE = '''One-2-3-45: Any Single Image to 3D Mesh in 45 Seconds without Per-Shape Optimization'''
|
48 |
|
|
|
|
|
49 |
_DESCRIPTION = '''
|
|
|
|
|
|
|
|
|
|
|
50 |
We reconstruct a 3D textured mesh from a single image by initially predicting multi-view images and then lifting them to 3D.
|
51 |
-
[<a href="http://One-2-3-45.com">Project</a>]
|
52 |
-
[<a href="https://github.com/One-2-3-45/One-2-3-45">GitHub</a>]
|
53 |
'''
|
54 |
-
# _HTML = '''<p>[<a href="https://github.com/One-2-3-45/One-2-3-45">GitHub</a>]
|
55 |
-
# <object alt="GitHub Repo stars" src="https://img.shields.io/github/stars/One-2-3-45/One-2-3-45?style=social&link=https%3A%2F%2Fgithub.com%2FOne-2-3-45%2FOne-2-3-45">
|
56 |
-
# </p>'''
|
57 |
-
# _HTML = '<script async defer src="https://buttons.github.io/buttons.js"></script> <a class="github-button" href="https://github.com/One-2-3-45/One-2-3-45" data-icon="octicon-star" data-show-count="true" aria-label="Star One-2-3-45/One-2-3-45 on GitHub">Star</a><p>'
|
58 |
-
|
59 |
_USER_GUIDE = "Please upload an image in the block above (or choose an example above) and click **Run Generation**."
|
60 |
_BBOX_1 = "Predicting bounding box for the input image..."
|
61 |
_BBOX_2 = "Bounding box adjusted. Continue adjusting or **Run Generation**."
|
@@ -184,11 +183,6 @@ class CameraVisualizer:
|
|
184 |
# Extract the new x, y, z coordinates from the rotated coordinates
|
185 |
x, y, z = rotated_coordinates[..., 0], rotated_coordinates[..., 1], rotated_coordinates[..., 2]
|
186 |
|
187 |
-
|
188 |
-
print('x:', lo(x))
|
189 |
-
print('y:', lo(y))
|
190 |
-
print('z:', lo(z))
|
191 |
-
|
192 |
fig.add_trace(go.Surface(
|
193 |
x=x, y=y, z=z,
|
194 |
surfacecolor=self._8bit_image,
|
@@ -316,7 +310,12 @@ def stage1_run(models, device, cam_vis, tmp_dir,
|
|
316 |
output_ims = predict_stage1_gradio(model, input_im, save_path=stage1_dir, adjust_set=list(range(4)), device=device, ddim_steps=ddim_steps, scale=scale)
|
317 |
stage2_steps = 50 # ddim_steps
|
318 |
zero123_infer(model, tmp_dir, indices=[0], device=device, ddim_steps=stage2_steps, scale=scale)
|
319 |
-
|
|
|
|
|
|
|
|
|
|
|
320 |
gen_poses(tmp_dir, elev_output)
|
321 |
show_in_im1 = np.asarray(input_im, dtype=np.uint8)
|
322 |
cam_vis.encode_image(show_in_im1, elev=elev_output)
|
@@ -367,7 +366,7 @@ def stage2_run(models, device, tmp_dir,
|
|
367 |
torch.cuda.empty_cache()
|
368 |
os.chdir(os.path.join(code_dir, 'SparseNeuS_demo_v1/'))
|
369 |
|
370 |
-
bash_script = f'CUDA_VISIBLE_DEVICES={_GPU_INDEX} python exp_runner_generic_blender_val.py --specific_dataset_name {dataset} --mode export_mesh --conf confs/one2345_lod0_val_demo.conf
|
371 |
print(bash_script)
|
372 |
os.system(bash_script)
|
373 |
os.chdir(main_dir_path)
|
@@ -377,13 +376,9 @@ def stage2_run(models, device, tmp_dir,
|
|
377 |
mesh_path = os.path.join(tmp_dir, f"mesh{mesh_ext}")
|
378 |
# Read the textured mesh from .ply file
|
379 |
mesh = trimesh.load_mesh(ply_path)
|
380 |
-
|
381 |
-
angle = np.radians(90)
|
382 |
-
rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
|
383 |
mesh.apply_transform(rotation_matrix)
|
384 |
-
|
385 |
-
angle = np.radians(180)
|
386 |
-
rotation_matrix = trimesh.transformations.rotation_matrix(angle, axis)
|
387 |
mesh.apply_transform(rotation_matrix)
|
388 |
# flip x
|
389 |
mesh.vertices[:, 0] = -mesh.vertices[:, 0]
|
@@ -398,31 +393,16 @@ def stage2_run(models, device, tmp_dir,
|
|
398 |
if not is_rerun:
|
399 |
return (mesh_path)
|
400 |
else:
|
401 |
-
return (mesh_path, [], gr.update(visible=False), gr.update(visible=False))
|
402 |
|
403 |
def nsfw_check(models, raw_im, device='cuda'):
|
404 |
safety_checker_input = models['clip_fe'](raw_im, return_tensors='pt').to(device)
|
405 |
(_, has_nsfw_concept) = models['nsfw'](
|
406 |
images=np.ones((1, 3)), clip_input=safety_checker_input.pixel_values)
|
407 |
-
print('has_nsfw_concept:', has_nsfw_concept)
|
408 |
del safety_checker_input
|
409 |
if np.any(has_nsfw_concept):
|
410 |
print('NSFW content detected.')
|
411 |
-
|
412 |
-
image_width = image_height = 256
|
413 |
-
background_color = (255, 255, 255) # White
|
414 |
-
# Create a blank image
|
415 |
-
image = Image.new("RGB", (image_width, image_height), background_color)
|
416 |
-
from PIL import ImageDraw
|
417 |
-
draw = ImageDraw.Draw(image)
|
418 |
-
text = "Potential NSFW content was detected."
|
419 |
-
text_color = (255, 0, 0)
|
420 |
-
text_position = (10, 123)
|
421 |
-
draw.text(text_position, text, fill=text_color)
|
422 |
-
text = "Please try again with a different image."
|
423 |
-
text_position = (10, 133)
|
424 |
-
draw.text(text_position, text, fill=text_color)
|
425 |
-
return image
|
426 |
else:
|
427 |
print('Safety check passed.')
|
428 |
return False
|
@@ -439,7 +419,7 @@ def preprocess_run(predictor, models, raw_im, preprocess, *bbox_sliders):
|
|
439 |
|
440 |
def on_coords_slider(image, x_min, y_min, x_max, y_max, color=(88, 191, 131, 255)):
|
441 |
"""Draw a bounding box annotation for an image."""
|
442 |
-
print("
|
443 |
image.thumbnail([512, 512], Image.Resampling.LANCZOS)
|
444 |
image_size = image.size
|
445 |
if max(image_size) > 224:
|
@@ -502,15 +482,18 @@ def run_demo(
|
|
502 |
examples_full = [os.path.join(example_folder, x) for x in example_fns if x.endswith('.png')]
|
503 |
|
504 |
# Compose demo layout & data flow.
|
505 |
-
|
506 |
-
|
507 |
-
|
|
|
|
|
|
|
|
|
508 |
gr.Markdown(_DESCRIPTION)
|
509 |
-
# gr.HTML(_HTML)
|
510 |
|
511 |
with gr.Row(variant='panel'):
|
512 |
with gr.Column(scale=1.2):
|
513 |
-
image_block = gr.Image(type='pil', image_mode='RGBA', label='Input image', tool=None)
|
514 |
|
515 |
gr.Examples(
|
516 |
examples=examples_full, # NOTE: elements must match inputs list!
|
@@ -535,7 +518,7 @@ def run_demo(
|
|
535 |
|
536 |
with gr.Column(scale=.8):
|
537 |
with gr.Row():
|
538 |
-
bbox_block = gr.Image(type='pil', label="Bounding box", interactive=False)
|
539 |
sam_block = gr.Image(type='pil', label="SAM output", interactive=False)
|
540 |
max_width = max_height = 256
|
541 |
with gr.Row():
|
@@ -556,20 +539,20 @@ def run_demo(
|
|
556 |
with gr.Column(scale=1.15):
|
557 |
gr.Markdown('Predicted multi-view images')
|
558 |
with gr.Row():
|
559 |
-
view_1 = gr.Image(interactive=False, show_label=False)
|
560 |
-
view_2 = gr.Image(interactive=False, show_label=False)
|
561 |
-
view_3 = gr.Image(interactive=False, show_label=False)
|
562 |
-
view_4 = gr.Image(interactive=False, show_label=False)
|
563 |
with gr.Row():
|
564 |
btn_retry_1 = gr.Checkbox(label='Retry view 1')
|
565 |
btn_retry_2 = gr.Checkbox(label='Retry view 2')
|
566 |
btn_retry_3 = gr.Checkbox(label='Retry view 3')
|
567 |
btn_retry_4 = gr.Checkbox(label='Retry view 4')
|
568 |
with gr.Row():
|
569 |
-
view_5 = gr.Image(interactive=False, show_label=False)
|
570 |
-
view_6 = gr.Image(interactive=False, show_label=False)
|
571 |
-
view_7 = gr.Image(interactive=False, show_label=False)
|
572 |
-
view_8 = gr.Image(interactive=False, show_label=False)
|
573 |
with gr.Row():
|
574 |
btn_retry_5 = gr.Checkbox(label='Retry view 5')
|
575 |
btn_retry_6 = gr.Checkbox(label='Retry view 6')
|
@@ -663,7 +646,7 @@ def run_demo(
|
|
663 |
).success(fn=partial(update_guide, _REGEN_2), outputs=[guide_text], queue=False)
|
664 |
|
665 |
|
666 |
-
demo.launch(
|
667 |
|
668 |
|
669 |
if __name__ == '__main__':
|
|
|
31 |
import plotly.graph_objects as go
|
32 |
from functools import partial
|
33 |
|
|
|
34 |
import cv2
|
35 |
from PIL import Image
|
36 |
import trimesh
|
|
|
45 |
|
46 |
_TITLE = '''One-2-3-45: Any Single Image to 3D Mesh in 45 Seconds without Per-Shape Optimization'''
|
47 |
|
48 |
+
|
49 |
+
# <a style="display:inline-block; margin-left: 1em" href="https://arxiv.org/abs/2306.16928"><img src="https://img.shields.io/badge/arXiv-2306.16928-b31b1b.svg"></a>
|
50 |
_DESCRIPTION = '''
|
51 |
+
<div>
|
52 |
+
<a style="display:inline-block" href="http://one-2-3-45.com"><img src="https://img.shields.io/badge/Project_Homepage-f9f7f7?logo="></a>
|
53 |
+
<a style="display:inline-block; margin-left: .5em" href="https://arxiv.org/abs/2306.16928"><img src="https://img.shields.io/badge/2306.16928-f9f7f7?logo="></a>
|
54 |
+
<a style="display:inline-block; margin-left: .5em" href='https://github.com/One-2-3-45/One-2-3-45'><img src='https://img.shields.io/github/stars/One-2-3-45/One-2-3-45?style=social' /></a>
|
55 |
+
</div>
|
56 |
We reconstruct a 3D textured mesh from a single image by initially predicting multi-view images and then lifting them to 3D.
|
|
|
|
|
57 |
'''
|
|
|
|
|
|
|
|
|
|
|
58 |
_USER_GUIDE = "Please upload an image in the block above (or choose an example above) and click **Run Generation**."
|
59 |
_BBOX_1 = "Predicting bounding box for the input image..."
|
60 |
_BBOX_2 = "Bounding box adjusted. Continue adjusting or **Run Generation**."
|
|
|
183 |
# Extract the new x, y, z coordinates from the rotated coordinates
|
184 |
x, y, z = rotated_coordinates[..., 0], rotated_coordinates[..., 1], rotated_coordinates[..., 2]
|
185 |
|
|
|
|
|
|
|
|
|
|
|
186 |
fig.add_trace(go.Surface(
|
187 |
x=x, y=y, z=z,
|
188 |
surfacecolor=self._8bit_image,
|
|
|
310 |
output_ims = predict_stage1_gradio(model, input_im, save_path=stage1_dir, adjust_set=list(range(4)), device=device, ddim_steps=ddim_steps, scale=scale)
|
311 |
stage2_steps = 50 # ddim_steps
|
312 |
zero123_infer(model, tmp_dir, indices=[0], device=device, ddim_steps=stage2_steps, scale=scale)
|
313 |
+
try:
|
314 |
+
elev_output = estimate_elev(tmp_dir)
|
315 |
+
except:
|
316 |
+
print("Failed to estimate polar angle")
|
317 |
+
elev_output = 90
|
318 |
+
print("Estimated polar angle:", elev_output)
|
319 |
gen_poses(tmp_dir, elev_output)
|
320 |
show_in_im1 = np.asarray(input_im, dtype=np.uint8)
|
321 |
cam_vis.encode_image(show_in_im1, elev=elev_output)
|
|
|
366 |
torch.cuda.empty_cache()
|
367 |
os.chdir(os.path.join(code_dir, 'SparseNeuS_demo_v1/'))
|
368 |
|
369 |
+
bash_script = f'CUDA_VISIBLE_DEVICES={_GPU_INDEX} python exp_runner_generic_blender_val.py --specific_dataset_name {dataset} --mode export_mesh --conf confs/one2345_lod0_val_demo.conf'
|
370 |
print(bash_script)
|
371 |
os.system(bash_script)
|
372 |
os.chdir(main_dir_path)
|
|
|
376 |
mesh_path = os.path.join(tmp_dir, f"mesh{mesh_ext}")
|
377 |
# Read the textured mesh from .ply file
|
378 |
mesh = trimesh.load_mesh(ply_path)
|
379 |
+
rotation_matrix = trimesh.transformations.rotation_matrix(np.pi/2, [1, 0, 0])
|
|
|
|
|
380 |
mesh.apply_transform(rotation_matrix)
|
381 |
+
rotation_matrix = trimesh.transformations.rotation_matrix(np.pi, [0, 0, 1])
|
|
|
|
|
382 |
mesh.apply_transform(rotation_matrix)
|
383 |
# flip x
|
384 |
mesh.vertices[:, 0] = -mesh.vertices[:, 0]
|
|
|
393 |
if not is_rerun:
|
394 |
return (mesh_path)
|
395 |
else:
|
396 |
+
return (mesh_path, gr.update(value=[]), gr.update(visible=False), gr.update(visible=False))
|
397 |
|
398 |
def nsfw_check(models, raw_im, device='cuda'):
|
399 |
safety_checker_input = models['clip_fe'](raw_im, return_tensors='pt').to(device)
|
400 |
(_, has_nsfw_concept) = models['nsfw'](
|
401 |
images=np.ones((1, 3)), clip_input=safety_checker_input.pixel_values)
|
|
|
402 |
del safety_checker_input
|
403 |
if np.any(has_nsfw_concept):
|
404 |
print('NSFW content detected.')
|
405 |
+
return Image.open("unsafe.png")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
406 |
else:
|
407 |
print('Safety check passed.')
|
408 |
return False
|
|
|
419 |
|
420 |
def on_coords_slider(image, x_min, y_min, x_max, y_max, color=(88, 191, 131, 255)):
|
421 |
"""Draw a bounding box annotation for an image."""
|
422 |
+
print("Slider adjusted, drawing bbox...")
|
423 |
image.thumbnail([512, 512], Image.Resampling.LANCZOS)
|
424 |
image_size = image.size
|
425 |
if max(image_size) > 224:
|
|
|
482 |
examples_full = [os.path.join(example_folder, x) for x in example_fns if x.endswith('.png')]
|
483 |
|
484 |
# Compose demo layout & data flow.
|
485 |
+
with gr.Blocks(title=_TITLE, css="style.css") as demo:
|
486 |
+
with gr.Row():
|
487 |
+
with gr.Column(scale=1):
|
488 |
+
gr.Markdown('# ' + _TITLE)
|
489 |
+
with gr.Column(scale=0):
|
490 |
+
gr.DuplicateButton(value='Duplicate Space for private use',
|
491 |
+
elem_id='duplicate-button')
|
492 |
gr.Markdown(_DESCRIPTION)
|
|
|
493 |
|
494 |
with gr.Row(variant='panel'):
|
495 |
with gr.Column(scale=1.2):
|
496 |
+
image_block = gr.Image(type='pil', image_mode='RGBA', height=290, label='Input image', tool=None)
|
497 |
|
498 |
gr.Examples(
|
499 |
examples=examples_full, # NOTE: elements must match inputs list!
|
|
|
518 |
|
519 |
with gr.Column(scale=.8):
|
520 |
with gr.Row():
|
521 |
+
bbox_block = gr.Image(type='pil', label="Bounding box", height=290, interactive=False)
|
522 |
sam_block = gr.Image(type='pil', label="SAM output", interactive=False)
|
523 |
max_width = max_height = 256
|
524 |
with gr.Row():
|
|
|
539 |
with gr.Column(scale=1.15):
|
540 |
gr.Markdown('Predicted multi-view images')
|
541 |
with gr.Row():
|
542 |
+
view_1 = gr.Image(interactive=False, height=200, show_label=False)
|
543 |
+
view_2 = gr.Image(interactive=False, height=200, show_label=False)
|
544 |
+
view_3 = gr.Image(interactive=False, height=200, show_label=False)
|
545 |
+
view_4 = gr.Image(interactive=False, height=200, show_label=False)
|
546 |
with gr.Row():
|
547 |
btn_retry_1 = gr.Checkbox(label='Retry view 1')
|
548 |
btn_retry_2 = gr.Checkbox(label='Retry view 2')
|
549 |
btn_retry_3 = gr.Checkbox(label='Retry view 3')
|
550 |
btn_retry_4 = gr.Checkbox(label='Retry view 4')
|
551 |
with gr.Row():
|
552 |
+
view_5 = gr.Image(interactive=False, height=200, show_label=False)
|
553 |
+
view_6 = gr.Image(interactive=False, height=200, show_label=False)
|
554 |
+
view_7 = gr.Image(interactive=False, height=200, show_label=False)
|
555 |
+
view_8 = gr.Image(interactive=False, height=200, show_label=False)
|
556 |
with gr.Row():
|
557 |
btn_retry_5 = gr.Checkbox(label='Retry view 5')
|
558 |
btn_retry_6 = gr.Checkbox(label='Retry view 6')
|
|
|
646 |
).success(fn=partial(update_guide, _REGEN_2), outputs=[guide_text], queue=False)
|
647 |
|
648 |
|
649 |
+
demo.queue().launch(share=False, max_threads=80) # auth=("admin", os.environ['PASSWD'])
|
650 |
|
651 |
|
652 |
if __name__ == '__main__':
|
pre-requirements.txt
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
|
2 |
-
torch>=
|
3 |
torchvision>=0.13.1
|
4 |
albumentations>=0.4.3
|
5 |
opencv-python>=4.5.5.64
|
@@ -22,8 +22,6 @@ diffusers>=0.12.1
|
|
22 |
datasets[vision]>=2.4.0
|
23 |
carvekit-colab>=4.1.0
|
24 |
rich>=13.3.2
|
25 |
-
lovely-numpy>=0.2.8
|
26 |
-
lovely-tensors>=0.1.14
|
27 |
plotly>=5.13.1
|
28 |
-e git+https://github.com/CompVis/taming-transformers.git#egg=taming-transformers
|
29 |
# elev est
|
@@ -32,7 +30,6 @@ easydict
|
|
32 |
glumpy
|
33 |
gym
|
34 |
h5py
|
35 |
-
imageio
|
36 |
loguru
|
37 |
matplotlib
|
38 |
# mplib
|
@@ -55,18 +52,14 @@ tqdm
|
|
55 |
transforms3d
|
56 |
trimesh
|
57 |
yacs
|
58 |
-
zarr
|
59 |
-
sapien
|
60 |
pyglet==1.5.27
|
61 |
-
wis3d
|
62 |
gdown
|
63 |
git+https://github.com/NVlabs/nvdiffrast.git
|
64 |
-
|
65 |
-
git+https://github.com/openai/shap-e@8625e7c
|
66 |
# segment anything
|
67 |
-
opencv-python
|
68 |
-
pycocotools
|
69 |
-
matplotlib
|
70 |
onnxruntime
|
71 |
onnx
|
72 |
git+https://github.com/facebookresearch/segment-anything.git
|
|
|
1 |
+
--extra-index-url https://download.pytorch.org/whl/cu118
|
2 |
+
torch>=2.0.0
|
3 |
torchvision>=0.13.1
|
4 |
albumentations>=0.4.3
|
5 |
opencv-python>=4.5.5.64
|
|
|
22 |
datasets[vision]>=2.4.0
|
23 |
carvekit-colab>=4.1.0
|
24 |
rich>=13.3.2
|
|
|
|
|
25 |
plotly>=5.13.1
|
26 |
-e git+https://github.com/CompVis/taming-transformers.git#egg=taming-transformers
|
27 |
# elev est
|
|
|
30 |
glumpy
|
31 |
gym
|
32 |
h5py
|
|
|
33 |
loguru
|
34 |
matplotlib
|
35 |
# mplib
|
|
|
52 |
transforms3d
|
53 |
trimesh
|
54 |
yacs
|
55 |
+
# zarr
|
56 |
+
# sapien
|
57 |
pyglet==1.5.27
|
58 |
+
# wis3d
|
59 |
gdown
|
60 |
git+https://github.com/NVlabs/nvdiffrast.git
|
61 |
+
git+https://github.com/openai/CLIP.git
|
|
|
62 |
# segment anything
|
|
|
|
|
|
|
63 |
onnxruntime
|
64 |
onnx
|
65 |
git+https://github.com/facebookresearch/segment-anything.git
|
requirements.txt
CHANGED
@@ -1,12 +1,7 @@
|
|
1 |
# sparseneus
|
2 |
# -e git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0#egg=torchsparse
|
3 |
-
opencv_python
|
4 |
-
trimesh
|
5 |
numpy
|
6 |
pyhocon
|
7 |
icecream
|
8 |
-
tqdm
|
9 |
-
scipy
|
10 |
PyMCubes
|
11 |
-
ninja
|
12 |
-
# sudo apt-get install libsparsehash-dev
|
|
|
1 |
# sparseneus
|
2 |
# -e git+https://github.com/mit-han-lab/torchsparse.git@v1.4.0#egg=torchsparse
|
|
|
|
|
3 |
numpy
|
4 |
pyhocon
|
5 |
icecream
|
|
|
|
|
6 |
PyMCubes
|
7 |
+
ninja
|
|
style.css
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#model-3d-out {
|
2 |
+
height: 400px;
|
3 |
+
}
|
4 |
+
|
5 |
+
#plot-out {
|
6 |
+
height: 450px;
|
7 |
+
}
|
8 |
+
|
9 |
+
#duplicate-button {
|
10 |
+
margin-left: auto;
|
11 |
+
color: #fff;
|
12 |
+
background: #1565c0;
|
13 |
+
}
|
unsafe.png
ADDED
Git LFS Details
|