first commit (all files)
This view is limited to 50 files because it contains too many changes. See raw diff.
- checkpoints/aligned_shape_latents/shapevae-256.ckpt +3 -0
- checkpoints/clip/clip-vit-large-patch14 +1 -0
- checkpoints/image_cond_diffuser_asl/image-ASLDM-256.ckpt +3 -0
- checkpoints/text_cond_diffuser_asl/text-ASLDM-256.ckpt +3 -0
- configs/aligned_shape_latents/shapevae-256.yaml +46 -0
- configs/deploy/clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000_deploy.yaml +181 -0
- configs/deploy/clip_sp+pk_aslperceiver=256_01_4096_8_udt=03.yaml +180 -0
- configs/image_cond_diffuser_asl/image-ASLDM-256.yaml +97 -0
- configs/text_cond_diffuser_asl/text-ASLDM-256.yaml +98 -0
- example_data/image/car.jpg +0 -0
- example_data/surface/surface.npz +3 -0
- gradio_app.py +372 -0
- gradio_cached_dir/example/img_example/airplane.jpg +0 -0
- gradio_cached_dir/example/img_example/alita.jpg +0 -0
- gradio_cached_dir/example/img_example/bag.jpg +0 -0
- gradio_cached_dir/example/img_example/bench.jpg +0 -0
- gradio_cached_dir/example/img_example/building.jpg +0 -0
- gradio_cached_dir/example/img_example/burger.jpg +0 -0
- gradio_cached_dir/example/img_example/car.jpg +0 -0
- gradio_cached_dir/example/img_example/loopy.jpg +0 -0
- gradio_cached_dir/example/img_example/mario.jpg +0 -0
- gradio_cached_dir/example/img_example/ship.jpg +0 -0
- inference.py +181 -0
- michelangelo/__init__.py +1 -0
- michelangelo/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/data/__init__.py +1 -0
- michelangelo/data/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/data/__pycache__/asl_webdataset.cpython-39.pyc +0 -0
- michelangelo/data/__pycache__/tokenizer.cpython-39.pyc +0 -0
- michelangelo/data/__pycache__/transforms.cpython-39.pyc +0 -0
- michelangelo/data/__pycache__/utils.cpython-39.pyc +0 -0
- michelangelo/data/templates.json +69 -0
- michelangelo/data/transforms.py +407 -0
- michelangelo/data/utils.py +59 -0
- michelangelo/graphics/__init__.py +1 -0
- michelangelo/graphics/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/graphics/primitives/__init__.py +9 -0
- michelangelo/graphics/primitives/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/graphics/primitives/__pycache__/extract_texture_map.cpython-39.pyc +0 -0
- michelangelo/graphics/primitives/__pycache__/mesh.cpython-39.pyc +0 -0
- michelangelo/graphics/primitives/__pycache__/volume.cpython-39.pyc +0 -0
- michelangelo/graphics/primitives/mesh.py +114 -0
- michelangelo/graphics/primitives/volume.py +21 -0
- michelangelo/models/__init__.py +1 -0
- michelangelo/models/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/models/asl_diffusion/__init__.py +1 -0
- michelangelo/models/asl_diffusion/__pycache__/__init__.cpython-39.pyc +0 -0
- michelangelo/models/asl_diffusion/__pycache__/asl_udt.cpython-39.pyc +0 -0
- michelangelo/models/asl_diffusion/__pycache__/clip_asl_diffuser_pl_module.cpython-39.pyc +0 -0
- michelangelo/models/asl_diffusion/__pycache__/inference_utils.cpython-39.pyc +0 -0
checkpoints/aligned_shape_latents/shapevae-256.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0391b81c36240e8f766fedf4265df599884193a5ef65354525074b9a00887454
size 3934164973
checkpoints/clip/clip-vit-large-patch14
ADDED
@@ -0,0 +1 @@
Subproject commit 8d052a0f05efbaefbc9e8786ba291cfdf93e5bff
checkpoints/image_cond_diffuser_asl/image-ASLDM-256.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:83eda8e4f81034dee7674b3ce1ff03a4900181f0f0d7bc461e1a8692fb379b0f
size 1999253985
checkpoints/text_cond_diffuser_asl/text-ASLDM-256.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:af546b1f877a41d71f63c3a11394779e77c954002c50dc8e75359338224f615b
size 4076140813
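The three .ckpt entries above are Git LFS pointer files (a version line, the object sha256, and the byte size) rather than the checkpoints themselves, and the CLIP directory is a git submodule reference; the real weights have to be fetched (for example with git lfs pull) before they can be loaded. Below is a minimal sketch, with a helper name of our own that is not part of this commit, for detecting an un-fetched pointer before handing a path to torch.load:

# Hypothetical helper (not part of this commit): detect an un-fetched Git LFS pointer.
import os

def is_lfs_pointer(path: str) -> bool:
    # A real LFS pointer is a tiny text file whose first bytes spell "version".
    if os.path.getsize(path) > 1024:
        return False
    with open(path, "rb") as f:
        return f.read(7) == b"version"

ckpt = "./checkpoints/aligned_shape_latents/shapevae-256.ckpt"
if is_lfs_pointer(ckpt):
    raise RuntimeError(f"{ckpt} is still an LFS pointer; fetch the LFS objects first.")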
configs/aligned_shape_latents/shapevae-256.yaml
ADDED
@@ -0,0 +1,46 @@
model:
  target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
  params:
    shape_module_cfg:
      target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
      params:
        num_latents: 256
        embed_dim: 64
        point_feats: 3  # normal
        num_freqs: 8
        include_pi: false
        heads: 12
        width: 768
        num_encoder_layers: 8
        num_decoder_layers: 16
        use_ln_post: true
        init_scale: 0.25
        qkv_bias: false
        use_checkpoint: true
    aligned_module_cfg:
      target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
      params:
        clip_model_version: "./checkpoints/clip/clip-vit-large-patch14"

    loss_cfg:
      target: michelangelo.models.tsal.loss.ContrastKLNearFar
      params:
        contrast_weight: 0.1
        near_weight: 0.1
        kl_weight: 0.001

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0
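Every config in this commit follows the same target/params convention: target names an importable class and params supplies its keyword arguments, which gradio_app.py and inference.py resolve through get_config_from_file and instantiate_from_config from michelangelo.utils.misc. The snippet below is only a sketch of that pattern, not the repo's own implementation, which may differ in details such as checkpoint handling:

# Sketch of the target/params instantiation pattern used by these configs.
# The real helpers live in michelangelo.utils.misc; this is an illustrative stand-in.
import importlib
from omegaconf import OmegaConf

def get_obj_from_str(string: str):
    module, cls = string.rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)

def instantiate_from_config(config, **extra_kwargs):
    # config is a dict-like node with "target" and optional "params".
    return get_obj_from_str(config["target"])(**config.get("params", dict()), **extra_kwargs)

cfg = OmegaConf.load("./configs/aligned_shape_latents/shapevae-256.yaml")
# vae = instantiate_from_config(cfg.model)   # would build AlignedShapeAsLatentPLModule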
configs/deploy/clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000_deploy.yaml
ADDED
@@ -0,0 +1,181 @@
name: "0630_clip_aslp_3df+3dc+abo+gso+toy+t10k+obj+sp+pk=256_01_4096_8_ckpt_250000_udt=110M_finetune_500000"
#wandb:
#  project: "image_diffuser"
#  offline: false


training:
  steps: 500000
  use_amp: true
  ckpt_path: ""
  base_lr: 1.e-4
  gradient_clip_val: 5.0
  gradient_clip_algorithm: "norm"
  every_n_train_steps: 5000
  val_check_interval: 1024
  limit_val_batches: 16

dataset:
  target: michelangelo.data.asl_webdataset.MultiAlignedShapeLatentModule
  params:
    batch_size: 38
    num_workers: 4
    val_num_workers: 4
    buffer_size: 256
    return_normal: true
    random_crop: false
    surface_sampling: true
    pc_size: &pc_size 4096
    image_size: 384
    mean: &mean [0.5, 0.5, 0.5]
    std: &std [0.5, 0.5, 0.5]
    cond_stage_key: "image"

    meta_info:
      3D-FUTURE:
        render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"

      ABO:
        render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/ABO"

      GSO:
        render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/GSO"

      TOYS4K:
        render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"

      3DCaricShop:
        render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"

      Thingi10K:
        render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"

      shapenet:
        render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
        tar_folder: "/root/workspace/datasets/make_tars/shapenet"

      pokemon:
        render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
        tar_folder: "/root/workspace/datasets/make_tars/pokemon"

      objaverse:
        render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
        tar_folder: "/root/workspace/datasets/make_tars/objaverse"

model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3  # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: false
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
            # clip_model_version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"

        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenCLIPImageGridEmbedder
      params:
        version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
        # version: "/root/workspace/checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1

    first_stage_key: "surface"
    cond_stage_key: "image"
    scale_by_std: false

    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 6  # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 1024
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false  # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"

logger:
  target: michelangelo.utils.trainings.mesh_log_callback.ImageConditionalASLDiffuserLogger
  params:
    step_frequency: 2000
    num_samples: 4
    sample_times: 4
    mean: *mean
    std: *std
    bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
    octree_depth: 7
    num_chunks: 10000
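The scheduler_cfg block above pairs a DDPMScheduler (the "noise" entry, used to perturb latents during training) with a DDIMScheduler (the "denoise" entry, used for the 50-step sampling loop). Read literally, those params map onto the diffusers constructors as in the sketch below; this only illustrates how the config values are consumed and is not code from this commit:

# Sketch: the noise/denoise entries above map directly onto diffusers schedulers.
from diffusers.schedulers import DDPMScheduler, DDIMScheduler

noise_scheduler = DDPMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    variance_type="fixed_small",
    clip_sample=False,
)

denoise_scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)
denoise_scheduler.set_timesteps(50)  # num_inference_steps from scheduler_cfg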
configs/deploy/clip_sp+pk_aslperceiver=256_01_4096_8_udt=03.yaml
ADDED
@@ -0,0 +1,180 @@
name: "0428_clip_subsp+pk_sal_perceiver=256_01_4096_8_udt=03"
#wandb:
#  project: "image_diffuser"
#  offline: false

training:
  steps: 500000
  use_amp: true
  ckpt_path: ""
  base_lr: 1.e-4
  gradient_clip_val: 5.0
  gradient_clip_algorithm: "norm"
  every_n_train_steps: 5000
  val_check_interval: 1024
  limit_val_batches: 16

# dataset
dataset:
  target: michelangelo.data.asl_torch_dataset.MultiAlignedShapeImageTextModule
  params:
    batch_size: 38
    num_workers: 4
    val_num_workers: 4
    buffer_size: 256
    return_normal: true
    random_crop: false
    surface_sampling: true
    pc_size: &pc_size 4096
    image_size: 384
    mean: &mean [0.5, 0.5, 0.5]
    std: &std [0.5, 0.5, 0.5]

    cond_stage_key: "text"

    meta_info:
      3D-FUTURE:
        render_folder: "/root/workspace/cq_workspace/datasets/3D-FUTURE/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3D-FUTURE"

      ABO:
        render_folder: "/root/workspace/cq_workspace/datasets/ABO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/ABO"

      GSO:
        render_folder: "/root/workspace/cq_workspace/datasets/GSO/renders"
        tar_folder: "/root/workspace/datasets/make_tars/GSO"

      TOYS4K:
        render_folder: "/root/workspace/cq_workspace/datasets/TOYS4K/TOYS4K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/TOYS4K"

      3DCaricShop:
        render_folder: "/root/workspace/cq_workspace/datasets/3DCaricShop/renders"
        tar_folder: "/root/workspace/datasets/make_tars/3DCaricShop"

      Thingi10K:
        render_folder: "/root/workspace/cq_workspace/datasets/Thingi10K/renders"
        tar_folder: "/root/workspace/datasets/make_tars/Thingi10K"

      shapenet:
        render_folder: "/root/workspace/cq_workspace/datasets/shapenet/renders"
        tar_folder: "/root/workspace/datasets/make_tars/shapenet"

      pokemon:
        render_folder: "/root/workspace/cq_workspace/datasets/pokemon/renders"
        tar_folder: "/root/workspace/datasets/make_tars/pokemon"

      objaverse:
        render_folder: "/root/workspace/cq_workspace/datasets/objaverse/renders"
        tar_folder: "/root/workspace/datasets/make_tars/objaverse"

model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        # ckpt_path: "/root/workspace/cq_workspace/michelangelo/experiments/aligned_shape_latents/clip_aslperceiver_sp+pk_01_01/ckpt/ckpt-step=00230000.ckpt"
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3  # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: true
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"

        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
      params:
        version: "/mnt/shadow_cv_training/stevenxxliu/checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1
        max_length: 77

    first_stage_key: "surface"
    cond_stage_key: "text"
    scale_by_std: false

    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 8  # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 768
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false  # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"

logger:
  target: michelangelo.utils.trainings.mesh_log_callback.TextConditionalASLDiffuserLogger
  params:
    step_frequency: 1000
    num_samples: 4
    sample_times: 4
    bounds: [-1.1, -1.1, -1.1, 1.1, 1.1, 1.1]
    octree_depth: 7
    num_chunks: 10000
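In both deploy configs, zero_embedding_radio: 0.1 on the condition encoder together with guidance_scale: 7.5 in scheduler_cfg points to classifier-free guidance: presumably the condition embedding is dropped (zeroed) for a fraction of training samples, and at sampling time the conditional and unconditional denoiser outputs are blended. A generic sketch of that blend follows; the denoiser signature here is illustrative, not the repo's exact interface:

# Generic classifier-free guidance blend, as implied by guidance_scale in scheduler_cfg.
import torch

def cfg_denoise(denoiser, x_t, t, cond_emb, uncond_emb, guidance_scale=7.5):
    # Run the denoiser with and without conditioning, then extrapolate toward the
    # conditional prediction.
    eps_cond = denoiser(x_t, t, context=cond_emb)
    eps_uncond = denoiser(x_t, t, context=uncond_emb)  # e.g. a zeroed embedding
    return eps_uncond + guidance_scale * (eps_cond - eps_uncond)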
configs/image_cond_diffuser_asl/image-ASLDM-256.yaml
ADDED
@@ -0,0 +1,97 @@
model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3  # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: false
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "./checkpoints/clip/clip-vit-large-patch14"

        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenCLIPImageGridEmbedder
      params:
        version: "./checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1

    first_stage_key: "surface"
    cond_stage_key: "image"
    scale_by_std: false

    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 6  # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 1024
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false  # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"
configs/text_cond_diffuser_asl/text-ASLDM-256.yaml
ADDED
@@ -0,0 +1,98 @@
model:
  target: michelangelo.models.asl_diffusion.clip_asl_diffuser_pl_module.ClipASLDiffuser
  params:
    first_stage_config:
      target: michelangelo.models.tsal.asl_pl_module.AlignedShapeAsLatentPLModule
      params:
        shape_module_cfg:
          target: michelangelo.models.tsal.sal_perceiver.AlignedShapeLatentPerceiver
          params:
            num_latents: &num_latents 256
            embed_dim: &embed_dim 64
            point_feats: 3  # normal
            num_freqs: 8
            include_pi: false
            heads: 12
            width: 768
            num_encoder_layers: 8
            num_decoder_layers: 16
            use_ln_post: true
            init_scale: 0.25
            qkv_bias: false
            use_checkpoint: true
        aligned_module_cfg:
          target: michelangelo.models.tsal.clip_asl_module.CLIPAlignedShapeAsLatentModule
          params:
            clip_model_version: "./checkpoints/clip/clip-vit-large-patch14"

        loss_cfg:
          target: torch.nn.Identity

    cond_stage_config:
      target: michelangelo.models.conditional_encoders.encoder_factory.FrozenAlignedCLIPTextEmbedder
      params:
        version: "./checkpoints/clip/clip-vit-large-patch14"
        zero_embedding_radio: 0.1
        max_length: 77

    first_stage_key: "surface"
    cond_stage_key: "text"
    scale_by_std: false

    denoiser_cfg:
      target: michelangelo.models.asl_diffusion.asl_udt.ConditionalASLUDTDenoiser
      params:
        input_channels: *embed_dim
        output_channels: *embed_dim
        n_ctx: *num_latents
        width: 768
        layers: 8  # 2 * 6 + 1 = 13
        heads: 12
        context_dim: 768
        init_scale: 1.0
        skip_ln: true
        use_checkpoint: true

    scheduler_cfg:
      guidance_scale: 7.5
      num_inference_steps: 50
      eta: 0.0

      noise:
        target: diffusers.schedulers.DDPMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          variance_type: "fixed_small"
          clip_sample: false
      denoise:
        target: diffusers.schedulers.DDIMScheduler
        params:
          num_train_timesteps: 1000
          beta_start: 0.00085
          beta_end: 0.012
          beta_schedule: "scaled_linear"
          clip_sample: false  # clip sample to -1~1
          set_alpha_to_one: false
          steps_offset: 1

    optimizer_cfg:
      optimizer:
        target: torch.optim.AdamW
        params:
          betas: [0.9, 0.99]
          eps: 1.e-6
          weight_decay: 1.e-2

      scheduler:
        target: michelangelo.utils.trainings.lr_scheduler.LambdaWarmUpCosineFactorScheduler
        params:
          warm_up_steps: 5000
          f_start: 1.e-6
          f_min: 1.e-3
          f_max: 1.0

    loss_cfg:
      loss_type: "mse"
example_data/image/car.jpg
ADDED
example_data/surface/surface.npz
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0893e44d82ada683baa656a718beaf6ec19fc28b6816b451f56645530d5bb962
size 1201024
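This surface.npz (also an LFS pointer here) is what inference.py's load_surface reads further down in this commit: it expects two arrays, "points" and "normals", from which 4096 samples are drawn. A small sketch of producing a compatible file from an arbitrary mesh follows; the input path and sample count are illustrative, not part of the repo:

# Sketch: write a surface.npz with the keys load_surface() expects ("points", "normals").
import numpy as np
import trimesh

mesh = trimesh.load("some_mesh.obj", force="mesh")            # placeholder input mesh
points, face_idx = trimesh.sample.sample_surface(mesh, 100000)
normals = mesh.face_normals[face_idx]

np.savez("surface.npz",
         points=points.astype(np.float32),
         normals=normals.astype(np.float32))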
gradio_app.py
ADDED
@@ -0,0 +1,372 @@
# -*- coding: utf-8 -*-
import os
import time
from collections import OrderedDict
from PIL import Image
import torch
import trimesh
from typing import Optional, List
from einops import repeat, rearrange
import numpy as np
from michelangelo.models.tsal.tsal_base import Latent2MeshOutput
from michelangelo.utils.misc import get_config_from_file, instantiate_from_config
from michelangelo.utils.visualizers.pythreejs_viewer import PyThreeJSViewer
from michelangelo.utils.visualizers import html_util

import gradio as gr


gradio_cached_dir = "./gradio_cached_dir"
os.makedirs(gradio_cached_dir, exist_ok=True)

save_mesh = False

state = ""
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

box_v = 1.1
viewer = PyThreeJSViewer(settings={}, render_mode="WEBSITE")

image_model_config_dict = OrderedDict({
    "ASLDM-256-obj": {
        "config": "./configs/image_cond_diffuser_asl/image-ASLDM-256.yaml",
        "ckpt_path": "./checkpoints/image_cond_diffuser_asl/image-ASLDM-256.ckpt",
    },
})

text_model_config_dict = OrderedDict({
    "ASLDM-256": {
        "config": "./configs/text_cond_diffuser_asl/text-ASLDM-256.yaml",
        "ckpt_path": "./checkpoints/text_cond_diffuser_asl/text-ASLDM-256.ckpt",
    },
})


class InferenceModel(object):
    model = None
    name = ""


text2mesh_model = InferenceModel()
image2mesh_model = InferenceModel()


def set_state(s):
    global state
    state = s
    print(s)


def output_to_html_frame(mesh_outputs: List[Latent2MeshOutput], bbox_size: float,
                         image: Optional[np.ndarray] = None,
                         html_frame: bool = False):
    global viewer

    for i in range(len(mesh_outputs)):
        mesh = mesh_outputs[i]
        if mesh is None:
            continue

        mesh_v = mesh.mesh_v.copy()
        mesh_v[:, 0] += i * np.max(bbox_size)
        mesh_v[:, 2] += np.max(bbox_size)
        viewer.add_mesh(mesh_v, mesh.mesh_f)

    mesh_tag = viewer.to_html(html_frame=False)

    if image is not None:
        image_tag = html_util.to_image_embed_tag(image)
        frame = f"""
        <table border = "1">
            <tr>
                <td>{image_tag}</td>
                <td>{mesh_tag}</td>
            </tr>
        </table>
        """
    else:
        frame = mesh_tag

    if html_frame:
        frame = html_util.to_html_frame(frame)

    viewer.reset()

    return frame


def load_model(model_name: str, model_config_dict: dict, inference_model: InferenceModel):
    global device

    if inference_model.name == model_name:
        model = inference_model.model
    else:
        assert model_name in model_config_dict

        if inference_model.model is not None:
            del inference_model.model

        config_ckpt_path = model_config_dict[model_name]

        model_config = get_config_from_file(config_ckpt_path["config"])
        if hasattr(model_config, "model"):
            model_config = model_config.model

        model = instantiate_from_config(model_config, ckpt_path=config_ckpt_path["ckpt_path"])
        model = model.to(device)
        model = model.eval()

        inference_model.model = model
        inference_model.name = model_name

    return model


def prepare_img(image: np.ndarray):
    image_pt = torch.tensor(image).float()
    image_pt = image_pt / 255 * 2 - 1
    image_pt = rearrange(image_pt, "h w c -> c h w")

    return image_pt


def prepare_model_viewer(fp):
    content = f"""
    <head>
        <script
            type="module" src="https://ajax.googleapis.com/ajax/libs/model-viewer/3.1.1/model-viewer.min.js">
        </script>
    </head>
    <body>
        <model-viewer
            style="height: 150px; width: 150px;"
            rotation-per-second="10deg"
            id="t1"
            src="file/gradio_cached_dir/{fp}"
            environment-image="neutral"
            camera-target="0m 0m 0m"
            orientation="0deg 90deg 170deg"
            shadow-intensity="1"
            ar:true
            auto-rotate
            camera-controls>
        </model-viewer>
    </body>
    """
    return content


def prepare_html_frame(content):
    frame = f"""
    <html>
        <body>
            {content}
        </body>
    </html>
    """
    return frame


def prepare_html_body(content):
    frame = f"""
    <body>
        {content}
    </body>
    """
    return frame


def post_process_mesh_outputs(mesh_outputs):
    # html_frame = output_to_html_frame(mesh_outputs, 2 * box_v, image=None, html_frame=True)
    html_content = output_to_html_frame(mesh_outputs, 2 * box_v, image=None, html_frame=False)
    html_frame = prepare_html_frame(html_content)

    # filename = f"{time.time()}.html"
    filename = f"text-256-{time.time()}.html"
    html_filepath = os.path.join(gradio_cached_dir, filename)
    with open(html_filepath, "w") as writer:
        writer.write(html_frame)

    '''
    Bug: the iframe tag does not work in Gradio.
    Chrome returns "No resource with given URL found".
    Solutions:
        https://github.com/gradio-app/gradio/issues/884
    Due to the security restrictions, the server can only find files parallel to gradio_app.py.
    The path has the format "file/TARGET_FILE_PATH".
    '''

    iframe_tag = f'<iframe src="file/gradio_cached_dir/{filename}" width="600%" height="400" frameborder="0"></iframe>'

    filelist = []
    filenames = []
    for i, mesh in enumerate(mesh_outputs):
        mesh.mesh_f = mesh.mesh_f[:, ::-1]
        mesh_output = trimesh.Trimesh(mesh.mesh_v, mesh.mesh_f)

        name = str(i) + "_out_mesh.obj"
        filepath = gradio_cached_dir + "/" + name
        mesh_output.export(filepath, include_normals=True)
        filelist.append(filepath)
        filenames.append(name)

    filelist.append(html_filepath)
    return iframe_tag, filelist


def image2mesh(image: np.ndarray,
               model_name: str = "subsp+pk_asl_perceiver=01_01_udt=03",
               num_samples: int = 4,
               guidance_scale: int = 7.5,
               octree_depth: int = 7):
    global device, gradio_cached_dir, image_model_config_dict, box_v

    # load model
    model = load_model(model_name, image_model_config_dict, image2mesh_model)

    # prepare image inputs
    image_pt = prepare_img(image)
    image_pt = repeat(image_pt, "c h w -> b c h w", b=num_samples)

    sample_inputs = {
        "image": image_pt
    }
    mesh_outputs = model.sample(
        sample_inputs,
        sample_times=1,
        guidance_scale=guidance_scale,
        return_intermediates=False,
        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
        octree_depth=octree_depth,
    )[0]

    iframe_tag, filelist = post_process_mesh_outputs(mesh_outputs)

    return iframe_tag, gr.update(value=filelist, visible=True)


def text2mesh(text: str,
              model_name: str = "subsp+pk_asl_perceiver=01_01_udt=03",
              num_samples: int = 4,
              guidance_scale: int = 7.5,
              octree_depth: int = 7):
    global device, gradio_cached_dir, text_model_config_dict, text2mesh_model, box_v

    # load model
    model = load_model(model_name, text_model_config_dict, text2mesh_model)

    # prepare text inputs
    sample_inputs = {
        "text": [text] * num_samples
    }
    mesh_outputs = model.sample(
        sample_inputs,
        sample_times=1,
        guidance_scale=guidance_scale,
        return_intermediates=False,
        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
        octree_depth=octree_depth,
    )[0]

    iframe_tag, filelist = post_process_mesh_outputs(mesh_outputs)

    return iframe_tag, gr.update(value=filelist, visible=True)


example_dir = './gradio_cached_dir/example/img_example'

first_page_items = [
    'alita.jpg',
    'burger.jpg',
    'loopy.jpg',
    'building.jpg',
    'mario.jpg',
    'car.jpg',
    'airplane.jpg',
    'bag.jpg',
    'bench.jpg',
    'ship.jpg'
]
raw_example_items = [
    # (os.path.join(example_dir, x), x)
    os.path.join(example_dir, x)
    for x in os.listdir(example_dir)
    if x.endswith(('.jpg', '.png'))
]
example_items = [x for x in raw_example_items if os.path.basename(x) in first_page_items] + \
                [x for x in raw_example_items if os.path.basename(x) not in first_page_items]

example_text = [
    ["A 3D model of a car; Audi A6."],
    ["A 3D model of a police car; Highway Patrol Charger"]
]


def set_cache(data: gr.SelectData):
    img_name = os.path.basename(example_items[data.index])
    return os.path.join(example_dir, img_name), os.path.join(img_name)


def disable_cache():
    return ""


with gr.Blocks() as app:
    gr.Markdown("# Michelangelo")
    gr.Markdown("## [Github](https://github.com/NeuralCarver/Michelangelo) | [Arxiv](https://arxiv.org/abs/2306.17115) | [Project Page](https://neuralcarver.github.io/michelangelo/)")
    gr.Markdown("Michelangelo is a conditional 3D shape generation system trained on a shape-image-text aligned latent representation.")
    gr.Markdown("### Hint:")
    gr.Markdown("1. We provide two APIs: image-conditioned generation and text-conditioned generation.")
    gr.Markdown("2. Note that the image-conditioned model is trained on multiple 3D datasets such as ShapeNet and Objaverse.")
    gr.Markdown("3. We provide some examples for you to try. You can also upload images or text as input.")
    gr.Markdown("4. Welcome to share your amazing results with us, and thanks for your interest in our work!")

    with gr.Row():
        with gr.Column():

            with gr.Tab("Image to 3D"):
                img = gr.Image(label="Image")
                gr.Markdown("For the best results, we suggest that the uploaded images meet the following three criteria: 1. the object is positioned at the center of the image, 2. the image is square, and 3. the background is relatively clean.")
                btn_generate_img2obj = gr.Button(value="Generate")

                with gr.Accordion("Advanced settings", open=False):
                    image_dropdown_models = gr.Dropdown(label="Model", value="ASLDM-256-obj", choices=list(image_model_config_dict.keys()))
                    num_samples = gr.Slider(label="samples", value=4, minimum=1, maximum=8, step=1)
                    guidance_scale = gr.Slider(label="Guidance scale", value=7.5, minimum=3.0, maximum=10.0, step=0.1)
                    octree_depth = gr.Slider(label="Octree Depth (for 3D model)", value=7, minimum=4, maximum=8, step=1)

                cache_dir = gr.Textbox(value="", visible=False)
                examples = gr.Gallery(label='Examples', value=example_items, elem_id="gallery", allow_preview=False, columns=[4], object_fit="contain")

            with gr.Tab("Text to 3D"):
                prompt = gr.Textbox(label="Prompt", placeholder="A 3D model of motorcar; Porche Cayenne Turbo.")
                gr.Markdown("For the best results, we suggest that the prompt follows 'A 3D model of CATEGORY; DESCRIPTION'. For example, A 3D model of motorcar; Porche Cayenne Turbo.")
                btn_generate_txt2obj = gr.Button(value="Generate")

                with gr.Accordion("Advanced settings", open=False):
                    text_dropdown_models = gr.Dropdown(label="Model", value="ASLDM-256", choices=list(text_model_config_dict.keys()))
                    num_samples = gr.Slider(label="samples", value=4, minimum=1, maximum=8, step=1)
                    guidance_scale = gr.Slider(label="Guidance scale", value=7.5, minimum=3.0, maximum=10.0, step=0.1)
                    octree_depth = gr.Slider(label="Octree Depth (for 3D model)", value=7, minimum=4, maximum=8, step=1)

                gr.Markdown("#### Examples:")
                gr.Markdown("1. A 3D model of a coupe; Audi A6.")
                gr.Markdown("2. A 3D model of a motorcar; Hummer H2 SUT.")
                gr.Markdown("3. A 3D model of an airplane; Airbus.")
                gr.Markdown("4. A 3D model of a fighter aircraft; Attack Fighter.")
                gr.Markdown("5. A 3D model of a chair; Simple Wooden Chair.")
                gr.Markdown("6. A 3D model of a laptop computer; Dell Laptop.")
                gr.Markdown("7. A 3D model of a lamp; ceiling light.")
                gr.Markdown("8. A 3D model of a rifle; AK47.")
                gr.Markdown("9. A 3D model of a knife; Sword.")
                gr.Markdown("10. A 3D model of a vase; Plant in pot.")

        with gr.Column():
            model_3d = gr.HTML()
            file_out = gr.File(label="Files", visible=False)

    outputs = [model_3d, file_out]

    img.upload(disable_cache, outputs=cache_dir)
    examples.select(set_cache, outputs=[img, cache_dir])
    print(f'line:404: {cache_dir}')
    btn_generate_img2obj.click(image2mesh, inputs=[img, image_dropdown_models, num_samples,
                                                   guidance_scale, octree_depth],
                               outputs=outputs, api_name="generate_img2obj")

    btn_generate_txt2obj.click(text2mesh, inputs=[prompt, text_dropdown_models, num_samples,
                                                  guidance_scale, octree_depth],
                               outputs=outputs, api_name="generate_txt2obj")

app.launch(server_name="0.0.0.0", server_port=8008, share=False)
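gradio_app.py serves the demo on 0.0.0.0:8008 and registers its two click handlers with api_name="generate_img2obj" and "generate_txt2obj", so the same endpoints can also be driven programmatically. The sketch below uses gradio_client with the argument order taken from the inputs list above; whether this works exactly as shown depends on the installed Gradio and gradio_client versions, so treat it as an assumption rather than part of this commit:

# Sketch: call the named endpoints exposed by gradio_app.py from another process.
# Assumes the app is running locally and gradio_client is installed.
from gradio_client import Client

client = Client("http://localhost:8008/")
iframe_html, files = client.predict(
    "./example_data/image/car.jpg",   # img
    "ASLDM-256-obj",                  # model dropdown
    4,                                # num_samples
    7.5,                              # guidance_scale
    7,                                # octree_depth
    api_name="/generate_img2obj",
)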
gradio_cached_dir/example/img_example/airplane.jpg
ADDED
gradio_cached_dir/example/img_example/alita.jpg
ADDED
gradio_cached_dir/example/img_example/bag.jpg
ADDED
gradio_cached_dir/example/img_example/bench.jpg
ADDED
gradio_cached_dir/example/img_example/building.jpg
ADDED
gradio_cached_dir/example/img_example/burger.jpg
ADDED
gradio_cached_dir/example/img_example/car.jpg
ADDED
gradio_cached_dir/example/img_example/loopy.jpg
ADDED
gradio_cached_dir/example/img_example/mario.jpg
ADDED
gradio_cached_dir/example/img_example/ship.jpg
ADDED
inference.py
ADDED
@@ -0,0 +1,181 @@
# -*- coding: utf-8 -*-
import os
import time
from collections import OrderedDict
from typing import Optional, List
import argparse
from functools import partial

from einops import repeat, rearrange
import numpy as np
from PIL import Image
import trimesh
import cv2

import torch
import pytorch_lightning as pl

from michelangelo.models.tsal.tsal_base import Latent2MeshOutput
from michelangelo.models.tsal.inference_utils import extract_geometry
from michelangelo.utils.misc import get_config_from_file, instantiate_from_config
from michelangelo.utils.visualizers.pythreejs_viewer import PyThreeJSViewer
from michelangelo.utils.visualizers import html_util


def load_model(args):

    model_config = get_config_from_file(args.config_path)
    if hasattr(model_config, "model"):
        model_config = model_config.model

    model = instantiate_from_config(model_config, ckpt_path=args.ckpt_path)
    model = model.cuda()
    model = model.eval()

    return model


def load_surface(fp):

    with np.load(fp) as input_pc:
        surface = input_pc['points']
        normal = input_pc['normals']

    rng = np.random.default_rng()
    ind = rng.choice(surface.shape[0], 4096, replace=False)
    surface = torch.FloatTensor(surface[ind])
    normal = torch.FloatTensor(normal[ind])

    surface = torch.cat([surface, normal], dim=-1).unsqueeze(0).cuda()

    return surface


def prepare_image(args, number_samples=2):

    image = cv2.imread(f"{args.image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image_pt = torch.tensor(image).float()
    image_pt = image_pt / 255 * 2 - 1
    image_pt = rearrange(image_pt, "h w c -> c h w")

    image_pt = repeat(image_pt, "c h w -> b c h w", b=number_samples)

    return image_pt


def save_output(args, mesh_outputs):

    os.makedirs(args.output_dir, exist_ok=True)
    for i, mesh in enumerate(mesh_outputs):
        mesh.mesh_f = mesh.mesh_f[:, ::-1]
        mesh_output = trimesh.Trimesh(mesh.mesh_v, mesh.mesh_f)

        name = str(i) + "_out_mesh.obj"
        mesh_output.export(os.path.join(args.output_dir, name), include_normals=True)

    print(f'-----------------------------------------------------------------------------')
    print(f'>>> Finished and mesh saved in {args.output_dir}')
    print(f'-----------------------------------------------------------------------------')

    return 0


def reconstruction(args, model, bounds=(-1.25, -1.25, -1.25, 1.25, 1.25, 1.25), octree_depth=7, num_chunks=10000):

    surface = load_surface(args.pointcloud_path)

    # encoding
    shape_embed, shape_latents = model.model.encode_shape_embed(surface, return_latents=True)
    shape_zq, posterior = model.model.shape_model.encode_kl_embed(shape_latents)

    # decoding
    latents = model.model.shape_model.decode(shape_zq)
    geometric_func = partial(model.model.shape_model.query_geometry, latents=latents)

    # reconstruction
    mesh_v_f, has_surface = extract_geometry(
        geometric_func=geometric_func,
        device=surface.device,
        batch_size=surface.shape[0],
        bounds=bounds,
        octree_depth=octree_depth,
        num_chunks=num_chunks,
    )
    recon_mesh = trimesh.Trimesh(mesh_v_f[0][0], mesh_v_f[0][1])

    # save
    os.makedirs(args.output_dir, exist_ok=True)
    recon_mesh.export(os.path.join(args.output_dir, 'reconstruction.obj'))

    print(f'-----------------------------------------------------------------------------')
    print(f'>>> Finished and mesh saved in {os.path.join(args.output_dir, "reconstruction.obj")}')
    print(f'-----------------------------------------------------------------------------')

    return 0


def image2mesh(args, model, guidance_scale=7.5, box_v=1.1, octree_depth=7):

    sample_inputs = {
        "image": prepare_image(args)
    }

    mesh_outputs = model.sample(
        sample_inputs,
        sample_times=1,
        guidance_scale=guidance_scale,
        return_intermediates=False,
        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
        octree_depth=octree_depth,
    )[0]

    save_output(args, mesh_outputs)

    return 0


def text2mesh(args, model, num_samples=2, guidance_scale=7.5, box_v=1.1, octree_depth=7):

    sample_inputs = {
        "text": [args.text] * num_samples
    }
    mesh_outputs = model.sample(
        sample_inputs,
        sample_times=1,
        guidance_scale=guidance_scale,
        return_intermediates=False,
        bounds=[-box_v, -box_v, -box_v, box_v, box_v, box_v],
        octree_depth=octree_depth,
    )[0]

    save_output(args, mesh_outputs)

    return 0


task_dict = {
    'reconstruction': reconstruction,
    'image2mesh': image2mesh,
    'text2mesh': text2mesh,
}

if __name__ == "__main__":
    '''
    1. Reconstruct point cloud
    2. Image-conditioned generation
    3. Text-conditioned generation
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument("--task", type=str, choices=['reconstruction', 'image2mesh', 'text2mesh'], required=True)
    parser.add_argument("--config_path", type=str, required=True)
    parser.add_argument("--ckpt_path", type=str, required=True)
    parser.add_argument("--pointcloud_path", type=str, default='./example_data/surface.npz', help='Path to the input point cloud')
    parser.add_argument("--image_path", type=str, help='Path to the input image')
    parser.add_argument("--text", type=str, help='Input text in the format: A 3D model of motorcar; Porsche 911.')
    parser.add_argument("--output_dir", type=str, default='./output')
    parser.add_argument("-s", "--seed", type=int, default=0)
    args = parser.parse_args()

    pl.seed_everything(args.seed)

    print(f'-----------------------------------------------------------------------------')
    print(f'>>> Running {args.task}')
    args.output_dir = os.path.join(args.output_dir, args.task)
    print(f'>>> Output directory: {args.output_dir}')
    print(f'-----------------------------------------------------------------------------')

    task_dict[args.task](args, load_model(args))
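inference.py wires its three tasks through argparse, so each mode is a single command. The invocations below are a sketch assembled from the flags defined above, using the config and checkpoint paths added in this commit; the input files are the bundled examples, and the pairings are assumptions about which config belongs to which task:

# Sketch: the three entry points of inference.py as subprocess invocations.
import subprocess

subprocess.run([
    "python", "inference.py", "--task", "reconstruction",
    "--config_path", "./configs/aligned_shape_latents/shapevae-256.yaml",
    "--ckpt_path", "./checkpoints/aligned_shape_latents/shapevae-256.ckpt",
    "--pointcloud_path", "./example_data/surface/surface.npz",
], check=True)

subprocess.run([
    "python", "inference.py", "--task", "image2mesh",
    "--config_path", "./configs/image_cond_diffuser_asl/image-ASLDM-256.yaml",
    "--ckpt_path", "./checkpoints/image_cond_diffuser_asl/image-ASLDM-256.ckpt",
    "--image_path", "./example_data/image/car.jpg",
], check=True)

subprocess.run([
    "python", "inference.py", "--task", "text2mesh",
    "--config_path", "./configs/text_cond_diffuser_asl/text-ASLDM-256.yaml",
    "--ckpt_path", "./checkpoints/text_cond_diffuser_asl/text-ASLDM-256.ckpt",
    "--text", "A 3D model of motorcar; Porsche 911.",
], check=True)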
michelangelo/__init__.py
ADDED
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
michelangelo/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (176 Bytes).
michelangelo/data/__init__.py
ADDED
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
michelangelo/data/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (181 Bytes).
michelangelo/data/__pycache__/asl_webdataset.cpython-39.pyc
ADDED
Binary file (9.43 kB).
michelangelo/data/__pycache__/tokenizer.cpython-39.pyc
ADDED
Binary file (6.48 kB).
michelangelo/data/__pycache__/transforms.cpython-39.pyc
ADDED
Binary file (11.4 kB).
michelangelo/data/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.13 kB).
michelangelo/data/templates.json
ADDED
@@ -0,0 +1,69 @@
{
    "shape": [
        "a point cloud model of {}.",
        "There is a {} in the scene.",
        "There is the {} in the scene.",
        "a photo of a {} in the scene.",
        "a photo of the {} in the scene.",
        "a photo of one {} in the scene.",
        "itap of a {}.",
        "itap of my {}.",
        "itap of the {}.",
        "a photo of a {}.",
        "a photo of my {}.",
        "a photo of the {}.",
        "a photo of one {}.",
        "a photo of many {}.",
        "a good photo of a {}.",
        "a good photo of the {}.",
        "a bad photo of a {}.",
        "a bad photo of the {}.",
        "a photo of a nice {}.",
        "a photo of the nice {}.",
        "a photo of a cool {}.",
        "a photo of the cool {}.",
        "a photo of a weird {}.",
        "a photo of the weird {}.",
        "a photo of a small {}.",
        "a photo of the small {}.",
        "a photo of a large {}.",
        "a photo of the large {}.",
        "a photo of a clean {}.",
        "a photo of the clean {}.",
        "a photo of a dirty {}.",
        "a photo of the dirty {}.",
        "a bright photo of a {}.",
        "a bright photo of the {}.",
        "a dark photo of a {}.",
        "a dark photo of the {}.",
        "a photo of a hard to see {}.",
        "a photo of the hard to see {}.",
        "a low resolution photo of a {}.",
        "a low resolution photo of the {}.",
        "a cropped photo of a {}.",
        "a cropped photo of the {}.",
        "a close-up photo of a {}.",
        "a close-up photo of the {}.",
        "a jpeg corrupted photo of a {}.",
        "a jpeg corrupted photo of the {}.",
        "a blurry photo of a {}.",
        "a blurry photo of the {}.",
        "a pixelated photo of a {}.",
        "a pixelated photo of the {}.",
        "a black and white photo of the {}.",
        "a black and white photo of a {}",
        "a plastic {}.",
        "the plastic {}.",
        "a toy {}.",
        "the toy {}.",
        "a plushie {}.",
        "the plushie {}.",
        "a cartoon {}.",
        "the cartoon {}.",
        "an embroidered {}.",
        "the embroidered {}.",
        "a painting of the {}.",
        "a painting of a {}."
    ]

}
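templates.json holds CLIP-style prompt templates with a {} placeholder for the object category; presumably the text pipeline formats each template with a label to build a bank of captions per shape. A small sketch of that use, covering only loading and formatting (the repo's tokenizer code is not shown in this 50-file view):

# Sketch: expand the "shape" templates for one category label.
import json

with open("michelangelo/data/templates.json") as f:
    templates = json.load(f)["shape"]

label = "chair"
captions = [t.format(label) for t in templates]
print(captions[0])   # "a point cloud model of chair."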
michelangelo/data/transforms.py
ADDED
@@ -0,0 +1,407 @@
# -*- coding: utf-8 -*-
import os
import time
import numpy as np
import warnings
import random
from omegaconf.listconfig import ListConfig
from webdataset import pipelinefilter
import torch
import torchvision.transforms.functional as TVF
from torchvision.transforms import InterpolationMode
from torchvision.transforms.transforms import _interpolation_modes_from_int
from typing import Sequence

from michelangelo.utils import instantiate_from_config


def _uid_buffer_pick(buf_dict, rng):
    uid_keys = list(buf_dict.keys())
    selected_uid = rng.choice(uid_keys)
    buf = buf_dict[selected_uid]

    k = rng.randint(0, len(buf) - 1)
    sample = buf[k]
    buf[k] = buf[-1]
    buf.pop()

    if len(buf) == 0:
        del buf_dict[selected_uid]

    return sample


def _add_to_buf_dict(buf_dict, sample):
    key = sample["__key__"]
    uid, uid_sample_id = key.split("_")
    if uid not in buf_dict:
        buf_dict[uid] = []
    buf_dict[uid].append(sample)

    return buf_dict


def _uid_shuffle(data, bufsize=1000, initial=100, rng=None, handler=None):
    """Shuffle the data in the stream.

    This uses a buffer of size `bufsize`. Shuffling at
    startup is less random; this is traded off against
    yielding samples quickly.

    data: iterator
    bufsize: buffer size for shuffling
    returns: iterator
    rng: either random module or random.Random instance

    """
    if rng is None:
        rng = random.Random(int((os.getpid() + time.time()) * 1e9))
    initial = min(initial, bufsize)
    buf_dict = dict()
    current_samples = 0
    for sample in data:
        _add_to_buf_dict(buf_dict, sample)
        current_samples += 1

        if current_samples < bufsize:
            try:
                _add_to_buf_dict(buf_dict, next(data))  # skipcq: PYL-R1708
                current_samples += 1
            except StopIteration:
                pass

        if current_samples >= initial:
            current_samples -= 1
            yield _uid_buffer_pick(buf_dict, rng)

    while current_samples > 0:
        current_samples -= 1
        yield _uid_buffer_pick(buf_dict, rng)


uid_shuffle = pipelinefilter(_uid_shuffle)


class RandomSample(object):
    def __init__(self,
                 num_volume_samples: int = 1024,
                 num_near_samples: int = 1024):

        super().__init__()

        self.num_volume_samples = num_volume_samples
        self.num_near_samples = num_near_samples

    def __call__(self, sample):
        rng = np.random.default_rng()

        # 1. sample surface input
        total_surface = sample["surface"]
        ind = rng.choice(total_surface.shape[0], replace=False)
        surface = total_surface[ind]

        # 2. sample volume/near geometric points
        vol_points = sample["vol_points"]
        vol_label = sample["vol_label"]
        near_points = sample["near_points"]
        near_label = sample["near_label"]

        ind = rng.choice(vol_points.shape[0], self.num_volume_samples, replace=False)
        vol_points = vol_points[ind]
        vol_label = vol_label[ind]
        vol_points_labels = np.concatenate([vol_points, vol_label[:, np.newaxis]], axis=1)

        ind = rng.choice(near_points.shape[0], self.num_near_samples, replace=False)
        near_points = near_points[ind]
        near_label = near_label[ind]
        near_points_labels = np.concatenate([near_points, near_label[:, np.newaxis]], axis=1)

        # concat sampled volume and near points
        geo_points = np.concatenate([vol_points_labels, near_points_labels], axis=0)

        sample = {
            "surface": surface,
            "geo_points": geo_points
        }

        return sample


class SplitRandomSample(object):
    def __init__(self,
                 use_surface_sample: bool = False,
                 num_surface_samples: int = 4096,
                 num_volume_samples: int = 1024,
                 num_near_samples: int = 1024):

        super().__init__()

        self.use_surface_sample = use_surface_sample
        self.num_surface_samples = num_surface_samples
        self.num_volume_samples = num_volume_samples
        self.num_near_samples = num_near_samples

    def __call__(self, sample):

        rng = np.random.default_rng()

        # 1. sample surface input
        surface = sample["surface"]

        if self.use_surface_sample:
            replace = surface.shape[0] < self.num_surface_samples
            ind = rng.choice(surface.shape[0], self.num_surface_samples, replace=replace)
            surface = surface[ind]

        # 2. sample volume/near geometric points
        vol_points = sample["vol_points"]
        vol_label = sample["vol_label"]
        near_points = sample["near_points"]
        near_label = sample["near_label"]

        ind = rng.choice(vol_points.shape[0], self.num_volume_samples, replace=False)
        vol_points = vol_points[ind]
        vol_label = vol_label[ind]
        vol_points_labels = np.concatenate([vol_points, vol_label[:, np.newaxis]], axis=1)

        ind = rng.choice(near_points.shape[0], self.num_near_samples, replace=False)
        near_points = near_points[ind]
        near_label = near_label[ind]
        near_points_labels = np.concatenate([near_points, near_label[:, np.newaxis]], axis=1)

        # concat sampled volume and near points
        geo_points = np.concatenate([vol_points_labels, near_points_labels], axis=0)

        sample = {
            "surface": surface,
            "geo_points": geo_points
        }

        return sample


class FeatureSelection(object):

    VALID_SURFACE_FEATURE_DIMS = {
        "none": [0, 1, 2],  # xyz
        "watertight_normal": [0, 1, 2, 3, 4, 5],  # xyz, normal
        "normal": [0, 1, 2, 6, 7, 8]
    }

    def __init__(self, surface_feature_type: str):

        self.surface_feature_type = surface_feature_type
        self.surface_dims = self.VALID_SURFACE_FEATURE_DIMS[surface_feature_type]

    def __call__(self, sample):
        sample["surface"] = sample["surface"][:, self.surface_dims]
        return sample


class AxisScaleTransform(object):
    def __init__(self, interval=(0.75, 1.25), jitter=True, jitter_scale=0.005):
        assert isinstance(interval, (tuple, list, ListConfig))
        self.interval = interval
        self.min_val = interval[0]
        self.max_val = interval[1]
        self.inter_size = interval[1] - interval[0]
        self.jitter = jitter
        self.jitter_scale = jitter_scale

    def __call__(self, sample):

        surface = sample["surface"][..., 0:3]
        geo_points = sample["geo_points"][..., 0:3]

        scaling = torch.rand(1, 3) * self.inter_size + self.min_val
        # print(scaling)
        surface = surface * scaling
        geo_points = geo_points * scaling

        scale = (1 / torch.abs(surface).max().item()) * 0.999999
        surface *= scale
        geo_points *= scale

        if self.jitter:
            surface += self.jitter_scale * torch.randn_like(surface)
            surface.clamp_(min=-1.015, max=1.015)

        sample["surface"][..., 0:3] = surface
        sample["geo_points"][..., 0:3] = geo_points

        return sample


class ToTensor(object):

    def __init__(self, tensor_keys=("surface", "geo_points", "tex_points")):
        self.tensor_keys = tensor_keys

    def __call__(self, sample):
        for key in self.tensor_keys:
            if key not in sample:
                continue

            sample[key] = torch.tensor(sample[key], dtype=torch.float32)

        return sample


class AxisScale(object):
    def __init__(self, interval=(0.75, 1.25), jitter=True, jitter_scale=0.005):
        assert isinstance(interval, (tuple, list, ListConfig))
        self.interval = interval
        self.jitter = jitter
        self.jitter_scale = jitter_scale

    def __call__(self, surface, *args):
        scaling = torch.rand(1, 3) * 0.5 + 0.75
        # print(scaling)
        surface = surface * scaling
        scale = (1 / torch.abs(surface).max().item()) * 0.999999
        surface *= scale

        args_outputs = []
        for _arg in args:
            _arg = _arg * scaling * scale
            args_outputs.append(_arg)

        if self.jitter:
            surface += self.jitter_scale * torch.randn_like(surface)
            surface.clamp_(min=-1, max=1)

        if len(args) == 0:
            return surface
        else:
            return surface, *args_outputs


class RandomResize(torch.nn.Module):
    """Apply randomly Resize with a given probability."""

    def __init__(
        self,
        size,
        resize_radio=(0.5, 1),
        allow_resize_interpolations=(InterpolationMode.BICUBIC, InterpolationMode.BILINEAR, InterpolationMode.BILINEAR),
        interpolation=InterpolationMode.BICUBIC,
        max_size=None,
        antialias=None,
    ):
        super().__init__()
        if not isinstance(size, (int, Sequence)):
            raise TypeError(f"Size should be int or sequence. Got {type(size)}")
        if isinstance(size, Sequence) and len(size) not in (1, 2):
            raise ValueError("If size is a sequence, it should have 1 or 2 values")

        self.size = size
        self.max_size = max_size
        # Backward compatibility with integer value
        if isinstance(interpolation, int):
            warnings.warn(
                "Argument 'interpolation' of type int is deprecated since 0.13 and will be removed in 0.15. "
                "Please use InterpolationMode enum."
            )
            interpolation = _interpolation_modes_from_int(interpolation)

        self.interpolation = interpolation
        self.antialias = antialias

        self.resize_radio = resize_radio
        self.allow_resize_interpolations = allow_resize_interpolations

    def random_resize_params(self):
        radio = torch.rand(1) * (self.resize_radio[1] - self.resize_radio[0]) + self.resize_radio[0]

        if isinstance(self.size, int):
            size = int(self.size * radio)
        elif isinstance(self.size, Sequence):
            size = list(self.size)
            size = (int(size[0] * radio), int(size[1] * radio))
        else:
            raise RuntimeError()

        interpolation = self.allow_resize_interpolations[
            torch.randint(low=0, high=len(self.allow_resize_interpolations), size=(1,))
        ]
        return size, interpolation

    def forward(self, img):
        size, interpolation = self.random_resize_params()
        img = TVF.resize(img, size, interpolation, self.max_size, self.antialias)
        img = TVF.resize(img, self.size, self.interpolation, self.max_size, self.antialias)
        return img

    def __repr__(self) -> str:
        detail = f"(size={self.size}, interpolation={self.interpolation.value},"
        detail += f"max_size={self.max_size}, antialias={self.antialias}), resize_radio={self.resize_radio}"
        return f"{self.__class__.__name__}{detail}"


class Compose(object):
    """Composes several transforms together. This transform does not support torchscript.
    Please, see the note below.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])

    .. note::
        In order to script the transformations, please use ``torch.nn.Sequential`` as below.

        >>> transforms = torch.nn.Sequential(
        >>>     transforms.CenterCrop(10),
        >>>     transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        >>> )
        >>> scripted_transforms = torch.jit.script(transforms)

        Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require
        `lambda` functions or ``PIL.Image``.

    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, *args):
        for t in self.transforms:
            args = t(*args)
        return args

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += '    {0}'.format(t)
        format_string += '\n)'
        return format_string


def identity(*args, **kwargs):
    if len(args) == 1:
        return args[0]
    else:
        return args


def build_transforms(cfg):

    if cfg is None:
        return identity

    transforms = []

    for transform_name, cfg_instance in cfg.items():
        transform_instance = instantiate_from_config(cfg_instance)
        transforms.append(transform_instance)
        print(f"Build transform: {transform_instance}")

    transforms = Compose(transforms)

    return transforms
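As an illustration (not part of the commit), the point-cloud transforms above are applied one after another to a sample dict. Only the dict keys below are dictated by the classes in this file; the array shapes and the 9-channel surface layout (xyz plus two normal variants) are assumptions made for the sketch:

import numpy as np

# Hypothetical sample; shapes are illustrative only.
sample = {
    "surface": np.random.randn(100000, 9).astype(np.float32),
    "vol_points": np.random.randn(8192, 3).astype(np.float32),
    "vol_label": np.random.randint(0, 2, 8192).astype(np.float32),
    "near_points": np.random.randn(8192, 3).astype(np.float32),
    "near_label": np.random.randint(0, 2, 8192).astype(np.float32),
}

sample = SplitRandomSample(use_surface_sample=True, num_surface_samples=4096)(sample)
sample = FeatureSelection(surface_feature_type="normal")(sample)   # keep xyz + normal channels
sample = ToTensor()(sample)
# sample["surface"]: (4096, 6) float32 tensor, sample["geo_points"]: (2048, 4) float32 tensor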
michelangelo/data/utils.py
ADDED
@@ -0,0 +1,59 @@
# -*- coding: utf-8 -*-

import torch
import numpy as np


def worker_init_fn(_):
    worker_info = torch.utils.data.get_worker_info()
    worker_id = worker_info.id

    # dataset = worker_info.dataset
    # split_size = dataset.num_records // worker_info.num_workers
    # # reset num_records to the true number to retain reliable length information
    # dataset.sample_ids = dataset.valid_ids[worker_id * split_size:(worker_id + 1) * split_size]
    # current_id = np.random.choice(len(np.random.get_state()[1]), 1)
    # return np.random.seed(np.random.get_state()[1][current_id] + worker_id)

    return np.random.seed(np.random.get_state()[1][0] + worker_id)


def collation_fn(samples, combine_tensors=True, combine_scalars=True):
    """

    Args:
        samples (list[dict]):
        combine_tensors:
        combine_scalars:

    Returns:

    """

    result = {}

    keys = samples[0].keys()

    for key in keys:
        result[key] = []

    for sample in samples:
        for key in keys:
            val = sample[key]
            result[key].append(val)

    for key in keys:
        val_list = result[key]
        if isinstance(val_list[0], (int, float)):
            if combine_scalars:
                result[key] = np.array(result[key])

        elif isinstance(val_list[0], torch.Tensor):
            if combine_tensors:
                result[key] = torch.stack(val_list)

        elif isinstance(val_list[0], np.ndarray):
            if combine_tensors:
                result[key] = np.stack(val_list)

    return result
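As an illustration (not part of the commit), collation_fn and worker_init_fn are the kind of helpers one would pass to torch.utils.data.DataLoader as collate_fn and worker_init_fn. On its own, the collation stacks tensor and array fields and gathers scalar fields into a NumPy array:

import torch

# Hypothetical two-sample batch; the field names are arbitrary.
samples = [
    {"surface": torch.randn(4096, 6), "uid": 0},
    {"surface": torch.randn(4096, 6), "uid": 1},
]
batch = collation_fn(samples)
# batch["surface"]: tensor of shape (2, 4096, 6); batch["uid"]: np.array([0, 1])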
michelangelo/graphics/__init__.py
ADDED
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
michelangelo/graphics/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (185 Bytes).
michelangelo/graphics/primitives/__init__.py
ADDED
@@ -0,0 +1,9 @@
# -*- coding: utf-8 -*-

from .volume import generate_dense_grid_points

from .mesh import (
    MeshOutput,
    save_obj,
    savemeshtes2
)
michelangelo/graphics/primitives/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (334 Bytes).
michelangelo/graphics/primitives/__pycache__/extract_texture_map.cpython-39.pyc
ADDED
Binary file (2.46 kB).
michelangelo/graphics/primitives/__pycache__/mesh.cpython-39.pyc
ADDED
Binary file (2.93 kB).
michelangelo/graphics/primitives/__pycache__/volume.cpython-39.pyc
ADDED
Binary file (860 Bytes).
michelangelo/graphics/primitives/mesh.py
ADDED
@@ -0,0 +1,114 @@
# -*- coding: utf-8 -*-

import os
import cv2
import numpy as np
import PIL.Image
from typing import Optional

import trimesh


def save_obj(pointnp_px3, facenp_fx3, fname):
    fid = open(fname, "w")
    write_str = ""
    for pidx, p in enumerate(pointnp_px3):
        pp = p
        write_str += "v %f %f %f\n" % (pp[0], pp[1], pp[2])

    for i, f in enumerate(facenp_fx3):
        f1 = f + 1
        write_str += "f %d %d %d\n" % (f1[0], f1[1], f1[2])
    fid.write(write_str)
    fid.close()
    return


def savemeshtes2(pointnp_px3, tcoords_px2, facenp_fx3, facetex_fx3, tex_map, fname):
    fol, na = os.path.split(fname)
    na, _ = os.path.splitext(na)

    matname = "%s/%s.mtl" % (fol, na)
    fid = open(matname, "w")
    fid.write("newmtl material_0\n")
    fid.write("Kd 1 1 1\n")
    fid.write("Ka 0 0 0\n")
    fid.write("Ks 0.4 0.4 0.4\n")
    fid.write("Ns 10\n")
    fid.write("illum 2\n")
    fid.write("map_Kd %s.png\n" % na)
    fid.close()
    ####

    fid = open(fname, "w")
    fid.write("mtllib %s.mtl\n" % na)

    for pidx, p in enumerate(pointnp_px3):
        pp = p
        fid.write("v %f %f %f\n" % (pp[0], pp[1], pp[2]))

    for pidx, p in enumerate(tcoords_px2):
        pp = p
        fid.write("vt %f %f\n" % (pp[0], pp[1]))

    fid.write("usemtl material_0\n")
    for i, f in enumerate(facenp_fx3):
        f1 = f + 1
        f2 = facetex_fx3[i] + 1
        fid.write("f %d/%d %d/%d %d/%d\n" % (f1[0], f2[0], f1[1], f2[1], f1[2], f2[2]))
    fid.close()

    PIL.Image.fromarray(np.ascontiguousarray(tex_map), "RGB").save(
        os.path.join(fol, "%s.png" % na))

    return


class MeshOutput(object):

    def __init__(self,
                 mesh_v: np.ndarray,
                 mesh_f: np.ndarray,
                 vertex_colors: Optional[np.ndarray] = None,
                 uvs: Optional[np.ndarray] = None,
                 mesh_tex_idx: Optional[np.ndarray] = None,
                 tex_map: Optional[np.ndarray] = None):

        self.mesh_v = mesh_v
        self.mesh_f = mesh_f
        self.vertex_colors = vertex_colors
        self.uvs = uvs
        self.mesh_tex_idx = mesh_tex_idx
        self.tex_map = tex_map

    def contain_uv_texture(self):
        return (self.uvs is not None) and (self.mesh_tex_idx is not None) and (self.tex_map is not None)

    def contain_vertex_colors(self):
        return self.vertex_colors is not None

    def export(self, fname):

        if self.contain_uv_texture():
            savemeshtes2(
                self.mesh_v,
                self.uvs,
                self.mesh_f,
                self.mesh_tex_idx,
                self.tex_map,
                fname
            )

        elif self.contain_vertex_colors():
            mesh_obj = trimesh.Trimesh(vertices=self.mesh_v, faces=self.mesh_f, vertex_colors=self.vertex_colors)
            mesh_obj.export(fname)

        else:
            save_obj(
                self.mesh_v,
                self.mesh_f,
                fname
            )
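As an illustration (not part of the commit), MeshOutput.export picks a writer based on what the mesh carries: textured meshes go through savemeshtes2, vertex-colored meshes through trimesh, and bare geometry through save_obj. A minimal sketch with made-up geometry:

import numpy as np

# Hypothetical single-triangle mesh with no UVs or vertex colors.
verts = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]])
faces = np.array([[0, 1, 2]])

mesh = MeshOutput(mesh_v=verts, mesh_f=faces)
mesh.export("triangle.obj")   # falls back to save_obj, writing plain v/f lines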
michelangelo/graphics/primitives/volume.py
ADDED
@@ -0,0 +1,21 @@
# -*- coding: utf-8 -*-

import numpy as np


def generate_dense_grid_points(bbox_min: np.ndarray,
                               bbox_max: np.ndarray,
                               octree_depth: int,
                               indexing: str = "ij"):
    length = bbox_max - bbox_min
    num_cells = np.exp2(octree_depth)
    x = np.linspace(bbox_min[0], bbox_max[0], int(num_cells) + 1, dtype=np.float32)
    y = np.linspace(bbox_min[1], bbox_max[1], int(num_cells) + 1, dtype=np.float32)
    z = np.linspace(bbox_min[2], bbox_max[2], int(num_cells) + 1, dtype=np.float32)
    [xs, ys, zs] = np.meshgrid(x, y, z, indexing=indexing)
    xyz = np.stack((xs, ys, zs), axis=-1)
    xyz = xyz.reshape(-1, 3)
    grid_size = [int(num_cells) + 1, int(num_cells) + 1, int(num_cells) + 1]

    return xyz, grid_size, length
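As an illustration (not part of the commit), generate_dense_grid_points builds a dense query grid over a bounding box, presumably for sampling the implicit field during mesh extraction; at octree_depth d it returns the (2^d + 1)^3 cell-corner coordinates. The bounding box below is an assumed unit-cube example:

import numpy as np

bbox_min = np.array([-1.0, -1.0, -1.0])
bbox_max = np.array([1.0, 1.0, 1.0])

xyz, grid_size, length = generate_dense_grid_points(bbox_min, bbox_max, octree_depth=7)
# xyz: (129**3, 3) float32 query points, grid_size: [129, 129, 129], length: array([2., 2., 2.])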
michelangelo/models/__init__.py
ADDED
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
michelangelo/models/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (183 Bytes).
michelangelo/models/asl_diffusion/__init__.py
ADDED
@@ -0,0 +1 @@
# -*- coding: utf-8 -*-
michelangelo/models/asl_diffusion/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (197 Bytes).
michelangelo/models/asl_diffusion/__pycache__/asl_udt.cpython-39.pyc
ADDED
Binary file (2.64 kB).
michelangelo/models/asl_diffusion/__pycache__/clip_asl_diffuser_pl_module.cpython-39.pyc
ADDED
Binary file (9.87 kB).
michelangelo/models/asl_diffusion/__pycache__/inference_utils.cpython-39.pyc
ADDED
Binary file (1.75 kB).