Ryukijano committed on
Commit ed41af7
1 Parent(s): bf40592

Commit MV diffusion

.gitattributes CHANGED
@@ -32,3 +32,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img1.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img2.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img3.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img4.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img5.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img6.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img7.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img8.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img_temp2.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img1.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img2.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img3.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img4.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img5.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img6.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img7.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: MVDiffusion
+ emoji: 🐢
+ colorFrom: blue
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.35.2
+ app_file: app.py
+ pinned: false
+ python_version: 3.9.16
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,347 @@
+ import os
+ import torch
+ import torch.nn as nn
+ import yaml
+ import cv2
+ import numpy as np
+ from PIL import Image
+ import gradio as gr
+ from functools import partial
+ import lib.Equirec2Perspec as E2P
+ import lib.Perspec2Equirec as P2E
+ import lib.multi_Perspec2Equirec as m_P2E
+ import openai
+ from model import Model
+
+ def get_K_R(FOV, THETA, PHI, height, width):
+     # Pinhole intrinsics K and world rotation R for a view with the given
+     # horizontal FOV and yaw (THETA) / pitch (PHI), all in degrees.
+     f = 0.5 * width * 1 / np.tan(0.5 * FOV / 180.0 * np.pi)
+     cx = (width - 1) / 2.0
+     cy = (height - 1) / 2.0
+     K = np.array([
+         [f, 0, cx],
+         [0, f, cy],
+         [0, 0, 1],
+     ], np.float32)
+
+     y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+     x_axis = np.array([1.0, 0.0, 0.0], np.float32)
+     R1, _ = cv2.Rodrigues(y_axis * np.radians(THETA))
+     R2, _ = cv2.Rodrigues(np.dot(R1, x_axis) * np.radians(PHI))
+     R = R2 @ R1
+     return K, R
+
+ if __name__=='__main__':
+     cfg_path = 'configs/train_mv.yaml'
+     config = yaml.load(open(cfg_path, 'rb'), Loader=yaml.SafeLoader)
+     config['height'] = 512
+     config['width'] = 512
+     config['length'] = 8
+     config['model_path'] = 'weights/last.ckpt'
+
+     demo_model = Model(config)
+     state_dict = torch.load(config['model_path'])['state_dict']
+     demo_model.load_state_dict(state_dict, strict=False)
+     demo_model = demo_model.cuda()
+
+     batch = torch.load('batch.pth')
+
+     example1 = [
+         "A room with a sofa and coffee table for relaxing.",
+         "A corner sofa is surrounded by plants.",
+         "A comfy sofa, bookshelf, and lamp for reading.",
+         "A bright room with a sofa, TV, and games.",
+         "A stylish sofa and desk setup for work.",
+         "A sofa, dining table, and chairs for gatherings.",
+         "A colorful sofa, art, and music fill the room.",
+         "A sofa, yoga mat, and meditation corner for calm."
+     ]
+     example2 = [
+         "A room with a sofa and coffee table for relaxing, cartoon style",
+         "A corner sofa is surrounded by plants, cartoon style",
+         "A comfy sofa, bookshelf, and lamp for reading, cartoon style",
+         "A bright room with a sofa, TV, and games, cartoon style",
+         "A stylish sofa and desk setup for work, cartoon style",
+         "A sofa, dining table, and chairs for gatherings, cartoon style",
+         "A colorful sofa, art, and music fill the room, cartoon style",
+         "A sofa, yoga mat, and meditation corner for calm, cartoon style"
+     ]
+
+     example3 = [
+         "A room with a sofa and coffee table for relaxing, oil painting style",
+         "A corner sofa is surrounded by plants, oil painting style",
+         "A comfy sofa, bookshelf, and lamp for reading, oil painting style",
+         "A bright room with a sofa, TV, and games, oil painting style",
+         "A stylish sofa and desk setup for work, oil painting style",
+         "A sofa, dining table, and chairs for gatherings, oil painting style",
+         "A colorful sofa, art, and music fill the room, oil painting style",
+         "A sofa, yoga mat, and meditation corner for calm, oil painting style"
+     ]
+
+     example4 = [
+         "A Japanese room with muted-colored tatami mats.",
+         "A Japanese room with a simple, folded futon to one side.",
+         "A Japanese room with a low table at its center.",
+         "A Japanese room with shoji screens that softly divide the space.",
+         "A Japanese room with an alcove holding an elegant scroll and flowers.",
+         "A Japanese room with a tea set resting on a bamboo tray.",
+         "A Japanese room with a carved wooden cupboard against a wall.",
+         "A Japanese room with a traditional lamp that gently lights the space."
+     ]
+     example6 = [
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'To the left of the island, a stainless-steel refrigerator stands tall.',
+         'To the left of the island, a stainless-steel refrigerator stands tall.',
+         'a sink surrounded by cabinets',
+         'a sink surrounded by cabinets',
+         'To the right of the sink, built-in wooden cabinets painted in a muted color.'
+     ]
+
+     example7 = [
+         "Cobblestone streets curl between old buildings.",
+         "Shops and cafes display signs and emit pleasant smells.",
+         "A fruit market scents the air with fresh citrus.",
+         "A fountain adds calm to one side of the scene.",
+         "Bicycles rest against walls and posts.",
+         "Flowers in boxes color the windows.",
+         "Flowers in boxes color the windows.",
+         "Cobblestone streets curl between old buildings."
+     ]
+
+     example8 = [
+         "The patio is open and airy.",
+         "A table and chairs sit in the middle.",
+         "Next to the table are flowers.",
+         "Colorful flowers fill the planters.",
+         "A grill stands ready for barbecues.",
+         "A grill stands ready for barbecues.",
+         "The patio overlooks a lush garden.",
+         "The patio overlooks a lush garden."
+     ]
+
+     example9 = [
+         "A Chinese palace with curved roofs.",
+         "A Chinese palace; red and gold accents gleam in the sun.",
+         "A Chinese palace with a view of mountains in front.",
+         "A view of mountains in front.",
+         "A Chinese palace with a view of mountains in front.",
+         "A Chinese palace with a tree beside it.",
+         "A Chinese palace with a tree beside it.",
+         "A Chinese palace with a tree beside it."
+     ]
+
+ example_b1="This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop, a sink surrounded by cabinets. To the left of the island, a stainless-steel refrigerator stands tall. To the right of the sink, built-in wooden cabinets painted in a muted."
138
+ example_b2="Bursting with vibrant hues and exaggerated proportions, the cartoon-styled room sparkled with whimsy and cheer, with floating shelves crammed with oddly shaped trinkets, a comically oversized polka-dot armchair perched near a gravity-defying, tilted lamp, and the candy-striped wallpaper creating a playful backdrop to the merry chaos, exuding a sense of fun and boundless imagination."
139
+ example_b3="Bathed in the pulsating glow of neon lights that painted stark contrasts of shadow and color, the cyberpunk room was a high-tech, low-life sanctuary, where sleek, metallic surfaces met jagged, improvised tech; a wall of glitchy monitors flickered with unending streams of data, and the buzz of electric current and the low hum of cooling fans formed a dystopian symphony, adding to the room's relentless, gritty energy."
140
+ example_b4="Majestically rising towards the heavens, the snow-capped mountain stood, its jagged peaks cloaked in a shroud of ethereal clouds, its rugged slopes a stark contrast against the serene azure sky, and its silent grandeur exuding an air of ancient wisdom and timeless solitude, commanding awe and reverence from all who beheld it."
141
+ example_b5='Bathed in the soft, dappled light of the setting sun, the silent street lay undisturbed, revealing the grandeur of its cobblestone texture, the rusted lampposts bearing witness to forgotten stories, and the ancient, ivy-clad houses standing stoically, their shuttered windows and weather-beaten doors speaking volumes about their passage through time.'
142
+ example_b6='Awash with the soothing hues of an array of blossoms, the tranquil garden was a symphony of life and color, where the soft murmur of the babbling brook intertwined with the whispering willows, and the iridescent petals danced in the gentle breeze, creating an enchanting sanctuary of beauty and serenity.'
143
+ example_b7="Canopied by a patchwork quilt of sunlight and shadows, the sprawling park was a panorama of lush green grass, meandering trails etched through vibrant wildflowers, towering oaks reaching towards the sky, and tranquil ponds mirroring the clear, blue expanse above, offering a serene retreat in the heart of nature's splendor."
144
+
+     examples_basic = [example_b1, example_b2, example_b3, example_b4, example_b5, example_b6]
+     examples_advanced = [example1, example2, example3, example4, example6, example7, example8, example9]
+
+     description = "The demo generates 8 perspective images with a FOV of 90 degrees and a rotation step of 45 degrees. Please type 8 sentences, one for each perspective image."
+
+     outputs = [gr.Image(shape=(484, 2048))]
+     outputs.extend([gr.Image(shape=(1, 1)) for i in range(8)])
+
+     def load_example_img(path):
+         img = Image.open(path)
+         img = img.resize((1024, 242))  # resize returns a new image; keep the result
+         return img
+
+     def copy(text):
+         return [text] * 8
+
+     def clear():
+         return None, None, None, None, None, None, None, None, None
+
+     def load_basic(example):
+         return example
+
+     def generate_advanced(acc, text1, text2, text3, text4, text5, text6, text7, text8):
+         texts = [text1, text2, text3, text4, text5, text6, text7, text8]
+         for text in texts:
+             if text is None or text == '':
+                 raise gr.Error('Text cannot be empty')
+         images_low_res_pred = demo_model(texts, batch)[0]
+         imgs = []
+         # FOV 90, yaw in 45-degree steps, pitch 0 for each of the 8 views.
+         degrees = [[90, 0, 0], [90, 45, 0], [90, 90, 0], [90, 135, 0],
+                    [90, 180, 0], [90, 225, 0], [90, 270, 0], [90, 315, 0]]
+         width = 2048
+         height = 1024
+         for i in range(8):
+             imgs.append(images_low_res_pred[i])
+         equ = m_P2E.Perspective(imgs, degrees)
+
+         img = equ.GetEquirec(height, width).astype(np.uint8)
+         img = img[270:-270]  # crop the distorted poles
+         imgs = [img] + imgs
+         return [acc.update(open=False)] + imgs
+
+     def generate_basic(acc, text):
+         print(text)
+         if text is None or text == '':
+             raise gr.Error('Text cannot be empty')
+         model = 'gpt-3.5-turbo'
+         # Read the key from the environment; never hard-code API keys in source.
+         openai.api_key = os.environ.get('OPENAI_API_KEY')
+
+         # Expand the single prompt into a short scene description.
+         flag = False
+         for i in range(20):
+             try:
+                 response = openai.ChatCompletion.create(
+                     model=model,
+                     messages=[
+                         {"role": "user", "content": "Can you describe the following with 5 or 6 sentences? {}".format(text)}],
+                     max_tokens=193,
+                     temperature=0,
+                 )
+                 text = response.choices[0]['message']['content']
+                 flag = True
+                 break
+             except Exception:
+                 flag = False
+         if not flag:
+             raise gr.Error('Text error')
+
+         texts = [text] * 8
+         if text == '':
+             raise gr.Error('Text cannot be empty')
+         images_low_res_pred = demo_model(texts, batch)[0]
+         imgs = []
+         degrees = [[90, 0, 0], [90, 45, 0], [90, 90, 0], [90, 135, 0],
+                    [90, 180, 0], [90, 225, 0], [90, 270, 0], [90, 315, 0]]
+         width = 2048
+         height = 1024
+         for i in range(8):
+             imgs.append(images_low_res_pred[i])
+         equ = m_P2E.Perspective(imgs, degrees)
+
+         img = equ.GetEquirec(height, width).astype(np.uint8)
+         img = img[270:-270]  # crop the distorted poles
+         imgs = [img] + imgs
+         return [acc.update(open=False)] + imgs
+
+     default_text = 'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop, a sink surrounded by cabinets. To the left of the island, a stainless-steel refrigerator stands tall. To the right of the sink, built-in wooden cabinets painted in a muted color.'
+     css = """
+     #warning {background-color: #000000}
+     .feedback textarea {font-size: 16px !important}
+     #foo {}
+     .text111 textarea {
+         color: rgba(0, 0, 0, 0.5);
+     }
+     """
+
+     inputs = [gr.Textbox(type="text", label='Text {}'.format(i + 1)) for i in range(8)]
+
+     with gr.Blocks(css=css) as demo:
+         with gr.Row():
+             gr.Markdown(
+                 """
+                 # <center>Text2Pano with MVDiffusion</center>
+                 """)
+         with gr.Row():
+             gr.Markdown(
+                 """
+                 <center>Text2Pano demonstration: write the scene you want as text, then click "Generate panorama". Alternatively, load one of the example text prompts below to populate the text boxes. The advanced mode lets you specify a text prompt for each perspective image.</center>
+                 """)
+         with gr.Tab("Basic"):
+             with gr.Row():
+                 textbox1 = gr.Textbox(type="text", label='Text', value=default_text, elem_id='warning', elem_classes="feedback")
+
+             with gr.Row():
+                 submit_btn = gr.Button("Generate panorama")
+                 clear_btn = gr.Button("Clear all texts")
+                 clear_btn.click(
+                     clear,
+                     outputs=inputs + [textbox1]
+                 )
+
+             with gr.Accordion("Example expand/hide") as acc:
+                 for i in range(0, len(examples_basic)):
+                     with gr.Row():
+                         gr.Image(load_example_img('assets/basic/img{}.png'.format(i + 1)), label='example {}'.format(i + 1))
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Example text {}'.format(i + 1), value=examples_basic[i])
+                     with gr.Row():
+                         load_btn = gr.Button("Load text to the main box")
+                         load_btn.click(
+                             partial(load_basic, examples_basic[i]),
+                             outputs=[textbox1]
+                         )
+
+             submit_btn.click(
+                 partial(generate_basic, acc),
+                 inputs=textbox1,
+                 outputs=[acc] + outputs
+             )
+
+         with gr.Tab("Advanced"):
+             with gr.Row():
+                 for text_bar in inputs[:4]:
+                     text_bar.render()
+             with gr.Row():
+                 for text_bar in inputs[4:]:
+                     text_bar.render()
+
+             with gr.Row():
+                 submit_btn = gr.Button("Generate panorama")
+                 clear_btn = gr.Button("Clear all texts")
+                 clear_btn.click(
+                     clear,
+                     outputs=inputs + [textbox1]
+                 )
+             with gr.Accordion("Example expand/hide") as acc_advanced:
+                 for i, example in enumerate(examples_advanced):
+                     with gr.Row():
+                         gr.Image(load_example_img('assets/advanced/img{}.png'.format(i + 1)), label='example {}'.format(i + 1))
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Text 1', value=example[0])
+                         gr.Textbox(type="text", label='Text 2', value=example[1])
+                         gr.Textbox(type="text", label='Text 3', value=example[2])
+                         gr.Textbox(type="text", label='Text 4', value=example[3])
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Text 5', value=example[4])
+                         gr.Textbox(type="text", label='Text 6', value=example[5])
+                         gr.Textbox(type="text", label='Text 7', value=example[6])
+                         gr.Textbox(type="text", label='Text 8', value=example[7])
+                     with gr.Row():
+                         load_btn = gr.Button("Load text to other text boxes")
+                         load_btn.click(
+                             partial(load_basic, example),
+                             outputs=inputs
+                         )
+             submit_btn.click(
+                 partial(generate_advanced, acc_advanced),
+                 inputs=inputs,
+                 outputs=[acc_advanced] + outputs
+             )
+
+         with gr.Row():
+             outputs[0].render()
+         with gr.Row():
+             outputs[1].render()
+             outputs[2].render()
+         with gr.Row():
+             outputs[3].render()
+             outputs[4].render()
+         with gr.Row():
+             outputs[5].render()
+             outputs[6].render()
+         with gr.Row():
+             outputs[7].render()
+             outputs[8].render()
+
+     demo.queue(concurrency_count=3)
+     demo.launch(share=True)
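
Note: a minimal sketch of the stitching step above, runnable on its own (assuming lib/ from this commit is importable; the zero arrays stand in for the eight 512x512 views that demo_model produces):

    import numpy as np
    import lib.multi_Perspec2Equirec as m_P2E

    # Eight views: FOV 90, yaw in 45-degree steps, pitch 0, as in generate_basic/advanced.
    degrees = [[90, yaw, 0] for yaw in range(0, 360, 45)]
    views = [np.zeros((512, 512, 3), dtype=np.uint8) for _ in degrees]  # placeholder inputs

    equ = m_P2E.Perspective(views, degrees)
    pano = equ.GetEquirec(1024, 2048).astype(np.uint8)
    pano = pano[270:-270]  # 484x2048 strip, matching the demo's output image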
assets/advanced/img1.png ADDED

Git LFS Details

  • SHA256: 0b6ee0af34eb5fa6bce208003dbad31301303572172346e978a6069fc45c67c8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
assets/advanced/img2.png ADDED

Git LFS Details

  • SHA256: 46b83c749171ee3105795ec7cc35ce4f37fcb2f18e1578ab8f7b41a5972ca4ed
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
assets/advanced/img3.png ADDED

Git LFS Details

  • SHA256: aea4f2b59581dc012b353ed405e15bc286d24c6211e54748fbc6692be6203479
  • Pointer size: 132 Bytes
  • Size of remote file: 1.53 MB
assets/advanced/img4.png ADDED

Git LFS Details

  • SHA256: 34235863cf886241b37129a4fd3e7d24788ecef3ef29c2038211f1ddfe0cad28
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
assets/advanced/img5.png ADDED

Git LFS Details

  • SHA256: 8b9b107ff5712c15114cc1dcafb3e847c8035941bb7c74111a54fbc6bfce5188
  • Pointer size: 132 Bytes
  • Size of remote file: 1.41 MB
assets/advanced/img6.png ADDED

Git LFS Details

  • SHA256: cdc8b4a173321f3a01ae7e167770bd3b67d8157310ed8055b2c984626e8589df
  • Pointer size: 132 Bytes
  • Size of remote file: 1.64 MB
assets/advanced/img7.png ADDED

Git LFS Details

  • SHA256: f399a24a822cff263cd5e310b6ef265500f0dfcdb48e92c398405b5761908446
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
assets/advanced/img8.png ADDED

Git LFS Details

  • SHA256: 4541dbd422a4994b4219e2c89db08f514b406fb3f8d4c45084faeb13d7d6482e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.74 MB
assets/basic/img1.png ADDED

Git LFS Details

  • SHA256: dd92c705faecc52785e9e114762747cfdc748abd770f7967ca3a97859abbc873
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img2.png ADDED

Git LFS Details

  • SHA256: 0a5d29c163ce3e6e8ee87c128e606b34ee911c2ff51ba112b0b801bf37f32c0c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
assets/basic/img3.png ADDED

Git LFS Details

  • SHA256: 4401231e8f735e12ff6741c4b5da2ce71e8bc9174b9c1f84770e04fad9d7cd63
  • Pointer size: 132 Bytes
  • Size of remote file: 1.39 MB
assets/basic/img4.png ADDED

Git LFS Details

  • SHA256: ca831c0265505b62a316fe5950b84dcfd83cc3a4ff92d721ea42350a01c28862
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img5.png ADDED

Git LFS Details

  • SHA256: 81ba340c301fd82a8fe41efd9f877d052b0999b2a62f46a6bc3250528f35bf01
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img6.png ADDED

Git LFS Details

  • SHA256: 928cc4876a697603597b5d9273e607cea6bd8c2d08b5c9f70e5b8e354309f845
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img7.png ADDED

Git LFS Details

  • SHA256: 8bfcad7363ff53c742fe13a49ecbbbcaea1142dc138a6a8020c21bcbfbce30b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB
assets/basic/img_temp2.png ADDED

Git LFS Details

  • SHA256: 8c1a083c1a06637ab7e871270e7b755082db9562ae67219533e06ed5e2b0831c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.78 MB
batch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:849e5e843a9f36c91e0b79c53e8349a87655ea09ddc4ecde6df11c94e9f4023b
+ size 26636778
configs/inpaint.yaml ADDED
@@ -0,0 +1,31 @@
+ dataset:
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 256
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 30 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler.yaml ADDED
@@ -0,0 +1,32 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 128
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler_temporal.yaml ADDED
@@ -0,0 +1,36 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 6
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 384
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0001
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: True
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler_train.yaml ADDED
@@ -0,0 +1,36 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 512
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.000005
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: False
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/train.yaml ADDED
@@ -0,0 +1,35 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 1
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 512
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0002
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   model_id: stabilityai/stable-diffusion-2-base
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     diff_timestep: 50
+     lora_layers: False
+     single_image_ft: True
+
configs/train_floyd.yaml ADDED
@@ -0,0 +1,32 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 12
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 30
+   rot_high_res: 10
+   resolution: 256
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: DeepFloyd/IF-I-XL-v1.0
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
configs/train_lora.yaml ADDED
@@ -0,0 +1,28 @@
+ dataset:
+   num_views_low_res: 1
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 256
+
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0002
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   model_id: stabilityai/stable-diffusion-2-base
+   guidance_scale: 9.
+   diff_timestep: 50
+   model_type: base
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+   base_model:
+     lora_layers: True
+     single_image_ft: True
+
configs/train_mv.yaml ADDED
@@ -0,0 +1,33 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 512
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
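
Note: configs/train_mv.yaml is the file app.py loads at startup; app.py then overrides several keys in memory before building the model. A minimal sketch of that pattern, mirroring app.py:

    import yaml

    config = yaml.load(open('configs/train_mv.yaml', 'rb'), Loader=yaml.SafeLoader)
    # Runtime overrides applied by app.py on top of the file's values.
    config['height'] = 512
    config['width'] = 512
    config['length'] = 8
    config['model_path'] = 'weights/last.ckpt'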
configs/train_mv_256.yaml ADDED
@@ -0,0 +1,33 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 2
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.00001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
configs/upsample_gen_single.yaml ADDED
@@ -0,0 +1,37 @@
+ dataset:
+   name: generation
+   num_views_low_res: 8
+   num_views_high_res: 8
+   image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
+   resume_dir: logs/tb_logs/test_mp3d_upsample_seperate=2/version_0/images
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 1024
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 50 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: False
+   base_model:
+     diff_timestep: 30
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/upsample_generation.yaml ADDED
@@ -0,0 +1,37 @@
+ dataset:
+   name: generation
+   num_views_low_res: 8
+   num_views_high_res: 8
+   image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
+   resume_dir: logs/tb_logs/test_mp3d_upsample_all=7/version_0/images
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 1024
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 1 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: True
+   base_model:
+     diff_timestep: 30
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
lib/Equirec2Perspec.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Equirectangular:
+     def __init__(self, img_name, text2light=False):
+         if isinstance(img_name, str):
+             self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         else:
+             self._img = img_name
+         if text2light:
+             self._img = np.roll(self._img, -60, axis=0)
+
+         [self._height, self._width, _] = self._img.shape
+
+     def GetPerspective(self, FOV, THETA, PHI, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         equ_h = self._height
+         equ_w = self._width
+         equ_cx = (equ_w - 1) / 2.0
+         equ_cy = (equ_h - 1) / 2.0
+
+         wFOV = FOV
+         hFOV = float(height) / width * wFOV
+
+         w_len = np.tan(np.radians(wFOV / 2.0))
+         h_len = np.tan(np.radians(hFOV / 2.0))
+
+         x_map = np.ones([height, width], np.float32)
+         y_map = np.tile(np.linspace(-w_len, w_len, width), [height, 1])
+         z_map = -np.tile(np.linspace(-h_len, h_len, height), [width, 1]).T
+
+         D = np.sqrt(x_map**2 + y_map**2 + z_map**2)
+         xyz = np.stack((x_map, y_map, z_map), axis=2) / np.repeat(D[:, :, np.newaxis], 3, axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-PHI))
+
+         xyz = xyz.reshape([height * width, 3]).T
+         xyz = np.dot(R1, xyz)
+         xyz = np.dot(R2, xyz).T
+         lat = np.arcsin(xyz[:, 2])
+         lon = np.arctan2(xyz[:, 1], xyz[:, 0])
+
+         lon = lon.reshape([height, width]) / np.pi * 180
+         lat = -lat.reshape([height, width]) / np.pi * 180
+
+         lon = lon / 180 * equ_cx + equ_cx
+         lat = lat / 90 * equ_cy + equ_cy
+
+         persp = cv2.remap(self._img, lon.astype(np.float32), lat.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+         return persp
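
Note: a short usage sketch for this class (the panorama path is hypothetical; any 2:1 equirectangular image works), extracting the same eight 90-degree views the demo operates on:

    import lib.Equirec2Perspec as E2P

    equ = E2P.Equirectangular('pano.png')  # hypothetical input panorama
    # FOV 90, yaw every 45 degrees, pitch 0; 512x512 crops as in the demo.
    views = [equ.GetPerspective(90, yaw, 0, 512, 512) for yaw in range(0, 360, 45)]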
lib/Perspec2Equirec.py ADDED
@@ -0,0 +1,75 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Perspective:
+     def __init__(self, img_name, FOV, THETA, PHI):
+         if isinstance(img_name, str):
+             self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         else:
+             self._img = img_name
+         [self._height, self._width, _] = self._img.shape
+         self.wFOV = FOV
+         self.THETA = THETA
+         self.PHI = PHI
+         self.hFOV = float(self._height) / self._width * FOV
+
+         self.w_len = np.tan(np.radians(self.wFOV / 2.0))
+         self.h_len = np.tan(np.radians(self.hFOV / 2.0))
+
+     def GetEquirec(self, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         x, y = np.meshgrid(np.linspace(-180, 180, width), np.linspace(90, -90, height))
+
+         x_map = np.cos(np.radians(x)) * np.cos(np.radians(y))
+         y_map = np.sin(np.radians(x)) * np.cos(np.radians(y))
+         z_map = np.sin(np.radians(y))
+
+         xyz = np.stack((x_map, y_map, z_map), axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(self.THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-self.PHI))
+
+         R1 = np.linalg.inv(R1)
+         R2 = np.linalg.inv(R2)
+
+         xyz = xyz.reshape([height * width, 3]).T
+         xyz = np.dot(R2, xyz)
+         xyz = np.dot(R1, xyz).T
+
+         xyz = xyz.reshape([height, width, 3])
+         inverse_mask = np.where(xyz[:, :, 0] > 0, 1, 0)
+
+         xyz[:, :] = xyz[:, :] / np.repeat(xyz[:, :, 0][:, :, np.newaxis], 3, axis=2)
+
+         lon_map = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                            & (xyz[:, :, 2] < self.h_len), (xyz[:, :, 1] + self.w_len) / 2 / self.w_len * self._width, 0)
+         lat_map = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                            & (xyz[:, :, 2] < self.h_len), (-xyz[:, :, 2] + self.h_len) / 2 / self.h_len * self._height, 0)
+         mask = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                         & (xyz[:, :, 2] < self.h_len), 1, 0)
+
+         persp = cv2.remap(self._img, lon_map.astype(np.float32), lat_map.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+         mask = mask * inverse_mask
+         mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+         persp = persp * mask
+
+         return persp, mask
lib/multi_Perspec2Equirec.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+ import lib.Perspec2Equirec as P2E
+
+
+ class Perspective:
+     def __init__(self, img_array, F_T_P_array):
+         assert len(img_array) == len(F_T_P_array)
+
+         self.img_array = img_array
+         self.F_T_P_array = F_T_P_array
+
+     def GetEquirec(self, height, width):
+         # Reproject every perspective view into equirectangular space and
+         # blend them with per-pixel weights.
+         merge_image = np.zeros((height, width, 3))
+         merge_mask = np.zeros((height, width, 3))
+
+         for img_dir, [F, T, P] in zip(self.img_array, self.F_T_P_array):
+             per = P2E.Perspective(img_dir, F, T, P)  # one perspective view (FOV, theta, phi)
+             img, mask = per.GetEquirec(height, width)
+             mask = mask.astype(np.float32)
+             img = img.astype(np.float32)
+
+             # Linear ramp: weight 0 at the view's edges, 1 at its center,
+             # so overlapping neighbouring views cross-fade.
+             weight_mask = np.zeros((img_dir.shape[0], img_dir.shape[1], 3))
+             w = img_dir.shape[1]
+             weight_mask[:, 0:w//2, :] = np.linspace(0, 1, w//2)[..., None]
+             weight_mask[:, w//2:, :] = np.linspace(1, 0, w//2)[..., None]
+             weight_mask = P2E.Perspective(weight_mask, F, T, P)
+             weight_mask, _ = weight_mask.GetEquirec(height, width)
+
+             blur = cv2.blur(mask, (5, 5))
+             blur = blur * mask
+             mask = (blur == 1) * blur + (blur != 1) * blur * 0.05
+
+             merge_image += img * weight_mask
+             merge_mask += weight_mask
+
+         merge_image[merge_mask == 0] = 255.
+         merge_mask = np.where(merge_mask == 0, 1, merge_mask)
+         merge_image = np.divide(merge_image, merge_mask)
+
+         return merge_image
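
Note: the blending above cross-fades overlapping views with a per-pixel linear ramp before reprojection. A small sketch of the weight profile built for each view (the width of 512 is an assumption matching the demo's resolution):

    import numpy as np

    w = 512  # view width used by the demo
    weight = np.concatenate([np.linspace(0, 1, w // 2), np.linspace(1, 0, w // 2)])
    # weight rises from 0 at the left edge to 1 at the center and back to 0 at the
    # right edge, so neighbouring views (45-degree steps, 90-degree FOV) blend smoothly.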
lib/old_Perspec2Equirec.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Perspective:
+     def __init__(self, img_name, FOV, THETA, PHI):
+         self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         [self._height, self._width, _] = self._img.shape
+         self.FOV = FOV
+         self.THETA = THETA
+         self.PHI = PHI
+
+     def GetEquirec(self, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         equ_h = height
+         equ_w = width
+         equ_cx = (equ_w - 1) / 2.0
+         equ_cy = (equ_h - 1) / 2.0
+
+         wFOV = self.FOV
+         hFOV = float(self._height) / self._width * wFOV
+
+         w_len = np.tan(np.radians(wFOV / 2.0))
+         h_len = np.tan(np.radians(hFOV / 2.0))
+
+         x_map = np.ones([self._height, self._width], np.float32)
+         y_map = np.tile(np.linspace(-w_len, w_len, self._width), [self._height, 1])
+         z_map = -np.tile(np.linspace(-h_len, h_len, self._height), [self._width, 1]).T
+
+         D = np.sqrt(x_map**2 + y_map**2 + z_map**2)
+         xyz = np.stack((x_map, y_map, z_map), axis=2) / np.repeat(D[:, :, np.newaxis], 3, axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(self.THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-self.PHI))
+
+         xyz = xyz.reshape([self._height * self._width, 3]).T
+         xyz = np.dot(R1, xyz)
+         xyz = np.dot(R2, xyz).T
+         lat = np.arcsin(xyz[:, 2])
+         lon = np.arctan2(xyz[:, 1], xyz[:, 0])
+
+         lon = lon / np.pi * 180
+         lat = -lat / np.pi * 180
+
+         # np.int was removed in recent NumPy; the builtin int behaves the same here.
+         lon = (lon / 180 * equ_cx + equ_cx).astype(int)
+         lat = (lat / 90 * equ_cy + equ_cy).astype(int)
+         coordinate = (lat, lon)
+
+         x_map = np.repeat(np.arange(self._height), self._width)
+         y_map = np.tile(np.arange(self._width), self._height)
+
+         blank_map_x = np.zeros((height, width))
+         blank_map_y = np.zeros((height, width))
+         mask = np.zeros((height, width, 3))
+
+         blank_map_x[coordinate] = x_map
+         blank_map_y[coordinate] = y_map
+         mask[coordinate] = [1, 1, 1]
+
+         persp = cv2.remap(self._img, blank_map_y.astype(np.float32), blank_map_x.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+         persp = persp * mask
+
+         return persp, mask
model.cpython-39-x86_64-linux-gnu.so ADDED
Binary file (176 kB).
 
model.pyc ADDED
Binary file (4.19 kB).
 
null_prompt.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c755f0261a4b055e75f80e44746f6c8db62113fa2181f90f6c13bff08b405539
+ size 316139
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch==2.0.1
+ opencv-python
+ transformers
+ diffusers
+ openai
+ einops
weights/last.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08b9ec605c2e962adcb3dfe95c4c89d977bbaaccfc2ff2a0888dbcdfdea2dd82
+ size 10012818135