Ryukijano committed on
Commit ed41af7
1 Parent(s): bf40592

Commit MV diffusion

.gitattributes CHANGED
@@ -32,3 +32,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img1.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img2.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img3.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img4.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img5.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img6.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img7.png filter=lfs diff=lfs merge=lfs -text
+ assets/advanced/img8.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img_temp2.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img1.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img2.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img3.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img4.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img5.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img6.png filter=lfs diff=lfs merge=lfs -text
+ assets/basic/img7.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: MVDiffusion
+ emoji: 🐢
+ colorFrom: blue
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 3.35.2
+ app_file: app.py
+ pinned: false
+ python_version: 3.9.16
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,347 @@
+ import os
+ import torch
+ import torch.nn as nn
+ import yaml
+ import cv2
+ import numpy as np
+ from PIL import Image
+ import gradio as gr
+ from functools import partial
+ import lib.Equirec2Perspec as E2P
+ import lib.Perspec2Equirec as P2E
+ import lib.multi_Perspec2Equirec as m_P2E
+ import openai
+ from model import Model
+
+ def get_K_R(FOV, THETA, PHI, height, width):
+     # Pinhole intrinsics K and world rotation R for a view with the given
+     # horizontal FOV and yaw (THETA) / pitch (PHI), all in degrees.
+     f = 0.5 * width * 1 / np.tan(0.5 * FOV / 180.0 * np.pi)
+     cx = (width - 1) / 2.0
+     cy = (height - 1) / 2.0
+     K = np.array([
+         [f, 0, cx],
+         [0, f, cy],
+         [0, 0, 1],
+     ], np.float32)
+
+     y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+     x_axis = np.array([1.0, 0.0, 0.0], np.float32)
+     R1, _ = cv2.Rodrigues(y_axis * np.radians(THETA))
+     R2, _ = cv2.Rodrigues(np.dot(R1, x_axis) * np.radians(PHI))
+     R = R2 @ R1
+     return K, R
+
+ if __name__=='__main__':
+     cfg_path = 'configs/train_mv.yaml'
+     config = yaml.load(open(cfg_path, 'rb'), Loader=yaml.SafeLoader)
+     config['height'] = 512
+     config['width'] = 512
+     config['length'] = 8
+     config['model_path'] = 'weights/last.ckpt'
+
+     demo_model = Model(config)
+     state_dict = torch.load(config['model_path'])['state_dict']
+     demo_model.load_state_dict(state_dict, strict=False)
+     demo_model = demo_model.cuda()
+
+     batch = torch.load('batch.pth')
+
+     example1 = [
+         "A room with a sofa and coffee table for relaxing.",
+         "A corner sofa is surrounded by plants.",
+         "A comfy sofa, bookshelf, and lamp for reading.",
+         "A bright room with a sofa, TV, and games.",
+         "A stylish sofa and desk setup for work.",
+         "A sofa, dining table, and chairs for gatherings.",
+         "A colorful sofa, art, and music fill the room.",
+         "A sofa, yoga mat, and meditation corner for calm."
+     ]
+     example2 = [
+         "A room with a sofa and coffee table for relaxing, cartoon style",
+         "A corner sofa is surrounded by plants, cartoon style",
+         "A comfy sofa, bookshelf, and lamp for reading, cartoon style",
+         "A bright room with a sofa, TV, and games, cartoon style",
+         "A stylish sofa and desk setup for work, cartoon style",
+         "A sofa, dining table, and chairs for gatherings, cartoon style",
+         "A colorful sofa, art, and music fill the room, cartoon style",
+         "A sofa, yoga mat, and meditation corner for calm, cartoon style"
+     ]
+
+     example3 = [
+         "A room with a sofa and coffee table for relaxing, oil painting style",
+         "A corner sofa is surrounded by plants, oil painting style",
+         "A comfy sofa, bookshelf, and lamp for reading, oil painting style",
+         "A bright room with a sofa, TV, and games, oil painting style",
+         "A stylish sofa and desk setup for work, oil painting style",
+         "A sofa, dining table, and chairs for gatherings, oil painting style",
+         "A colorful sofa, art, and music fill the room, oil painting style",
+         "A sofa, yoga mat, and meditation corner for calm, oil painting style"
+     ]
+
+     example4 = [
+         "A Japanese room with muted-colored tatami mats.",
+         "A Japanese room with a simple, folded futon to one side.",
+         "A Japanese room with a low table at its center.",
+         "A Japanese room with shoji screens that softly divide the space.",
+         "A Japanese room with an alcove holding an elegant scroll and flowers.",
+         "A Japanese room with a tea set resting on a bamboo tray.",
+         "A Japanese room with a carved wooden cupboard against a wall.",
+         "A Japanese room with a traditional lamp that gently lights the space."
+     ]
+     example6 = [
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop',
+         'To the left of the island, a stainless-steel refrigerator stands tall.',
+         'To the left of the island, a stainless-steel refrigerator stands tall.',
+         'a sink surrounded by cabinets',
+         'a sink surrounded by cabinets',
+         'To the right of the sink, built-in wooden cabinets painted in a muted color.'
+     ]
+
+     example7 = [
+         "Cobblestone streets curl between old buildings.",
+         "Shops and cafes display signs and emit pleasant smells.",
+         "A fruit market scents the air with fresh citrus.",
+         "A fountain adds calm to one side of the scene.",
+         "Bicycles rest against walls and posts.",
+         "Flowers in boxes color the windows.",
+         "Flowers in boxes color the windows.",
+         "Cobblestone streets curl between old buildings."
+     ]
+
+     example8 = [
+         "The patio is open and airy.",
+         "A table and chairs sit in the middle.",
+         "Next to the table are flowers.",
+         "Colorful flowers fill the planters.",
+         "A grill stands ready for barbecues.",
+         "A grill stands ready for barbecues.",
+         "The patio overlooks a lush garden.",
+         "The patio overlooks a lush garden."
+     ]
+
+     example9 = [
+         "A Chinese palace with curved roofs.",
+         "A Chinese palace; red and gold accents gleam in the sun.",
+         "A Chinese palace with a view of mountains in front.",
+         "A view of mountains in front.",
+         "A Chinese palace with a view of mountains in front.",
+         "A Chinese palace with a tree beside it.",
+         "A Chinese palace with a tree beside it.",
+         "A Chinese palace with a tree beside it."
+     ]
+
+ example_b1="This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop, a sink surrounded by cabinets. To the left of the island, a stainless-steel refrigerator stands tall. To the right of the sink, built-in wooden cabinets painted in a muted."
138
+ example_b2="Bursting with vibrant hues and exaggerated proportions, the cartoon-styled room sparkled with whimsy and cheer, with floating shelves crammed with oddly shaped trinkets, a comically oversized polka-dot armchair perched near a gravity-defying, tilted lamp, and the candy-striped wallpaper creating a playful backdrop to the merry chaos, exuding a sense of fun and boundless imagination."
139
+ example_b3="Bathed in the pulsating glow of neon lights that painted stark contrasts of shadow and color, the cyberpunk room was a high-tech, low-life sanctuary, where sleek, metallic surfaces met jagged, improvised tech; a wall of glitchy monitors flickered with unending streams of data, and the buzz of electric current and the low hum of cooling fans formed a dystopian symphony, adding to the room's relentless, gritty energy."
140
+ example_b4="Majestically rising towards the heavens, the snow-capped mountain stood, its jagged peaks cloaked in a shroud of ethereal clouds, its rugged slopes a stark contrast against the serene azure sky, and its silent grandeur exuding an air of ancient wisdom and timeless solitude, commanding awe and reverence from all who beheld it."
141
+ example_b5='Bathed in the soft, dappled light of the setting sun, the silent street lay undisturbed, revealing the grandeur of its cobblestone texture, the rusted lampposts bearing witness to forgotten stories, and the ancient, ivy-clad houses standing stoically, their shuttered windows and weather-beaten doors speaking volumes about their passage through time.'
142
+ example_b6='Awash with the soothing hues of an array of blossoms, the tranquil garden was a symphony of life and color, where the soft murmur of the babbling brook intertwined with the whispering willows, and the iridescent petals danced in the gentle breeze, creating an enchanting sanctuary of beauty and serenity.'
143
+ example_b7="Canopied by a patchwork quilt of sunlight and shadows, the sprawling park was a panorama of lush green grass, meandering trails etched through vibrant wildflowers, towering oaks reaching towards the sky, and tranquil ponds mirroring the clear, blue expanse above, offering a serene retreat in the heart of nature's splendor."
144
+
+     examples_basic = [example_b1, example_b2, example_b3, example_b4, example_b5, example_b6]
+     examples_advanced = [example1, example2, example3, example4, example6, example7, example8, example9]
+
+     description = "The demo generates 8 perspective images with a FOV of 90 degrees and a rotation step of 45 degrees. Please type 8 sentences, one for each perspective image."
+
+     outputs = [gr.Image(shape=(484, 2048))]
+     outputs.extend([gr.Image(shape=(1, 1)) for i in range(8)])
+
+     def load_example_img(path):
+         img = Image.open(path)
+         img = img.resize((1024, 242))  # resize returns a new image; keep the result
+         return img
+
+     def copy(text):
+         return [text] * 8
+
+     def clear():
+         return None, None, None, None, None, None, None, None, None
+
+     def load_basic(example):
+         return example
+
+     def generate_advanced(acc, text1, text2, text3, text4, text5, text6, text7, text8):
+         texts = [text1, text2, text3, text4, text5, text6, text7, text8]
+         for text in texts:
+             if text is None or text == '':
+                 raise gr.Error('Text cannot be empty')
+         images_low_res_pred = demo_model(texts, batch)[0]
+         imgs = []
+         # FOV 90, yaw in 45-degree steps, pitch 0 for each of the 8 views.
+         degrees = [[90, 0, 0], [90, 45, 0], [90, 90, 0], [90, 135, 0],
+                    [90, 180, 0], [90, 225, 0], [90, 270, 0], [90, 315, 0]]
+         width = 2048
+         height = 1024
+         for i in range(8):
+             imgs.append(images_low_res_pred[i])
+         equ = m_P2E.Perspective(imgs, degrees)
+
+         img = equ.GetEquirec(height, width).astype(np.uint8)
+         img = img[270:-270]  # crop the distorted poles
+         imgs = [img] + imgs
+         return [acc.update(open=False)] + imgs
+
+     def generate_basic(acc, text):
+         print(text)
+         if text is None or text == '':
+             raise gr.Error('Text cannot be empty')
+         model = 'gpt-3.5-turbo'
+         # Read the key from the environment; never hard-code API keys in source.
+         openai.api_key = os.environ.get('OPENAI_API_KEY')
+
+         # Expand the single prompt into a short scene description.
+         flag = False
+         for i in range(20):
+             try:
+                 response = openai.ChatCompletion.create(
+                     model=model,
+                     messages=[
+                         {"role": "user", "content": "Can you describe the following with 5 or 6 sentences? {}".format(text)}],
+                     max_tokens=193,
+                     temperature=0,
+                 )
+                 text = response.choices[0]['message']['content']
+                 flag = True
+                 break
+             except Exception:
+                 flag = False
+         if not flag:
+             raise gr.Error('Text error')
+
+         texts = [text] * 8
+         if text == '':
+             raise gr.Error('Text cannot be empty')
+         images_low_res_pred = demo_model(texts, batch)[0]
+         imgs = []
+         degrees = [[90, 0, 0], [90, 45, 0], [90, 90, 0], [90, 135, 0],
+                    [90, 180, 0], [90, 225, 0], [90, 270, 0], [90, 315, 0]]
+         width = 2048
+         height = 1024
+         for i in range(8):
+             imgs.append(images_low_res_pred[i])
+         equ = m_P2E.Perspective(imgs, degrees)
+
+         img = equ.GetEquirec(height, width).astype(np.uint8)
+         img = img[270:-270]  # crop the distorted poles
+         imgs = [img] + imgs
+         return [acc.update(open=False)] + imgs
+
+     default_text = 'This kitchen is a charming blend of rustic and modern, featuring a large reclaimed wood island with marble countertop, a sink surrounded by cabinets. To the left of the island, a stainless-steel refrigerator stands tall. To the right of the sink, built-in wooden cabinets painted in a muted color.'
+     css = """
+     #warning {background-color: #000000}
+     .feedback textarea {font-size: 16px !important}
+     #foo {}
+     .text111 textarea {
+         color: rgba(0, 0, 0, 0.5);
+     }
+     """
+
+     inputs = [gr.Textbox(type="text", label='Text {}'.format(i + 1)) for i in range(8)]
+
+     with gr.Blocks(css=css) as demo:
+         with gr.Row():
+             gr.Markdown(
+                 """
+                 # <center>Text2Pano with MVDiffusion</center>
+                 """)
+         with gr.Row():
+             gr.Markdown(
+                 """
+                 <center>Text2Pano demonstration: write the scene you want as text, then click "Generate panorama". Alternatively, load one of the example text prompts below to populate the text boxes. The advanced mode lets you specify a text prompt for each perspective image.</center>
+                 """)
+         with gr.Tab("Basic"):
+             with gr.Row():
+                 textbox1 = gr.Textbox(type="text", label='Text', value=default_text, elem_id='warning', elem_classes="feedback")
+
+             with gr.Row():
+                 submit_btn = gr.Button("Generate panorama")
+                 clear_btn = gr.Button("Clear all texts")
+                 clear_btn.click(
+                     clear,
+                     outputs=inputs + [textbox1]
+                 )
+
+             with gr.Accordion("Example expand/hide") as acc:
+                 for i in range(0, len(examples_basic)):
+                     with gr.Row():
+                         gr.Image(load_example_img('assets/basic/img{}.png'.format(i + 1)), label='example {}'.format(i + 1))
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Example text {}'.format(i + 1), value=examples_basic[i])
+                     with gr.Row():
+                         load_btn = gr.Button("Load text to the main box")
+                         load_btn.click(
+                             partial(load_basic, examples_basic[i]),
+                             outputs=[textbox1]
+                         )
+
+             submit_btn.click(
+                 partial(generate_basic, acc),
+                 inputs=textbox1,
+                 outputs=[acc] + outputs
+             )
+
+         with gr.Tab("Advanced"):
+             with gr.Row():
+                 for text_bar in inputs[:4]:
+                     text_bar.render()
+             with gr.Row():
+                 for text_bar in inputs[4:]:
+                     text_bar.render()
+
+             with gr.Row():
+                 submit_btn = gr.Button("Generate panorama")
+                 clear_btn = gr.Button("Clear all texts")
+                 clear_btn.click(
+                     clear,
+                     outputs=inputs + [textbox1]
+                 )
+             with gr.Accordion("Example expand/hide") as acc_advanced:
+                 for i, example in enumerate(examples_advanced):
+                     with gr.Row():
+                         gr.Image(load_example_img('assets/advanced/img{}.png'.format(i + 1)), label='example {}'.format(i + 1))
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Text 1', value=example[0])
+                         gr.Textbox(type="text", label='Text 2', value=example[1])
+                         gr.Textbox(type="text", label='Text 3', value=example[2])
+                         gr.Textbox(type="text", label='Text 4', value=example[3])
+                     with gr.Row():
+                         gr.Textbox(type="text", label='Text 5', value=example[4])
+                         gr.Textbox(type="text", label='Text 6', value=example[5])
+                         gr.Textbox(type="text", label='Text 7', value=example[6])
+                         gr.Textbox(type="text", label='Text 8', value=example[7])
+                     with gr.Row():
+                         load_btn = gr.Button("Load text to other text boxes")
+                         load_btn.click(
+                             partial(load_basic, example),
+                             outputs=inputs
+                         )
+             submit_btn.click(
+                 partial(generate_advanced, acc_advanced),
+                 inputs=inputs,
+                 outputs=[acc_advanced] + outputs
+             )
+
+         with gr.Row():
+             outputs[0].render()
+         with gr.Row():
+             outputs[1].render()
+             outputs[2].render()
+         with gr.Row():
+             outputs[3].render()
+             outputs[4].render()
+         with gr.Row():
+             outputs[5].render()
+             outputs[6].render()
+         with gr.Row():
+             outputs[7].render()
+             outputs[8].render()
+
+     demo.queue(concurrency_count=3)
+     demo.launch(share=True)
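
Note: a minimal sketch of the stitching step above, runnable on its own (assuming lib/ from this commit is importable; the zero arrays stand in for the eight 512x512 views that demo_model produces):

    import numpy as np
    import lib.multi_Perspec2Equirec as m_P2E

    # Eight views: FOV 90, yaw in 45-degree steps, pitch 0, as in generate_basic/advanced.
    degrees = [[90, yaw, 0] for yaw in range(0, 360, 45)]
    views = [np.zeros((512, 512, 3), dtype=np.uint8) for _ in degrees]  # placeholder inputs

    equ = m_P2E.Perspective(views, degrees)
    pano = equ.GetEquirec(1024, 2048).astype(np.uint8)
    pano = pano[270:-270]  # 484x2048 strip, matching the demo's output image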
assets/advanced/img1.png ADDED

Git LFS Details

  • SHA256: 0b6ee0af34eb5fa6bce208003dbad31301303572172346e978a6069fc45c67c8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
assets/advanced/img2.png ADDED

Git LFS Details

  • SHA256: 46b83c749171ee3105795ec7cc35ce4f37fcb2f18e1578ab8f7b41a5972ca4ed
  • Pointer size: 132 Bytes
  • Size of remote file: 1.22 MB
assets/advanced/img3.png ADDED

Git LFS Details

  • SHA256: aea4f2b59581dc012b353ed405e15bc286d24c6211e54748fbc6692be6203479
  • Pointer size: 132 Bytes
  • Size of remote file: 1.53 MB
assets/advanced/img4.png ADDED

Git LFS Details

  • SHA256: 34235863cf886241b37129a4fd3e7d24788ecef3ef29c2038211f1ddfe0cad28
  • Pointer size: 132 Bytes
  • Size of remote file: 1.27 MB
assets/advanced/img5.png ADDED

Git LFS Details

  • SHA256: 8b9b107ff5712c15114cc1dcafb3e847c8035941bb7c74111a54fbc6bfce5188
  • Pointer size: 132 Bytes
  • Size of remote file: 1.41 MB
assets/advanced/img6.png ADDED

Git LFS Details

  • SHA256: cdc8b4a173321f3a01ae7e167770bd3b67d8157310ed8055b2c984626e8589df
  • Pointer size: 132 Bytes
  • Size of remote file: 1.64 MB
assets/advanced/img7.png ADDED

Git LFS Details

  • SHA256: f399a24a822cff263cd5e310b6ef265500f0dfcdb48e92c398405b5761908446
  • Pointer size: 132 Bytes
  • Size of remote file: 1.66 MB
assets/advanced/img8.png ADDED

Git LFS Details

  • SHA256: 4541dbd422a4994b4219e2c89db08f514b406fb3f8d4c45084faeb13d7d6482e
  • Pointer size: 132 Bytes
  • Size of remote file: 1.74 MB
assets/basic/img1.png ADDED

Git LFS Details

  • SHA256: dd92c705faecc52785e9e114762747cfdc748abd770f7967ca3a97859abbc873
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img2.png ADDED

Git LFS Details

  • SHA256: 0a5d29c163ce3e6e8ee87c128e606b34ee911c2ff51ba112b0b801bf37f32c0c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.62 MB
assets/basic/img3.png ADDED

Git LFS Details

  • SHA256: 4401231e8f735e12ff6741c4b5da2ce71e8bc9174b9c1f84770e04fad9d7cd63
  • Pointer size: 132 Bytes
  • Size of remote file: 1.39 MB
assets/basic/img4.png ADDED

Git LFS Details

  • SHA256: ca831c0265505b62a316fe5950b84dcfd83cc3a4ff92d721ea42350a01c28862
  • Pointer size: 132 Bytes
  • Size of remote file: 1.4 MB
assets/basic/img5.png ADDED

Git LFS Details

  • SHA256: 81ba340c301fd82a8fe41efd9f877d052b0999b2a62f46a6bc3250528f35bf01
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img6.png ADDED

Git LFS Details

  • SHA256: 928cc4876a697603597b5d9273e607cea6bd8c2d08b5c9f70e5b8e354309f845
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
assets/basic/img7.png ADDED

Git LFS Details

  • SHA256: 8bfcad7363ff53c742fe13a49ecbbbcaea1142dc138a6a8020c21bcbfbce30b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.88 MB
assets/basic/img_temp2.png ADDED

Git LFS Details

  • SHA256: 8c1a083c1a06637ab7e871270e7b755082db9562ae67219533e06ed5e2b0831c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.78 MB
batch.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:849e5e843a9f36c91e0b79c53e8349a87655ea09ddc4ecde6df11c94e9f4023b
+ size 26636778
configs/inpaint.yaml ADDED
@@ -0,0 +1,31 @@
+ dataset:
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 256
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 30 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler.yaml ADDED
@@ -0,0 +1,32 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 128
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler_temporal.yaml ADDED
@@ -0,0 +1,36 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 6
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 384
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0001
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: True
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/sd_upsampler_train.yaml ADDED
@@ -0,0 +1,36 @@
+ dataset:
+   name: mp3d
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 512
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.000005
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 20 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: False
+   base_model:
+     diff_timestep: 50
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/train.yaml ADDED
@@ -0,0 +1,35 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 1
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 512
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0002
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   model_id: stabilityai/stable-diffusion-2-base
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     diff_timestep: 50
+     lora_layers: False
+     single_image_ft: True
+
configs/train_floyd.yaml ADDED
@@ -0,0 +1,32 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 12
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 30
+   rot_high_res: 10
+   resolution: 256
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: DeepFloyd/IF-I-XL-v1.0
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
configs/train_lora.yaml ADDED
@@ -0,0 +1,28 @@
+ dataset:
+   num_views_low_res: 1
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   resolution: 256
+
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0002
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   model_id: stabilityai/stable-diffusion-2-base
+   guidance_scale: 9.
+   diff_timestep: 50
+   model_type: base
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+   base_model:
+     lora_layers: True
+     single_image_ft: True
+
configs/train_mv.yaml ADDED
@@ -0,0 +1,33 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 8
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 512
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.0001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
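
Note: configs/train_mv.yaml is the file app.py loads at startup; app.py then overrides several keys in memory before building the model. A minimal sketch of that pattern, mirroring app.py:

    import yaml

    config = yaml.load(open('configs/train_mv.yaml', 'rb'), Loader=yaml.SafeLoader)
    # Runtime overrides applied by app.py on top of the file's values.
    config['height'] = 512
    config['width'] = 512
    config['length'] = 8
    config['model_path'] = 'weights/last.ckpt'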
configs/train_mv_256.yaml ADDED
@@ -0,0 +1,33 @@
+ dataset:
+   name: 'mp3d'
+   num_views_low_res: 2
+   num_views_high_res: 1
+   image_root_dir: training/mp3d_skybox
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 10
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 256
+ train:
+   log_dir: high_res_upsample
+   lr: 0.00001
+
+ test:
+   fuse_type: diffcollage # multidiff
+
+ model:
+   guidance_scale: 9.
+   model_type: base
+   low_res_noise_level: 20
+   upsample_model:
+     num_coarse_cp_blocks: 5
+     lora_layers: True
+     homo_cp_attn: True
+     diff_timestep: 75
+   base_model:
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: False
+     single_image_ft: False
+     diff_timestep: 50
+
configs/upsample_gen_single.yaml ADDED
@@ -0,0 +1,37 @@
+ dataset:
+   name: generation
+   num_views_low_res: 8
+   num_views_high_res: 8
+   image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
+   resume_dir: logs/tb_logs/test_mp3d_upsample_seperate=2/version_0/images
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 1024
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 50 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: False
+   base_model:
+     diff_timestep: 30
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
configs/upsample_generation.yaml ADDED
@@ -0,0 +1,37 @@
+ dataset:
+   name: generation
+   num_views_low_res: 8
+   num_views_high_res: 8
+   image_root_dir: logs/tb_logs/test_mp3d_base_mv_all=1/version_1/images
+   resume_dir: logs/tb_logs/test_mp3d_upsample_all=7/version_0/images
+   fov: 90
+   rot_low_res: 45
+   rot_high_res: 45
+   resolution: 256
+   resolution_high_res: 1024
+   crop_size_high_res: 1024
+
+ train:
+   log_dir: sd_upsampler
+   lr: 0.0002
+
+ test:
+   fuse_type: single # multidiff
+
+ model:
+   guidance_scale: 9.
+   diff_timestep: 1
+   low_res_noise_level: 1 # from the default SD upsampler setting
+   model_type: upsample
+   upsample_model:
+     model_id: stabilityai/stable-diffusion-x4-upscaler
+     num_coarse_cp_blocks: 5
+     lora_layers: False
+     homo_cp_attn: True
+     diff_timestep: 75
+     multiframe_fuse: True
+   base_model:
+     diff_timestep: 30
+     model_id: stabilityai/stable-diffusion-2-base
+     lora_layers: True
+
lib/Equirec2Perspec.py ADDED
@@ -0,0 +1,69 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Equirectangular:
+     def __init__(self, img_name, text2light=False):
+         if isinstance(img_name, str):
+             self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         else:
+             self._img = img_name
+         if text2light:
+             self._img = np.roll(self._img, -60, axis=0)
+
+         [self._height, self._width, _] = self._img.shape
+
+     def GetPerspective(self, FOV, THETA, PHI, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         equ_h = self._height
+         equ_w = self._width
+         equ_cx = (equ_w - 1) / 2.0
+         equ_cy = (equ_h - 1) / 2.0
+
+         wFOV = FOV
+         hFOV = float(height) / width * wFOV
+
+         w_len = np.tan(np.radians(wFOV / 2.0))
+         h_len = np.tan(np.radians(hFOV / 2.0))
+
+         x_map = np.ones([height, width], np.float32)
+         y_map = np.tile(np.linspace(-w_len, w_len, width), [height, 1])
+         z_map = -np.tile(np.linspace(-h_len, h_len, height), [width, 1]).T
+
+         D = np.sqrt(x_map**2 + y_map**2 + z_map**2)
+         xyz = np.stack((x_map, y_map, z_map), axis=2) / np.repeat(D[:, :, np.newaxis], 3, axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-PHI))
+
+         xyz = xyz.reshape([height * width, 3]).T
+         xyz = np.dot(R1, xyz)
+         xyz = np.dot(R2, xyz).T
+         lat = np.arcsin(xyz[:, 2])
+         lon = np.arctan2(xyz[:, 1], xyz[:, 0])
+
+         lon = lon.reshape([height, width]) / np.pi * 180
+         lat = -lat.reshape([height, width]) / np.pi * 180
+
+         lon = lon / 180 * equ_cx + equ_cx
+         lat = lat / 90 * equ_cy + equ_cy
+
+         persp = cv2.remap(self._img, lon.astype(np.float32), lat.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+         return persp
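
Note: a short usage sketch for this class (the panorama path is hypothetical; any 2:1 equirectangular image works), extracting the same eight 90-degree views the demo operates on:

    import lib.Equirec2Perspec as E2P

    equ = E2P.Equirectangular('pano.png')  # hypothetical input panorama
    # FOV 90, yaw every 45 degrees, pitch 0; 512x512 crops as in the demo.
    views = [equ.GetPerspective(90, yaw, 0, 512, 512) for yaw in range(0, 360, 45)]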
lib/Perspec2Equirec.py ADDED
@@ -0,0 +1,75 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Perspective:
+     def __init__(self, img_name, FOV, THETA, PHI):
+         if isinstance(img_name, str):
+             self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         else:
+             self._img = img_name
+         [self._height, self._width, _] = self._img.shape
+         self.wFOV = FOV
+         self.THETA = THETA
+         self.PHI = PHI
+         self.hFOV = float(self._height) / self._width * FOV
+
+         self.w_len = np.tan(np.radians(self.wFOV / 2.0))
+         self.h_len = np.tan(np.radians(self.hFOV / 2.0))
+
+     def GetEquirec(self, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         x, y = np.meshgrid(np.linspace(-180, 180, width), np.linspace(90, -90, height))
+
+         x_map = np.cos(np.radians(x)) * np.cos(np.radians(y))
+         y_map = np.sin(np.radians(x)) * np.cos(np.radians(y))
+         z_map = np.sin(np.radians(y))
+
+         xyz = np.stack((x_map, y_map, z_map), axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(self.THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-self.PHI))
+
+         R1 = np.linalg.inv(R1)
+         R2 = np.linalg.inv(R2)
+
+         xyz = xyz.reshape([height * width, 3]).T
+         xyz = np.dot(R2, xyz)
+         xyz = np.dot(R1, xyz).T
+
+         xyz = xyz.reshape([height, width, 3])
+         inverse_mask = np.where(xyz[:, :, 0] > 0, 1, 0)
+
+         xyz[:, :] = xyz[:, :] / np.repeat(xyz[:, :, 0][:, :, np.newaxis], 3, axis=2)
+
+         lon_map = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                            & (xyz[:, :, 2] < self.h_len), (xyz[:, :, 1] + self.w_len) / 2 / self.w_len * self._width, 0)
+         lat_map = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                            & (xyz[:, :, 2] < self.h_len), (-xyz[:, :, 2] + self.h_len) / 2 / self.h_len * self._height, 0)
+         mask = np.where((-self.w_len < xyz[:, :, 1]) & (xyz[:, :, 1] < self.w_len) & (-self.h_len < xyz[:, :, 2])
+                         & (xyz[:, :, 2] < self.h_len), 1, 0)
+
+         persp = cv2.remap(self._img, lon_map.astype(np.float32), lat_map.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+         mask = mask * inverse_mask
+         mask = np.repeat(mask[:, :, np.newaxis], 3, axis=2)
+         persp = persp * mask
+
+         return persp, mask
lib/multi_Perspec2Equirec.py ADDED
@@ -0,0 +1,57 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+ import lib.Perspec2Equirec as P2E
+
+
+ class Perspective:
+     def __init__(self, img_array, F_T_P_array):
+         assert len(img_array) == len(F_T_P_array)
+
+         self.img_array = img_array
+         self.F_T_P_array = F_T_P_array
+
+     def GetEquirec(self, height, width):
+         # Reproject every perspective view into equirectangular space and
+         # blend them with per-pixel weights.
+         merge_image = np.zeros((height, width, 3))
+         merge_mask = np.zeros((height, width, 3))
+
+         for img_dir, [F, T, P] in zip(self.img_array, self.F_T_P_array):
+             per = P2E.Perspective(img_dir, F, T, P)  # one perspective view (FOV, theta, phi)
+             img, mask = per.GetEquirec(height, width)
+             mask = mask.astype(np.float32)
+             img = img.astype(np.float32)
+
+             # Linear ramp: weight 0 at the view's edges, 1 at its center,
+             # so overlapping neighbouring views cross-fade.
+             weight_mask = np.zeros((img_dir.shape[0], img_dir.shape[1], 3))
+             w = img_dir.shape[1]
+             weight_mask[:, 0:w//2, :] = np.linspace(0, 1, w//2)[..., None]
+             weight_mask[:, w//2:, :] = np.linspace(1, 0, w//2)[..., None]
+             weight_mask = P2E.Perspective(weight_mask, F, T, P)
+             weight_mask, _ = weight_mask.GetEquirec(height, width)
+
+             blur = cv2.blur(mask, (5, 5))
+             blur = blur * mask
+             mask = (blur == 1) * blur + (blur != 1) * blur * 0.05
+
+             merge_image += img * weight_mask
+             merge_mask += weight_mask
+
+         merge_image[merge_mask == 0] = 255.
+         merge_mask = np.where(merge_mask == 0, 1, merge_mask)
+         merge_image = np.divide(merge_image, merge_mask)
+
+         return merge_image
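
Note: the blending above cross-fades overlapping views with a per-pixel linear ramp before reprojection. A small sketch of the weight profile built for each view (the width of 512 is an assumption matching the demo's resolution):

    import numpy as np

    w = 512  # view width used by the demo
    weight = np.concatenate([np.linspace(0, 1, w // 2), np.linspace(1, 0, w // 2)])
    # weight rises from 0 at the left edge to 1 at the center and back to 0 at the
    # right edge, so neighbouring views (45-degree steps, 90-degree FOV) blend smoothly.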
lib/old_Perspec2Equirec.py ADDED
@@ -0,0 +1,89 @@
+ import os
+ import sys
+ import cv2
+ import numpy as np
+
+ class Perspective:
+     def __init__(self, img_name, FOV, THETA, PHI):
+         self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+         [self._height, self._width, _] = self._img.shape
+         self.FOV = FOV
+         self.THETA = THETA
+         self.PHI = PHI
+
+     def GetEquirec(self, height, width):
+         # THETA is the left/right angle, PHI the up/down angle, both in degrees
+         equ_h = height
+         equ_w = width
+         equ_cx = (equ_w - 1) / 2.0
+         equ_cy = (equ_h - 1) / 2.0
+
+         wFOV = self.FOV
+         hFOV = float(self._height) / self._width * wFOV
+
+         w_len = np.tan(np.radians(wFOV / 2.0))
+         h_len = np.tan(np.radians(hFOV / 2.0))
+
+         x_map = np.ones([self._height, self._width], np.float32)
+         y_map = np.tile(np.linspace(-w_len, w_len, self._width), [self._height, 1])
+         z_map = -np.tile(np.linspace(-h_len, h_len, self._height), [self._width, 1]).T
+
+         D = np.sqrt(x_map**2 + y_map**2 + z_map**2)
+         xyz = np.stack((x_map, y_map, z_map), axis=2) / np.repeat(D[:, :, np.newaxis], 3, axis=2)
+
+         y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+         z_axis = np.array([0.0, 0.0, 1.0], np.float32)
+         [R1, _] = cv2.Rodrigues(z_axis * np.radians(self.THETA))
+         [R2, _] = cv2.Rodrigues(np.dot(R1, y_axis) * np.radians(-self.PHI))
+
+         xyz = xyz.reshape([self._height * self._width, 3]).T
+         xyz = np.dot(R1, xyz)
+         xyz = np.dot(R2, xyz).T
+         lat = np.arcsin(xyz[:, 2])
+         lon = np.arctan2(xyz[:, 1], xyz[:, 0])
+
+         lon = lon / np.pi * 180
+         lat = -lat / np.pi * 180
+
+         # np.int was removed in recent NumPy; the builtin int behaves the same here.
+         lon = (lon / 180 * equ_cx + equ_cx).astype(int)
+         lat = (lat / 90 * equ_cy + equ_cy).astype(int)
+         coordinate = (lat, lon)
+
+         x_map = np.repeat(np.arange(self._height), self._width)
+         y_map = np.tile(np.arange(self._width), self._height)
+
+         blank_map_x = np.zeros((height, width))
+         blank_map_y = np.zeros((height, width))
+         mask = np.zeros((height, width, 3))
+
+         blank_map_x[coordinate] = x_map
+         blank_map_y[coordinate] = y_map
+         mask[coordinate] = [1, 1, 1]
+
+         persp = cv2.remap(self._img, blank_map_y.astype(np.float32), blank_map_x.astype(np.float32), cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+         persp = persp * mask
+
+         return persp, mask
model.cpython-39-x86_64-linux-gnu.so ADDED
Binary file (176 kB).
 
model.pyc ADDED
Binary file (4.19 kB).
 
null_prompt.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c755f0261a4b055e75f80e44746f6c8db62113fa2181f90f6c13bff08b405539
+ size 316139
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ torch==2.0.1
+ opencv-python
+ transformers
+ diffusers
+ openai
+ einops
weights/last.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08b9ec605c2e962adcb3dfe95c4c89d977bbaaccfc2ff2a0888dbcdfdea2dd82
+ size 10012818135