add inversion
This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.
- .gitattributes +2 -0
- interface/app.py +37 -4
- interface/examples/benedict.jpg +0 -0
- interface/examples/me.jpg +0 -0
- interface/examples/obama.jpg +0 -0
- interface/inversion.py +65 -0
- interface/model_loader.py +3 -3
- interface/pixel2style2pixel/LICENSE +21 -0
- interface/pixel2style2pixel/README.md +517 -0
- interface/pixel2style2pixel/cog.yaml +32 -0
- interface/pixel2style2pixel/configs/__init__.py +0 -0
- interface/pixel2style2pixel/configs/data_configs.py +41 -0
- interface/pixel2style2pixel/configs/paths_config.py +20 -0
- interface/pixel2style2pixel/configs/transforms_config.py +154 -0
- interface/pixel2style2pixel/criteria/__init__.py +0 -0
- interface/pixel2style2pixel/criteria/id_loss.py +44 -0
- interface/pixel2style2pixel/criteria/lpips/__init__.py +0 -0
- interface/pixel2style2pixel/criteria/lpips/lpips.py +35 -0
- interface/pixel2style2pixel/criteria/lpips/networks.py +96 -0
- interface/pixel2style2pixel/criteria/lpips/utils.py +30 -0
- interface/pixel2style2pixel/criteria/moco_loss.py +69 -0
- interface/pixel2style2pixel/criteria/w_norm.py +14 -0
- interface/pixel2style2pixel/datasets/__init__.py +0 -0
- interface/pixel2style2pixel/datasets/augmentations.py +110 -0
- interface/pixel2style2pixel/datasets/gt_res_dataset.py +32 -0
- interface/pixel2style2pixel/datasets/images_dataset.py +33 -0
- interface/pixel2style2pixel/datasets/inference_dataset.py +22 -0
- interface/pixel2style2pixel/docs/encoding_inputs.jpg +0 -0
- interface/pixel2style2pixel/docs/encoding_outputs.jpg +0 -0
- interface/pixel2style2pixel/docs/frontalization_inputs.jpg +0 -0
- interface/pixel2style2pixel/docs/frontalization_outputs.jpg +0 -0
- interface/pixel2style2pixel/docs/seg2image.png +3 -0
- interface/pixel2style2pixel/docs/sketch2image.png +3 -0
- interface/pixel2style2pixel/docs/super_res_32.jpg +0 -0
- interface/pixel2style2pixel/docs/super_res_style_mixing.jpg +0 -0
- interface/pixel2style2pixel/docs/teaser.png +3 -0
- interface/pixel2style2pixel/docs/toonify_input.jpg +0 -0
- interface/pixel2style2pixel/docs/toonify_output.jpg +0 -0
- interface/pixel2style2pixel/download-weights.sh +12 -0
- interface/pixel2style2pixel/environment/psp_env.yaml +37 -0
- interface/pixel2style2pixel/licenses/LICENSE_HuangYG123 +21 -0
- interface/pixel2style2pixel/licenses/LICENSE_S-aiueo32 +25 -0
- interface/pixel2style2pixel/licenses/LICENSE_TreB1eN +21 -0
- interface/pixel2style2pixel/licenses/LICENSE_lessw2020 +201 -0
- interface/pixel2style2pixel/licenses/LICENSE_rosinality +21 -0
- interface/pixel2style2pixel/models/__init__.py +0 -0
- interface/pixel2style2pixel/models/encoders/__init__.py +0 -0
- interface/pixel2style2pixel/models/encoders/helpers.py +119 -0
- interface/pixel2style2pixel/models/encoders/model_irse.py +84 -0
- interface/pixel2style2pixel/models/encoders/psp_encoders.py +186 -0
.gitattributes
ADDED
@@ -0,0 +1,2 @@
+*.png filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
interface/app.py
CHANGED
@@ -4,6 +4,7 @@ import sys
 sys.path.append(".")
 sys.path.append("..")
 from model_loader import Model
+from inversion import InversionModel
 from PIL import Image
 import cv2
 from huggingface_hub import snapshot_download
@@ -25,7 +26,10 @@ models_files = {
 }
 
 models = {name: Model(models_path + "/" + path) for name, path in models_files.items()}
-
+inversion_model = InversionModel(
+    models_path + "/psp_ffhq_encode.pt",
+    models_path + "/shape_predictor_68_face_landmarks.dat",
+)
 
 canvas_html = """<draggan-canvas id="canvas-root" style='display:flex;max-width: 500px;margin: 0 auto;'></draggan-canvas>"""
 load_js = """
@@ -68,6 +72,13 @@ def random_sample(model_name: str):
     return img_pil, model_name, latents
 
 
+def load_from_img_file(image_path: str):
+    img_pil, latents = inversion_model.inference(image_path)
+    if RESIZE:
+        img_pil = img_pil.resize((128, 128))
+    return img_pil, "ffhq", latents
+
+
 def transform(model_state, latents_state, dxdysxsy=default_dxdysxsy, dz=0):
     if "w1" not in latents_state or "w1_initial" not in latents_state:
         raise gr.Error("Generate a random sample first")
@@ -107,7 +118,7 @@ def image_click(evt: gr.SelectData):
 
 
 with gr.Blocks() as block:
-    model_state = gr.State(value="
+    model_state = gr.State(value="ffhq")
     latents_state = gr.State({})
     gr.Markdown(
         """# UserControllableLT: User Controllable Latent Transformer
@@ -128,7 +139,7 @@ Double click to add or remove stop points.
             model_name = gr.Dropdown(
                 choices=list(models_files.keys()),
                 label="Select Pretrained Model",
-                value="
+                value="ffhq",
             )
             with gr.Row():
                 button = gr.Button("Random sample")
@@ -144,7 +155,23 @@ Double click to add or remove stop points.
                 minimum=-15, maximum=15, step_size=0.01, label="zoom", value=0.0
             )
             image = gr.Image(type="pil", visible=False, preprocess=False)
-
+            with gr.Accordion(label="Upload your face image", open=False):
+                gr.Markdown("<small> This only works on FFHQ model </small>")
+                with gr.Row():
+                    image_path = gr.Image(
+                        type="filepath", label="input image", interactive=True
+                    )
+                examples = gr.Examples(
+                    examples=[
+                        "interface/examples/benedict.jpg",
+                        "interface/examples/obama.jpg",
+                        "interface/examples/me.jpg",
+                    ],
+                    fn=load_from_img_file,
+                    run_on_click=True,
+                    inputs=[image_path],
+                    outputs=[image, model_state, latents_state],
+                )
         with gr.Column():
             html = gr.HTML(canvas_html, label="output")
 
@@ -176,6 +203,12 @@ Double click to add or remove stop points.
         show_progress=False,
     )
     image.change(None, inputs=[image], outputs=None, _js=image_change)
+    image_path.upload(
+        load_from_img_file,
+        inputs=[image_path],
+        outputs=[image, model_state, latents_state],
+    )
+
     block.load(None, None, None, _js=load_js)
     block.load(
         random_sample, inputs=[model_name], outputs=[image, model_state, latents_state]
interface/examples/benedict.jpg
ADDED
interface/examples/me.jpg
ADDED
interface/examples/obama.jpg
ADDED
interface/inversion.py
ADDED
@@ -0,0 +1,65 @@
+from argparse import Namespace
+import time
+import torch
+import torchvision.transforms as transforms
+import dlib
+import numpy as np
+from PIL import Image
+
+from pixel2style2pixel.utils.common import tensor2im
+from pixel2style2pixel.models.psp import pSp
+from pixel2style2pixel.scripts.align_all_parallel import align_face
+
+
+class InversionModel:
+    def __init__(self, checkpoint_path: str, dlib_path: str) -> None:
+        self.dlib_path = dlib_path
+        self.dlib_predictor = dlib.shape_predictor(dlib_path)
+
+        self.tranform_image = transforms.Compose(
+            [
+                transforms.Resize((256, 256)),
+                transforms.ToTensor(),
+                transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+            ]
+        )
+        ckpt = torch.load(checkpoint_path, map_location="cpu")
+        opts = ckpt["opts"]
+        opts["checkpoint_path"] = checkpoint_path
+        opts["learn_in_w"] = False
+        opts["output_size"] = 1024
+
+        self.opts = Namespace(**opts)
+        self.net = pSp(self.opts)
+        self.net.eval()
+        self.net.cuda()
+        print("Model successfully loaded!")
+
+    def run_alignment(self, image_path: str):
+        aligned_image = align_face(filepath=image_path, predictor=self.dlib_predictor)
+        print("Aligned image has shape: {}".format(aligned_image.size))
+        return aligned_image
+
+    def inference(self, image_path: str):
+        input_image = self.run_alignment(image_path)
+        input_image = input_image.resize((256, 256))
+        transformed_image = self.tranform_image(input_image)
+
+        with torch.no_grad():
+            tic = time.time()
+            result_image, latents = self.net(
+                transformed_image.unsqueeze(0).to("cuda").float(),
+                return_latents=True,
+                randomize_noise=False,
+            )
+            toc = time.time()
+            print("Inference took {:.4f} seconds.".format(toc - tic))
+
+        res_image = tensor2im(result_image[0])
+        return (
+            res_image,
+            {
+                "w1": latents.cpu().detach().numpy(),
+                "w1_initial": latents.cpu().detach().numpy(),
+            },
+        )
interface/model_loader.py
CHANGED
@@ -12,7 +12,7 @@ class Model:
     ):
         self.truncation = truncation
         self.use_average_code_as_input = use_average_code_as_input
-        ckpt = torch.load(checkpoint_path, map_location="
+        ckpt = torch.load(checkpoint_path, map_location="cpu")
         opts = ckpt["opts"]
         opts["checkpoint_path"] = checkpoint_path
         self.opts = Namespace(**ckpt["opts"])
@@ -84,7 +84,7 @@ class Model:
 
         dxyz = np.array([dxy[0], dxy[1], dz], dtype=np.float32)
         dxy_norm = np.linalg.norm(dxyz[:2], ord=2)
-        epsilon = 1e-8
+        epsilon = 1e-8
         dxy_norm = dxy_norm + epsilon
         dxyz[:2] = dxyz[:2] / dxy_norm
         vec_num = dxy_norm / 10
@@ -166,7 +166,7 @@ class Model:
             result,
             {
                 "w1": w1_new.cpu().detach().numpy(),
-                "w1_initial":
+                "w1_initial": w1_initial.cpu().detach().numpy(),
             },
         )
 
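The second hunk guards the drag-vector normalization against a zero-length drag: adding a small `epsilon` to the L2 norm before dividing keeps the division finite when `dxy == (0, 0)`. A standalone illustration of that arithmetic (not the `Model` class itself):

```python
import numpy as np


def normalized_drag(dxy, dz, epsilon=1e-8):
    """Normalize the in-plane drag direction the way the changed lines in Model do."""
    dxyz = np.array([dxy[0], dxy[1], dz], dtype=np.float32)
    dxy_norm = np.linalg.norm(dxyz[:2], ord=2) + epsilon  # epsilon keeps a zero drag finite
    dxyz[:2] = dxyz[:2] / dxy_norm
    vec_num = dxy_norm / 10
    return dxyz, vec_num


print(normalized_drag((0.0, 0.0), 0.0))  # no NaNs thanks to epsilon
print(normalized_drag((3.0, 4.0), 0.0))  # direction (0.6, 0.8), magnitude 0.5
```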
interface/pixel2style2pixel/LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Elad Richardson, Yuval Alaluf
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
interface/pixel2style2pixel/README.md
ADDED
@@ -0,0 +1,517 @@
+# Encoding in Style: a StyleGAN Encoder for Image-to-Image Translation
+<a href="https://arxiv.org/abs/2008.00951"><img src="https://img.shields.io/badge/arXiv-2008.00951-b31b1b.svg" height=22.5></a>
+<a href="https://opensource.org/licenses/MIT"><img src="https://img.shields.io/badge/License-MIT-yellow.svg" height=22.5></a>
+
+<a href="https://www.youtube.com/watch?v=bfvSwhqsTgM"><img src="https://img.shields.io/static/v1?label=CVPR 2021&message=5 Minute Video&color=red" height=22.5></a>
+<a href="https://replicate.ai/eladrich/pixel2style2pixel"><img src="https://img.shields.io/static/v1?label=Replicate&message=Demo and Docker Image&color=darkgreen" height=22.5></a>
+
+<a href="http://colab.research.google.com/github/eladrich/pixel2style2pixel/blob/master/notebooks/inference_playground.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" height=22.5></a>
+
+> We present a generic image-to-image translation framework, pixel2style2pixel (pSp).
+Our pSp framework is based on a novel encoder network that directly generates a series of style vectors which are fed into a pretrained StyleGAN generator,
+forming the extended W+ latent space. We first show that our encoder can directly embed real images into W+, with no additional optimization.
+Next, we propose utilizing our encoder to directly solve image-to-image translation tasks, defining them as encoding problems from some input domain into the
+latent domain. By deviating from the standard "invert first, edit later" methodology used with previous StyleGAN encoders, our approach can handle a variety of
+tasks even when the input image is not represented in the StyleGAN domain. We show that solving translation tasks through StyleGAN significantly simplifies the training process, as no adversary is required, has better support
+>for solving tasks without pixel-to-pixel correspondence, and inherently supports multi-modal synthesis via the resampling of styles.
+Finally, we demonstrate the potential of our framework on a variety of facial image-to-image translation tasks, even when compared to state-of-the-art solutions designed specifically for a single task, and further show that it can be extended beyond the human facial domain.
+
+<p align="center">
+<img src="docs/teaser.png" width="800px"/>
+<br>
+The proposed pixel2style2pixel framework can be used to solve a wide variety of image-to-image translation tasks. Here we show results of pSp on StyleGAN inversion, multi-modal conditional image synthesis, facial frontalization, inpainting and super-resolution.
+</p>
+
+## Description
+Official Implementation of our pSp paper for both training and evaluation. The pSp method extends the StyleGAN model to
+allow solving different image-to-image translation problems using its encoder.
+
+## Table of Contents
+* [Description](#description)
+* [Table of Contents](#table-of-contents)
+* [Recent Updates](#recent-updates)
+* [Applications](#applications)
+  + [StyleGAN Encoding](#stylegan-encoding)
+  + [Face Frontalization](#face-frontalization)
+  + [Conditional Image Synthesis](#conditional-image-synthesis)
+  + [Super Resolution](#super-resolution)
+* [Getting Started](#getting-started)
+  + [Prerequisites](#prerequisites)
+  + [Installation](#installation)
+  + [Inference Notebook](#inference-notebook)
+  + [Pretrained Models](#pretrained-models)
+* [Training](#training)
+  + [Preparing your Data](#preparing-your-data)
+  + [Training pSp](#training-psp)
+    - [Training the pSp Encoder](#training-the-psp-encoder)
+    - [Frontalization](#frontalization)
+    - [Sketch to Face](#sketch-to-face)
+    - [Segmentation Map to Face](#segmentation-map-to-face)
+    - [Super Resolution](#super-resolution-1)
+  + [Additional Notes](#additional-notes)
+  + [Weights & Biases Integration](#weights--biases-integration)
+* [Testing](#testing)
+  + [Inference](#inference)
+  + [Multi-Modal Synthesis with Style-Mixing](#multi-modal-synthesis-with-style-mixing)
+  + [Computing Metrics](#computing-metrics)
+* [Additional Applications](#additional-applications)
+  + [Toonify](#toonify)
+* [Repository structure](#repository-structure)
+* [TODOs](#todos)
+* [Credits](#credits)
+* [Inspired by pSp](#inspired-by-psp)
+* [pSp in the Media](#psp-in-the-media)
+* [Citation](#citation)
+
+## Recent Updates
+**`2020.10.04`**: Initial code release
+**`2020.10.06`**: Add pSp toonify model (Thanks to the great work from [Doron Adler](https://linktr.ee/Norod78) and [Justin Pinkney](https://www.justinpinkney.com/))!
+**`2021.04.23`**: Added several new features:
+- Added supported for StyleGANs of different resolutions (e.g., 256, 512, 1024). This can be set using the flag `--output_size`, which is set to 1024 by default.
+- Added support for the MoCo-Based similarity loss introduced in [encoder4editing (Tov et al. 2021)](https://github.com/omertov/encoder4editing). More details are provided [below](https://github.com/eladrich/pixel2style2pixel#training-psp).
+
+**`2021.07.06`**: Added support for training with Weights & Biases. [See below for details](https://github.com/eladrich/pixel2style2pixel#weights--biases-integration).
+
+## Applications
+### StyleGAN Encoding
+Here, we use pSp to find the latent code of real images in the latent domain of a pretrained StyleGAN generator.
+<p align="center">
+<img src="docs/encoding_inputs.jpg" width="800px"/>
+<img src="docs/encoding_outputs.jpg" width="800px"/>
+</p>
+
+
+### Face Frontalization
+In this application we want to generate a front-facing face from a given input image.
+<p align="center">
+<img src="docs/frontalization_inputs.jpg" width="800px"/>
+<img src="docs/frontalization_outputs.jpg" width="800px"/>
+</p>
+
+### Conditional Image Synthesis
+Here we wish to generate photo-realistic face images from ambiguous sketch images or segmentation maps. Using style-mixing, we inherently support multi-modal synthesis for a single input.
+<p align="center">
+<img src="docs/seg2image.png" width="800px"/>
+<img src="docs/sketch2image.png" width="800px"/>
+</p>
+
+### Super Resolution
+Given a low-resolution input image, we generate a corresponding high-resolution image. As this too is an ambiguous task, we can use style-mixing to produce several plausible results.
+<p align="center">
+<img src="docs/super_res_32.jpg" width="800px"/>
+<img src="docs/super_res_style_mixing.jpg" width="800px"/>
+</p>
+
+
+## Getting Started
+### Prerequisites
+- Linux or macOS
+- NVIDIA GPU + CUDA CuDNN (CPU may be possible with some modifications, but is not inherently supported)
+- Python 2 or 3
+
+### Installation
+- Clone this repo:
+```
+git clone https://github.com/eladrich/pixel2style2pixel.git
+cd pixel2style2pixel
+```
+- Dependencies:
+We recommend running this repository using [Anaconda](https://docs.anaconda.com/anaconda/install/).
+All dependencies for defining the environment are provided in `environment/psp_env.yaml`.
+
+### Inference Notebook
+To help visualize the pSp framework on multiple tasks and to help you get started, we provide a Jupyter notebook found in `notebooks/inference_playground.ipynb` that allows one to visualize the various applications of pSp.
+The notebook will download the necessary pretrained models and run inference on the images found in `notebooks/images`.
+For the tasks of conditional image synthesis and super resolution, the notebook also demonstrates pSp's ability to perform multi-modal synthesis using
+style-mixing.
+
+### Pretrained Models
+Please download the pre-trained models from the following links. Each pSp model contains the entire pSp architecture, including the encoder and decoder weights.
+| Path | Description
+| :--- | :----------
+|[StyleGAN Inversion](https://drive.google.com/file/d/1bMTNWkh5LArlaWSc_wa8VKyq2V42T2z0/view?usp=sharing) | pSp trained with the FFHQ dataset for StyleGAN inversion.
+|[Face Frontalization](https://drive.google.com/file/d/1_S4THAzXb-97DbpXmanjHtXRyKxqjARv/view?usp=sharing) | pSp trained with the FFHQ dataset for face frontalization.
+|[Sketch to Image](https://drive.google.com/file/d/1lB7wk7MwtdxL-LL4Z_T76DuCfk00aSXA/view?usp=sharing) | pSp trained with the CelebA-HQ dataset for image synthesis from sketches.
+|[Segmentation to Image](https://drive.google.com/file/d/1VpEKc6E6yG3xhYuZ0cq8D2_1CbT0Dstz/view?usp=sharing) | pSp trained with the CelebAMask-HQ dataset for image synthesis from segmentation maps.
+|[Super Resolution](https://drive.google.com/file/d/1ZpmSXBpJ9pFEov6-jjQstAlfYbkebECu/view?usp=sharing) | pSp trained with the CelebA-HQ dataset for super resolution (up to x32 down-sampling).
+|[Toonify](https://drive.google.com/file/d/1YKoiVuFaqdvzDP5CZaqa3k5phL-VDmyz/view) | pSp trained with the FFHQ dataset for toonification using StyleGAN generator from [Doron Adler](https://linktr.ee/Norod78) and [Justin Pinkney](https://www.justinpinkney.com/).
+
+If you wish to use one of the pretrained models for training or inference, you may do so using the flag `--checkpoint_path`.
+
+In addition, we provide various auxiliary models needed for training your own pSp model from scratch as well as pretrained models needed for computing the ID metrics reported in the paper.
+| Path | Description
+| :--- | :----------
+|[FFHQ StyleGAN](https://drive.google.com/file/d/1EM87UquaoQmk17Q8d5kYIAHqu0dkYqdT/view?usp=sharing) | StyleGAN model pretrained on FFHQ taken from [rosinality](https://github.com/rosinality/stylegan2-pytorch) with 1024x1024 output resolution.
+|[IR-SE50 Model](https://drive.google.com/file/d/1KW7bjndL3QG3sxBbZxreGHigcCCpsDgn/view?usp=sharing) | Pretrained IR-SE50 model taken from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) for use in our ID loss during pSp training.
+|[MoCo ResNet-50](https://drive.google.com/file/d/18rLcNGdteX5LwT7sv_F7HWr12HpVEzVe/view?usp=sharing) | Pretrained ResNet-50 model trained using MOCOv2 for computing MoCo-based similarity loss on non-facial domains. The model is taken from the [official implementation](https://github.com/facebookresearch/moco).
+|[CurricularFace Backbone](https://drive.google.com/file/d/1f4IwVa2-Bn9vWLwB-bUwm53U_MlvinAj/view?usp=sharing) | Pretrained CurricularFace model taken from [HuangYG123](https://github.com/HuangYG123/CurricularFace) for use in ID similarity metric computation.
+|[MTCNN](https://drive.google.com/file/d/1tJ7ih-wbCO6zc3JhI_1ZGjmwXKKaPlja/view?usp=sharing) | Weights for MTCNN model taken from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch) for use in ID similarity metric computation. (Unpack the tar.gz to extract the 3 model weights.)
+
+By default, we assume that all auxiliary models are downloaded and saved to the directory `pretrained_models`. However, you may use your own paths by changing the necessary values in `configs/path_configs.py`.
+
+## Training
+### Preparing your Data
+- Currently, we provide support for numerous datasets and experiments (encoding, frontalization, etc.).
+- Refer to `configs/paths_config.py` to define the necessary data paths and model paths for training and evaluation.
+- Refer to `configs/transforms_config.py` for the transforms defined for each dataset/experiment.
+- Finally, refer to `configs/data_configs.py` for the source/target data paths for the train and test sets
+as well as the transforms.
+- If you wish to experiment with your own dataset, you can simply make the necessary adjustments in
+1. `data_configs.py` to define your data paths.
+2. `transforms_configs.py` to define your own data transforms.
+
+As an example, assume we wish to run encoding using ffhq (`dataset_type=ffhq_encode`).
+We first go to `configs/paths_config.py` and define:
+```
+dataset_paths = {
+    'ffhq': '/path/to/ffhq/images256x256'
+    'celeba_test': '/path/to/CelebAMask-HQ/test_img',
+}
+```
+The transforms for the experiment are defined in the class `EncodeTransforms` in `configs/transforms_config.py`.
+Finally, in `configs/data_configs.py`, we define:
+```
+DATASETS = {
+    'ffhq_encode': {
+        'transforms': transforms_config.EncodeTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+}
+```
+When defining our datasets, we will take the values in the above dictionary.
+
+
+### Training pSp
+The main training script can be found in `scripts/train.py`.
+Intermediate training results are saved to `opts.exp_dir`. This includes checkpoints, train outputs, and test outputs.
+Additionally, if you have tensorboard installed, you can visualize tensorboard logs in `opts.exp_dir/logs`.
+
+#### Training the pSp Encoder
+```
+python scripts/train.py \
+--dataset_type=ffhq_encode \
+--exp_dir=/path/to/experiment \
+--workers=8 \
+--batch_size=8 \
+--test_batch_size=8 \
+--test_workers=8 \
+--val_interval=2500 \
+--save_interval=5000 \
+--encoder_type=GradualStyleEncoder \
+--start_from_latent_avg \
+--lpips_lambda=0.8 \
+--l2_lambda=1 \
+--id_lambda=0.1
+```
+
+#### Frontalization
+```
+python scripts/train.py \
+--dataset_type=ffhq_frontalize \
+--exp_dir=/path/to/experiment \
+--workers=8 \
+--batch_size=8 \
+--test_batch_size=8 \
+--test_workers=8 \
+--val_interval=2500 \
+--save_interval=5000 \
+--encoder_type=GradualStyleEncoder \
+--start_from_latent_avg \
+--lpips_lambda=0.08 \
+--l2_lambda=0.001 \
+--lpips_lambda_crop=0.8 \
+--l2_lambda_crop=0.01 \
+--id_lambda=1 \
+--w_norm_lambda=0.005
+```
+
+#### Sketch to Face
+```
+python scripts/train.py \
+--dataset_type=celebs_sketch_to_face \
+--exp_dir=/path/to/experiment \
+--workers=8 \
+--batch_size=8 \
+--test_batch_size=8 \
+--test_workers=8 \
+--val_interval=2500 \
+--save_interval=5000 \
+--encoder_type=GradualStyleEncoder \
+--start_from_latent_avg \
+--lpips_lambda=0.8 \
+--l2_lambda=1 \
+--id_lambda=0 \
+--w_norm_lambda=0.005 \
+--label_nc=1 \
+--input_nc=1
+```
+
+#### Segmentation Map to Face
+```
+python scripts/train.py \
+--dataset_type=celebs_seg_to_face \
+--exp_dir=/path/to/experiment \
+--workers=8 \
+--batch_size=8 \
+--test_batch_size=8 \
+--test_workers=8 \
+--val_interval=2500 \
+--save_interval=5000 \
+--encoder_type=GradualStyleEncoder \
+--start_from_latent_avg \
+--lpips_lambda=0.8 \
+--l2_lambda=1 \
+--id_lambda=0 \
+--w_norm_lambda=0.005 \
+--label_nc=19 \
+--input_nc=19
+```
+Notice with conditional image synthesis no identity loss is utilized (i.e. `--id_lambda=0`)
+
+#### Super Resolution
+```
+python scripts/train.py \
+--dataset_type=celebs_super_resolution \
+--exp_dir=/path/to/experiment \
+--workers=8 \
+--batch_size=8 \
+--test_batch_size=8 \
+--test_workers=8 \
+--val_interval=2500 \
+--save_interval=5000 \
+--encoder_type=GradualStyleEncoder \
+--start_from_latent_avg \
+--lpips_lambda=0.8 \
+--l2_lambda=1 \
+--id_lambda=0.1 \
+--w_norm_lambda=0.005 \
+--resize_factors=1,2,4,8,16,32
+```
+
+### Additional Notes
+- See `options/train_options.py` for all training-specific flags.
+- See `options/test_options.py` for all test-specific flags.
+- If you wish to resume from a specific checkpoint (e.g. a pretrained pSp model), you may do so using `--checkpoint_path`.
+- By default, we assume that the StyleGAN used outputs images at resolution `1024x1024`. If you wish to use a StyleGAN at a smaller resolution, you can do so by using the flag `--output_size` (e.g., `--output_size=256`).
+- If you wish to generate images from segmentation maps, please specify `--label_nc=N` and `--input_nc=N` where `N`
+is the number of semantic categories.
+- Similarly, for generating images from sketches, please specify `--label_nc=1` and `--input_nc=1`.
+- Specifying `--label_nc=0` (the default value), will directly use the RGB colors as input.
+
+** Identity/Similarity Losses **
+In pSp, we introduce a facial identity loss using a pre-trained ArcFace network for facial recognition. When operating on the human facial domain, we
+highly recommend employing this loss objective by using the flag `--id_lambda`.
+In a more recent paper, [encoder4editing](https://github.com/omertov/encoder4editing), the authors generalize this identity loss to other domains by
+using a MoCo-based ResNet to extract features instead of an ArcFace network.
+Applying this MoCo-based similarity loss can be done by using the flag `--moco_lambda`. We recommend setting `--moco_lambda=0.5` in your experiments.
+Please note, you <ins>cannot</ins> set both `id_lambda` and `moco_lambda` to be active simultaneously (e.g., to use the MoCo-based loss, you should specify,
+`--moco_lambda=0.5 --id_lambda=0`).
+
+### Weights & Biases Integration
+To help track your experiments, we've integrated [Weights & Biases](https://wandb.ai/home) into our training process.
+To enable Weights & Biases (`wandb`), first make an account on the platform's webpage and install `wandb` using
+`pip install wandb`. Then, to train pSp using `wandb`, simply add the flag `--use_wandb`.
+
+Note that when running for the first time, you will be asked to provide your access key which can be accessed via the
+Weights & Biases platform.
+
+Using Weights & Biases will allow you to visualize the training and testing loss curves as well as
+intermediate training results.
+
+
+## Testing
+### Inference
+Having trained your model, you can use `scripts/inference.py` to apply the model on a set of images.
+For example,
+```
+python scripts/inference.py \
+--exp_dir=/path/to/experiment \
+--checkpoint_path=experiment/checkpoints/best_model.pt \
+--data_path=/path/to/test_data \
+--test_batch_size=4 \
+--test_workers=4 \
+--couple_outputs
+```
+Additional notes to consider:
+- During inference, the options used during training are loaded from the saved checkpoint and are then updated using the
+test options passed to the inference script. For example, there is no need to pass `--dataset_type` or `--label_nc` to the
+inference script, as they are taken from the loaded `opts`.
+- When running inference for segmentation-to-image or sketch-to-image, it is highly recommend to do so with a style-mixing,
+as is done in the paper. This can simply be done by adding `--latent_mask=8,9,10,11,12,13,14,15,16,17` when calling the
+script.
+- When running inference for super-resolution, please provide a single down-sampling value using `--resize_factors`.
+- Adding the flag `--couple_outputs` will save an additional image containing the input and output images side-by-side in the sub-directory
+`inference_coupled`. Otherwise, only the output image is saved to the sub-directory `inference_results`.
+- By default, the images will be saved at resolutiosn of 1024x1024, the original output size of StyleGAN. If you wish to save
+outputs resized to resolutions of 256x256, you can do so by adding the flag `--resize_outputs`.
+
+
+### Multi-Modal Synthesis with Style-Mixing
+Given a trained model for conditional image synthesis or super-resolution, we can easily generate multiple outputs
+for a given input image. This can be done using the script `scripts/style_mixing.py`.
+For example, running the following command will perform style-mixing for a segmentation-to-image experiment:
+```
+python scripts/style_mixing.py \
+--exp_dir=/path/to/experiment \
+--checkpoint_path=/path/to/experiment/checkpoints/best_model.pt \
+--data_path=/path/to/test_data/ \
+--test_batch_size=4 \
+--test_workers=4 \
+--n_images=25 \
+--n_outputs_to_generate=5 \
+--latent_mask=8,9,10,11,12,13,14,15,16,17
+```
+Here, we inject `5` randomly drawn vectors and perform style-mixing on the latents `[8,9,10,11,12,13,14,15,16,17]`.
+
+Additional notes to consider:
+- To perform style-mixing on a subset of images, you may use the flag `--n_images`. The default value of `None` will perform
+style mixing on every image in the given `data_path`.
+- You may also include the argument `--mix_alpha=m` where `m` is a float defining the mixing coefficient between the
+input latent and the randomly drawn latent.
+- When performing style-mixing for super-resolution, please provide a single down-sampling value using `--resize_factors`.
+- By default, the images will be saved at resolutiosn of 1024x1024, the original output size of StyleGAN. If you wish to save
+outputs resized to resolutions of 256x256, you can do so by adding the flag `--resize_outputs`.
+
+
+### Computing Metrics
+Similarly, given a trained model and generated outputs, we can compute the loss metrics on a given dataset.
+These scripts receive the inference output directory and ground truth directory.
+- Calculating the identity loss:
+```
+python scripts/calc_id_loss_parallel.py \
+--data_path=/path/to/experiment/inference_outputs \
+--gt_path=/path/to/test_images \
+```
+- Calculating LPIPS loss:
+```
+python scripts/calc_losses_on_images.py \
+--mode lpips
+--data_path=/path/to/experiment/inference_outputs \
+--gt_path=/path/to/test_images \
+```
+- Calculating L2 loss:
+```
+python scripts/calc_losses_on_images.py \
+--mode l2
+--data_path=/path/to/experiment/inference_outputs \
+--gt_path=/path/to/test_images \
+```
+
+## Additional Applications
+To better show the flexibility of our pSp framework we present additional applications below.
+
+As with our main applications, you may download the pretrained models here:
+| Path | Description
+| :--- | :----------
+|[Toonify](https://drive.google.com/file/d/1YKoiVuFaqdvzDP5CZaqa3k5phL-VDmyz/view) | pSp trained with the FFHQ dataset for toonification using StyleGAN generator from [Doron Adler](https://linktr.ee/Norod78) and [Justin Pinkney](https://www.justinpinkney.com/).
+
+### Toonify
+Using the toonify StyleGAN built by [Doron Adler](https://linktr.ee/Norod78) and [Justin Pinkney](https://www.justinpinkney.com/),
+we take a real face image and generate a toonified version of the given image. We train the pSp encoder to directly reconstruct real
+face images inside the toons latent space resulting in a projection of each image to the closest toon. We do so without requiring any labeled pairs
+or distillation!
+<p align="center">
+<img src="docs/toonify_input.jpg" width="800px"/>
+<img src="docs/toonify_output.jpg" width="800px"/>
+</p>
+
+This is trained exactly like the StyleGAN inversion task with several changes:
+- Change from FFHQ StyleGAN to toonifed StyleGAN (can be set using `--stylegan_weights`)
+- The toonify generator is taken from [Doron Adler](https://linktr.ee/Norod78) and [Justin Pinkney](https://www.justinpinkney.com/)
+and converted to Pytorch using [rosinality's](https://github.com/rosinality/stylegan2-pytorch) conversion script.
+- For convenience, the converted generator Pytorch model may be downloaded [here](https://drive.google.com/file/d/1r3XVCt_WYUKFZFxhNH-xO2dTtF6B5szu/view?usp=sharing).
+- Increase `id_lambda` from `0.1` to `1`
+- Increase `w_norm_lambda` from `0.005` to `0.025`
+
+We obtain the best results after around `6000` iterations of training (can be set using `--max_steps`)
+
+
+## Repository structure
+| Path | Description <img width=200>
+| :--- | :---
+| pixel2style2pixel | Repository root folder
+| ├ configs | Folder containing configs defining model/data paths and data transforms
+| ├ criteria | Folder containing various loss criterias for training
+| ├ datasets | Folder with various dataset objects and augmentations
+| ├ environment | Folder containing Anaconda environment used in our experiments
+| ├ models | Folder containting all the models and training objects
+| │ ├ encoders | Folder containing our pSp encoder architecture implementation and ArcFace encoder implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
+| │ ├ mtcnn | MTCNN implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
+| │ ├ stylegan2 | StyleGAN2 model from [rosinality](https://github.com/rosinality/stylegan2-pytorch)
+| │ └ psp.py | Implementation of our pSp framework
+| ├ notebook | Folder with jupyter notebook containing pSp inference playground
+| ├ options | Folder with training and test command-line options
+| ├ scripts | Folder with running scripts for training and inference
+| ├ training | Folder with main training logic and Ranger implementation from [lessw2020](https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer)
+| ├ utils | Folder with various utility functions
+| <img width=300> | <img>
+
+## TODOs
+- [ ] Add multi-gpu support
+
+## Credits
+**StyleGAN2 implementation:**
+https://github.com/rosinality/stylegan2-pytorch
+Copyright (c) 2019 Kim Seonghyeon
+License (MIT) https://github.com/rosinality/stylegan2-pytorch/blob/master/LICENSE
+
+**MTCNN, IR-SE50, and ArcFace models and implementations:**
+https://github.com/TreB1eN/InsightFace_Pytorch
+Copyright (c) 2018 TreB1eN
+License (MIT) https://github.com/TreB1eN/InsightFace_Pytorch/blob/master/LICENSE
+
+**CurricularFace model and implementation:**
+https://github.com/HuangYG123/CurricularFace
+Copyright (c) 2020 HuangYG123
+License (MIT) https://github.com/HuangYG123/CurricularFace/blob/master/LICENSE
+
+**Ranger optimizer implementation:**
+https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer
+License (Apache License 2.0) https://github.com/lessw2020/Ranger-Deep-Learning-Optimizer/blob/master/LICENSE
+
+**LPIPS implementation:**
+https://github.com/S-aiueo32/lpips-pytorch
+Copyright (c) 2020, Sou Uchida
+License (BSD 2-Clause) https://github.com/S-aiueo32/lpips-pytorch/blob/master/LICENSE
+
+**Please Note**: The CUDA files under the [StyleGAN2 ops directory](https://github.com/eladrich/pixel2style2pixel/tree/master/models/stylegan2/op) are made available under the [Nvidia Source Code License-NC](https://nvlabs.github.io/stylegan2/license.html)
+
+## Inspired by pSp
+Below are several works inspired by pSp that we found particularly interesting:
+
+**Reverse Toonification**
+Using our pSp encoder, artist [Nathan Shipley](https://linktr.ee/nathan_shipley) transformed animated figures and paintings into real life. Check out his amazing work on his [twitter page](https://twitter.com/citizenplain?lang=en) and [website](http://www.nathanshipley.com/gan).
+
+**Deploying pSp with StyleSpace for Editing**
+Awesome work from [Justin Pinkney](https://www.justinpinkney.com/) who deployed our pSp model on Runway and provided support for editing the resulting inversions using the [StyleSpace Analysis paper](https://arxiv.org/abs/2011.12799). Check out his repository [here](https://github.com/justinpinkney/pixel2style2pixel).
+
+**Encoder4Editing (e4e)**
+Building on the work of pSp, Tov et al. design an encoder to enable high quality edits on real images. Check out their [paper](https://arxiv.org/abs/2102.02766) and [code](https://github.com/omertov/encoder4editing).
+
+**Style-based Age Manipulation (SAM)**
+Leveraging pSp and the rich semantics of StyleGAN, SAM learns non-linear latent space paths for modeling the age transformation of real face images. Check out the project page [here](https://yuval-alaluf.github.io/SAM/).
+
+**ReStyle**
+ReStyle builds on recent encoders such as pSp and e4e by introducing an iterative refinment mechanism to gradually improve the inversion of real images. Check out the project page [here](https://yuval-alaluf.github.io/restyle-encoder/).
+
+## pSp in the Media
+* bycloud: [AI Generates Cartoon Characters In Real Life Pixel2Style2Pixel](https://www.youtube.com/watch?v=g-N8lfceclI&ab_channel=bycloud)
+* Synced: [Pixel2Style2Pixel: Novel Encoder Architecture Boosts Facial Image-To-Image Translation](https://syncedreview.com/2020/08/07/pixel2style2pixel-novel-encoder-architecture-boosts-facial-image-to-image-translation/)
+* Cartoon Brew: [An Artist Has Used Machine Learning To Turn Animated Characters Into Creepy Photorealistic Figures](https://www.cartoonbrew.com/tech/an-artist-has-used-machine-learning-to-turn-animated-characters-into-creepy-photorealistic-figures-197975.html)
+
+
+## Citation
+If you use this code for your research, please cite our paper <a href="https://arxiv.org/abs/2008.00951">Encoding in Style: a StyleGAN Encoder for Image-to-Image Translation</a>:
+
+```
+@InProceedings{richardson2021encoding,
+  author = {Richardson, Elad and Alaluf, Yuval and Patashnik, Or and Nitzan, Yotam and Azar, Yaniv and Shapiro, Stav and Cohen-Or, Daniel},
+  title = {Encoding in Style: a StyleGAN Encoder for Image-to-Image Translation},
+  booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month = {June},
+  year = {2021}
+}
+```
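The README's style-mixing commands rest on the idea that a W+ code is a stack of per-layer style vectors, so the coarse layers can come from the input while the fine layers (indices 8-17 for a 1024px generator) are resampled. A schematic sketch of that mixing step under those assumptions, not the repository's `scripts/style_mixing.py`:

```python
import torch


def mix_styles(w_plus, w_random, latent_mask=(8, 9, 10, 11, 12, 13, 14, 15, 16, 17), alpha=None):
    """Replace (or blend, when alpha is given) the masked style layers with randomly drawn ones."""
    mixed = w_plus.clone()
    for i in latent_mask:
        if alpha is None:
            mixed[:, i] = w_random[:, i]
        else:
            mixed[:, i] = alpha * w_random[:, i] + (1 - alpha) * w_plus[:, i]
    return mixed


w_plus = torch.randn(1, 18, 512)    # encoder output for an input image (shape assumed)
w_random = torch.randn(1, 18, 512)  # randomly drawn style code
print(mix_styles(w_plus, w_random).shape)  # torch.Size([1, 18, 512])
```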
interface/pixel2style2pixel/cog.yaml
ADDED
@@ -0,0 +1,32 @@
+build:
+  gpu: true
+  python_version: "3.8"
+  system_packages:
+    - "libgl1-mesa-glx"
+    - "libglib2.0-0"
+    - "ninja-build"
+  python_packages:
+    - "cmake==3.21.2"
+    - "torch==1.8.0"
+    - "torchvision==0.9.0"
+    - "numpy==1.21.1"
+    - "ipython==7.21.0"
+    - "tensorboard==2.6.0"
+    - "tqdm==4.43.0"
+    - "torch-optimizer==0.1.0"
+    - "opencv-python==4.5.3.56"
+    - "Pillow==8.3.2"
+    - "matplotlib==3.2.1"
+    - "scipy==1.7.1"
+  run:
+    - pip install dlib
+
+predict: "predict.py:Predictor"
interface/pixel2style2pixel/configs/__init__.py
ADDED
File without changes
interface/pixel2style2pixel/configs/data_configs.py
ADDED
@@ -0,0 +1,41 @@
+from configs import transforms_config
+from configs.paths_config import dataset_paths
+
+
+DATASETS = {
+    'ffhq_encode': {
+        'transforms': transforms_config.EncodeTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'ffhq_frontalize': {
+        'transforms': transforms_config.FrontalizationTransforms,
+        'train_source_root': dataset_paths['ffhq'],
+        'train_target_root': dataset_paths['ffhq'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'celebs_sketch_to_face': {
+        'transforms': transforms_config.SketchToImageTransforms,
+        'train_source_root': dataset_paths['celeba_train_sketch'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test_sketch'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'celebs_seg_to_face': {
+        'transforms': transforms_config.SegToImageTransforms,
+        'train_source_root': dataset_paths['celeba_train_segmentation'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test_segmentation'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+    'celebs_super_resolution': {
+        'transforms': transforms_config.SuperResTransforms,
+        'train_source_root': dataset_paths['celeba_train'],
+        'train_target_root': dataset_paths['celeba_train'],
+        'test_source_root': dataset_paths['celeba_test'],
+        'test_target_root': dataset_paths['celeba_test'],
+    },
+}
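Each `DATASETS` entry pairs a transforms class with source/target roots for the train and test splits. A sketch of how such an entry can be unpacked (the real wiring lives in the training code, which is not part of this diff, so treat the snippet as orientation only; it also assumes the empty default paths in `paths_config.py` have been filled in):

```python
# Orientation only: pulling the pieces a trainer needs out of a DATASETS entry.
from configs.data_configs import DATASETS

dataset_args = DATASETS['ffhq_encode']
transforms_dict = dataset_args['transforms'](opts=None).get_transforms()

train_source_root = dataset_args['train_source_root']   # images the encoder sees
train_target_root = dataset_args['train_target_root']   # images it should reconstruct
train_transform = transforms_dict['transform_gt_train']
print(train_source_root, train_target_root, train_transform)
```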
interface/pixel2style2pixel/configs/paths_config.py
ADDED
@@ -0,0 +1,20 @@
+dataset_paths = {
+    'celeba_train': '',
+    'celeba_test': '',
+    'celeba_train_sketch': '',
+    'celeba_test_sketch': '',
+    'celeba_train_segmentation': '',
+    'celeba_test_segmentation': '',
+    'ffhq': '',
+}
+
+model_paths = {
+    'stylegan_ffhq': 'pretrained_models/stylegan2-ffhq-config-f.pt',
+    'ir_se50': 'pretrained_models/model_ir_se50.pth',
+    'circular_face': 'pretrained_models/CurricularFace_Backbone.pth',
+    'mtcnn_pnet': 'pretrained_models/mtcnn/pnet.npy',
+    'mtcnn_rnet': 'pretrained_models/mtcnn/rnet.npy',
+    'mtcnn_onet': 'pretrained_models/mtcnn/onet.npy',
+    'shape_predictor': 'shape_predictor_68_face_landmarks.dat',
+    'moco': 'pretrained_models/moco_v2_800ep_pretrain.pth.tar'
+}
interface/pixel2style2pixel/configs/transforms_config.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import abstractmethod
|
2 |
+
import torchvision.transforms as transforms
|
3 |
+
from datasets import augmentations
|
4 |
+
|
5 |
+
|
6 |
+
class TransformsConfig(object):
|
7 |
+
|
8 |
+
def __init__(self, opts):
|
9 |
+
self.opts = opts
|
10 |
+
|
11 |
+
@abstractmethod
|
12 |
+
def get_transforms(self):
|
13 |
+
pass
|
14 |
+
|
15 |
+
|
16 |
+
class EncodeTransforms(TransformsConfig):
|
17 |
+
|
18 |
+
def __init__(self, opts):
|
19 |
+
super(EncodeTransforms, self).__init__(opts)
|
20 |
+
|
21 |
+
def get_transforms(self):
|
22 |
+
transforms_dict = {
|
23 |
+
'transform_gt_train': transforms.Compose([
|
24 |
+
transforms.Resize((256, 256)),
|
25 |
+
transforms.RandomHorizontalFlip(0.5),
|
26 |
+
transforms.ToTensor(),
|
27 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
28 |
+
'transform_source': None,
|
29 |
+
'transform_test': transforms.Compose([
|
30 |
+
transforms.Resize((256, 256)),
|
31 |
+
transforms.ToTensor(),
|
32 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
33 |
+
'transform_inference': transforms.Compose([
|
34 |
+
transforms.Resize((256, 256)),
|
35 |
+
transforms.ToTensor(),
|
36 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
|
37 |
+
}
|
38 |
+
return transforms_dict
|
39 |
+
|
40 |
+
|
41 |
+
class FrontalizationTransforms(TransformsConfig):
|
42 |
+
|
43 |
+
def __init__(self, opts):
|
44 |
+
super(FrontalizationTransforms, self).__init__(opts)
|
45 |
+
|
46 |
+
def get_transforms(self):
|
47 |
+
transforms_dict = {
|
48 |
+
'transform_gt_train': transforms.Compose([
|
49 |
+
transforms.Resize((256, 256)),
|
50 |
+
transforms.RandomHorizontalFlip(0.5),
|
51 |
+
transforms.ToTensor(),
|
52 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
53 |
+
'transform_source': transforms.Compose([
|
54 |
+
transforms.Resize((256, 256)),
|
55 |
+
transforms.RandomHorizontalFlip(0.5),
|
56 |
+
transforms.ToTensor(),
|
57 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
58 |
+
'transform_test': transforms.Compose([
|
59 |
+
transforms.Resize((256, 256)),
|
60 |
+
transforms.ToTensor(),
|
61 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
62 |
+
'transform_inference': transforms.Compose([
|
63 |
+
transforms.Resize((256, 256)),
|
64 |
+
transforms.ToTensor(),
|
65 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
|
66 |
+
}
|
67 |
+
return transforms_dict
|
68 |
+
|
69 |
+
|
70 |
+
class SketchToImageTransforms(TransformsConfig):
|
71 |
+
|
72 |
+
def __init__(self, opts):
|
73 |
+
super(SketchToImageTransforms, self).__init__(opts)
|
74 |
+
|
75 |
+
def get_transforms(self):
|
76 |
+
transforms_dict = {
|
77 |
+
'transform_gt_train': transforms.Compose([
|
78 |
+
transforms.Resize((256, 256)),
|
79 |
+
transforms.ToTensor(),
|
80 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
81 |
+
'transform_source': transforms.Compose([
|
82 |
+
transforms.Resize((256, 256)),
|
83 |
+
transforms.ToTensor()]),
|
84 |
+
'transform_test': transforms.Compose([
|
85 |
+
transforms.Resize((256, 256)),
|
86 |
+
transforms.ToTensor(),
|
87 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
88 |
+
'transform_inference': transforms.Compose([
|
89 |
+
transforms.Resize((256, 256)),
|
90 |
+
transforms.ToTensor()]),
|
91 |
+
}
|
92 |
+
return transforms_dict
|
93 |
+
|
94 |
+
|
95 |
+
class SegToImageTransforms(TransformsConfig):
|
96 |
+
|
97 |
+
def __init__(self, opts):
|
98 |
+
super(SegToImageTransforms, self).__init__(opts)
|
99 |
+
|
100 |
+
def get_transforms(self):
|
101 |
+
transforms_dict = {
|
102 |
+
'transform_gt_train': transforms.Compose([
|
103 |
+
transforms.Resize((256, 256)),
|
104 |
+
transforms.ToTensor(),
|
105 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
106 |
+
'transform_source': transforms.Compose([
|
107 |
+
transforms.Resize((256, 256)),
|
108 |
+
augmentations.ToOneHot(self.opts.label_nc),
|
109 |
+
transforms.ToTensor()]),
|
110 |
+
'transform_test': transforms.Compose([
|
111 |
+
transforms.Resize((256, 256)),
|
112 |
+
transforms.ToTensor(),
|
113 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
114 |
+
'transform_inference': transforms.Compose([
|
115 |
+
transforms.Resize((256, 256)),
|
116 |
+
augmentations.ToOneHot(self.opts.label_nc),
|
117 |
+
transforms.ToTensor()])
|
118 |
+
}
|
119 |
+
return transforms_dict
|
120 |
+
|
121 |
+
|
122 |
+
class SuperResTransforms(TransformsConfig):
|
123 |
+
|
124 |
+
def __init__(self, opts):
|
125 |
+
super(SuperResTransforms, self).__init__(opts)
|
126 |
+
|
127 |
+
def get_transforms(self):
|
128 |
+
if self.opts.resize_factors is None:
|
129 |
+
self.opts.resize_factors = '1,2,4,8,16,32'
|
130 |
+
factors = [int(f) for f in self.opts.resize_factors.split(",")]
|
131 |
+
print("Performing down-sampling with factors: {}".format(factors))
|
132 |
+
transforms_dict = {
|
133 |
+
'transform_gt_train': transforms.Compose([
|
134 |
+
transforms.Resize((256, 256)),
|
135 |
+
transforms.ToTensor(),
|
136 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
137 |
+
'transform_source': transforms.Compose([
|
138 |
+
transforms.Resize((256, 256)),
|
139 |
+
augmentations.BilinearResize(factors=factors),
|
140 |
+
transforms.Resize((256, 256)),
|
141 |
+
transforms.ToTensor(),
|
142 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
143 |
+
'transform_test': transforms.Compose([
|
144 |
+
transforms.Resize((256, 256)),
|
145 |
+
transforms.ToTensor(),
|
146 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]),
|
147 |
+
'transform_inference': transforms.Compose([
|
148 |
+
transforms.Resize((256, 256)),
|
149 |
+
augmentations.BilinearResize(factors=factors),
|
150 |
+
transforms.Resize((256, 256)),
|
151 |
+
transforms.ToTensor(),
|
152 |
+
transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])])
|
153 |
+
}
|
154 |
+
return transforms_dict
|
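As a quick sanity check, the sketch below shows how one of these transform configs might be consumed. It is not part of the commit: the `opts` namespace, the image path, and the task choice are placeholder assumptions, and it assumes you run from the pixel2style2pixel root so that `configs` and `datasets` are importable.

# Hypothetical usage sketch (not in this commit); `opts` and the image path are placeholders.
from argparse import Namespace
from PIL import Image
from configs.transforms_config import EncodeTransforms, SuperResTransforms

opts = Namespace(label_nc=0, resize_factors=None)   # assumed minimal options

# Encoding task: resize + normalize only.
encode_tf = EncodeTransforms(opts).get_transforms()
img = Image.open('face.jpg').convert('RGB')         # placeholder path
x = encode_tf['transform_inference'](img)           # tensor, 3x256x256, values in [-1, 1]

# Super-resolution task: the source branch also bilinearly down-samples by a random
# factor taken from opts.resize_factors (defaults to '1,2,4,8,16,32'), then resizes back.
sr_tf = SuperResTransforms(opts).get_transforms()
x_lr = sr_tf['transform_source'](img)               # blurred 3x256x256 tensor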
interface/pixel2style2pixel/criteria/__init__.py
ADDED
File without changes
|
interface/pixel2style2pixel/criteria/id_loss.py
ADDED
@@ -0,0 +1,44 @@
import torch
from torch import nn
from configs.paths_config import model_paths
from models.encoders.model_irse import Backbone


class IDLoss(nn.Module):
    def __init__(self):
        super(IDLoss, self).__init__()
        print('Loading ResNet ArcFace')
        self.facenet = Backbone(input_size=112, num_layers=50, drop_ratio=0.6, mode='ir_se')
        self.facenet.load_state_dict(torch.load(model_paths['ir_se50']))
        self.face_pool = torch.nn.AdaptiveAvgPool2d((112, 112))
        self.facenet.eval()

    def extract_feats(self, x):
        x = x[:, :, 35:223, 32:220]  # Crop interesting region
        x = self.face_pool(x)
        x_feats = self.facenet(x)
        return x_feats

    def forward(self, y_hat, y, x):
        n_samples = x.shape[0]
        x_feats = self.extract_feats(x)
        y_feats = self.extract_feats(y)  # Otherwise use the feature from there
        y_hat_feats = self.extract_feats(y_hat)
        y_feats = y_feats.detach()
        loss = 0
        sim_improvement = 0
        id_logs = []
        count = 0
        for i in range(n_samples):
            diff_target = y_hat_feats[i].dot(y_feats[i])
            diff_input = y_hat_feats[i].dot(x_feats[i])
            diff_views = y_feats[i].dot(x_feats[i])
            id_logs.append({'diff_target': float(diff_target),
                            'diff_input': float(diff_input),
                            'diff_views': float(diff_views)})
            loss += 1 - diff_target
            id_diff = float(diff_target) - float(diff_views)
            sim_improvement += id_diff
            count += 1

        return loss / count, sim_improvement / count, id_logs

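A minimal sketch of how IDLoss is typically called. It assumes the ir_se50 ArcFace checkpoint referenced by configs.paths_config is available locally; the tensors are random stand-ins for input, target, and reconstruction batches.

# Hypothetical usage sketch (not in this commit); requires the weights at model_paths['ir_se50'].
import torch
from criteria.id_loss import IDLoss

id_loss = IDLoss()
x = torch.randn(2, 3, 256, 256)      # input images
y = torch.randn(2, 3, 256, 256)      # target images
y_hat = torch.randn(2, 3, 256, 256)  # generator outputs

loss, sim_improvement, id_logs = id_loss(y_hat, y, x)
print(float(loss), sim_improvement, list(id_logs[0].keys()))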
interface/pixel2style2pixel/criteria/lpips/__init__.py
ADDED
File without changes
|
interface/pixel2style2pixel/criteria/lpips/lpips.py
ADDED
@@ -0,0 +1,35 @@
import torch
import torch.nn as nn

from criteria.lpips.networks import get_network, LinLayers
from criteria.lpips.utils import get_state_dict


class LPIPS(nn.Module):
    r"""Creates a criterion that measures
    Learned Perceptual Image Patch Similarity (LPIPS).
    Arguments:
        net_type (str): the network type to compare the features:
                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
        version (str): the version of LPIPS. Default: 0.1.
    """
    def __init__(self, net_type: str = 'alex', version: str = '0.1'):

        assert version in ['0.1'], 'v0.1 is only supported now'

        super(LPIPS, self).__init__()

        # pretrained network
        self.net = get_network(net_type).to("cuda")

        # linear layers
        self.lin = LinLayers(self.net.n_channels_list).to("cuda")
        self.lin.load_state_dict(get_state_dict(net_type, version))

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        feat_x, feat_y = self.net(x), self.net(y)

        diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
        res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]

        return torch.sum(torch.cat(res, 0)) / x.shape[0]

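A minimal sketch of the LPIPS criterion above. Note that this implementation moves its sub-networks to CUDA in __init__, so a GPU is assumed, and the backbone and linear-layer weights are downloaded on first use; inputs are random stand-ins in the [-1, 1] range produced by the Normalize transforms earlier in this commit.

# Hypothetical usage sketch (not in this commit); assumes a CUDA device and internet
# access for the pretrained AlexNet and LPIPS weights.
import torch
from criteria.lpips.lpips import LPIPS

lpips_loss = LPIPS(net_type='alex')
x = torch.rand(4, 3, 256, 256, device='cuda') * 2 - 1
y = torch.rand(4, 3, 256, 256, device='cuda') * 2 - 1
print(float(lpips_loss(x, y)))   # mean LPIPS distance over the batch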
interface/pixel2style2pixel/criteria/lpips/networks.py
ADDED
@@ -0,0 +1,96 @@
1 |
+
from typing import Sequence
|
2 |
+
|
3 |
+
from itertools import chain
|
4 |
+
|
5 |
+
import torch
|
6 |
+
import torch.nn as nn
|
7 |
+
from torchvision import models
|
8 |
+
|
9 |
+
from criteria.lpips.utils import normalize_activation
|
10 |
+
|
11 |
+
|
12 |
+
def get_network(net_type: str):
|
13 |
+
if net_type == 'alex':
|
14 |
+
return AlexNet()
|
15 |
+
elif net_type == 'squeeze':
|
16 |
+
return SqueezeNet()
|
17 |
+
elif net_type == 'vgg':
|
18 |
+
return VGG16()
|
19 |
+
else:
|
20 |
+
raise NotImplementedError('choose net_type from [alex, squeeze, vgg].')
|
21 |
+
|
22 |
+
|
23 |
+
class LinLayers(nn.ModuleList):
|
24 |
+
def __init__(self, n_channels_list: Sequence[int]):
|
25 |
+
super(LinLayers, self).__init__([
|
26 |
+
nn.Sequential(
|
27 |
+
nn.Identity(),
|
28 |
+
nn.Conv2d(nc, 1, 1, 1, 0, bias=False)
|
29 |
+
) for nc in n_channels_list
|
30 |
+
])
|
31 |
+
|
32 |
+
for param in self.parameters():
|
33 |
+
param.requires_grad = False
|
34 |
+
|
35 |
+
|
36 |
+
class BaseNet(nn.Module):
|
37 |
+
def __init__(self):
|
38 |
+
super(BaseNet, self).__init__()
|
39 |
+
|
40 |
+
# register buffer
|
41 |
+
self.register_buffer(
|
42 |
+
'mean', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
|
43 |
+
self.register_buffer(
|
44 |
+
'std', torch.Tensor([.458, .448, .450])[None, :, None, None])
|
45 |
+
|
46 |
+
def set_requires_grad(self, state: bool):
|
47 |
+
for param in chain(self.parameters(), self.buffers()):
|
48 |
+
param.requires_grad = state
|
49 |
+
|
50 |
+
def z_score(self, x: torch.Tensor):
|
51 |
+
return (x - self.mean) / self.std
|
52 |
+
|
53 |
+
def forward(self, x: torch.Tensor):
|
54 |
+
x = self.z_score(x)
|
55 |
+
|
56 |
+
output = []
|
57 |
+
for i, (_, layer) in enumerate(self.layers._modules.items(), 1):
|
58 |
+
x = layer(x)
|
59 |
+
if i in self.target_layers:
|
60 |
+
output.append(normalize_activation(x))
|
61 |
+
if len(output) == len(self.target_layers):
|
62 |
+
break
|
63 |
+
return output
|
64 |
+
|
65 |
+
|
66 |
+
class SqueezeNet(BaseNet):
|
67 |
+
def __init__(self):
|
68 |
+
super(SqueezeNet, self).__init__()
|
69 |
+
|
70 |
+
self.layers = models.squeezenet1_1(True).features
|
71 |
+
self.target_layers = [2, 5, 8, 10, 11, 12, 13]
|
72 |
+
self.n_channels_list = [64, 128, 256, 384, 384, 512, 512]
|
73 |
+
|
74 |
+
self.set_requires_grad(False)
|
75 |
+
|
76 |
+
|
77 |
+
class AlexNet(BaseNet):
|
78 |
+
def __init__(self):
|
79 |
+
super(AlexNet, self).__init__()
|
80 |
+
|
81 |
+
self.layers = models.alexnet(True).features
|
82 |
+
self.target_layers = [2, 5, 8, 10, 12]
|
83 |
+
self.n_channels_list = [64, 192, 384, 256, 256]
|
84 |
+
|
85 |
+
self.set_requires_grad(False)
|
86 |
+
|
87 |
+
|
88 |
+
class VGG16(BaseNet):
|
89 |
+
def __init__(self):
|
90 |
+
super(VGG16, self).__init__()
|
91 |
+
|
92 |
+
self.layers = models.vgg16(True).features
|
93 |
+
self.target_layers = [4, 9, 16, 23, 30]
|
94 |
+
self.n_channels_list = [64, 128, 256, 512, 512]
|
95 |
+
|
96 |
+
self.set_requires_grad(False)
|
interface/pixel2style2pixel/criteria/lpips/utils.py
ADDED
@@ -0,0 +1,30 @@
from collections import OrderedDict

import torch


def normalize_activation(x, eps=1e-10):
    norm_factor = torch.sqrt(torch.sum(x ** 2, dim=1, keepdim=True))
    return x / (norm_factor + eps)


def get_state_dict(net_type: str = 'alex', version: str = '0.1'):
    # build url
    url = 'https://raw.githubusercontent.com/richzhang/PerceptualSimilarity/' \
        + f'master/lpips/weights/v{version}/{net_type}.pth'

    # download
    old_state_dict = torch.hub.load_state_dict_from_url(
        url, progress=True,
        map_location=None if torch.cuda.is_available() else torch.device('cpu')
    )

    # rename keys
    new_state_dict = OrderedDict()
    for key, val in old_state_dict.items():
        new_key = key
        new_key = new_key.replace('lin', '')
        new_key = new_key.replace('model.', '')
        new_state_dict[new_key] = val

    return new_state_dict

interface/pixel2style2pixel/criteria/moco_loss.py
ADDED
@@ -0,0 +1,69 @@
import torch
from torch import nn
import torch.nn.functional as F
from configs.paths_config import model_paths


class MocoLoss(nn.Module):

    def __init__(self):
        super(MocoLoss, self).__init__()
        print("Loading MOCO model from path: {}".format(model_paths["moco"]))
        self.model = self.__load_model()
        self.model.cuda()
        self.model.eval()

    @staticmethod
    def __load_model():
        import torchvision.models as models
        model = models.__dict__["resnet50"]()
        # freeze all layers but the last fc
        for name, param in model.named_parameters():
            if name not in ['fc.weight', 'fc.bias']:
                param.requires_grad = False
        checkpoint = torch.load(model_paths['moco'], map_location="cpu")
        state_dict = checkpoint['state_dict']
        # rename moco pre-trained keys
        for k in list(state_dict.keys()):
            # retain only encoder_q up to before the embedding layer
            if k.startswith('module.encoder_q') and not k.startswith('module.encoder_q.fc'):
                # remove prefix
                state_dict[k[len("module.encoder_q."):]] = state_dict[k]
            # delete renamed or unused k
            del state_dict[k]
        msg = model.load_state_dict(state_dict, strict=False)
        assert set(msg.missing_keys) == {"fc.weight", "fc.bias"}
        # remove output layer
        model = nn.Sequential(*list(model.children())[:-1]).cuda()
        return model

    def extract_feats(self, x):
        x = F.interpolate(x, size=224)
        x_feats = self.model(x)
        x_feats = nn.functional.normalize(x_feats, dim=1)
        x_feats = x_feats.squeeze()
        return x_feats

    def forward(self, y_hat, y, x):
        n_samples = x.shape[0]
        x_feats = self.extract_feats(x)
        y_feats = self.extract_feats(y)
        y_hat_feats = self.extract_feats(y_hat)
        y_feats = y_feats.detach()
        loss = 0
        sim_improvement = 0
        sim_logs = []
        count = 0
        for i in range(n_samples):
            diff_target = y_hat_feats[i].dot(y_feats[i])
            diff_input = y_hat_feats[i].dot(x_feats[i])
            diff_views = y_feats[i].dot(x_feats[i])
            sim_logs.append({'diff_target': float(diff_target),
                             'diff_input': float(diff_input),
                             'diff_views': float(diff_views)})
            loss += 1 - diff_target
            sim_diff = float(diff_target) - float(diff_views)
            sim_improvement += sim_diff
            count += 1

        return loss / count, sim_improvement / count, sim_logs

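MocoLoss follows the same call signature as IDLoss. The sketch below assumes the MoCo v2 ResNet-50 checkpoint referenced by model_paths['moco'] is present and that a GPU is available, since the backbone is moved to CUDA in __init__.

# Hypothetical usage sketch (not in this commit); requires the MoCo checkpoint and a CUDA device.
import torch
from criteria.moco_loss import MocoLoss

moco_loss = MocoLoss()
x = torch.randn(2, 3, 256, 256, device='cuda')
y = torch.randn(2, 3, 256, 256, device='cuda')
y_hat = torch.randn(2, 3, 256, 256, device='cuda')
loss, sim_improvement, sim_logs = moco_loss(y_hat, y, x)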
interface/pixel2style2pixel/criteria/w_norm.py
ADDED
@@ -0,0 +1,14 @@
import torch
from torch import nn


class WNormLoss(nn.Module):

    def __init__(self, start_from_latent_avg=True):
        super(WNormLoss, self).__init__()
        self.start_from_latent_avg = start_from_latent_avg

    def forward(self, latent, latent_avg=None):
        if self.start_from_latent_avg:
            latent = latent - latent_avg
        return torch.sum(latent.norm(2, dim=(1, 2))) / latent.shape[0]

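WNormLoss regularises the predicted latents toward the average latent. A minimal sketch, assuming the usual pSp latent shape of (batch, n_styles, 512) with random stand-in tensors:

# Hypothetical usage sketch (not in this commit).
import torch
from criteria.w_norm import WNormLoss

w_norm = WNormLoss(start_from_latent_avg=True)
latent = torch.randn(4, 18, 512)       # W+ codes for a batch of 4
latent_avg = torch.randn(18, 512)      # broadcast over the batch dimension
print(float(w_norm(latent, latent_avg)))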
interface/pixel2style2pixel/datasets/__init__.py
ADDED
File without changes
|
interface/pixel2style2pixel/datasets/augmentations.py
ADDED
@@ -0,0 +1,110 @@
1 |
+
import numpy as np
|
2 |
+
import torch
|
3 |
+
from torch import nn
|
4 |
+
from torch.nn import functional as F
|
5 |
+
from torchvision import transforms
|
6 |
+
|
7 |
+
|
8 |
+
class ToOneHot(object):
|
9 |
+
""" Convert the input PIL image to a one-hot torch tensor """
|
10 |
+
def __init__(self, n_classes=None):
|
11 |
+
self.n_classes = n_classes
|
12 |
+
|
13 |
+
def onehot_initialization(self, a):
|
14 |
+
if self.n_classes is None:
|
15 |
+
self.n_classes = len(np.unique(a))
|
16 |
+
out = np.zeros(a.shape + (self.n_classes, ), dtype=int)
|
17 |
+
out[self.__all_idx(a, axis=2)] = 1
|
18 |
+
return out
|
19 |
+
|
20 |
+
def __all_idx(self, idx, axis):
|
21 |
+
grid = np.ogrid[tuple(map(slice, idx.shape))]
|
22 |
+
grid.insert(axis, idx)
|
23 |
+
return tuple(grid)
|
24 |
+
|
25 |
+
def __call__(self, img):
|
26 |
+
img = np.array(img)
|
27 |
+
one_hot = self.onehot_initialization(img)
|
28 |
+
return one_hot
|
29 |
+
|
30 |
+
|
31 |
+
class BilinearResize(object):
|
32 |
+
def __init__(self, factors=[1, 2, 4, 8, 16, 32]):
|
33 |
+
self.factors = factors
|
34 |
+
|
35 |
+
def __call__(self, image):
|
36 |
+
factor = np.random.choice(self.factors, size=1)[0]
|
37 |
+
D = BicubicDownSample(factor=factor, cuda=False)
|
38 |
+
img_tensor = transforms.ToTensor()(image).unsqueeze(0)
|
39 |
+
img_tensor_lr = D(img_tensor)[0].clamp(0, 1)
|
40 |
+
img_low_res = transforms.ToPILImage()(img_tensor_lr)
|
41 |
+
return img_low_res
|
42 |
+
|
43 |
+
|
44 |
+
class BicubicDownSample(nn.Module):
|
45 |
+
def bicubic_kernel(self, x, a=-0.50):
|
46 |
+
"""
|
47 |
+
This equation is exactly copied from the website below:
|
48 |
+
https://clouard.users.greyc.fr/Pantheon/experiments/rescaling/index-en.html#bicubic
|
49 |
+
"""
|
50 |
+
abs_x = torch.abs(x)
|
51 |
+
if abs_x <= 1.:
|
52 |
+
return (a + 2.) * torch.pow(abs_x, 3.) - (a + 3.) * torch.pow(abs_x, 2.) + 1
|
53 |
+
elif 1. < abs_x < 2.:
|
54 |
+
return a * torch.pow(abs_x, 3) - 5. * a * torch.pow(abs_x, 2.) + 8. * a * abs_x - 4. * a
|
55 |
+
else:
|
56 |
+
return 0.0
|
57 |
+
|
58 |
+
def __init__(self, factor=4, cuda=True, padding='reflect'):
|
59 |
+
super().__init__()
|
60 |
+
self.factor = factor
|
61 |
+
size = factor * 4
|
62 |
+
k = torch.tensor([self.bicubic_kernel((i - torch.floor(torch.tensor(size / 2)) + 0.5) / factor)
|
63 |
+
for i in range(size)], dtype=torch.float32)
|
64 |
+
k = k / torch.sum(k)
|
65 |
+
k1 = torch.reshape(k, shape=(1, 1, size, 1))
|
66 |
+
self.k1 = torch.cat([k1, k1, k1], dim=0)
|
67 |
+
k2 = torch.reshape(k, shape=(1, 1, 1, size))
|
68 |
+
self.k2 = torch.cat([k2, k2, k2], dim=0)
|
69 |
+
self.cuda = '.cuda' if cuda else ''
|
70 |
+
self.padding = padding
|
71 |
+
for param in self.parameters():
|
72 |
+
param.requires_grad = False
|
73 |
+
|
74 |
+
def forward(self, x, nhwc=False, clip_round=False, byte_output=False):
|
75 |
+
filter_height = self.factor * 4
|
76 |
+
filter_width = self.factor * 4
|
77 |
+
stride = self.factor
|
78 |
+
|
79 |
+
pad_along_height = max(filter_height - stride, 0)
|
80 |
+
pad_along_width = max(filter_width - stride, 0)
|
81 |
+
filters1 = self.k1.type('torch{}.FloatTensor'.format(self.cuda))
|
82 |
+
filters2 = self.k2.type('torch{}.FloatTensor'.format(self.cuda))
|
83 |
+
|
84 |
+
# compute actual padding values for each side
|
85 |
+
pad_top = pad_along_height // 2
|
86 |
+
pad_bottom = pad_along_height - pad_top
|
87 |
+
pad_left = pad_along_width // 2
|
88 |
+
pad_right = pad_along_width - pad_left
|
89 |
+
|
90 |
+
# apply mirror padding
|
91 |
+
if nhwc:
|
92 |
+
x = torch.transpose(torch.transpose(x, 2, 3), 1, 2) # NHWC to NCHW
|
93 |
+
|
94 |
+
# downscaling performed by 1-d convolution
|
95 |
+
x = F.pad(x, (0, 0, pad_top, pad_bottom), self.padding)
|
96 |
+
x = F.conv2d(input=x, weight=filters1, stride=(stride, 1), groups=3)
|
97 |
+
if clip_round:
|
98 |
+
x = torch.clamp(torch.round(x), 0.0, 255.)
|
99 |
+
|
100 |
+
x = F.pad(x, (pad_left, pad_right, 0, 0), self.padding)
|
101 |
+
x = F.conv2d(input=x, weight=filters2, stride=(1, stride), groups=3)
|
102 |
+
if clip_round:
|
103 |
+
x = torch.clamp(torch.round(x), 0.0, 255.)
|
104 |
+
|
105 |
+
if nhwc:
|
106 |
+
x = torch.transpose(torch.transpose(x, 1, 3), 1, 2)
|
107 |
+
if byte_output:
|
108 |
+
return x.type('torch.ByteTensor'.format(self.cuda))
|
109 |
+
else:
|
110 |
+
return x
|
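A minimal sketch of the two augmentations defined above: ToOneHot for segmentation label maps and BilinearResize (backed by BicubicDownSample) for the super-resolution source branch. The image sizes, class count, and data are placeholder assumptions.

# Hypothetical usage sketch (not in this commit).
import numpy as np
from PIL import Image
from datasets.augmentations import ToOneHot, BilinearResize

# One-hot encode a fake 19-class label map.
label_map = Image.fromarray(np.random.randint(0, 19, (256, 256), dtype=np.uint8))
one_hot = ToOneHot(n_classes=19)(label_map)      # numpy array, shape (256, 256, 19)

# Randomly down-sample an RGB image by a factor of 4 or 8.
img = Image.fromarray(np.random.randint(0, 256, (256, 256, 3), dtype=np.uint8))
low_res = BilinearResize(factors=[4, 8])(img)    # PIL image, roughly 64x64 or 32x32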
interface/pixel2style2pixel/datasets/gt_res_dataset.py
ADDED
@@ -0,0 +1,32 @@
#!/usr/bin/python
# encoding: utf-8
import os
from torch.utils.data import Dataset
from PIL import Image


class GTResDataset(Dataset):

    def __init__(self, root_path, gt_dir=None, transform=None, transform_train=None):
        self.pairs = []
        for f in os.listdir(root_path):
            image_path = os.path.join(root_path, f)
            gt_path = os.path.join(gt_dir, f)
            if f.endswith(".jpg") or f.endswith(".png"):
                self.pairs.append([image_path, gt_path.replace('.png', '.jpg'), None])
        self.transform = transform
        self.transform_train = transform_train

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, index):
        from_path, to_path, _ = self.pairs[index]
        from_im = Image.open(from_path).convert('RGB')
        to_im = Image.open(to_path).convert('RGB')

        if self.transform:
            to_im = self.transform(to_im)
            from_im = self.transform(from_im)

        return from_im, to_im

interface/pixel2style2pixel/datasets/images_dataset.py
ADDED
@@ -0,0 +1,33 @@
from torch.utils.data import Dataset
from PIL import Image
from utils import data_utils


class ImagesDataset(Dataset):

    def __init__(self, source_root, target_root, opts, target_transform=None, source_transform=None):
        self.source_paths = sorted(data_utils.make_dataset(source_root))
        self.target_paths = sorted(data_utils.make_dataset(target_root))
        self.source_transform = source_transform
        self.target_transform = target_transform
        self.opts = opts

    def __len__(self):
        return len(self.source_paths)

    def __getitem__(self, index):
        from_path = self.source_paths[index]
        from_im = Image.open(from_path)
        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')

        to_path = self.target_paths[index]
        to_im = Image.open(to_path).convert('RGB')
        if self.target_transform:
            to_im = self.target_transform(to_im)

        if self.source_transform:
            from_im = self.source_transform(from_im)
        else:
            from_im = to_im

        return from_im, to_im

interface/pixel2style2pixel/datasets/inference_dataset.py
ADDED
@@ -0,0 +1,22 @@
from torch.utils.data import Dataset
from PIL import Image
from utils import data_utils


class InferenceDataset(Dataset):

    def __init__(self, root, opts, transform=None):
        self.paths = sorted(data_utils.make_dataset(root))
        self.transform = transform
        self.opts = opts

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        from_path = self.paths[index]
        from_im = Image.open(from_path)
        from_im = from_im.convert('RGB') if self.opts.label_nc == 0 else from_im.convert('L')
        if self.transform:
            from_im = self.transform(from_im)
        return from_im

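Tying the datasets and transforms together, a minimal sketch of an inference data pipeline; the directory name and options namespace are placeholders.

# Hypothetical usage sketch (not in this commit); 'inference_images/' is a placeholder
# directory of face crops.
from argparse import Namespace
from torch.utils.data import DataLoader

from configs.transforms_config import EncodeTransforms
from datasets.inference_dataset import InferenceDataset

opts = Namespace(label_nc=0)     # RGB inputs, no segmentation labels
transform = EncodeTransforms(opts).get_transforms()['transform_inference']
dataset = InferenceDataset(root='inference_images', opts=opts, transform=transform)
loader = DataLoader(dataset, batch_size=4, shuffle=False, num_workers=2)

for batch in loader:
    print(batch.shape)           # (B, 3, 256, 256)
    break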
interface/pixel2style2pixel/docs/encoding_inputs.jpg
ADDED
interface/pixel2style2pixel/docs/encoding_outputs.jpg
ADDED
interface/pixel2style2pixel/docs/frontalization_inputs.jpg
ADDED
interface/pixel2style2pixel/docs/frontalization_outputs.jpg
ADDED
interface/pixel2style2pixel/docs/seg2image.png
ADDED
interface/pixel2style2pixel/docs/sketch2image.png
ADDED
interface/pixel2style2pixel/docs/super_res_32.jpg
ADDED
interface/pixel2style2pixel/docs/super_res_style_mixing.jpg
ADDED
interface/pixel2style2pixel/docs/teaser.png
ADDED
interface/pixel2style2pixel/docs/toonify_input.jpg
ADDED
interface/pixel2style2pixel/docs/toonify_output.jpg
ADDED
interface/pixel2style2pixel/download-weights.sh
ADDED
@@ -0,0 +1,12 @@
1 |
+
#!/bin/sh
|
2 |
+
mkdir pretrained_models
|
3 |
+
cd pretrained_models
|
4 |
+
|
5 |
+
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1lB7wk7MwtdxL-LL4Z_T76DuCfk00aSXA' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1lB7wk7MwtdxL-LL4Z_T76DuCfk00aSXA" -O psp_celebs_sketch_to_face.pt && rm -rf /tmp/cookies.txt
|
6 |
+
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1_S4THAzXb-97DbpXmanjHtXRyKxqjARv' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1_S4THAzXb-97DbpXmanjHtXRyKxqjARv" -O psp_ffhq_frontalization.pt && rm -rf /tmp/cookies.txt
|
7 |
+
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1ZpmSXBpJ9pFEov6-jjQstAlfYbkebECu' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1ZpmSXBpJ9pFEov6-jjQstAlfYbkebECu" -O psp_celebs_super_resolution.pt && rm -rf /tmp/cookies.txt
|
8 |
+
wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1YKoiVuFaqdvzDP5CZaqa3k5phL-VDmyz' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1YKoiVuFaqdvzDP5CZaqa3k5phL-VDmyz" -O psp_ffhq_toonify.pt && rm -rf /tmp/cookies.txt
|
9 |
+
|
10 |
+
cd ..
|
11 |
+
wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
|
12 |
+
bunzip2 shape_predictor_68_face_landmarks.dat.bz2
|
interface/pixel2style2pixel/environment/psp_env.yaml
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: psp_env
|
2 |
+
channels:
|
3 |
+
- conda-forge
|
4 |
+
- defaults
|
5 |
+
dependencies:
|
6 |
+
- _libgcc_mutex=0.1=main
|
7 |
+
- ca-certificates=2020.4.5.1=hecc5488_0
|
8 |
+
- certifi=2020.4.5.1=py36h9f0ad1d_0
|
9 |
+
- libedit=3.1.20181209=hc058e9b_0
|
10 |
+
- libffi=3.2.1=hd88cf55_4
|
11 |
+
- libgcc-ng=9.1.0=hdf63c60_0
|
12 |
+
- libstdcxx-ng=9.1.0=hdf63c60_0
|
13 |
+
- ncurses=6.2=he6710b0_1
|
14 |
+
- ninja=1.10.0=hc9558a2_0
|
15 |
+
- openssl=1.1.1g=h516909a_0
|
16 |
+
- pip=20.0.2=py36_3
|
17 |
+
- python=3.6.7=h0371630_0
|
18 |
+
- python_abi=3.6=1_cp36m
|
19 |
+
- readline=7.0=h7b6447c_5
|
20 |
+
- setuptools=46.4.0=py36_0
|
21 |
+
- sqlite=3.31.1=h62c20be_1
|
22 |
+
- tk=8.6.8=hbc83047_0
|
23 |
+
- wheel=0.34.2=py36_0
|
24 |
+
- xz=5.2.5=h7b6447c_0
|
25 |
+
- zlib=1.2.11=h7b6447c_3
|
26 |
+
- pip:
|
27 |
+
- scipy==1.4.1
|
28 |
+
- matplotlib==3.2.1
|
29 |
+
- tqdm==4.46.0
|
30 |
+
- numpy==1.18.4
|
31 |
+
- opencv-python==4.2.0.34
|
32 |
+
- pillow==7.1.2
|
33 |
+
- tensorboard==2.2.1
|
34 |
+
- torch==1.6.0
|
35 |
+
- torchvision==0.4.2
|
36 |
+
prefix: ~/anaconda3/envs/psp_env
|
37 |
+
|
interface/pixel2style2pixel/licenses/LICENSE_HuangYG123
ADDED
@@ -0,0 +1,21 @@
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2020 HuangYG123
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
interface/pixel2style2pixel/licenses/LICENSE_S-aiueo32
ADDED
@@ -0,0 +1,25 @@
1 |
+
BSD 2-Clause License
|
2 |
+
|
3 |
+
Copyright (c) 2020, Sou Uchida
|
4 |
+
All rights reserved.
|
5 |
+
|
6 |
+
Redistribution and use in source and binary forms, with or without
|
7 |
+
modification, are permitted provided that the following conditions are met:
|
8 |
+
|
9 |
+
1. Redistributions of source code must retain the above copyright notice, this
|
10 |
+
list of conditions and the following disclaimer.
|
11 |
+
|
12 |
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
13 |
+
this list of conditions and the following disclaimer in the documentation
|
14 |
+
and/or other materials provided with the distribution.
|
15 |
+
|
16 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
17 |
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
18 |
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
19 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
20 |
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
21 |
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
22 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
23 |
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
24 |
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
25 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
interface/pixel2style2pixel/licenses/LICENSE_TreB1eN
ADDED
@@ -0,0 +1,21 @@
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2018 TreB1eN
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
interface/pixel2style2pixel/licenses/LICENSE_lessw2020
ADDED
@@ -0,0 +1,201 @@
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
interface/pixel2style2pixel/licenses/LICENSE_rosinality
ADDED
@@ -0,0 +1,21 @@
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2019 Kim Seonghyeon
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
interface/pixel2style2pixel/models/__init__.py
ADDED
File without changes
|
interface/pixel2style2pixel/models/encoders/__init__.py
ADDED
File without changes
|
interface/pixel2style2pixel/models/encoders/helpers.py
ADDED
@@ -0,0 +1,119 @@
1 |
+
from collections import namedtuple
|
2 |
+
import torch
|
3 |
+
from torch.nn import Conv2d, BatchNorm2d, PReLU, ReLU, Sigmoid, MaxPool2d, AdaptiveAvgPool2d, Sequential, Module
|
4 |
+
|
5 |
+
"""
|
6 |
+
ArcFace implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
|
7 |
+
"""
|
8 |
+
|
9 |
+
|
10 |
+
class Flatten(Module):
|
11 |
+
def forward(self, input):
|
12 |
+
return input.view(input.size(0), -1)
|
13 |
+
|
14 |
+
|
15 |
+
def l2_norm(input, axis=1):
|
16 |
+
norm = torch.norm(input, 2, axis, True)
|
17 |
+
output = torch.div(input, norm)
|
18 |
+
return output
|
19 |
+
|
20 |
+
|
21 |
+
class Bottleneck(namedtuple('Block', ['in_channel', 'depth', 'stride'])):
|
22 |
+
""" A named tuple describing a ResNet block. """
|
23 |
+
|
24 |
+
|
25 |
+
def get_block(in_channel, depth, num_units, stride=2):
|
26 |
+
return [Bottleneck(in_channel, depth, stride)] + [Bottleneck(depth, depth, 1) for i in range(num_units - 1)]
|
27 |
+
|
28 |
+
|
29 |
+
def get_blocks(num_layers):
|
30 |
+
if num_layers == 50:
|
31 |
+
blocks = [
|
32 |
+
get_block(in_channel=64, depth=64, num_units=3),
|
33 |
+
get_block(in_channel=64, depth=128, num_units=4),
|
34 |
+
get_block(in_channel=128, depth=256, num_units=14),
|
35 |
+
get_block(in_channel=256, depth=512, num_units=3)
|
36 |
+
]
|
37 |
+
elif num_layers == 100:
|
38 |
+
blocks = [
|
39 |
+
get_block(in_channel=64, depth=64, num_units=3),
|
40 |
+
get_block(in_channel=64, depth=128, num_units=13),
|
41 |
+
get_block(in_channel=128, depth=256, num_units=30),
|
42 |
+
get_block(in_channel=256, depth=512, num_units=3)
|
43 |
+
]
|
44 |
+
elif num_layers == 152:
|
45 |
+
blocks = [
|
46 |
+
get_block(in_channel=64, depth=64, num_units=3),
|
47 |
+
get_block(in_channel=64, depth=128, num_units=8),
|
48 |
+
get_block(in_channel=128, depth=256, num_units=36),
|
49 |
+
get_block(in_channel=256, depth=512, num_units=3)
|
50 |
+
]
|
51 |
+
else:
|
52 |
+
raise ValueError("Invalid number of layers: {}. Must be one of [50, 100, 152]".format(num_layers))
|
53 |
+
return blocks
|
54 |
+
|
55 |
+
|
56 |
+
class SEModule(Module):
|
57 |
+
def __init__(self, channels, reduction):
|
58 |
+
super(SEModule, self).__init__()
|
59 |
+
self.avg_pool = AdaptiveAvgPool2d(1)
|
60 |
+
self.fc1 = Conv2d(channels, channels // reduction, kernel_size=1, padding=0, bias=False)
|
61 |
+
self.relu = ReLU(inplace=True)
|
62 |
+
self.fc2 = Conv2d(channels // reduction, channels, kernel_size=1, padding=0, bias=False)
|
63 |
+
self.sigmoid = Sigmoid()
|
64 |
+
|
65 |
+
def forward(self, x):
|
66 |
+
module_input = x
|
67 |
+
x = self.avg_pool(x)
|
68 |
+
x = self.fc1(x)
|
69 |
+
x = self.relu(x)
|
70 |
+
x = self.fc2(x)
|
71 |
+
x = self.sigmoid(x)
|
72 |
+
return module_input * x
|
73 |
+
|
74 |
+
|
75 |
+
class bottleneck_IR(Module):
|
76 |
+
def __init__(self, in_channel, depth, stride):
|
77 |
+
super(bottleneck_IR, self).__init__()
|
78 |
+
if in_channel == depth:
|
79 |
+
self.shortcut_layer = MaxPool2d(1, stride)
|
80 |
+
else:
|
81 |
+
self.shortcut_layer = Sequential(
|
82 |
+
Conv2d(in_channel, depth, (1, 1), stride, bias=False),
|
83 |
+
BatchNorm2d(depth)
|
84 |
+
)
|
85 |
+
self.res_layer = Sequential(
|
86 |
+
BatchNorm2d(in_channel),
|
87 |
+
Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False), PReLU(depth),
|
88 |
+
Conv2d(depth, depth, (3, 3), stride, 1, bias=False), BatchNorm2d(depth)
|
89 |
+
)
|
90 |
+
|
91 |
+
def forward(self, x):
|
92 |
+
shortcut = self.shortcut_layer(x)
|
93 |
+
res = self.res_layer(x)
|
94 |
+
return res + shortcut
|
95 |
+
|
96 |
+
|
97 |
+
class bottleneck_IR_SE(Module):
|
98 |
+
def __init__(self, in_channel, depth, stride):
|
99 |
+
super(bottleneck_IR_SE, self).__init__()
|
100 |
+
if in_channel == depth:
|
101 |
+
self.shortcut_layer = MaxPool2d(1, stride)
|
102 |
+
else:
|
103 |
+
self.shortcut_layer = Sequential(
|
104 |
+
Conv2d(in_channel, depth, (1, 1), stride, bias=False),
|
105 |
+
BatchNorm2d(depth)
|
106 |
+
)
|
107 |
+
self.res_layer = Sequential(
|
108 |
+
BatchNorm2d(in_channel),
|
109 |
+
Conv2d(in_channel, depth, (3, 3), (1, 1), 1, bias=False),
|
110 |
+
PReLU(depth),
|
111 |
+
Conv2d(depth, depth, (3, 3), stride, 1, bias=False),
|
112 |
+
BatchNorm2d(depth),
|
113 |
+
SEModule(depth, 16)
|
114 |
+
)
|
115 |
+
|
116 |
+
def forward(self, x):
|
117 |
+
shortcut = self.shortcut_layer(x)
|
118 |
+
res = self.res_layer(x)
|
119 |
+
return res + shortcut
|
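A minimal sketch of the helpers above: get_blocks() returns the per-stage block specs and bottleneck_IR_SE is one residual unit with a squeeze-and-excitation branch. The channel counts and spatial size are illustrative.

# Hypothetical usage sketch (not in this commit).
import torch
from models.encoders.helpers import get_blocks, bottleneck_IR_SE

blocks = get_blocks(50)
print([len(stage) for stage in blocks])            # [3, 4, 14, 3]

unit = bottleneck_IR_SE(in_channel=64, depth=128, stride=2)
out = unit(torch.randn(1, 64, 56, 56))
print(out.shape)                                   # torch.Size([1, 128, 28, 28])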
interface/pixel2style2pixel/models/encoders/model_irse.py
ADDED
@@ -0,0 +1,84 @@
1 |
+
from torch.nn import Linear, Conv2d, BatchNorm1d, BatchNorm2d, PReLU, Dropout, Sequential, Module
|
2 |
+
from models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE, l2_norm
|
3 |
+
|
4 |
+
"""
|
5 |
+
Modified Backbone implementation from [TreB1eN](https://github.com/TreB1eN/InsightFace_Pytorch)
|
6 |
+
"""
|
7 |
+
|
8 |
+
|
9 |
+
class Backbone(Module):
|
10 |
+
def __init__(self, input_size, num_layers, mode='ir', drop_ratio=0.4, affine=True):
|
11 |
+
super(Backbone, self).__init__()
|
12 |
+
assert input_size in [112, 224], "input_size should be 112 or 224"
|
13 |
+
assert num_layers in [50, 100, 152], "num_layers should be 50, 100 or 152"
|
14 |
+
assert mode in ['ir', 'ir_se'], "mode should be ir or ir_se"
|
15 |
+
blocks = get_blocks(num_layers)
|
16 |
+
if mode == 'ir':
|
17 |
+
unit_module = bottleneck_IR
|
18 |
+
elif mode == 'ir_se':
|
19 |
+
unit_module = bottleneck_IR_SE
|
20 |
+
self.input_layer = Sequential(Conv2d(3, 64, (3, 3), 1, 1, bias=False),
|
21 |
+
BatchNorm2d(64),
|
22 |
+
PReLU(64))
|
23 |
+
if input_size == 112:
|
24 |
+
self.output_layer = Sequential(BatchNorm2d(512),
|
25 |
+
Dropout(drop_ratio),
|
26 |
+
Flatten(),
|
27 |
+
Linear(512 * 7 * 7, 512),
|
28 |
+
BatchNorm1d(512, affine=affine))
|
29 |
+
else:
|
30 |
+
self.output_layer = Sequential(BatchNorm2d(512),
|
31 |
+
Dropout(drop_ratio),
|
32 |
+
Flatten(),
|
33 |
+
Linear(512 * 14 * 14, 512),
|
34 |
+
BatchNorm1d(512, affine=affine))
|
35 |
+
|
36 |
+
modules = []
|
37 |
+
for block in blocks:
|
38 |
+
for bottleneck in block:
|
39 |
+
modules.append(unit_module(bottleneck.in_channel,
|
40 |
+
bottleneck.depth,
|
41 |
+
bottleneck.stride))
|
42 |
+
self.body = Sequential(*modules)
|
43 |
+
|
44 |
+
def forward(self, x):
|
45 |
+
x = self.input_layer(x)
|
46 |
+
x = self.body(x)
|
47 |
+
x = self.output_layer(x)
|
48 |
+
return l2_norm(x)
|
49 |
+
|
50 |
+
|
51 |
+
def IR_50(input_size):
|
52 |
+
"""Constructs a ir-50 model."""
|
53 |
+
model = Backbone(input_size, num_layers=50, mode='ir', drop_ratio=0.4, affine=False)
|
54 |
+
return model
|
55 |
+
|
56 |
+
|
57 |
+
def IR_101(input_size):
|
58 |
+
"""Constructs a ir-101 model."""
|
59 |
+
model = Backbone(input_size, num_layers=100, mode='ir', drop_ratio=0.4, affine=False)
|
60 |
+
return model
|
61 |
+
|
62 |
+
|
63 |
+
def IR_152(input_size):
|
64 |
+
"""Constructs a ir-152 model."""
|
65 |
+
model = Backbone(input_size, num_layers=152, mode='ir', drop_ratio=0.4, affine=False)
|
66 |
+
return model
|
67 |
+
|
68 |
+
|
69 |
+
def IR_SE_50(input_size):
|
70 |
+
"""Constructs a ir_se-50 model."""
|
71 |
+
model = Backbone(input_size, num_layers=50, mode='ir_se', drop_ratio=0.4, affine=False)
|
72 |
+
return model
|
73 |
+
|
74 |
+
|
75 |
+
def IR_SE_101(input_size):
|
76 |
+
"""Constructs a ir_se-101 model."""
|
77 |
+
model = Backbone(input_size, num_layers=100, mode='ir_se', drop_ratio=0.4, affine=False)
|
78 |
+
return model
|
79 |
+
|
80 |
+
|
81 |
+
def IR_SE_152(input_size):
|
82 |
+
"""Constructs a ir_se-152 model."""
|
83 |
+
model = Backbone(input_size, num_layers=152, mode='ir_se', drop_ratio=0.4, affine=False)
|
84 |
+
return model
|
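A minimal sketch of the IR-SE-50 backbone that IDLoss wraps: it expects 112x112 face crops and returns L2-normalised 512-d embeddings. Weights here are randomly initialised; the pretrained ArcFace checkpoint is loaded separately via model_paths.

# Hypothetical usage sketch (not in this commit); weights are untrained here.
import torch
from models.encoders.model_irse import IR_SE_50

facenet = IR_SE_50(input_size=112).eval()
with torch.no_grad():
    emb = facenet(torch.randn(2, 3, 112, 112))
print(emb.shape)          # torch.Size([2, 512])
print(emb.norm(dim=1))    # ~1.0 per row, thanks to l2_norm()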
interface/pixel2style2pixel/models/encoders/psp_encoders.py
ADDED
@@ -0,0 +1,186 @@
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch import nn
+from torch.nn import Linear, Conv2d, BatchNorm2d, PReLU, Sequential, Module
+
+from pixel2style2pixel.models.encoders.helpers import get_blocks, Flatten, bottleneck_IR, bottleneck_IR_SE
+from pixel2style2pixel.models.stylegan2.model import EqualLinear
+
+
+class GradualStyleBlock(Module):
+    def __init__(self, in_c, out_c, spatial):
+        super(GradualStyleBlock, self).__init__()
+        self.out_c = out_c
+        self.spatial = spatial
+        num_pools = int(np.log2(spatial))
+        modules = []
+        modules += [Conv2d(in_c, out_c, kernel_size=3, stride=2, padding=1),
+                    nn.LeakyReLU()]
+        for i in range(num_pools - 1):
+            modules += [
+                Conv2d(out_c, out_c, kernel_size=3, stride=2, padding=1),
+                nn.LeakyReLU()
+            ]
+        self.convs = nn.Sequential(*modules)
+        self.linear = EqualLinear(out_c, out_c, lr_mul=1)
+
+    def forward(self, x):
+        x = self.convs(x)
+        x = x.view(-1, self.out_c)
+        x = self.linear(x)
+        return x
+
+
+class GradualStyleEncoder(Module):
+    def __init__(self, num_layers, mode='ir', opts=None):
+        super(GradualStyleEncoder, self).__init__()
+        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
+        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
+        blocks = get_blocks(num_layers)
+        if mode == 'ir':
+            unit_module = bottleneck_IR
+        elif mode == 'ir_se':
+            unit_module = bottleneck_IR_SE
+        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
+                                      BatchNorm2d(64),
+                                      PReLU(64))
+        modules = []
+        for block in blocks:
+            for bottleneck in block:
+                modules.append(unit_module(bottleneck.in_channel,
+                                           bottleneck.depth,
+                                           bottleneck.stride))
+        self.body = Sequential(*modules)
+
+        self.styles = nn.ModuleList()
+        self.style_count = opts.n_styles
+        self.coarse_ind = 3
+        self.middle_ind = 7
+        for i in range(self.style_count):
+            if i < self.coarse_ind:
+                style = GradualStyleBlock(512, 512, 16)
+            elif i < self.middle_ind:
+                style = GradualStyleBlock(512, 512, 32)
+            else:
+                style = GradualStyleBlock(512, 512, 64)
+            self.styles.append(style)
+        self.latlayer1 = nn.Conv2d(256, 512, kernel_size=1, stride=1, padding=0)
+        self.latlayer2 = nn.Conv2d(128, 512, kernel_size=1, stride=1, padding=0)
+
+    def _upsample_add(self, x, y):
+        '''Upsample and add two feature maps.
+        Args:
+          x: (Variable) top feature map to be upsampled.
+          y: (Variable) lateral feature map.
+        Returns:
+          (Variable) added feature map.
+        Note in PyTorch, when input size is odd, the upsampled feature map
+        with `F.upsample(..., scale_factor=2, mode='nearest')`
+        maybe not equal to the lateral feature map size.
+        e.g.
+        original input size: [N,_,15,15] ->
+        conv2d feature map size: [N,_,8,8] ->
+        upsampled feature map size: [N,_,16,16]
+        So we choose bilinear upsample which supports arbitrary output sizes.
+        '''
+        _, _, H, W = y.size()
+        return F.interpolate(x, size=(H, W), mode='bilinear', align_corners=True) + y
+
+    def forward(self, x):
+        x = self.input_layer(x)
+
+        latents = []
+        modulelist = list(self.body._modules.values())
+        for i, l in enumerate(modulelist):
+            x = l(x)
+            if i == 6:
+                c1 = x
+            elif i == 20:
+                c2 = x
+            elif i == 23:
+                c3 = x
+
+        for j in range(self.coarse_ind):
+            latents.append(self.styles[j](c3))
+
+        p2 = self._upsample_add(c3, self.latlayer1(c2))
+        for j in range(self.coarse_ind, self.middle_ind):
+            latents.append(self.styles[j](p2))
+
+        p1 = self._upsample_add(p2, self.latlayer2(c1))
+        for j in range(self.middle_ind, self.style_count):
+            latents.append(self.styles[j](p1))
+
+        out = torch.stack(latents, dim=1)
+        return out
+
+
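GradualStyleEncoder maps a feature pyramid onto the generator's style vectors: the first `coarse_ind` styles read the deepest 16x16 map (c3), the next group read the 32x32 fusion p2, and the remainder read the 64x64 fusion p1. The small sketch below only prints that mapping; `n_styles = 18` is an assumed value (the encoder reads it from `opts.n_styles`, 18 being the usual count for a 1024x1024 StyleGAN2 generator).

    # Illustration only: which pyramid level feeds each W+ style vector.
    # n_styles = 18 is an assumption; the encoder takes it from opts.n_styles.
    coarse_ind, middle_ind, n_styles = 3, 7, 18

    for i in range(n_styles):
        if i < coarse_ind:
            source = "c3 (16x16, deepest features)"
        elif i < middle_ind:
            source = "p2 (c3 upsampled + lateral c2, 32x32)"
        else:
            source = "p1 (p2 upsampled + lateral c1, 64x64)"
        print(f"style {i:2d} <- {source}")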
+class BackboneEncoderUsingLastLayerIntoW(Module):
+    def __init__(self, num_layers, mode='ir', opts=None):
+        super(BackboneEncoderUsingLastLayerIntoW, self).__init__()
+        print('Using BackboneEncoderUsingLastLayerIntoW')
+        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
+        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
+        blocks = get_blocks(num_layers)
+        if mode == 'ir':
+            unit_module = bottleneck_IR
+        elif mode == 'ir_se':
+            unit_module = bottleneck_IR_SE
+        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
+                                      BatchNorm2d(64),
+                                      PReLU(64))
+        self.output_pool = torch.nn.AdaptiveAvgPool2d((1, 1))
+        self.linear = EqualLinear(512, 512, lr_mul=1)
+        modules = []
+        for block in blocks:
+            for bottleneck in block:
+                modules.append(unit_module(bottleneck.in_channel,
+                                           bottleneck.depth,
+                                           bottleneck.stride))
+        self.body = Sequential(*modules)
+
+    def forward(self, x):
+        x = self.input_layer(x)
+        x = self.body(x)
+        x = self.output_pool(x)
+        x = x.view(-1, 512)
+        x = self.linear(x)
+        return x
+
+
+class BackboneEncoderUsingLastLayerIntoWPlus(Module):
+    def __init__(self, num_layers, mode='ir', opts=None):
+        super(BackboneEncoderUsingLastLayerIntoWPlus, self).__init__()
+        print('Using BackboneEncoderUsingLastLayerIntoWPlus')
+        assert num_layers in [50, 100, 152], 'num_layers should be 50,100, or 152'
+        assert mode in ['ir', 'ir_se'], 'mode should be ir or ir_se'
+        blocks = get_blocks(num_layers)
+        if mode == 'ir':
+            unit_module = bottleneck_IR
+        elif mode == 'ir_se':
+            unit_module = bottleneck_IR_SE
+        self.n_styles = opts.n_styles
+        self.input_layer = Sequential(Conv2d(opts.input_nc, 64, (3, 3), 1, 1, bias=False),
+                                      BatchNorm2d(64),
+                                      PReLU(64))
+        self.output_layer_2 = Sequential(BatchNorm2d(512),
+                                         torch.nn.AdaptiveAvgPool2d((7, 7)),
+                                         Flatten(),
+                                         Linear(512 * 7 * 7, 512))
+        self.linear = EqualLinear(512, 512 * self.n_styles, lr_mul=1)
+        modules = []
+        for block in blocks:
+            for bottleneck in block:
+                modules.append(unit_module(bottleneck.in_channel,
+                                           bottleneck.depth,
+                                           bottleneck.stride))
+        self.body = Sequential(*modules)
+
+    def forward(self, x):
+        x = self.input_layer(x)
+        x = self.body(x)
+        x = self.output_layer_2(x)
+        x = self.linear(x)
+        x = x.view(-1, self.n_styles, 512)
+        return x
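The three encoders differ only in what they return: GradualStyleEncoder predicts one 512-d style per generator layer through the pyramid above, BackboneEncoderUsingLastLayerIntoW pools the final feature map into a single W code, and BackboneEncoderUsingLastLayerIntoWPlus expands that pooled code into a flat W+ tensor. The instantiation sketch below is illustrative only: the SimpleNamespace opts fields and the 256x256 input are assumptions about how pSp typically configures its encoder, and importing this module also pulls in the repo's StyleGAN2 ops, which in rosinality-derived code are usually compiled at import time, so a CUDA toolchain may be required.

    # Hypothetical instantiation sketch (not from the diff).
    from types import SimpleNamespace
    import torch
    from pixel2style2pixel.models.encoders.psp_encoders import GradualStyleEncoder

    # pSp normally builds `opts` from argparse; these two fields are all this encoder reads.
    opts = SimpleNamespace(input_nc=3, n_styles=18)

    encoder = GradualStyleEncoder(num_layers=50, mode='ir_se', opts=opts).eval()
    with torch.no_grad():
        x = torch.randn(1, 3, 256, 256)   # assumed encoder input resolution
        w_plus = encoder(x)
    print(w_plus.shape)                    # expected: torch.Size([1, 18, 512])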