Spaces:
Running
on
Zero
Running
on
Zero
adamelliotfields
committed on
Commit
•
05246f1
1
Parent(s):
b074f57
Real-ESRGAN (#23)
Browse files
- Add scale (d3b61d3b0809e6a6c5740d5bbb63287c901637cb)
- Add upscaler (602cfda55e38ab599208619159bfb8370fc5db15)
- Remove 8x (fe9c9ec4c0c135b811dfd91209372e23ff762961)
- app.py +37 -29
- cli.py +2 -0
- lib/__init__.py +1 -0
- lib/inference.py +9 -4
- lib/loader.py +17 -4
- lib/upscaler.py +317 -0
app.py
CHANGED
@@ -38,7 +38,7 @@ def handle_generate(*args):
|
|
38 |
if prompt is None or prompt.strip() == "":
|
39 |
raise gr.Error("You must enter a prompt")
|
40 |
try:
|
41 |
-
images = generate(*args,
|
42 |
except RuntimeError:
|
43 |
raise gr.Error("RuntimeError: Please try again")
|
44 |
return images
|
@@ -90,32 +90,63 @@ with gr.Blocks(
|
|
90 |
)
|
91 |
|
92 |
model = gr.Dropdown(
|
93 |
-
|
94 |
filterable=False,
|
|
|
95 |
label="Model",
|
96 |
-
choices=cfg.MODELS,
|
97 |
)
|
98 |
|
99 |
with gr.Row():
|
100 |
style = gr.Dropdown(
|
101 |
value=cfg.STYLE,
|
102 |
label="Style",
|
|
|
103 |
choices=[("None", None)]
|
104 |
+ [(style["name"], style["id"]) for style in styles],
|
105 |
)
|
106 |
scheduler = gr.Dropdown(
|
|
|
107 |
value=cfg.SCHEDULER,
|
108 |
elem_id="scheduler",
|
109 |
label="Scheduler",
|
110 |
filterable=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
min_width=200,
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
)
|
114 |
|
115 |
with gr.Row():
|
116 |
guidance_scale = gr.Slider(
|
117 |
value=cfg.GUIDANCE_SCALE,
|
118 |
label="Guidance Scale",
|
|
|
119 |
minimum=1.0,
|
120 |
maximum=15.0,
|
121 |
step=0.1,
|
@@ -134,40 +165,16 @@ with gr.Blocks(
|
|
134 |
maximum=(2**64) - 1,
|
135 |
)
|
136 |
|
137 |
-
with gr.Row():
|
138 |
-
width = gr.Slider(
|
139 |
-
value=cfg.WIDTH,
|
140 |
-
label="Width",
|
141 |
-
minimum=320,
|
142 |
-
maximum=768,
|
143 |
-
step=32,
|
144 |
-
)
|
145 |
-
height = gr.Slider(
|
146 |
-
value=cfg.HEIGHT,
|
147 |
-
label="Height",
|
148 |
-
minimum=320,
|
149 |
-
maximum=768,
|
150 |
-
step=32,
|
151 |
-
)
|
152 |
-
num_images = gr.Dropdown(
|
153 |
-
choices=list(range(1, 5)),
|
154 |
-
value=cfg.NUM_IMAGES,
|
155 |
-
filterable=False,
|
156 |
-
label="Images",
|
157 |
-
)
|
158 |
-
|
159 |
with gr.Row():
|
160 |
use_karras = gr.Checkbox(
|
161 |
elem_classes=["checkbox"],
|
162 |
label="Karras σ",
|
163 |
value=True,
|
164 |
-
scale=1,
|
165 |
)
|
166 |
increment_seed = gr.Checkbox(
|
167 |
elem_classes=["checkbox"],
|
168 |
label="Autoincrement",
|
169 |
value=True,
|
170 |
-
scale=1,
|
171 |
)
|
172 |
|
173 |
with gr.TabItem("🛠️ Advanced"):
|
@@ -226,7 +233,7 @@ with gr.Blocks(
|
|
226 |
columns=2,
|
227 |
)
|
228 |
prompt = gr.Textbox(
|
229 |
-
placeholder="corgi,
|
230 |
show_label=False,
|
231 |
label="Prompt",
|
232 |
value=None,
|
@@ -294,6 +301,7 @@ with gr.Blocks(
|
|
294 |
increment_seed,
|
295 |
deepcache_interval,
|
296 |
tome_ratio,
|
|
|
297 |
],
|
298 |
)
|
299 |
|
|
|
38 |
if prompt is None or prompt.strip() == "":
|
39 |
raise gr.Error("You must enter a prompt")
|
40 |
try:
|
41 |
+
images = generate(*args, Info=gr.Info, Error=gr.Error)
|
42 |
except RuntimeError:
|
43 |
raise gr.Error("RuntimeError: Please try again")
|
44 |
return images
|
|
|
90 |
)
|
91 |
|
92 |
model = gr.Dropdown(
|
93 |
+
choices=cfg.MODELS,
|
94 |
filterable=False,
|
95 |
+
value=cfg.MODEL,
|
96 |
label="Model",
|
|
|
97 |
)
|
98 |
|
99 |
with gr.Row():
|
100 |
style = gr.Dropdown(
|
101 |
value=cfg.STYLE,
|
102 |
label="Style",
|
103 |
+
min_width=200,
|
104 |
choices=[("None", None)]
|
105 |
+ [(style["name"], style["id"]) for style in styles],
|
106 |
)
|
107 |
scheduler = gr.Dropdown(
|
108 |
+
choices=cfg.SCHEDULERS,
|
109 |
value=cfg.SCHEDULER,
|
110 |
elem_id="scheduler",
|
111 |
label="Scheduler",
|
112 |
filterable=False,
|
113 |
+
)
|
114 |
+
|
115 |
+
with gr.Row():
|
116 |
+
width = gr.Slider(
|
117 |
+
value=cfg.WIDTH,
|
118 |
+
label="Width",
|
119 |
min_width=200,
|
120 |
+
minimum=320,
|
121 |
+
maximum=768,
|
122 |
+
step=32,
|
123 |
+
)
|
124 |
+
height = gr.Slider(
|
125 |
+
value=cfg.HEIGHT,
|
126 |
+
label="Height",
|
127 |
+
minimum=320,
|
128 |
+
maximum=768,
|
129 |
+
step=32,
|
130 |
+
)
|
131 |
+
num_images = gr.Dropdown(
|
132 |
+
choices=list(range(1, 5)),
|
133 |
+
value=cfg.NUM_IMAGES,
|
134 |
+
filterable=False,
|
135 |
+
label="Images",
|
136 |
+
)
|
137 |
+
scale = gr.Dropdown(
|
138 |
+
choices=[("1x", 1), ("2x", 2), ("4x", 4)],
|
139 |
+
filterable=False,
|
140 |
+
label="Scale",
|
141 |
+
min_width=200,
|
142 |
+
value=1,
|
143 |
)
|
144 |
|
145 |
with gr.Row():
|
146 |
guidance_scale = gr.Slider(
|
147 |
value=cfg.GUIDANCE_SCALE,
|
148 |
label="Guidance Scale",
|
149 |
+
min_width=200,
|
150 |
minimum=1.0,
|
151 |
maximum=15.0,
|
152 |
step=0.1,
|
|
|
165 |
maximum=(2**64) - 1,
|
166 |
)
|
167 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
168 |
with gr.Row():
|
169 |
use_karras = gr.Checkbox(
|
170 |
elem_classes=["checkbox"],
|
171 |
label="Karras σ",
|
172 |
value=True,
|
|
|
173 |
)
|
174 |
increment_seed = gr.Checkbox(
|
175 |
elem_classes=["checkbox"],
|
176 |
label="Autoincrement",
|
177 |
value=True,
|
|
|
178 |
)
|
179 |
|
180 |
with gr.TabItem("🛠️ Advanced"):
|
|
|
233 |
columns=2,
|
234 |
)
|
235 |
prompt = gr.Textbox(
|
236 |
+
placeholder="corgi, beach, 8k",
|
237 |
show_label=False,
|
238 |
label="Prompt",
|
239 |
value=None,
|
|
|
301 |
increment_seed,
|
302 |
deepcache_interval,
|
303 |
tome_ratio,
|
304 |
+
scale,
|
305 |
],
|
306 |
)
|
307 |
|
cli.py
CHANGED
@@ -24,6 +24,7 @@ def main():
|
|
24 |
parser.add_argument("-h", "--height", type=int, metavar="INT", default=cfg.HEIGHT)
|
25 |
parser.add_argument("-m", "--model", type=str, metavar="STR", default=cfg.MODEL)
|
26 |
parser.add_argument("-d", "--deepcache", type=int, metavar="INT", default=cfg.DEEPCACHE_INTERVAL)
|
|
|
27 |
parser.add_argument("--style", type=str, metavar="STR", default=cfg.STYLE)
|
28 |
parser.add_argument("--scheduler", type=str, metavar="STR", default=cfg.SCHEDULER)
|
29 |
parser.add_argument("--guidance", type=float, metavar="FLOAT", default=cfg.GUIDANCE_SCALE)
|
@@ -56,6 +57,7 @@ def main():
|
|
56 |
args.no_increment,
|
57 |
args.deepcache,
|
58 |
args.tome,
|
|
|
59 |
)
|
60 |
save_images(images, args.filename)
|
61 |
|
|
|
24 |
parser.add_argument("-h", "--height", type=int, metavar="INT", default=cfg.HEIGHT)
|
25 |
parser.add_argument("-m", "--model", type=str, metavar="STR", default=cfg.MODEL)
|
26 |
parser.add_argument("-d", "--deepcache", type=int, metavar="INT", default=cfg.DEEPCACHE_INTERVAL)
|
27 |
+
parser.add_argument("--scale", type=int, metavar="INT", choices=[1, 2, 4], default=1)
|
28 |
parser.add_argument("--style", type=str, metavar="STR", default=cfg.STYLE)
|
29 |
parser.add_argument("--scheduler", type=str, metavar="STR", default=cfg.SCHEDULER)
|
30 |
parser.add_argument("--guidance", type=float, metavar="FLOAT", default=cfg.GUIDANCE_SCALE)
|
|
|
57 |
args.no_increment,
|
58 |
args.deepcache,
|
59 |
args.tome,
|
60 |
+
args.scale,
|
61 |
)
|
62 |
save_images(images, args.filename)
|
63 |
|
lib/__init__.py
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
from .inference import generate
|
2 |
from .loader import Loader
|
|
|
|
1 |
from .inference import generate
|
2 |
from .loader import Loader
|
3 |
+
from .upscaler import RealESRGAN
|
lib/inference.py
CHANGED
@@ -91,7 +91,8 @@ def generate(
|
|
91 |
increment_seed=True,
|
92 |
deepcache_interval=1,
|
93 |
tome_ratio=0,
|
94 |
-
|
|
|
95 |
Error=Exception,
|
96 |
):
|
97 |
if not torch.cuda.is_available():
|
@@ -118,12 +119,13 @@ def generate(
|
|
118 |
with torch.inference_mode():
|
119 |
start = time.perf_counter()
|
120 |
loader = Loader()
|
121 |
-
pipe = loader.load(
|
122 |
model,
|
123 |
scheduler,
|
124 |
karras,
|
125 |
taesd,
|
126 |
deepcache_interval,
|
|
|
127 |
DTYPE,
|
128 |
DEVICE,
|
129 |
)
|
@@ -167,6 +169,7 @@ def generate(
|
|
167 |
with token_merging(pipe, tome_ratio=tome_ratio):
|
168 |
try:
|
169 |
image = pipe(
|
|
|
170 |
num_inference_steps=inference_steps,
|
171 |
negative_prompt_embeds=neg_embeds,
|
172 |
guidance_scale=guidance_scale,
|
@@ -175,6 +178,8 @@ def generate(
|
|
175 |
height=height,
|
176 |
width=width,
|
177 |
).images[0]
|
|
|
|
|
178 |
images.append((image, str(current_seed)))
|
179 |
finally:
|
180 |
if not ZERO_GPU:
|
@@ -188,6 +193,6 @@ def generate(
|
|
188 |
loader.pipe = None
|
189 |
|
190 |
diff = time.perf_counter() - start
|
191 |
-
if
|
192 |
-
|
193 |
return images
|
|
|
91 |
increment_seed=True,
|
92 |
deepcache_interval=1,
|
93 |
tome_ratio=0,
|
94 |
+
scale=1,
|
95 |
+
Info: Callable[[str], None] = None,
|
96 |
Error=Exception,
|
97 |
):
|
98 |
if not torch.cuda.is_available():
|
|
|
119 |
with torch.inference_mode():
|
120 |
start = time.perf_counter()
|
121 |
loader = Loader()
|
122 |
+
pipe, upscaler = loader.load(
|
123 |
model,
|
124 |
scheduler,
|
125 |
karras,
|
126 |
taesd,
|
127 |
deepcache_interval,
|
128 |
+
scale,
|
129 |
DTYPE,
|
130 |
DEVICE,
|
131 |
)
|
|
|
169 |
with token_merging(pipe, tome_ratio=tome_ratio):
|
170 |
try:
|
171 |
image = pipe(
|
172 |
+
output_type="np" if scale > 1 else "pil",
|
173 |
num_inference_steps=inference_steps,
|
174 |
negative_prompt_embeds=neg_embeds,
|
175 |
guidance_scale=guidance_scale,
|
|
|
178 |
height=height,
|
179 |
width=width,
|
180 |
).images[0]
|
181 |
+
if scale > 1:
|
182 |
+
image = upscaler.predict(image)
|
183 |
images.append((image, str(current_seed)))
|
184 |
finally:
|
185 |
if not ZERO_GPU:
|
|
|
193 |
loader.pipe = None
|
194 |
|
195 |
diff = time.perf_counter() - start
|
196 |
+
if Info:
|
197 |
+
Info(f"Generated {len(images)} image{'s' if len(images) > 1 else ''} in {diff:.2f}s")
|
198 |
return images
|
lib/loader.py
CHANGED
@@ -15,6 +15,8 @@ from diffusers import (
|
|
15 |
from diffusers.models import AutoencoderKL, AutoencoderTiny
|
16 |
from torch._dynamo import OptimizedModule
|
17 |
|
|
|
|
|
18 |
ZERO_GPU = (
|
19 |
os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
|
20 |
or os.environ.get("SPACES_ZERO_GPU", "") == "1"
|
@@ -38,8 +40,17 @@ class Loader:
|
|
38 |
if cls._instance is None:
|
39 |
cls._instance = super(Loader, cls).__new__(cls)
|
40 |
cls._instance.pipe = None
|
|
|
41 |
return cls._instance
|
42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
def _load_deepcache(self, interval=1):
|
44 |
has_deepcache = hasattr(self.pipe, "deepcache")
|
45 |
|
@@ -82,7 +93,7 @@ class Loader:
|
|
82 |
model=model,
|
83 |
)
|
84 |
|
85 |
-
def load(self, model, scheduler, karras, taesd, deepcache_interval, dtype, device):
|
86 |
model_lower = model.lower()
|
87 |
|
88 |
schedulers = {
|
@@ -145,7 +156,9 @@ class Loader:
|
|
145 |
self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
|
146 |
self._load_vae(model_lower, taesd, variant)
|
147 |
self._load_deepcache(interval=deepcache_interval)
|
148 |
-
|
|
|
|
|
149 |
else:
|
150 |
print(f"Unloading {model_name.lower()}...")
|
151 |
self.pipe = None
|
@@ -161,6 +174,6 @@ class Loader:
|
|
161 |
)
|
162 |
self._load_vae(model_lower, taesd, variant)
|
163 |
self._load_deepcache(interval=deepcache_interval)
|
164 |
-
|
165 |
torch.cuda.empty_cache()
|
166 |
-
return self.pipe
|
|
|
15 |
from diffusers.models import AutoencoderKL, AutoencoderTiny
|
16 |
from torch._dynamo import OptimizedModule
|
17 |
|
18 |
+
from .upscaler import RealESRGAN
|
19 |
+
|
20 |
ZERO_GPU = (
|
21 |
os.environ.get("SPACES_ZERO_GPU", "").lower() == "true"
|
22 |
or os.environ.get("SPACES_ZERO_GPU", "") == "1"
|
|
|
40 |
if cls._instance is None:
|
41 |
cls._instance = super(Loader, cls).__new__(cls)
|
42 |
cls._instance.pipe = None
|
43 |
+
cls._instance.upscaler = None
|
44 |
return cls._instance
|
45 |
|
46 |
+
def _load_upscaler(self, device=None, scale=4):
|
47 |
+
same_scale = self.upscaler is not None and self.upscaler.scale == scale
|
48 |
+
if scale == 1:
|
49 |
+
self.upscaler = None
|
50 |
+
if scale > 1 and not same_scale:
|
51 |
+
self.upscaler = RealESRGAN(device=device, scale=scale)
|
52 |
+
self.upscaler.load_weights()
|
53 |
+
|
54 |
def _load_deepcache(self, interval=1):
|
55 |
has_deepcache = hasattr(self.pipe, "deepcache")
|
56 |
|
|
|
93 |
model=model,
|
94 |
)
|
95 |
|
96 |
+
def load(self, model, scheduler, karras, taesd, deepcache_interval, scale, dtype, device):
|
97 |
model_lower = model.lower()
|
98 |
|
99 |
schedulers = {
|
|
|
156 |
self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
|
157 |
self._load_vae(model_lower, taesd, variant)
|
158 |
self._load_deepcache(interval=deepcache_interval)
|
159 |
+
self._load_upscaler(device=device, scale=scale)
|
160 |
+
torch.cuda.empty_cache()
|
161 |
+
return self.pipe, self.upscaler
|
162 |
else:
|
163 |
print(f"Unloading {model_name.lower()}...")
|
164 |
self.pipe = None
|
|
|
174 |
)
|
175 |
self._load_vae(model_lower, taesd, variant)
|
176 |
self._load_deepcache(interval=deepcache_interval)
|
177 |
+
self._load_upscaler(device=device, scale=scale)
|
178 |
torch.cuda.empty_cache()
|
179 |
+
return self.pipe, self.upscaler
|
lib/upscaler.py
ADDED
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# BSD 3-Clause License
|
2 |
+
#
|
3 |
+
# Copyright (c) 2021, Sberbank AI
|
4 |
+
# All rights reserved.
|
5 |
+
#
|
6 |
+
# Redistribution and use in source and binary forms, with or without
|
7 |
+
# modification, are permitted provided that the following conditions are met:
|
8 |
+
#
|
9 |
+
# 1. Redistributions of source code must retain the above copyright notice, this
|
10 |
+
# list of conditions and the following disclaimer.
|
11 |
+
#
|
12 |
+
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
13 |
+
# this list of conditions and the following disclaimer in the documentation
|
14 |
+
# and/or other materials provided with the distribution.
|
15 |
+
#
|
16 |
+
# 3. Neither the name of the copyright holder nor the names of its
|
17 |
+
# contributors may be used to endorse or promote products derived from
|
18 |
+
# this software without specific prior written permission.
|
19 |
+
#
|
20 |
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
21 |
+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
22 |
+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
23 |
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
24 |
+
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
25 |
+
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
26 |
+
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
27 |
+
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
28 |
+
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
29 |
+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
30 |
+
import einops
|
31 |
+
import numpy as np
|
32 |
+
import torch
|
33 |
+
from huggingface_hub import hf_hub_download
|
34 |
+
from PIL import Image
|
35 |
+
from torch import nn as nn
|
36 |
+
from torch.nn import functional as F
|
37 |
+
from torch.nn import init as init
|
38 |
+
from torch.nn.modules.batchnorm import _BatchNorm
|
39 |
+
|
40 |
+
# Real-ESRGAN checkpoints on the Hugging Face Hub, keyed by upscaling factor.
# https://huggingface.co/ai-forever/Real-ESRGAN
# NOTE: an 8x entry ("RealESRGAN_x8.pth") used to be listed here and is
# intentionally disabled; only 2x and 4x are available.
HF_MODELS = {
    factor: {
        "repo_id": "ai-forever/Real-ESRGAN",
        "filename": f"RealESRGAN_x{factor}.pth",
    }
    for factor in (2, 4)
}
|
55 |
+
|
56 |
+
|
57 |
+
def pad_reflect(image, pad_size):
    """Mirror-pad the first two axes of a 3-D image array and return uint8.

    Every border is extended by ``pad_size`` pixels using a mirror image of
    the adjacent pixels (the edge pixel itself is repeated), which is
    ``np.pad``'s ``"symmetric"`` mode. The last axis is not padded.

    Unlike a hand-rolled slice assignment, this also works for
    ``pad_size == 0`` (returns an unpadded uint8 copy) and for a
    ``pad_size`` larger than the image.

    Args:
        image: Array of shape ``(height, width, channels)``.
        pad_size: Number of pixels added on each side of both leading axes.

    Returns:
        A new ``np.uint8`` array of shape
        ``(height + 2 * pad_size, width + 2 * pad_size, channels)``.
    """
    border = (pad_size, pad_size)
    padded = np.pad(image, (border, border, (0, 0)), mode="symmetric")
    # The result is always uint8, so float inputs are truncated here.
    return padded.astype(np.uint8)
|
69 |
+
|
70 |
+
|
71 |
+
def unpad_image(image, pad_size):
    """Crop ``pad_size`` pixels from every side of the two leading axes.

    The stop indices are computed explicitly rather than written as
    ``-pad_size``, because ``image[0:-0]`` is an empty slice: with
    ``pad_size == 0`` the image is now returned unchanged instead of empty.

    Args:
        image: Array of shape ``(height, width, channels)``.
        pad_size: Border width to remove from each side.

    Returns:
        A view of ``image`` without its border.
    """
    height, width = image.shape[:2]
    return image[pad_size : height - pad_size, pad_size : width - pad_size, :]
|
73 |
+
|
74 |
+
|
75 |
+
def pad_patch(image_patch, padding_size, channel_last=True):
    """Pad the two spatial axes of a 3-D patch by replicating its edge values.

    Args:
        image_patch: 3-D array; the channel axis is last when
            ``channel_last`` is true and first otherwise.
        padding_size: Number of pixels added on each side of both spatial axes.
        channel_last: Selects which axis is left unpadded.

    Returns:
        The padded array (``np.pad`` with mode ``"edge"``).
    """
    spatial = (padding_size, padding_size)
    untouched = (0, 0)
    if channel_last:
        widths = (spatial, spatial, untouched)
    else:
        widths = (untouched, spatial, spatial)
    return np.pad(image_patch, widths, "edge")
|
88 |
+
|
89 |
+
|
90 |
+
def unpad_patches(image_patches, padding_size):
    """Remove ``padding_size`` pixels from every side of each patch in a batch.

    The stop indices are computed explicitly rather than written as
    ``-padding_size``, because a ``[0:-0]`` slice is empty: with
    ``padding_size == 0`` the batch is now returned unchanged.

    Args:
        image_patches: Array of shape ``(n_patches, height, width, channels)``.
        padding_size: Border width to remove from each side of axes 1 and 2.

    Returns:
        A view of ``image_patches`` without the border.
    """
    height, width = image_patches.shape[1:3]
    return image_patches[
        :,
        padding_size : height - padding_size,
        padding_size : width - padding_size,
        :,
    ]
|
92 |
+
|
93 |
+
|
94 |
+
def split_image_into_overlapping_patches(image_array, patch_size, padding_size=2):
    """Cut a 3-D image into a grid of equally sized, overlapping patches.

    The image is first grown at its bottom/right edge (by edge replication)
    to a multiple of ``patch_size``, then surrounded by ``padding_size``
    replicated pixels. Each patch covers one ``patch_size`` x ``patch_size``
    cell plus ``padding_size`` pixels of context on every side.

    Args:
        image_array: Array of shape ``(rows, cols, channels)``.
        patch_size: Side length of a grid cell, without padding.
        padding_size: Context pixels included around every cell.

    Returns:
        A tuple ``(patches, padded_shape)``: ``patches`` stacks the patches
        row by row into one array, and ``padded_shape`` is the shape of the
        extended and padded image they were cut from.
    """
    n_rows, n_cols, _ = image_array.shape

    # Rows/cols missing to reach the next multiple of patch_size (0 if aligned).
    extra_rows = (-n_rows) % patch_size
    extra_cols = (-n_cols) % patch_size

    # Edge replication composes, so extending the bottom/right edge and adding
    # the surrounding context can be done in a single pad call.
    padded = np.pad(
        image_array,
        (
            (padding_size, padding_size + extra_rows),
            (padding_size, padding_size + extra_cols),
            (0, 0),
        ),
        "edge",
    )

    padded_rows, padded_cols, _ = padded.shape
    window = patch_size + 2 * padding_size
    tiles = [
        padded[top : top + window, left : left + window, :]
        for top in range(0, padded_rows - 2 * padding_size, patch_size)
        for left in range(0, padded_cols - 2 * padding_size, patch_size)
    ]
    return np.array(tiles), padded.shape
|
123 |
+
|
124 |
+
|
125 |
+
def stitch_together(patches, padded_image_shape, target_shape, padding_size=4):
    """Reassemble row-major patches into one image and crop it to size.

    Each patch first loses ``padding_size`` pixels on every side; the
    remaining cores are then laid out left to right, top to bottom.

    Fixes compared with the straightforward implementation:

    * The number of patches per row is derived from the padded width minus
      its two padding borders. Dividing the full padded width by the patch
      size over-counts as soon as ``2 * padding_size >= patch_size``.
    * The patch border is removed with explicit stop indices, so
      ``padding_size == 0`` works.
    * The channel count follows the patches instead of being fixed at 3.

    Args:
        patches: Array of shape ``(n_patches, height, width, channels)``.
        padded_image_shape: Shape of the padded image the patches were cut
            from, expressed in the same (already upscaled) pixel units as
            ``patches``; assumed to include ``padding_size`` on every side.
        target_shape: Shape whose first two entries give the crop size.
        padding_size: Border width to strip from each patch.

    Returns:
        A float array of shape ``(target_shape[0], target_shape[1], channels)``.
    """
    padded_rows, padded_cols, _ = padded_image_shape

    patch_rows, patch_cols = patches.shape[1:3]
    cores = patches[
        :,
        padding_size : patch_rows - padding_size,
        padding_size : patch_cols - padding_size,
        :,
    ]
    patch_size = cores.shape[1]
    patches_per_row = (padded_cols - 2 * padding_size) // patch_size

    canvas = np.zeros((padded_rows, padded_cols, cores.shape[-1]))
    for index, core in enumerate(cores):
        row, col = divmod(index, patches_per_row)
        top = row * patch_size
        left = col * patch_size
        canvas[top : top + patch_size, left : left + patch_size, :] = core
    return canvas[0 : target_shape[0], 0 : target_shape[1], :]
|
143 |
+
|
144 |
+
|
145 |
+
@torch.no_grad()
def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
    """Initialise the weights of one module or a list of modules in place.

    Every sub-module is visited:

    * ``nn.Conv2d`` / ``nn.Linear``: Kaiming-normal weights multiplied by
      ``scale``.
    * ``_BatchNorm``: weights set to 1.
    * In all three cases an existing bias is filled with ``bias_fill``.

    Other module types are left untouched.

    Args:
        module_list: A module or a list of modules.
        scale: Factor applied to conv/linear weights after initialisation.
        bias_fill: Value written into every bias.
        **kwargs: Forwarded to ``init.kaiming_normal_``.
    """
    targets = module_list if isinstance(module_list, list) else [module_list]
    for root in targets:
        for layer in root.modules():
            if isinstance(layer, (nn.Conv2d, nn.Linear)):
                init.kaiming_normal_(layer.weight, **kwargs)
                layer.weight.data *= scale
            elif isinstance(layer, _BatchNorm):
                init.constant_(layer.weight, 1)
            else:
                continue
            if layer.bias is not None:
                layer.bias.data.fill_(bias_fill)
|
165 |
+
|
166 |
+
|
167 |
+
def make_layer(basic_block, num_basic_block, **kwarg):
    """Stack ``num_basic_block`` freshly built blocks into an ``nn.Sequential``.

    Args:
        basic_block: Callable (typically an ``nn.Module`` class) that builds
            one block.
        num_basic_block: How many independent blocks to create.
        **kwarg: Keyword arguments passed to every ``basic_block`` call.

    Returns:
        An ``nn.Sequential`` holding the blocks in creation order.
    """
    return nn.Sequential(*(basic_block(**kwarg) for _ in range(num_basic_block)))
|
172 |
+
|
173 |
+
|
174 |
+
def pixel_unshuffle(x, scale):
    """Fold ``scale`` x ``scale`` spatial blocks of a 4-D tensor into channels.

    For input ``(b, c, h, w)`` the output is
    ``(b, c * scale * scale, h // scale, w // scale)``; output channel
    ``ch * scale**2 + s1 * scale + s2`` holds the input pixels at row offset
    ``s1`` and column offset ``s2`` inside each block of input channel ``ch``.

    Args:
        x: Tensor of shape ``(batch, channels, height, width)``.
        scale: Block size; must divide both height and width.

    Returns:
        The rearranged tensor.
    """
    batch, channels, h, w = x.shape
    assert h % scale == 0 and w % scale == 0, "Height and width must be divisible by scale"
    out_h = h // scale
    out_w = w // scale
    # (b, c, h', s1, w', s2) -> (b, c, s1, s2, h', w') -> merge (c, s1, s2).
    blocks = x.reshape(batch, channels, out_h, scale, out_w, scale)
    blocks = blocks.permute(0, 1, 3, 5, 2, 4)
    return blocks.reshape(batch, channels * scale * scale, out_h, out_w)
|
183 |
+
|
184 |
+
|
185 |
+
class ResidualDenseBlock(nn.Module):
    """Five densely connected 3x3 convolutions with a scaled residual.

    Convolution ``k`` receives the block input concatenated with the
    outputs of all earlier convolutions; the first four are followed by
    LeakyReLU(0.2). The fifth maps back to ``num_feat`` channels, and its
    output is scaled by 0.2 and added to the input.

    Args:
        num_feat: Channel count of the block input and output.
        num_grow_ch: Channels produced by each of the first four convolutions.
    """

    def __init__(self, num_feat=64, num_grow_ch=32):
        super().__init__()
        conv_args = dict(kernel_size=3, stride=1, padding=1)
        self.conv1 = nn.Conv2d(num_feat, num_grow_ch, **conv_args)
        self.conv2 = nn.Conv2d(num_feat + num_grow_ch, num_grow_ch, **conv_args)
        self.conv3 = nn.Conv2d(num_feat + 2 * num_grow_ch, num_grow_ch, **conv_args)
        self.conv4 = nn.Conv2d(num_feat + 3 * num_grow_ch, num_grow_ch, **conv_args)
        self.conv5 = nn.Conv2d(num_feat + 4 * num_grow_ch, num_feat, **conv_args)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)
        default_init_weights([self.conv1, self.conv2, self.conv3, self.conv4, self.conv5], 0.1)

    def forward(self, x):
        # dense[0] is the input; each later entry is one growth feature map.
        dense = [x, self.lrelu(self.conv1(x))]
        for conv in (self.conv2, self.conv3, self.conv4):
            dense.append(self.lrelu(conv(torch.cat(dense, 1))))
        residual = self.conv5(torch.cat(dense, 1))
        # scale the residual by a factor of 0.2
        return residual * 0.2 + x
|
203 |
+
|
204 |
+
|
205 |
+
class RRDB(nn.Module):
    """Three ``ResidualDenseBlock``s in sequence with a scaled residual.

    Args:
        num_feat: Channel count of the input and output.
        num_grow_ch: Growth channels passed to every ``ResidualDenseBlock``.
    """

    def __init__(self, num_feat, num_grow_ch=32):
        super().__init__()
        self.rdb1 = ResidualDenseBlock(num_feat, num_grow_ch)
        self.rdb2 = ResidualDenseBlock(num_feat, num_grow_ch)
        self.rdb3 = ResidualDenseBlock(num_feat, num_grow_ch)

    def forward(self, x):
        refined = self.rdb3(self.rdb2(self.rdb1(x)))
        # scale the residual by a factor of 0.2
        return refined * 0.2 + x
|
217 |
+
|
218 |
+
|
219 |
+
class RRDBNet(nn.Module):
    """Super-resolution generator built from a trunk of ``RRDB`` blocks.

    The network always applies two 2x nearest-neighbour upsampling stages,
    plus a third when ``scale == 8``. For ``scale == 2`` and ``scale == 1``
    the input is first pixel-unshuffled by 2 and 4 respectively, which is
    why the first convolution then expects 4x / 16x the input channels.

    Args:
        num_in_ch: Channels of the input image.
        num_out_ch: Channels of the output image.
        scale: Overall upscaling factor.
        num_feat: Width of the intermediate feature maps.
        num_block: Number of ``RRDB`` blocks in the trunk.
        num_grow_ch: Growth channels inside each ``RRDB``.
    """

    def __init__(self, num_in_ch, num_out_ch, scale=4, num_feat=64, num_block=23, num_grow_ch=32):
        super().__init__()
        self.scale = scale
        # Pixel-unshuffling in forward() multiplies the channel count.
        channel_multiplier = {2: 4, 1: 16}.get(scale, 1)
        self.conv_first = nn.Conv2d(num_in_ch * channel_multiplier, num_feat, 3, 1, 1)
        self.body = make_layer(RRDB, num_block, num_feat=num_feat, num_grow_ch=num_grow_ch)
        self.conv_body = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_up1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_up2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        if scale == 8:
            self.conv_up3 = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_hr = nn.Conv2d(num_feat, num_feat, 3, 1, 1)
        self.conv_last = nn.Conv2d(num_feat, num_out_ch, 3, 1, 1)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        unshuffle_by = {2: 2, 1: 4}.get(self.scale)
        feat = x if unshuffle_by is None else pixel_unshuffle(x, scale=unshuffle_by)

        feat = self.conv_first(feat)
        feat = feat + self.conv_body(self.body(feat))

        up_convs = [self.conv_up1, self.conv_up2]
        if self.scale == 8:
            up_convs.append(self.conv_up3)
        for conv in up_convs:
            feat = self.lrelu(conv(F.interpolate(feat, scale_factor=2, mode="nearest")))

        return self.conv_last(self.lrelu(self.conv_hr(feat)))
|
254 |
+
|
255 |
+
|
256 |
+
class RealESRGAN:
    """Real-ESRGAN upscaler: an ``RRDBNet`` plus tiled inference.

    Args:
        device: Device the model is moved to by ``load_weights`` and that
            input patches are sent to by ``predict``.
        scale: Upscaling factor; ``load_weights`` only supports the factors
            listed in ``HF_MODELS``.
    """

    def __init__(self, device, scale=4):
        self.device = device
        self.scale = scale
        self.model = RRDBNet(
            num_in_ch=3,
            num_out_ch=3,
            num_feat=64,
            num_block=23,
            num_grow_ch=32,
            scale=scale,
        )

    def load_weights(self):
        """Download the checkpoint for ``self.scale`` and load it into the model.

        Raises:
            ValueError: If no checkpoint is registered for ``self.scale``.
        """
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if self.scale not in HF_MODELS:
            raise ValueError("You can download models only with scales: 2, 4")
        config = HF_MODELS[self.scale]
        cache_path = hf_hub_download(config["repo_id"], filename=config["filename"])
        # Deserialise onto the CPU regardless of the device the checkpoint was
        # saved from; the model is moved to ``self.device`` below.
        # NOTE(review): torch.load unpickles a downloaded file; consider
        # ``weights_only=True`` once confirmed to work with these checkpoints.
        state = torch.load(cache_path, map_location="cpu")
        # Some checkpoints nest the weights under "params" or "params_ema".
        for key in ("params", "params_ema"):
            if key in state:
                state = state[key]
                break
        self.model.load_state_dict(state, strict=True)
        self.model.eval().to(device=self.device)

    @torch.autocast("cuda")
    def predict(self, lr_image, batch_size=4, patches_size=192, padding=24, pad_size=15):
        """Upscale an image by ``self.scale`` and return it as a PIL image.

        The image is mirror-padded, split into overlapping patches, run
        through the model in batches, stitched back together and cropped.

        Args:
            lr_image: ``np.ndarray`` or anything ``np.array`` accepts (e.g. a
                PIL image), shaped ``(height, width, 3)``. Non-integer arrays
                with negative values are treated as ``[-1, 1]`` and those
                with a maximum of at most 1 as ``[0, 1]``; integer arrays
                are taken to be ``0..255`` already.
            batch_size: Number of patches per forward pass.
            patches_size: Side length of each patch, without overlap.
            padding: Overlap added around every patch.
            pad_size: Mirror padding added around the whole image.

        Returns:
            The upscaled ``PIL.Image.Image``.
        """
        scale = self.scale
        if not isinstance(lr_image, np.ndarray):
            lr_image = np.array(lr_image)
        # Only rescale non-integer data: an 8-bit image whose brightest pixel
        # is 0 or 1 must not be mistaken for a normalised [0, 1] image.
        if not np.issubdtype(lr_image.dtype, np.integer):
            if lr_image.min() < 0.0:
                lr_image = (lr_image + 1.0) / 2.0
            if lr_image.max() <= 1.0:
                lr_image = lr_image * 255.0
        lr_image = pad_reflect(lr_image, pad_size)
        patches, p_shape = split_image_into_overlapping_patches(
            lr_image,
            patch_size=patches_size,
            padding_size=padding,
        )
        patches = torch.as_tensor(patches / 255.0, dtype=torch.float32)
        image = einops.rearrange(patches, "b h w c -> b c h w").to(device=self.device)

        with torch.inference_mode():
            # Collect the batches and concatenate once, rather than growing
            # the result tensor with a new copy on every iteration.
            outputs = [
                self.model(image[start : start + batch_size])
                for start in range(0, image.shape[0], batch_size)
            ]
            res = torch.cat(outputs, 0)

        sr_image = einops.rearrange(res.clamp(0, 1), "b c h w -> b h w c").cpu().numpy()
        padded_size_scaled = tuple(np.multiply(p_shape[0:2], scale)) + (3,)
        scaled_image_shape = tuple(np.multiply(lr_image.shape[0:2], scale)) + (3,)
        sr_image = stitch_together(
            sr_image,
            padded_image_shape=padded_size_scaled,
            target_shape=scaled_image_shape,
            padding_size=padding * scale,
        )
        sr_image = (sr_image * 255).astype(np.uint8)
        sr_image = unpad_image(sr_image, pad_size * scale)
        return Image.fromarray(sr_image)
|