Spaces: Running on Zero

adamelliotfields committed
Commit 069fc81 • 1 Parent(s): 4c34ed8
Add timer context manager

Files changed:
- lib/__init__.py +2 -0
- lib/inference.py +9 -9
- lib/loader.py +64 -81
- lib/utils.py +13 -1
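The commit replaces the repeated time.perf_counter() bookkeeping in lib/loader.py with a single reusable context manager defined in lib/utils.py (both shown below). A minimal usage sketch, with a hypothetical workload standing in for the real loading code:

    import time

    from lib.utils import timer  # assumes the package is importable as `lib`

    with timer("Loading 2x upscaler", logger=print):
        time.sleep(0.1)  # hypothetical stand-in for the wrapped work
    # Prints "Loading 2x upscaler" on entry and roughly
    # "Loading 2x upscaler took 0.10s" on exit, even if the block raises.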
lib/__init__.py CHANGED

@@ -11,6 +11,7 @@ from .utils import (
     enable_progress_bars,
     load_json,
     read_file,
+    timer,
 )

 __all__ = [

@@ -27,4 +28,5 @@ __all__ = [
     "load_json",
     "log_fn",
     "read_file",
+    "timer",
 ]
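Adding timer to the package imports and __all__ re-exports it at the package root, so callers can pull it in directly (assuming the package is imported as lib):

    from lib import timer  # equivalent to `from lib.utils import timer`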
lib/inference.py CHANGED

@@ -124,6 +124,13 @@ def generate(
     Info=None,
     progress=None,
 ):
+    start = time.perf_counter()
+    log = Logger("generate")
+    log.info(f"Generating {num_images} image{'s' if num_images > 1 else ''}")
+
+    if Config.ZERO_GPU and progress is not None:
+        progress((100, 100), desc="ZeroGPU init")
+
     if not torch.cuda.is_available():
         raise Error("CUDA not available")

@@ -161,13 +168,6 @@ def generate(
     )
     return latents

-    start = time.perf_counter()
-    log = Logger("generate")
-    log.info(f"Generating {num_images} image{'s' if num_images > 1 else ''}")
-
-    if Config.ZERO_GPU and progress is not None:
-        progress((100, 100), desc="ZeroGPU init")
-
     loader = Loader()
     loader.load(
         KIND,

@@ -311,8 +311,8 @@ def generate(
     loader.collect()
     gc.collect()

-    msg = f"Generating {len(images)} image{'s' if len(images) > 1 else ''} …
+    end = time.perf_counter()
+    msg = f"Generating {len(images)} image{'s' if len(images) > 1 else ''} took {end - start:.2f}s"
     log.info(msg)
     if Info:
         Info(msg)
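The generate() path keeps explicit perf_counter calls, presumably because the timed span crosses the whole function rather than one block. The final status line folds pluralization and elapsed time into one f-string; a quick illustration of the resulting format (values chosen arbitrarily):

    images = ["a.png", "b.png"]  # placeholder results
    elapsed = 12.3456            # placeholder elapsed seconds
    msg = f"Generating {len(images)} image{'s' if len(images) > 1 else ''} took {elapsed:.2f}s"
    print(msg)  # -> Generating 2 images took 12.35s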
lib/loader.py CHANGED

@@ -1,5 +1,4 @@
 import gc
-import time
 from threading import Lock

 import torch

@@ -10,6 +9,7 @@ from diffusers.models.attention_processor import AttnProcessor2_0, IPAdapterAttnProcessor2_0
 from .config import Config
 from .logger import Logger
 from .upscaler import RealESRGAN
+from .utils import timer


 class Loader:

@@ -61,11 +61,8 @@ class Loader:

     def _unload_upscaler(self):
         if self.upscaler is not None:
-            self.upscaler.to("cpu")
-            diff = time.perf_counter() - start
-            self.log.info(f"Unloading {self.upscaler.scale}x upscaler done in {diff:.2f}s")
+            with timer(f"Unloading {self.upscaler.scale}x upscaler", logger=self.log.info):
+                self.upscaler.to("cpu")

     def _unload_deepcache(self):
         if self.pipe.deepcache is not None:

@@ -73,39 +70,31 @@ class Loader:
             self.pipe.deepcache.disable()
             delattr(self.pipe, "deepcache")

-    # https://github.com/huggingface/diffusers/blob/v0.28.0/src/diffusers/loaders/ip_adapter.py#L300
+    # Copied from https://github.com/huggingface/diffusers/blob/v0.28.0/src/diffusers/loaders/ip_adapter.py#L300
     def _unload_ip_adapter(self):
         if self.ip_adapter is not None:
-            else value.__class__()
-            )
-            self.pipe.unet.set_attn_processor(attn_procs)
-            diff = time.perf_counter() - start
-            self.log.info(f"Unloading IP-Adapter done in {diff:.2f}s")
+            with timer("Unloading IP-Adapter", logger=self.log.info):
+                if not isinstance(self.pipe, Config.PIPELINES["img2img"]):
+                    self.pipe.image_encoder = None
+                    self.pipe.register_to_config(image_encoder=[None, None])
+                self.pipe.feature_extractor = None
+                self.pipe.unet.encoder_hid_proj = None
+                self.pipe.unet.config.encoder_hid_dim_type = None
+                self.pipe.register_to_config(feature_extractor=[None, None])
+                attn_procs = {}
+                for name, value in self.pipe.unet.attn_processors.items():
+                    attn_processor_class = AttnProcessor2_0()  # raises if not torch 2
+                    attn_procs[name] = (
+                        attn_processor_class
+                        if isinstance(value, IPAdapterAttnProcessor2_0)
+                        else value.__class__()
+                    )
+                self.pipe.unet.set_attn_processor(attn_procs)

     def _unload_pipeline(self):
         if self.pipe is not None:
-            self.pipe.to("cpu")
-            diff = time.perf_counter() - start
-            self.log.info(f"Unloading {self.model} done in {diff:.2f}s")
+            with timer(f"Unloading {self.model}", logger=self.log.info):
+                self.pipe.to("cpu")

     def _unload(self, kind="", model="", ip_adapter="", deepcache=1, scale=1):
         to_unload = []

@@ -133,12 +122,9 @@ class Loader:
     def _load_upscaler(self, scale=1):
         if self.upscaler is None and scale > 1:
             try:
-                self.upscaler.load_weights()
-                diff = time.perf_counter() - start
-                self.log.info(f"Loading {scale}x upscaler done in {diff:.2f}s")
+                with timer(f"Loading {scale}x upscaler", logger=self.log.info):
+                    self.upscaler = RealESRGAN(scale, device=self.pipe.device)
+                    self.upscaler.load_weights()
             except Exception as e:
                 self.log.error(f"Error loading {scale}x upscaler: {e}")
                 self.upscaler = None

@@ -168,15 +154,15 @@ class Loader:

     def _load_ip_adapter(self, ip_adapter=""):
         if not self.ip_adapter and ip_adapter:
+            with timer("Loading IP-Adapter", logger=self.log.info):
+                self.pipe.load_ip_adapter(
+                    "h94/IP-Adapter",
+                    subfolder="models",
+                    weight_name=f"ip-adapter-{ip_adapter}_sd15.safetensors",
+                )
+                # 50% works the best
+                self.pipe.set_ip_adapter_scale(0.5)
+            self.ip_adapter = ip_adapter

     def _load_pipeline(
         self,

@@ -188,19 +174,16 @@ class Loader:
         pipeline = Config.PIPELINES[kind]
         if self.pipe is None:
             try:
-                self.pipe = pipeline.from_pretrained(model, progress, **kwargs).to("cuda")
-                diff = time.perf_counter() - start
-                self.log.info(f"Loading {model} done in {diff:.2f}s")
+                with timer(f"Loading {model} ({kind})", logger=self.log.info):
+                    self.model = model
+                    if model.lower() in Config.MODEL_CHECKPOINTS.keys():
+                        self.pipe = pipeline.from_single_file(
+                            f"https://huggingface.co/{model}/{Config.MODEL_CHECKPOINTS[model.lower()]}",
+                            progress,
+                            **kwargs,
+                        ).to("cuda")
+                    else:
+                        self.pipe = pipeline.from_pretrained(model, progress, **kwargs).to("cuda")
             except Exception as e:
                 self.log.error(f"Error loading {model}: {e}")
                 self.model = None

@@ -218,27 +201,27 @@ class Loader:

         # by default all models use KL
         if is_kl and taesd:
+            with timer("Loading Tiny VAE", logger=self.log.info):
+                self.pipe.vae = AutoencoderTiny.from_pretrained(
+                    pretrained_model_name_or_path="madebyollin/taesd",
+                    torch_dtype=self.pipe.dtype,
+                ).to(self.pipe.device)
             return

         if is_tiny and not taesd:
+            with timer("Loading KL VAE", logger=self.log.info):
+                if model.lower() in Config.MODEL_CHECKPOINTS.keys():
+                    self.pipe.vae = AutoencoderKL.from_single_file(
+                        f"https://huggingface.co/{model}/{Config.MODEL_CHECKPOINTS[model.lower()]}",
+                        torch_dtype=self.pipe.dtype,
+                    ).to(self.pipe.device)
+                else:
+                    self.pipe.vae = AutoencoderKL.from_pretrained(
+                        pretrained_model_name_or_path=model,
+                        torch_dtype=self.pipe.dtype,
+                        subfolder="vae",
+                        variant="fp16",
+                    ).to(self.pipe.device)

     def collect(self):
         torch.cuda.empty_cache()

@@ -316,7 +299,7 @@ class Loader:
         # same model, different scheduler
         if self.model.lower() == model.lower():
             if not same_scheduler:
-                self.log.info(f"…
+                self.log.info(f"Enabling {scheduler} scheduler")
             if not same_karras:
                 self.log.info(f"{'Enabling' if karras else 'Disabling'} Karras sigmas")
             if not same_scheduler or not same_karras:
lib/utils.py CHANGED

@@ -2,6 +2,8 @@ import functools
 import inspect
 import json
 import os
+import time
+from contextlib import contextmanager
 from typing import Callable, TypeVar

 import anyio

@@ -21,7 +23,16 @@ P = ParamSpec("P")
 MAX_CONCURRENT_THREADS = 1
 MAX_THREADS_GUARD = Semaphore(MAX_CONCURRENT_THREADS)

+
+@contextmanager
+def timer(message="Operation", logger=print):
+    start = time.perf_counter()
+    logger(message)
+    try:
+        yield
+    finally:
+        end = time.perf_counter()
+        logger(f"{message} took {end - start:.2f}s")


 @functools.lru_cache()

@@ -66,6 +77,7 @@ def download_repo_files(repo_id, allow_patterns, token=None):
 def download_civit_file(lora_id, version_id, file_path=".", token=None):
     base_url = "https://civitai.com/api/download/models"
     file = f"{file_path}/{lora_id}.{version_id}.safetensors"
+    log = Logger("download_civit_file")

     if os.path.exists(file):
         return
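The logger parameter accepts any callable that takes a string, which is how lib/loader.py routes the timing messages through self.log.info. A small self-contained sketch using the standard logging module (chosen here only for illustration; the import path assumes the package is importable as lib):

    import logging
    import time

    from lib.utils import timer

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger("loader")

    with timer("Unloading 4x upscaler", logger=log.info):
        time.sleep(0.2)  # hypothetical stand-in for moving weights to CPU
    # INFO:loader:Unloading 4x upscaler
    # INFO:loader:Unloading 4x upscaler took 0.20s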