adamelliotfields committed on
Commit
9edebae
1 Parent(s): 7f19757

Single-file checkpoints

Files changed (6)
  1. README.md +25 -4
  2. app.py +10 -24
  3. cli.py +2 -6
  4. lib/config.py +38 -12
  5. lib/loader.py +81 -71
  6. usage.md +12 -24
README.md CHANGED
@@ -15,6 +15,7 @@ header: mini
 license: apache-2.0
 models:
 - ai-forever/Real-ESRGAN
+- cyberdelia/CyberRealistic
 - fluently/Fluently-v4
 - h94/IP-Adapter
 - Linaqruf/anything-v3-1
@@ -22,31 +23,38 @@ models:
 - prompthero/openjourney-v4
 - runwayml/stable-diffusion-v1-5
 - SG161222/Realistic_Vision_V5.1_noVAE
+- XpucT/Deliberate
 preload_from_hub:
 - >-
   ai-forever/Real-ESRGAN
   RealESRGAN_x2.pth,RealESRGAN_x4.pth
+- >-
+  cyberdelia/CyberRealistic
+  CyberRealistic_V5_FP16.safetensors
 - >-
   fluently/Fluently-v4
-  text_encoder/model.fp16.safetensors,unet/diffusion_pytorch_model.fp16.safetensors,vae/diffusion_pytorch_model.fp16.safetensors
+  Fluently-v4.safetensors
 - >-
   h94/IP-Adapter
   models/ip-adapter-full-face_sd15.safetensors,models/ip-adapter-plus_sd15.safetensors,models/image_encoder/model.safetensors
 - >-
   Linaqruf/anything-v3-1
-  text_encoder/model.safetensors,unet/diffusion_pytorch_model.safetensors,vae/diffusion_pytorch_model.safetensors
+  anything-v3-2.safetensors
 - >-
   Lykon/dreamshaper-8
   text_encoder/model.fp16.safetensors,unet/diffusion_pytorch_model.fp16.safetensors,vae/diffusion_pytorch_model.fp16.safetensors
 - >-
   prompthero/openjourney-v4
-  text_encoder/model.safetensors,unet/diffusion_pytorch_model.safetensors,vae/diffusion_pytorch_model.safetensors
+  openjourney-v4.ckpt
 - >-
   runwayml/stable-diffusion-v1-5
   text_encoder/model.fp16.safetensors,unet/diffusion_pytorch_model.fp16.safetensors,vae/diffusion_pytorch_model.fp16.safetensors
 - >-
   SG161222/Realistic_Vision_V5.1_noVAE
-  text_encoder/model.safetensors,unet/diffusion_pytorch_model.safetensors,vae/diffusion_pytorch_model.safetensors
+  Realistic_Vision_V5.1_fp16-no-ema.safetensors
+- >-
+  XpucT/Deliberate
+  Deliberate_v6.safetensors
 ---
 
 # diffusion
@@ -85,3 +93,16 @@ python app.py --port 7860
 # cli
 python cli.py 'an astronaut riding a horse on mars'
 ```
+
+## Development
+
+See [pull requests and discussions](https://huggingface.co/docs/hub/en/repositories-pull-requests-discussions).
+
+```sh
+git fetch origin refs/pr/42:pr/42
+git checkout pr/42
+# ...
+git add .
+git commit -m "Commit message"
+git push origin pr/42:refs/pr/42
+```
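Each `preload_from_hub` entry is a repo id followed by a comma-separated list of files, which Spaces downloads into the cache at build time. A minimal sketch of the equivalent manual download (repo id and filename taken from the entries above):

```python
# Fetch one of the single-file checkpoints into the local HF cache,
# mirroring what a `preload_from_hub` entry does at Space build time.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="cyberdelia/CyberRealistic",
    filename="CyberRealistic_V5_FP16.safetensors",
)
print(path)  # local path of the cached checkpoint
```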
app.py CHANGED
@@ -144,7 +144,7 @@ with gr.Blocks(
                 min_width=240,
             )
             scheduler = gr.Dropdown(
-                choices=Config.SCHEDULERS,
+                choices=Config.SCHEDULERS.keys(),
                 value=Config.SCHEDULER,
                 elem_id="scheduler",
                 label="Scheduler",
@@ -245,23 +245,6 @@ with gr.Blocks(
                     maximum=(2**64) - 1,
                 )
 
-                with gr.Row():
-                    increment_seed = gr.Checkbox(
-                        elem_classes=["checkbox"],
-                        label="Autoincrement",
-                        value=True,
-                    )
-                    use_freeu = gr.Checkbox(
-                        elem_classes=["checkbox"],
-                        label="FreeU",
-                        value=False,
-                    )
-                    use_clip_skip = gr.Checkbox(
-                        elem_classes=["checkbox"],
-                        label="Clip skip",
-                        value=False,
-                    )
-
                 with gr.Row():
                     use_karras = gr.Checkbox(
                         elem_classes=["checkbox"],
@@ -273,9 +256,14 @@ with gr.Blocks(
                         label="Tiny VAE",
                         value=False,
                     )
-                    truncate_prompts = gr.Checkbox(
+                    use_freeu = gr.Checkbox(
                         elem_classes=["checkbox"],
-                        label="Truncate prompts",
+                        label="FreeU",
+                        value=False,
+                    )
+                    use_clip_skip = gr.Checkbox(
+                        elem_classes=["checkbox"],
+                        label="Clip skip",
                         value=False,
                     )
 
@@ -468,15 +456,13 @@ with gr.Blocks(
             guidance_scale,
             inference_steps,
             denoising_strength,
+            deepcache_interval,
+            scale,
             num_images,
             use_karras,
             use_taesd,
             use_freeu,
             use_clip_skip,
-            truncate_prompts,
-            increment_seed,
-            deepcache_interval,
-            scale,
         ],
     )
 
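With `Config.SCHEDULERS` now a dict (see `lib/config.py` below), the dropdown lists its keys and the selected label is mapped back to a scheduler class at load time. A minimal sketch of that lookup, assuming the config below is importable:

```python
# Hypothetical standalone lookup; in the app the label comes from the
# gr.Dropdown above and the instantiation happens in lib/loader.py.
from lib.config import Config

label = "DEIS 2M"  # the Config.SCHEDULER default
scheduler_cls = Config.SCHEDULERS[label]  # DEISMultistepScheduler
scheduler = scheduler_cls(beta_schedule="scaled_linear")
```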
cli.py CHANGED
@@ -36,10 +36,8 @@ async def main():
     parser.add_argument("--ip-face", action="store_true")
     parser.add_argument("--taesd", action="store_true")
     parser.add_argument("--clip-skip", action="store_true")
-    parser.add_argument("--truncate", action="store_true")
     parser.add_argument("--karras", action="store_true")
     parser.add_argument("--freeu", action="store_true")
-    parser.add_argument("--no-increment", action="store_false")
     # fmt: on
 
     args = parser.parse_args()
@@ -60,15 +58,13 @@ async def main():
         args.guidance,
         args.steps,
         args.strength,
+        args.deepcache,
+        args.scale,
         args.images,
         args.karras,
         args.taesd,
         args.freeu,
         args.clip_skip,
-        args.truncate,
-        args.no_increment,
-        args.deepcache,
-        args.scale,
     )
     await async_call(save_images, images, args.filename)
 
lib/config.py CHANGED
@@ -1,5 +1,16 @@
 from types import SimpleNamespace
 
+from diffusers import (
+    DDIMScheduler,
+    DEISMultistepScheduler,
+    DPMSolverMultistepScheduler,
+    EulerAncestralDiscreteScheduler,
+    EulerDiscreteScheduler,
+    PNDMScheduler,
+    StableDiffusionImg2ImgPipeline,
+    StableDiffusionPipeline,
+)
+
 Config = SimpleNamespace(
     MONO_FONTS=["monospace"],
     SANS_FONTS=[
@@ -9,30 +20,45 @@ Config = SimpleNamespace(
         "Segoe UI Symbol",
         "Noto Color Emoji",
     ],
+    PIPELINES={
+        "txt2img": StableDiffusionPipeline,
+        "img2img": StableDiffusionImg2ImgPipeline,
+    },
     MODEL="Lykon/dreamshaper-8",
     MODELS=[
+        "cyberdelia/CyberRealistic",
        "fluently/Fluently-v4",
        "Linaqruf/anything-v3-1",
        "Lykon/dreamshaper-8",
        "prompthero/openjourney-v4",
        "runwayml/stable-diffusion-v1-5",
-       "SG161222/Realistic_Vision_V5.1_Novae",
+       "SG161222/Realistic_Vision_V5.1_noVAE",
+       "XpucT/Deliberate",
     ],
+    MODEL_CHECKPOINTS={
+        # keep keys lowercase
+        "cyberdelia/cyberrealistic": "CyberRealistic_V5_FP16.safetensors",
+        "fluently/fluently-v4": "Fluently-v4.safetensors",
+        "linaqruf/anything-v3-1": "anything-v3-2.safetensors",
+        "prompthero/openjourney-v4": "openjourney-v4.ckpt",
+        "sg161222/realistic_vision_v5.1_novae": "Realistic_Vision_V5.1_fp16-no-ema.safetensors",
+        "xpuct/deliberate": "Deliberate_v6.safetensors",
+    },
     SCHEDULER="DEIS 2M",
-    SCHEDULERS=[
-        "DDIM",
-        "DEIS 2M",
-        "DPM++ 2M",
-        "Euler",
-        "Euler a",
-        "PNDM",
-    ],
+    SCHEDULERS={
+        "DDIM": DDIMScheduler,
+        "DEIS 2M": DEISMultistepScheduler,
+        "DPM++ 2M": DPMSolverMultistepScheduler,
+        "Euler": EulerDiscreteScheduler,
+        "Euler a": EulerAncestralDiscreteScheduler,
+        "PNDM": PNDMScheduler,
+    },
     EMBEDDING="fast_negative",
-    EMBEDDINGS={
+    EMBEDDINGS=[
        "bad_dream",
        "fast_negative",
        "unrealistic_dream",
-    },
+    ],
     STYLE="sai-enhance",
     WIDTH=448,
     HEIGHT=576,
@@ -40,7 +66,7 @@ Config = SimpleNamespace(
     SEED=-1,
     GUIDANCE_SCALE=6,
     INFERENCE_STEPS=35,
-    DENOISING_STRENGTH=0.6,
+    DENOISING_STRENGTH=0.7,
     DEEPCACHE_INTERVAL=1,
     SCALE=1,
     SCALES=[1, 2, 4],
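The new `MODEL_CHECKPOINTS` map keys single-file models by lowercase repo id, which is why callers normalize with `.lower()`. A short sketch of how a caller can distinguish single-file checkpoints from the diffusers folder layout, assuming `lib/config.py` above is importable:

```python
from lib.config import Config

for model in Config.MODELS:
    checkpoint = Config.MODEL_CHECKPOINTS.get(model.lower())
    if checkpoint:
        print(f"{model}: single file -> {checkpoint}")
    else:
        print(f"{model}: diffusers folder layout (fp16 variant)")
```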
lib/loader.py CHANGED
@@ -1,27 +1,18 @@
+import gc
+
 import torch
 from DeepCache import DeepCacheSDHelper
-from diffusers import (
-    DDIMScheduler,
-    DEISMultistepScheduler,
-    DPMSolverMultistepScheduler,
-    EulerAncestralDiscreteScheduler,
-    EulerDiscreteScheduler,
-    PNDMScheduler,
-    StableDiffusionImg2ImgPipeline,
-    StableDiffusionPipeline,
-)
+from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
 from diffusers.models import AutoencoderKL, AutoencoderTiny
 from diffusers.models.attention_processor import AttnProcessor2_0, IPAdapterAttnProcessor2_0
 from torch._dynamo import OptimizedModule
 
+from .config import Config
 from .upscaler import RealESRGAN
 
 __import__("warnings").filterwarnings("ignore", category=FutureWarning, module="diffusers")
-
-PIPELINES = {
-    "txt2img": StableDiffusionPipeline,
-    "img2img": StableDiffusionImg2ImgPipeline,
-}
+__import__("warnings").filterwarnings("ignore", category=FutureWarning, module="torch")
+__import__("diffusers").logging.set_verbosity_error()
 
 
 class Loader:
@@ -31,6 +22,7 @@ class Loader:
         if cls._instance is None:
             cls._instance = super(Loader, cls).__new__(cls)
             cls._instance.pipe = None
+            cls._instance.model = None
             cls._instance.upscaler = None
             cls._instance.ip_adapter = None
         return cls._instance
@@ -38,13 +30,13 @@
     def _should_unload_upscaler(self, scale=1):
         return self.upscaler is not None and scale == 1
 
-    def _should_unload_ip_adapter(self, ip_adapter=None):
-        return self.ip_adapter is not None and ip_adapter is None
+    def _should_unload_ip_adapter(self, ip_adapter=""):
+        return self.ip_adapter is not None and not ip_adapter
 
     def _should_unload_pipeline(self, kind="", model=""):
         if self.pipe is None:
             return False
-        if self.pipe.config._name_or_path.lower() != model.lower():
+        if self.model.lower() != model.lower():
             return True
         if kind == "txt2img" and not isinstance(self.pipe, StableDiffusionPipeline):
             return True  # txt2img -> img2img
@@ -52,6 +44,7 @@
             return True  # img2img -> txt2img
         return False
 
+    # https://github.com/huggingface/diffusers/blob/v0.28.0/src/diffusers/loaders/ip_adapter.py#L300
     def _unload_ip_adapter(self):
         print("Unloading IP Adapter...")
         if not isinstance(self.pipe, StableDiffusionImg2ImgPipeline):
@@ -73,7 +66,7 @@
         )
         self.pipe.unet.set_attn_processor(attn_procs)
 
-    def _unload(self, kind="", model="", ip_adapter=None, scale=1):
+    def _unload(self, kind="", model="", ip_adapter="", scale=1):
         to_unload = []
 
         if self._should_unload_upscaler(scale):
@@ -84,27 +77,30 @@
             to_unload.append("ip_adapter")
 
         if self._should_unload_pipeline(kind, model):
+            to_unload.append("model")
             to_unload.append("pipe")
 
         for component in to_unload:
-            if hasattr(self, component):
-                delattr(self, component)
+            delattr(self, component)
 
+        gc.collect()
         torch.cuda.empty_cache()
         torch.cuda.ipc_collect()
+        torch.cuda.reset_max_memory_allocated()
+        torch.cuda.reset_peak_memory_stats()
 
         for component in to_unload:
             setattr(self, component, None)
 
-    def _load_ip_adapter(self, ip_adapter=None):
-        if self.ip_adapter is None and ip_adapter is not None:
+    def _load_ip_adapter(self, ip_adapter=""):
+        if self.ip_adapter is None and ip_adapter:
             print(f"Loading IP Adapter: {ip_adapter}...")
             self.pipe.load_ip_adapter(
                 "h94/IP-Adapter",
                 subfolder="models",
                 weight_name=f"ip-adapter-{ip_adapter}_sd15.safetensors",
             )
-            # TODO: slider for ip_scale
+            # 50% works the best
            self.pipe.set_ip_adapter_scale(0.5)
            self.ip_adapter = ip_adapter
 
@@ -114,24 +110,39 @@
             self.upscaler = RealESRGAN(device=device, scale=scale)
             self.upscaler.load_weights()
 
-    def _load_pipeline(self, kind, model, taesd, device, **kwargs):
-        pipeline = PIPELINES[kind]
+    def _load_pipeline(self, kind, model, device, **kwargs):
+        pipeline = Config.PIPELINES[kind]
         if self.pipe is None:
-            print(f"Loading {model.lower()} with {'Tiny' if taesd else 'KL'} VAE...")
-            self.pipe = pipeline.from_pretrained(model, **kwargs).to(device)
+            print(f"Loading {model}...")
+            try:
+                if model.lower() in Config.MODEL_CHECKPOINTS.keys():
+                    self.pipe = pipeline.from_single_file(
+                        f"https://huggingface.co/{model}/{Config.MODEL_CHECKPOINTS[model.lower()]}",
+                        **kwargs,
+                    ).to(device)
+                else:
+                    self.pipe = pipeline.from_pretrained(model, **kwargs).to(device)
+                self.model = model
+            except Exception as e:
+                print(f"Error loading {model}: {e}")
+                self.model = None
+                self.pipe = None
+                return
+
         if not isinstance(self.pipe, pipeline):
             self.pipe = pipeline.from_pipe(self.pipe).to(device)
+        self.pipe.set_progress_bar_config(disable=True)
 
-    def _load_vae(self, taesd=False, model_name=None, variant=None):
+    def _load_vae(self, taesd=False, model=""):
         vae_type = type(self.pipe.vae)
         is_kl = issubclass(vae_type, (AutoencoderKL, OptimizedModule))
         is_tiny = issubclass(vae_type, AutoencoderTiny)
 
         # by default all models use KL
         if is_kl and taesd:
-            # can't compile tiny VAE
             print("Switching to Tiny VAE...")
             self.pipe.vae = AutoencoderTiny.from_pretrained(
+                # can't compile tiny VAE
                 pretrained_model_name_or_path="madebyollin/taesd",
                 torch_dtype=self.pipe.dtype,
             ).to(self.pipe.device)
@@ -139,16 +150,22 @@
 
         if is_tiny and not taesd:
             print("Switching to KL VAE...")
-            model = AutoencoderKL.from_pretrained(
-                pretrained_model_name_or_path=model_name,
-                torch_dtype=self.pipe.dtype,
-                subfolder="vae",
-                variant=variant,
-            ).to(self.pipe.device)
+            if model.lower() in Config.MODEL_CHECKPOINTS.keys():
+                vae = AutoencoderKL.from_single_file(
+                    f"https://huggingface.co/{model}/{Config.MODEL_CHECKPOINTS[model.lower()]}",
+                    torch_dtype=self.pipe.dtype,
+                ).to(self.pipe.device)
+            else:
+                vae = AutoencoderKL.from_pretrained(
+                    pretrained_model_name_or_path=model,
+                    torch_dtype=self.pipe.dtype,
+                    subfolder="vae",
+                    variant="fp16",
+                ).to(self.pipe.device)
             self.pipe.vae = torch.compile(
                 mode="reduce-overhead",
                 fullgraph=True,
-                model=model,
+                model=vae,
             )
 
     def _load_deepcache(self, interval=1):
@@ -162,8 +179,8 @@
         self.pipe.deepcache.set_params(cache_interval=interval)
         self.pipe.deepcache.enable()
 
+    # https://github.com/ChenyangSi/FreeU
     def _load_freeu(self, freeu=False):
-        # https://github.com/huggingface/diffusers/blob/v0.30.0/src/diffusers/models/unets/unet_2d_condition.py
         block = self.pipe.unet.up_blocks[0]
         attrs = ["b1", "b2", "s1", "s2"]
         has_freeu = all(getattr(block, attr, None) is not None for attr in attrs)
@@ -171,7 +188,6 @@
             print("Disabling FreeU...")
             self.pipe.disable_freeu()
         elif not has_freeu and freeu:
-            # https://github.com/ChenyangSi/FreeU
             print("Enabling FreeU...")
             self.pipe.enable_freeu(b1=1.5, b2=1.6, s1=0.9, s2=0.2)
 
@@ -187,20 +203,7 @@
         deepcache,
         scale,
         device,
-        dtype,
     ):
-        model_lower = model.lower()
-        model_name = self.pipe.config._name_or_path.lower() if self.pipe is not None else ""
-
-        schedulers = {
-            "DDIM": DDIMScheduler,
-            "DEIS 2M": DEISMultistepScheduler,
-            "DPM++ 2M": DPMSolverMultistepScheduler,
-            "Euler": EulerDiscreteScheduler,
-            "Euler a": EulerAncestralDiscreteScheduler,
-            "PNDM": PNDMScheduler,
-        }
-
         scheduler_kwargs = {
             "beta_schedule": "scaled_linear",
             "timestep_spacing": "leading",
@@ -217,45 +220,52 @@
             scheduler_kwargs["clip_sample"] = False
             scheduler_kwargs["set_alpha_to_one"] = False
 
-        # no fp16 variant
-        if model_lower not in [
-            "sg161222/realistic_vision_v5.1_novae",
-            "prompthero/openjourney-v4",
-            "linaqruf/anything-v3-1",
-        ]:
-            variant = "fp16"
-        else:
-            variant = None
-
         pipe_kwargs = {
-            "scheduler": schedulers[scheduler](**scheduler_kwargs),
-            "requires_safety_checker": False,
             "safety_checker": None,
-            "torch_dtype": dtype,
-            "variant": variant,
+            "requires_safety_checker": False,
+            "scheduler": Config.SCHEDULERS[scheduler](**scheduler_kwargs),
         }
 
+        # diffusers fp16 variant
+        if model.lower() not in Config.MODEL_CHECKPOINTS.keys():
+            pipe_kwargs["variant"] = "fp16"
+        else:
+            pipe_kwargs["variant"] = None
+
+        # convert fp32 to bf16/fp16
+        if (
+            model.lower() in ["linaqruf/anything-v3-1"]
+            and torch.cuda.get_device_properties(device).major >= 8
+        ):
+            pipe_kwargs["torch_dtype"] = torch.bfloat16
+        else:
+            pipe_kwargs["torch_dtype"] = torch.float16
+
         self._unload(kind, model, ip_adapter, scale)
-        self._load_pipeline(kind, model, taesd, device, **pipe_kwargs)
+        self._load_pipeline(kind, model, device, **pipe_kwargs)
+
+        # error loading model
+        if self.pipe is None:
+            return self.pipe, self.upscaler
 
-        same_scheduler = isinstance(self.pipe.scheduler, schedulers[scheduler])
+        same_scheduler = isinstance(self.pipe.scheduler, Config.SCHEDULERS[scheduler])
         same_karras = (
             not hasattr(self.pipe.scheduler.config, "use_karras_sigmas")
             or self.pipe.scheduler.config.use_karras_sigmas == karras
         )
 
         # same model, different scheduler
-        if model_name == model_lower:
+        if self.model.lower() == model.lower():
             if not same_scheduler:
                 print(f"Switching to {scheduler}...")
             if not same_karras:
                 print(f"{'Enabling' if karras else 'Disabling'} Karras sigmas...")
             if not same_scheduler or not same_karras:
-                self.pipe.scheduler = schedulers[scheduler](**scheduler_kwargs)
+                self.pipe.scheduler = Config.SCHEDULERS[scheduler](**scheduler_kwargs)
 
         self._load_upscaler(device, scale)
         self._load_ip_adapter(ip_adapter)
-        self._load_vae(taesd, model_lower, variant)
+        self._load_vae(taesd, model)
         self._load_freeu(freeu)
         self._load_deepcache(deepcache)
         return self.pipe, self.upscaler
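The core of this commit is the `from_single_file` branch in `_load_pipeline`. A minimal standalone sketch of that path, building the same URL the loader builds from `Config.MODEL_CHECKPOINTS` (model and checkpoint names are taken from the config above; requires a CUDA device):

```python
# Standalone sketch of the single-file loading path, outside the Loader.
import torch
from diffusers import StableDiffusionPipeline

# Same f"https://huggingface.co/{model}/{checkpoint}" format as the loader
url = "https://huggingface.co/XpucT/Deliberate/Deliberate_v6.safetensors"

pipe = StableDiffusionPipeline.from_single_file(
    url,
    torch_dtype=torch.float16,
    safety_checker=None,
    requires_safety_checker=False,
).to("cuda")
```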
usage.md CHANGED
@@ -4,13 +4,7 @@ Enter a prompt and click `Generate`. Roll the `🎲` for a random prompt.
 
 ### Prompting
 
-Positive and negative prompts are embedded by [Compel](https://github.com/damian0815/compel) for weighting. You can use a float or +/-. For example:
-* `man, portrait, blue+ eyes, close-up`
-* `man, portrait, (blue)1.1 eyes, close-up`
-* `man, portrait, (blue eyes)-, close-up`
-* `man, portrait, (blue eyes)0.9, close-up`
-
-Note that `++` is `1.1^2` (and so on). See [syntax features](https://github.com/damian0815/compel/blob/main/doc/syntax.md) to learn more and read [Civitai](https://civitai.com)'s guide on [prompting](https://education.civitai.com/civitais-prompt-crafting-guide-part-1-basics/) for best practices.
+Positive and negative prompts are embedded by [Compel](https://github.com/damian0815/compel) for weighting. See [syntax features](https://github.com/damian0815/compel/blob/main/doc/syntax.md) to learn more and read [Civitai](https://civitai.com)'s guide on [prompting](https://education.civitai.com/civitais-prompt-crafting-guide-part-1-basics/) for best practices.
 
 #### Arrays
 
@@ -30,22 +24,20 @@ Styles are prompt templates from twri's [sdxl_prompt_styler](https://github.com/
 
 ### Scale
 
-Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) (Wang et al. 2021).
+Rescale up to 4x using [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) from [ai-forever](https://huggingface.co/ai-forever/Real-ESRGAN).
 
 ### Models
 
 Each model checkpoint has a different aesthetic:
 
-* [lykon/dreamshaper-8](https://huggingface.co/Lykon/dreamshaper-8): general purpose (default)
-* [fluently/fluently-v4](https://huggingface.co/fluently/Fluently-v4): general purpose merge
-* [linaqruf/anything-v3-1](https://huggingface.co/linaqruf/anything-v3-1): anime
+* [cyberdelia/CyberRealistic_v5](https://huggingface.co/cyberdelia/CyberRealistic): photorealistic
+* [Lykon/dreamshaper-8](https://huggingface.co/Lykon/dreamshaper-8): general purpose (default)
+* [fluently/Fluently-v4](https://huggingface.co/fluently/Fluently-v4): general purpose
+* [Linaqruf/anything-v3-1](https://huggingface.co/Linaqruf/anything-v3-1): anime
 * [prompthero/openjourney-v4](https://huggingface.co/prompthero/openjourney-v4): Midjourney-like
 * [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5): base
-* [sg161222/realistic_vision_v5.1](https://huggingface.co/SG161222/Realistic_Vision_V5.1_noVAE): photorealistic
-
-### Schedulers
-
-The default is [DEIS 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/deis) with [Karras](https://arxiv.org/abs/2206.00364) enabled. The other multistep scheduler, [DPM++ 2M](https://huggingface.co/docs/diffusers/en/api/schedulers/multistep_dpm_solver), is also good. For realism, [DDIM](https://huggingface.co/docs/diffusers/en/api/schedulers/ddim) is recommended. [Euler a](https://huggingface.co/docs/diffusers/en/api/schedulers/euler_ancestral) is worth trying for a different look.
+* [SG161222/Realistic_Vision_v5.1](https://huggingface.co/SG161222/Realistic_Vision_V5.1_noVAE): photorealistic
+* [XpucT/Deliberate_v6](https://huggingface.co/XpucT/Deliberate): general purpose
 
 ### Image-to-Image
 
@@ -55,15 +47,15 @@ Denoising strength is essentially how much the generation will differ from the i
 
 ### IP-Adapter
 
-In an image-to-image pipeline, the input image is used as the initial latent. With [IP-Adapter](https://github.com/tencent-ailab/IP-Adapter) (Ye et al. 2023), the input image is processed by a separate image encoder and the encoded features are used as conditioning along with the text prompt.
+In an image-to-image pipeline, the input image is used as the initial latent. With [IP-Adapter](https://github.com/tencent-ailab/IP-Adapter), the input image is processed by a separate image encoder and the encoded features are used as conditioning along with the text prompt.
 
-For capturing faces, enable `IP-Adapter Face` to use the full-face model. You should use an input image that is mostly a face along with the Realistic Vision model.
+For capturing faces, enable `IP-Adapter Face` to use the full-face model. You should use an input image that is mostly a face and it should be high quality. You can generate fake portraits with Realistic Vision to experiment. Note that you'll never get true identity preservation without an advanced pipeline like [InstantID](https://github.com/instantX-research/InstantID), which combines many techniques.
 
 ### Advanced
 
 #### DeepCache
 
-[DeepCache](https://github.com/horseee/DeepCache) (Ma et al. 2023) caches lower UNet layers and reuses them every `Interval` steps. Trade quality for speed:
+[DeepCache](https://github.com/horseee/DeepCache) caches lower UNet layers and reuses them every `Interval` steps. Trade quality for speed:
 * `1`: no caching (default)
 * `2`: more quality
 * `3`: balanced
@@ -71,7 +63,7 @@ For capturing faces, enable `IP-Adapter Face` to use the full-face model. You sh
 
 #### FreeU
 
-[FreeU](https://github.com/ChenyangSi/FreeU) (Si et al. 2023) re-weights the contributions sourced from the UNet’s skip connections and backbone feature maps. Can sometimes improve image quality.
+[FreeU](https://github.com/ChenyangSi/FreeU) re-weights the contributions sourced from the UNet’s skip connections and backbone feature maps. Can sometimes improve image quality.
 
 #### Clip Skip
 
@@ -80,7 +72,3 @@ When enabled, the last CLIP layer is skipped. Can sometimes improve image qualit
 #### Tiny VAE
 
 Enable [madebyollin/taesd](https://github.com/madebyollin/taesd) for near-instant latent decoding with a minor loss in detail. Useful for development.
-
-#### Prompt Truncation
-
-When enabled, prompts will be truncated to CLIP's limit of 77 tokens. By default this is _disabled_, so Compel will chunk prompts into segments rather than cutting them off.
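For reference, a minimal sketch of the Compel weighting described in the Prompting section above (assumes `compel` is installed and `pipe` is an already-loaded `StableDiffusionPipeline`):

```python
# Weighted prompt embedding with Compel; `pipe` is assumed to exist.
from compel import Compel

compel = Compel(tokenizer=pipe.tokenizer, text_encoder=pipe.text_encoder)
embeds = compel("man, portrait, blue+ eyes, close-up")  # `+` upweights by 1.1
image = pipe(prompt_embeds=embeds).images[0]
```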