remove deprecated LoRA blocks (#1)
Browse files- fix for PR: 8623 (16d8d119a69e13a0eb0b86bb9b524b12685eb451)
- fix more (0abaa8ad4ab07bda9230858d837fcdb7c5edffcb)
- my_pipeline.py +4 -13
- unet/my_unet_model.py +3 -3
my_pipeline.py
CHANGED
@@ -25,12 +25,7 @@ from diffusers.loaders import (
|
|
25 |
TextualInversionLoaderMixin,
|
26 |
)
|
27 |
from diffusers.models import AutoencoderKL, UNet2DConditionModel
|
28 |
-
from diffusers.models.attention_processor import (
|
29 |
-
AttnProcessor2_0,
|
30 |
-
LoRAAttnProcessor2_0,
|
31 |
-
LoRAXFormersAttnProcessor,
|
32 |
-
XFormersAttnProcessor,
|
33 |
-
)
|
34 |
from diffusers.models.lora import adjust_lora_scale_text_encoder
|
35 |
from diffusers.schedulers import KarrasDiffusionSchedulers
|
36 |
from diffusers.utils import (
|
@@ -135,6 +130,7 @@ class MyPipeline(
|
|
135 |
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
136 |
watermarker will be used.
|
137 |
"""
|
|
|
138 |
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
139 |
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
140 |
|
@@ -571,12 +567,7 @@ class MyPipeline(
|
|
571 |
self.vae.to(dtype=torch.float32)
|
572 |
use_torch_2_0_or_xformers = isinstance(
|
573 |
self.vae.decoder.mid_block.attentions[0].processor,
|
574 |
-
(
|
575 |
-
AttnProcessor2_0,
|
576 |
-
XFormersAttnProcessor,
|
577 |
-
LoRAXFormersAttnProcessor,
|
578 |
-
LoRAAttnProcessor2_0,
|
579 |
-
),
|
580 |
)
|
581 |
# if xformers or torch_2_0 is used attention block does not need
|
582 |
# to be in float32 which can save lots of memory
|
@@ -971,4 +962,4 @@ class MyPipeline(
|
|
971 |
# Offload all models
|
972 |
self.maybe_free_model_hooks()
|
973 |
|
974 |
-
return (image,)
|
|
|
25 |
TextualInversionLoaderMixin,
|
26 |
)
|
27 |
from diffusers.models import AutoencoderKL, UNet2DConditionModel
|
28 |
+
from diffusers.models.attention_processor import AttnProcessor2_0, XFormersAttnProcessor
|
|
|
|
|
|
|
|
|
|
|
29 |
from diffusers.models.lora import adjust_lora_scale_text_encoder
|
30 |
from diffusers.schedulers import KarrasDiffusionSchedulers
|
31 |
from diffusers.utils import (
|
|
|
130 |
watermark output images. If not defined, it will default to True if the package is installed, otherwise no
|
131 |
watermarker will be used.
|
132 |
"""
|
133 |
+
|
134 |
model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
|
135 |
_optional_components = ["tokenizer", "tokenizer_2", "text_encoder", "text_encoder_2"]
|
136 |
|
|
|
567 |
self.vae.to(dtype=torch.float32)
|
568 |
use_torch_2_0_or_xformers = isinstance(
|
569 |
self.vae.decoder.mid_block.attentions[0].processor,
|
570 |
+
(AttnProcessor2_0, XFormersAttnProcessor),
|
|
|
|
|
|
|
|
|
|
|
571 |
)
|
572 |
# if xformers or torch_2_0 is used attention block does not need
|
573 |
# to be in float32 which can save lots of memory
|
|
|
962 |
# Offload all models
|
963 |
self.maybe_free_model_hooks()
|
964 |
|
965 |
+
return (image,)
|
unet/my_unet_model.py
CHANGED
@@ -34,7 +34,7 @@ from diffusers.models.embeddings import (
|
|
34 |
ImageHintTimeEmbedding,
|
35 |
ImageProjection,
|
36 |
ImageTimeEmbedding,
|
37 |
-
PositionNet,
|
38 |
TextImageProjection,
|
39 |
TextImageTimeEmbedding,
|
40 |
TextTimeEmbedding,
|
@@ -42,7 +42,7 @@ from diffusers.models.embeddings import (
|
|
42 |
Timesteps,
|
43 |
)
|
44 |
from diffusers.models.modeling_utils import ModelMixin
|
45 |
-
from diffusers.models.unet_2d_blocks import (
|
46 |
UNetMidBlock2DCrossAttn,
|
47 |
UNetMidBlock2DSimpleCrossAttn,
|
48 |
get_down_block,
|
@@ -586,7 +586,7 @@ class MyUNetModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin):
|
|
586 |
positive_len = cross_attention_dim[0]
|
587 |
|
588 |
feature_type = "text-only" if attention_type == "gated" else "text-image"
|
589 |
-
self.position_net = PositionNet(
|
590 |
positive_len=positive_len, out_dim=cross_attention_dim, feature_type=feature_type
|
591 |
)
|
592 |
|
|
|
34 |
ImageHintTimeEmbedding,
|
35 |
ImageProjection,
|
36 |
ImageTimeEmbedding,
|
37 |
+
GLIGENTextBoundingboxProjection,
|
38 |
TextImageProjection,
|
39 |
TextImageTimeEmbedding,
|
40 |
TextTimeEmbedding,
|
|
|
42 |
Timesteps,
|
43 |
)
|
44 |
from diffusers.models.modeling_utils import ModelMixin
|
45 |
+
from diffusers.models.unets.unet_2d_blocks import (
|
46 |
UNetMidBlock2DCrossAttn,
|
47 |
UNetMidBlock2DSimpleCrossAttn,
|
48 |
get_down_block,
|
|
|
586 |
positive_len = cross_attention_dim[0]
|
587 |
|
588 |
feature_type = "text-only" if attention_type == "gated" else "text-image"
|
589 |
+
self.position_net = GLIGENTextBoundingboxProjection(
|
590 |
positive_len=positive_len, out_dim=cross_attention_dim, feature_type=feature_type
|
591 |
)
|
592 |
|