twodgirl committed
Commit
93f6ee5
1 Parent(s): 25924b4

Create README.md

Files changed (1)
  1. README.md +85 -0
README.md ADDED
@@ -0,0 +1,85 @@
---
license: other
tags:
- text-to-image
- flux
---

# Flux Dev Quant

## Setup

```bash
pip install accelerate diffusers optimum-quanto transformers sentencepiece
pip install --upgrade git+https://github.com/huggingface/diffusers.git@main
```

There are places where the pre-trained weights **overflow** in fp16, resulting in a blank image. Wait for an updated diffusers release.

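The inference script below loads already-quantized checkpoints from `./flux-int4` (the Flux transformer) and `./flux-t5` (the T5 encoder). If you still need to produce those folders yourself, here is a minimal sketch using optimum-quanto's `quantize`/`save_pretrained` helpers; the `qint4` weight type and the fp16 load dtype are assumptions, so adjust them to your setup.

```python
# Sketch only: one way the quantized folders used below could be produced.
# qint4 weights and float16 loading are assumptions, not the author's recipe.
from diffusers import FluxTransformer2DModel
from optimum.quanto import qint4
from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel
import torch
from transformers import T5EncoderModel

class Flux2DModel(QuantizedDiffusersModel):
    base_class = FluxTransformer2DModel

class T5Model(QuantizedTransformersModel):
    auto_class = T5EncoderModel

# Quantize the Flux transformer to int4 weights and serialize it.
transformer = FluxTransformer2DModel.from_pretrained(
    'black-forest-labs/FLUX.1-dev', subfolder='transformer', torch_dtype=torch.float16)
Flux2DModel.quantize(transformer, weights=qint4).save_pretrained('./flux-int4')

# Same treatment for the T5 text encoder.
t5 = T5EncoderModel.from_pretrained(
    'black-forest-labs/FLUX.1-dev', subfolder='text_encoder_2', torch_dtype=torch.float16)
T5Model.quantize(t5, weights=qint4).save_pretrained('./flux-t5')
```
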

## Inference

```python
from diffusers import AutoencoderKL, FluxPipeline, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel
import gc
from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel
import sys
import torch
from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast

class Flux2DModel(QuantizedDiffusersModel):
    base_class = FluxTransformer2DModel

class T5Model(QuantizedTransformersModel):
    auto_class = T5EncoderModel

FLUX_DEV = sys.argv[1] if len(sys.argv) > 1 else 'black-forest-labs/FLUX.1-dev'
FLUX_INT = sys.argv[2] if len(sys.argv) > 2 else './flux-int4'
T5_INT = sys.argv[3] if len(sys.argv) > 3 else './flux-t5'
SAMPLER_STEP = 2  # Number of denoising steps; raise for higher quality.
PROMPT_CLIP = ''  # Reserved for a separate CLIP prompt; unused below.
PROMPT_T5 = sys.argv[4] if len(sys.argv) > 4 else 'cat playing piano'

if __name__ == '__main__':
    torch.set_default_dtype(torch.float16)
    print('Step 1/5')
    # Duct tape for Quanto support: give T5EncoderModel the from_config() hook Quanto's loader expects.
    T5EncoderModel.from_config = lambda c: T5EncoderModel(c)
    wrapped_t5 = T5Model.from_pretrained(T5_INT)
    print('Step 2/5')
    wrapped_model = Flux2DModel.from_pretrained(FLUX_INT)
    print('Step 3/5')
    # Assemble the pipeline from the quantized transformer/T5 and the remaining original components.
    pipe = FluxPipeline.from_pretrained(FLUX_DEV,
                                        scheduler=FlowMatchEulerDiscreteScheduler.from_pretrained(FLUX_DEV, subfolder='scheduler'),
                                        text_encoder=CLIPTextModel.from_pretrained(FLUX_DEV, subfolder='text_encoder'),
                                        text_encoder_2=wrapped_t5._wrapped,
                                        tokenizer=CLIPTokenizer.from_pretrained(FLUX_DEV, subfolder='tokenizer'),
                                        tokenizer_2=T5TokenizerFast.from_pretrained(FLUX_DEV, subfolder='tokenizer_2'),
                                        transformer=wrapped_model._wrapped,
                                        vae=AutoencoderKL.from_pretrained(FLUX_DEV, subfolder='vae'),
                                        torch_dtype=torch.float16).to('cuda')
    latents = pipe(PROMPT_T5, num_inference_steps=SAMPLER_STEP, output_type='latent').images
    print('Step 4/5')
    # Free the transformer and T5 encoder before decoding to leave VRAM for the VAE.
    transformer = pipe.transformer.to('cpu')
    te_2 = pipe.text_encoder_2.to('cpu')
    pipe.transformer = None
    pipe.text_encoder_2 = None
    del transformer
    del te_2
    gc.collect()
    torch.cuda.empty_cache()
    print('Step 5/5')
    latents = FluxPipeline._unpack_latents(latents, 1024, 1024, pipe.vae_scale_factor)
    latents = (latents / pipe.vae.config.scaling_factor) + pipe.vae.config.shift_factor
    # Either use fp16 or move the vae to cpu and keep it in full precision.
    vae: AutoencoderKL = pipe.vae.to(dtype=torch.float16)
    image, = vae.decode(latents.to(dtype=vae.dtype), return_dict=False)
    image = pipe.image_processor.postprocess(image.detach(), output_type='pil')[0]
    image.save('./cat.png')
```
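
The model paths and the prompt can be overridden on the command line; for example, assuming the script above is saved as `infer.py` (the file name is illustrative):

```bash
python infer.py black-forest-labs/FLUX.1-dev ./flux-int4 ./flux-t5 'cat playing piano'
```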

## Disclaimer

Any use of this code or of copies of this documentation requires citation and attribution to the author, via a link to their Hugging Face profile, in all resulting work.

## License

[FLUX.1 Dev Non-Commercial License](http://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)