twodgirl
/

Flux-dev-optimum-quant-qfloat8

Text-to-Image

Diffusers

Safetensors

flux

Model card Files Files and versions

twodgirl committed on Aug 15

Commit

59ea6cc

•

1 Parent(s): a1220c6

Update README.md

Browse files

Files changed (1) hide show

README.md +14 -77

README.md CHANGED Viewed

@@ -1,11 +1,14 @@
 ---
 license: other
 tags:
 - text-to-image
 - flux
 ---
-# Flux Dev Quant
 Run the Flux Dev model with limited VRAM in 8-bit mode. It's possible, but impractical, since the downloads alone are "only" 40GB.
@@ -20,14 +23,10 @@ In int4 mode there are places where the pre-trained weights in fp16 **overflow**
 ## Inference
 ```python
-from argparse import ArgumentParser
-from diffusers import AutoencoderKL, FluxPipeline, FlowMatchEulerDiscreteScheduler, FluxTransformer2DModel
-import gc
 from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel
-import os
-import sys
 import torch
-from transformers import CLIPTextModel, CLIPTokenizer, T5EncoderModel, T5TokenizerFast
 class Flux2DModel(QuantizedDiffusersModel):
     base_class = FluxTransformer2DModel
@@ -35,81 +34,19 @@ class Flux2DModel(QuantizedDiffusersModel):
 class T5Model(QuantizedTransformersModel):
     auto_class = T5EncoderModel
-builder = ArgumentParser()
-builder.add_argument('prompt',
-                   type=str,
-                   nargs='?',
-                   default='cat playing piano')
-builder.add_argument('--model',
-                   type=str,
-                   default='black-forest-labs/FLUX.1-dev',
-                   required=False)
-builder.add_argument('--output',
-                   type=str,
-                   default='.',
-                   required=False)
-builder.add_argument('--step',
-                   type=int,
-                   default=10,
-                   required=False)
-builder.add_argument('--transformer',
-                   type=str,
-                   default='./flux-fp8',
-                   required=False)
-builder.add_argument('--t5',
-                   type=str,
-                   default='./flux-t5',
-                   required=False)
-args = builder.parse_args()
 if __name__ == '__main__':
-    FLUX_DEV = args.model
-    print('Step 1/5')
-    T5EncoderModel.from_config = lambda c: T5EncoderModel(c)  # Duck and tape for Quanto support.
-    wrapped_t5 = T5Model.from_pretrained(args.t5)
-    print('Step 2/5')
-    wrapped_model = Flux2DModel.from_pretrained(args.transformer)
-    print('Step 3/5')
-    pipe = FluxPipeline.from_pretrained(FLUX_DEV,
-                                        scheduler=FlowMatchEulerDiscreteScheduler.from_pretrained(FLUX_DEV, subfolder='scheduler'),
-                                        text_encoder=CLIPTextModel.from_pretrained(FLUX_DEV, subfolder='text_encoder'),
-                                        text_encoder_2=wrapped_t5._wrapped,
-                                        tokenizer=CLIPTokenizer.from_pretrained(FLUX_DEV, subfolder='tokenizer'),
-                                        tokenizer_2=T5TokenizerFast.from_pretrained(FLUX_DEV, subfolder='tokenizer_2'),
-                                        transformer=wrapped_model._wrapped,
-                                        vae=AutoencoderKL.from_pretrained(FLUX_DEV, subfolder='vae'),
-                                        # torch_dtype=torch.float16  # Turns values to NaN.
-                                        )
     # This method moves one whole model at a time to the GPU when it's in forward mode.
     pipe.enable_model_cpu_offload()
-    latents = pipe(args.prompt, num_inference_steps=args.step, output_type='latent').images
-    # Short alternative:
-    # images = pipe(prompt, num_inference_steps=args.step, output_type='pil').images
-    print('Step 4/5')
-    transformer = pipe.transformer.to('cpu')
-    te_2 = pipe.text_encoder_2.to('cpu')
-    pipe.transformer = None
-    pipe.text_encoder_2 = None
-    del transformer
-    del te_2
-    gc.collect()
-    torch.cuda.empty_cache()
-    print('Step 5/5')
-    latents = FluxPipeline._unpack_latents(latents, 1024, 1024, pipe.vae_scale_factor)
-    latents = (latents / pipe.vae.config.scaling_factor) + pipe.vae.config.shift_factor
-    # Either use fp16 or move vae to cpu and keep it in full precision.
-    vae: AutoencoderKL = pipe.vae.to(dtype=torch.float16)
-    image, = vae.decode(latents.to(dtype=vae.dtype), return_dict=False)
-    image = pipe.image_processor.postprocess(image.detach(), output_type='pil')[0]
-    filename = len([filename for filename in os.listdir(args.output)
-                    if filename.endswith('.png')])
-    image.save('{}/{:05d}.png'.format(args.output, filename))
 ```
 ## Disclaimer
 Use of this code and the copy of documentation requires citation and attribution to the author via a link to their Hugging Face profile in all resulting work.
-## License
-[FLUX.1 Dev Non-Commercial License](http://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)

 ---
 license: other
+license_name: flux-1-dev-non-commercial-license
+license_link: >-
+  https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md
 tags:
 - text-to-image
 - flux
 ---
+# Flux Dev
 Run the Flux Dev model with limited VRAM in 8-bit mode. It's possible, but impractical, since the downloads alone are "only" 40GB.
 ## Inference
 ```python
+from diffusers import FluxPipeline, FluxTransformer2DModel
 from optimum.quanto.models import QuantizedDiffusersModel, QuantizedTransformersModel
 import torch
+from transformers import T5EncoderModel
 class Flux2DModel(QuantizedDiffusersModel):
     base_class = FluxTransformer2DModel
 class T5Model(QuantizedTransformersModel):
     auto_class = T5EncoderModel
 if __name__ == '__main__':
+    T5EncoderModel.from_config = lambda c: T5EncoderModel(c).to(dtype=torch.float16)  # Duck and tape for Quanto support.
+    t5 = T5Model.from_pretrained('./flux-t5')._wrapped
+    transformer = Flux2DModel.from_pretrained('./flux-fp8')._wrapped
+    pipe = FluxPipeline.from_pretrained('black-forest-labs/FLUX.1-dev',
+                                        text_encoder_2=t5,
+                                        transformer=transformer)
     # This method moves one whole model at a time to the GPU when it's in forward mode.
     pipe.enable_model_cpu_offload()
+    image = pipe('cat playing piano', num_inference_steps=10, output_type='pil').images[0]
+    image.save('cat.png')
 ```
 ## Disclaimer
 Use of this code and the copy of documentation requires citation and attribution to the author via a link to their Hugging Face profile in all resulting work.