File size: 2,418 Bytes
bb09ec7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
---
language:
- en
license: other
license_name: flux-1-dev-non-commercial-license
license_link: LICENSE.md
tags:
- text-to-image
- image-generation
- flux
---

`black-forest-labs/FLUX.1-dev` quantized the Transformer model to INT4 and the T5 Text Encoder to INT8 using Optimum Quanto with FP16 calculations.

```shell
pip install diffusers optimum-quanto
```

```python
import json
import torch
import diffusers
import transformers
from optimum.quanto import requantize
from safetensors.torch import load_file
from huggingface_hub import hf_hub_download


def load_quanto_transformer(repo_path):
    with open(hf_hub_download(repo_path, "transformer/quantization_map.json"), "r") as f:
        quantization_map = json.load(f)
    with torch.device("meta"):
        transformer = diffusers.FluxTransformer2DModel.from_config(hf_hub_download(repo_path, "transformer/config.json")).to(torch.float16)
    state_dict = load_file(hf_hub_download(repo_path, "transformer/diffusion_pytorch_model.safetensors"))
    requantize(transformer, state_dict, quantization_map, device=torch.device("cuda"))
    return transformer


def load_quanto_text_encoder_2(repo_path):
    with open(hf_hub_download(repo_path, "text_encoder_2/quantization_map.json"), "r") as f:
        quantization_map = json.load(f)
    with open(hf_hub_download(repo_path, "text_encoder_2/config.json")) as f:
        t5_config = transformers.T5Config(**json.load(f))
    with torch.device("meta"):
        text_encoder_2 = transformers.T5EncoderModel(t5_config).to(torch.float16)
    state_dict = load_file(hf_hub_download(repo_path, "text_encoder_2/model.safetensors"))
    requantize(text_encoder_2, state_dict, quantization_map, device=torch.device("cuda"))
    return text_encoder_2


pipe = diffusers.AutoPipelineForText2Image.from_pretrained("Disty0/FLUX.1-dev-qint4_tf-qint8_te", transformer=None, text_encoder_2=None, torch_dtype=torch.float16)
pipe.transformer = load_quanto_transformer("Disty0/FLUX.1-dev-qint4_tf-qint8_te")
pipe.text_encoder_2 = load_quanto_text_encoder_2("Disty0/FLUX.1-dev-qint4_tf-qint8_te")
pipe = pipe.to("cuda", dtype=torch.float16)


prompt = "A cat holding a sign that says hello world"
image = pipe(
    prompt,
    height=1024,
    width=1024,
    guidance_scale=3.5,
    num_inference_steps=50,
    max_sequence_length=512,
    generator=torch.Generator("cpu").manual_seed(0)
).images[0]
image.save("flux-dev.png")
```