---
library_name: diffusers
---

# Model Card for DAVINCI-DIFF

A single-file `.safetensors` version of this model is available on Civitai: https://civitai.com/models/130664
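
If you use the Civitai checkpoint, it can be loaded with diffusers' `from_single_file` loader. A minimal sketch, assuming the download was saved as `davinci-diff.safetensors` (the filename is a placeholder):

```python
import torch
from diffusers import StableDiffusionXLPipeline

# Load the single-file checkpoint downloaded from Civitai.
# "davinci-diff.safetensors" is a placeholder filename.
pipe = StableDiffusionXLPipeline.from_single_file(
    "davinci-diff.safetensors",
    torch_dtype=torch.float16,
).to("cuda")

image = pipe("candid photograph of a redhead woman, moody lighting").images[0]
image.save("example.png")
```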

## Model Details

### Model Description

This is the model card of a 🧨 diffusers model that has been pushed to the Hub. This model card was automatically generated.

- **Developed by:** Abdallah Alswaiti
- **Model type:** SDXL
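
The Hub repository itself loads like any other diffusers SDXL model. A minimal sketch for GPUs with enough VRAM (the prompt is only an example); see the low-VRAM workflow below for constrained setups:

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    'ABDALLALSWAITI/DAVINCI-DIFF',
    torch_dtype=torch.float16,
    use_safetensors=True,
).to('cuda')

image = pipe(
    'candid photograph, masterpiece, moody lighting',
    num_inference_steps=30,
).images[0]
image.save('quickstart.png')
```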


## Uses

### Using 🧨 Diffusers on a low-VRAM GPU
> [!IMPORTANT]  
> Make sure to upgrade `diffusers` to >= 0.28.0:
> ```bash
> pip install --upgrade diffusers
> ```
> In addition, make sure to install `transformers`, `safetensors`, `sentencepiece`, and `accelerate` (`gc` is part of the Python standard library and does not need to be installed):
> ```bash
> pip install transformers accelerate safetensors sentencepiece
> ```


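The script below keeps peak VRAM low by loading the model in phases: the pipeline is created without its text encoders, the two CLIP encoders are loaded just long enough to embed the prompt and are then freed, the UNet denoises to latents, and finally the UNet is offloaded so the upcast VAE can decode the latents on its own.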
```python
import torch
from diffusers import StableDiffusionXLPipeline
import gc
from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection
# from onediffx import compile_pipe, save_pipe, load_pipe

def encode_prompt(prompts, tokenizers, text_encoders):
    # SDXL uses two text encoders; their embeddings are concatenated.
    embeddings_list = []
    for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
        cond_input = tokenizer(
            prompt,
            max_length=tokenizer.model_max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        prompt_embeds = text_encoder(cond_input.input_ids.to('cuda'), output_hidden_states=True)
        # After the loop this holds the projected pooled output of the second
        # encoder (CLIPTextModelWithProjection), which is what SDXL expects.
        pooled_prompt_embeds = prompt_embeds[0]
        # SDXL conditions on the penultimate hidden layer, not the final one.
        embeddings_list.append(prompt_embeds.hidden_states[-2])
    prompt_embeds = torch.concat(embeddings_list, dim=-1)
    # Zero embeddings stand in for the negative prompt.
    negative_prompt_embeds = torch.zeros_like(prompt_embeds)
    negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
    bs_embed, seq_len, _ = prompt_embeds.shape
    prompt_embeds = prompt_embeds.repeat(1, 1, 1).view(bs_embed * 1, seq_len, -1)
    seq_len = negative_prompt_embeds.shape[1]
    negative_prompt_embeds = negative_prompt_embeds.repeat(1, 1, 1).view(1 * 1, seq_len, -1)
    pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, 1).view(bs_embed * 1, -1)
    negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, 1).view(bs_embed * 1, -1)
    return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

def get_user_input():
    # Interactive parameters; empty input falls back to the defaults.
    prompt = input("Enter prompt: ") or '3/4 shot, candid photograph of a beautiful 30 year old redhead woman with messy dark hair, peacefully sleeping in her bed, night, dark, light from window, dark shadows, masterpiece, uhd, moody'
    negative_prompt = input("Enter negative prompt: ") or ""
    cfg_scale = float(input("Enter CFG scale (default 7.5): ") or 7.5)
    steps = int(input("Enter number of steps (default 50): ") or 50)
    # SDXL is trained around 1024x1024, so values larger than the
    # 512 default generally give better results.
    width = int(input("Enter image width (default 512): ") or 512)
    height = int(input("Enter image height (default 512): ") or 512)
    seed = int(input("Enter seed (default 42): ") or 42)
    return prompt, negative_prompt, cfg_scale, steps, width, height, seed

# Load the pipeline without its tokenizers and text encoders so that only the
# UNet and VAE occupy VRAM; prompts are encoded separately below.
pipe = StableDiffusionXLPipeline.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', use_safetensors=True, torch_dtype=torch.float16, tokenizer=None, text_encoder=None, tokenizer_2=None, text_encoder_2=None).to('cuda')
# pipe = compile_pipe(pipe)
# load_pipe(pipe, dir="cached_pipe")



while True:
    # One generation request per loop iteration.
    queue = []
    prompt, negative_prompt, cfg_scale, steps, width, height, seed = get_user_input()
    queue.append({
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'cfg_scale': cfg_scale,
        'steps': steps,
        'width': width,
        'height': height,
        'seed': seed,
    })

    # Load both tokenizers and text encoders only for the encoding step.
    tokenizer = CLIPTokenizer.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='tokenizer')
    text_encoder = CLIPTextModel.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='text_encoder', use_safetensors=True, torch_dtype=torch.float16).to('cuda')
    tokenizer_2 = CLIPTokenizer.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='tokenizer_2')
    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='text_encoder_2', use_safetensors=True, torch_dtype=torch.float16).to('cuda')

    with torch.no_grad():
        for generation in queue:
            # Note: encode_prompt zeroes out the negative embeddings, so the
            # negative prompt entered above is not actually encoded.
            generation['embeddings'] = encode_prompt(
                [generation['prompt'], generation['prompt']],
                [tokenizer, tokenizer_2],
                [text_encoder, text_encoder_2],
            )
    # Free the text encoders before denoising to reclaim VRAM.
    del tokenizer, text_encoder, tokenizer_2, text_encoder_2
    gc.collect()
    torch.cuda.empty_cache()

    # Move the UNet back to the GPU (a no-op on the first pass) and denoise
    # to latents only; decoding happens after the UNet is offloaded.
    pipe.unet.to('cuda')
    generator = torch.Generator(device='cuda')
    for i, generation in enumerate(queue, start=1):
        generator.manual_seed(generation['seed'])
        generation['latents'] = pipe(
            prompt_embeds=generation['embeddings'][0],
            negative_prompt_embeds=generation['embeddings'][1],
            pooled_prompt_embeds=generation['embeddings'][2],
            negative_pooled_prompt_embeds=generation['embeddings'][3],
            generator=generator,
            output_type='latent',
            guidance_scale=generation['cfg_scale'],
            num_inference_steps=generation['steps'],
            height=generation['height'],
            width=generation['width']
        ).images


    # Offload the UNet to the CPU so it can be reused on the next loop
    # iteration while still freeing VRAM for VAE decoding.
    pipe.unet.to('cpu')
    gc.collect()
    torch.cuda.empty_cache()

    # load_pipe(pipe, dir="cached_pipe")


    # Decode latents with the VAE upcast to float32, which avoids overflow
    # artifacts in half precision.
    pipe.upcast_vae()
    with torch.no_grad():
        for i, generation in enumerate(queue, start=1):
            generation['latents'] = generation['latents'].to(next(iter(pipe.vae.post_quant_conv.parameters())).dtype)
            image = pipe.vae.decode(generation['latents'] / pipe.vae.config.scaling_factor, return_dict=False)[0]
            image = pipe.image_processor.postprocess(image, output_type='pil')[0]
            image_path = f'image_{i}.png'
            image.save(image_path)
            print(f"Image saved at: {image_path}")
        # save_pipe(pipe, dir="cached_pipe")



    if input("Do you want to create another image? (y/n): ").lower() != 'y':
        break

```
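
Manually staging the components as above gives fine-grained control, but diffusers also ships a built-in alternative: `enable_model_cpu_offload()` (it requires `accelerate`, installed above) moves each sub-model to the GPU only while it is needed. A minimal sketch:

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    'ABDALLALSWAITI/DAVINCI-DIFF',
    torch_dtype=torch.float16,
    use_safetensors=True,
)
# Each sub-model (text encoders, UNet, VAE) is moved to the GPU only for
# its own forward pass, trading some speed for a much lower VRAM peak.
pipe.enable_model_cpu_offload()

image = pipe('a moody portrait, masterpiece', num_inference_steps=30).images[0]
image.save('offload_example.png')
```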