---
library_name: diffusers
---

# Model Card for DAVINCI-DIFF

**Safetensors version:** https://civitai.com/models/130664

## Model Details

### Model Description

This is the model card of a 🧨 diffusers model that has been pushed to the Hub. This model card has been automatically generated.

- **Developed by:** Abdallah Alswaiti
- **Model type:** SDXL

## Uses

### Using 🧨 Diffusers on a low-VRAM GPU

Make sure to upgrade diffusers to >= 0.28.0:

```bash
pip install --upgrade diffusers
```

In addition, install transformers, accelerate, safetensors, and sentencepiece (`gc` is part of the Python standard library, so it does not need to be installed):

```bash
pip install transformers accelerate safetensors sentencepiece
```

The script below loads the text encoders, UNet, and VAE in stages so the model can run on GPUs with limited VRAM:

```python
import gc

import torch
from PIL import Image
from diffusers import StableDiffusionXLPipeline
from transformers import CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection
# from onediffx import compile_pipe, save_pipe, load_pipe

def encode_prompt(prompts, tokenizers, text_encoders):
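    # Encode the same prompt with both SDXL text encoders, keep the penultimate
    # hidden state of each, and concatenate them along the feature dimension.
    # The pooled embedding ends up coming from the second (projection) encoder.
    # Unconditional/negative embeddings are plain zero tensors, so the
    # user-entered negative prompt text is not encoded here.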
    embeddings_list = []
    for prompt, tokenizer, text_encoder in zip(prompts, tokenizers, text_encoders):
        cond_input = tokenizer(
            prompt,
            max_length=tokenizer.model_max_length,
            padding='max_length',
            truncation=True,
            return_tensors='pt',
        )
        prompt_embeds = text_encoder(cond_input.input_ids.to('cuda'), output_hidden_states=True)
        pooled_prompt_embeds = prompt_embeds[0]
        embeddings_list.append(prompt_embeds.hidden_states[-2])
    prompt_embeds = torch.concat(embeddings_list, dim=-1)
    negative_prompt_embeds = torch.zeros_like(prompt_embeds)
    negative_pooled_prompt_embeds = torch.zeros_like(pooled_prompt_embeds)
    bs_embed, seq_len, _ = prompt_embeds.shape
    prompt_embeds = prompt_embeds.repeat(1, 1, 1).view(bs_embed * 1, seq_len, -1)
    seq_len = negative_prompt_embeds.shape[1]
    negative_prompt_embeds = negative_prompt_embeds.repeat(1, 1, 1).view(1 * 1, seq_len, -1)
    pooled_prompt_embeds = pooled_prompt_embeds.repeat(1, 1).view(bs_embed * 1, -1)
    negative_pooled_prompt_embeds = negative_pooled_prompt_embeds.repeat(1, 1).view(bs_embed * 1, -1)
    return prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds

def get_user_input():
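    # Interactive parameters with simple defaults. SDXL is trained around
    # 1024x1024, so sizes larger than the 512 defaults usually look better.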
    prompt = input("Enter prompt: ") or '3/4 shot, candid photograph of a beautiful 30 year old redhead woman with messy dark hair, peacefully sleeping in her bed, night, dark, light from window, dark shadows, masterpiece, uhd, moody'
    negative_prompt = input("Enter negative prompt: ") or ""
    cfg_scale = float(input("Enter CFG scale (default 7.5): ") or 7.5)
    steps = int(input("Enter number of steps (default 50): ") or 50)
    width = int(input("Enter image width (default 512): ") or 512)
    height = int(input("Enter image height (default 512): ") or 512)
    seed = int(input("Enter seed (default 42): ") or 42)
    return prompt, negative_prompt, cfg_scale, steps, width, height, seed

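# Load the SDXL pipeline without its tokenizers and text encoders; they are
# loaded separately below, used once for prompt encoding, and then freed to
# keep VRAM usage low.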
pipe = StableDiffusionXLPipeline.from_pretrained(
    'ABDALLALSWAITI/DAVINCI-DIFF',
    use_safetensors=True,
    torch_dtype=torch.float16,
    tokenizer=None,
    text_encoder=None,
    tokenizer_2=None,
    text_encoder_2=None,
).to('cuda')
# pipe = compile_pipe(pipe)
# load_pipe(pipe, dir="cached_pipe")



while True:
    queue = []
    prompt, negative_prompt, cfg_scale, steps, width, height, seed = get_user_input()
    queue.append({
        'prompt': prompt,
        'negative_prompt': negative_prompt,
        'cfg_scale': cfg_scale,
        'steps': steps,
        'width': width,
        'height': height,
        'seed': seed,
    })

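    # Load the tokenizers and text encoders only for prompt encoding; they are
    # deleted right after so the UNet has the GPU to itself.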
    tokenizer = CLIPTokenizer.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='tokenizer')
    text_encoder = CLIPTextModel.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='text_encoder', use_safetensors=True, torch_dtype=torch.float16).to('cuda')
    tokenizer_2 = CLIPTokenizer.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='tokenizer_2')
    text_encoder_2 = CLIPTextModelWithProjection.from_pretrained('ABDALLALSWAITI/DAVINCI-DIFF', subfolder='text_encoder_2', use_safetensors=True, torch_dtype=torch.float16).to('cuda')

    with torch.no_grad():
        for generation in queue:
            generation['embeddings'] = encode_prompt(
                [generation['prompt'], generation['prompt']],
                [tokenizer, tokenizer_2],
                [text_encoder, text_encoder_2],
            )
    del tokenizer, text_encoder, tokenizer_2, text_encoder_2
    gc.collect()
    torch.cuda.empty_cache()

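    # Denoising pass: produce latents only (output_type='latent'); decoding
    # happens later with the VAE after the UNet has been offloaded.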
    generator = torch.Generator(device='cuda')
    pipe.unet.to('cuda')  # bring the UNet back if it was offloaded in a previous iteration
    for i, generation in enumerate(queue, start=1):
        generator.manual_seed(generation['seed'])
        generation['latents'] = pipe(
            prompt_embeds=generation['embeddings'][0],
            negative_prompt_embeds=generation['embeddings'][1],
            pooled_prompt_embeds=generation['embeddings'][2],
            negative_pooled_prompt_embeds=generation['embeddings'][3],
            generator=generator,
            output_type='latent',
            guidance_scale=generation['cfg_scale'],
            num_inference_steps=generation['steps'],
            height=generation['height'],
            width=generation['width']
        ).images


    # Free GPU memory before decoding: move the UNet to the CPU instead of
    # deleting it, so the pipeline still works on the next loop iteration.
    pipe.unet.to('cpu')
    gc.collect()
    torch.cuda.empty_cache()
    
    # load_pipe(pipe, dir="cached_pipe")


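    # Upcast the VAE to float32 and decode the latents to PIL images; the SDXL
    # VAE can overflow in fp16, which is why it is upcast before decoding.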
    pipe.upcast_vae()
    with torch.no_grad():
        for i, generation in enumerate(queue, start=1):
            generation['latents'] = generation['latents'].to(next(iter(pipe.vae.post_quant_conv.parameters())).dtype)
            image = pipe.vae.decode(generation['latents'] / pipe.vae.config.scaling_factor, return_dict=False)[0]
            image = pipe.image_processor.postprocess(image, output_type='pil')[0]
            image_path = f'image_{i}.png'
            image.save(image_path)
            print(f"Image saved at: {image_path}")
        # save_pipe(pipe, dir="cached_pipe")



    if input("Do you want to create another image? (y/n): ").lower() != 'y':
        break
```
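
If you only need lower VRAM usage and do not want to manage the text encoders and UNet by hand, diffusers also provides built-in offloading helpers. The snippet below is a minimal sketch, not part of the original script; it assumes the standard `StableDiffusionXLPipeline` API and that `accelerate` is installed:

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    'ABDALLALSWAITI/DAVINCI-DIFF',
    torch_dtype=torch.float16,
    use_safetensors=True,
)
# Keep each sub-model on the CPU and move it to the GPU only while it runs.
pipe.enable_model_cpu_offload()
# Decode the VAE in slices to lower peak memory during decoding.
pipe.enable_vae_slicing()

# Example prompt for illustration only.
image = pipe(
    prompt='candid photograph of a redhead woman sleeping, moody lighting',
    guidance_scale=7.5,
    num_inference_steps=30,
).images[0]
image.save('image.png')
```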