Kolors-Controlnet_and_IPA

Runtime error

File size: 8,628 Bytes

d5f497d
 
 
6c91ee7
 
 
d5f497d
6c91ee7
 
 
d5f497d
 
6c91ee7
 
 
 
 
 
 
 
d5f497d
 
 
6c91ee7
 
d5f497d
 
 
 
 
6c91ee7
 
 
d5f497d
6c91ee7
d5f497d
6c91ee7
 
 
 
 
d5f497d
6c91ee7
d5f497d
6c91ee7
d5f497d
6c91ee7
d5f497d
 
6c91ee7
d5f497d
 
6c91ee7
d5f497d
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f497d
 
8004741
d5f497d
 
d4fa96f
6c91ee7
 
d4fa96f
 
6c91ee7
 
 
 
 
 
 
d5f497d
 
 
6c91ee7
 
 
 
 
 
 
d5f497d
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cd4f227
d513008
 
 
 
 
 
 
 
 
d5f497d
 
 
 
 
d890da3
d5f497d
 
 
 
 
 
 
f92dc60
 
 
 
 
 
 
d5f497d
 
 
 
 
 
d890da3
d5f497d
 
6c91ee7
 
 
 
 
 
 
d5f497d
 
 
 
 
6c91ee7
d5f497d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c91ee7
d5f497d
 
 
 
 
 
6c91ee7
d5f497d
 
6c91ee7
 
d5f497d
 
6c91ee7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f497d
d890da3
cbcd3a1
d5f497d
 
6c91ee7
d5f497d
 
 
d4fa96f
d5f497d
6c91ee7
d4fa96f
d5f497d
 
 
 
6c91ee7
d5f497d
 
 
8004741

import spaces
import random
import torch
import cv2
import gradio as gr
import numpy as np
from huggingface_hub import snapshot_download
from transformers import CLIPVisionModelWithProjection,CLIPImageProcessor
from diffusers.utils import load_image
from kolors.pipelines.pipeline_controlnet_xl_kolors_img2img import StableDiffusionXLControlNetImg2ImgPipeline
from kolors.models.modeling_chatglm import ChatGLMModel
from kolors.models.tokenization_chatglm import ChatGLMTokenizer
from kolors.models.controlnet import ControlNetModel
from diffusers import  AutoencoderKL
from kolors.models.unet_2d_condition import UNet2DConditionModel
from diffusers import EulerDiscreteScheduler
from PIL import Image
from annotator.midas import MidasDetector
from annotator.util import resize_image, HWC3


device = "cuda"

# Resolve local checkpoint directories for the base Kolors model and the two
# ControlNet variants through huggingface_hub.snapshot_download.
ckpt_dir = snapshot_download(repo_id="Kwai-Kolors/Kolors")
ckpt_dir_depth = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Depth")
ckpt_dir_canny = snapshot_download(repo_id="Kwai-Kolors/Kolors-ControlNet-Canny")


def _half_on_device(module):
    """Cast *module* to half precision and move it onto ``device``."""
    return module.half().to(device)


# Components shared by both pipelines. The tokenizer and scheduler are used
# as loaded; every model is converted to fp16 and placed on ``device``.
text_encoder = _half_on_device(
    ChatGLMModel.from_pretrained(f'{ckpt_dir}/text_encoder', torch_dtype=torch.float16)
)
tokenizer = ChatGLMTokenizer.from_pretrained(f'{ckpt_dir}/text_encoder')
vae = _half_on_device(AutoencoderKL.from_pretrained(f"{ckpt_dir}/vae", revision=None))
scheduler = EulerDiscreteScheduler.from_pretrained(f"{ckpt_dir}/scheduler")
unet = _half_on_device(UNet2DConditionModel.from_pretrained(f"{ckpt_dir}/unet", revision=None))

# One ControlNet per conditioning type.
controlnet_depth = _half_on_device(ControlNetModel.from_pretrained(f"{ckpt_dir_depth}", revision=None))
controlnet_canny = _half_on_device(ControlNetModel.from_pretrained(f"{ckpt_dir_canny}", revision=None))

# Both pipelines reuse the same VAE, text encoder, tokenizer, UNet and
# scheduler objects; only the ControlNet differs between them.
_shared_components = dict(
    vae=vae,
    text_encoder=text_encoder,
    tokenizer=tokenizer,
    unet=unet,
    scheduler=scheduler,
    force_zeros_for_empty_prompt=False,
)

pipe_depth = StableDiffusionXLControlNetImg2ImgPipeline(
    controlnet=controlnet_depth,
    **_shared_components,
)

pipe_canny = StableDiffusionXLControlNetImg2ImgPipeline(
    controlnet=controlnet_canny,
    **_shared_components,
)

@spaces.GPU
def process_canny_condition(image, canny_threods=(100, 200)):
    """Build a three-channel Canny edge image to use as ControlNet input.

    Args:
        image: NumPy image array handed to ``cv2.Canny``. It is copied
            first, so the caller's array is never touched.
        canny_threods: Two-item sequence holding the first and second
            hysteresis thresholds passed to ``cv2.Canny``. The (misspelled)
            parameter name is kept for backward compatibility. The default
            is an immutable tuple so it cannot be shared-and-mutated across
            calls.

    Returns:
        A ``PIL.Image.Image`` built from the edge map replicated across
        three channels.
    """
    edges = cv2.Canny(image.copy(), canny_threods[0], canny_threods[1])
    # cv2.Canny yields a single-channel map; stack it into H x W x 3.
    edges = edges[:, :, None]
    edges = np.concatenate([edges, edges, edges], axis=2)
    edges = HWC3(edges)
    return Image.fromarray(edges)

# Single MidasDetector instance created at import time; it is called by
# process_depth_condition_midas for every depth-conditioned request.
model_midas = MidasDetector()

@spaces.GPU
def process_depth_condition_midas(img, res = 1024):
    """Run the MiDaS detector on *img* and return its output as a PIL image.

    Args:
        img: Three-dimensional NumPy image array (its shape is unpacked as
            height, width, channels).
        res: Resolution forwarded to ``resize_image`` before detection.

    Returns:
        A ``PIL.Image.Image`` of the detector output (presumably a depth
        map), resized back to the width and height of the input array.
    """
    orig_height, orig_width, _ = img.shape
    detector_input = resize_image(HWC3(img), res)
    detected = model_midas(detector_input)
    detected = HWC3(detected)
    # cv2.resize takes the target size as (width, height).
    restored = cv2.resize(detected, (orig_width, orig_height))
    return Image.fromarray(restored)

# Largest value of a signed 32-bit integer; upper bound for both the random
# seed drawn in infer() and the "Seed" slider.
MAX_SEED = np.iinfo(np.int32).max
# Resolution passed to resize_image() and to the depth preprocessor in infer().
MAX_IMAGE_SIZE = 1024

@spaces.GPU
def infer(prompt,
          image = None,
          controlnet_type = "Depth",
          negative_prompt = "",
          seed = 0,
          randomize_seed = False,
          guidance_scale = 6.0,
          num_inference_steps = 50,
          controlnet_conditioning_scale = 0.7,
          control_guidance_end = 0.9,
          strength = 1.0
        ):
    """Generate an image with the Kolors ControlNet img2img pipeline.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        image: Source image; the UI supplies it as a PIL image. Required
            despite the ``None`` default.
        controlnet_type: ``"Depth"`` or ``"Canny"``; selects both the
            pipeline and the conditioning-image preprocessor.
        negative_prompt: Negative prompt forwarded to the pipeline.
        seed: Seed for the random generator (ignored when
            ``randomize_seed`` is true).
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        guidance_scale: Forwarded to the pipeline unchanged.
        num_inference_steps: Number of denoising steps; coerced to ``int``.
        controlnet_conditioning_scale: Forwarded to the pipeline unchanged.
        control_guidance_end: Forwarded to the pipeline unchanged.
        strength: Forwarded to the pipeline unchanged.

    Returns:
        ``[condition_image, generated_image]`` for the result gallery, or
        ``None`` when ``controlnet_type`` is not recognised.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Fail with a readable UI message instead of crashing inside
    # resize_image when the user clicks "Run" without uploading an image.
    if image is None:
        raise gr.Error("Please provide an input image.")

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; manual_seed only accepts integers.
    generator = torch.Generator().manual_seed(int(seed))

    init_image = resize_image(image, MAX_IMAGE_SIZE)
    if controlnet_type == "Depth":
        pipe = pipe_depth.to(device)
        condi_img = process_depth_condition_midas(np.array(init_image), MAX_IMAGE_SIZE)
    elif controlnet_type == "Canny":
        pipe = pipe_canny.to(device)
        condi_img = process_canny_condition(np.array(init_image))
    else:
        return None

    image = pipe(
        prompt=prompt,
        image=init_image,
        controlnet_conditioning_scale=controlnet_conditioning_scale,
        control_guidance_end=control_guidance_end,
        strength=strength,
        control_image=condi_img,
        negative_prompt=negative_prompt,
        # The step count must be integral for the scheduler.
        num_inference_steps=int(num_inference_steps),
        guidance_scale=guidance_scale,
        num_images_per_prompt=1,
        generator=generator,
    ).images[0]
    return [condi_img, image]

# Rows shown by gr.Examples. Each row is [prompt, image file, controlnet type],
# matching the example inputs [prompt, image, controlnet_type].
examples = [
    ["一个漂亮的女孩，高品质，超清晰，色彩鲜艳，超高分辨率，最佳品质，8k，高清，4K",
     "woman_1.png", "Canny"],
    ["全景，一只可爱的白色小狗坐在杯子里，看向镜头，动漫风格，3d渲染，辛烷值渲染",
    "dog.png", "Canny"],
    ["新海诚风格，丰富的色彩，穿着绿色衬衫的女人站在田野里，唯美风景，清新明亮，斑驳的光影，最好的质量，超细节，8K画质",
     "woman_2.png", "Depth"],
    ["一只颜色鲜艳的小鸟，高品质，超清晰，色彩鲜艳，超高分辨率，最佳品质，8k，高清，4K",
     "bird.png", "Depth"]
]

# Custom stylesheet handed to gr.Blocks(css=...). The selectors target the two
# columns created with elem_id="col-left" and elem_id="col-right", centring
# each one and capping its width.
css="""
#col-left {
    margin: 0 auto;
    max-width: 600px;
}
#col-right {
    margin: 0 auto;
    max-width: 750px;
}
"""

def load_description(fp: str) -> str:
    """Return the entire text of the UTF-8 encoded file at *fp*."""
    with open(fp, 'r', encoding='utf-8') as handle:
        return handle.read()

# Gradio UI: a left column with the inputs (prompt, ControlNet type, image,
# advanced settings, Run button) and a right column with the result gallery.
with gr.Blocks(css=css) as Kolors:
    # Page header, read from a file shipped alongside the app.
    gr.HTML(load_description("assets/title.md"))
    with gr.Row():
        with gr.Column(elem_id="col-left"):
            with gr.Row():
                prompt = gr.Textbox(
                    label="Prompt",
                    placeholder="Enter your prompt",
                    lines=2
                )
            with gr.Row():
                # The choices must match the strings infer() compares against.
                controlnet_type = gr.Dropdown(
                    ["Depth", "Canny"],
                    label = "Controlnet",
                    value="Depth"
                )
            with gr.Row():
                # type="pil": infer() receives the upload as a PIL image.
                image = gr.Image(label="Image", type="pil")
            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative prompt",
                    placeholder="Enter a negative prompt",
                    visible=True,
                    value="nsfw，脸部阴影，低分辨率，jpeg伪影、模糊、糟糕，黑脸，霓虹灯"
                )
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                # Checked by default, so the seed slider is ignored unless
                # the user unticks this box.
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="Guidance scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=6.0,
                    )
                    # NOTE: the UI default (30) differs from infer()'s own
                    # default of 50, which applies when this input is omitted.
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=10,
                        maximum=50,
                        step=1,
                        value=30,
                    )
                with gr.Row():
                    controlnet_conditioning_scale = gr.Slider(
                        label="Controlnet Conditioning Scale",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.7,
                    )
                    control_guidance_end = gr.Slider(
                        label="Control Guidance End",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=0.9,
                    )
                with gr.Row():
                    strength = gr.Slider(
                        label="Strength",
                        minimum=0.0,
                        maximum=1.0,
                        step=0.1,
                        value=1.0,
                    )
            with gr.Row():
                run_button = gr.Button("Run")
            
        with gr.Column(elem_id="col-right"):
            # Two columns: infer() returns [condition image, generated image].
            result = gr.Gallery(label="Result", show_label=False, columns=2)
    
    with gr.Row():
        # Examples supply only the first three infer() arguments; the
        # remaining ones fall back to infer()'s own defaults.
        gr.Examples(
                fn = infer,
                examples = examples,
                inputs = [prompt, image, controlnet_type],
                outputs = [result]
            )

    # The input order must match infer()'s positional parameter order.
    run_button.click(
        fn = infer,
        inputs = [prompt, image, controlnet_type, negative_prompt, seed, randomize_seed, guidance_scale, num_inference_steps, controlnet_conditioning_scale, control_guidance_end, strength],
        outputs = [result]
    )

# Enable request queuing and start the server with debug output.
Kolors.queue().launch(debug=True)