fffiloni's picture
adjust target_ratio radio choice in UI if manually changed dimensions
fb5d273 verified
raw
history blame
11.7 kB
import gradio as gr
import spaces
import torch
from diffusers import AutoencoderKL, TCDScheduler
from diffusers.models.model_loading_utils import load_state_dict
from gradio_imageslider import ImageSlider
from huggingface_hub import hf_hub_download
from controlnet_union import ControlNetModel_Union
from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
from PIL import Image, ImageDraw
import numpy as np
config_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="config_promax.json",
)
config = ControlNetModel_Union.load_config(config_file)
controlnet_model = ControlNetModel_Union.from_config(config)
model_file = hf_hub_download(
"xinsir/controlnet-union-sdxl-1.0",
filename="diffusion_pytorch_model_promax.safetensors",
)
state_dict = load_state_dict(model_file)
model, _, _, _, _ = ControlNetModel_Union._load_pretrained_model(
controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0"
)
model.to(device="cuda", dtype=torch.float16)
vae = AutoencoderKL.from_pretrained(
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
).to("cuda")
pipe = StableDiffusionXLFillPipeline.from_pretrained(
"SG161222/RealVisXL_V5.0_Lightning",
torch_dtype=torch.float16,
vae=vae,
controlnet=model,
variant="fp16",
).to("cuda")
pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
def can_expand(source_width, source_height, target_width, target_height, alignment):
"""Checks if the image can be expanded based on the alignment."""
if alignment in ("Left", "Right") and source_width >= target_width:
return False
if alignment in ("Top", "Bottom") and source_height >= target_height:
return False
return True
@spaces.GPU
def infer(image, width, height, overlap_width, num_inference_steps, prompt_input=None, alignment="Middle"):
source = image
target_size = (width, height)
overlap = overlap_width
# Upscale if source is smaller than target in both dimensions
if source.width < target_size[0] and source.height < target_size[1]:
scale_factor = min(target_size[0] / source.width, target_size[1] / source.height)
new_width = int(source.width * scale_factor)
new_height = int(source.height * scale_factor)
source = source.resize((new_width, new_height), Image.LANCZOS)
if source.width > target_size[0] or source.height > target_size[1]:
scale_factor = min(target_size[0] / source.width, target_size[1] / source.height)
new_width = int(source.width * scale_factor)
new_height = int(source.height * scale_factor)
source = source.resize((new_width, new_height), Image.LANCZOS)
if not can_expand(source.width, source.height, target_size[0], target_size[1], alignment):
alignment = "Middle"
# Calculate margins based on alignment
if alignment == "Middle":
margin_x = (target_size[0] - source.width) // 2
margin_y = (target_size[1] - source.height) // 2
elif alignment == "Left":
margin_x = 0
margin_y = (target_size[1] - source.height) // 2
elif alignment == "Right":
margin_x = target_size[0] - source.width
margin_y = (target_size[1] - source.height) // 2
elif alignment == "Top":
margin_x = (target_size[0] - source.width) // 2
margin_y = 0
elif alignment == "Bottom":
margin_x = (target_size[0] - source.width) // 2
margin_y = target_size[1] - source.height
background = Image.new('RGB', target_size, (255, 255, 255))
background.paste(source, (margin_x, margin_y))
mask = Image.new('L', target_size, 255)
mask_draw = ImageDraw.Draw(mask)
# Adjust mask generation based on alignment
if alignment == "Middle":
mask_draw.rectangle([
(margin_x + overlap, margin_y + overlap),
(margin_x + source.width - overlap, margin_y + source.height - overlap)
], fill=0)
elif alignment == "Left":
mask_draw.rectangle([
(margin_x, margin_y),
(margin_x + source.width - overlap, margin_y + source.height)
], fill=0)
elif alignment == "Right":
mask_draw.rectangle([
(margin_x + overlap, margin_y),
(margin_x + source.width, margin_y + source.height)
], fill=0)
elif alignment == "Top":
mask_draw.rectangle([
(margin_x, margin_y),
(margin_x + source.width, margin_y + source.height - overlap)
], fill=0)
elif alignment == "Bottom":
mask_draw.rectangle([
(margin_x, margin_y + overlap),
(margin_x + source.width, margin_y + source.height)
], fill=0)
cnet_image = background.copy()
cnet_image.paste(0, (0, 0), mask)
final_prompt = f"{prompt_input} , high quality, 4k"
(
prompt_embeds,
negative_prompt_embeds,
pooled_prompt_embeds,
negative_pooled_prompt_embeds,
) = pipe.encode_prompt(final_prompt, "cuda", True)
for image in pipe(
prompt_embeds=prompt_embeds,
negative_prompt_embeds=negative_prompt_embeds,
pooled_prompt_embeds=pooled_prompt_embeds,
negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
image=cnet_image,
num_inference_steps=num_inference_steps
):
yield cnet_image, image
image = image.convert("RGBA")
cnet_image.paste(image, (0, 0), mask)
yield background, cnet_image
def clear_result():
"""Clears the result ImageSlider."""
return gr.update(value=None)
def preload_presets(target_ratio):
"""Updates the width and height sliders based on the selected aspect ratio."""
if target_ratio == "9:16":
changed_width = 720
changed_height = 1280
return changed_width, changed_height, gr.update(open=False)
elif target_ratio == "16:9":
changed_width = 1280
changed_height = 720
return changed_width, changed_height, gr.update(open=False)
elif target_ratio == "Custom":
return 720, 1280, gr.update(open=True)
def select_the_right_preset(user_width, user_height):
if user_width == 720 and user_height == 1280:
return "9:16"
elif user_width == 1280 and user_height == 720:
return "16:9"
else:
return "Custom"
css = """
.gradio-container {
width: 1200px !important;
}
"""
title = """<h1 align="center">Diffusers Image Outpaint</h1>
<div align="center">Drop an image you would like to extend, pick your expected ratio and hit Generate.</div>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<p style="display: flex;gap: 6px;">
<a href="https://huggingface.co/spaces/fffiloni/diffusers-image-outpout?duplicate=true">
<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md.svg" alt="Duplicate this Space">
</a> to skip the queue and enjoy faster inference on the GPU of your choice
</p>
</div>
"""
with gr.Blocks(css=css) as demo:
with gr.Column():
gr.HTML(title)
with gr.Row():
with gr.Column():
input_image = gr.Image(
type="pil",
label="Input Image"
)
with gr.Row():
with gr.Column(scale=2):
prompt_input = gr.Textbox(label="Prompt (Optional)")
with gr.Column(scale=1):
run_button = gr.Button("Generate")
with gr.Row():
target_ratio = gr.Radio(
label="Expected Ratio",
choices=["9:16", "16:9", "Custom"],
value="9:16",
scale=2
)
alignment_dropdown = gr.Dropdown(
choices=["Middle", "Left", "Right", "Top", "Bottom"],
value="Middle",
label="Alignment"
)
with gr.Accordion(label="Advanced settings", open=False) as settings_panel:
with gr.Column():
with gr.Row():
width_slider = gr.Slider(
label="Width",
minimum=720,
maximum=1536,
step=8,
value=720, # Set a default value
)
height_slider = gr.Slider(
label="Height",
minimum=720,
maximum=1536,
step=8,
value=1280, # Set a default value
)
with gr.Row():
num_inference_steps = gr.Slider(label="Steps", minimum=4, maximum=12, step=1, value=8)
overlap_width = gr.Slider(
label="Mask overlap width",
minimum=1,
maximum=50,
value=42,
step=1
)
gr.Examples(
examples=[
["./examples/example_1.webp", 1280, 720, "Middle"],
["./examples/example_2.jpg", 1440, 810, "Left"],
["./examples/example_3.jpg", 1024, 1024, "Top"],
["./examples/example_3.jpg", 1024, 1024, "Bottom"],
],
inputs=[input_image, width_slider, height_slider, alignment_dropdown],
)
with gr.Column():
result = ImageSlider(
interactive=False,
label="Generated Image",
)
use_as_input_button = gr.Button("Use as Input Image", visible=False)
def use_output_as_input(output_image):
"""Sets the generated output as the new input image."""
return gr.update(value=output_image[1])
use_as_input_button.click(
fn=use_output_as_input,
inputs=[result],
outputs=[input_image]
)
target_ratio.change(
fn=preload_presets,
inputs=[target_ratio],
outputs=[width_slider, height_slider, settings_panel],
queue=False
)
width_slider.change(
fn = select_the_right_preset,
inputs = [width_slider, height_slider],
outputs = [target_ratio],
queue = False
)
run_button.click(
fn=clear_result,
inputs=None,
outputs=result,
).then(
fn=infer,
inputs=[input_image, width_slider, height_slider, overlap_width, num_inference_steps,
prompt_input, alignment_dropdown],
outputs=result,
).then(
fn=lambda: gr.update(visible=True),
inputs=None,
outputs=use_as_input_button,
)
prompt_input.submit(
fn=clear_result,
inputs=None,
outputs=result,
).then(
fn=infer,
inputs=[input_image, width_slider, height_slider, overlap_width, num_inference_steps,
prompt_input, alignment_dropdown],
outputs=result,
).then(
fn=lambda: gr.update(visible=True),
inputs=None,
outputs=use_as_input_button,
)
demo.queue(max_size=12).launch(share=False)