# Hugging Face Spaces status: Running on Zero
from typing import Tuple, Optional | |
import os | |
import gradio as gr | |
import numpy as np | |
import random | |
import spaces | |
import cv2 | |
from diffusers import DiffusionPipeline | |
from diffusers import FluxInpaintPipeline | |
import torch | |
from PIL import Image, ImageFilter | |
from huggingface_hub import login | |
from diffusers import AutoencoderTiny, AutoencoderKL | |
from huggingface_hub import hf_hub_download, HfFileSystem, ModelCard, snapshot_download | |
import copy | |
import random | |
import time | |
import boto3 | |
from io import BytesIO | |
from datetime import datetime | |
from diffusers.utils import load_image, make_image_grid | |
import json | |
from preprocessor import Preprocessor | |
from diffusers import FluxControlNetInpaintPipeline | |
from diffusers.models import FluxControlNetModel | |
# Authenticate against the Hugging Face Hub with the token from the environment.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)
else:
    # login(token=None) falls back to an interactive prompt, which cannot be
    # answered in a headless deployment, so the login step is skipped instead.
    print("HF_TOKEN is not set; skipping Hugging Face Hub login")

# Largest signed 32-bit integer; used as the upper bound for seeds.
MAX_SEED = np.iinfo(np.int32).max
# Default value of `maximum_dimension` in calculate_image_dimensions_for_flux.
IMAGE_SIZE = 1024

# init
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = "black-forest-labs/FLUX.1-dev"
controlnet_model = 'InstantX/FLUX.1-dev-Controlnet-Canny'
controlnet = FluxControlNetModel.from_pretrained(controlnet_model, torch_dtype=torch.bfloat16)
pipe = FluxControlNetInpaintPipeline.from_pretrained(
    base_model,
    controlnet=controlnet,
    torch_dtype=torch.bfloat16,
).to(device)
# pipe.enable_model_cpu_offload() # for saving memory
def clear_cuda_cache():
    """Ask PyTorch to release its cached CUDA memory via ``torch.cuda.empty_cache``."""
    release_cached_memory = torch.cuda.empty_cache
    release_cached_memory()
class calculateDuration:
    """Context manager that prints when an activity starts and how long it took.

    After the ``with`` block finishes, ``start_time``, ``end_time`` and
    ``elapsed_time`` (seconds) are available on the instance, together with
    the formatted local timestamps ``start_time_formatted`` and
    ``end_time_formatted``.
    """

    def __init__(self, activity_name=""):
        # Label used in the printed messages; may be empty.
        self.activity_name = activity_name

    @staticmethod
    def _format_timestamp(epoch_seconds):
        """Render an epoch timestamp as a local ``YYYY-MM-DD HH:MM:SS`` string."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(epoch_seconds))

    def __enter__(self):
        self.start_time = time.time()
        self.start_time_formatted = self._format_timestamp(self.start_time)
        print(f"Activity: {self.activity_name}, Start time: {self.start_time_formatted}")
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end_time = time.time()
        self.elapsed_time = self.end_time - self.start_time
        self.end_time_formatted = self._format_timestamp(self.end_time)
        # Returning None in both branches lets exceptions from the body propagate.
        if not self.activity_name:
            print(f"Elapsed time: {self.elapsed_time:.6f} seconds")
            return
        print(f"Elapsed time for {self.activity_name}: {self.elapsed_time:.6f} seconds")
def calculate_image_dimensions_for_flux(
    original_resolution_wh: Tuple[int, int],
    maximum_dimension: int = IMAGE_SIZE
) -> Tuple[int, int]:
    """Scale a resolution so its longer side equals ``maximum_dimension``.

    The aspect ratio is preserved (up to rounding) and both sides are then
    rounded down to a multiple of 32. Resolutions smaller than
    ``maximum_dimension`` are scaled up.

    Args:
        original_resolution_wh: ``(width, height)`` of the source image.
        maximum_dimension: Target length of the longer side before rounding.

    Returns:
        ``(new_width, new_height)``, each a multiple of 32 and never below 32.

    Raises:
        ZeroDivisionError: If both sides of the input resolution are zero.
    """
    width, height = original_resolution_wh
    longer_side = max(width, height)

    # Integer arithmetic on purpose: the float form int(side * (max / side))
    # can land one pixel short (49 * (1024 / 49) == 1023.999...), which the
    # multiple-of-32 rounding below would turn into a 32-pixel loss.
    new_width = int(width * maximum_dimension // longer_side)
    new_height = int(height * maximum_dimension // longer_side)

    new_width -= new_width % 32
    new_height -= new_height % 32

    # Extreme aspect ratios would otherwise round the short side down to 0,
    # which is not a usable image size.
    return max(new_width, 32), max(new_height, 32)
def process_mask(
    mask: Image.Image,
    mask_inflation: Optional[int] = None,
    mask_blur: Optional[int] = None
) -> Image.Image:
    """
    Optionally dilate and then Gaussian-blur a mask image.

    Args:
        mask (Image.Image): The input mask image.
        mask_inflation (Optional[int]): Side length of the square dilation
            kernel; the dilation step is skipped when this is None or not
            positive.
        mask_blur (Optional[int]): Radius of the Gaussian blur; the blur step
            is skipped when this is None or not positive.

    Returns:
        Image.Image: The mask after the requested dilation and/or blur.
    """
    processed = mask

    if mask_inflation and mask_inflation > 0:
        structuring_element = np.ones((mask_inflation, mask_inflation), np.uint8)
        dilated_pixels = cv2.dilate(np.array(processed), structuring_element, iterations=1)
        processed = Image.fromarray(dilated_pixels)

    if mask_blur and mask_blur > 0:
        blur_filter = ImageFilter.GaussianBlur(radius=mask_blur)
        processed = processed.filter(blur_filter)

    clear_cuda_cache()
    return processed
def upload_image_to_r2(image, account_id, access_key, secret_key, bucket_name):
    """Upload an image as PNG to a Cloudflare R2 bucket and return its object key.

    Args:
        image: Image object exposing ``save(fileobj, "PNG")``.
        account_id: R2 account id, used to build the endpoint URL.
        access_key: Access key id passed to the S3 client.
        secret_key: Secret access key passed to the S3 client.
        bucket_name: Name of the destination bucket.

    Returns:
        The object key of the uploaded file, of the form
        ``generated_images/YYYY/MM/DD/HHMMSS_<random int>.png``.
    """
    with calculateDuration("Upload image"):
        # Never log access_key / secret_key: credentials must not end up in logs.
        print("upload_image_to_r2", account_id, bucket_name)
        connection_url = f"https://{account_id}.r2.cloudflarestorage.com"

        s3 = boto3.client(
            's3',
            endpoint_url=connection_url,
            region_name='auto',
            aws_access_key_id=access_key,
            aws_secret_access_key=secret_key
        )

        # Timestamped key plus a random suffix to avoid collisions within a second.
        current_time = datetime.now().strftime("%Y/%m/%d/%H%M%S")
        image_file = f"generated_images/{current_time}_{random.randint(0, MAX_SEED)}.png"

        buffer = BytesIO()
        image.save(buffer, "PNG")
        buffer.seek(0)  # rewind so the upload reads from the start
        s3.upload_fileobj(buffer, bucket_name, image_file)
        print("upload finish", image_file)
    return image_file
def run_flux(
    image: Image.Image,
    mask: Image.Image,
    control_image: Image.Image,
    prompt: str,
    seed_slicer: int,
    randomize_seed_checkbox: bool,
    strength_slider: float,
    num_inference_steps_slider: int,
    controlnet_conditioning_scale: float,
    guidance_scale: float,
    resolution_wh: Tuple[int, int],
    progress
) -> Image.Image:
    """Run the module-level ControlNet inpainting pipeline once.

    Args:
        image: Image to inpaint.
        mask: Mask image passed to the pipeline as ``mask_image``.
        control_image: Control image for the ControlNet.
        prompt: Text prompt.
        seed_slicer: Seed for the random generator; ignored when
            ``randomize_seed_checkbox`` is true.
        randomize_seed_checkbox: Draw a random seed in ``[0, MAX_SEED]``.
        strength_slider: Passed to the pipeline as ``strength``.
        num_inference_steps_slider: Number of inference steps.
        controlnet_conditioning_scale: ControlNet conditioning scale.
        guidance_scale: Guidance scale.
        resolution_wh: ``(width, height)`` of the output.
        progress: Callable ``progress(fraction, description)`` used to report
            completion.

    Returns:
        The first image produced by the pipeline.
    """
    print("Running FLUX...")
    pipe.to(device)
    width, height = resolution_wh

    if randomize_seed_checkbox:
        seed_slicer = random.randint(0, MAX_SEED)
    # UI slider values can arrive as floats; manual_seed() requires an integer.
    generator = torch.Generator().manual_seed(int(seed_slicer))

    with calculateDuration("Run pipe"):
        with torch.inference_mode():
            generated_image = pipe(
                prompt=prompt,
                image=image,
                mask_image=mask,
                control_image=control_image,
                controlnet_conditioning_scale=controlnet_conditioning_scale,
                strength=strength_slider,
                guidance_scale=guidance_scale,
                width=width,
                height=height,
                generator=generator,
                # Same float-from-slider concern as for the seed.
                num_inference_steps=int(num_inference_steps_slider),
            ).images[0]

    # gr.Progress expects a completion fraction in [0, 1], not a percentage.
    progress(0.99, "Generate image success!")
    return generated_image
def load_loras(lora_strings_json:str):
    """Replace the LoRA adapters on the module-level pipeline.

    Args:
        lora_strings_json: JSON list of objects with the keys ``repo``,
            ``weights``, ``adapter_name`` and optionally ``adapter_weight``
            (defaults to 1.0). A single JSON object is treated as a
            one-element list. Entries missing ``repo``, ``weights`` or
            ``adapter_name`` are skipped. Empty or unparsable input means
            "no LoRA".
    """
    lora_configs = None
    if lora_strings_json:
        try:
            lora_configs = json.loads(lora_strings_json)
        except (TypeError, ValueError) as error:
            # json.JSONDecodeError is a ValueError subclass.
            print("parse lora failed", error)

    if isinstance(lora_configs, dict):
        lora_configs = [lora_configs]
    elif not isinstance(lora_configs, list):
        if lora_configs is not None:
            print("lora config must be a JSON list")
        lora_configs = []

    # The pipeline is shared between requests: adapters loaded by an earlier
    # call must be removed even when this call asks for none, otherwise they
    # would silently keep affecting later generations. The flag avoids calling
    # unload_lora_weights() when nothing was ever loaded.
    adapters_loaded = getattr(load_loras, "_adapters_loaded", False)
    if not lora_configs and not adapters_loaded:
        return

    with calculateDuration("Loading LoRA weights"):
        pipe.unload_lora_weights()
        load_loras._adapters_loaded = False

        adapter_names = []
        adapter_weights = []
        for lora_info in lora_configs:
            if not isinstance(lora_info, dict):
                continue
            lora_repo = lora_info.get("repo")
            weights = lora_info.get("weights")
            adapter_name = lora_info.get("adapter_name")
            adapter_weight = lora_info.get("adapter_weight")
            if lora_repo and weights and adapter_name:
                # load lora
                pipe.load_lora_weights(lora_repo, weight_name=weights, adapter_name=adapter_name)
                load_loras._adapters_loaded = True
                adapter_names.append(adapter_name)
                adapter_weights.append(1.0 if adapter_weight is None else adapter_weight)

        # set lora weights (only when at least one adapter was actually loaded)
        if adapter_names:
            pipe.set_adapters(adapter_names, adapter_weights=adapter_weights)
def generate_control_image(image, mask, width, height):
    """Create the "Canny" control image for ``image`` at ``(width, height)``.

    ``mask`` is accepted but not used. The preprocessor is called with
    ``image_resolution=width`` and ``detect_resolution=512``; its output is
    then resized to exactly ``(width, height)`` with LANCZOS resampling.
    """
    target_size = (width, height)
    with calculateDuration("Generate control image"):
        canny_preprocessor = Preprocessor()
        canny_preprocessor.load("Canny")
        detected = canny_preprocessor(
            image=image,
            image_resolution=width,
            detect_resolution=512,
        )
        control_image = detected.resize(target_size, Image.LANCZOS)
    return control_image
def process(
    image_url: str,
    mask_url: str,
    inpainting_prompt_text: str,
    mask_inflation_slider: int,
    mask_blur_slider: int,
    seed_slicer: int,
    randomize_seed_checkbox: bool,
    strength_slider: float,
    guidance_scale: float,
    controlnet_conditioning_scale: float,
    num_inference_steps_slider: int,
    lora_strings_json: str,
    upload_to_r2: bool,
    account_id: str,
    access_key: str,
    secret_key: str,
    bucket:str,
    progress=gr.Progress(track_tqdm=True)
):
    """Handle one inpainting request end to end.

    Loads the image and mask from their URLs, resizes them, processes the
    mask, builds the control image, applies the requested LoRAs, runs the
    pipeline and optionally uploads the result to R2.

    Returns:
        A tuple ``(images, result_json)``. ``images`` is ``None`` when the
        inputs are invalid or cannot be loaded; otherwise it is the list
        ``[image, mask, control_image]`` followed by the generated image when
        generation succeeded. ``result_json`` is a JSON string with ``status``
        and ``message`` keys, plus ``url`` after a successful upload.
    """
    print("process", image_url, mask_url, inpainting_prompt_text, lora_strings_json)
    result = {"status": "false", "message": ""}

    # Validate inputs up front so the user gets a message instead of a crash.
    if not image_url:
        gr.Info("Please enter image url for inpainting.")
        result["message"] = "invalid image url"
        return None, json.dumps(result)
    if not mask_url:
        gr.Info("Please enter mask url for inpainting.")
        result["message"] = "invalid mask url"
        return None, json.dumps(result)
    if not inpainting_prompt_text:
        gr.Info("Please enter inpainting text prompt.")
        result["message"] = "invalid inpainting prompt"
        return None, json.dumps(result)

    with calculateDuration("Load image"):
        # load_image raises on unreachable or invalid inputs rather than
        # returning a falsy value, so failures have to be caught here.
        try:
            image = load_image(image_url)
            mask = load_image(mask_url)
        except Exception as e:
            print(e)
            image = mask = None

    if image is None or mask is None:
        gr.Info("Please upload an image & mask by url.")
        result["message"] = "can not load image"
        return None, json.dumps(result)

    # generate
    with calculateDuration("Resize & process mask"):
        width, height = calculate_image_dimensions_for_flux(original_resolution_wh=image.size)
        image = image.resize((width, height), Image.LANCZOS)
        mask = mask.resize((width, height), Image.LANCZOS)
        mask = process_mask(mask, mask_inflation=mask_inflation_slider, mask_blur=mask_blur_slider)

    control_image = generate_control_image(image, mask, width, height)
    # clear_cuda_cache()
    load_loras(lora_strings_json=lora_strings_json)

    try:
        print("Start applying for zeroGPU resources ...")
        generated_image = run_flux(
            image=image,
            mask=mask,
            control_image=control_image,
            prompt=inpainting_prompt_text,
            seed_slicer=seed_slicer,
            randomize_seed_checkbox=randomize_seed_checkbox,
            strength_slider=strength_slider,
            num_inference_steps_slider=num_inference_steps_slider,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=controlnet_conditioning_scale,
            resolution_wh=(width, height),
            progress=progress
        )
    except Exception as e:
        result["status"] = "failed"
        result["message"] = "generate image failed"
        print(e)
        generated_image = None

    clear_cuda_cache()
    print("run flux finish")

    if generated_image is not None:
        if upload_to_r2:
            # A failed upload must not discard the generated image; report it
            # in the JSON result and still return the images.
            try:
                url = upload_image_to_r2(generated_image, account_id, access_key, secret_key, bucket)
                result = {"status": "success", "message": "upload image success", "url": url}
            except Exception as e:
                print(e)
                result = {"status": "failed", "message": "upload image failed"}
        else:
            result = {"status": "success", "message": "Image generated but not uploaded"}

    final_images = [image, mask, control_image]
    if generated_image is not None:
        final_images.append(generated_image)

    # gr.Progress expects a completion fraction in [0, 1], not a percentage.
    progress(1.0, "finish!")
    return final_images, json.dumps(result)
# Gradio UI: inputs in the left column, results in the right column.
# NOTE(review): the nesting of rows/accordions below is a best-effort layout;
# confirm against the intended design.
with gr.Blocks() as demo:
    gr.Markdown("Run inpainting with Flux, compatible with Canny ControlNet, LoRAs and HyperFlux_8step")
    with gr.Row():
        with gr.Column():
            image_url = gr.Text(
                label="Original image url",
                show_label=True,
                max_lines=1,
                placeholder="Enter image url for inpainting",
                container=False
            )
            mask_url = gr.Text(
                label="Mask image url",
                show_label=True,
                max_lines=1,
                placeholder="Enter url of masking",
                container=False,
            )
            inpainting_prompt_text_component = gr.Text(
                label="Inpainting prompt",
                show_label=True,
                max_lines=5,
                placeholder="Enter text to generate inpainting",
                container=False,
            )
            lora_strings_json = gr.Text(label="LoRA Configs (JSON List String)", placeholder='[{"repo": "lora_repo1", "weights": "weights1", "adapter_name": "adapter_name1", "adapter_weight": 1}, {"repo": "lora_repo2", "weights": "weights2", "adapter_name": "adapter_name2", "adapter_weight": 1}]', lines=5)
            submit_button_component = gr.Button(value='Submit', variant='primary', scale=0)

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    mask_inflation_slider_component = gr.Slider(
                        label="Mask inflation",
                        info="Adjusts the amount of mask edge expansion before "
                             "inpainting.",
                        minimum=0,
                        maximum=20,
                        step=1,
                        value=10,
                    )
                    mask_blur_slider_component = gr.Slider(
                        label="Mask blur",
                        info="Controls the intensity of the Gaussian blur applied to "
                             "the mask edges.",
                        minimum=0,
                        maximum=20,
                        step=1,
                        value=10,
                    )
                seed_slicer_component = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=42,
                )
                randomize_seed_checkbox_component = gr.Checkbox(
                    label="Randomize seed", value=True)
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="guidance_scale",
                        info="Guidance scale.",
                        minimum=0.1,
                        maximum=10,
                        step=0.1,
                        value=3.5,
                    )
                    controlnet_conditioning_scale = gr.Slider(
                        label="controlnet_conditioning_scale",
                        info="ControlNet strength, depth works best at 0.2, canny works best at 0.4. Recommended range is 0.3-0.8",
                        minimum=0.1,
                        maximum=1,
                        step=0.1,
                        value=0.4,
                    )
                with gr.Row():
                    strength_slider_component = gr.Slider(
                        label="Strength",
                        info="Indicates extent to transform the reference `image`. "
                             "Must be between 0 and 1. `image` is used as a starting "
                             "point and more noise is added the higher the `strength`.",
                        minimum=0,
                        maximum=1,
                        step=0.01,
                        value=0.85,
                    )
                    num_inference_steps_slider_component = gr.Slider(
                        label="Number of inference steps",
                        info="The number of denoising steps. More denoising steps "
                             "usually lead to a higher quality image at the "
                             "expense of slower inference.",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=8,
                    )

            with gr.Accordion("R2 Settings", open=False):
                upload_to_r2 = gr.Checkbox(label="Upload to R2", value=False)
                with gr.Row():
                    account_id = gr.Textbox(label="Account Id", placeholder="Enter R2 account id")
                    bucket = gr.Textbox(label="Bucket Name", placeholder="Enter R2 bucket name here")
                with gr.Row():
                    access_key = gr.Textbox(label="Access Key", placeholder="Enter R2 access key here")
                    secret_key = gr.Textbox(label="Secret Key", placeholder="Enter R2 secret key here")

        with gr.Column():
            generated_images = gr.Gallery(label="Result", show_label=True)
            output_json_component = gr.Code(label="JSON Result", language="json")

    # The order of `inputs` must match the positional parameters of process().
    submit_button_component.click(
        fn=process,
        inputs=[
            image_url,
            mask_url,
            inpainting_prompt_text_component,
            mask_inflation_slider_component,
            mask_blur_slider_component,
            seed_slicer_component,
            randomize_seed_checkbox_component,
            strength_slider_component,
            guidance_scale,
            controlnet_conditioning_scale,
            num_inference_steps_slider_component,
            lora_strings_json,
            upload_to_r2,
            account_id,
            access_key,
            secret_key,
            bucket
        ],
        outputs=[
            generated_images,
            output_json_component
        ]
    )

demo.queue(api_open=False)
demo.launch()