import os
import numpy as np
import gradio as gr
import torch
import torch.nn.functional as F
from diffusers import DDIMScheduler
from torchvision.io import read_image
from pytorch_lightning import seed_everything

from masactrl.diffuser_utils import MasaCtrlPipeline
from masactrl.masactrl_utils import (AttentionBase,
                                     regiter_attention_editor_diffusers)

from .app_utils import global_context

torch.set_grad_enabled(False)
# device = torch.device("cuda") if torch.cuda.is_available() else torch.device(
#     "cpu")
# model_path = "CompVis/stable-diffusion-v1-4"
# scheduler = DDIMScheduler(beta_start=0.00085,
#                           beta_end=0.012,
#                           beta_schedule="scaled_linear",
#                           clip_sample=False,
#                           set_alpha_to_one=False)
# model = MasaCtrlPipeline.from_pretrained(model_path,
#                                          scheduler=scheduler).to(device)
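
# `global_context` imported from app_utils is assumed to hold the shared
# pipeline and device, roughly mirroring the commented-out setup above.
# A sketch of the assumed contents (not the actual app_utils code):
#
#     global_context = {"model": model, "device": device}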


def load_image(image_path):
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    image = read_image(image_path)
    image = image[:3].unsqueeze_(0).float() / 127.5 - 1.  # normalize to [-1, 1]
    image = F.interpolate(image, (512, 512))
    image = image.to(device)
    return image
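
# Example usage (hypothetical path; returns a (1, 3, 512, 512) float tensor
# in [-1, 1] on the active device):
#
#     source = load_image("images/corgi.jpg")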


def real_image_editing(source_image, target_prompt,
                       starting_step, starting_layer, ddim_steps, scale, seed,
                       appended_prompt, negative_prompt):
    from masactrl.masactrl import MutualSelfAttentionControl

    model = global_context["model"]
    device = global_context["device"]
    seed_everything(seed)

    with torch.no_grad():
        if appended_prompt:  # skip both None and the empty string Gradio sends
            target_prompt += appended_prompt
        # negative_prompt is accepted for UI parity but not forwarded to the
        # pipeline calls below
        ref_prompt = ""
        prompts = [ref_prompt, target_prompt]
        # invert the source image into a DDIM noise map
        if isinstance(source_image, np.ndarray):
            source_image = torch.from_numpy(source_image).to(device) / 127.5 - 1.
            source_image = source_image.unsqueeze(0).permute(0, 3, 1, 2)
            source_image = F.interpolate(source_image, (512, 512))
        start_code, latents_list = model.invert(source_image,
                                                ref_prompt,
                                                guidance_scale=scale,
                                                num_inference_steps=ddim_steps,
                                                return_intermediates=True)
        start_code = start_code.expand(len(prompts), -1, -1, -1)
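        # start_code now holds one copy of the inverted latent per prompt, so
        # the reconstruction and the edit start from the same noise (for
        # 512x512 SD v1.4 the shape is assumed to be (2, 4, 64, 64))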
        # sample the target prompt from the inverted noise map
        # (fixed-seed baseline, without MasaCtrl)
        editor = AttentionBase()
        regiter_attention_editor_diffusers(model, editor)
        image_fixed = model([target_prompt],
                            latents=start_code[-1:],
                            num_inference_steps=ddim_steps,
                            guidance_scale=scale)
        image_fixed = image_fixed.cpu().permute(0, 2, 3, 1).numpy()
        # run MasaCtrl: hijack the attention module with the mutual
        # self-attention controller
        controller = MutualSelfAttentionControl(starting_step, starting_layer)
        regiter_attention_editor_diffusers(model, controller)

        # synthesize the reconstruction and the edited image in one batch
        image_masactrl = model(prompts,
                               latents=start_code,
                               num_inference_steps=ddim_steps,
                               guidance_scale=scale)
        image_masactrl = image_masactrl.cpu().permute(0, 2, 3, 1).numpy()

    return [
        image_masactrl[0],
        image_fixed[0],
        image_masactrl[1]
    ]  # reconstruction, fixed-seed sample, MasaCtrl edit
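
# A minimal sketch of calling the editor outside Gradio (assumes app_utils
# has populated global_context with the pipeline and device; the image path
# and prompts are illustrative):
#
#     from PIL import Image
#     source = np.array(Image.open("images/corgi.jpg").convert("RGB"))
#     recon, fixed, edited = real_image_editing(
#         source, "A photo of a running corgi",
#         starting_step=4, starting_layer=10, ddim_steps=50,
#         scale=7.5, seed=42, appended_prompt="", negative_prompt="")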


def create_demo_editing():
    with gr.Blocks() as demo:
        gr.Markdown("## **Input Settings**")
        with gr.Row():
            with gr.Column():
                source_image = gr.Image(label="Source Image",
                                        value=os.path.join(os.path.dirname(__file__), "images/corgi.jpg"),
                                        interactive=True)
                target_prompt = gr.Textbox(label="Target Prompt",
                                           value='A photo of a running corgi',
                                           interactive=True)
                with gr.Row():
                    ddim_steps = gr.Slider(label="DDIM Steps",
                                           minimum=1,
                                           maximum=999,
                                           value=50,
                                           step=1)
                    starting_step = gr.Slider(label="Step of MasaCtrl",
                                              minimum=0,
                                              maximum=999,
                                              value=4,
                                              step=1)
                    starting_layer = gr.Slider(label="Layer of MasaCtrl",
                                               minimum=0,
                                               maximum=16,
                                               value=10,
                                               step=1)
                run_btn = gr.Button("Run")  # gr.Button takes its text as `value`, not `label`
            with gr.Column():
                appended_prompt = gr.Textbox(label="Appended Prompt", value='')
                negative_prompt = gr.Textbox(label="Negative Prompt", value='')
                with gr.Row():
                    scale = gr.Slider(label="CFG Scale",
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=7.5,
                                      step=0.1)
                    seed = gr.Slider(label="Seed",
                                     minimum=-1,
                                     maximum=2147483647,
                                     value=42,
                                     step=1)

        gr.Markdown("## **Output**")
        with gr.Row():
            image_recons = gr.Image(label="Source Image")
            image_fixed = gr.Image(label="Image with Fixed Seed")
            image_masactrl = gr.Image(label="Image with MasaCtrl")

        inputs = [
            source_image, target_prompt, starting_step, starting_layer, ddim_steps,
            scale, seed, appended_prompt, negative_prompt
        ]
        run_btn.click(real_image_editing, inputs,
                      [image_recons, image_fixed, image_masactrl])

        gr.Examples(
            [[os.path.join(os.path.dirname(__file__), "images/corgi.jpg"),
              "A photo of a running corgi"],
             [os.path.join(os.path.dirname(__file__), "images/person.png"),
              "A photo of a person, black t-shirt, raising hand"],
            ],
            [source_image, target_prompt]
        )
    return demo


if __name__ == "__main__":
    demo_editing = create_demo_editing()
    demo_editing.launch()