# from controlnet_aux import OpenposeDetector
# from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
# from diffusers import UniPCMultistepScheduler
import gradio as gr
# import torch
import base64
from io import BytesIO
from PIL import Image, ImageFilter
import cv2
import numpy as np
canvas_html = '<pose-maker/>'
load_js = """
async () => {
const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
fetch(url)
.then(res => res.text())
.then(text => {
const script = document.createElement('script');
script.type = "module"
script.src = URL.createObjectURL(new Blob([text], { type: 'application/javascript' }));
document.head.appendChild(script);
});
}
"""
get_js_image = """
async (canvas, prompt) => {
    const poseMakerEl = document.querySelector("pose-maker");
    const imgBase64 = poseMakerEl.captureScreenshot();
    return [imgBase64, prompt];
}
"""
# # Models
# controlnet = ControlNetModel.from_pretrained(
# "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
# )
# pipe = StableDiffusionControlNetPipeline.from_pretrained(
# "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
# )
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# # This command loads the individual model components on GPU on-demand. So, we don't
# # need to explicitly call pipe.to("cuda").
# pipe.enable_model_cpu_offload()
# # xformers
# pipe.enable_xformers_memory_efficient_attention()
# # Generator seed,
# generator = torch.manual_seed(0)
low_threshold = 100
high_threshold = 200
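# Canny uses hysteresis thresholding: gradients above high_threshold become strong
# edges, gradients between the two thresholds are kept only when connected to a
# strong edge, and everything below low_threshold is discarded.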
def get_canny_filter(image):
    # Run Canny edge detection and stack the single-channel result into a
    # 3-channel image, the format ControlNet conditioning expects.
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    canny_image = Image.fromarray(image)
    return canny_image
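# A minimal standalone sketch of the filter (assumes a local "pose.png" exists):
#
#   from PIL import Image
#   edges = get_canny_filter(Image.open("pose.png").convert("RGB"))
#   edges.save("pose_canny.png")  # 3-channel edge map, same size as the input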
def generate_images(canvas, prompt):
    try:
        # `canvas` arrives as a data URL ("data:image/png;base64,<payload>");
        # strip the header and decode the payload.
        base64_img = canvas
        image_data = base64.b64decode(base64_img.split(',')[1])
        input_img = Image.open(BytesIO(image_data)).convert('RGB').resize((512, 512))
        # A slight blur softens the hard edges of the rendered mannequin before Canny.
        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=2))
        input_img = get_canny_filter(input_img)
        # output = pipe(
        #     prompt,
        #     input_img,
        #     generator=generator,
        #     num_images_per_prompt=3,
        #     num_inference_steps=20,
        # )
        # While the diffusion pipeline is disabled, return the Canny map three
        # times as a stand-in for the three generated images.
        all_outputs = [input_img, input_img, input_img]
        # all_outputs.append(input_img)
        # for image in output.images:
        #     all_outputs.append(image)
        return all_outputs
    except Exception as e:
        # Surface the error message in the Gradio UI.
        raise gr.Error(str(e))
def placeholder_fn(axis):
    # No-op: the actual handling happens client-side in the _js callbacks below.
    pass
js_change_rotation_axis = """
async (axis) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.changeRotationAxis(axis);
}
"""
js_pose_template = """
async (pose) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.setPose(pose);
}
"""
with gr.Blocks() as blocks:
    gr.HTML(
        """
        <div style="text-align: center; margin: 0 auto;">
          <div
            style="
              display: inline-flex;
              align-items: center;
              gap: 0.8rem;
              font-size: 1.75rem;
            "
          >
            <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px">
              Pose in 3D & Render with ControlNet (SD-1.5)
            </h1>
          </div>
          <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
            Using <a href="https://github.com/lllyasviel/ControlNet">ControlNet</a> and <a href="https://boytchev.github.io/mannequin.js/">three.js/mannequin.js</a>
          </p>
          <p>For faster inference without waiting in the queue, you can duplicate this Space and upgrade to a GPU in the settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>
        </div>
        """
    )
    with gr.Row():
        with gr.Column():
            canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True)
            with gr.Row():
                rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
                pose_template = gr.Radio(["regular", "ballet", "handstand", "split", "kick", "chilling"], value="regular", label="Pose template")
            prompt = gr.Textbox(
                label="Enter your prompt",
                max_lines=1,
                placeholder="best quality, extremely detailed",
            )
            run_button = gr.Button("Generate")
        with gr.Column():
            gallery = gr.Gallery().style(grid=[2], height="auto")
    rotation_axis.change(fn=placeholder_fn,
                         inputs=[rotation_axis],
                         outputs=[],
                         queue=False,
                         _js=js_change_rotation_axis)
    pose_template.change(fn=placeholder_fn,
                         inputs=[pose_template],
                         outputs=[],
                         queue=False,
                         _js=js_pose_template)
    run_button.click(fn=generate_images,
                     inputs=[canvas, prompt],
                     outputs=[gallery],
                     _js=get_js_image)
    blocks.load(None, None, None, _js=load_js)
blocks.launch(debug=True)