import base64
from io import BytesIO
import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageFilter
# Diffusion imports, commented out together with the pipeline setup below:
# import torch
# from controlnet_aux import OpenposeDetector
# from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
# from diffusers import UniPCMultistepScheduler
# Custom <pose-maker> element provided by mannequin.js; the JS callbacks below query it
# to change poses, rotate joints, and capture screenshots.
canvas_html = "<pose-maker/>"
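# Runs on Blocks.load(): fetches the mannequin.js bundle and injects it into the page
# as a module <script>, which registers the <pose-maker> custom element.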
load_js = """
async () => {
const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
fetch(url)
.then(res => res.text())
.then(text => {
const script = document.createElement('script');
script.type = "module"
script.src = URL.createObjectURL(new Blob([text], { type: 'application/javascript' }));
document.head.appendChild(script);
});
}
"""
get_js_image = """
async (canvas, prompt) => {
const poseMakerEl = document.querySelector("pose-maker");
const imgBase64 = poseMakerEl.captureScreenshot();
return [imgBase64, prompt]
}
"""
# # Models
# controlnet = ControlNetModel.from_pretrained(
# "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
# )
# pipe = StableDiffusionControlNetPipeline.from_pretrained(
# "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
# )
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# # This command loads the individual model components on GPU on-demand. So, we don't
# # need to explicitly call pipe.to("cuda").
# pipe.enable_model_cpu_offload()
# # xformers
# pipe.enable_xformers_memory_efficient_attention()
# # Generator seed
# generator = torch.manual_seed(0)
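# Hysteresis thresholds for cv2.Canny edge detection.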
low_threshold = 100
high_threshold = 200
def get_canny_filter(image):
    """Return the Canny edge map of `image` as a 3-channel PIL image."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    # Stack the single-channel edge map into 3 channels so it can be used as an RGB image.
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    canny_image = Image.fromarray(image)
    return canny_image
def generate_images(canvas, prompt):
    try:
        # `canvas` is a data-URL screenshot of the pose canvas (see get_js_image).
        base64_img = canvas
        # Strip the "data:image/...;base64," prefix and decode the image bytes.
        image_data = base64.b64decode(base64_img.split(',')[1])
        input_img = Image.open(BytesIO(image_data)).convert('RGB').resize((512, 512))
        # Blur slightly before edge detection to soften the rendered mannequin's edges.
        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=2))
        # Build the ControlNet conditioning image.
        input_img = get_canny_filter(input_img)
        # output = pipe(
        #     prompt,
        #     input_img,
        #     generator=generator,
        #     num_images_per_prompt=3,
        #     num_inference_steps=20,
        # )
        # Placeholder while the pipeline above is disabled: return the control image
        # three times in place of the three generated samples.
        all_outputs = [input_img, input_img, input_img]
        # all_outputs.append(input_img)
        # for image in output.images:
        #     all_outputs.append(image)
        return all_outputs
    except Exception as e:
        # Surface any failure to the Gradio UI.
        raise gr.Error(str(e))
def placeholder_fn(axis):
    # No-op; the actual work happens in the client-side _js callbacks.
    pass
js_change_rotation_axis = """
async (axis) => {
const poseMakerEl = document.querySelector("pose-maker");
poseMakerEl.changeRotationAxis(axis);
}
"""
js_pose_template = """
async (pose) => {
const poseMakerEl = document.querySelector("pose-maker");
poseMakerEl.setPose(pose);
}
"""
with gr.Blocks() as blocks:
    gr.HTML(
        """
        <h1>Pose in 3D & Render with ControlNet (SD-1.5)</h1>
        <h3>Using ControlNet and three.js/mannequin.js</h3>
        <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.</p>
        """
    )
    with gr.Row():
        with gr.Column():
            canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True)
            with gr.Row():
                rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
                pose_template = gr.Radio(["regular", "ballet", "handstand", "split", "kick", "chilling"], value="regular", label="Pose template")
            prompt = gr.Textbox(
                label="Enter your prompt",
                max_lines=1,
                placeholder="best quality, extremely detailed",
            )
            run_button = gr.Button("Generate")
        with gr.Column():
            gallery = gr.Gallery().style(grid=[2], height="auto")
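    # Pose controls: changes are applied in the browser via the _js callbacks; the
    # Python fn is a no-op.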
    rotation_axis.change(fn=placeholder_fn,
                         inputs=[rotation_axis],
                         outputs=[],
                         queue=False,
                         _js=js_change_rotation_axis)
    pose_template.change(fn=placeholder_fn,
                         inputs=[pose_template],
                         outputs=[],
                         queue=False,
                         _js=js_pose_template)
    run_button.click(fn=generate_images,
                     inputs=[canvas, prompt],
                     outputs=[gallery],
                     _js=get_js_image)
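    # On page load, inject the mannequin.js web component so the <pose-maker> canvas renders.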
    blocks.load(None, None, None, _js=load_js)
blocks.launch(debug=True)