# from controlnet_aux import OpenposeDetector
# from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
# from diffusers import UniPCMultistepScheduler
import gradio as gr
# import torch
import base64
from io import BytesIO
from PIL import Image, ImageFilter
import cv2
import numpy as np

# The canvas hosts the <pose-maker> web component defined by mannequinAll.js.
canvas_html = "<pose-maker/>"

# Fetches mannequinAll.js and injects it into the page as an ES module,
# which registers the <pose-maker> custom element.
load_js = """
async () => {
    const url = "https://huggingface.co/datasets/mishig/gradio-components/raw/main/mannequinAll.js"
    fetch(url)
        .then(res => res.text())
        .then(text => {
            const script = document.createElement('script');
            script.type = "module"
            script.src = URL.createObjectURL(new Blob([text], { type: 'application/javascript' }));
            document.head.appendChild(script);
        });
}
"""

# Captures the current 3D pose as a base64 screenshot and passes it,
# together with the prompt, to the Python callback.
get_js_image = """
async (canvas, prompt) => {
    const poseMakerEl = document.querySelector("pose-maker");
    const imgBase64 = poseMakerEl.captureScreenshot();
    return [imgBase64, prompt]
}
"""

# # Models
# controlnet = ControlNetModel.from_pretrained(
#     "lllyasviel/sd-controlnet-depth", torch_dtype=torch.float16
# )
# pipe = StableDiffusionControlNetPipeline.from_pretrained(
#     "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None, torch_dtype=torch.float16
# )
# pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# # enable_model_cpu_offload() loads the individual model components onto the GPU
# # on demand, so we don't need to call pipe.to("cuda") explicitly.
# pipe.enable_model_cpu_offload()

# # xformers
# pipe.enable_xformers_memory_efficient_attention()

# # Generator seed
# generator = torch.manual_seed(0)

# Canny edge-detection thresholds
low_threshold = 100
high_threshold = 200


def get_canny_filter(image):
    """Run Canny edge detection and stack the result into a 3-channel image."""
    if not isinstance(image, np.ndarray):
        image = np.array(image)
    image = cv2.Canny(image, low_threshold, high_threshold)
    image = image[:, :, None]
    image = np.concatenate([image, image, image], axis=2)
    canny_image = Image.fromarray(image)
    return canny_image


def generate_images(canvas, prompt):
    """Decode the base64 pose screenshot, preprocess it, and return the output images."""
    try:
        base64_img = canvas
        image_data = base64.b64decode(base64_img.split(',')[1])
        input_img = Image.open(BytesIO(image_data)).convert('RGB').resize((512, 512))
        input_img = input_img.filter(ImageFilter.GaussianBlur(radius=2))
        input_img = get_canny_filter(input_img)
        # output = pipe(
        #     prompt,
        #     input_img,
        #     generator=generator,
        #     num_images_per_prompt=3,
        #     num_inference_steps=20,
        # )
        # While the diffusion pipeline is disabled, return the Canny map three
        # times as a placeholder.
        all_outputs = [input_img, input_img, input_img]
        # all_outputs.append(input_img)
        # for image in output.images:
        #     all_outputs.append(image)
        return all_outputs
    except Exception as e:
        raise gr.Error(str(e))


def placeholder_fn(axis):
    # No-op: the actual update happens client-side via the _js handlers below.
    pass


js_change_rotation_axis = """
async (axis) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.changeRotationAxis(axis);
}
"""

js_pose_template = """
async (pose) => {
    const poseMakerEl = document.querySelector("pose-maker");
    poseMakerEl.setPose(pose);
}
"""

with gr.Blocks() as blocks:
    gr.HTML(
        """

        <h1>Pose in 3D & Render with ControlNet (SD-1.5)</h1>
        <p>Using ControlNet and three.js/mannequin.js</p>
        <p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. Duplicate Space</p>
        """
    )
    with gr.Row():
        with gr.Column():
            canvas = gr.HTML(canvas_html, elem_id="canvas_html", visible=True)
            with gr.Row():
                rotation_axis = gr.Radio(["x", "y", "z"], value="x", label="Joint rotation axis")
                pose_template = gr.Radio(
                    ["regular", "ballet", "handstand", "split", "kick", "chilling"],
                    value="regular",
                    label="Pose template",
                )
            prompt = gr.Textbox(
                label="Enter your prompt",
                max_lines=1,
                placeholder="best quality, extremely detailed",
            )
            run_button = gr.Button("Generate")
        with gr.Column():
            gallery = gr.Gallery().style(grid=[2], height="auto")

    # Joint-rotation and pose-template changes are handled entirely in the
    # browser by the <pose-maker> element, so the Python callbacks are no-ops.
    rotation_axis.change(fn=placeholder_fn,
                         inputs=[rotation_axis],
                         outputs=[],
                         queue=False,
                         _js=js_change_rotation_axis)

    pose_template.change(fn=placeholder_fn,
                         inputs=[pose_template],
                         outputs=[],
                         queue=False,
                         _js=js_pose_template)

    run_button.click(fn=generate_images,
                     inputs=[canvas, prompt],
                     outputs=[gallery],
                     _js=get_js_image)

    blocks.load(None, None, None, _js=load_js)

blocks.launch(debug=True)