Spaces:

takahirox
/

Fast_Img2Img

Runtime error

App Files Files Community

Fast_Img2Img / app.py

takahirox

initial commit

0c32ef6 12 months ago

raw

history blame contribute delete

3.58 kB

	from diffusers import (
	ControlNetModel,
	StableDiffusionImg2ImgPipeline,
	StableDiffusionControlNetImg2ImgPipeline,
	)
	from compel import Compel
	from PIL import Image
	import cv2
	import gc
	import gradio
	import numpy
	import torch

	# Model identifiers passed to `from_pretrained` below.
	base_model = "SimianLuo/LCM_Dreamshaper_v7"
	controlnet_model = "lllyasviel/control_v11p_sd15_canny"

	# Runtime configuration shared by both pipelines.
	device = "cuda"
	dtype = torch.float16

	# Every input image is resized to this fixed resolution in `predict`.
	width = 512
	height = 512

	# Request the half-precision dtype at load time. The keyword must be spelled
	# `torch_dtype`; a misspelled keyword is not applied as the load dtype.
	controlnet = ControlNetModel.from_pretrained(
	    controlnet_model, torch_dtype=dtype
	)

	# Img2img pipeline guided by the Canny ControlNet.
	pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
	    base_model, controlnet=controlnet, safety_checker=None
	).to(dtype=dtype)
	pipe.enable_model_cpu_offload(device=device)
	pipe.unet.to(memory_format=torch.channels_last)

	# Prompt encoder bound to the ControlNet pipeline's tokenizer/text encoder.
	compel_proc = Compel(
	    tokenizer=pipe.tokenizer,
	    text_encoder=pipe.text_encoder,
	    truncate_long_prompts=False,
	)

	# Plain img2img pipeline used when ControlNet guidance is switched off.
	pipe_no_controlnet = StableDiffusionImg2ImgPipeline.from_pretrained(
	    base_model, safety_checker=None
	).to(dtype=dtype)
	# Enable offloading once, on this pipeline, with the same target device as
	# the ControlNet pipeline (rather than re-enabling it on `pipe`).
	pipe_no_controlnet.enable_model_cpu_offload(device=device)

	# Prompt encoder bound to the plain pipeline's tokenizer/text encoder.
	compel_proc_no_controlnet = Compel(
	    tokenizer=pipe_no_controlnet.tokenizer,
	    text_encoder=pipe_no_controlnet.text_encoder,
	    truncate_long_prompts=False,
	)

	def predict(
	    prompt: str,
	    image: "Image.Image",
	    use_controlnet: bool,
	    generator: int,
	    num_inference_steps: int,
	    strength: float,
	    guidance_scale: float,
	    controlnet_conditioning_scale: float,
	    canny_lower_threshold: int,
	    canny_higher_threshold: int,
	):
	    """Run one img2img generation and return the first resulting image.

	    Args:
	        prompt: Text prompt, encoded with the Compel processor that belongs
	            to the selected pipeline.
	        image: Input PIL image, or ``None`` when nothing was supplied.
	        use_controlnet: When true, a Canny edge map of the resized input is
	            passed to the ControlNet pipeline as ``control_image``;
	            otherwise the plain img2img pipeline is used.
	        generator: Integer seed handed to ``torch.manual_seed``.
	        num_inference_steps: Forwarded to the pipeline unchanged.
	        strength: Forwarded to the pipeline unchanged.
	        guidance_scale: Forwarded to the pipeline unchanged.
	        controlnet_conditioning_scale: Forwarded to the ControlNet pipeline;
	            unused when ``use_controlnet`` is false.
	        canny_lower_threshold: First threshold passed to ``cv2.Canny``.
	        canny_higher_threshold: Second threshold passed to ``cv2.Canny``.

	    Returns:
	        The first image in the pipeline output, or ``None`` if ``image`` is
	        ``None`` or the pipeline returned no images.
	    """
	    if image is None:
	        return None

	    # `torch.manual_seed` seeds the default generator and returns it; the
	    # returned generator object is what the pipelines receive.
	    generator = torch.manual_seed(generator)
	    # TODO: Keep the original ratio?
	    image = image.resize((width, height))

	    if use_controlnet:
	        prompt_embeds = compel_proc(prompt)
	        edges = cv2.Canny(
	            numpy.array(image),
	            canny_lower_threshold,
	            canny_higher_threshold,
	        )
	        # Canny yields a single-channel map; replicate it into three
	        # identical channels before building the control image.
	        edges = edges[:, :, None]
	        control_image = Image.fromarray(
	            numpy.concatenate([edges, edges, edges], axis=2)
	        )
	        try:
	            results = pipe(
	                control_image=control_image,
	                control_guidance_end=1.0,
	                control_guidance_start=0.0,
	                controlnet_conditioning_scale=controlnet_conditioning_scale,
	                generator=generator,
	                guidance_scale=guidance_scale,
	                image=image,
	                num_inference_steps=num_inference_steps,
	                output_type="pil",
	                prompt_embeds=prompt_embeds,
	                strength=strength,
	            )
	        finally:
	            # Release the edge map even when the pipeline raises.
	            control_image.close()
	    else:
	        prompt_embeds = compel_proc_no_controlnet(prompt)
	        results = pipe_no_controlnet(
	            generator=generator,
	            guidance_scale=guidance_scale,
	            image=image,
	            num_inference_steps=num_inference_steps,
	            output_type="pil",
	            prompt_embeds=prompt_embeds,
	            strength=strength,
	        )

	    gc.collect()

	    return results.images[0] if results.images else None

	# Web UI: one input component per `predict` parameter, in signature order,
	# and a single PIL image as output.
	app = gradio.Interface(
	    fn=predict,
	    inputs=[
	        # prompt
	        gradio.Textbox(
	            value="Kirisame Marisa, Cute, Smiling, High quality, Realistic"
	        ),
	        # image
	        gradio.Image(type="pil"),
	        # use_controlnet
	        gradio.Checkbox(value=True),
	        # generator
	        gradio.Slider(minimum=0, maximum=2147483647, value=2159232, step=1),
	        # num_inference_steps
	        gradio.Slider(minimum=2, maximum=15, value=4, step=1),
	        # strength
	        gradio.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.01),
	        # guidance_scale
	        gradio.Slider(minimum=0.0, maximum=5.0, value=0.2, step=0.01),
	        # controlnet_conditioning_scale
	        gradio.Slider(minimum=0.0, maximum=1.0, value=0.8, step=0.01),
	        # canny_lower_threshold
	        gradio.Slider(minimum=0, maximum=255, value=100, step=1),
	        # canny_higher_threshold
	        gradio.Slider(minimum=0, maximum=255, value=200, step=1),
	    ],
	    outputs=gradio.Image(type="pil"),
	)
	app.launch()