Spaces:

yashvii
/

IDfy-Avatarify

Runtime error

App Files Files Community

IDfy-Avatarify / gradio_demo /app.py

yashvii

Upload folder using huggingface_hub

b2cbfed verified 3 months ago

raw

history blame

25.4 kB

	import sys
	sys.path.append('./')

	from typing import Tuple

	import os
	import cv2
	import math
	import torch
	import random
	import numpy as np
	import argparse
	import pandas as pd

	import PIL
	from PIL import Image

	import diffusers
	from diffusers.utils import load_image
	from diffusers.models import ControlNetModel
	from diffusers import LCMScheduler

	from huggingface_hub import hf_hub_download

	import insightface
	from insightface.app import FaceAnalysis

	from style_template import styles
	from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline
	from model_util import load_models_xl, get_torch_device, torch_gc


	# Global configuration and one-time model loading.
	# NOTE: everything in this section runs when the module is imported.
	MAX_SEED = np.iinfo(np.int32).max  # upper bound used when drawing a random seed
	device = get_torch_device()
	# Use half precision only when the selected device's string form mentions CUDA.
	dtype = torch.float16 if "cuda" in str(device) else torch.float32
	STYLE_NAMES = list(styles.keys())
	DEFAULT_STYLE_NAME = "Watercolor"

	# Load face encoder
	app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
	app.prepare(ctx_id=0, det_size=(640, 640))

	# Path to InstantID models
	face_adapter = './checkpoints/ip-adapter.bin'
	controlnet_path = './checkpoints/ControlNetModel'

	# Load pipeline
	controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)

	# Watermark stamped on every generated image. Image.open() is lazy and keeps
	# the file handle open, so open it in a context manager and keep only the
	# resized copy (resize() returns a new, fully loaded image).
	with Image.open("./gradio_demo/watermark.png") as _logo_src:
	    logo = _logo_src.resize((100, 70))

	from cv2 import imencode
	import base64

	# def encode_pil_to_base64_new(pil_image):
	# print("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA")
	# image_arr = np.asarray(pil_image)[:,:,::-1]
	# _, byte_data = imencode('.png', image_arr)
	# base64_data = base64.b64encode(byte_data)
	# base64_string_opencv = base64_data.decode("utf-8")
	# return "data:image/png;base64," + base64_string_opencv

	import gradio as gr

	# gr.processing_utils.encode_pil_to_base64 = encode_pil_to_base64_new

	def main(pretrained_model_name_or_path="wangqixun/YamerMIX_v8", enable_lcm_arg=False):
	    """Build the InstantID SDXL pipeline used by the rest of this function.

	    Args:
	        pretrained_model_name_or_path: Either a single-file checkpoint path
	            ending in ``.ckpt`` / ``.safetensors`` (loaded through
	            ``load_models_xl``) or anything accepted by
	            ``StableDiffusionXLInstantIDPipeline.from_pretrained``.
	        enable_lcm_arg: Not read by the pipeline set-up below.
	            NOTE(review): appears to be unused in the visible code -- confirm
	            before removing.
	    """
	    # str.endswith accepts a tuple of suffixes, so one call covers both formats.
	    if pretrained_model_name_or_path.endswith((".ckpt", ".safetensors")):
	        # Single-file checkpoints carry no scheduler config; fetch the one
	        # published with the default model repo.
	        scheduler_kwargs = hf_hub_download(
	            repo_id="wangqixun/YamerMIX_v8",
	            subfolder="scheduler",
	            filename="scheduler_config.json",
	        )

	        (tokenizers, text_encoders, unet, _, vae) = load_models_xl(
	            pretrained_model_name_or_path=pretrained_model_name_or_path,
	            scheduler_name=None,
	            weight_dtype=dtype,
	        )

	        scheduler = diffusers.EulerDiscreteScheduler.from_config(scheduler_kwargs)
	        pipe = StableDiffusionXLInstantIDPipeline(
	            vae=vae,
	            text_encoder=text_encoders[0],
	            text_encoder_2=text_encoders[1],
	            tokenizer=tokenizers[0],
	            tokenizer_2=tokenizers[1],
	            unet=unet,
	            scheduler=scheduler,
	            controlnet=controlnet,
	        ).to(device)
	    else:
	        pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
	            pretrained_model_name_or_path,
	            controlnet=controlnet,
	            torch_dtype=dtype,
	            safety_checker=None,
	            feature_extractor=None,
	        ).to(device)

	        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	    pipe.load_ip_adapter_instantid(face_adapter)
	    # load and disable LCM
	    pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
	    pipe.disable_lora()

	def remove_tips():
	    """Print a marker line and return a Gradio update with ``visible=False``.

	    Wired as the first handler of the upload event, with the usage-tips
	    Markdown component as its output.
	    """
	    print("GG")
	    hide_update = gr.update(visible=False)
	    return hide_update


	# prompts = [
	# ["superman","Vibrant Color"], ["japanese anime character with white/neon hair","Watercolor"],
	# # ["Suited professional","(No style)"],
	# ["Scooba diver","Line art"], ["eskimo","Snow"]
	# ]

	def convert_from_cv2_to_image(img: np.ndarray) -> Image.Image:
	    """Convert a BGR OpenCV array into a PIL image (channels reordered to RGB).

	    The return annotation is ``Image.Image`` (the class); ``Image`` alone is
	    the ``PIL.Image`` module in this file.
	    """
	    return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

	def convert_from_image_to_cv2(img: Image.Image) -> np.ndarray:
	    """Convert a PIL image into an OpenCV array (RGB reordered to BGR).

	    The parameter annotation is ``Image.Image`` (the class); ``Image`` alone
	    is the ``PIL.Image`` module in this file.
	    """
	    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)

	def _run_for_prompt(face_file, style, prompt_index):
	    """Generate one avatar from the ``prompt_index``-th prompt of ``style``.

	    ``styles`` maps a style name to a ``(prompts, negative_prompt)`` pair.
	    An unknown (or unselected) style falls back to the second entry of
	    ``STYLE_NAMES``.
	    """
	    prompts, negative_prompt = styles.get(style, styles.get(STYLE_NAMES[1]))
	    return generate_image(face_file, prompts[prompt_index], negative_prompt)

	# The four public handlers differ only in which prompt of the style they use.
	# ``progress`` is never read in the bodies; it stays in each signature because
	# the gr.Progress default argument is how Gradio attaches progress tracking
	# to a handler.

	def run_for_prompts1(face_file,style,progress=gr.Progress(track_tqdm=True)):
	    """Generate the avatar for the first prompt of ``style``."""
	    return _run_for_prompt(face_file, style, 0)

	def run_for_prompts2(face_file,style,progress=gr.Progress(track_tqdm=True)):
	    """Generate the avatar for the second prompt of ``style``."""
	    return _run_for_prompt(face_file, style, 1)

	def run_for_prompts3(face_file,style,progress=gr.Progress(track_tqdm=True)):
	    """Generate the avatar for the third prompt of ``style``."""
	    return _run_for_prompt(face_file, style, 2)

	def run_for_prompts4(face_file,style,progress=gr.Progress(track_tqdm=True)):
	    """Generate the avatar for the fourth prompt of ``style``."""
	    return _run_for_prompt(face_file, style, 3)

	# def validate_and_process(face_file, style, email):

	# # Your processing logic here
	# gallery1, gallery2, gallery3, gallery4 = run_for_prompts1(face_file, style), run_for_prompts2(face_file, style), run_for_prompts3(face_file, style), run_for_prompts4(face_file, style)
	# return gallery1, gallery2, gallery3, gallery4

	def draw_kps(image_pil, kps, color_list=((255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255))):
	    """Render facial keypoints as a coloured stick figure on a black canvas.

	    Args:
	        image_pil: Image whose ``size`` (width, height) sets the canvas size;
	            its pixels are not read.
	        kps: Sequence of (x, y) keypoints. Indices 0-4 are used; index 2 is
	            the hub every limb connects to.
	        color_list: One colour triple per keypoint.

	    Returns:
	        A PIL image of the same size with the limbs (dimmed to 60%) and a
	        filled circle of radius 10 at each keypoint.
	    """
	    stickwidth = 4
	    limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
	    kps = np.array(kps)

	    w, h = image_pil.size
	    out_img = np.zeros([h, w, 3])

	    for index in limbSeq:
	        color = color_list[index[0]]

	        x = kps[index][:, 0]
	        y = kps[index][:, 1]
	        # Euclidean distance between the two endpoints of the limb.
	        length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
	        angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
	        # Each limb is a thin ellipse centred on the midpoint of its endpoints.
	        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
	        out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
	    # Dim the limbs so the keypoint circles drawn next stand out.
	    out_img = (out_img * 0.6).astype(np.uint8)

	    for idx_kp, kp in enumerate(kps):
	        color = color_list[idx_kp]
	        x, y = kp
	        out_img = cv2.circle(out_img.copy(), (int(x), int(y)), 10, color, -1)

	    out_img_pil = Image.fromarray(out_img.astype(np.uint8))
	    return out_img_pil

	def resize_img(input_image, max_side=1280, min_side=1280, size=None,
	               pad_to_max_side=True, mode=PIL.Image.BILINEAR, base_pixel_number=64):
	    """Resize an image for the pipeline and optionally pad it to a square.

	    Args:
	        input_image: PIL image to resize.
	        max_side: Upper bound for the longer side; also the side length of
	            the padded square canvas.
	        min_side: Target for the shorter side before ``max_side`` is applied.
	        size: Explicit ``(width, height)``; when given, the ratio logic is
	            skipped.
	        pad_to_max_side: If true, centre the result on a white
	            ``max_side`` x ``max_side`` canvas.
	        mode: Resampling filter passed to ``Image.resize``.
	        base_pixel_number: Output dimensions are rounded down to a multiple
	            of this value.

	    Returns:
	        The resized (and possibly padded) PIL image.
	    """
	    w, h = input_image.size
	    print(f"Original Size --> {input_image.size}")
	    if size is not None:
	        w_resize_new, h_resize_new = size
	    else:
	        # First scale so the shorter side reaches min_side ...
	        ratio = min_side / min(h, w)
	        w, h = round(ratio * w), round(ratio * h)
	        # ... then scale again so the longer side equals max_side.
	        ratio = max_side / max(h, w)
	        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
	        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
	        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
	    input_image = input_image.resize([w_resize_new, h_resize_new], mode)

	    if pad_to_max_side:
	        # White square canvas with the resized image centred on it.
	        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
	        offset_x = (max_side - w_resize_new) // 2
	        offset_y = (max_side - h_resize_new) // 2
	        res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
	        input_image = Image.fromarray(res)

	    print(f"Final modified image size --> {input_image.size}")
	    return input_image

	# def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
	# p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
	# return p.replace("{prompt}", positive), n + ' ' + negative

	def store_images(email, gallery1, gallery2, gallery3, gallery4,consent):
	    """Save the four generated avatars and record them against an email.

	    Images are written to ``images/<email>_gallery<i>.png`` and one row
	    (email plus the four paths) is appended to ``image_data.csv``.

	    Args:
	        email: Address typed by the user. It is untrusted input, so it is
	            sanitised before being used in a file name.
	        gallery1..gallery4: Generated images, as NumPy arrays or objects
	            with a ``save(path)`` method (PIL images).
	        consent: Truthy when the user ticked the consent checkbox.

	    Raises:
	        gr.Error: if consent is missing, the email is empty, or any of the
	            four images has not been generated yet.
	    """
	    import re  # local import: only needed for file-name sanitising

	    if not consent:
	        raise gr.Error("Consent not provided")

	    email = (email or "").strip()
	    if not email:
	        # Without an email every submission would share one set of files.
	        raise gr.Error("Email ID is compulsory")

	    galleries = []
	    for i, img in enumerate([gallery1, gallery2, gallery3, gallery4], start=1):
	        if img is None:
	            raise gr.Error(f"Generated image {i} is missing. Please wait until all four avatars are ready")
	        if isinstance(img, np.ndarray):
	            img = Image.fromarray(img)
	        print(f"Gallery {i} type after conversion: {type(img)}")
	        galleries.append(img)

	    # Create the images directory if it doesn't exist (no check-then-create race).
	    os.makedirs('images', exist_ok=True)

	    # Replace path separators and any other unexpected character so the
	    # address cannot escape the images/ directory.
	    safe_name = re.sub(r"[^A-Za-z0-9@._+-]", "_", email)

	    image_paths = []
	    for i, img in enumerate(galleries, start=1):
	        img_path = f'images/{safe_name}_gallery{i}.png'
	        img.save(img_path)
	        image_paths.append(img_path)

	    csv_file_path = 'image_data.csv'

	    df = pd.DataFrame({
	        'email': [email],
	        'img1_path': [image_paths[0]],
	        'img2_path': [image_paths[1]],
	        'img3_path': [image_paths[2]],
	        'img4_path': [image_paths[3]],
	    })

	    # Append to the CSV; write the header row only when creating the file.
	    write_header = not os.path.isfile(csv_file_path)
	    df.to_csv(csv_file_path, mode='a', header=write_header, index=False)

	    gr.Info("Thankyou!! Your avatar is on the way to your inbox")

	def add_watermark(image, watermark=logo, opacity=128, position="bottom_right", padding=10):
	    """Composite a semi-transparent watermark onto a copy of ``image``.

	    Args:
	        image: PIL image or NumPy array to stamp. The input is not modified.
	        watermark: PIL image or NumPy array used as the stamp.
	        opacity: Watermark opacity, 0 (invisible) to 255 (unchanged);
	            values outside that range are clamped.
	        position: One of ``"bottom_right"``, ``"bottom_left"``,
	            ``"top_right"``, ``"top_left"``.
	        padding: Gap in pixels between the watermark and the image edges.

	    Returns:
	        The stamped PIL image: ``RGBA`` if the input was ``RGBA``, otherwise
	        ``RGB``.

	    Raises:
	        ValueError: if ``position`` is not one of the supported values.
	    """
	    # Convert NumPy arrays to PIL images if needed
	    if isinstance(image, np.ndarray):
	        image = Image.fromarray(image)

	    if isinstance(watermark, np.ndarray):
	        watermark = Image.fromarray(watermark)

	    # Record the caller's mode before converting, so the result can be
	    # converted back afterwards. Checking the mode after convert("RGBA")
	    # would always see "RGBA".
	    original_mode = image.mode

	    # Work in RGBA so transparency can be handled; convert() returns new images.
	    image = image.convert("RGBA")
	    watermark = watermark.convert("RGBA").copy()

	    # Scale the watermark's own alpha channel rather than overwriting it,
	    # so already-transparent areas of the logo stay transparent.
	    opacity = max(0, min(255, int(opacity)))
	    scaled_alpha = watermark.getchannel("A").point(lambda a: a * opacity // 255)
	    watermark.putalpha(scaled_alpha)

	    # Calculate the position for the watermark
	    if position == "bottom_right":
	        x = image.width - watermark.width - padding
	        y = image.height - watermark.height - padding
	    elif position == "bottom_left":
	        x = padding
	        y = image.height - watermark.height - padding
	    elif position == "top_right":
	        x = image.width - watermark.width - padding
	        y = padding
	    elif position == "top_left":
	        x = padding
	        y = padding
	    else:
	        raise ValueError("Unsupported position. Choose from 'bottom_right', 'bottom_left', 'top_right', 'top_left'.")

	    # Paste the watermark, using its alpha channel as the mask
	    image.paste(watermark, (x, y), watermark)

	    # Convert back to 'RGB' if the original image was not 'RGBA'
	    if original_mode != "RGBA":
	        image = image.convert("RGB")

	    return image

	def generate_image(face_image,prompt,negative_prompt):
	    """Generate one watermarked avatar of the face in ``face_image``.

	    Args:
	        face_image: Image of the user's face (the UI supplies a PIL image);
	            it is passed through ``resize_img`` before face detection.
	        prompt: Positive prompt given to the pipeline.
	        negative_prompt: Negative prompt given to the pipeline.

	    Returns:
	        The first image produced by the pipeline, after ``add_watermark``.

	    Raises:
	        gr.Error: if no image was supplied or no face is detected.
	    """
	    # Validate the input before touching any pipeline state.
	    if face_image is None:
	        raise gr.Error("Cannot find any input face image! Please upload the face image")

	    # Fixed generation settings for this demo.
	    pose_image_path = None
	    enable_LCM = False
	    identitynet_strength_ratio = 0.90
	    adapter_strength_ratio = 0.60
	    num_steps = 15
	    guidance_scale = 5
	    enhance_face_region = True

	    seed = random.randint(0, MAX_SEED)
	    print(f"Seed --> {seed}")

	    if enable_LCM:
	        pipe.enable_lora()
	        pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
	    else:
	        pipe.disable_lora()
	        pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)

	    face_image = resize_img(face_image)
	    face_image_cv2 = convert_from_image_to_cv2(face_image)
	    height, width, _ = face_image_cv2.shape

	    # Extract face features
	    face_info = app.get(face_image_cv2)

	    if len(face_info) == 0:
	        raise gr.Error("Cannot find any face in the image! Please upload another person image")

	    # Sort detections by bounding-box area and keep the largest face.
	    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*(x['bbox'][3]-x['bbox'][1]))[-1]
	    face_emb = face_info['embedding']
	    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])

	    # Optional pose reference: its keypoints and size replace those of the
	    # face image. Unused while pose_image_path is None.
	    if pose_image_path is not None:
	        pose_image = load_image(pose_image_path)
	        pose_image = resize_img(pose_image)
	        pose_image_cv2 = convert_from_image_to_cv2(pose_image)

	        face_info = app.get(pose_image_cv2)

	        if len(face_info) == 0:
	            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")

	        face_info = face_info[-1]
	        face_kps = draw_kps(pose_image, face_info['kps'])

	        width, height = face_kps.size

	    if enhance_face_region:
	        control_mask = np.zeros([height, width, 3])
	        x1, y1, x2, y2 = (int(v) for v in face_info["bbox"])
	        # Clamp to the image: negative coordinates (a detection that starts
	        # above or left of the frame) would otherwise be treated by NumPy
	        # as offsets from the end and select the wrong region.
	        x1, x2 = max(x1, 0), max(x2, 0)
	        y1, y2 = max(y1, 0), max(y2, 0)
	        control_mask[y1:y2, x1:x2] = 255
	        control_mask = Image.fromarray(control_mask.astype(np.uint8))
	    else:
	        control_mask = None

	    generator = torch.Generator(device=device).manual_seed(seed)

	    print("Start inference...")
	    print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")

	    pipe.set_ip_adapter_scale(adapter_strength_ratio)
	    images = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        image_embeds=face_emb,
	        image=face_kps,
	        control_mask=control_mask,
	        controlnet_conditioning_scale=float(identitynet_strength_ratio),
	        num_inference_steps=num_steps,
	        guidance_scale=guidance_scale,
	        height=height,
	        width=width,
	        generator=generator,
	    ).images

	    watermarked_image = add_watermark(images[0])

	    return watermarked_image

	### Description: static text fragments rendered through gr.Markdown in the UI
	# Page heading, shown first inside the Blocks layout.
	title = r"""
	<h1 align="center" style="color:white;">Choose your AVATAR</h1>
	"""

	# Sub-heading rendered next to the logo image.
	description = r"""
	<h2 style="color:white;"> Powered by IDfy </h2>"""

	# Footer text rendered at the bottom of the page; currently empty.
	article = r""""""

	# Content of the hidden usage-tips Markdown component; currently empty.
	tips = r""""""
	# css = '''
	# .gradio-container {
	# width: 95% !important;
	# background-image: url('./InstantID/gradio_demo/logo.png');
	# background-size: cover;
	# background-position: center;
	# }
	# .image-gallery {
	# height: 100vh !important;
	# overflow: auto;
	# }
	# .gradio-row .gradio-element {
	# margin: 0 !important;
	# }
	# '''
	# Stylesheet handed to gr.Blocks(css=css): full-width gradient container,
	# the .centered-column class applied to the webcam column via elem_classes,
	# and #store-btn, the elem_id of the STORE button.
	css = '''
	.gradio-container {width: 100% !important; color: white; background: linear-gradient(135deg, #1C43B9, #254977, #343434);}
	.gradio-row .gradio-element { margin: 0 !important; }
	.centered-column {
	display: flex;
	justify-content: center;
	align-items: center;
	width: 100%;}
	#store-btn {
	background: #f2bb13 !important;
	color: white !important;
	}
	'''
	# Build the page: heading, logo + sub-heading, style picker, webcam input,
	# four output image slots, and the email / consent / STORE controls.
	with gr.Blocks(css=css) as demo:

	# description
	gr.Markdown(title)
	with gr.Column():
	with gr.Row():
	gr.Image("./gradio_demo/logo.png", scale=0, min_width=50, show_label=False, show_download_button=False)
	gr.Markdown(description)
	# Style selector; its value is the `style` input of the run_for_prompts* handlers.
	style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
	with gr.Row(equal_height=True): # Center the face file
	with gr.Column(elem_id="centered-face", elem_classes=["centered-column"]): # Use CSS class for centering
	# Webcam-only input delivered to handlers as a PIL image (type="pil").
	face_file = gr.Image(label="Upload a photo of your face", type="pil", sources="webcam", height=400, width=500)
	# submit = gr.Button("Submit", variant="primary")
	# Output slots: one image component per run_for_prompts* handler.
	with gr.Column():
	with gr.Row():
	gallery1 = gr.Image(label="Generated Images")
	gallery2 = gr.Image(label="Generated Images")
	with gr.Row():
	gallery3 = gr.Image(label="Generated Images")
	gallery4 = gr.Image(label="Generated Images")
	# Controls matching the parameters of store_images (email, consent).
	# NOTE(review): the submit1.click wiring is commented out further down,
	# so in the visible code the STORE button has no handler -- confirm intent.
	email = gr.Textbox(label="Email", info="Enter your email address", value="")
	consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
	submit1 = gr.Button("STORE",elem_id="store-btn")
	# with gr.Blocks(css=css) as demo:

	# # description
	# gr.Markdown(title)
	# with gr.Column():
	# with gr.Row():
	# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
	# gr.Markdown(description)
	# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
	# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam", height=400, width=500)
	# submit = gr.Button("Submit", variant="primary")
	# with gr.Column():
	# with gr.Row():
	# gallery1 = gr.Image(label="Generated Images")
	# gallery2 = gr.Image(label="Generated Images")
	# with gr.Row():
	# gallery3 = gr.Image(label="Generated Images")
	# gallery4 = gr.Image(label="Generated Images")
	# email = gr.Textbox(label="Email",
	# info="Enter your email address",
	# value="")
	# consent = gr.Checkbox(label="I am giving my consent to use my data to share my AI Avtar and IDfy relevant information from time to time")
	# submit1 = gr.Button("STORE", variant="primary")
	# # submit1 = gr.Button("Store")
	# Hidden Markdown component holding the `tips` text; it is the output of remove_tips.
	usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)

	# Event chain on the face image's upload event: remove_tips first, then the
	# four run_for_prompts* handlers chained with .then(), each reading the same
	# (face_file, style) inputs and writing to its own gallery image.
	face_file.upload(
	fn=remove_tips,
	outputs=usage_tips,
	queue=True,
	api_name=False,
	show_progress = "full"
	).then(
	fn=run_for_prompts1,
	inputs=[face_file,style],
	outputs=[gallery1]
	).then(
	fn=run_for_prompts2,
	inputs=[face_file,style],
	outputs=[gallery2]
	).then(
	fn=run_for_prompts3,
	inputs=[face_file,style],
	outputs=[gallery3]
	).then(
	fn=run_for_prompts4,
	inputs=[face_file,style],
	outputs=[gallery4]
	)
	# submit.click(
	# fn=remove_tips,
	# outputs=usage_tips,
	# queue=True,
	# api_name=False,
	# show_progress = "full"
	# ).then(
	# fn=run_for_prompts1,
	# inputs=[face_file,style],
	# outputs=[gallery1]
	# ).then(
	# fn=run_for_prompts2,
	# inputs=[face_file,style],
	# outputs=[gallery2]
	# ).then(
	# fn=run_for_prompts3,
	# inputs=[face_file,style],
	# outputs=[gallery3]
	# ).then(
	# fn=run_for_prompts4,
	# inputs=[face_file,style],
	# outputs=[gallery4]
	# )

	# submit1.click(
	# fn=store_images,
	# inputs=[email,gallery1,gallery2,gallery3,gallery4,consent],
	# outputs=None)



	# Footer text (the `article` string, currently empty).
	gr.Markdown(article)

	# Start the Gradio app; share=True is passed to launch().
	# NOTE(review): share=True presumably requests a public share link -- confirm
	# this is wanted for the deployment target.
	demo.launch(share=True)

	# with gr.Blocks(css=css, js=js) as demo:

	# # description
	# gr.Markdown(title)
	# with gr.Row():
	# gr.Image("./gradio_demo/logo.png",scale=0,min_width=50,show_label=False,show_download_button=False)
	# gr.Markdown(description)
	# with gr.Row():
	# with gr.Column():
	# style = gr.Dropdown(label="Choose your STYLE", choices=STYLE_NAMES)
	# face_file = gr.Image(label="Upload a photo of your face", type="pil",sources="webcam")
	# submit = gr.Button("Submit", variant="primary")
	# with gr.Column():
	# with gr.Row():
	# gallery1 = gr.Image(label="Generated Images")
	# gallery2 = gr.Image(label="Generated Images")
	# with gr.Row():
	# gallery3 = gr.Image(label="Generated Images")
	# gallery4 = gr.Image(label="Generated Images")
	# email = gr.Textbox(label="Email",
	# info="Enter your email address",
	# value="")

	# usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
	# # identitynet_strength_ratio = gr.Slider(
	# # label="IdentityNet strength (for fidelity)",
	# # minimum=0,
	# # maximum=1.5,
	# # step=0.05,
	# # value=0.95,
	# # )
	# # adapter_strength_ratio = gr.Slider(
	# # label="Image adapter strength (for detail)",
	# # minimum=0,
	# # maximum=1.5,
	# # step=0.05,
	# # value=0.60,
	# # )
	# # negative_prompt = gr.Textbox(
	# # label="Negative Prompt",
	# # placeholder="low quality",
	# # value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
	# # )
	# # num_steps = gr.Slider(
	# # label="Number of sample steps",
	# # minimum=15,
	# # maximum=100,
	# # step=1,
	# # value=5 if enable_lcm_arg else 15,
	# # )
	# # guidance_scale = gr.Slider(
	# # label="Guidance scale",
	# # minimum=0.1,
	# # maximum=10.0,
	# # step=0.1,
	# # value=0 if enable_lcm_arg else 8.5,
	# # )
	# # if email is None:
	# # print("STOPPPP")
	# # raise gr.Error("Email ID is compulsory")
	# face_file.upload(
	# fn=remove_tips,
	# outputs=usage_tips,
	# queue=True,
	# api_name=False,
	# show_progress = "full"
	# ).then(
	# fn=run_for_prompts1,
	# inputs=[face_file,style],
	# outputs=[gallery1]
	# ).then(
	# fn=run_for_prompts2,
	# inputs=[face_file,style],
	# outputs=[gallery2]
	# ).then(
	# fn=run_for_prompts3,
	# inputs=[face_file,style],
	# outputs=[gallery3]
	# ).then(
	# fn=run_for_prompts4,
	# inputs=[face_file,style],
	# outputs=[gallery4]
	# )
	# submit.click(
	# fn=remove_tips,
	# outputs=usage_tips,
	# queue=True,
	# api_name=False,
	# show_progress = "full"
	# ).then(
	# fn=run_for_prompts1,
	# inputs=[face_file,style],
	# outputs=[gallery1]
	# ).then(
	# fn=run_for_prompts2,
	# inputs=[face_file,style],
	# outputs=[gallery2]
	# ).then(
	# fn=run_for_prompts3,
	# inputs=[face_file,style],
	# outputs=[gallery3]
	# ).then(
	# fn=run_for_prompts4,
	# inputs=[face_file,style],
	# outputs=[gallery4]
	# )


	# gr.Markdown(article)

	# demo.launch(share=True)

	if __name__ == "__main__":
	    # Script entry point: take the base-model location from the command line
	    # and start the demo with the LCM flag switched off.
	    parser = argparse.ArgumentParser()
	    parser.add_argument(
	        "--pretrained_model_name_or_path",
	        type=str,
	        default="wangqixun/YamerMIX_v8",
	    )
	    args = parser.parse_args()

	    main(
	        pretrained_model_name_or_path=args.pretrained_model_name_or_path,
	        enable_lcm_arg=False,
	    )