# future-baby / app.py
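"""Gradio Space that morphs two parent face photos with dlib landmarks, then
generates a "future baby" portrait by conditioning Stable Diffusion on the
morphed face's identity embedding via IP-Adapter FaceID."""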
import gradio as gr
import spaces
import os
import cv2
import torch
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from transformers import CLIPFeatureExtractor
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import dlib
import imutils
from imutils import face_utils
import numpy as np
from skimage import transform as tf

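# Model configuration. Realistic Vision V6.0 "noVAE" ships without a baked-in
# VAE, so the sd-vae-ft-mse autoencoder is loaded separately in setup() below.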
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
base_cache = "model-cache"
vae_model_path = "stabilityai/sd-vae-ft-mse"
ip_cache = "./ip-cache"
device = "cuda"
# Setup function to load models and other dependencies
def setup():
    """Load the models into memory to make running multiple predictions efficient."""
    # Get the IP-Adapter FaceID weights if they are not cached yet
    if not os.path.exists("ip-cache/ip-adapter-faceid_sd15.bin"):
        os.makedirs(ip_cache, exist_ok=True)
        os.system(
            f"wget -O {ip_cache}/ip-adapter-faceid_sd15.bin "
            "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin"
        )
    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    if not os.path.exists("faceid/shape_predictor_68_face_landmarks.dat"):
        os.makedirs("faceid", exist_ok=True)
        os.system(
            "wget -O faceid/shape_predictor_68_face_landmarks.dat "
            "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat"
        )

    # Face embedding (InsightFace "buffalo_l" pack, ArcFace recognition model)
    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",  # SD 1.x checkpoints are trained with this schedule
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter: wraps the pipeline so generation can be conditioned on a face embedding
    ip_model = IPAdapterFaceID(pipe, "ip-cache/ip-adapter-faceid_sd15.bin", device)
    return app, ip_model

app, ip_model = setup()
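# Face landmark detection: dlib's 68-point shape predictor locates the jawline,
# eyes, nose and mouth; a white border is added first so faces close to the
# frame edge remain detectable after resizing.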
def get_face_landmarks(image_path):
    def add_padding(image, padding_size=50):
        return cv2.copyMakeBorder(
            image,
            top=padding_size,
            bottom=padding_size,
            left=padding_size,
            right=padding_size,
            borderType=cv2.BORDER_CONSTANT,
            value=[255, 255, 255],  # White padding
        )

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('faceid/shape_predictor_68_face_landmarks.dat')
    image = cv2.imread(image_path)
    image = imutils.resize(image, width=512)
    # Add padding to the image
    image = add_padding(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    rects = detector(gray, 1)
    if len(rects) == 0:
        raise gr.Error("No face detected in one of the uploaded images.")
    # Use the first detected face
    shape = face_utils.shape_to_np(predictor(gray, rects[0]))
    return shape, image

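# Face morphing: both faces are aligned to a shared landmark set with
# similarity transforms (rotation + scale + translation), then cross-dissolved
# pixel-wise. alpha=0.5 gives an equal blend of the two parents.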
def morph_faces(image1_path, image2_path, alpha=0.5):
    landmarks1, image1 = get_face_landmarks(image1_path)
    landmarks2, image2 = get_face_landmarks(image2_path)
    # Weight the target landmark set by alpha so geometry and color blend consistently
    average_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2
    # Align each face to the shared landmark set
    tform1 = tf.estimate_transform('similarity', landmarks1, average_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, average_landmarks)
    morphed_image1 = tf.warp(image1, inverse_map=tform1.inverse, output_shape=(512, 512))
    morphed_image2 = tf.warp(image2, inverse_map=tform2.inverse, output_shape=(512, 512))
    # Cross-dissolve the aligned faces; tf.warp returns floats in [0, 1]
    morphed_image = (1 - alpha) * morphed_image1 + alpha * morphed_image2
    morphed_image = (morphed_image * 255).astype(np.uint8)  # Convert back to [0, 255] range
    output_path = "tmp.png"
    cv2.imwrite(output_path, morphed_image)
    return output_path

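# Example (hypothetical file names): morph_faces("parent1.jpg", "parent2.jpg")
# writes the 50/50 blend to "tmp.png" and returns that path.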
def get_negative_prompt(gender):
    if gender == "Boy":
        return "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), (deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    elif gender == "Girl":
        return "(beard, mustache, male features: 1.4), (naked, nude: 1.4), (deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    else:  # Random
        return "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), (deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, wrong anatomy, extra limb, missing limb, floating limbs, disconnected limbs, mutation, mutated, ugly, disgusting, amputation"

def construct_prompt(base_prompt, additional_prompt, gender):
    full_prompt = base_prompt
    if gender == "Boy":
        full_prompt += ", male child, boy"
    elif gender == "Girl":
        full_prompt += ", female child, girl"
    if additional_prompt:
        full_prompt += ", " + additional_prompt
    return full_prompt

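# Generation flow: morph the parents, extract an ArcFace identity embedding
# from the morphed face, and let IP-Adapter FaceID steer Stable Diffusion
# toward that identity while the text prompt controls age, gender and scene.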
@spaces.GPU(duration=40)
def generate_image(face_image_1, face_image_2, additional_prompt, gender):
    base_prompt = "portrait of a 6 y.o. child, 8k, HD, happy, perfect eyes, cute"
    full_prompt = construct_prompt(base_prompt, additional_prompt, gender)
    negative_prompt = get_negative_prompt(gender)

    # Morph the two parent faces, then extract the identity embedding
    baby_image_path = morph_faces(face_image_1, face_image_2)
    faces = app.get(cv2.imread(baby_image_path))
    if not faces:
        raise gr.Error("Could not detect a face in the morphed image.")
    faceid_embeds = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)

    images = ip_model.generate(
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=768,
        height=768,
        num_inference_steps=40,
        seed=None,
    )
    return images[0]

# Gradio Interface with Examples
gr_interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Image(type="filepath", label="First Face Image"),
        gr.Image(type="filepath", label="Second Face Image"),
        gr.Textbox(label="Additional Prompt"),
        gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Face Morphing and Image Generation with Stable Diffusion",
    examples=[
        ["yann-lecun.jpg", "isabelle-guyon.jpg", "playing chess", "Boy"]
    ],
)
gr_interface.launch(share=True)
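# Local usage (a sketch; assumes a CUDA GPU and the example images in the repo
# root): run `python app.py` and open the printed URL. Note that share=True is
# ignored when the app runs on Hugging Face Spaces.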