# Hugging Face Space (status at capture time: Running on Zero)
import os
import random
import subprocess

# Third-party imports keep their original relative order.
import gradio as gr
import spaces
import cv2
import torch
from PIL import Image
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from transformers import CLIPFeatureExtractor
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import dlib
import imutils
from imutils import face_utils
import numpy as np
from skimage import transform as tf
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE" | |
base_cache = "model-cache" | |
vae_model_path = "stabilityai/sd-vae-ft-mse" | |
ip_cache = "./ip-cache" | |
device = "cuda" | |
def _download_if_missing(url, destination):
    """Download *url* to *destination* with wget unless the file already exists.

    The data is written to ``destination + ".part"`` and only renamed onto
    *destination* after wget exits successfully, so a failed or interrupted
    download can never be mistaken for a complete file on the next start.

    Raises:
        subprocess.CalledProcessError: if wget exits with a non-zero status.
        FileNotFoundError: if the ``wget`` executable is not available.
    """
    if os.path.exists(destination):
        return
    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    partial_path = destination + ".part"
    try:
        # Argument list (no shell) and check=True: a failed download aborts
        # start-up instead of silently leaving an empty file behind.
        subprocess.run(["wget", "-O", partial_path, url], check=True)
        os.replace(partial_path, destination)
    finally:
        # After a successful rename the partial file no longer exists.
        if os.path.exists(partial_path):
            os.remove(partial_path)


# Setup function to load models and other dependencies
def setup():
    """Load the models into memory to make running multiple predictions efficient.

    Downloads the IP-Adapter FaceID weights and the dlib 68-point landmark
    model if they are not on disk yet, then builds the InsightFace analyser
    and the Stable Diffusion pipeline wrapped by IP-Adapter FaceID.

    Returns:
        A ``(app, ip_model)`` tuple: the prepared ``FaceAnalysis`` instance and
        the ``IPAdapterFaceID`` wrapper around the pipeline.

    Raises:
        subprocess.CalledProcessError: if one of the model downloads fails.
    """
    # Get ip-adapter-faceid model
    ip_ckpt = os.path.join(ip_cache, "ip-adapter-faceid_sd15.bin")
    _download_if_missing(
        "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin",
        ip_ckpt,
    )

    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    _download_if_missing(
        "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
        "faceid/shape_predictor_68_face_landmarks.dat",
    )

    # Face embedding
    app = FaceAnalysis(
        name="buffalo_l",
        providers=['CUDAExecutionProvider', 'CPUExecutionProvider'],
    )
    app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012,
    )
    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter
    ip_model = IPAdapterFaceID(pipe, ip_ckpt, device)
    return app, ip_model


# Models are loaded once at import time and shared by every request.
app, ip_model = setup()
def get_face_landmarks(image_path):
    """Locate a face in an image file and return its 68 dlib landmarks.

    The image is read with OpenCV, resized to a width of 512 pixels and
    surrounded by a 50-pixel white border before detection runs.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A ``(landmarks, image)`` tuple: the landmark coordinates as produced by
        ``face_utils.shape_to_np`` and the resized, padded BGR image they refer
        to. When several faces are detected, the landmarks of the last one
        reported by the detector are returned (same choice as before).

    Raises:
        ValueError: if the file cannot be read as an image, or if no face is
            detected in it (previously this surfaced as an UnboundLocalError).
    """
    def add_padding(image, padding_size=50):
        # White border of `padding_size` pixels on every side.
        return cv2.copyMakeBorder(
            image,
            top=padding_size,
            bottom=padding_size,
            left=padding_size,
            right=padding_size,
            borderType=cv2.BORDER_CONSTANT,
            value=[255, 255, 255],  # White padding
        )

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise ValueError(f"Could not read an image from {image_path!r}")

    image = imutils.resize(image, width=512)
    # Add padding to the image
    image = add_padding(image)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    detector = dlib.get_frontal_face_detector()
    faces = list(detector(gray, 1))
    if not faces:
        raise ValueError(f"No face detected in {image_path!r}")

    predictor = dlib.shape_predictor('faceid/shape_predictor_68_face_landmarks.dat')
    # Only the last detection is returned, so only that one is evaluated.
    shape = face_utils.shape_to_np(predictor(gray, faces[-1]))
    return shape, image
def morph_faces(image1_path, image2_path, alpha=0.5):
    """Morph two face photos into one image and save it to disk.

    Both faces are aligned with a similarity transform onto a common set of
    target landmarks and the aligned images are cross-dissolved.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Weight of the second face, applied to both the target landmark
            geometry and the pixel blend. ``0.5`` (the default) is an even mix
            and gives the same result as the previous fixed-midpoint code.

    Returns:
        The path of the written image (``"tmp.png"`` in the working directory;
        the file is overwritten on every call).

    Raises:
        OSError: if the morphed image cannot be written.
    """
    landmarks1, image1 = get_face_landmarks(image1_path)
    landmarks2, image2 = get_face_landmarks(image2_path)

    # Interpolate the target shape with the same weight as the pixel blend so
    # geometry and appearance stay consistent for any alpha.
    target_landmarks = (1 - alpha) * landmarks1 + alpha * landmarks2

    tform1 = tf.estimate_transform('similarity', landmarks1, target_landmarks)
    tform2 = tf.estimate_transform('similarity', landmarks2, target_landmarks)
    morphed_image1 = tf.warp(image1, inverse_map=tform1.inverse, output_shape=(512, 512))
    morphed_image2 = tf.warp(image2, inverse_map=tform2.inverse, output_shape=(512, 512))

    morphed_image = (1 - alpha) * morphed_image1 + alpha * morphed_image2
    morphed_image = (morphed_image * 255).astype(np.uint8)  # Convert to [0, 255] range

    output_path = "tmp.png"
    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(output_path, morphed_image):
        raise OSError(f"Could not write morphed image to {output_path!r}")
    return output_path
def get_negative_prompt(gender):
    """Return the negative prompt to use for the requested gender.

    Every variant ends with the same quality-related terms; "Boy" and "Girl"
    additionally get a gender-specific prefix. Any other value (for example
    "Random") yields only the shared terms.
    """
    shared_terms = (
        "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, "
        "sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), "
        "(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, "
        "wrong anatomy, extra limb, missing limb, floating limbs, "
        "disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    )

    if gender == "Boy":
        prefix = "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), "
    elif gender == "Girl":
        prefix = "(beard, mustache, male features: 1.4), (naked, nude: 1.4), "
    else:  # Random
        prefix = ""
    return prefix + shared_terms
def construct_prompt(base_prompt, additional_prompt, gender):
    """Assemble the positive prompt from its comma-separated parts.

    The base prompt always comes first. It is followed by gender keywords for
    "Boy" or "Girl" (nothing for any other value) and finally by the user's
    additional prompt when that is non-empty.
    """
    segments = [base_prompt]

    if gender == "Boy":
        segments.append("male child, boy")
    elif gender == "Girl":
        segments.append("female child, girl")

    if additional_prompt:
        segments.append(additional_prompt)

    return ", ".join(segments)
def generate_image(face_image_1, face_image_2, additional_prompt, gender):
    """Generate a child portrait conditioned on the morph of two face photos.

    The two input faces are morphed into one image, a face embedding is
    extracted from that morph with InsightFace, and the embedding conditions
    a single 768x768 IP-Adapter FaceID generation (40 steps, random seed).

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        additional_prompt: Optional extra text appended to the prompt.
        gender: "Boy", "Girl" or anything else for no gender keywords.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: if either image is missing, or if no face is found in the
            morphed image (previously an IndexError).
    """
    # NOTE(review): `spaces` is imported by this file but no `@spaces.GPU`
    # decorator is applied here - confirm whether the deployment needs it.

    # Gradio passes None for an image input the user left empty.
    if not face_image_1 or not face_image_2:
        raise gr.Error("Please provide both face images.")

    base_prompt = "portrait of a 6 y.o. child, 8k, HD, happy, perfect eyes, cute"
    full_prompt = construct_prompt(base_prompt, additional_prompt, gender)
    negative_prompt = get_negative_prompt(gender)

    baby_image_path = morph_faces(face_image_1, face_image_2)

    detected_faces = app.get(cv2.imread(baby_image_path))
    if not detected_faces:
        raise gr.Error(
            "No face could be detected in the morphed image. "
            "Please try clearer, front-facing photos."
        )

    # Add a leading batch dimension to the embedding of the first detected face.
    faceid_embeds = torch.from_numpy(detected_faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=768,
        height=768,
        num_inference_steps=40,
        seed=None,
    )
    return generated_images[0]
# Gradio Interface with Examples
# Input components, in the order of generate_image's parameters.
_input_components = [
    gr.Image(type="filepath", label="First Face Image"),
    gr.Image(type="filepath", label="Second Face Image"),
    gr.Textbox(label="Prompt"),
    gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender"),
]
_output_component = gr.Image(label="Generated Image")

# One example row: two image files, a prompt and a gender choice.
_example_rows = [
    ["yann-lecun.jpg", "isabelle-guyon.jpg", "playing chess", "Boy"],
]

gr_interface = gr.Interface(
    fn=generate_image,
    inputs=_input_components,
    outputs=_output_component,
    title="Face Morphing and Image Generation with Stable Diffusion",
    examples=_example_rows,
)

# Start the web app as soon as the script runs.
gr_interface.launch(share=True)