# Spaces: emirhanbilgic / future-baby
# Running on Zero
# File size: 7,526 Bytes

import gradio as gr
import spaces
import os
import cv2
import torch
from PIL import Image
from insightface.app import FaceAnalysis
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID
from transformers import CLIPFeatureExtractor
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler, AutoencoderKL
from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
import dlib
import imutils
from imutils import face_utils
import numpy as np
from skimage import transform as tf
import random

# Identifier handed to StableDiffusionPipeline.from_pretrained in setup().
base_model_path = "SG161222/Realistic_Vision_V6.0_B1_noVAE"
# Passed as cache_dir when the base pipeline is loaded in setup().
base_cache = "model-cache"
# Identifier handed to AutoencoderKL.from_pretrained; the resulting VAE is
# given to the pipeline in setup().
vae_model_path = "stabilityai/sd-vae-ft-mse"
# Directory that holds the downloaded IP-Adapter FaceID weights.
ip_cache = "./ip-cache"
# Device the pipeline is moved to and the IP-Adapter is constructed with.
device = "cuda"

def _download_if_missing(url, destination):
    """Download *url* to *destination* with wget unless the file already exists.

    The payload is written to ``destination + ".part"`` and renamed only after
    wget exits successfully, so an interrupted or failed download never leaves
    a truncated file that a later existence check would mistake for a
    complete one.

    Args:
        url: Address of the file to fetch.
        destination: Path the finished download is stored at.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status.
        FileNotFoundError: If the wget executable is not available.
    """
    # Local import so this block does not depend on the file-level imports.
    import subprocess

    if os.path.exists(destination):
        return

    os.makedirs(os.path.dirname(destination) or ".", exist_ok=True)
    partial_path = destination + ".part"
    try:
        # Argument list (no shell) and check=True so failures surface here
        # instead of later as a corrupt-model error.
        subprocess.run(["wget", "-O", partial_path, url], check=True)
        os.replace(partial_path, destination)
    finally:
        # Only present when the download or the rename did not complete.
        if os.path.exists(partial_path):
            os.remove(partial_path)


# Setup function to load models and other dependencies
def setup():
    """Load the models into memory to make running multiple predictions efficient.

    Downloads the IP-Adapter FaceID weights and the dlib 68-landmark shape
    predictor when they are not on disk yet, prepares the insightface
    ``FaceAnalysis`` app, builds the Stable Diffusion pipeline on ``device``
    and wraps it in ``IPAdapterFaceID``.

    Returns:
        A ``(app, ip_model)`` tuple: the prepared ``FaceAnalysis`` instance and
        the ``IPAdapterFaceID`` wrapper around the pipeline.

    Raises:
        subprocess.CalledProcessError: If one of the required downloads fails.
    """
    # Get ip-adapter-faceid model
    ip_adapter_weights = os.path.join(ip_cache, "ip-adapter-faceid_sd15.bin")
    _download_if_missing(
        "https://huggingface.co/h94/IP-Adapter-FaceID/resolve/main/ip-adapter-faceid_sd15.bin",
        ip_adapter_weights,
    )

    # Download shape_predictor_68_face_landmarks.dat if it doesn't exist
    _download_if_missing(
        "https://github.com/italojs/facial-landmarks-recognition/raw/master/shape_predictor_68_face_landmarks.dat",
        "faceid/shape_predictor_68_face_landmarks.dat",
    )

    # Face embedding
    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    # SD
    noise_scheduler = EulerDiscreteScheduler(
        num_train_timesteps=1000,
        beta_start=0.00085,
        beta_end=0.012
    )
    vae = AutoencoderKL.from_pretrained(
        vae_model_path
    ).to(dtype=torch.float16)
    pipe = StableDiffusionPipeline.from_pretrained(
        base_model_path,
        torch_dtype=torch.float16,
        scheduler=noise_scheduler,
        vae=vae,
        feature_extractor=CLIPFeatureExtractor.from_pretrained("openai/clip-vit-base-patch32"),
        safety_checker=StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker"),
        cache_dir=base_cache,
    )
    pipe = pipe.to(device)

    # IP adapter
    ip_model = IPAdapterFaceID(
        pipe,
        ip_adapter_weights,
        device
    )

    return app, ip_model

# Run setup() once at import time; generate_image reads these two
# module-level objects on every request.
app, ip_model = setup()

def get_face_landmarks(image_path):
    """Return the landmarks of the first face found in an image file.

    The image is read with OpenCV, resized to a width of 512 pixels and
    surrounded by a 50-pixel white border before detection runs on its
    grayscale version. Landmarks come from the dlib shape predictor stored at
    ``faceid/shape_predictor_68_face_landmarks.dat``.

    Args:
        image_path: Path of the image file to analyse.

    Returns:
        A ``(landmarks, image)`` tuple: the landmark array produced by
        ``face_utils.shape_to_np`` for the first detected face, and the
        resized, padded BGR image those coordinates refer to.

    Raises:
        ValueError: If the file cannot be read as an image, or if no face is
            detected in it.
    """
    padding_size = 50

    detector = dlib.get_frontal_face_detector()
    predictor = dlib.shape_predictor('faceid/shape_predictor_68_face_landmarks.dat')

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise ValueError(f"Could not read an image from {image_path!r}")
    image = imutils.resize(image, width=512)

    # Add white padding around the image
    image = cv2.copyMakeBorder(
        image,
        top=padding_size,
        bottom=padding_size,
        left=padding_size,
        right=padding_size,
        borderType=cv2.BORDER_CONSTANT,
        value=[255, 255, 255]  # White padding
    )

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Only the first detection is used; any further faces are ignored.
    for rect in detector(gray, 1):
        shape = face_utils.shape_to_np(predictor(gray, rect))
        return shape, image

    # Falling off the loop used to return None implicitly, which made the
    # caller's tuple unpacking fail with an unrelated-looking TypeError.
    raise ValueError(f"No face detected in {image_path!r}")

def morph_faces(image1_path, image2_path, alpha=0.5):
    """Align two face images to their mean landmarks and blend them.

    Each image is warped with a similarity transform estimated from its own
    landmarks to the element-wise mean of both landmark sets, cropped to a
    512x512 output, and the two results are mixed as
    ``(1 - alpha) * first + alpha * second``.

    Args:
        image1_path: Path of the first face image.
        image2_path: Path of the second face image.
        alpha: Weight of the second image in the blend.

    Returns:
        The path of the written blend, always ``"tmp.png"``.
    """
    points_a, face_a = get_face_landmarks(image1_path)
    points_b, face_b = get_face_landmarks(image2_path)

    # Common landmark layout both faces are aligned to.
    target_points = (points_a + points_b) / 2
    canvas_shape = (512, 512)

    aligned = []
    for points, face in ((points_a, face_a), (points_b, face_b)):
        to_target = tf.estimate_transform('similarity', points, target_points)
        aligned.append(
            tf.warp(face, inverse_map=to_target.inverse, output_shape=canvas_shape)
        )
    aligned_a, aligned_b = aligned

    blend = (1 - alpha) * aligned_a + alpha * aligned_b
    # Convert to [0, 255] range before writing with OpenCV.
    blend_u8 = (blend * 255).astype(np.uint8)

    output_path = "tmp.png"
    cv2.imwrite(output_path, blend_u8)
    return output_path

def get_negative_prompt(gender):
    """Return the negative prompt used for the given gender choice.

    "Boy" and "Girl" each prepend their own gender-specific exclusions to a
    shared list of quality exclusions; any other value (e.g. "Random") gets
    the shared list alone.
    """
    shared_terms = (
        "(deformed iris, deformed pupils, semi-realistic, cgi, 3d, render, "
        "sketch, cartoon, drawing, anime, mutated hands and fingers:1.4), "
        "(deformed, distorted, disfigured:1.3), poorly drawn, bad anatomy, "
        "wrong anatomy, extra limb, missing limb, floating limbs, "
        "disconnected limbs, mutation, mutated, ugly, disgusting, amputation"
    )
    gender_specific = (
        ("Boy", "(mascara, makeup: 1.4), (breasts, boobs, naked, nude: 1.4), "),
        ("Girl", "(beard, mustache, male features: 1.4), (naked, nude: 1.4), "),
    )
    for label, leading_terms in gender_specific:
        if gender == label:
            return leading_terms + shared_terms
    # Random (or anything unrecognised): shared exclusions only.
    return shared_terms

def construct_prompt(base_prompt, additional_prompt, gender):
    """Assemble the positive prompt from its comma-separated parts.

    The base prompt comes first, followed by gender keywords for "Boy" or
    "Girl" (nothing for any other value) and finally the user's additional
    prompt when it is non-empty.
    """
    segments = [base_prompt]

    if gender == "Boy":
        segments.append("male child, boy")
    elif gender == "Girl":
        segments.append("female child, girl")

    if additional_prompt:
        segments.append(additional_prompt)

    return ", ".join(segments)

@spaces.GPU(duration=40)
def generate_image(face_image_1, face_image_2, additional_prompt, gender):
    """Generate a child portrait conditioned on the blend of two face images.

    The two inputs are morphed into one face image, a face embedding is
    taken from that image with the module-level ``app``, and the
    module-level ``ip_model`` generates one 768x768 picture from the prompt
    and that embedding.

    Args:
        face_image_1: File path of the first face image.
        face_image_2: File path of the second face image.
        additional_prompt: Optional extra text appended to the prompt.
        gender: "Boy", "Girl" or another value; selects prompt keywords and
            the negative prompt.

    Returns:
        The first image returned by ``ip_model.generate``.

    Raises:
        gr.Error: If an input image is missing, the morphed image cannot be
            read back, or no face is found in the morphed image.
    """
    if not face_image_1 or not face_image_2:
        raise gr.Error("Please provide both face images.")

    base_prompt = "portrait of a 6 y.o. child, 8k, HD, happy, perfect eyes, cute"
    full_prompt = construct_prompt(base_prompt, additional_prompt, gender)
    negative_prompt = get_negative_prompt(gender)

    baby_image_path = morph_faces(face_image_1, face_image_2)

    morphed_image = cv2.imread(baby_image_path)
    if morphed_image is None:
        # cv2.imread returns None instead of raising when the read fails.
        raise gr.Error("The morphed face image could not be read.")

    detected_faces = app.get(morphed_image)
    if not detected_faces:
        # Indexing [0] on an empty result used to surface as a bare IndexError.
        raise gr.Error(
            "No face could be detected in the morphed image. "
            "Try clearer, front-facing photos."
        )

    # Add a leading batch dimension to the embedding of the first face.
    faceid_embeds = torch.from_numpy(detected_faces[0].normed_embedding).unsqueeze(0)

    generated_images = ip_model.generate(
        prompt=full_prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=faceid_embeds,
        num_samples=1,
        width=768,
        height=768,
        num_inference_steps=40,
        seed=None
    )
    return generated_images[0]

# Gradio Interface with Examples
# The four inputs are listed in the same order as generate_image's
# parameters: two face images, the additional prompt, and the gender choice.
gr_interface = gr.Interface(
    fn=generate_image,
    inputs=[
        # type="filepath": generate_image forwards these values to
        # morph_faces, which reads them from disk with cv2.imread.
        gr.Image(type="filepath", label="First Face Image"),
        gr.Image(type="filepath", label="Second Face Image"),
        gr.Textbox(label="Prompt"),
        gr.Dropdown(choices=["Boy", "Girl", "Random"], value="Boy", label="Gender")
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Face Morphing and Image Generation with Stable Diffusion",
    examples=[
        # NOTE(review): assumes both image files are shipped next to this
        # script; confirm they exist in the repository.
        ["yann-lecun.jpg", "isabelle-guyon.jpg", "playing chess", "Boy"]
    ]
)

# NOTE(review): share=True presumably requests a public share link from
# Gradio; confirm it is needed in the environment this runs in.
gr_interface.launch(share=True)