InstantID2

Runtime error

App Files Files Community

wanghaofan

hysts HF staff commited on Jan 24

Commit

7dd7d9c

•

1 Parent(s): 0add3fc

Replace gr.Files with gr.Image (#8)

Browse files

- Apply formatter (b106068ca93ab77524c9462ecb68227be875efb0)
- Clean up (d3fe5c54697abbb6981b1dc434863c55ca0fde3a)
- Use gr.Image instead of gr.Files (938be836b4c274cd120d4e408e793e7247a03342)
- Add cached examples (a075b7a9c7764bfe3c4e6ace40d3b12c06e08015)

Co-authored-by: hysts <[email protected]>

Files changed (8) hide show

.gitattributes +1 -0
app.py +165 -141
gradio_cached_examples/25/Generated Image/2880a3d19ef9b42e3ed2/image.png +3 -0
gradio_cached_examples/25/Generated Image/38dde1388c41109c5d39/image.png +3 -0
gradio_cached_examples/25/Generated Image/6cb5a3af223906666bfd/image.png +3 -0
gradio_cached_examples/25/Generated Image/7e6ea85f77dd925d842c/image.png +3 -0
gradio_cached_examples/25/Generated Image/e56b029833685ff77e6a/image.png +3 -0
gradio_cached_examples/25/log.csv +6 -0

.gitattributes CHANGED Viewed

@@ -36,3 +36,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 examples/kaifu_resize.png filter=lfs diff=lfs merge=lfs -text
 examples/sam_resize.png filter=lfs diff=lfs merge=lfs -text
 examples/schmidhuber_resize.png filter=lfs diff=lfs merge=lfs -text

 examples/kaifu_resize.png filter=lfs diff=lfs merge=lfs -text
 examples/sam_resize.png filter=lfs diff=lfs merge=lfs -text
 examples/schmidhuber_resize.png filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -1,25 +1,19 @@
-import os
-import cv2
 import math
-import spaces
-import torch
 import random
-import numpy as np
 import PIL
-from PIL import Image
-import diffusers
-from diffusers.utils import load_image
 from diffusers.models import ControlNetModel
-import insightface
 from insightface.app import FaceAnalysis
-from style_template import styles
 from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
-import gradio as gr
 # global variable
 MAX_SEED = np.iinfo(np.int32).max
@@ -29,22 +23,27 @@ DEFAULT_STYLE_NAME = "Watercolor"
 # download checkpoints
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
-hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
 hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
 # Load face encoder
-app = FaceAnalysis(name='antelopev2', root='./', providers=['CPUExecutionProvider'])
 app.prepare(ctx_id=0, det_size=(640, 640))
 # Path to InstantID models
-face_adapter = f'./checkpoints/ip-adapter.bin'
-controlnet_path = f'./checkpoints/ControlNetModel'
 # Load pipeline
 controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
-base_model_path = 'wangqixun/YamerMIX_v8'
 pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
     base_model_path,
@@ -55,54 +54,48 @@ pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
 )
 pipe.cuda()
 pipe.load_ip_adapter_instantid(face_adapter)
-pipe.image_proj_model.to('cuda')
-pipe.unet.to('cuda')
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     return seed
-def swap_to_gallery(images):
-    return gr.update(value=images, visible=True), gr.update(visible=True), gr.update(visible=False)
-def upload_example_to_gallery(images, prompt, style, negative_prompt):
-    return gr.update(value=images, visible=True), gr.update(visible=True), gr.update(visible=False)
-def remove_back_to_files():
-    return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
 def remove_tips():
     return gr.update(visible=False)
 def get_example():
     case = [
         [
-            ['./examples/yann-lecun_resize.jpg'],
             "a man",
             "Snow",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
-            ['./examples/musk_resize.jpeg'],
             "a man",
             "Mars",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
-            ['./examples/sam_resize.png'],
             "a man",
             "Jungle",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, gree",
         ],
         [
-            ['./examples/schmidhuber_resize.png'],
             "a man",
             "Neon",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
-            ['./examples/kaifu_resize.png'],
             "a man",
             "Vibrant Color",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
@@ -110,16 +103,20 @@ def get_example():
     ]
     return case
-def run_for_examples(face_files, prompt, style, negative_prompt):
-    return generate_image(face_files, None, prompt, negative_prompt, style, True, 30, 0.8, 0.8, 5, 42)
 def convert_from_cv2_to_image(img: np.ndarray) -> Image:
     return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 def convert_from_image_to_cv2(img: Image) -> np.ndarray:
     return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
-def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,255,0), (255,0,255)]):
     stickwidth = 4
     limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
     kps = np.array(kps)
@@ -135,7 +132,9 @@ def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,2
         y = kps[index][:, 1]
         length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
         angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
-        polygon = cv2.ellipse2Poly((int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
         out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
     out_img = (out_img * 0.6).astype(np.uint8)
@@ -147,89 +146,114 @@ def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,2
     out_img_pil = Image.fromarray(out_img.astype(np.uint8))
     return out_img_pil
-def resize_img(input_image, max_side=1280, min_side=1024, size=None,
-               pad_to_max_side=False, mode=PIL.Image.BILINEAR, base_pixel_number=64):
-        w, h = input_image.size
-        if size is not None:
-            w_resize_new, h_resize_new = size
-        else:
-            ratio = min_side / min(h, w)
-            w, h = round(ratio*w), round(ratio*h)
-            ratio = max_side / max(h, w)
-            input_image = input_image.resize([round(ratio*w), round(ratio*h)], mode)
-            w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
-            h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
-        input_image = input_image.resize([w_resize_new, h_resize_new], mode)
-        if pad_to_max_side:
-            res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
-            offset_x = (max_side - w_resize_new) // 2
-            offset_y = (max_side - h_resize_new) // 2
-            res[offset_y:offset_y+h_resize_new, offset_x:offset_x+w_resize_new] = np.array(input_image)
-            input_image = Image.fromarray(res)
-        return input_image
 def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
     p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-    return p.replace("{prompt}", positive), n + ' ' + negative
-@spaces.GPU
-def generate_image(face_image, pose_image, prompt, negative_prompt, style_name, enhance_face_region, num_steps, identitynet_strength_ratio, adapter_strength_ratio, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)):
     if face_image is None:
-        raise gr.Error(f"Cannot find any input face image! Please upload the face image")
     if prompt is None:
         prompt = "a person"
     # apply the style template
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
-    face_image = load_image(face_image[0])
     face_image = resize_img(face_image)
     face_image_cv2 = convert_from_image_to_cv2(face_image)
     height, width, _ = face_image_cv2.shape
     # Extract face features
     face_info = app.get(face_image_cv2)
     if len(face_info) == 0:
-        raise gr.Error(f"Cannot find any face in the image! Please upload another person image")
-    face_info = sorted(face_info, key=lambda x:(x['bbox'][2]-x['bbox'][0])*x['bbox'][3]-x['bbox'][1])[-1]  # only use the maximum face
-    face_emb = face_info['embedding']
-    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info['kps'])
-    if pose_image is not None:
-        pose_image = load_image(pose_image[0])
         pose_image = resize_img(pose_image)
         pose_image_cv2 = convert_from_image_to_cv2(pose_image)
         face_info = app.get(pose_image_cv2)
         if len(face_info) == 0:
-            raise gr.Error(f"Cannot find any face in the reference image! Please upload another person image")
         face_info = face_info[-1]
-        face_kps = draw_kps(pose_image, face_info['kps'])
         width, height = face_kps.size
     if enhance_face_region:
         control_mask = np.zeros([height, width, 3])
-        x1, y1, x2, y2 = face_info['bbox']
         x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
         control_mask[y1:y2, x1:x2] = 255
         control_mask = Image.fromarray(control_mask.astype(np.uint8))
     else:
         control_mask = None
     generator = torch.Generator(device=device).manual_seed(seed)
     print("Start inference...")
     print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
     pipe.set_ip_adapter_scale(adapter_strength_ratio)
     images = pipe(
         prompt=prompt,
@@ -242,10 +266,11 @@ def generate_image(face_image, pose_image, prompt, negative_prompt, style_name,
         guidance_scale=guidance_scale,
         height=height,
         width=width,
-        generator=generator
     ).images
-    return images, gr.update(visible=True)
 ### Description
 title = r"""
@@ -289,46 +314,34 @@ tips = r"""
 4. Find a good base model always makes a difference.
 """
-css = '''
 .gradio-container {width: 85% !important}
-'''
 with gr.Blocks(css=css) as demo:
     # description
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
             # upload face image
-            face_files = gr.Files(
-                        label="Upload a photo of your face",
-                        file_types=["image"]
-                    )
-            uploaded_faces = gr.Gallery(label="Your images", visible=False, columns=1, rows=1, height=512)
-            with gr.Column(visible=False) as clear_button_face:
-                remove_and_reupload_faces = gr.ClearButton(value="Remove and upload new ones", components=face_files, size="sm")
             # optional: upload a reference pose image
-            pose_files = gr.Files(
-                        label="Upload a reference pose image (optional)",
-                        file_types=["image"]
-                    )
-            uploaded_poses = gr.Gallery(label="Your images", visible=False, columns=1, rows=1, height=512)
-            with gr.Column(visible=False) as clear_button_pose:
-                remove_and_reupload_poses = gr.ClearButton(value="Remove and upload new ones", components=pose_files, size="sm")
             # prompt
-            prompt = gr.Textbox(label="Prompt",
-                       info="Give simple prompt is enough to achieve good face fedility",
-                       placeholder="A photo of a person",
-                       value="")
             submit = gr.Button("Submit", variant="primary")
             style = gr.Dropdown(label="Style template", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
             # strength
             identitynet_strength_ratio = gr.Slider(
                 label="IdentityNet strength (for fedility)",
@@ -344,14 +357,14 @@ with gr.Blocks(css=css) as demo:
                 step=0.05,
                 value=0.80,
             )
             with gr.Accordion(open=False, label="Advanced Options"):
                 negative_prompt = gr.Textbox(
-                    label="Negative Prompt",
                     placeholder="low quality",
                     value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
                 )
-                num_steps = gr.Slider(
                     label="Number of sample steps",
                     minimum=20,
                     maximum=100,
@@ -376,18 +389,14 @@ with gr.Blocks(css=css) as demo:
                 enhance_face_region = gr.Checkbox(label="Enhance non-face region", value=True)
         with gr.Column():
-            gallery = gr.Gallery(label="Generated Images")
-            usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips ,visible=False)
-        face_files.upload(fn=swap_to_gallery, inputs=face_files, outputs=[uploaded_faces, clear_button_face, face_files])
-        pose_files.upload(fn=swap_to_gallery, inputs=pose_files, outputs=[uploaded_poses, clear_button_pose, pose_files])
-        remove_and_reupload_faces.click(fn=remove_back_to_files, outputs=[uploaded_faces, clear_button_face, face_files])
-        remove_and_reupload_poses.click(fn=remove_back_to_files, outputs=[uploaded_poses, clear_button_pose, pose_files])
         submit.click(
             fn=remove_tips,
-            outputs=usage_tips,
         ).then(
             fn=randomize_seed_fn,
             inputs=[seed, randomize_seed],
@@ -395,22 +404,37 @@ with gr.Blocks(css=css) as demo:
             queue=False,
             api_name=False,
         ).then(
             fn=generate_image,
-            inputs=[face_files, pose_files, prompt, negative_prompt, style, enhance_face_region, num_steps, identitynet_strength_ratio, adapter_strength_ratio, guidance_scale, seed],
-            outputs=[gallery, usage_tips]
         )
     gr.Examples(
         examples=get_example(),
-        inputs=[face_files, prompt, style, negative_prompt],
-        run_on_click=True,
-        fn=upload_example_to_gallery,
-        outputs=[uploaded_faces, clear_button_face, face_files],
-        cache_examples=True
     )
-    gr.Markdown(article)
 demo.queue(api_open=False)
-demo.launch()

 import math
 import random
+import cv2
+import gradio as gr
+import numpy as np
 import PIL
+import spaces
+import torch
 from diffusers.models import ControlNetModel
+from diffusers.utils import load_image
 from insightface.app import FaceAnalysis
+from PIL import Image
 from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline
+from style_template import styles
 # global variable
 MAX_SEED = np.iinfo(np.int32).max
 # download checkpoints
 from huggingface_hub import hf_hub_download
 hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
+hf_hub_download(
+    repo_id="InstantX/InstantID",
+    filename="ControlNetModel/diffusion_pytorch_model.safetensors",
+    local_dir="./checkpoints",
+)
 hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
 # Load face encoder
+app = FaceAnalysis(name="antelopev2", root="./", providers=["CPUExecutionProvider"])
 app.prepare(ctx_id=0, det_size=(640, 640))
 # Path to InstantID models
+face_adapter = "./checkpoints/ip-adapter.bin"
+controlnet_path = "./checkpoints/ControlNetModel"
 # Load pipeline
 controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
+base_model_path = "wangqixun/YamerMIX_v8"
 pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
     base_model_path,
 )
 pipe.cuda()
 pipe.load_ip_adapter_instantid(face_adapter)
+pipe.image_proj_model.to("cuda")
+pipe.unet.to("cuda")
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     return seed
 def remove_tips():
     return gr.update(visible=False)
 def get_example():
     case = [
         [
+            "./examples/yann-lecun_resize.jpg",
             "a man",
             "Snow",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
+            "./examples/musk_resize.jpeg",
             "a man",
             "Mars",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
+            "./examples/sam_resize.png",
             "a man",
             "Jungle",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, gree",
         ],
         [
+            "./examples/schmidhuber_resize.png",
             "a man",
             "Neon",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
+            "./examples/kaifu_resize.png",
             "a man",
             "Vibrant Color",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
     ]
     return case
+def run_for_examples(face_file, prompt, style, negative_prompt):
+    return generate_image(face_file, None, prompt, negative_prompt, style, True, 30, 0.8, 0.8, 5, 42)
 def convert_from_cv2_to_image(img: np.ndarray) -> Image:
     return Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
 def convert_from_image_to_cv2(img: Image) -> np.ndarray:
     return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+def draw_kps(image_pil, kps, color_list=[(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255)]):
     stickwidth = 4
     limbSeq = np.array([[0, 2], [1, 2], [3, 2], [4, 2]])
     kps = np.array(kps)
         y = kps[index][:, 1]
         length = ((x[0] - x[1]) ** 2 + (y[0] - y[1]) ** 2) ** 0.5
         angle = math.degrees(math.atan2(y[0] - y[1], x[0] - x[1]))
+        polygon = cv2.ellipse2Poly(
+            (int(np.mean(x)), int(np.mean(y))), (int(length / 2), stickwidth), int(angle), 0, 360, 1
+        )
         out_img = cv2.fillConvexPoly(out_img.copy(), polygon, color)
     out_img = (out_img * 0.6).astype(np.uint8)
     out_img_pil = Image.fromarray(out_img.astype(np.uint8))
     return out_img_pil
+def resize_img(
+    input_image,
+    max_side=1280,
+    min_side=1024,
+    size=None,
+    pad_to_max_side=False,
+    mode=PIL.Image.BILINEAR,
+    base_pixel_number=64,
+):
+    w, h = input_image.size
+    if size is not None:
+        w_resize_new, h_resize_new = size
+    else:
+        ratio = min_side / min(h, w)
+        w, h = round(ratio * w), round(ratio * h)
+        ratio = max_side / max(h, w)
+        input_image = input_image.resize([round(ratio * w), round(ratio * h)], mode)
+        w_resize_new = (round(ratio * w) // base_pixel_number) * base_pixel_number
+        h_resize_new = (round(ratio * h) // base_pixel_number) * base_pixel_number
+    input_image = input_image.resize([w_resize_new, h_resize_new], mode)
+    if pad_to_max_side:
+        res = np.ones([max_side, max_side, 3], dtype=np.uint8) * 255
+        offset_x = (max_side - w_resize_new) // 2
+        offset_y = (max_side - h_resize_new) // 2
+        res[offset_y : offset_y + h_resize_new, offset_x : offset_x + w_resize_new] = np.array(input_image)
+        input_image = Image.fromarray(res)
+    return input_image
 def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
     p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
+    return p.replace("{prompt}", positive), n + " " + negative
+def check_input_image(face_image):
     if face_image is None:
+        raise gr.Error("Cannot find any input face image! Please upload the face image")
+@spaces.GPU
+def generate_image(
+    face_image_path,
+    pose_image_path,
+    prompt,
+    negative_prompt,
+    style_name,
+    enhance_face_region,
+    num_steps,
+    identitynet_strength_ratio,
+    adapter_strength_ratio,
+    guidance_scale,
+    seed,
+    progress=gr.Progress(track_tqdm=True),
+):
     if prompt is None:
         prompt = "a person"
     # apply the style template
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
+    face_image = load_image(face_image_path)
     face_image = resize_img(face_image)
     face_image_cv2 = convert_from_image_to_cv2(face_image)
     height, width, _ = face_image_cv2.shape
     # Extract face features
     face_info = app.get(face_image_cv2)
     if len(face_info) == 0:
+        raise gr.Error("Cannot find any face in the image! Please upload another person image")
+    face_info = sorted(face_info, key=lambda x: (x["bbox"][2] - x["bbox"][0]) * x["bbox"][3] - x["bbox"][1])[
+        -1
+    ]  # only use the maximum face
+    face_emb = face_info["embedding"]
+    face_kps = draw_kps(convert_from_cv2_to_image(face_image_cv2), face_info["kps"])
+    if pose_image_path is not None:
+        pose_image = load_image(pose_image_path)
         pose_image = resize_img(pose_image)
         pose_image_cv2 = convert_from_image_to_cv2(pose_image)
         face_info = app.get(pose_image_cv2)
         if len(face_info) == 0:
+            raise gr.Error("Cannot find any face in the reference image! Please upload another person image")
         face_info = face_info[-1]
+        face_kps = draw_kps(pose_image, face_info["kps"])
         width, height = face_kps.size
     if enhance_face_region:
         control_mask = np.zeros([height, width, 3])
+        x1, y1, x2, y2 = face_info["bbox"]
         x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
         control_mask[y1:y2, x1:x2] = 255
         control_mask = Image.fromarray(control_mask.astype(np.uint8))
     else:
         control_mask = None
     generator = torch.Generator(device=device).manual_seed(seed)
     print("Start inference...")
     print(f"[Debug] Prompt: {prompt}, \n[Debug] Neg Prompt: {negative_prompt}")
     pipe.set_ip_adapter_scale(adapter_strength_ratio)
     images = pipe(
         prompt=prompt,
         guidance_scale=guidance_scale,
         height=height,
         width=width,
+        generator=generator,
     ).images
+    return images[0], gr.update(visible=True)
 ### Description
 title = r"""
 4. Find a good base model always makes a difference.
 """
+css = """
 .gradio-container {width: 85% !important}
+"""
 with gr.Blocks(css=css) as demo:
     # description
     gr.Markdown(title)
     gr.Markdown(description)
     with gr.Row():
         with gr.Column():
             # upload face image
+            face_file = gr.Image(label="Upload a photo of your face", type="filepath")
             # optional: upload a reference pose image
+            pose_file = gr.Image(label="Upload a reference pose image (optional)", type="filepath")
             # prompt
+            prompt = gr.Textbox(
+                label="Prompt",
+                info="Give simple prompt is enough to achieve good face fedility",
+                placeholder="A photo of a person",
+                value="",
+            )
             submit = gr.Button("Submit", variant="primary")
             style = gr.Dropdown(label="Style template", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
             # strength
             identitynet_strength_ratio = gr.Slider(
                 label="IdentityNet strength (for fedility)",
                 step=0.05,
                 value=0.80,
             )
             with gr.Accordion(open=False, label="Advanced Options"):
                 negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
                     placeholder="low quality",
                     value="(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
                 )
+                num_steps = gr.Slider(
                     label="Number of sample steps",
                     minimum=20,
                     maximum=100,
                 enhance_face_region = gr.Checkbox(label="Enhance non-face region", value=True)
         with gr.Column():
+            output_image = gr.Image(label="Generated Image")
+            usage_tips = gr.Markdown(label="Usage tips of InstantID", value=tips, visible=False)
         submit.click(
             fn=remove_tips,
+            outputs=usage_tips,
+            queue=False,
+            api_name=False,
         ).then(
             fn=randomize_seed_fn,
             inputs=[seed, randomize_seed],
             queue=False,
             api_name=False,
         ).then(
+            fn=check_input_image,
+            inputs=face_file,
+            queue=False,
+            api_name=False,
+        ).success(
             fn=generate_image,
+            inputs=[
+                face_file,
+                pose_file,
+                prompt,
+                negative_prompt,
+                style,
+                enhance_face_region,
+                num_steps,
+                identitynet_strength_ratio,
+                adapter_strength_ratio,
+                guidance_scale,
+                seed,
+            ],
+            outputs=[output_image, usage_tips],
         )
     gr.Examples(
         examples=get_example(),
+        inputs=[face_file, prompt, style, negative_prompt],
+        outputs=[output_image, usage_tips],
+        fn=run_for_examples,
+        cache_examples=True,
     )
+    gr.Markdown(article)
 demo.queue(api_open=False)
+demo.launch()

gradio_cached_examples/25/Generated Image/2880a3d19ef9b42e3ed2/image.png ADDED Viewed

Git LFS Details

SHA256: 573444e88e4bf4ab7bf4a693cf53cea3988366f4ca35b41523bf7c027802d0a6
Pointer size: 132 Bytes
Size of remote file: 2.57 MB

gradio_cached_examples/25/Generated Image/38dde1388c41109c5d39/image.png ADDED Viewed

Git LFS Details

SHA256: c4e80ada96212f1acd058324b25602dc611de920de9e710c26286e751a5f1a9a
Pointer size: 132 Bytes
Size of remote file: 2.31 MB

gradio_cached_examples/25/Generated Image/6cb5a3af223906666bfd/image.png ADDED Viewed

Git LFS Details

SHA256: f4b7543b3b1fd8ae301ee77c8479a7b3170e2ee063cd501a04e5e8ecc38417f6
Pointer size: 132 Bytes
Size of remote file: 2.28 MB

gradio_cached_examples/25/Generated Image/7e6ea85f77dd925d842c/image.png ADDED Viewed

Git LFS Details

SHA256: abde3e97dc47d0b95f9909b2575834d6792db49816e5b40b0c8474a48fe467b2
Pointer size: 132 Bytes
Size of remote file: 2.38 MB

gradio_cached_examples/25/Generated Image/e56b029833685ff77e6a/image.png ADDED Viewed

Git LFS Details

SHA256: 2f0992f67fe4839bffb56cf3c527e08c652406c55729257374ee8d630ac21501
Pointer size: 132 Bytes
Size of remote file: 2.69 MB

gradio_cached_examples/25/log.csv ADDED Viewed

	@@ -0,0 +1,6 @@

+Generated Image,Usage tips of InstantID,flag,username,timestamp
+"{""path"":""gradio_cached_examples/25/Generated Image/7e6ea85f77dd925d842c/image.png"",""url"":null,""size"":null,""orig_name"":""image.png"",""mime_type"":null}","{'visible': True, '__type__': 'update'}",,,2024-01-24 08:55:38.846769
+"{""path"":""gradio_cached_examples/25/Generated Image/2880a3d19ef9b42e3ed2/image.png"",""url"":null,""size"":null,""orig_name"":""image.png"",""mime_type"":null}","{'visible': True, '__type__': 'update'}",,,2024-01-24 08:56:11.432078
+"{""path"":""gradio_cached_examples/25/Generated Image/38dde1388c41109c5d39/image.png"",""url"":null,""size"":null,""orig_name"":""image.png"",""mime_type"":null}","{'visible': True, '__type__': 'update'}",,,2024-01-24 08:56:45.563918
+"{""path"":""gradio_cached_examples/25/Generated Image/e56b029833685ff77e6a/image.png"",""url"":null,""size"":null,""orig_name"":""image.png"",""mime_type"":null}","{'visible': True, '__type__': 'update'}",,,2024-01-24 08:57:20.321876
+"{""path"":""gradio_cached_examples/25/Generated Image/6cb5a3af223906666bfd/image.png"",""url"":null,""size"":null,""orig_name"":""image.png"",""mime_type"":null}","{'visible': True, '__type__': 'update'}",,,2024-01-24 08:57:53.871716