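"""Gradio demo that chains Hugging Face Spaces: InstantID generates a portrait that
keeps the uploaded face and follows the selected camera shot, then an image-to-video
Space (i2vgen-xl or stable-video-diffusion) produces a short video clip."""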
import gradio as gr
from gradio_client import Client
import os 

# Hugging Face access token read from the HF_TKN environment variable,
# forwarded to the InstantID Space client below
hf_token = os.environ.get("HF_TKN")

def get_instantID(portrait_in, condition_pose, prompt):
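    """Query the fffiloni/InstantID Space: generate a portrait that preserves the identity
    of `portrait_in` and, when provided, follows the pose in `condition_pose`."""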
    client = Client("https://fffiloni-instantid.hf.space/", hf_token=hf_token)
    negative_prompt = "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green"
    result = client.predict(
        portrait_in,      # filepath in 'Upload a photo of your face' Image component
        condition_pose,   # filepath in 'Upload a reference pose image (optional)' Image component
        prompt,           # str in 'Prompt' Textbox component
        negative_prompt,  # str in 'Negative Prompt' Textbox component
        "(No style)",     # Literal['(No style)', 'Watercolor', 'Film Noir', 'Neon', 'Jungle', 'Mars', 'Vibrant Color', 'Snow', 'Line art'] in 'Style template' Dropdown component
        True,             # bool in 'Enhance non-face region' Checkbox component
        20,               # float (numeric value between 20 and 100) in 'Number of sample steps' Slider component
        0.8,              # float (numeric value between 0 and 1.5) in 'IdentityNet strength (for fidelity)' Slider component
        0.8,              # float (numeric value between 0 and 1.5) in 'Image adapter strength (for detail)' Slider component
        5,                # float (numeric value between 0.1 and 10.0) in 'Guidance scale' Slider component
        0,                # float (numeric value between 0 and 2147483647) in 'Seed' Slider component
        True,             # bool in 'Randomize seed' Checkbox component
        api_name="/generate_image"
    )
    print(result)
    return result[0]

def get_video_i2vgen(image_in, prompt):
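    """Query the ModelScope i2vgen-xl Space to animate `image_in`, guided by `prompt`."""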
    client = Client("https://modelscope-i2vgen-xl.hf.space/")
    result = client.predict(
        image_in,
        prompt,
        fn_index=1
    )
    print(result)
    return result

def get_video_svd(image_in):
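    """Query the multimodalart stable-video-diffusion Space to animate `image_in`."""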
    client = Client("https://multimodalart-stable-video-diffusion.hf.space/--replicas/ej45m/")
    result = client.predict(
		image_in,	# filepath  in 'Upload your image' Image component
		0,	# float (numeric value between 0 and 9223372036854775807) in 'Seed' Slider component
		True,	# bool  in 'Randomize seed' Checkbox component
		127,	# float (numeric value between 1 and 255) in 'Motion bucket id' Slider component
		6,	# float (numeric value between 5 and 30) in 'Frames per second' Slider component
		api_name="/video"
    )
    print(result)
    return result[0]["video"]

def infer(image_in, camera_shot, conditional_pose, prompt, chosen_model):
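    """Resolve the conditional pose from the selected camera shot, run InstantID on the
    input face, then generate the final video with the chosen image-to-video model."""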
    if camera_shot == "custom":
        if conditional_pose is None:
            raise gr.Error("No custom conditional shot found!")
    
    elif camera_shot == "close-up":
        conditional_pose = "camera_shots/close_up_shot.jpeg"
    elif camera_shot == "medium close-up":
        conditional_pose = "camera_shots/medium_close_up.jpeg"
    elif camera_shot == "medium shot":
        conditional_pose = "camera_shots/medium_shot.png"
    elif camera_shot == "cowboy shot":
        conditional_pose = "camera_shots/cowboy_shot.jpeg"
    elif camera_shot == "medium full shot":
        conditional_pose = "camera_shots/medium_full_shot.png"
    elif camera_shot == "full shot":
        conditional_pose = "camera_shots/full_shot.jpeg"
    
    iid_img = get_instantID(image_in, conditional_pose, prompt)
    
    if chosen_model == "i2vgen-xl":
        video_res = get_video_i2vgen(iid_img, prompt)
    elif chosen_model == "stable-video":
        video_res = get_video_svd(image_in)
    
    print(video_res)
    
    return video_res

with gr.Blocks() as demo:
    with gr.Column():
        gr.HTML("""
        
        """)
        with gr.Row():
            with gr.Column():
                face_in = gr.Image(type="filepath", label="Face to copy")
                camera_shot = gr.Dropdown(
                    label = "Camera Shot", 
                    info = "Use standard camera shots vocabulary, or drop your custom shot as conditional pose (1280*720 ratio is recommended)"
                    choices = [
                        "custom", "close-up", "medium close-up", "medium shot", "cowboy shot", "medium full shot", "full shot"
                    ],
                    value = "custom"
                )
                condition_shot = gr.Image(type="filepath", label="Custom conditional shot (Optional)")
                prompt = gr.Textbox(label="Prompt")
                chosen_model = gr.Radio(label="Choose a model", choices=["i2vgen-xl", "stable-video"], value="i2vgen-xl", interactive=False, visible=False)
                submit_btn = gr.Button("Submit")
            with gr.Column():
                video_out = gr.Video()

    submit_btn.click(
        fn = infer,
        inputs = [
            face_in,
            camera_shot,
            condition_shot,
            prompt,
            chosen_model
        ],
        outputs = [
            video_out
        ]
    )

demo.queue(max_size=6).launch(debug=True)