import gradio as gr
import subprocess
import os
import cv2
from huggingface_hub import hf_hub_download
import glob
from moviepy.editor import VideoFileClip
from datetime import datetime
# Detect whether we are running on the original shared Space; use .get() so a
# local run (where SPACE_ID is unset) does not raise a KeyError
is_shared_ui = "fffiloni/X-Portrait" in os.environ.get("SPACE_ID", "")
# Ensure 'checkpoint' directory exists
os.makedirs("checkpoint", exist_ok=True)
hf_hub_download(
repo_id="fffiloni/X-Portrait",
filename="model_state-415001.th",
local_dir="checkpoint"
)
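# Note: hf_hub_download caches the file and returns its local path; a slightly
# more robust variant (an optional sketch, not what this app does) would keep
# that return value instead of hard-coding the checkpoint path further down:
#   ckpt_path = hf_hub_download(repo_id="fffiloni/X-Portrait",
#                               filename="model_state-415001.th",
#                               local_dir="checkpoint")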
def trim_video(video_path, output_dir="trimmed_videos", max_duration=2):
# Create output directory if it does not exist
os.makedirs(output_dir, exist_ok=True)
# Generate a timestamp for the output filename
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_path = os.path.join(output_dir, f"trimmed_video_{timestamp}.mp4")
# Load the video
with VideoFileClip(video_path) as video:
# Check the duration of the video
if video.duration > max_duration:
# Trim the video to the first max_duration seconds
trimmed_video = video.subclip(0, max_duration)
# Write the trimmed video to a file
trimmed_video.write_videofile(output_path, codec="libx264")
return output_path
else:
# If the video is within the duration, return the original path
return video_path
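# Usage note: trim_video("driving.mp4") yields a new file such as
# "trimmed_videos/trimmed_video_<timestamp>.mp4" when the clip exceeds
# max_duration, and returns the untouched input path otherwise.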
def load_driving_video(video_path):
    # On the shared Space, cap the driving video at 2 seconds before extracting frames
    if is_shared_ui:
        video_path = trim_video(video_path)
        print("Path to the (trimmed) driving video:", video_path)
    frames_data = extract_frames_with_labels(video_path)
    # gr.update's `open` parameter expects a boolean, not the string "True"
    return video_path, frames_data, gr.update(open=True)
def extract_frames_with_labels(video_path, base_output_dir="frames"):
# Generate a timestamped folder name
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = os.path.join(base_output_dir, f"frames_{timestamp}")
# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)
# Open the video file
video_capture = cv2.VideoCapture(video_path)
if not video_capture.isOpened():
raise ValueError(f"Cannot open video file: {video_path}")
frame_data = []
frame_index = 0
# Loop through the video frames
while True:
ret, frame = video_capture.read()
if not ret:
break # Exit the loop if there are no frames left to read
# Zero-padded frame index for filename and label
frame_label = f"{frame_index:04}"
frame_filename = os.path.join(output_dir, f"frame_{frame_label}.jpg")
# Save the frame as a .jpg file
cv2.imwrite(frame_filename, frame)
# Append the tuple (filename, label) to the list
frame_data.append((frame_filename, frame_label))
# Increment frame index
frame_index += 1
# Release the video capture object
video_capture.release()
return frame_data
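# Example return value: [("frames/frames_<timestamp>/frame_0000.jpg", "0000"),
# ("frames/frames_<timestamp>/frame_0001.jpg", "0001"), ...] -- the
# (image, caption) tuple format that gr.Gallery accepts directly.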
# Run the X-Portrait inference script with the inputs selected in the UI
def run_xportrait(source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps, progress=gr.Progress(track_tqdm=True)):
# Create a unique output directory name based on current date and time
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_dir = f"output_{timestamp}"
os.makedirs(output_dir, exist_ok=True)
model_config = "config/cldm_v15_appearance_pose_local_mm.yaml"
resume_dir = "checkpoint/model_state-415001.th"
# Construct the command
command = [
"python3", "core/test_xportrait.py",
"--model_config", model_config,
"--output_dir", output_dir,
"--resume_dir", resume_dir,
"--seed", str(seed),
"--uc_scale", str(uc_scale),
"--source_image", source_image,
"--driving_video", driving_video,
"--best_frame", str(best_frame),
"--out_frames", str(out_frames),
"--num_mix", str(num_mix),
"--ddim_steps", str(ddim_steps)
]
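    # All values are passed as strings because subprocess command arguments
    # must be strings; core/test_xportrait.py parses them on its side.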
# Run the command
try:
subprocess.run(command, check=True)
# Find the generated video file in the output directory
video_files = glob.glob(os.path.join(output_dir, "*.mp4"))
        print("Generated video files:", video_files)
if video_files:
final_vid = convert_video_to_h264_aac(video_files[0])
return f"Output video saved at: {final_vid}", final_vid
else:
return "No video file was found in the output directory.", None
except subprocess.CalledProcessError as e:
return f"An error occurred: {e}", None
def convert_video_to_h264_aac(video_path):
# Get the directory and original filename
original_dir = os.path.dirname(video_path)
original_name, _ = os.path.splitext(os.path.basename(video_path))
# Define the output path in the same directory
output_path = os.path.join(original_dir, f"{original_name}_converted.mp4")
# Load the video
with VideoFileClip(video_path) as video:
# Write the video with H.264 and AAC codecs
video.write_videofile(
output_path,
codec="libx264", # H.264 video codec
audio_codec="aac", # AAC audio codec
temp_audiofile="temp-audio.m4a", # Temporary audio file (moviepy requirement)
remove_temp=True # Remove temporary files after writing
)
return output_path
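# H.264 video with AAC audio is the most widely supported combination for
# HTML5 <video> playback, which is why the raw output is re-encoded before
# being handed to the Gradio video component.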
# Set up Gradio interface
css="""
div#frames-gallery{
overflow: scroll!important;
}
"""
example_frame_data = extract_frames_with_labels("./assets/driving_video.mp4")
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="col-container"):
gr.Markdown("# X-Portrait: Expressive Portrait Animation with Hierarchical Motion Attention")
gr.Markdown("On this shared UI, drinving video input will be trimmed to 2 seconds max. Duplicate this space for more controls.")
gr.HTML("""
<div style="display:flex;column-gap:4px;">
<a href='https://github.com/bytedance/X-Portrait'>
<img src='https://img.shields.io/badge/GitHub-Repo-blue'>
</a>
<a href='https://byteaigc.github.io/x-portrait/'>
<img src='https://img.shields.io/badge/Project-Page-green'>
</a>
</div>
""")
with gr.Row():
with gr.Column():
with gr.Row():
source_image = gr.Image(label="Source Image", type="filepath")
driving_video = gr.Video(label="Driving Video")
with gr.Group():
with gr.Row():
                        best_frame = gr.Number(value=36, label="Best Frame", info="index of the driving-video frame whose head pose best matches the source image (an imprecise best_frame value can degrade the final quality)")
                        out_frames = gr.Number(value=-1, label="Out Frames", info="number of frames to generate")
with gr.Accordion("Driving video Frames", open=False) as frames_gallery_panel:
driving_frames = gr.Gallery(show_label=True, columns=6, height=380, elem_id="frames-gallery")
with gr.Row():
seed = gr.Number(value=999, label="Seed")
uc_scale = gr.Number(value=5, label="UC Scale")
with gr.Row():
num_mix = gr.Number(value=4, label="Number of Mix")
ddim_steps = gr.Number(value=30, label="DDIM Steps")
submit_btn = gr.Button("Submit")
with gr.Column():
video_output = gr.Video(label="Output Video")
                status = gr.Textbox(label="Status")
gr.Examples(
examples=[
["./assets/source_image.png", "./assets/driving_video.mp4", "./assets/inference_result.mp4"]
],
inputs=[source_image, driving_video, video_output]
)
gr.HTML("""
<div style="display:flex;column-gap:4px;">
<a href="https://huggingface.co/spaces/fffiloni/X-Portrait?duplicate=true">
<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-xl.svg" alt="Duplicate this Space">
</a>
<a href="https://huggingface.co/fffiloni">
<img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-xl-dark.svg" alt="Follow me on HF">
</a>
</div>
""")
driving_video.upload(
fn = load_driving_video,
inputs = [driving_video],
outputs = [driving_video, driving_frames, frames_gallery_panel],
queue = False
)
submit_btn.click(
fn = run_xportrait,
inputs = [source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps],
outputs = [status, video_output]
)
# Launch the Gradio app
demo.launch()