# Page header captured with the file ("Spaces: Running on L40S"); not part of the program.
import gradio as gr | |
import subprocess | |
import os | |
import cv2 | |
from huggingface_hub import hf_hub_download | |
import glob | |
from moviepy.editor import VideoFileClip | |
from datetime import datetime | |
# True only when running as the original shared Space. SPACE_ID may be absent
# (for example when the script is run outside Hugging Face Spaces), so fall
# back to an empty string instead of raising KeyError at import time.
is_shared_ui = "fffiloni/X-Portrait" in os.environ.get("SPACE_ID", "")
# Make sure the local "checkpoint" folder is present, then fetch the model
# state file into it; run_xportrait later passes this file as --resume_dir.
os.makedirs("checkpoint", exist_ok=True)
hf_hub_download(
    local_dir="checkpoint",
    repo_id="fffiloni/X-Portrait",
    filename="model_state-415001.th",
)
def trim_video(video_path, output_dir="trimmed_videos", max_duration=2):
    """Return a path to a video that is at most ``max_duration`` seconds long.

    Args:
        video_path: Path of the video to inspect.
        output_dir: Directory that receives the trimmed copy; created if missing.
        max_duration: Maximum allowed duration, in seconds.

    Returns:
        ``video_path`` unchanged when the video is already short enough,
        otherwise the path of a newly written H.264 ``.mp4`` that contains
        only the first ``max_duration`` seconds.
    """
    os.makedirs(output_dir, exist_ok=True)

    with VideoFileClip(video_path) as video:
        if video.duration > max_duration:
            # Microsecond resolution keeps uploads that arrive within the same
            # second from writing to (and overwriting) the same output file.
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
            output_path = os.path.join(output_dir, f"trimmed_video_{timestamp}.mp4")

            trimmed_video = video.subclip(0, max_duration)
            trimmed_video.write_videofile(output_path, codec="libx264")
            return output_path

    # The video is within the limit: hand back the original file untouched.
    return video_path
def load_driving_video(video_path):
    """Prepare an uploaded driving video and extract its frames for preview.

    On the shared UI the video is first trimmed with ``trim_video``; otherwise
    it is used as uploaded.

    Args:
        video_path: Path of the uploaded driving video.

    Returns:
        A 3-tuple ``(video_path, frames_data, accordion_update)`` where
        ``video_path`` is the (possibly trimmed) video, ``frames_data`` is the
        list returned by ``extract_frames_with_labels`` and
        ``accordion_update`` is a Gradio update that opens the frames panel.
    """
    if is_shared_ui:
        video_path = trim_video(video_path)
        print("Path to the (trimmed) driving video:", video_path)

    frames_data = extract_frames_with_labels(video_path)
    # Pass a real boolean: the string "True" only worked because it is truthy.
    return video_path, frames_data, gr.update(open=True)
def extract_frames_with_labels(video_path, base_output_dir="frames"):
    """Dump every frame of a video to JPEG files and label each one.

    Frames are written to ``<base_output_dir>/frames_<timestamp>/`` as
    ``frame_0000.jpg``, ``frame_0001.jpg``, ...

    Args:
        video_path: Path of the video to read.
        base_output_dir: Parent directory for the per-call frames folder.

    Returns:
        A list of ``(frame_filename, frame_label)`` tuples in frame order,
        where ``frame_label`` is the zero-padded frame index.

    Raises:
        ValueError: If OpenCV cannot open ``video_path``.
    """
    video_capture = cv2.VideoCapture(video_path)
    if not video_capture.isOpened():
        raise ValueError(f"Cannot open video file: {video_path}")

    frame_data = []
    try:
        # Microsecond resolution keeps calls made within the same second from
        # sharing a folder and overwriting each other's frames.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        output_dir = os.path.join(base_output_dir, f"frames_{timestamp}")
        os.makedirs(output_dir, exist_ok=True)

        frame_index = 0
        while True:
            ret, frame = video_capture.read()
            if not ret:
                break  # No frames left to read

            # Zero-padded frame index, used for both the filename and the label
            frame_label = f"{frame_index:04}"
            frame_filename = os.path.join(output_dir, f"frame_{frame_label}.jpg")
            cv2.imwrite(frame_filename, frame)
            frame_data.append((frame_filename, frame_label))
            frame_index += 1
    finally:
        # Release the capture handle even if reading or writing fails midway.
        video_capture.release()

    return frame_data
# Define a function to run your script with selected inputs
def run_xportrait(source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps, progress=gr.Progress(track_tqdm=True)):
    """Run ``core/test_xportrait.py`` in a subprocess and return its result.

    Args:
        source_image: File path of the source portrait image.
        driving_video: File path of the driving video.
        seed: Value forwarded as ``--seed``.
        uc_scale: Value forwarded as ``--uc_scale``.
        best_frame: Frame index in the driving video where the head pose best
            matches the source image; forwarded as ``--best_frame``.
        out_frames: Number of frames to generate; forwarded as ``--out_frames``.
        num_mix: Value forwarded as ``--num_mix``.
        ddim_steps: Value forwarded as ``--ddim_steps``.
        progress: Gradio progress tracker (``track_tqdm=True``); it is not
            referenced directly in the body.

    Returns:
        A ``(status_message, video_path)`` tuple. ``video_path`` is ``None``
        when an input is missing, the subprocess fails, or no ``.mp4`` is
        produced.
    """
    # Without both inputs the command list would contain None and
    # subprocess.run would raise TypeError instead of reporting a status.
    if not source_image or not driving_video:
        return "Please provide both a source image and a driving video.", None

    # Unique output directory per request; microsecond resolution prevents two
    # runs started in the same second from sharing (and mixing up) results.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    output_dir = f"output_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)

    model_config = "config/cldm_v15_appearance_pose_local_mm.yaml"
    resume_dir = "checkpoint/model_state-415001.th"

    # Construct the command
    command = [
        "python3", "core/test_xportrait.py",
        "--model_config", model_config,
        "--output_dir", output_dir,
        "--resume_dir", resume_dir,
        "--seed", str(seed),
        "--uc_scale", str(uc_scale),
        "--source_image", source_image,
        "--driving_video", driving_video,
        "--best_frame", str(best_frame),
        "--out_frames", str(out_frames),
        "--num_mix", str(num_mix),
        "--ddim_steps", str(ddim_steps),
    ]

    # Only the subprocess call can raise CalledProcessError, so keep the try
    # body limited to it.
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        return f"An error occurred: {e}", None

    # Find the generated video file in the output directory (sorted so the
    # choice is deterministic if more than one file is present).
    video_files = sorted(glob.glob(os.path.join(output_dir, "*.mp4")))
    print(video_files)
    if not video_files:
        return "No video file was found in the output directory.", None

    final_vid = convert_video_to_h264_aac(video_files[0])
    return f"Output video saved at: {final_vid}", final_vid
def convert_video_to_h264_aac(video_path):
    """Re-encode a video with H.264 video and AAC audio next to the original.

    Args:
        video_path: Path of the video to convert.

    Returns:
        Path of the converted file, ``<original name>_converted.mp4``, located
        in the same directory as ``video_path``.
    """
    original_dir = os.path.dirname(video_path)
    original_name, _ = os.path.splitext(os.path.basename(video_path))
    output_path = os.path.join(original_dir, f"{original_name}_converted.mp4")

    # Keep the temporary audio track beside the output instead of using one
    # fixed name in the working directory, which concurrent conversions would
    # all share and clobber.
    temp_audio_path = os.path.join(original_dir, f"{original_name}_temp-audio.m4a")

    with VideoFileClip(video_path) as video:
        video.write_videofile(
            output_path,
            codec="libx264",  # H.264 video codec
            audio_codec="aac",  # AAC audio codec
            temp_audiofile=temp_audio_path,  # Temporary audio file used while writing
            remove_temp=True,  # Remove temporary files after writing
        )

    return output_path
# Set up Gradio interface
css = """
div#frames-gallery{
    overflow: scroll!important;
}
"""

# Extract the frames of the bundled example driving video at startup.
# NOTE(review): the result is not referenced anywhere else in this file.
example_frame_data = extract_frames_with_labels("./assets/driving_video.mp4")

# NOTE(review): the nesting of the layout containers below was reconstructed
# from the order of the statements -- confirm against the rendered UI.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("# X-Portrait: Expressive Portrait Animation with Hierarchical Motion Attention")
        gr.Markdown("On this shared UI, driving video input will be trimmed to 2 seconds max. Duplicate this space for more controls.")
        gr.HTML("""
        <div style="display:flex;column-gap:4px;">
            <a href='https://github.com/bytedance/X-Portrait'>
                <img src='https://img.shields.io/badge/GitHub-Repo-blue'>
            </a>
            <a href='https://byteaigc.github.io/x-portrait/'>
                <img src='https://img.shields.io/badge/Project-Page-green'>
            </a>
        </div>
        """)
        with gr.Row():
            # Left column: inputs and generation settings
            with gr.Column():
                with gr.Row():
                    source_image = gr.Image(label="Source Image", type="filepath")
                    driving_video = gr.Video(label="Driving Video")
                with gr.Group():
                    with gr.Row():
                        best_frame = gr.Number(value=36, label="Best Frame", info="specify the frame index in the driving video where the head pose best matches the source image (note: precision of best_frame index might affect the final quality)")
                        out_frames = gr.Number(value=-1, label="Out Frames", info="number of generation frames")
                    # Collapsed by default; load_driving_video opens it once frames exist
                    with gr.Accordion("Driving video Frames", open=False) as frames_gallery_panel:
                        driving_frames = gr.Gallery(show_label=True, columns=6, height=380, elem_id="frames-gallery")
                with gr.Row():
                    seed = gr.Number(value=999, label="Seed")
                    uc_scale = gr.Number(value=5, label="UC Scale")
                with gr.Row():
                    num_mix = gr.Number(value=4, label="Number of Mix")
                    ddim_steps = gr.Number(value=30, label="DDIM Steps")
                submit_btn = gr.Button("Submit")
            # Right column: result, status and examples
            with gr.Column():
                video_output = gr.Video(label="Output Video")
                status = gr.Textbox(label="status")
                gr.Examples(
                    examples=[
                        ["./assets/source_image.png", "./assets/driving_video.mp4", "./assets/inference_result.mp4"]
                    ],
                    inputs=[source_image, driving_video, video_output]
                )
                gr.HTML("""
                <div style="display:flex;column-gap:4px;">
                    <a href="https://huggingface.co/spaces/fffiloni/X-Portrait?duplicate=true">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-xl.svg" alt="Duplicate this Space">
                    </a>
                    <a href="https://huggingface.co/fffiloni">
                        <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/follow-me-on-HF-xl-dark.svg" alt="Follow me on HF">
                    </a>
                </div>
                """)

    # When a driving video is uploaded: (optionally) trim it, show its frames
    # in the gallery and open the accordion that contains them.
    driving_video.upload(
        fn=load_driving_video,
        inputs=[driving_video],
        outputs=[driving_video, driving_frames, frames_gallery_panel],
        queue=False
    )

    # Run the generation and show the status text and resulting video.
    submit_btn.click(
        fn=run_xportrait,
        inputs=[source_image, driving_video, seed, uc_scale, best_frame, out_frames, num_mix, ddim_steps],
        outputs=[status, video_output]
    )

# Launch the Gradio app
demo.launch()