# hallo / app.py
# Page header residue: "fspecii's picture" - commit "Update app.py" (db66eea, verified)
import os
import shutil
from huggingface_hub import snapshot_download
import gradio as gr
import argparse
import uuid
# Ensure the current working directory is the script's directory, so that the
# relative paths used in this file ("pretrained_models", "configs/...", and the
# generated "output-*.mp4") resolve next to this script no matter where the
# process was started from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Project-local import, deliberately placed after the chdir above.
# NOTE(review): presumably scripts.inference relies on the working directory
# (or was simply kept here for ordering) - confirm before moving it to the top.
from scripts.inference import inference_process
# Download the model to the local directory.
# This runs at import time. snapshot_download returns the path of the local
# folder; hallo_dir is not referenced again in the visible code.
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run the Hallo inference pipeline and return the generated video path.

    Args:
        source_image: Filesystem path of the face image. The UI passes a
            filepath, or ``None`` when nothing was uploaded.
        driving_audio: Filesystem path of the driving audio clip, or ``None``
            when nothing was uploaded.
        progress: Gradio progress tracker. It is not used in the body; it is
            declared as a default argument with ``track_tqdm=True`` so that
            Gradio can surface tqdm progress bars in the UI.

    Returns:
        The relative path of the ``.mp4`` file handed to the pipeline as its
        output location.

    Raises:
        gr.Error: If the image or the audio input is missing, so the user
            sees a readable message instead of a failure deep inside the
            inference code.
    """
    # Fail fast with a user-facing message when an input was not provided.
    if not source_image:
        raise gr.Error("Please upload a face image before generating.")
    if not driving_audio:
        raise gr.Error("Please upload a driving audio clip before generating.")

    # One unique file name per call, built once and reused for both the
    # pipeline argument and the return value.
    output_path = f'output-{uuid.uuid4()}.mp4'

    # inference_process takes an argparse-style namespace.
    # NOTE(review): the weights/ratio values below are presumably the
    # pipeline's usual defaults - confirm against scripts/inference.py.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )
    inference_process(args)
    return output_path
# Custom stylesheet passed to gr.Blocks(css=...).
# It styles two banner elements addressed by id: #warning-ready (light green
# background) and #warning-duplicate (light blue background, with a dark-mode
# override), plus a flex layout for "p.actions" links.
# NOTE(review): no component in the visible layout sets elem_id
# "warning-ready" or "warning-duplicate", so these rules look unused - confirm
# before removing.
css = '''
div#warning-ready {
background-color: #ecfdf5;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
color: #057857!important;
}
div#warning-duplicate {
background-color: #ebf5ff;
padding: 0 16px 16px;
margin: 20px 0;
color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
color: #0f4592!important;
}
div#warning-duplicate strong {
color: #0f4592;
}
p.actions {
display: flex;
align-items: center;
margin: 20px 0;
}
div#warning-duplicate .actions a {
display: inline-block;
margin-right: 10px;
}
.dark #warning-duplicate {
background-color: #0c0c0c !important;
border: 1px solid white !important;
}
'''
# Assemble the UI: inputs and the trigger button on the left, the resulting
# video on the right.
with gr.Blocks(css=css) as demo:
    # Page title and usage note.
    gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
    gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")

    with gr.Row():
        # Input side: both components hand file paths to the callback.
        with gr.Column():
            avatar_face = gr.Image(label="Face", type="filepath")
            driving_audio = gr.Audio(label="Driving audio", type="filepath")
            generate = gr.Button("Generate")

        # Output side: player for the rendered clip.
        with gr.Column():
            output_video = gr.Video(label="Your talking head")

    # Clicking the button runs inference on the two uploads and shows the
    # returned video file in the player.
    generate.click(
        outputs=output_video,
        inputs=[avatar_face, driving_audio],
        fn=run_inference,
    )

# Start the web server (executed whenever this module is run or imported).
demo.launch()