hallo / app.py
multimodalart's picture
Update app.py
59d9186 verified
raw
history blame
1.32 kB
import os
import shutil  # NOTE(review): appears unused in the visible code
from huggingface_hub import snapshot_download
import gradio as gr
# Make the directory containing this file the working directory so that the
# relative paths used below ("pretrained_models",
# "configs/inference/default.yaml", the generated output files) resolve
# next to app.py regardless of where the process was started.
# NOTE(review): the project import below is placed after this chdir;
# presumably intentional -- confirm before reordering the imports.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
from scripts.inference import inference_process
import argparse
import uuid
# Fetch the "fudan-generative-ai/hallo" repository from the Hugging Face Hub
# into ./pretrained_models at import time. The returned value is kept in
# `hallo_dir` but is not referenced again in the visible code.
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run Hallo inference for one image/audio pair and return the video path.

    Args:
        source_image: Filesystem path of the portrait image (the UI input is
            declared with ``type="filepath"``).
        driving_audio: Filesystem path of the driving audio clip (also
            declared with ``type="filepath"``).
        progress: Gradio progress tracker. It is not used directly in the
            body; it is declared with ``track_tqdm=True`` so that Gradio can
            surface tqdm progress in the UI.

    Returns:
        The relative path ``output-<uuid4>.mp4`` that was handed to
        ``inference_process`` as its output location.

    Raises:
        gr.Error: If the image or the audio input is missing, so the user
            sees a readable message instead of a failure deep inside the
            inference pipeline.
    """
    # Gradio passes None for inputs the user left empty; fail early and
    # clearly rather than forwarding None into the pipeline.
    if not source_image:
        raise gr.Error("Please provide a source image.")
    if not driving_audio:
        raise gr.Error("Please provide a driving audio clip.")

    # A fresh UUID per request keeps concurrent runs from overwriting each
    # other's output file. Build the name once so the value passed to the
    # pipeline and the value returned can never drift apart.
    output_path = f'output-{uuid.uuid4()}.mp4'

    # inference_process takes an argparse-style namespace; these mirror the
    # options it would otherwise receive from the command line.
    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None,
    )
    inference_process(args)
    return output_path
# Assemble the web UI: an image input and an audio input (both delivered to
# the callback as file paths) feed run_inference, and the returned path is
# shown through the "video" output component.
iface = gr.Interface(
    fn=run_inference,
    inputs=[
        gr.Image(type="filepath"),
        gr.Audio(type="filepath"),
    ],
    outputs="video",
    title=(
        "Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis "
        "for Portrait Image Animation"
    ),
    description=(
        "Generate talking head avatars driven from audio. "
        "**every 10 seconds of generation takes ~1 minute** - "
        "duplicate the space for private use or try for free on Google Colab"
    ),
    cache_examples=False,
)

# Start serving immediately when the module is executed; share=True is
# forwarded to Gradio's launch() unchanged.
iface.launch(share=True)