|
import os |
|
import shutil |
|
from huggingface_hub import snapshot_download |
|
import gradio as gr |
|
import argparse |
|
import uuid |
|
|
|
|
|
# Run from the script's own directory so every relative path below
# (configs/, scripts/, pretrained_models/, output-*.mp4) resolves the
# same way no matter where the process was launched from.
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Imported AFTER the chdir: `scripts` is a sibling package of this file,
# so the working directory must already point here for this to resolve.
from scripts.inference import inference_process

# Fetch the pretrained Hallo weights from the Hugging Face Hub into
# ./pretrained_models (a cached no-op on subsequent runs).
hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
|
|
|
def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
    """Run Hallo talking-head synthesis for one portrait image / audio pair.

    Args:
        source_image: Filesystem path to the portrait image (Gradio
            ``type="filepath"`` value).
        driving_audio: Filesystem path to the driving audio file.
        progress: Gradio progress tracker; ``track_tqdm=True`` mirrors the
            inference pipeline's tqdm bars into the web UI.

    Returns:
        Path to the generated MP4. The name embeds a fresh UUID so
        concurrent requests never overwrite each other's output.

    Raises:
        gr.Error: If either input is missing, so the user sees a clear
            message instead of a stack trace from deep in the pipeline.
    """
    # Fail fast on missing inputs before touching the heavy pipeline.
    if source_image is None:
        raise gr.Error("Please provide a face image.")
    if driving_audio is None:
        raise gr.Error("Please provide a driving audio file.")

    # Build the unique output path once and reuse it for both the
    # pipeline args and the return value, so the two cannot diverge.
    output_path = f'output-{uuid.uuid4()}.mp4'

    args = argparse.Namespace(
        config='configs/inference/default.yaml',
        source_image=source_image,
        driving_audio=driving_audio,
        output=output_path,
        pose_weight=1.0,
        face_weight=1.0,
        lip_weight=1.0,
        face_expand_ratio=1.2,
        checkpoint=None
    )

    inference_process(args)
    return output_path
|
|
|
# Custom CSS for the notice panels rendered by the Blocks UI below:
# #warning-ready (green "ready" banner) and #warning-duplicate (blue
# "duplicate this space" banner), plus a dark-theme override for the
# duplicate panel and layout for its action links.
css = '''
div#warning-ready {
    background-color: #ecfdf5;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-ready > .gr-prose > h2, div#warning-ready > .gr-prose > p {
    color: #057857!important;
}
div#warning-duplicate {
    background-color: #ebf5ff;
    padding: 0 16px 16px;
    margin: 20px 0;
    color: #030303!important;
}
div#warning-duplicate > .gr-prose > h2, div#warning-duplicate > .gr-prose > p {
    color: #0f4592!important;
}
div#warning-duplicate strong {
    color: #0f4592;
}
p.actions {
    display: flex;
    align-items: center;
    margin: 20px 0;
}
div#warning-duplicate .actions a {
    display: inline-block;
    margin-right: 10px;
}
.dark #warning-duplicate {
    background-color: #0c0c0c !important;
    border: 1px solid white !important;
}
'''
|
|
|
# Assemble the web UI: inputs (face image + audio) on the left, the
# generated video on the right, wired together through run_inference.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation")
    gr.Markdown("Generate talking head avatars driven from audio. **5 seconds of audio takes >10 minutes to generate on an L4** - duplicate the space for private use or try for free on Google Colab")

    with gr.Row():
        # Left column: the two inputs plus the trigger button.
        with gr.Column():
            face_image = gr.Image(type="filepath", label="Face")
            audio_input = gr.Audio(type="filepath", label="Driving audio")
            generate_btn = gr.Button("Generate")
        # Right column: where the synthesized clip appears.
        with gr.Column():
            result_video = gr.Video(label="Your talking head")

    # Clicking the button runs inference and routes the MP4 to the player.
    generate_btn.click(fn=run_inference, inputs=[face_image, audio_input], outputs=result_video)

demo.launch()
|
|