talkingphoto

Running

App Files Community

multimodalart HF staff committed on Jun 16

Commit

59d9186

•

1 Parent(s): 751c5b7

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -21

app.py CHANGED Viewed

@@ -2,46 +2,38 @@ import os
 import shutil
 from huggingface_hub import snapshot_download
 import gradio as gr
 from scripts.inference import inference_process
 import argparse
-# Download the repository contents into a directory
-hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo")
-# Define the new directory path for the pretrained models
-new_dir = 'pretrained_models'
-# Ensure the new directory exists
-os.makedirs(new_dir, exist_ok=True)
-# Move all contents from the downloaded directory to the new directory
-for filename in os.listdir(hallo_dir):
-    shutil.move(os.path.join(hallo_dir, filename), os.path.join(new_dir, filename))
 def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
-    # Construct the argparse.Namespace object with all necessary attributes
     args = argparse.Namespace(
-        config='configs/inference/default.yaml',  # Adjust this path as necessary
         source_image=source_image,
         driving_audio=driving_audio,
-        output='output.mp4',  # You might want to manage output paths dynamically
         pose_weight=1.0,
         face_weight=1.0,
         lip_weight=1.0,
         face_expand_ratio=1.2,
-        checkpoint=None  # Adjust or set this according to your checkpointing strategy
     )
-    # Call the imported function
     inference_process(args)
-    # Return output or path to output
-    return 'output.mp4'  # Modify based on your output handling
 iface = gr.Interface(
     fn=run_inference,
     inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
     outputs="video"
 )
-iface.launch()

 import shutil
 from huggingface_hub import snapshot_download
 import gradio as gr
+os.chdir(os.path.dirname(os.path.abspath(__file__)))
 from scripts.inference import inference_process
 import argparse
+import uuid
+hallo_dir = snapshot_download(repo_id="fudan-generative-ai/hallo", local_dir="pretrained_models")
 def run_inference(source_image, driving_audio, progress=gr.Progress(track_tqdm=True)):
+    unique_id = uuid.uuid4()
     args = argparse.Namespace(
+        config='configs/inference/default.yaml',
         source_image=source_image,
         driving_audio=driving_audio,
+        output=f'output-{unique_id}.mp4',
         pose_weight=1.0,
         face_weight=1.0,
         lip_weight=1.0,
         face_expand_ratio=1.2,
+        checkpoint=None
     )
     inference_process(args)
+    return f'output-{unique_id}.mp4'
 iface = gr.Interface(
+    title="Demo for Hallo: Hierarchical Audio-Driven Visual Synthesis for Portrait Image Animation",
+    description="Generate talking head avatars driven from audio. **every 10 seconds of generation takes ~1 minute** - duplicate the space for private use or try for free on Google Colab",
     fn=run_inference,
     inputs=[gr.Image(type="filepath"), gr.Audio(type="filepath")],
+    cache_examples=False,
     outputs="video"
 )
+iface.launch(share=True)