Spaces:
Runtime error
Runtime error
File size: 7,018 Bytes
abaecbc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import os, sys
import tempfile
import gradio as gr
from src.gradio_demo import SadTalker
# from src.utils.text2speech import TTSTalker
from huggingface_hub import snapshot_download
def get_source_image(image):
return image
def download_model():
REPO_ID = 'vinthony/SadTalker'
snapshot_download(repo_id=REPO_ID, local_dir='./checkpoints', local_dir_use_symlinks=True)
def sadtalker_demo():
download_model()
sad_talker = SadTalker(lazy_load=True)
# tts_talker = TTSTalker()
with gr.Blocks(analytics_enabled=False) as sadtalker_interface:
gr.Markdown("<div align='center'> <h2> 😭 SadTalker: Learning Realistic 3D Motion Coefficients for Stylized Audio-Driven Single Image Talking Face Animation (CVPR 2023) </span> </h2> \
<a style='font-size:18px;color: #efefef' href='https://arxiv.org/abs/2211.12194'>Arxiv</a> \
<a style='font-size:18px;color: #efefef' href='https://sadtalker.github.io'>Homepage</a> \
<a style='font-size:18px;color: #efefef' href='https://github.com/Winfredy/SadTalker'> Github </div>")
gr.Markdown("""
<b>You may duplicate the space and upgrade to GPU in settings for better performance and faster inference without waiting in the queue. <a style='display:inline-block' href="https://huggingface.co/spaces/vinthony/SadTalker?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></b> \
<br/><b>Alternatively, try our GitHub <a href=https://github.com/Winfredy/SadTalker> code </a> on your own GPU. </b> <a style='display:inline-block' href="https://github.com/Winfredy/SadTalker"><img src="https://img.shields.io/github/stars/Winfredy/SadTalker?style=social"/></a> \
""")
with gr.Row().style(equal_height=False):
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_source_image"):
with gr.TabItem('Upload image'):
with gr.Row():
source_image = gr.Image(label="Source image", source="upload", type="filepath").style(height=256,width=256)
with gr.Tabs(elem_id="sadtalker_driven_audio"):
with gr.TabItem('Upload or Generating from TTS'):
with gr.Column(variant='panel'):
driven_audio = gr.Audio(label="Input audio(.wav/.mp3)", source="upload", type="filepath")
# with gr.Column(variant='panel'):
# input_text = gr.Textbox(label="Generating audio from text", lines=5, placeholder="Alternatively, you can genreate the audio from text using @Coqui.ai TTS.")
# tts = gr.Button('Generate audio',elem_id="sadtalker_audio_generate", variant='primary')
# tts.click(fn=tts_talker.test, inputs=[input_text], outputs=[driven_audio])
with gr.Column(variant='panel'):
with gr.Tabs(elem_id="sadtalker_checkbox"):
with gr.TabItem('Settings'):
with gr.Column(variant='panel'):
preprocess_type = gr.Radio(['crop','resize','full'], value='crop', label='preprocess', info="How to handle input image?")
is_still_mode = gr.Checkbox(label="w/ Still Mode (fewer hand motion, works with preprocess `full`)")
enhancer = gr.Checkbox(label="w/ GFPGAN as Face enhancer")
submit = gr.Button('Generate', elem_id="sadtalker_generate", variant='primary')
with gr.Tabs(elem_id="sadtalker_genearted"):
gen_video = gr.Video(label="Generated video", format="mp4").style(width=256)
with gr.Row():
examples = [
[
'examples/source_image/full_body_1.png',
'examples/driven_audio/bus_chinese.wav',
'crop',
True,
False
],
[
'examples/source_image/full_body_2.png',
'examples/driven_audio/japanese.wav',
'crop',
False,
False
],
[
'examples/source_image/full3.png',
'examples/driven_audio/deyu.wav',
'crop',
False,
True
],
[
'examples/source_image/full4.jpeg',
'examples/driven_audio/eluosi.wav',
'full',
False,
True
],
[
'examples/source_image/full4.jpeg',
'examples/driven_audio/imagine.wav',
'full',
True,
True
],
[
'examples/source_image/full_body_1.png',
'examples/driven_audio/bus_chinese.wav',
'full',
True,
False
],
[
'examples/source_image/art_13.png',
'examples/driven_audio/fayu.wav',
'resize',
True,
False
],
[
'examples/source_image/art_5.png',
'examples/driven_audio/chinese_news.wav',
'resize',
False,
False
],
[
'examples/source_image/art_5.png',
'examples/driven_audio/RD_Radio31_000.wav',
'resize',
True,
True
],
]
gr.Examples(examples=examples,
inputs=[
source_image,
driven_audio,
preprocess_type,
is_still_mode,
enhancer],
outputs=[gen_video],
fn=sad_talker.test,
cache_examples=os.getenv('SYSTEM') == 'spaces') #
submit.click(
fn=sad_talker.test,
inputs=[source_image,
driven_audio,
preprocess_type,
is_still_mode,
enhancer],
outputs=[gen_video]
)
return sadtalker_interface
if __name__ == "__main__":
demo = sadtalker_demo()
demo.queue(max_size=10)
demo.launch(debug=True)
|