Spaces:
Runtime error
Runtime error
File size: 8,034 Bytes
d2dd1cd 6dcf9e0 097f543 6dcf9e0 d2dd1cd e584282 097f543 a4a66f8 7a54de6 097f543 7a54de6 e584282 d2dd1cd d1820b1 d2dd1cd d1820b1 d2dd1cd 65516a6 d2dd1cd 65516a6 d2dd1cd d1820b1 d2dd1cd 6dcf9e0 d2dd1cd d1820b1 d2dd1cd 3a8c535 f4c8778 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 |
#!/usr/bin/env python
from __future__ import annotations
import os
import gradio as gr
from inference_followyourpose import merge_config_then_run
import sys
import subprocess
# Add the FollowYourPose directory to the module search path.
# NOTE(review): this runs after the `inference_followyourpose` import above,
# so it only affects imports resolved from this point on -- confirm intended.
sys.path.append('FollowYourPose')

# Startup diagnostics: show the working directory and what it contains.
current_dir = os.getcwd()
print("path is :", current_dir)
# os.listdir() accepts a single path; the previous two-argument call
# (current_dir, 'data') raised TypeError before the app could start.
print("current_dir i :", os.listdir(current_dir))
print("dir i :", os.listdir(os.path.join(current_dir, 'data')))

# Run the download script (/home/user/app/data/download.sh on the Space).
# Its stdout is captured, so echo it; otherwise the script's output is lost.
result = subprocess.run(['bash', './data/download.sh'], stdout=subprocess.PIPE)
print(result.stdout.decode(errors='replace'))
if result.returncode != 0:
    # Best-effort step: report the failure but let startup continue.
    print("data/download.sh exited with status", result.returncode)

# Read from the environment; not referenced again in the visible part of this file.
HF_TOKEN = os.getenv('HF_TOKEN')

# Object whose `run` method is wired to the Generate button, the prompt
# submit event and the cached examples below.
pipe = merge_config_then_run()
# Gradio UI definition.
# NOTE(review): the nesting below was reconstructed from text whose leading
# indentation had been stripped. Component creation order is unchanged, but
# which accordion/column each component sits in should be confirmed against
# the deployed layout.
with gr.Blocks(css='style.css') as demo:
    # Page header: title, author list and arXiv / code / homepage links.
    gr.HTML(
        """
<div style="text-align: center; max-width: 1200px; margin: 20px auto;">
<h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
🕺🕺🕺 Follow Your Pose 💃💃💃 </font></center> <br> <center>Pose-Guided Text-to-Video Generation using Pose-Free Videos
</h1>
<h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
<a href="https://mayuelala.github.io/">Yue Ma*</a>
<a href="https://github.com/YingqingHe">Yingqing He*</a> , <a href="http://vinthony.github.io/">Xiaodong Cun</a>,
<a href="https://xinntao.github.io/"> Xintao Wang </a>,
<a href="https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=zh-CN">Ying Shan</a>,
<a href="https://scholar.google.com/citations?user=Xrh1OIUAAAAJ&hl=zh-CN">Xiu Li</a>,
<a href="http://cqf.io">Qifeng Chen</a>
</h2>
<h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
<span class="link-block">
[<a href="https://arxiv.org/abs/2304.01186" target="_blank"
class="external-link ">
<span class="icon">
<i class="ai ai-arxiv"></i>
</span>
<span>arXiv</span>
</a>]
</span>
<!-- Github link -->
<span class="link-block">
[<a href="https://github.com/mayuelala/FollowYourPose" target="_blank"
class="external-link ">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Code</span>
</a>]
</span>
<!-- Github link -->
<span class="link-block">
[<a href="https://follow-your-pose.github.io/" target="_blank"
class="external-link ">
<span class="icon">
<i class="fab fa-github"></i>
</span>
<span>Homepage</span>
</a>]
</span>
</h2>
<h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
TL;DR: We tune 2D stable-diffusion to generate the character videos from pose and text description.
</h2>
</div>
""")

    # Short usage note shown under the header.
    gr.HTML("""
<p>In order to run the demo successfully, we recommend the length of video is about <b>3~5 seconds</b>.
The temporal crop offset and sampling stride are used to adjust the starting point and interval of video samples.
Alternatively, try our GitHub <a href=https://github.com/mayuelala/FollowYourPose> code </a> on your GPU.
</p>""")

    with gr.Row():
        # First column: source video, sampling/cropping controls and prompt.
        with gr.Column():
            with gr.Accordion('Input Video', open=True):
                # user_input_video = gr.File(label='Input Source Video')
                user_input_video = gr.Video(
                    label='Input Source Video',
                    source='upload',
                    type='numpy',
                    format="mp4",
                    visible=True,
                ).style(height="auto")
                video_type = gr.Dropdown(
                    label='The type of input video',
                    choices=["Raw Video", "Skeleton Video"],
                    value="Raw Video",
                )

                with gr.Accordion('Temporal Crop offset and Sampling Stride', open=False):
                    n_sample_frame = gr.Slider(
                        label='Number of Frames',
                        minimum=0,
                        maximum=32,
                        step=1,
                        value=8,
                    )
                    stride = gr.Slider(
                        label='Temporal stride',
                        minimum=0,
                        maximum=20,
                        step=1,
                        value=1,
                    )

                with gr.Accordion('Spatial Crop offset', open=False):
                    # Four integer inputs created in left, right, top, bottom order.
                    offset_list = [
                        gr.Number(label=crop_label, value=0, precision=0)
                        for crop_label in ('Left crop', 'Right crop', 'Top crop', 'Bottom crop')
                    ]
                    left_crop, right_crop, top_crop, bottom_crop = offset_list

                # Frame-sampling controls followed by the four crop offsets.
                ImageSequenceDataset_list = [n_sample_frame, stride, *offset_list]

            with gr.Accordion('Text Prompt', open=True):
                target_prompt = gr.Textbox(
                    label='Target Prompt',
                    info='The simple background may achieve better results(e.g., "beach", "moon" prompt is better than "street" and "market")',
                    max_lines=1,
                    placeholder='Example: "Iron man on the beach"',
                    value='Iron man on the beach',
                )

            run_button = gr.Button('Generate')

        # Second column: generated video and sampler settings.
        with gr.Column():
            result = gr.Video(label='Result')
            # result.style(height=512, width=512)
            with gr.Accordion('DDIM Parameters', open=True):
                num_steps = gr.Slider(
                    label='Number of Steps',
                    info='larger value has better editing capacity, but takes more time and memory.',
                    minimum=0,
                    maximum=50,
                    step=1,
                    value=50,
                )
                guidance_scale = gr.Slider(
                    label='CFG Scale',
                    minimum=0,
                    maximum=50,
                    step=0.1,
                    value=12.0,
                )

    # Components passed to pipe.run, in this order, by the examples, the
    # prompt submit event and the Generate button.
    inputs = [
        user_input_video,
        target_prompt,
        num_steps,
        guidance_scale,
        video_type,
        *ImageSequenceDataset_list,
    ]

    with gr.Row():
        from example import style_example
        examples = style_example

        gr.Examples(
            examples=examples,
            inputs=inputs,
            outputs=result,
            fn=pipe.run,
            cache_examples=True,
        )

    # Pressing Enter in the prompt box and clicking Generate run the same call.
    run_kwargs = dict(fn=pipe.run, inputs=inputs, outputs=result)
    target_prompt.submit(**run_kwargs)
    run_button.click(**run_kwargs)

demo.queue().launch()
# demo.queue().launch(share=False, server_name='0.0.0.0', server_port=80)