mayuema committed · Commit d2dd1cd · Parent(s): c27592b
first release
Files changed:
- .gitignore +3 -0
- .gitmodules +0 -0
- .pre-commit-config.yaml +37 -0
- .style.yapf +5 -0
- Dockerfile +63 -0
- FollowYourPose +1 -0
- LICENSE +21 -0
- app_followyourpose.py +211 -0
- docs/OpenSans-Regular.ttf +0 -0
- example.py +85 -0
- inference_followyourpose.py +103 -0
- packages.txt +1 -0
- requirements.txt +133 -0
- style.css +3 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
+trash/*
+tmp
+gradio_cached_examples
.gitmodules
ADDED
File without changes
.pre-commit-config.yaml
ADDED
@@ -0,0 +1,37 @@
+exclude: patch
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.2.0
+  hooks:
+  - id: check-executables-have-shebangs
+  - id: check-json
+  - id: check-merge-conflict
+  - id: check-shebang-scripts-are-executable
+  - id: check-toml
+  - id: check-yaml
+  - id: double-quote-string-fixer
+  - id: end-of-file-fixer
+  - id: mixed-line-ending
+    args: ['--fix=lf']
+  - id: requirements-txt-fixer
+  - id: trailing-whitespace
+- repo: https://github.com/myint/docformatter
+  rev: v1.4
+  hooks:
+  - id: docformatter
+    args: ['--in-place']
+- repo: https://github.com/pycqa/isort
+  rev: 5.12.0
+  hooks:
+  - id: isort
+- repo: https://github.com/pre-commit/mirrors-mypy
+  rev: v0.991
+  hooks:
+  - id: mypy
+    args: ['--ignore-missing-imports']
+    additional_dependencies: ['types-python-slugify']
+- repo: https://github.com/google/yapf
+  rev: v0.32.0
+  hooks:
+  - id: yapf
+    args: ['--parallel', '--in-place']
.style.yapf
ADDED
@@ -0,0 +1,5 @@
+[style]
+based_on_style = pep8
+blank_line_before_nested_class_or_def = false
+spaces_before_comment = 2
+split_before_logical_operator = true
Dockerfile
ADDED
@@ -0,0 +1,63 @@
+FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+ENV DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && \
+    apt-get upgrade -y && \
+    apt-get install -y --no-install-recommends \
+    git \
+    zip \
+    unzip \
+    git-lfs \
+    wget \
+    curl \
+    ffmpeg \
+    x264 \
+    # python build dependencies \
+    build-essential \
+    libssl-dev \
+    zlib1g-dev \
+    libbz2-dev \
+    libreadline-dev \
+    libsqlite3-dev \
+    libncursesw5-dev \
+    xz-utils \
+    tk-dev \
+    libxml2-dev \
+    libxmlsec1-dev \
+    libffi-dev \
+    liblzma-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# RUN apt-get update && \
+#     apt-get install zip
+# RUN wget https://github.com/ChenyangQiQi/FateZero/releases/download/v0.0.1/style.zip && unzip style.zip
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:${PATH}
+WORKDIR ${HOME}/app
+
+RUN curl https://pyenv.run | bash
+ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ENV PYTHON_VERSION=3.10.9
+RUN pyenv install ${PYTHON_VERSION} && \
+    pyenv global ${PYTHON_VERSION} && \
+    pyenv rehash && \
+    pip install --no-cache-dir -U pip setuptools wheel
+
+RUN pip install --no-cache-dir -U torch==1.13.1 torchvision==0.14.1
+COPY --chown=1000 requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -U -r /tmp/requirements.txt
+
+COPY --chown=1000 . ${HOME}/app
+RUN ls -a
+RUN cd ./FateZero/ckpt && bash download.sh
+RUN cd ./FateZero/data && bash download.sh
+ENV PYTHONPATH=${HOME}/app \
+    PYTHONUNBUFFERED=1 \
+    GRADIO_ALLOW_FLAGGING=never \
+    GRADIO_NUM_PORTS=1 \
+    GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_THEME=huggingface \
+    SYSTEM=spaces
+CMD ["python", "app_followyourpose.py"]
FollowYourPose
ADDED
@@ -0,0 +1 @@
+Subproject commit 40a333f7f1e3940903916c419b725a4a17f348a1
LICENSE
ADDED
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2022 hysts
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
app_followyourpose.py
ADDED
@@ -0,0 +1,211 @@
+#!/usr/bin/env python
+
+from __future__ import annotations
+
+import os
+
+import gradio as gr
+
+from inference_followyourpose import merge_config_then_run
+
+
+# TITLE = '# [FateZero](http://fate-zero-edit.github.io/)'
+HF_TOKEN = os.getenv('HF_TOKEN')
+# pipe = InferencePipeline(HF_TOKEN)
+pipe = merge_config_then_run()
+# app = InferenceUtil(HF_TOKEN)
+
+with gr.Blocks(css='style.css') as demo:
+    # gr.Markdown(TITLE)
+    gr.HTML(
+        """
+        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
+        <h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
+            🕺🕺🕺 Follow Your Pose 💃💃💃 <br> Pose-Guided Text-to-Video Generation using Pose-Free Videos
+        </h1>
+        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
+            <a href="https://mayuelala.github.io/">Yue Ma*</a>,
+            <a href="https://github.com/YingqingHe">Yingqing He*</a>,
+            <a href="http://vinthony.github.io/">Xiaodong Cun</a>,
+            <a href="https://xinntao.github.io/">Xintao Wang</a>,
+            <a href="https://scholar.google.com/citations?user=4oXBp9UAAAAJ&hl=zh-CN">Ying Shan</a>,
+            <a href="https://scholar.google.com/citations?user=Xrh1OIUAAAAJ&hl=zh-CN">Xiu Li</a>,
+            <a href="http://cqf.io">Qifeng Chen</a>
+        </h2>
+
+        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
+            <span class="link-block">
+                [<a href="https://arxiv.org/abs/2304.01186" target="_blank" class="external-link">
+                    <span class="icon"><i class="ai ai-arxiv"></i></span>
+                    <span>arXiv</span>
+                </a>]
+            </span>
+            <!-- GitHub link -->
+            <span class="link-block">
+                [<a href="https://github.com/mayuelala/FollowYourPose" target="_blank" class="external-link">
+                    <span class="icon"><i class="fab fa-github"></i></span>
+                    <span>Code</span>
+                </a>]
+            </span>
+            <!-- Homepage link -->
+            <span class="link-block">
+                [<a href="https://follow-your-pose.github.io/" target="_blank" class="external-link">
+                    <span class="icon"><i class="fab fa-github"></i></span>
+                    <span>Homepage</span>
+                </a>]
+            </span>
+        </h2>
+        <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
+            TL;DR: We tune 2D stable diffusion to generate character videos from pose and text descriptions.
+        </h2>
+        </div>
+        """)
+
+    gr.HTML("""
+        <p>Alternatively, try our GitHub <a href=https://github.com/mayuelala/FollowYourPose>code</a> on your own GPU.</p>""")
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Accordion('Input Video', open=True):
+                # user_input_video = gr.File(label='Input Source Video')
+                user_input_video = gr.Video(label='Input Source Video', source='upload', type='numpy', format='mp4', visible=True).style(height='auto')
+                with gr.Accordion('Temporal Crop offset and Sampling Stride', open=False):
+                    n_sample_frame = gr.Slider(label='Number of Frames',
+                                               minimum=0,
+                                               maximum=32,
+                                               step=1,
+                                               value=8)
+                    stride = gr.Slider(label='Temporal stride',
+                                       minimum=0,
+                                       maximum=20,
+                                       step=1,
+                                       value=1)
+                    start_sample_frame = gr.Number(label='Start frame in the video',
+                                                   value=0,
+                                                   precision=0)
+
+                with gr.Accordion('Spatial Crop offset', open=False):
+                    left_crop = gr.Number(label='Left crop', value=0, precision=0)
+                    right_crop = gr.Number(label='Right crop', value=0, precision=0)
+                    top_crop = gr.Number(label='Top crop', value=0, precision=0)
+                    bottom_crop = gr.Number(label='Bottom crop', value=0, precision=0)
+                    offset_list = [
+                        left_crop,
+                        right_crop,
+                        top_crop,
+                        bottom_crop,
+                    ]
+
+                ImageSequenceDataset_list = [
+                    start_sample_frame,
+                    n_sample_frame,
+                    stride,
+                ] + offset_list
+
+            # model_id = gr.Dropdown(
+            #     label='Model ID',
+            #     choices=[
+            #         'CompVis/stable-diffusion-v1-4',
+            #         # add shape editing ckpt here
+            #     ],
+            #     value='CompVis/stable-diffusion-v1-4')
+
+            with gr.Accordion('Text Prompt', open=True):
+                # source_prompt = gr.Textbox(label='Source Prompt',
+                #                            info='A good prompt describes each frame and most objects in the video. In particular, it names the object or attribute we want to edit or preserve.',
+                #                            max_lines=1,
+                #                            placeholder='Example: "a silver jeep driving down a curvy road in the countryside"',
+                #                            value='a silver jeep driving down a curvy road in the countryside')
+                target_prompt = gr.Textbox(label='Target Prompt',
+                                           info='A reasonable composition of the video may achieve better results (e.g., a "sunflower" video with a "Van Gogh" prompt works better than with a "Monet" prompt).',
+                                           max_lines=1,
+                                           placeholder='Example: "watercolor painting of a silver jeep driving down a curvy road in the countryside"',
+                                           value='watercolor painting of a silver jeep driving down a curvy road in the countryside')
+
+            run_button = gr.Button('Generate')
+
+        with gr.Column():
+            result = gr.Video(label='Result')
+            # result.style(height=512, width=512)
+            # with gr.Accordion('FateZero Parameters for attention fusing', open=True):
+            #     cross_replace_steps = gr.Slider(label='Cross-att replace steps',
+            #                                     info='More steps replace more cross attention, preserving the semantic layout.',
+            #                                     minimum=0.0, maximum=1.0, step=0.1, value=0.7)
+            #     self_replace_steps = gr.Slider(label='Self-att replace steps',
+            #                                    info='More steps replace more spatial-temporal self-attention, preserving geometry and motion.',
+            #                                    minimum=0.0, maximum=1.0, step=0.1, value=0.7)
+            #     enhance_words = gr.Textbox(label='Enhanced words',
+            #                                info='Amplify the target-word cross attention',
+            #                                max_lines=1, placeholder='Example: "watercolor"', value='watercolor')
+            #     enhance_words_value = gr.Slider(label='Target cross-att amplification',
+            #                                     info='A larger value adds more elements of the target words',
+            #                                     minimum=0.0, maximum=20.0, step=1, value=10)
+            with gr.Accordion('DDIM Parameters', open=True):
+                num_steps = gr.Slider(label='Number of Steps',
+                                      info='A larger value gives better editing capacity but takes more time and memory.',
+                                      minimum=0, maximum=50, step=1, value=50)
+                guidance_scale = gr.Slider(label='CFG Scale',
+                                           minimum=0, maximum=50, step=0.1, value=12.5)
+    with gr.Row():
+        from example import style_example
+        examples = style_example
+
+    inputs = [
+        user_input_video,
+        target_prompt,
+        num_steps,
+        guidance_scale,
+        *ImageSequenceDataset_list,
+    ]
+    target_prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
+    run_button.click(fn=pipe.run, inputs=inputs, outputs=result)
+
+demo.queue().launch(share=False, server_name='0.0.0.0', server_port=80)
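Note that Gradio binds the inputs list to the callback positionally, not by name, and the list above has eleven components while merge_config_then_run.run takes twelve parameters beginning with data_path. A minimal sketch (hypothetical values, not part of the commit) showing where each component lands:

import inspect

# Stand-in with the same parameter order as merge_config_then_run.run.
def run(data_path, target_prompt, num_steps, guidance_scale,
        user_input_video=None, start_sample_frame=0, n_sample_frame=8,
        stride=1, left_crop=0, right_crop=0, top_crop=0, bottom_crop=0):
    return locals()

# The app's inputs list supplies values in this order:
values = ['video.mp4', 'a prompt', 50, 12.5,  # video, prompt, steps, CFG
          0, 8, 1, 0, 0, 0, 0]                # start, frames, stride, 4 crops
print(inspect.signature(run).bind(*values).arguments)
# The uploaded video binds to data_path (later used as skeleton_path), and
# user_input_video receives the start-frame value: purely positional binding
# places each dataset value one slot left of its like-named parameter.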
docs/OpenSans-Regular.ttf
ADDED
Binary file (148 kB)
example.py
ADDED
@@ -0,0 +1,85 @@
+num_steps = 30
+style_example = [
+    [
+        'CompVis/stable-diffusion-v1-4',
+        'FateZero/data/teaser_car-turn.mp4',
+        'a silver jeep driving down a curvy road in the countryside',
+        'watercolor painting of a silver jeep driving down a curvy road in the countryside',
+        0.8,
+        0.8,
+        'watercolor',
+        10,
+        num_steps,
+        7.5,
+        # input video arguments
+        None, 0, 8, 1, 0, 0, 0, 0
+    ],
+    # [
+    #     'CompVis/stable-diffusion-v1-4',
+    #     'FateZero/data/style/sunflower.mp4',
+    #     'a yellow sunflower',
+    #     'van gogh style painting of a yellow sunflower',
+    #     0.5,
+    #     0.5,
+    #     'van gogh',
+    #     10,
+    #     num_steps,
+    #     7.5,
+    #     None, 0, 8, 1, 0, 0, 0, 0
+    # ],
+    # [
+    #     'CompVis/stable-diffusion-v1-4',
+    #     'FateZero/data/style/surf.mp4',
+    #     'a man with round helmet surfing on a white wave in blue ocean with a rope',
+    #     'The Ukiyo-e style painting of a man with round helmet surfing on a white wave in blue ocean with a rope',
+    #     0.9,
+    #     0.9,
+    #     'Ukiyo-e',
+    #     10,
+    #     num_steps,
+    #     7.5,
+    #     None, 0, 8, 1, 0, 0, 0, 0
+    # ],
+    # [
+    #     'CompVis/stable-diffusion-v1-4',
+    #     'FateZero/data/style/train.mp4',
+    #     'a train traveling down tracks next to a forest filled with trees and flowers and a man on the side of the track',
+    #     'a train traveling down tracks next to a forest filled with trees and flowers and a man on the side of the track Makoto Shinkai style',
+    #     0.9,
+    #     0.9,
+    #     'Makoto Shinkai',
+    #     10,
+    #     num_steps,
+    #     7.5,
+    #     None, 0, 8, 28, 0, 0, 0, 0
+    # ],
+    # [
+    #     'CompVis/stable-diffusion-v1-4',
+    #     'FateZero/data/attribute/swan_swarov.mp4',
+    #     'a black swan with a red beak swimming in a river near a wall and bushes',
+    #     'a Swarovski crystal swan with a red beak swimming in a river near a wall and bushes',
+    #     0.8,
+    #     0.6,
+    #     'Swarovski crystal',
+    #     10,
+    #     num_steps,
+    #     7.5,
+    #     None, 0, 8, 1, 0, 0, 0, 0
+    # ],
+    # [
+    #     'CompVis/stable-diffusion-v1-4',
+    #     'FateZero/data/attribute/squirrel_carrot.mp4',
+    #     'A squirrel is eating a carrot',
+    #     'A rabbit is eating a eggplant',
+    #     0.5,
+    #     0.5,
+    #     'rabbit eggplant',
+    #     10,
+    #     num_steps,
+    #     7.5,
+    #     None, 0, 8, 1, 0, 0, 0, 0
+    # ],
+]
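The rows keep the 18-field argument layout of the FateZero demo this Space was adapted from, while the app above wires only an 11-component inputs list and builds examples without attaching it to any Gradio component. A hedged field-by-field reading of a row, reconstructed from the commented-out FateZero controls in app_followyourpose.py (names are illustrative, not from the commit):

# Inferred meaning of each position in a style_example row.
FATEZERO_ROW_FIELDS = [
    'model_id', 'video_path', 'source_prompt', 'target_prompt',
    'cross_replace_steps', 'self_replace_steps',
    'enhance_words', 'enhance_words_value',
    'num_steps', 'guidance_scale',
    # input video arguments:
    'user_input_video', 'start_sample_frame', 'n_sample_frame', 'stride',
    'left_crop', 'right_crop', 'top_crop', 'bottom_crop',
]

from example import style_example
row = style_example[0]
assert len(row) == len(FATEZERO_ROW_FIELDS)  # both have 18 entries
print(dict(zip(FATEZERO_ROW_FIELDS, row)))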
inference_followyourpose.py
ADDED
@@ -0,0 +1,103 @@
+
+from FollowYourPose.test_followyourpose import *
+
+import copy
+import datetime
+
+import gradio as gr
+from omegaconf import OmegaConf
+from transformers import AutoTokenizer, CLIPTextModel
+
+
+def get_time_string() -> str:
+    x = datetime.datetime.now()
+    return f"{(x.year - 2000):02d}{x.month:02d}{x.day:02d}-{x.hour:02d}{x.minute:02d}{x.second:02d}"
+
+
+class merge_config_then_run():
+    def __init__(self) -> None:
+        # Load the tokenizer
+        # pretrained_model_path = 'FateZero/ckpt/stable-diffusion-v1-4'
+        self.tokenizer = None
+        self.text_encoder = None
+        self.vae = None
+        self.unet = None
+
+        # cache_ckpt = False
+        # if cache_ckpt:
+        #     self.tokenizer = AutoTokenizer.from_pretrained(
+        #         pretrained_model_path,
+        #         # 'FateZero/ckpt/stable-diffusion-v1-4',
+        #         subfolder="tokenizer",
+        #         use_fast=False,
+        #     )
+
+        #     # Load models and create wrapper for stable diffusion
+        #     self.text_encoder = CLIPTextModel.from_pretrained(
+        #         pretrained_model_path,
+        #         subfolder="text_encoder",
+        #     )
+
+        #     self.vae = AutoencoderKL.from_pretrained(
+        #         pretrained_model_path,
+        #         subfolder="vae",
+        #     )
+        #     model_config = {
+        #         "lora": 160,
+        #         # temporal_downsample_time: 4
+        #         "SparseCausalAttention_index": ['mid'],
+        #         "least_sc_channel": 640
+        #     }
+        #     self.unet = UNetPseudo3DConditionModel.from_2d_model(
+        #         os.path.join(pretrained_model_path, "unet"), model_config=model_config
+        #     )
+
+    def run(
+        self,
+        data_path,
+        target_prompt,
+        num_steps,
+        guidance_scale,
+        user_input_video=None,
+        start_sample_frame=0,
+        n_sample_frame=8,
+        stride=1,
+        left_crop=0,
+        right_crop=0,
+        top_crop=0,
+        bottom_crop=0,
+    ):
+        default_edit_config = 'FollowYourPose/configs/pose_sample.yaml'
+        Omegadict_default_edit_config = OmegaConf.load(default_edit_config)
+
+        dataset_time_string = get_time_string()
+        config_now = copy.deepcopy(Omegadict_default_edit_config)
+        # print(f"config_now['pretrained_model_path'] = model_id {model_id}")
+
+        offset_dict = {
+            "left": left_crop,
+            "right": right_crop,
+            "top": top_crop,
+            "bottom": bottom_crop,
+        }
+        ImageSequenceDataset_dict = {
+            "start_sample_frame": start_sample_frame,
+            "n_sample_frame": n_sample_frame,
+            "sampling_rate": stride,
+            "offset": offset_dict,
+        }
+        config_now['validation_data'].update(ImageSequenceDataset_dict)
+        if user_input_video is None and data_path is None:
+            raise gr.Error('You need to upload a video or choose a provided video')
+        if user_input_video is not None:
+            if isinstance(user_input_video, str):
+                config_now['validation_data']['path'] = user_input_video
+            elif hasattr(user_input_video, 'name') and user_input_video.name is not None:
+                config_now['validation_data']['path'] = user_input_video.name
+        config_now['validation_data']['prompts'] = [target_prompt]
+        # ddim config
+        config_now['validation_data']['guidance_scale'] = guidance_scale
+        config_now['validation_data']['num_inference_steps'] = num_steps
+        config_now['skeleton_path'] = data_path
+
+        save_path = test(**config_now)
+        mp4_path = save_path.replace('_0.gif', '_0_0_0.mp4')
+        return mp4_path
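run() is essentially a config-merge wrapper: it loads the default YAML, deep-copies it, overwrites the validation_data section with the request's dataset and DDIM settings, and hands the merged config to test(). A minimal self-contained sketch of that pattern (an inline config standing in for FollowYourPose/configs/pose_sample.yaml; the field values are illustrative):

import copy
from omegaconf import OmegaConf

# Stand-in for OmegaConf.load('FollowYourPose/configs/pose_sample.yaml').
default_config = OmegaConf.create({
    'validation_data': {
        'prompts': [],
        'guidance_scale': 7.5,
        'num_inference_steps': 50,
    },
})

# Deep-copy the defaults, then overwrite only the per-request fields,
# mirroring merge_config_then_run.run above.
config_now = copy.deepcopy(default_config)
per_request = {
    'start_sample_frame': 0,
    'n_sample_frame': 8,
    'sampling_rate': 1,
    'offset': {'left': 0, 'right': 0, 'top': 0, 'bottom': 0},
    'prompts': ['watercolor painting of a silver jeep'],
    'guidance_scale': 12.5,
}
for key, value in per_request.items():
    config_now['validation_data'][key] = value
print(OmegaConf.to_yaml(config_now))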
packages.txt
ADDED
@@ -0,0 +1 @@
+ffmpeg
requirements.txt
ADDED
@@ -0,0 +1,133 @@
+# This file may be used to create an environment using:
+# $ conda create --name <env> --file <this file>
+# platform: linux-64
+_libgcc_mutex=0.1=main
+_openmp_mutex=5.1=1_gnu
+absl-py=1.4.0=pypi_0
+accelerate=0.16.0=pypi_0
+aiofiles=23.1.0=pypi_0
+aiohttp=3.8.4=pypi_0
+aiosignal=1.3.1=pypi_0
+altair=4.2.2=pypi_0
+antlr4-python3-runtime=4.9.3=pypi_0
+anyio=3.6.2=pypi_0
+async-timeout=4.0.2=pypi_0
+attrs=22.2.0=pypi_0
+av=10.0.0=pypi_0
+bitsandbytes=0.35.4=pypi_0
+ca-certificates=2023.01.10=h06a4308_0
+cachetools=5.3.0=pypi_0
+certifi=2022.12.7=py38h06a4308_0
+charset-normalizer=3.0.1=pypi_0
+click=8.1.3=pypi_0
+contourpy=1.0.7=pypi_0
+cycler=0.11.0=pypi_0
+decord=0.6.0=pypi_0
+diffusers=0.11.1=pypi_0
+einops=0.6.0=pypi_0
+entrypoints=0.4=pypi_0
+fastapi=0.95.0=pypi_0
+ffmpy=0.3.0=pypi_0
+filelock=3.9.0=pypi_0
+fonttools=4.39.3=pypi_0
+frozenlist=1.3.3=pypi_0
+fsspec=2023.3.0=pypi_0
+ftfy=6.1.1=pypi_0
+google-auth=2.16.1=pypi_0
+google-auth-oauthlib=0.4.6=pypi_0
+gradio=3.24.1=pypi_0
+gradio-client=0.0.7=pypi_0
+grpcio=1.51.3=pypi_0
+h11=0.14.0=pypi_0
+httpcore=0.16.3=pypi_0
+httpx=0.23.3=pypi_0
+huggingface-hub=0.13.3=pypi_0
+idna=3.4=pypi_0
+imageio=2.25.1=pypi_0
+importlib-metadata=6.0.0=pypi_0
+importlib-resources=5.12.0=pypi_0
+jinja2=3.1.2=pypi_0
+jsonschema=4.17.3=pypi_0
+kiwisolver=1.4.4=pypi_0
+ld_impl_linux-64=2.38=h1181459_1
+libffi=3.4.2=h6a678d5_6
+libgcc-ng=11.2.0=h1234567_1
+libgomp=11.2.0=h1234567_1
+libstdcxx-ng=11.2.0=h1234567_1
+linkify-it-py=2.0.0=pypi_0
+markdown=3.4.1=pypi_0
+markdown-it-py=2.2.0=pypi_0
+markupsafe=2.1.2=pypi_0
+matplotlib=3.7.1=pypi_0
+mdit-py-plugins=0.3.3=pypi_0
+mdurl=0.1.2=pypi_0
+modelcards=0.1.6=pypi_0
+multidict=6.0.4=pypi_0
+mypy-extensions=1.0.0=pypi_0
+ncurses=6.4=h6a678d5_0
+numpy=1.24.2=pypi_0
+nvidia-cublas-cu11=11.10.3.66=pypi_0
+nvidia-cuda-nvrtc-cu11=11.7.99=pypi_0
+nvidia-cuda-runtime-cu11=11.7.99=pypi_0
+nvidia-cudnn-cu11=8.5.0.96=pypi_0
+oauthlib=3.2.2=pypi_0
+omegaconf=2.3.0=pypi_0
+opencv-python=4.7.0.72=pypi_0
+openssl=1.1.1t=h7f8727e_0
+orjson=3.8.9=pypi_0
+packaging=23.0=pypi_0
+pandas=1.5.3=pypi_0
+pillow=9.4.0=pypi_0
+pip=22.3.1=py38h06a4308_0
+pkgutil-resolve-name=1.3.10=pypi_0
+protobuf=4.22.0=pypi_0
+psutil=5.9.4=pypi_0
+pyasn1=0.4.8=pypi_0
+pyasn1-modules=0.2.8=pypi_0
+pydantic=1.10.7=pypi_0
+pydub=0.25.1=pypi_0
+pyparsing=3.0.9=pypi_0
+pyre-extensions=0.0.23=pypi_0
+pyrsistent=0.19.3=pypi_0
+python=3.8.16=h7a1cb2a_2
+python-dateutil=2.8.2=pypi_0
+python-multipart=0.0.6=pypi_0
+pytz=2022.7.1=pypi_0
+pyyaml=6.0=pypi_0
+readline=8.2=h5eee18b_0
+regex=2022.10.31=pypi_0
+requests=2.28.2=pypi_0
+requests-oauthlib=1.3.1=pypi_0
+rfc3986=1.5.0=pypi_0
+rsa=4.9=pypi_0
+semantic-version=2.10.0=pypi_0
+setuptools=65.6.3=py38h06a4308_0
+six=1.16.0=pypi_0
+sniffio=1.3.0=pypi_0
+sqlite=3.40.1=h5082296_0
+starlette=0.26.1=pypi_0
+tensorboard=2.12.0=pypi_0
+tensorboard-data-server=0.7.0=pypi_0
+tensorboard-plugin-wit=1.8.1=pypi_0
+tk=8.6.12=h1ccaba5_0
+tokenizers=0.13.2=pypi_0
+toolz=0.12.0=pypi_0
+torch=1.13.1=pypi_0
+torchvision=0.14.1=pypi_0
+tqdm=4.64.1=pypi_0
+transformers=4.26.1=pypi_0
+triton=1.1.1=pypi_0
+typing-extensions=4.5.0=pypi_0
+typing-inspect=0.8.0=pypi_0
+uc-micro-py=1.0.1=pypi_0
+urllib3=1.26.14=pypi_0
+uvicorn=0.21.1=pypi_0
+wcwidth=0.2.6=pypi_0
+websockets=11.0=pypi_0
+werkzeug=2.2.3=pypi_0
+wheel=0.38.4=py38h06a4308_0
+xformers=0.0.16=pypi_0
+xz=5.2.10=h5eee18b_1
+yarl=1.8.2=pypi_0
+zipp=3.14.0=pypi_0
+zlib=1.2.13=h5eee18b_0
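By its own header this file is a conda export (name=version=build lines from conda list --export), yet the Dockerfile feeds it to pip install -r, which expects name==version pins and does not parse the three-part syntax. If that step fails, a conversion along these lines (a sketch under that assumption, not part of the commit) recovers pip-compatible pins for the PyPI-sourced entries:

def conda_export_to_pip(lines):
    """Rewrite conda-export pins as pip pins, keeping only PyPI packages."""
    pins = []
    for line in lines:
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        parts = line.split('=')
        # 'absl-py=1.4.0=pypi_0' -> ['absl-py', '1.4.0', 'pypi_0'];
        # conda-only builds (e.g. 'python=3.8.16=h7a1cb2a_2') are skipped.
        if len(parts) == 3 and parts[2] == 'pypi_0':
            pins.append(f'{parts[0]}=={parts[1]}')
    return pins

with open('requirements.txt') as f:
    print('\n'.join(conda_export_to_pip(f)))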
style.css
ADDED
@@ -0,0 +1,3 @@
+h1 {
+  text-align: center;
+}