Spaces: Runtime error
mayuema committed
Commit • 3a8c535
1 Parent(s): 0754e7d
first release

Files changed:
- app_followyourpose.py → app.py (+1 -38)
- inference_followyourpose.py (+6 -31)
app_followyourpose.py → app.py (RENAMED)
@@ -9,15 +9,10 @@ import gradio as gr
 from inference_followyourpose import merge_config_then_run


-# TITLE = '# [FateZero](http://fate-zero-edit.github.io/)'
 HF_TOKEN = os.getenv('HF_TOKEN')
-# pipe = InferencePipeline(HF_TOKEN)
 pipe = merge_config_then_run()
-# app = InferenceUtil(HF_TOKEN)

 with gr.Blocks(css='style.css') as demo:
-    # gr.Markdown(TITLE)
-
     gr.HTML(
         """
         <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
@@ -134,11 +129,6 @@ with gr.Blocks(css='style.css') as demo:

         with gr.Accordion('Text Prompt', open=True):

-            # source_prompt = gr.Textbox(label='Source Prompt',
-            #         info='A good prompt describes each frame and most objects in video. Especially, it has the object or attribute that we want to edit or preserve.',
-            #         max_lines=1,
-            #         placeholder='Example: "a silver jeep driving down a curvy road in the countryside"',
-            #         value='a silver jeep driving down a curvy road in the countryside')
             target_prompt = gr.Textbox(label='Target Prompt',
                     info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
                     max_lines=1,
@@ -154,33 +144,6 @@ with gr.Blocks(css='style.css') as demo:
             with gr.Column():
                 result = gr.Video(label='Result')
                 # result.style(height=512, width=512)
-        # with gr.Accordion('FateZero Parameters for attention fusing', open=True):
-            # cross_replace_steps = gr.Slider(label='Cross-att replace steps',
-            #         info='More steps, replace more cross attention to preserve semantic layout.',
-            #         minimum=0.0,
-            #         maximum=1.0,
-            #         step=0.1,
-            #         value=0.7)
-
-            # self_replace_steps = gr.Slider(label='Self-att replace steps',
-            #         info='More steps, replace more spatial-temporal self-attention to preserve geometry and motion.',
-            #         minimum=0.0,
-            #         maximum=1.0,
-            #         step=0.1,
-            #         value=0.7)
-
-            # enhance_words = gr.Textbox(label='Enhanced words',
-            #         info='Amplify the target-words cross attention',
-            #         max_lines=1,
-            #         placeholder='Example: "watercolor "',
-            #         value='watercolor')
-
-            # enhance_words_value = gr.Slider(label='Target cross-att amplification',
-            #         info='larger value, more elements of target words',
-            #         minimum=0.0,
-            #         maximum=20.0,
-            #         step=1,
-            #         value=10)
         with gr.Accordion('DDIM Parameters', open=True):
             num_steps = gr.Slider(label='Number of Steps',
                     info='larger value has better editing capacity, but takes more time and memory.',
@@ -208,4 +171,4 @@ with gr.Blocks(css='style.css') as demo:
     target_prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
     run_button.click(fn=pipe.run, inputs=inputs, outputs=result)

-demo.queue().launch(
+demo.queue().launch()
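Note: the surviving UI in app.py is a standard Gradio Blocks layout: a target-prompt textbox, DDIM sliders, a video output, and a run button, with both target_prompt.submit and run_button.click dispatching to pipe.run before demo.queue().launch(). A minimal, self-contained sketch of that wiring pattern follows; the run stub and the slider bounds are illustrative stand-ins, not values taken from the file.

import gradio as gr

def run(target_prompt, num_steps):
    # Stand-in for pipe.run: would return a path to the rendered video.
    return 'result.mp4'

with gr.Blocks() as demo:
    with gr.Accordion('Text Prompt', open=True):
        target_prompt = gr.Textbox(label='Target Prompt', max_lines=1)
    with gr.Accordion('DDIM Parameters', open=True):
        num_steps = gr.Slider(label='Number of Steps',
                              minimum=1, maximum=100, step=1, value=50)
    with gr.Column():
        result = gr.Video(label='Result')
    run_button = gr.Button('Run')

    inputs = [target_prompt, num_steps]
    # Pressing Enter in the textbox and clicking the button call the same function.
    target_prompt.submit(fn=run, inputs=inputs, outputs=result)
    run_button.click(fn=run, inputs=inputs, outputs=result)

# queue() serializes requests so long-running generations do not overlap.
demo.queue().launch()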
inference_followyourpose.py (CHANGED)
@@ -14,41 +14,16 @@ def get_time_string() -> str:
 class merge_config_then_run():
     def __init__(self) -> None:
         # Load the tokenizer
-        # pretrained_model_path = 'FateZero/ckpt/stable-diffusion-v1-4'
         self.tokenizer = None
         self.text_encoder = None
         self.vae = None
         self.unet = None

-
-
-
-
-
-        #     subfolder="tokenizer",
-        #     use_fast=False,
-        # )
-
-        # # Load models and create wrapper for stable diffusion
-        # self.text_encoder = CLIPTextModel.from_pretrained(
-        #     pretrained_model_path,
-        #     subfolder="text_encoder",
-        # )
-
-        # self.vae = AutoencoderKL.from_pretrained(
-        #     pretrained_model_path,
-        #     subfolder="vae",
-        # )
-        # model_config = {
-        #     "lora": 160,
-        #     # temporal_downsample_time: 4
-        #     "SparseCausalAttention_index": ['mid'],
-        #     "least_sc_channel": 640
-        # }
-        # self.unet = UNetPseudo3DConditionModel.from_2d_model(
-        #     os.path.join(pretrained_model_path, "unet"), model_config=model_config
-        # )
-
+    def download_model(self):
+        REPO_ID = 'YueMafighting/FollowYourPose_v1'
+        hf_hub_download(repo_id=REPO_ID, local_dir='./FollowYourPose/checkpoints', local_dir_use_symlinks=False)
+
+
     def run(
         self,
         data_path,
@@ -64,12 +39,12 @@ class merge_config_then_run():
         top_crop=0,
         bottom_crop=0,
     ):
+        self.download_model()
         default_edit_config='FollowYourPose/configs/pose_sample.yaml'
         Omegadict_default_edit_config = OmegaConf.load(default_edit_config)

         dataset_time_string = get_time_string()
         config_now = copy.deepcopy(Omegadict_default_edit_config)
-        # print(f"config_now['pretrained_model_path'] = model_id {model_id}")

         offset_dict = {
             "left": left_crop,
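Note: the new download_model helper fetches the checkpoint repo YueMafighting/FollowYourPose_v1 from the Hugging Face Hub each time run() is called. huggingface_hub.hf_hub_download retrieves a single file and requires a filename argument, while mirroring a whole repository into a local directory is what snapshot_download does; a sketch of both calls is below, with 'checkpoints/pose_sample.yaml' used only as an illustrative filename, not a path taken from the commit.

from huggingface_hub import hf_hub_download, snapshot_download

REPO_ID = 'YueMafighting/FollowYourPose_v1'

# Fetch one known file from the repo (filename is required by hf_hub_download).
single_file = hf_hub_download(
    repo_id=REPO_ID,
    filename='checkpoints/pose_sample.yaml',  # illustrative path, not from the commit
    local_dir='./FollowYourPose/checkpoints',
    local_dir_use_symlinks=False,
)

# Mirror the entire repository into the same local directory instead.
repo_dir = snapshot_download(
    repo_id=REPO_ID,
    local_dir='./FollowYourPose/checkpoints',
    local_dir_use_symlinks=False,
)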
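Note: run() loads the default editing config with OmegaConf and deep-copies it before applying per-request settings such as the crop offsets. A minimal sketch of that pattern, assuming an illustrative 'offset' key that is not necessarily the actual structure of pose_sample.yaml:

import copy
from omegaconf import OmegaConf

# Load the base config once (path as used in the commit).
base_config = OmegaConf.load('FollowYourPose/configs/pose_sample.yaml')

# Deep-copy so per-request edits never mutate the shared default.
config_now = copy.deepcopy(base_config)

# Apply request-specific overrides; the key and values here are illustrative.
offset_dict = {'left': 0, 'right': 0, 'top': 0, 'bottom': 0}
config_now.merge_with(OmegaConf.create({'offset': offset_dict}))

print(OmegaConf.to_yaml(config_now))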