Spaces: Running on Zero

QinOwen committed
Commit: 20a2d44
1 Parent(s): 2ad9d00

fix-bug
Files changed:
- .gitignore +1 -0
- VADER-VideoCrafter/scripts/main/train_t2v_lora.py +19 -11
- app.py +5 -4
- requirements.txt +0 -2
.gitignore
CHANGED
@@ -7,6 +7,7 @@
 HPSv2
 **/HPSv2
 wandb
+gradio_cached_examples

 # VADER-VideoCrafter
 VADER-VideoCrafter/.DS_Store
VADER-VideoCrafter/scripts/main/train_t2v_lora.py
CHANGED
@@ -27,7 +27,6 @@ from Core.compression_scorer import JpegCompressionScorer, jpeg_compressibility
 import Core.prompts as prompts_file
 from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
 import hpsv2
-import bitsandbytes as bnb
 from accelerate import Accelerator
 from accelerate.utils import gather_object
 import torch.distributed as dist
@@ -727,7 +726,6 @@ def run_training(args, model, **kwargs):

     peft_model = peft.get_peft_model(model, config)

-    peft_model.print_trainable_parameters()

     # load the pretrained LoRA model
     if args.lora_ckpt_path != "Base Model":
@@ -751,13 +749,6 @@ def run_training(args, model, **kwargs):
     if args.inference_only:
         peft_model = accelerator.prepare(peft_model)

-
-        print("precision: ", peft_model.dtype)
-        # precision of first_stage_model
-        print("precision of first_stage_model: ", peft_model.first_stage_model.dtype)
-        print("peft_model device: ", peft_model.device)
-
-
         # sample shape
         assert (args.height % 16 == 0) and (args.width % 16 == 0), "Error: image size [h,w] should be multiples of 16!"
         # latent noise shape
@@ -824,8 +815,8 @@ def run_training(args, model, **kwargs):
         dir_name = os.path.join(output_dir, "samples")
         # filenames should be related to the gpu index
         # get timestamps for filenames to avoid overwriting
-
-        filenames = [f"
+        current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+        filenames = [f"temporal_{current_time}"]  # only one sample
         # if dir_name is not exists, create it
         os.makedirs(dir_name, exist_ok=True)

@@ -855,6 +846,12 @@ def run_training(args, model, **kwargs):

         # video = get_videos(batch_samples)

+        # if the number of the video is more than 20, remove the oldest one
+        if len(os.listdir(dir_name[0])) > 20:
+            # get the oldest video
+            oldest_video = min(os.listdir(dir_name[0]), key=os.path.getctime)
+            os.remove(os.path.join(dir_name[0], oldest_video))
+
         # # read the video from the saved path
         video_path = os.path.join(dir_name[0], filenames[0]+".mp4")

@@ -898,6 +895,16 @@ def setup_model():
     return model


+def seed_everything_self(TORCH_SEED):
+    random.seed(TORCH_SEED)
+    os.environ['PYTHONHASHSEED'] = str(TORCH_SEED)
+    np.random.seed(TORCH_SEED)
+    torch.manual_seed(TORCH_SEED)
+    torch.cuda.manual_seed_all(TORCH_SEED)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
 def main_fn(prompt, lora_model, lora_rank, seed=200, height=320, width=512, unconditional_guidance_scale=12, ddim_steps=25, ddim_eta=1.0,
             frames=24, savefps=10, model=None):

@@ -920,6 +927,7 @@ def main_fn(prompt, lora_model, lora_rank, seed=200, height=320, width=512, unco
     args.savefps = savefps

     seed_everything(args.seed)
+    seed_everything_self(args.seed)

     video_path = run_training(args, model)

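Side note on the cache-trimming hunk above: os.listdir returns bare filenames, so passing them directly to os.path.getctime only resolves correctly when the working directory happens to be dir_name[0]. A minimal, self-contained sketch of the same "keep at most 20 videos" idea that joins the directory explicitly (function name, directory argument, and limit are illustrative, not taken from the repo):

import os

def trim_video_cache(sample_dir, max_videos=20):
    """Remove the oldest files in sample_dir until at most max_videos remain."""
    files = [os.path.join(sample_dir, name) for name in os.listdir(sample_dir)]
    while len(files) > max_videos:
        oldest = min(files, key=os.path.getctime)  # oldest by creation time
        os.remove(oldest)
        files.remove(oldest)

In the Space, such a helper would be called with the samples directory (e.g. trim_video_cache(dir_name[0])) before each new video is written.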
app.py
CHANGED
@@ -67,6 +67,8 @@ custom_css = """
 #centered {
     display: flex;
     justify-content: center;
+    width: 60%;
+    margin: 0 auto;
 }
 .column-centered {
     display: flex;
@@ -119,7 +121,6 @@ with gr.Blocks(css=custom_css) as demo:
         text-decoration: none !important;
         color: black !important;
     }
-
     </style>
     <body>
     <div style="font-size: 1.4em; margin-bottom: 0.5em; ">
@@ -182,7 +183,7 @@ with gr.Blocks(css=custom_css) as demo:
     )

     with gr.Row(elem_id="centered"):
-        with gr.Column(
+        with gr.Column(elem_id="params"):
             lora_model = gr.Dropdown(
                 label="VADER Model",
                 choices=["huggingface-pickscore", "huggingface-hps-aesthetic", "Base Model"],
@@ -193,11 +194,11 @@ with gr.Blocks(css=custom_css) as demo:
                 value="A mermaid with flowing hair and a shimmering tail discovers a hidden underwater kingdom adorned with coral palaces, glowing pearls, and schools of colorful fish, encountering both wonders and dangers along the way.")
             run_btn = gr.Button("Run Inference")

-        with gr.Column(
+        with gr.Column():
             output_video = gr.Video(elem_id="image-upload")

     with gr.Row(elem_id="centered"):
-        with gr.Column(
+        with gr.Column():


             seed = gr.Slider(minimum=0, maximum=65536, label="Seed", step = 1, value=200)
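For reference, the Row/Column nesting that these app.py edits converge on looks roughly like the standalone Gradio sketch below (element ids and widgets are illustrative; this is not the Space's full app.py):

import gradio as gr

custom_css = """
#centered { display: flex; justify-content: center; width: 60%; margin: 0 auto; }
"""

with gr.Blocks(css=custom_css) as demo:
    with gr.Row(elem_id="centered"):
        with gr.Column(elem_id="params"):
            # left column: inference controls
            prompt = gr.Textbox(label="Prompt")
            run_btn = gr.Button("Run Inference")
        with gr.Column():
            # right column: generated video
            output_video = gr.Video()

if __name__ == "__main__":
    demo.launch()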
requirements.txt
CHANGED
@@ -13,14 +13,12 @@ tqdm>=4.66.3
 transformers==4.25.1
 moviepy==1.0.3
 av==12.2.0
-gradio
 timm==1.0.7
 scikit-learn==1.5.0
 open_clip_torch==2.22.0
 kornia==0.7.3
 albumentations==1.3.1
 peft==0.11.1
-bitsandbytes==0.42.0
 accelerate==0.31.0
 inflect==7.3.0
 wandb==0.17.3
|