QinOwen committed on
Commit 20a2d44
1 Parent(s): 2ad9d00
.gitignore CHANGED
@@ -7,6 +7,7 @@
 HPSv2
 **/HPSv2
 wandb
+gradio_cached_examples
 
 # VADER-VideoCrafter
 VADER-VideoCrafter/.DS_Store
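The new ignore entry covers the directory Gradio typically creates when example outputs are cached. As a rough illustration only (the interface and `echo` function below are hypothetical stand-ins, not the VADER demo), enabling `cache_examples` precomputes the example outputs and writes them under `gradio_cached_examples/` in the working directory, which is why the folder is excluded from version control:

```python
import gradio as gr

def echo(prompt: str) -> str:
    # placeholder standing in for the real video-generation call
    return prompt

# With cache_examples=True, Gradio runs the examples once at startup and
# stores the results under ./gradio_cached_examples/ -- the directory the
# .gitignore entry above excludes.
demo = gr.Interface(
    fn=echo,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Output"),
    examples=[["A mermaid discovers a hidden underwater kingdom."]],
    cache_examples=True,
)

if __name__ == "__main__":
    demo.launch()
```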
VADER-VideoCrafter/scripts/main/train_t2v_lora.py CHANGED
@@ -27,7 +27,6 @@ from Core.compression_scorer import JpegCompressionScorer, jpeg_compressibility
 import Core.prompts as prompts_file
 from hpsv2.src.open_clip import create_model_and_transforms, get_tokenizer
 import hpsv2
-import bitsandbytes as bnb
 from accelerate import Accelerator
 from accelerate.utils import gather_object
 import torch.distributed as dist
@@ -727,7 +726,6 @@ def run_training(args, model, **kwargs):
 
     peft_model = peft.get_peft_model(model, config)
 
-    peft_model.print_trainable_parameters()
 
     # load the pretrained LoRA model
     if args.lora_ckpt_path != "Base Model":
@@ -751,13 +749,6 @@ def run_training(args, model, **kwargs):
     if args.inference_only:
         peft_model = accelerator.prepare(peft_model)
 
-
-    print("precision: ", peft_model.dtype)
-    # precision of first_stage_model
-    print("precision of first_stage_model: ", peft_model.first_stage_model.dtype)
-    print("peft_model device: ", peft_model.device)
-
-
     # sample shape
     assert (args.height % 16 == 0) and (args.width % 16 == 0), "Error: image size [h,w] should be multiples of 16!"
     # latent noise shape
@@ -824,8 +815,8 @@ def run_training(args, model, **kwargs):
     dir_name = os.path.join(output_dir, "samples")
     # filenames should be related to the gpu index
     # get timestamps for filenames to avoid overwriting
-    # current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
-    filenames = [f"temporal"] # only one sample
+    current_time = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    filenames = [f"temporal_{current_time}"] # only one sample
     # if dir_name is not exists, create it
     os.makedirs(dir_name, exist_ok=True)
 
@@ -855,6 +846,12 @@ def run_training(args, model, **kwargs):
 
     # video = get_videos(batch_samples)
 
+    # if the number of the video is more than 20, remove the oldest one
+    if len(os.listdir(dir_name[0])) > 20:
+        # get the oldest video
+        oldest_video = min(os.listdir(dir_name[0]), key=os.path.getctime)
+        os.remove(os.path.join(dir_name[0], oldest_video))
+
     # # read the video from the saved path
     video_path = os.path.join(dir_name[0], filenames[0]+".mp4")
 
@@ -898,6 +895,16 @@ def setup_model():
     return model
 
 
+def seed_everything_self(TORCH_SEED):
+    random.seed(TORCH_SEED)
+    os.environ['PYTHONHASHSEED'] = str(TORCH_SEED)
+    np.random.seed(TORCH_SEED)
+    torch.manual_seed(TORCH_SEED)
+    torch.cuda.manual_seed_all(TORCH_SEED)
+    torch.backends.cudnn.deterministic = True
+    torch.backends.cudnn.benchmark = False
+
+
 def main_fn(prompt, lora_model, lora_rank, seed=200, height=320, width=512, unconditional_guidance_scale=12, ddim_steps=25, ddim_eta=1.0,
             frames=24, savefps=10, model=None):
 
@@ -920,6 +927,7 @@ def main_fn(prompt, lora_model, lora_rank, seed=200, height=320, width=512, unco
     args.savefps = savefps
 
     seed_everything(args.seed)
+    seed_everything_self(args.seed)
 
     video_path = run_training(args, model)
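The commit timestamps each output filename and caps the samples directory at 20 videos. A minimal standalone sketch of that pruning step is shown below; note that `os.listdir` returns bare filenames, so each entry is joined to the directory before the `os.path.getctime` lookup so the stat resolves correctly regardless of the current working directory (the function name and keep-20 default are illustrative, not from the commit):

```python
import os

def prune_oldest_videos(sample_dir: str, keep: int = 20) -> None:
    """Delete the oldest files in sample_dir until at most `keep` remain."""
    # Join each entry to the directory: getctime on a bare filename would be
    # resolved relative to the current working directory instead.
    paths = [os.path.join(sample_dir, name) for name in os.listdir(sample_dir)]
    files = [p for p in paths if os.path.isfile(p)]
    while len(files) > keep:
        oldest = min(files, key=os.path.getctime)  # oldest by creation time
        os.remove(oldest)
        files.remove(oldest)
```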
 
app.py CHANGED
@@ -67,6 +67,8 @@ custom_css = """
 #centered {
     display: flex;
     justify-content: center;
+    width: 60%;
+    margin: 0 auto;
 }
 .column-centered {
     display: flex;
@@ -119,7 +121,6 @@ with gr.Blocks(css=custom_css) as demo:
     text-decoration: none !important;
     color: black !important;
 }
-
 </style>
 <body>
 <div style="font-size: 1.4em; margin-bottom: 0.5em; ">
@@ -182,7 +183,7 @@ with gr.Blocks(css=custom_css) as demo:
     )
 
     with gr.Row(elem_id="centered"):
-        with gr.Column(scale=0.3, elem_id="params"):
+        with gr.Column(elem_id="params"):
             lora_model = gr.Dropdown(
                 label="VADER Model",
                 choices=["huggingface-pickscore", "huggingface-hps-aesthetic", "Base Model"],
@@ -193,11 +194,11 @@ with gr.Blocks(css=custom_css) as demo:
                 value="A mermaid with flowing hair and a shimmering tail discovers a hidden underwater kingdom adorned with coral palaces, glowing pearls, and schools of colorful fish, encountering both wonders and dangers along the way.")
             run_btn = gr.Button("Run Inference")
 
-        with gr.Column(scale=0.3):
+        with gr.Column():
             output_video = gr.Video(elem_id="image-upload")
 
     with gr.Row(elem_id="centered"):
-        with gr.Column(scale=0.6):
+        with gr.Column():
 
 
             seed = gr.Slider(minimum=0, maximum=65536, label="Seed", step = 1, value=200)
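The layout change drops the fractional `scale` arguments on `gr.Column` (recent Gradio releases expect integer scale values) and instead constrains and centers the rows through the `#centered` CSS rule. A stripped-down sketch of the same pattern, with placeholder components rather than the VADER ones:

```python
import gradio as gr

# CSS mirroring the change above: rows tagged #centered are flex-centered
# and limited to 60% of the page width.
custom_css = """
#centered {
    display: flex;
    justify-content: center;
    width: 60%;
    margin: 0 auto;
}
"""

with gr.Blocks(css=custom_css) as demo:
    with gr.Row(elem_id="centered"):
        with gr.Column():  # no fractional scale; width comes from the CSS rule
            prompt = gr.Textbox(label="Prompt")
            run_btn = gr.Button("Run Inference")
        with gr.Column():
            output_video = gr.Video(label="Output")

if __name__ == "__main__":
    demo.launch()
```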
requirements.txt CHANGED
@@ -13,14 +13,12 @@ tqdm>=4.66.3
 transformers==4.25.1
 moviepy==1.0.3
 av==12.2.0
-gradio
 timm==1.0.7
 scikit-learn==1.5.0
 open_clip_torch==2.22.0
 kornia==0.7.3
 albumentations==1.3.1
 peft==0.11.1
-bitsandbytes==0.42.0
 accelerate==0.31.0
 inflect==7.3.0
 wandb==0.17.3