Hugo Flores Garcia committed
Commit e4e3c4e
Parent(s): a63cce0
requirements.txt CHANGED
@@ -2,7 +2,7 @@ argbind>=0.3.1
 pytorch-ignite
 rich
 audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
-lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
+lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@hf/vampnet-temp
 tqdm
 tensorboard
 google-cloud-logging==2.2.0
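The pin above moves `lac` from `main` to the `hf/vampnet-temp` branch, while `audiotools` stays on `hf/backup-info`. For a quick check outside of `requirements.txt`, the same direct reference should install with plain pip (standard `name @ url` syntax; this command is illustrative, not part of the commit):

pip install "lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@hf/vampnet-temp"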
scripts/exp/train.py CHANGED
@@ -545,6 +545,30 @@ def train(
                 plot_fn=None,
             )
 
+            # sample in 1 step (only for coarse2fine models)
+            if accel.unwrap(model).n_conditioning_codebooks > 0:
+                sampled_argmax = accel.unwrap(model).sample(
+                    codec=codec,
+                    time_steps=z.shape[-1],
+                    start_tokens=z[i : i + 1],
+                    sample="argmax",
+                    sampling_steps=1,
+                )
+                sampled_argmax.cpu().write_audio_to_tb(
+                    f"sampled_1step-argmax/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+                conditioning = z[i : i + 1, : accel.unwrap(model).n_conditioning_codebooks, :]
+                conditioning = accel.unwrap(model).to_signal(conditioning, codec)
+                conditioning.cpu().write_audio_to_tb(
+                    f"conditioning/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+
     def save_imputation(self, z: torch.Tensor):
         # imputations
         _prefix_amt = prefix_amt
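Two details in the added block are worth spelling out. `sampling_steps=1` with `sample="argmax"` requests a single decoding pass, which is why the hunk's comment calls it sampling "in 1 step"; and `z[i : i + 1, : accel.unwrap(model).n_conditioning_codebooks, :]` keeps one batch item and only the conditioning (coarse) codebooks before `to_signal` decodes them to audio. A minimal shape sketch of that slice, with hypothetical sizes standing in for the repo's actual tensors:

import torch

# Hypothetical sizes; the repo's codebook counts and vocab size may differ.
batch, n_codebooks, time_steps = 4, 14, 512
n_conditioning_codebooks = 4
z = torch.randint(0, 1024, (batch, n_codebooks, time_steps))

i = 0
# Same slicing as the added lines: one item, coarse codebooks only.
conditioning = z[i : i + 1, :n_conditioning_codebooks, :]
print(conditioning.shape)  # torch.Size([1, 4, 512])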
scripts/utils/parallel-gpu.sh ADDED
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Get the command to execute from the user
+command_to_execute="$1"
+
+# Get the maximum number of GPUs to use from the user
+max_gpus="$2"
+
+# Get the number of instances to start per GPU from the user
+instances_per_gpu="$3"
+
+# Set the CUDA_VISIBLE_DEVICES flag for each GPU
+for gpu_id in $(seq 0 $(($max_gpus - 1))); do
+    export CUDA_VISIBLE_DEVICES="$gpu_id"
+    # Start the specified number of instances for this GPU
+    for i in $(seq 1 "$instances_per_gpu"); do
+        # Run the command in the background
+        $command_to_execute &
+    done
+done
+
+# Wait for all instances to finish
+wait
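Hypothetical usage of the new script (the training command here is an example, not from the commit): launch 2 instances on each of 4 GPUs, 8 background processes total, then block until all of them exit:

bash scripts/utils/parallel-gpu.sh "python scripts/exp/train.py" 4 2

Note that `$command_to_execute` is expanded unquoted, so the string is word-split into a command and its arguments, but quotes and shell operators inside it are not re-parsed; each backgrounded instance inherits the `CUDA_VISIBLE_DEVICES` value that was exported just before it was spawned.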
scripts/utils/process_folder-c2f.py CHANGED
@@ -6,6 +6,8 @@ import argbind
 from tqdm import tqdm
 import random
 
+from typing import List
+
 from collections import defaultdict
 
 def coarse2fine_infer(
@@ -15,14 +17,15 @@ def coarse2fine_infer(
     device,
     signal_window=3,
     signal_hop=1.5,
-    max_excerpts=25,
+    max_excerpts=20,
 ):
     output = defaultdict(list)
 
     # split into 3 seconds
     windows = [s for s in signal.clone().windows(signal_window, signal_hop)]
+    windows = windows[1:]  # skip first window since it's half zero padded
     random.shuffle(windows)
-    for w in windows[1:max_excerpts]:  # skip the first window since it's mostly zero padded?
+    for w in windows[:max_excerpts]:
         # batch the signal into chunks of 3
         with torch.no_grad():
             # get codes
@@ -68,20 +71,21 @@ def coarse2fine_infer(
 @argbind.bind(without_prefix=True)
 def main(
     sources=[
-        "/home/hugo/data/spotdl/audio/val", "/home/hugo/data/spotdl/audio/test"
+        "/data/spotdl/audio/val", "/data/spotdl/audio/test"
     ],
     audio_ext="mp3",
     exp_name="noise_mode",
     model_paths=[
-        "ckpt/mask/best/vampnet/weights.pth",
-        "ckpt/random/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/mask/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/random/best/vampnet/weights.pth",
     ],
     model_keys=[
-        "noise_mode=mask",
-        "noise_mode=random",
+        "mask",
+        "random",
     ],
-    vqvae_path="ckpt/wav2wav.pth",
-    device="cuda",
+    vqvae_path: str = "runs/codec-ckpt/codec.pth",
+    device: str = "cuda",
+    output_dir: str = ".",
 ):
     from vampnet.modules.transformer import VampNet
     from lac.model.lac import LAC
@@ -99,20 +103,28 @@ def main(
     vqvae.eval()
     print("Loaded VQVAE.")
 
-    audio_dict = defaultdict(list)
+    output_dir = Path(output_dir) / f"{exp_name}-samples"
+
     for source in sources:
         print(f"Processing {source}...")
-        for path in tqdm(list(Path(source).glob(f"**/*.{audio_ext}"))):
+        source_files = list(Path(source).glob(f"**/*.{audio_ext}"))
+        random.shuffle(source_files)
+        for path in tqdm(source_files):
             sig = AudioSignal(path)
             sig.resample(vqvae.sample_rate).normalize(-24).ensure_max_of_audio(1.0)
 
+            out_dir = output_dir / path.stem
+            out_dir.mkdir(parents=True, exist_ok=True)
+            if out_dir.exists():
+                print(f"Skipping {path.stem} since {out_dir} already exists.")
+                continue
+
             for model_key, model in models.items():
                 out = coarse2fine_infer(sig, model, vqvae, device)
-                for k in out:
-                    audio_dict[f"{model_key}-{k}"].extend(out[k])
-
-    audio_zip(audio_dict, f"{exp_name}-results.zip")
-
+                for k, sig_list in out.items():
+                    for i, s in enumerate(sig_list):
+                        s.write(out_dir / f"{model_key}-{k}-{i}.wav")
+
 
 if __name__ == "__main__":
     args = argbind.parse_args()
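One note on the new skip logic: `out_dir.mkdir(parents=True, exist_ok=True)` runs before the `out_dir.exists()` check, so the check is always true once the directory has been created and every file takes the skip branch. A minimal check-before-create sketch (hypothetical helper, not part of the commit):

from pathlib import Path
from typing import Optional

def prepare_out_dir(output_dir: Path, stem: str) -> Optional[Path]:
    # Check first, then create: mirrors the hunk's intent without
    # the mkdir-before-exists ordering.
    out_dir = output_dir / stem
    if out_dir.exists():
        print(f"Skipping {stem} since {out_dir} already exists.")
        return None  # caller skips this file
    out_dir.mkdir(parents=True)
    return out_dir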