Hugo Flores Garcia committed · Commit e4e3c4e
1 Parent(s): a63cce0

c2f

Files changed:
- requirements.txt +1 -1
- scripts/exp/train.py +24 -0
- scripts/utils/parallel-gpu.sh +23 -0
- scripts/utils/process_folder-c2f.py +28 -16
requirements.txt CHANGED
@@ -2,7 +2,7 @@ argbind>=0.3.1
 pytorch-ignite
 rich
 audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
-lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@
+lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@hf/vampnet-temp
 tqdm
 tensorboard
 google-cloud-logging==2.2.0
|
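The only functional change above is repointing the lac dependency at the hf/vampnet-temp branch; reinstalling from the requirements file picks it up:

    pip install -r requirements.txt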
scripts/exp/train.py CHANGED
@@ -545,6 +545,30 @@ def train(
                 plot_fn=None,
             )
 
+            # sample in 1 step (only for coarse2fine models)
+            if accel.unwrap(model).n_conditioning_codebooks > 0:
+                sampled_argmax = accel.unwrap(model).sample(
+                    codec=codec,
+                    time_steps=z.shape[-1],
+                    start_tokens=z[i : i + 1],
+                    sample="argmax",
+                    sampling_steps=1,
+                )
+                sampled_argmax.cpu().write_audio_to_tb(
+                    f"sampled_1step-argmax/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+                conditioning = z[i:i+1, : accel.unwrap(model).n_conditioning_codebooks, :]
+                conditioning = accel.unwrap(model).to_signal(conditioning, codec)
+                conditioning.cpu().write_audio_to_tb(
+                    f"conditioning/{i}",
+                    self.writer,
+                    step=self.state.epoch,
+                    plot_fn=None,
+                )
+
     def save_imputation(self, z: torch.Tensor):
         # imputations
         _prefix_amt = prefix_amt
scripts/utils/parallel-gpu.sh ADDED
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+# Get the command to execute from the user
+command_to_execute="$1"
+
+# Get the maximum number of GPUs to use from the user
+max_gpus="$2"
+
+# Get the number of instances to start per GPU from the user
+instances_per_gpu="$3"
+
+# Set the CUDA_VISIBLE_DEVICES flag for each GPU
+for gpu_id in $(seq 0 $(($max_gpus - 1))); do
+    export CUDA_VISIBLE_DEVICES="$gpu_id"
+    # Start the specified number of instances for this GPU
+    for i in $(seq 1 "$instances_per_gpu"); do
+        # Run the command in the background
+        $command_to_execute &
+    done
+done
+
+# Wait for all instances to finish
+wait
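For reference, the script takes the command, the GPU count, and the per-GPU instance count as positional arguments. A sketch of an invocation (the command shown is hypothetical, not part of the commit):

    # run 2 copies of the c2f processing script on each of GPUs 0-3
    bash scripts/utils/parallel-gpu.sh "python scripts/utils/process_folder-c2f.py" 4 2

Since $command_to_execute is expanded unquoted, the command string is word-split by the shell; anything needing quotes, pipes, or redirection is best wrapped in its own script.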
scripts/utils/process_folder-c2f.py CHANGED
@@ -6,6 +6,8 @@ import argbind
 from tqdm import tqdm
 import random
 
+from typing import List
+
 from collections import defaultdict
 
 def coarse2fine_infer(
@@ -15,14 +17,15 @@ def coarse2fine_infer(
     device,
     signal_window=3,
     signal_hop=1.5,
-    max_excerpts=
+    max_excerpts=20,
 ):
     output = defaultdict(list)
 
     # split into 3 seconds
     windows = [s for s in signal.clone().windows(signal_window, signal_hop)]
+    windows = windows[1:] # skip first window since it's half zero padded
     random.shuffle(windows)
-    for w in windows[
+    for w in windows[:max_excerpts]:
         # batch the signal into chunks of 3
         with torch.no_grad():
             # get codes
@@ -68,20 +71,21 @@ def main(
 @argbind.bind(without_prefix=True)
 def main(
     sources=[
-        "/
+        "/data/spotdl/audio/val", "/data/spotdl/audio/test"
     ],
     audio_ext="mp3",
     exp_name="noise_mode",
     model_paths=[
-        "ckpt/mask/best/vampnet/weights.pth",
-        "ckpt/random/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/mask/best/vampnet/weights.pth",
+        "runs/c2f-exp-03.22.23/ckpt/random/best/vampnet/weights.pth",
     ],
     model_keys=[
-        "
-        "
+        "mask",
+        "random",
     ],
-    vqvae_path="ckpt/
-    device="cuda",
+    vqvae_path: str = "runs/codec-ckpt/codec.pth",
+    device: str = "cuda",
+    output_dir: str = ".",
 ):
     from vampnet.modules.transformer import VampNet
     from lac.model.lac import LAC
@@ -99,20 +103,28 @@ def main(
     vqvae.eval()
     print("Loaded VQVAE.")
 
-
+    output_dir = Path(output_dir) / f"{exp_name}-samples"
+
     for source in sources:
         print(f"Processing {source}...")
-
+        source_files = list(Path(source).glob(f"**/*.{audio_ext}"))
+        random.shuffle(source_files)
+        for path in tqdm(source_files):
             sig = AudioSignal(path)
             sig.resample(vqvae.sample_rate).normalize(-24).ensure_max_of_audio(1.0)
 
+            out_dir = output_dir / path.stem
+            out_dir.mkdir(parents=True, exist_ok=True)
+            if out_dir.exists():
+                print(f"Skipping {path.stem} since {out_dir} already exists.")
+                continue
+
             for model_key, model in models.items():
                 out = coarse2fine_infer(sig, model, vqvae, device)
-                for k in out:
-
-
-
-
+                for k, sig_list in out.items():
+                    for i, s in enumerate(sig_list):
+                        s.write(out_dir / f"{model_key}-{k}-{i}.wav")
+
 
 if __name__ == "__main__":
     args = argbind.parse_args()
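Since main is bound with argbind's without_prefix=True, its keyword arguments become top-level CLI flags. A minimal sketch of an invocation (flag values illustrative, not from the commit):

    python scripts/utils/process_folder-c2f.py --audio_ext wav --output_dir samples

Outputs land under <output_dir>/<exp_name>-samples/<file stem>/, with one wav per model key, output key, and excerpt index.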
|