keithhon committed on
Commit
cb03820
1 Parent(s): 93dbe1b

Upload vocoder_preprocess.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. vocoder_preprocess.py +59 -0
vocoder_preprocess.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from synthesizer.synthesize import run_synthesis
2
+ from synthesizer.hparams import hparams
3
+ from utils.argutils import print_args
4
+ import argparse
5
+ import os
6
+
7
+
8
def build_parser():
    """Build the command-line parser for this script.

    ``--in_dir`` and ``--out_dir`` use ``argparse.SUPPRESS`` as their default,
    so the parsed namespace only carries those attributes when the user
    supplied them explicitly; see ``resolve_dirs`` for the fallback values.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    class MyFormatter(argparse.ArgumentDefaultsHelpFormatter,
                      argparse.RawDescriptionHelpFormatter):
        # Combines both stock formatters: defaults are appended to help
        # strings and the description text is not re-wrapped.
        pass

    parser = argparse.ArgumentParser(
        description="Creates ground-truth aligned (GTA) spectrograms from the vocoder.",
        formatter_class=MyFormatter
    )
    parser.add_argument("datasets_root", type=str, help=\
        "Path to the directory containing your SV2TTS directory. If you specify both --in_dir and "
        "--out_dir, this argument won't be used.")
    parser.add_argument("--model_dir", type=str,
                        default="synthesizer/saved_models/pretrained/", help=\
        "Path to the pretrained model directory.")
    parser.add_argument("-i", "--in_dir", type=str, default=argparse.SUPPRESS, help= \
        "Path to the synthesizer directory that contains the mel spectrograms, the wavs and the "
        "embeds. Defaults to <datasets_root>/SV2TTS/synthesizer/.")
    parser.add_argument("-o", "--out_dir", type=str, default=argparse.SUPPRESS, help= \
        "Path to the output vocoder directory that will contain the ground truth aligned mel "
        "spectrograms. Defaults to <datasets_root>/SV2TTS/vocoder/.")
    parser.add_argument("--hparams", default="",
                        help="Hyperparameter overrides as a comma-separated list of name=value "
                             "pairs")
    parser.add_argument("--no_trim", action="store_true", help=\
        "Preprocess audio without trimming silences (not recommended).")
    parser.add_argument("--cpu", action="store_true", help=\
        "If True, processing is done on CPU, even when a GPU is available.")
    return parser


def resolve_dirs(args):
    """Fill in ``args.in_dir`` / ``args.out_dir`` when they were not given.

    The namespace is modified in place. Missing directories are derived from
    ``args.datasets_root`` as ``<datasets_root>/SV2TTS/synthesizer`` and
    ``<datasets_root>/SV2TTS/vocoder`` respectively.

    Args:
        args: Namespace produced by the parser from ``build_parser``.

    Returns:
        The same namespace, for convenience.
    """
    # The attributes are absent (not None) when omitted, because the parser
    # declares them with default=argparse.SUPPRESS.
    if not hasattr(args, "in_dir"):
        args.in_dir = os.path.join(args.datasets_root, "SV2TTS", "synthesizer")
    if not hasattr(args, "out_dir"):
        args.out_dir = os.path.join(args.datasets_root, "SV2TTS", "vocoder")
    return args


def require_webrtcvad():
    """Verify that the ``webrtcvad`` package can be imported.

    Raises:
        ModuleNotFoundError: If importing ``webrtcvad`` fails with an
            ``ImportError``. The original error is attached as ``__cause__``.
    """
    try:
        import webrtcvad  # noqa: F401 -- imported only to test availability
    except ImportError as err:
        # Only import failures are translated. Anything else (for example
        # KeyboardInterrupt or an unrelated error raised while the package
        # initialises) propagates unchanged instead of being reported as a
        # missing package.
        raise ModuleNotFoundError("Package 'webrtcvad' not found. This package enables "
            "noise removal and is recommended. Please install and try again. If installation fails, "
            "use --no_trim to disable this error message.") from err


if __name__ == "__main__":
    parser = build_parser()
    args = parser.parse_args()
    print_args(args, parser)
    modified_hp = hparams.parse(args.hparams)

    resolve_dirs(args)

    if args.cpu:
        # Hide GPUs from Pytorch to force CPU processing
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

    # Verify webrtcvad is available
    if not args.no_trim:
        require_webrtcvad()
    # The flag is only consulted for the check above.
    del args.no_trim

    run_synthesis(args.in_dir, args.out_dir, args.model_dir, modified_hp)