tobiasc committed on
Commit
a6f6513
1 Parent(s): 7881a87

Change phonemizer to proprietary one

Browse files
Files changed (3) hide show
  1. .gitignore +11 -1
  2. app.py +27 -6
  3. packages.txt +0 -3
.gitignore CHANGED
@@ -157,4 +157,14 @@ cython_debug/
157
  # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
- #.idea/
 
 
 
 
 
 
 
 
 
 
 
157
  # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
  # and can be added to the global gitignore or merged into this file. For a more nuclear
159
  # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ #.idea/
161
+
162
+ deletion_token.txt
163
+
164
+ .vscode
165
+
166
+ en_us
167
+ input.txt
168
+ input.xml
169
+ phn.zip
170
+ xml_nlp
app.py CHANGED
@@ -1,11 +1,11 @@
1
  from espnet2.bin.tts_inference import Text2Speech
2
  import torch
3
  from parallel_wavegan.utils import download_pretrained_model, load_model
4
- from phonemizer import phonemize
5
- from phonemizer.separator import Separator
6
  import gradio as gr
 
 
 
7
 
8
- s = Separator(word=None, phone=" ")
9
  config_path = "config.yaml"
10
  model_path = "model.pth"
11
 
@@ -14,6 +14,13 @@ vocoder_tag = "ljspeech_parallel_wavegan.v3"
14
  vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
15
  vocoder.remove_weight_norm()
16
 
 
 
 
 
 
 
 
17
  global_styles = {
18
  "Style 1": torch.load("style1.pt"),
19
  "Style 2": torch.load("style2.pt"),
@@ -24,6 +31,22 @@ global_styles = {
24
  }
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
28
  with torch.no_grad():
29
  text2speech = Text2Speech(
@@ -44,9 +67,7 @@ def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
44
 
45
  style_emb = torch.flatten(global_styles[global_style])
46
 
47
- phoneme_string = phonemize(
48
- text, language="mb-us1", backend="espeak-mbrola", separator=s
49
- )
50
  phonemes = phoneme_string.split(" ")
51
 
52
  max_edit_index = -1
 
1
  from espnet2.bin.tts_inference import Text2Speech
2
  import torch
3
  from parallel_wavegan.utils import download_pretrained_model, load_model
 
 
4
  import gradio as gr
5
+ import os
6
+ import subprocess
7
+ from zipfile import ZipFile
8
 
 
9
  config_path = "config.yaml"
10
  model_path = "model.pth"
11
 
 
14
  vocoder = load_model(download_pretrained_model(vocoder_tag)).to("cpu").eval()
15
  vocoder.remove_weight_norm()
16
 
17
# Fetch and unpack the phonemizer bundle at import time. The download
# location is read from the PHN_URL environment variable.
url = os.environ.get("PHN_URL")
if not url:
    # Without this guard a missing variable reaches subprocess as None and
    # fails with a TypeError that does not mention PHN_URL at all.
    raise RuntimeError("PHN_URL environment variable is not set")

# The exit status of wget is not inspected here.
subprocess.call(["wget", url, "-q"])

# NOTE(review): wget is given no explicit output path, so this assumes the
# file name at the end of the URL is phn.zip -- confirm.
with ZipFile("phn.zip", "r") as zip_ref:
    # Extracts into the current working directory.
    zip_ref.extractall()

# xml_nlp is executed later as ./xml_nlp, so it must carry the execute bit
# (presumably it is one of the files shipped inside phn.zip).
subprocess.call(["chmod", "+x", "xml_nlp"])
23
+
24
  global_styles = {
25
  "Style 1": torch.load("style1.pt"),
26
  "Style 2": torch.load("style2.pt"),
 
31
  }
32
 
33
 
34
def phonemize(text):
    """Convert ``text`` to a phoneme string using the external ``xml_nlp`` tool.

    The text is written to ``input.txt``, ``./xml_nlp`` is run, and the
    result is read back from ``input.xml``. All paths are relative to the
    current working directory.

    NOTE(review): the file names are fixed, so overlapping calls would
    overwrite each other's files -- confirm callers are serialized.

    Args:
        text: The text to phonemize.

    Returns:
        The fragments taken from each line of ``input.xml``, concatenated in
        file order. An empty string if ``input.xml`` is still empty after the
        tool has run.
    """
    with open("input.txt", "w") as f:
        f.write(text)

    # Truncate any previous output so a run that writes nothing cannot leave
    # stale phonemes from an earlier call behind.
    with open("input.xml", "w"):
        pass

    # The exit status is not inspected; only the contents of input.xml are.
    # NOTE(review): the meaning of the "180" argument is not visible here.
    subprocess.call(["./xml_nlp", "input", "180", "en_us/enu.ini", "en_us"])

    with open("input.xml", "r") as f:
        # Keep whatever follows the last "[" on each line, minus its final two
        # characters (presumably a closing bracket and the newline -- TODO
        # confirm against the xml_nlp output format).
        return "".join(line.split("[")[-1][:-2] for line in f)
48
+
49
+
50
  def inference(text, global_style, alpha, prev_fg_inds, input_fg_inds):
51
  with torch.no_grad():
52
  text2speech = Text2Speech(
 
67
 
68
  style_emb = torch.flatten(global_styles[global_style])
69
 
70
+ phoneme_string = phonemize(text)
 
 
71
  phonemes = phoneme_string.split(" ")
72
 
73
  max_edit_index = -1
packages.txt DELETED
@@ -1,3 +0,0 @@
1
- mbrola
2
- mbrola-us1
3
- espeak-ng