Flux9665 committed on
Commit
502b0b6
•
1 Parent(s): 99a3986

add speakers and languages

Browse files
Files changed (1) hide show
  1. app.py +53 -14
app.py CHANGED
@@ -31,20 +31,44 @@ class TTS_Interface:
31
  def __init__(self):
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
  self.model = Meta_FastSpeech2(device=self.device)
34
-
35
- def read(self, prompt, language):
36
- language_id_lookup = {
37
- "English" : "en",
38
- "German" : "de",
39
- "Greek" : "el",
40
- "Spanish" : "es",
41
- "Finnish" : "fi",
42
- "Russian" : "ru",
43
- "Hungarian": "hu",
44
- "Dutch" : "nl",
45
- "French" : "fr"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
- self.model.set_language(language_id_lookup[language])
 
 
 
 
 
 
 
48
  wav = self.model(prompt)
49
  return 48000, float2pcm(wav.cpu().numpy())
50
 
@@ -64,7 +88,22 @@ iface = gr.Interface(fn=meta_model.read,
64
  'Russian',
65
  'Hungarian',
66
  'Dutch',
67
- 'French'], type="value", default='English', label="Language Selection")],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  outputs=gr.outputs.Audio(type="numpy", label=None),
69
  layout="vertical",
70
  title="IMS Toucan Multilingual Multispeaker Demo",
 
31
  def __init__(self):
32
  self.device = "cuda" if torch.cuda.is_available() else "cpu"
33
  self.model = Meta_FastSpeech2(device=self.device)
34
+ self.current_speaker = "English Speaker's Voice"
35
+ self.current_language = "English"
36
+ self.language_id_lookup = {
37
+ "English" : "en",
38
+ "German" : "de",
39
+ "Greek" : "el",
40
+ "Spanish" : "es",
41
+ "Finnish" : "fi",
42
+ "Russian" : "ru",
43
+ "Hungarian" : "hu",
44
+ "Dutch" : "nl",
45
+ "French" : "fr",
46
+ 'Polish' : "pl",
47
+ 'Portuguese': "pt",
48
+ 'Italian' : "it",
49
+ }
50
+ self.speaker_path_lookup = {
51
+ "English Speaker's Voice" : "reference_audios/english.wav",
52
+ "German Speaker's Voice" : "reference_audios/german.wav",
53
+ "Greek Speaker's Voice" : "reference_audios/greek.wav",
54
+ "Spanish Speaker's Voice" : "reference_audios/spanish.wav",
55
+ "Finnish Speaker's Voice" : "reference_audios/finnish.wav",
56
+ "Russian Speaker's Voice" : "reference_audios/russian.wav",
57
+ "Hungarian Speaker's Voice" : "reference_audios/hungarian.wav",
58
+ "Dutch Speaker's Voice" : "reference_audios/dutch.wav",
59
+ "French Speaker's Voice" : "reference_audios/french.wav",
60
+ "Polish Speaker's Voice" : "reference_audios/polish.flac",
61
+ "Portuguese Speaker's Voice": "reference_audios/portuguese.flac",
62
+ "Italian Speaker's Voice" : "reference_audios/italian.flac",
63
  }
64
+
65
+ def read(self, prompt, language, speaker):
66
+ if self.current_language != language:
67
+ self.model.set_language(self.language_id_lookup[language])
68
+ self.current_language = language
69
+ if self.current_speaker != speaker:
70
+ self.model.set_utterance_embedding(self.speaker_path_lookup[speaker])
71
+ self.current_speaker = speaker
72
  wav = self.model(prompt)
73
  return 48000, float2pcm(wav.cpu().numpy())
74
 
 
88
  'Russian',
89
  'Hungarian',
90
  'Dutch',
91
+ 'French',
92
+ 'Polish',
93
+ 'Portuguese',
94
+ 'Italian'], type="value", default='English', label="Language Selection"),
95
+ gr.inputs.Dropdown(["English Speaker's Voice",
96
+ "German Speaker's Voice",
97
+ "Greek Speaker's Voice",
98
+ "Spanish Speaker's Voice",
99
+ "Finnish Speaker's Voice",
100
+ "Russian Speaker's Voice",
101
+ "Hungarian Speaker's Voice",
102
+ "Dutch Speaker's Voice",
103
+ "French Speaker's Voice",
104
+ "Polish Speaker's Voice",
105
+ "Portuguese Speaker's Voice",
106
+ "Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Speaker Selection")],
107
  outputs=gr.outputs.Audio(type="numpy", label=None),
108
  layout="vertical",
109
  title="IMS Toucan Multilingual Multispeaker Demo",