Spaces:
Runtime error
Runtime error
add speakers and languages
Browse files
app.py
CHANGED
@@ -31,20 +31,44 @@ class TTS_Interface:
|
|
31 |
def __init__(self):
|
32 |
self.device = "cuda" if torch.cuda.is_available() else "cpu"
|
33 |
self.model = Meta_FastSpeech2(device=self.device)
|
34 |
-
|
35 |
-
|
36 |
-
language_id_lookup = {
|
37 |
-
"English"
|
38 |
-
"German"
|
39 |
-
"Greek"
|
40 |
-
"Spanish"
|
41 |
-
"Finnish"
|
42 |
-
"Russian"
|
43 |
-
"Hungarian": "hu",
|
44 |
-
"Dutch"
|
45 |
-
"French"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
}
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
wav = self.model(prompt)
|
49 |
return 48000, float2pcm(wav.cpu().numpy())
|
50 |
|
@@ -64,7 +88,22 @@ iface = gr.Interface(fn=meta_model.read,
|
|
64 |
'Russian',
|
65 |
'Hungarian',
|
66 |
'Dutch',
|
67 |
-
'French'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
69 |
layout="vertical",
|
70 |
title="IMS Toucan Multilingual Multispeaker Demo",
|
|
|
31 |
def __init__(self):
    """Select the compute device, build the model and set up the lookup tables.

    ``language_id_lookup`` maps the language names offered to the user onto
    the short language codes handed to the model; ``speaker_path_lookup``
    maps the speaker names onto reference audio files.
    """
    # Use the GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        self.device = "cuda"
    else:
        self.device = "cpu"
    self.model = Meta_FastSpeech2(device=self.device)

    # Last selection applied to the model, so unchanged selections can be
    # skipped later on.
    # NOTE(review): this assumes a freshly built model already corresponds
    # to the English language and the English reference voice - confirm
    # against Meta_FastSpeech2.
    self.current_speaker = "English Speaker's Voice"
    self.current_language = "English"

    self.language_id_lookup = {
        "English": "en",
        "German": "de",
        "Greek": "el",
        "Spanish": "es",
        "Finnish": "fi",
        "Russian": "ru",
        "Hungarian": "hu",
        "Dutch": "nl",
        "French": "fr",
        "Polish": "pl",
        "Portuguese": "pt",
        "Italian": "it",
    }

    self.speaker_path_lookup = {
        "English Speaker's Voice": "reference_audios/english.wav",
        "German Speaker's Voice": "reference_audios/german.wav",
        "Greek Speaker's Voice": "reference_audios/greek.wav",
        "Spanish Speaker's Voice": "reference_audios/spanish.wav",
        "Finnish Speaker's Voice": "reference_audios/finnish.wav",
        "Russian Speaker's Voice": "reference_audios/russian.wav",
        "Hungarian Speaker's Voice": "reference_audios/hungarian.wav",
        "Dutch Speaker's Voice": "reference_audios/dutch.wav",
        "French Speaker's Voice": "reference_audios/french.wav",
        "Polish Speaker's Voice": "reference_audios/polish.flac",
        "Portuguese Speaker's Voice": "reference_audios/portuguese.flac",
        "Italian Speaker's Voice": "reference_audios/italian.flac",
    }
|
64 |
+
|
65 |
+
def read(self, prompt, language, speaker):
    """Run the model on ``prompt`` with the requested language and speaker.

    The model is only reconfigured when a selection differs from the one
    applied last, and the remembered selection is updated afterwards.

    Args:
        prompt: Input passed to the model call unchanged.
        language: Key of ``self.language_id_lookup``.
        speaker: Key of ``self.speaker_path_lookup``.

    Returns:
        A tuple ``(48000, pcm)`` where ``pcm`` is the result of
        ``float2pcm`` applied to the model output as a NumPy array.
        The constant 48000 is presumably the sample rate in Hz - confirm.

    Raises:
        KeyError: If a changed ``language`` or ``speaker`` is missing from
            its lookup table.
    """
    if language != self.current_language:
        language_id = self.language_id_lookup[language]
        self.model.set_language(language_id)
        self.current_language = language

    if speaker != self.current_speaker:
        reference_path = self.speaker_path_lookup[speaker]
        self.model.set_utterance_embedding(reference_path)
        self.current_speaker = speaker

    waveform = self.model(prompt)
    samples = waveform.cpu().numpy()
    return 48000, float2pcm(samples)
|
74 |
|
|
|
88 |
'Russian',
|
89 |
'Hungarian',
|
90 |
'Dutch',
|
91 |
+
'French',
|
92 |
+
'Polish',
|
93 |
+
'Portuguese',
|
94 |
+
'Italian'], type="value", default='English', label="Language Selection"),
|
95 |
+
gr.inputs.Dropdown(["English Speaker's Voice",
|
96 |
+
"German Speaker's Voice",
|
97 |
+
"Greek Speaker's Voice",
|
98 |
+
"Spanish Speaker's Voice",
|
99 |
+
"Finnish Speaker's Voice",
|
100 |
+
"Russian Speaker's Voice",
|
101 |
+
"Hungarian Speaker's Voice",
|
102 |
+
"Dutch Speaker's Voice",
|
103 |
+
"French Speaker's Voice",
|
104 |
+
"Polish Speaker's Voice",
|
105 |
+
"Portuguese Speaker's Voice",
|
106 |
+
"Italian Speaker's Voice"], type="value", default="English Speaker's Voice", label="Speaker Selection")],
|
107 |
outputs=gr.outputs.Audio(type="numpy", label=None),
|
108 |
layout="vertical",
|
109 |
title="IMS Toucan Multilingual Multispeaker Demo",
|