khof312 committed on
Commit
b4c3706
1 Parent(s): 11b8286

Add fine-tuned Spanish and Lingala.

Browse files
Files changed (1) hide show
  1. app.py +35 -3
app.py CHANGED
@@ -50,8 +50,13 @@ type=['wav'])
50
  base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
51
  base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
52
  base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
 
 
53
  base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
54
 
 
 
 
55
  if tts_lang=="swh":
56
  finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
57
  finetuned_mms2 = synth_mms(tts_text, "khof312/mms-tts-swh-female-2")
@@ -59,6 +64,9 @@ type=['wav'])
59
  finetuned_mms1 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-argentinian-monospeaker")
60
  finetuned_mms2 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-chilean-monospeaker")
61
  finetuned_mms3 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-colombian-monospeaker")
 
 
 
62
 
63
  #vc_mms
64
  #vc_coqui
@@ -70,6 +78,7 @@ type=['wav'])
70
  row3 = st.columns([1,1,2])
71
  row4 = st.columns([1,1,2])
72
  row5 = st.columns([1,1,2])
 
73
 
74
  row1[0].write("**Model**")
75
  row1[1].write("**Configuration**")
@@ -90,12 +99,15 @@ type=['wav'])
90
  row4[1].write("default")
91
  row4[2].audio(base_espeakng[0], sample_rate = base_espeakng[1])
92
 
93
-
94
  row5[0].write(f"[IMS-Toucan](https://github.com/DigitalPhonetics/IMS-Toucan)")
95
  row5[1].write("default")
96
  row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
97
 
98
-
 
 
 
 
99
  #################################################################
100
  if tts_lang == "swh":
101
  "### Fine Tuned"
@@ -121,6 +133,7 @@ type=['wav'])
121
  row2 = st.columns([1,1,2])
122
  row3 = st.columns([1,1,2])
123
  row4 = st.columns([1,1,2])
 
124
 
125
  row1[0].write("**Model**")
126
  row1[1].write("**Configuration**")
@@ -135,6 +148,22 @@ type=['wav'])
135
  row4[0].write(f"Meta MMS")
136
  row4[1].write("[ylacombe - Colombian](https://huggingface.co/ylacombe/mms-spa-finetuned-colombian-monospeaker)")
137
  row4[2].audio(finetuned_mms3[0], sample_rate = finetuned_mms3[1])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
 
139
  st.divider()
140
 
@@ -217,7 +246,8 @@ This page is a demo of the openly available Text to Speech models for various la
217
  - [**Meta's Massively Multilingual Speech (MMS)**](https://ai.meta.com/blog/multilingual-model-speech-recognition/) model, which supports over 1000 languages.[^1]
218
  - [**Coqui's TTS**](https://docs.coqui.ai/en/latest/#) package;[^2] while no longer supported, Coqui acted as a hub for TTS model hosting and these models are still available.
219
  - [**ESpeak-NG's**](https://github.com/espeak-ng/espeak-ng/tree/master) synthetic voices.[^3]
220
- - [**IMS Toucan**](https://github.com/DigitalPhonetics/IMS-Toucan), which supports 7000 languages. [^4]
 
221
 
222
  Voice conversion is currently achieved through Coqui.
223
 
@@ -237,5 +267,7 @@ Notes:
237
  [^2]: [Available models](https://github.com/coqui-ai/TTS/blob/dev/TTS/.models.json)
238
  [^3]: [Language list](https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md)
239
  [^4]: Language list is available in the Gradio API documentation [here](https://huggingface.co/spaces/Flux9665/MassivelyMultilingualTTS).
 
240
  ''')
241
 
 
 
50
  base_mms = synth_mms(tts_text, models[tts_lang]['mms'])
51
  base_coqui= synth_coqui(tts_text, models[tts_lang]['coqui'])
52
  base_espeakng= synth_espeakng(tts_text, models[tts_lang]['espeakng'])
53
+
54
+ #models[tts_lang]['toucan']
55
  base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'])
56
 
57
+ #for m in models[tts_lang]['toucan']:
58
+ # base_toucan= synth_toucan(tts_text, models[tts_lang]['toucan'][0])
59
+
60
  if tts_lang=="swh":
61
  finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-swh-female-1")
62
  finetuned_mms2 = synth_mms(tts_text, "khof312/mms-tts-swh-female-2")
 
64
  finetuned_mms1 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-argentinian-monospeaker")
65
  finetuned_mms2 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-chilean-monospeaker")
66
  finetuned_mms3 = synth_mms(tts_text, "ylacombe/mms-spa-finetuned-colombian-monospeaker")
67
+ finetuned_mms4 = synth_mms(tts_text, "khof312/mms-tts-spa-female")
68
+ if tts_lang=="lin":
69
+ finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-lin-female")
70
 
71
  #vc_mms
72
  #vc_coqui
 
78
  row3 = st.columns([1,1,2])
79
  row4 = st.columns([1,1,2])
80
  row5 = st.columns([1,1,2])
81
+ #row6 = st.columns([1,1,2])
82
 
83
  row1[0].write("**Model**")
84
  row1[1].write("**Configuration**")
 
99
  row4[1].write("default")
100
  row4[2].audio(base_espeakng[0], sample_rate = base_espeakng[1])
101
 
 
102
  row5[0].write(f"[IMS-Toucan](https://github.com/DigitalPhonetics/IMS-Toucan)")
103
  row5[1].write("default")
104
  row5[2].audio(base_toucan[0], sample_rate = base_toucan[1])
105
 
106
+ #if base_piper is not None:
107
+ # row6[0].write(f"[Piper](https://github.com/rhasspy/piper)")
108
+ # row6[1].write("default")
109
+ # row6[2].audio(base_piper[0], sample_rate = base_piper[1])
110
+
111
  #################################################################
112
  if tts_lang == "swh":
113
  "### Fine Tuned"
 
133
  row2 = st.columns([1,1,2])
134
  row3 = st.columns([1,1,2])
135
  row4 = st.columns([1,1,2])
136
+ row5 = st.columns([1,1,2])
137
 
138
  row1[0].write("**Model**")
139
  row1[1].write("**Configuration**")
 
148
  row4[0].write(f"Meta MMS")
149
  row4[1].write("[ylacombe - Colombian](https://huggingface.co/ylacombe/mms-spa-finetuned-colombian-monospeaker)")
150
  row4[2].audio(finetuned_mms3[0], sample_rate = finetuned_mms3[1])
151
+ row5[0].write(f"Meta MMS")
152
+ row5[1].write("[khof312 - female](https://huggingface.co/khof312/mms-tts-spa-female)")
153
+ row5[2].audio(finetuned_mms4[0], sample_rate = finetuned_mms4[1])
154
+
155
+ if tts_lang == "lin":
156
+ "### Fine Tuned"
157
+ row1 = st.columns([1,1,2])
158
+ row2 = st.columns([1,1,2])
159
+
160
+ row1[0].write("**Model**")
161
+ row1[1].write("**Configuration**")
162
+ row1[2].write("**Audio**")
163
+
164
+ row2[0].write(f"Meta MMS")
165
+ row2[1].write("[khof312 - female](https://huggingface.co/khof312/mms-tts-lin-female)")
166
+ row2[2].audio(finetuned_mms1[0], sample_rate = finetuned_mms1[1])
167
 
168
  st.divider()
169
 
 
246
  - [**Meta's Massively Multilingual Speech (MMS)**](https://ai.meta.com/blog/multilingual-model-speech-recognition/) model, which supports over 1000 languages.[^1]
247
  - [**Coqui's TTS**](https://docs.coqui.ai/en/latest/#) package;[^2] while no longer supported, Coqui acted as a hub for TTS model hosting and these models are still available.
248
  - [**ESpeak-NG's**](https://github.com/espeak-ng/espeak-ng/tree/master) synthetic voices.[^3]
249
+ - [**IMS Toucan**](https://github.com/DigitalPhonetics/IMS-Toucan), which supports 7000 languages.[^4]
250
+ - [**Piper**](https://github.com/rhasspy/piper), a TTS system that supports multiple voices per language and approximately 30 languages.[^5]
251
 
252
  Voice conversion is currently achieved through Coqui.
253
 
 
267
  [^2]: [Available models](https://github.com/coqui-ai/TTS/blob/dev/TTS/.models.json)
268
  [^3]: [Language list](https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md)
269
  [^4]: Language list is available in the Gradio API documentation [here](https://huggingface.co/spaces/Flux9665/MassivelyMultilingualTTS).
270
+ [^5]: The list of available voices is [here](https://github.com/rhasspy/piper/blob/master/VOICES.md), model checkpoints are [here](https://huggingface.co/datasets/rhasspy/piper-checkpoints/tree/main), and they can be tested [here](https://rhasspy.github.io/piper-samples/).
271
  ''')
272
 
273
+