Add support for African voices.
Browse files- app.py +8 -1
- src/lookups.py +2 -1
- src/synthesize.py +24 -0
app.py
CHANGED
@@ -63,6 +63,7 @@ type=['wav'])
|
|
63 |
finetuned_mms4 = synth_mms(tts_text, "khof312/mms-tts-spa-female")
|
64 |
if tts_lang=="lin":
|
65 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-lin-female")
|
|
|
66 |
|
67 |
#vc_mms
|
68 |
#vc_coqui
|
@@ -152,14 +153,20 @@ type=['wav'])
|
|
152 |
"### Fine Tuned"
|
153 |
row1 = st.columns([1,1,2])
|
154 |
row2 = st.columns([1,1,2])
|
|
|
155 |
|
156 |
row1[0].write("**Model**")
|
157 |
row1[1].write("**Configuration**")
|
158 |
row1[2].write("**Audio**")
|
159 |
|
160 |
row2[0].write(f"Meta MMS")
|
161 |
-
row2[1].write("[khof312 -
|
162 |
row2[2].audio(finetuned_mms1[0], sample_rate = finetuned_mms1[1])
|
|
|
|
|
|
|
|
|
|
|
163 |
|
164 |
st.divider()
|
165 |
|
|
|
63 |
finetuned_mms4 = synth_mms(tts_text, "khof312/mms-tts-spa-female")
|
64 |
if tts_lang=="lin":
|
65 |
finetuned_mms1 = synth_mms(tts_text, "khof312/mms-tts-lin-female")
|
66 |
+
finetuned_africanvoices = synth_africanvoices(tts_text, models[tts_lang]['africanvoices'])
|
67 |
|
68 |
#vc_mms
|
69 |
#vc_coqui
|
|
|
153 |
"### Fine Tuned"
|
154 |
row1 = st.columns([1,1,2])
|
155 |
row2 = st.columns([1,1,2])
|
156 |
+
row3 = st.columns([1,1,2])
|
157 |
|
158 |
row1[0].write("**Model**")
|
159 |
row1[1].write("**Configuration**")
|
160 |
row1[2].write("**Audio**")
|
161 |
|
162 |
row2[0].write(f"Meta MMS")
|
163 |
+
row2[1].write("[khof312 - female](https://huggingface.co/khof312/mms-tts-lin-female)")
|
164 |
row2[2].audio(finetuned_mms1[0], sample_rate = finetuned_mms1[1])
|
165 |
+
|
166 |
+
|
167 |
+
row3[0].write(f"African voices")
|
168 |
+
row3[1].write("[African Voices]()")
|
169 |
+
row3[2].audio(finetuned_africanvoices[0], sample_rate = finetuned_africanvoices[1])
|
170 |
|
171 |
st.divider()
|
172 |
|
src/lookups.py
CHANGED
@@ -82,7 +82,8 @@ models = {
|
|
82 |
'coqui': 'tts_models/lin/openbible/vits', # Sampling rate: 22050
|
83 |
'espeakng': None,
|
84 |
'toucan': 'Lingala (lin)',
|
85 |
-
'piper': None
|
|
|
86 |
},
|
87 |
'mos':{
|
88 |
'mms': 'facebook/mms-tts-mos',
|
|
|
82 |
'coqui': 'tts_models/lin/openbible/vits', # Sampling rate: 22050
|
83 |
'espeakng': None,
|
84 |
'toucan': 'Lingala (lin)',
|
85 |
+
'piper': None,
|
86 |
+
'africanvoices': 'cmu_lin_ope',
|
87 |
},
|
88 |
'mos':{
|
89 |
'mms': 'facebook/mms-tts-mos',
|
src/synthesize.py
CHANGED
@@ -88,7 +88,31 @@ def synth_espeakng(text:str, model:str):
|
|
88 |
return wav, sampling_rate
|
89 |
else:
|
90 |
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
def synth_toucan(text:str, model:str):
|
94 |
'''
|
|
|
88 |
return wav, sampling_rate
|
89 |
else:
|
90 |
return None
|
91 |
+
|
92 |
+
def synth_africanvoices(text:str, model:str):
    '''
    Use Flite with an African Voices .flitevox voice to synthesize text.

    Inputs:
        text: Text to synthesize
        model: Voice model code (e.g. "cmu_lin_ope"); the corresponding
               "<model>.flitevox" file must be available to flite.
    Returns:
        Tuple of (waveform array, sampling rate), or None if no model is given.
    '''
    if model is not None:
        # Each command-line token must be its own list element when
        # shell=False. The original f'-voice {model}.flitevox' was passed
        # as a single malformed argument, f'"{text}"' baked literal quote
        # characters into the spoken text, and " test.wav" (leading space)
        # wrote to a different file than the 'test.wav' read below.
        subprocess.run(
            ['flite', '-voice', f'{model}.flitevox', '-t', text, 'test.wav']
        )

        sampling_rate, wav = wavfile.read('test.wav')
        os.remove('test.wav')

        return wav, sampling_rate
    else:
        return None
|
116 |
|
117 |
def synth_toucan(text:str, model:str):
|
118 |
'''
|