# gTTS / app.py
# Nick088's picture
# Update app.py
# 1cdadae verified
# raw
# history blame
# 1.8 kB
import io
import os
import tempfile

import gradio as gr
import librosa
import soundfile as sf
from gtts import gTTS
def text_to_speech(text, language_accent, pitch):
    """Synthesize ``text`` with gTTS and return the path of a pitch-shifted WAV.

    Args:
        text: Text to speak. Must contain at least one non-whitespace
            character.
        language_accent: A key of ``language_tld_map``. The mapped value is
            split on ``,`` into the ``lang`` and ``tld`` arguments of gTTS.
        pitch: Number of steps to shift the pitch by, passed to librosa as
            ``n_steps`` (negative lowers, positive raises). ``None`` or ``0``
            leaves the audio unshifted.

    Returns:
        Path of a newly created WAV file containing the generated audio.

    Raises:
        gr.Error: If ``text`` is empty/blank or ``language_accent`` is not a
            key of ``language_tld_map``.
    """
    # Fail early with a message the UI can display, instead of letting the
    # TTS library abort on empty input.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    try:
        lang_and_tld = language_tld_map[language_accent]
    except KeyError:
        raise gr.Error(
            f"Unknown language/accent: {language_accent!r}"
        ) from None
    lang, tld = lang_and_tld.split(',')

    # A cleared number field arrives as None; treat it as "no shift".
    n_steps = 0 if pitch is None else pitch

    # Use a unique temporary file per call: fixed file names would be shared
    # by concurrent requests, which could overwrite each other's audio.
    # gTTS writes MP3 data, so give the intermediate file a matching suffix.
    mp3_fd, mp3_path = tempfile.mkstemp(suffix=".mp3")
    os.close(mp3_fd)  # only the path is needed; gTTS reopens it itself
    try:
        # create the text-to-speech audio
        gTTS(text, lang=lang, tld=tld).save(mp3_path)
        # Load the audio file
        samples, sample_rate = librosa.load(mp3_path)
    finally:
        # The intermediate MP3 is no longer needed once decoded (or on error).
        os.remove(mp3_path)

    # A zero shift asks for no change, so skip the costly processing.
    if n_steps:
        samples = librosa.effects.pitch_shift(
            y=samples, sr=sample_rate, n_steps=n_steps
        )

    # The result file must outlive this call because its path is returned,
    # so it is deliberately not removed here.
    wav_fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    sf.write(wav_path, samples, sample_rate)

    # return the generated audio
    return wav_path
# Maps each accent label to a "<lang>,<tld>" string; text_to_speech splits the
# value on the comma and passes the two parts to gTTS as `lang` and `tld`.
# Insertion order matters: the keys are listed in this order in the dropdown.
language_tld_map = {
    # English
    "English_Australia": "en,com.au",
    "English_United_Kingdom": "en,co.uk",
    "English_United_States": "en,com",
    "English_Canada": "en,ca",
    "English_Nigerian": "en,com.ng",
    "English_Ireland": "en,ie",
    "English_South Africa": "en,co.za",
    # French
    "French_Canada": "fr,ca",
    "French_France": "fr,fr",
    # Mandarin
    "Mandarin_China_Mainland": "zh-CN,com",
    "Mandarin_Taiwan": "zh-TW,com",
    # Portuguese
    "Portuguese_Brazil": "pt,com.br",
    "Portuguese_Portugal": "pt,pt",
    # Spanish
    "Spanish_Mexico": "es,com.mx",
    "Spanish_Spain": "es,es",
    "Spanish_United_States": "es,com",
}
# Build the Gradio UI: three inputs (text, accent, pitch) feed
# text_to_speech, whose returned file path is shown in an audio player.
text_input = gr.Textbox(lines=10, label="Enter your text here:")
accent_input = gr.Dropdown(
    choices=list(language_tld_map),
    label="Select Language & Accent:",
    value="English_United_Kingdom",
    type="value",
)
pitch_input = gr.Number(
    label="Pitch (0 = no variations, negative pitch makes it more masculine, + pitch makes it more feminine):",
    value=0,
)
audio_output = gr.Audio(label="Audio")

iface = gr.Interface(
    fn=text_to_speech,
    inputs=[text_input, accent_input, pitch_input],
    outputs=[audio_output],
    allow_flagging="never",
)

# Start the web server for the interface.
iface.launch()