Spaces:
Running
on
T4
Running
on
T4
mrfakename
committed on
Commit
•
205284d
1
Parent(s):
19d70cb
Progress bar; readme update
Browse files
- README.md +2 -0
- app.py +2 -2
- melo/api.py +8 -5
README.md
CHANGED
@@ -3,6 +3,8 @@ title: MeloTTS
|
|
3 |
colorFrom: blue
|
4 |
colorTo: blue
|
5 |
sdk: gradio
|
|
|
|
|
6 |
app_file: app.py
|
7 |
pinned: false
|
8 |
---
|
|
|
3 |
colorFrom: blue
|
4 |
colorTo: blue
|
5 |
sdk: gradio
|
6 |
+
emoji: 🗣️
|
7 |
+
license: mit
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
---
|
app.py
CHANGED
@@ -7,9 +7,9 @@ import tempfile
|
|
7 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
8 |
model = TTS(language='EN', device=device)
|
9 |
speaker_ids = model.hps.data.spk2id
|
10 |
-
def synthesize(speaker, text, speed=1.0):
|
11 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
12 |
-
model.tts_to_file(text, speaker_ids[speaker], f.name, speed=speed)
|
13 |
return f.name
|
14 |
with gr.Blocks() as demo:
|
15 |
gr.Markdown('# MeloTTS\n\nAn unofficial demo of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from MyShell AI. MeloTTS is a permissively licensed (MIT) SOTA multi-speaker TTS model.\n\nI am not affiliated with MyShell AI in any way.\n\nThis demo currently only supports English, but the model itself supports other languages.')
|
|
|
7 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
8 |
model = TTS(language='EN', device=device)
|
9 |
speaker_ids = model.hps.data.spk2id
|
10 |
+
def synthesize(speaker, text, speed=1.0, progress=gr.Progress()):
|
11 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
|
12 |
+
model.tts_to_file(text, speaker_ids[speaker], f.name, speed=speed, pbar=progress.tqdm)
|
13 |
return f.name
|
14 |
with gr.Blocks() as demo:
|
15 |
gr.Markdown('# MeloTTS\n\nAn unofficial demo of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from MyShell AI. MeloTTS is a permissively licensed (MIT) SOTA multi-speaker TTS model.\n\nI am not affiliated with MyShell AI in any way.\n\nThis demo currently only supports English, but the model itself supports other languages.')
|
melo/api.py
CHANGED
@@ -65,16 +65,19 @@ class TTS(nn.Module):
|
|
65 |
@staticmethod
|
66 |
def split_sentences_into_pieces(text, language):
|
67 |
texts = split_sentence(text, language_str=language)
|
68 |
-
print(" > Text splitted to sentences.")
|
69 |
-
print('\n'.join(texts))
|
70 |
-
print(" > ===========================")
|
71 |
return texts
|
72 |
|
73 |
-
def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0):
|
74 |
language = self.language
|
75 |
texts = self.split_sentences_into_pieces(text, language)
|
76 |
audio_list = []
|
77 |
-
|
|
|
|
|
|
|
78 |
if language in ['EN', 'ZH_MIX_EN']:
|
79 |
t = re.sub(r'([a-z])([A-Z])', r'\1 \2', t)
|
80 |
device = self.device
|
|
|
65 |
@staticmethod
|
66 |
def split_sentences_into_pieces(text, language):
|
67 |
texts = split_sentence(text, language_str=language)
|
68 |
+
# print(" > Text splitted to sentences.")
|
69 |
+
# print('\n'.join(texts))
|
70 |
+
# print(" > ===========================")
|
71 |
return texts
|
72 |
|
73 |
+
def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, pbar=None):
|
74 |
language = self.language
|
75 |
texts = self.split_sentences_into_pieces(text, language)
|
76 |
audio_list = []
|
77 |
+
tx = texts
|
78 |
+
if pbar:
|
79 |
+
tx = pbar(texts)
|
80 |
+
for t in tx:
|
81 |
if language in ['EN', 'ZH_MIX_EN']:
|
82 |
t = re.sub(r'([a-z])([A-Z])', r'\1 \2', t)
|
83 |
device = self.device
|