Spaces:
Running
Running
Added Microsoft Edge TTS HF Space; model basenames
Browse files
app.py
CHANGED
@@ -63,6 +63,9 @@ AVAILABLE_MODELS = {
|
|
63 |
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
|
64 |
'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29.0 4.42.0
|
65 |
|
|
|
|
|
|
|
66 |
# TTS w issues
|
67 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
68 |
# 'amphion/Text-to-Speech': '/predict#0', # old running space, takes a whole minute to synthesize
|
@@ -77,63 +80,63 @@ AVAILABLE_MODELS = {
|
|
77 |
HF_SPACES = {
|
78 |
# XTTS v2
|
79 |
'coqui/xtts': {
|
80 |
-
'name': '
|
81 |
'function': '1',
|
82 |
'text_param_index': 0,
|
83 |
'return_audio_index': 1,
|
84 |
},
|
85 |
# WhisperSpeech
|
86 |
'collabora/WhisperSpeech': {
|
87 |
-
'name': '
|
88 |
'function': '/whisper_speech_demo',
|
89 |
'text_param_index': 0,
|
90 |
'return_audio_index': 0,
|
91 |
},
|
92 |
# OpenVoice (MyShell.ai)
|
93 |
'myshell-ai/OpenVoice': {
|
94 |
-
'name':'
|
95 |
'function': '1',
|
96 |
'text_param_index': 0,
|
97 |
'return_audio_index': 1,
|
98 |
},
|
99 |
# OpenVoice v2 (MyShell.ai)
|
100 |
'myshell-ai/OpenVoiceV2': {
|
101 |
-
'name':'
|
102 |
'function': '1',
|
103 |
'text_param_index': 0,
|
104 |
'return_audio_index': 1,
|
105 |
},
|
106 |
# MetaVoice
|
107 |
'mrfakename/MetaVoice-1B-v0.1': {
|
108 |
-
'name':'
|
109 |
'function': '/tts',
|
110 |
'text_param_index': 0,
|
111 |
'return_audio_index': 0,
|
112 |
},
|
113 |
# xVASynth (CPU)
|
114 |
'Pendrokar/xVASynth': {
|
115 |
-
'name': '
|
116 |
'function': '/predict',
|
117 |
'text_param_index': 0,
|
118 |
'return_audio_index': 0,
|
119 |
},
|
120 |
# CoquiTTS (CPU)
|
121 |
'coqui/CoquiTTS': {
|
122 |
-
'name': '
|
123 |
'function': '0',
|
124 |
'text_param_index': 0,
|
125 |
'return_audio_index': 0,
|
126 |
},
|
127 |
# HierSpeech_TTS
|
128 |
'LeeSangHoon/HierSpeech_TTS': {
|
129 |
-
'name': '
|
130 |
'function': '/predict',
|
131 |
'text_param_index': 0,
|
132 |
'return_audio_index': 0,
|
133 |
},
|
134 |
# MeloTTS (MyShell.ai)
|
135 |
'mrfakename/MeloTTS': {
|
136 |
-
'name': '
|
137 |
'function': '/synthesize',
|
138 |
'text_param_index': 0,
|
139 |
'return_audio_index': 0,
|
@@ -141,26 +144,33 @@ HF_SPACES = {
|
|
141 |
|
142 |
# Parler
|
143 |
'parler-tts/parler_tts': {
|
144 |
-
'name': '
|
145 |
'function': '/gen_tts',
|
146 |
'text_param_index': 0,
|
147 |
'return_audio_index': 0,
|
148 |
},
|
149 |
# Parler
|
150 |
'parler-tts/parler_tts_mini': {
|
151 |
-
'name': '
|
152 |
'function': '/gen_tts',
|
153 |
'text_param_index': 0,
|
154 |
'return_audio_index': 0,
|
155 |
},
|
156 |
# Parler, using Expresso dataset
|
157 |
'parler-tts/parler-tts-expresso': {
|
158 |
-
'name': '
|
159 |
'function': '/gen_tts',
|
160 |
'text_param_index': 0,
|
161 |
'return_audio_index': 0,
|
162 |
},
|
163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
164 |
|
165 |
# TTS w issues
|
166 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
@@ -239,6 +249,11 @@ OVERRIDE_INPUTS = {
|
|
239 |
'parler-tts/parler-tts-expresso': {
|
240 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
241 |
},
|
|
|
|
|
|
|
|
|
|
|
242 |
}
|
243 |
|
244 |
hf_clients = {}
|
@@ -579,12 +594,24 @@ def get_leaderboard(reveal_prelim = False):
|
|
579 |
def make_link_to_space(model_name):
|
580 |
# create an anchor link if it is a HF Space
|
581 |
style = 'text-decoration: underline;text-decoration-style: dotted;'
|
|
|
|
|
|
|
|
|
|
|
582 |
if model_name in AVAILABLE_MODELS:
|
583 |
style += 'color: var(--link-text-color);'
|
|
|
584 |
else:
|
585 |
style += 'font-style: italic;'
|
|
|
|
|
|
|
|
|
|
|
|
|
586 |
if '/' in model_name:
|
587 |
-
return '🤗 <a style="'
|
588 |
|
589 |
# otherwise just return the model name
|
590 |
return model_name
|
@@ -817,7 +844,6 @@ def synthandreturn(text):
|
|
817 |
log_text(text)
|
818 |
print("[debug] Using", mdl1, mdl2)
|
819 |
def predict_and_update_result(text, model, result_storage):
|
820 |
-
print(model)
|
821 |
# 3 attempts
|
822 |
attempt_count = 0
|
823 |
while attempt_count < 3:
|
@@ -829,7 +855,7 @@ def synthandreturn(text):
|
|
829 |
hf_clients[model] = Client(model, hf_token=hf_token)
|
830 |
mdl_space = hf_clients[model]
|
831 |
|
832 |
-
print(f"{model}: Fetching endpoints of HF Space")
|
833 |
# assume the index is one of the first 9 return params
|
834 |
return_audio_index = int(HF_SPACES[model]['return_audio_index'])
|
835 |
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|
|
|
63 |
'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
|
64 |
'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29.0 4.42.0
|
65 |
|
66 |
+
# Microsoft Edge TTS
|
67 |
+
'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech',
|
68 |
+
|
69 |
# TTS w issues
|
70 |
# 'PolyAI/pheme': '/predict#0', # sleepy HF Space
|
71 |
# 'amphion/Text-to-Speech': '/predict#0', # old running space, takes a whole minute to synthesize
|
|
|
80 |
HF_SPACES = {
|
81 |
# XTTS v2
|
82 |
'coqui/xtts': {
|
83 |
+
'name': 'XTTS v2',
|
84 |
'function': '1',
|
85 |
'text_param_index': 0,
|
86 |
'return_audio_index': 1,
|
87 |
},
|
88 |
# WhisperSpeech
|
89 |
'collabora/WhisperSpeech': {
|
90 |
+
'name': 'WhisperSpeech',
|
91 |
'function': '/whisper_speech_demo',
|
92 |
'text_param_index': 0,
|
93 |
'return_audio_index': 0,
|
94 |
},
|
95 |
# OpenVoice (MyShell.ai)
|
96 |
'myshell-ai/OpenVoice': {
|
97 |
+
'name':'OpenVoice',
|
98 |
'function': '1',
|
99 |
'text_param_index': 0,
|
100 |
'return_audio_index': 1,
|
101 |
},
|
102 |
# OpenVoice v2 (MyShell.ai)
|
103 |
'myshell-ai/OpenVoiceV2': {
|
104 |
+
'name':'OpenVoice v2',
|
105 |
'function': '1',
|
106 |
'text_param_index': 0,
|
107 |
'return_audio_index': 1,
|
108 |
},
|
109 |
# MetaVoice
|
110 |
'mrfakename/MetaVoice-1B-v0.1': {
|
111 |
+
'name':'MetaVoice',
|
112 |
'function': '/tts',
|
113 |
'text_param_index': 0,
|
114 |
'return_audio_index': 0,
|
115 |
},
|
116 |
# xVASynth (CPU)
|
117 |
'Pendrokar/xVASynth': {
|
118 |
+
'name': 'xVASynth v3',
|
119 |
'function': '/predict',
|
120 |
'text_param_index': 0,
|
121 |
'return_audio_index': 0,
|
122 |
},
|
123 |
# CoquiTTS (CPU)
|
124 |
'coqui/CoquiTTS': {
|
125 |
+
'name': 'CoquiTTS',
|
126 |
'function': '0',
|
127 |
'text_param_index': 0,
|
128 |
'return_audio_index': 0,
|
129 |
},
|
130 |
# HierSpeech_TTS
|
131 |
'LeeSangHoon/HierSpeech_TTS': {
|
132 |
+
'name': 'HierSpeech++',
|
133 |
'function': '/predict',
|
134 |
'text_param_index': 0,
|
135 |
'return_audio_index': 0,
|
136 |
},
|
137 |
# MeloTTS (MyShell.ai)
|
138 |
'mrfakename/MeloTTS': {
|
139 |
+
'name': 'MeloTTS',
|
140 |
'function': '/synthesize',
|
141 |
'text_param_index': 0,
|
142 |
'return_audio_index': 0,
|
|
|
144 |
|
145 |
# Parler
|
146 |
'parler-tts/parler_tts': {
|
147 |
+
'name': 'Parler',
|
148 |
'function': '/gen_tts',
|
149 |
'text_param_index': 0,
|
150 |
'return_audio_index': 0,
|
151 |
},
|
152 |
# Parler
|
153 |
'parler-tts/parler_tts_mini': {
|
154 |
+
'name': 'Parler Mini',
|
155 |
'function': '/gen_tts',
|
156 |
'text_param_index': 0,
|
157 |
'return_audio_index': 0,
|
158 |
},
|
159 |
# Parler, using Expresso dataset
|
160 |
'parler-tts/parler-tts-expresso': {
|
161 |
+
'name': 'Parler Expresso',
|
162 |
'function': '/gen_tts',
|
163 |
'text_param_index': 0,
|
164 |
'return_audio_index': 0,
|
165 |
},
|
166 |
|
167 |
+
# Microsoft Edge TTS
|
168 |
+
'innoai/Edge-TTS-Text-to-Speech': {
|
169 |
+
'name': 'Edge TTS',
|
170 |
+
'function': '/predict',
|
171 |
+
'text_param_index': 0,
|
172 |
+
'return_audio_index': 0,
|
173 |
+
},
|
174 |
|
175 |
# TTS w issues
|
176 |
# 'PolyAI/pheme': '/predict#0', #sleepy HF Space
|
|
|
249 |
'parler-tts/parler-tts-expresso': {
|
250 |
1: 'Elisabeth. Elisabeth\'s clear sharp voice.', # description/prompt
|
251 |
},
|
252 |
+
'innoai/Edge-TTS-Text-to-Speech': {
|
253 |
+
1: 'en-US-EmmaMultilingualNeural - en-US (Female)', # voice
|
254 |
+
2: 0, # pace rate
|
255 |
+
3: 0, # pitch
|
256 |
+
},
|
257 |
}
|
258 |
|
259 |
hf_clients = {}
|
|
|
594 |
def make_link_to_space(model_name):
    """Return the leaderboard markup for a model name.

    A name containing ``/`` is treated as a Hugging Face Space id and is
    rendered as an anchor pointing at
    ``https://huggingface.co/spaces/<model_name>``. The anchor text is the
    ``'name'`` entry from ``HF_SPACES`` when the model is listed there,
    otherwise the id itself.

    Models present in ``AVAILABLE_MODELS`` get the link colour and their id
    as the tooltip; every other model is italicised and gets the tooltip
    ``'Disabled for Arena'``.

    A name without ``/`` is returned unchanged.

    Args:
        model_name: model identifier as stored for the leaderboard.

    Returns:
        An HTML snippet (str) for Space ids, or ``model_name`` itself.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    # Not a Space id: nothing to link to, so skip all the styling work.
    if '/' not in model_name:
        return model_name

    style = 'text-decoration: underline;text-decoration-style: dotted;'
    if model_name in AVAILABLE_MODELS:
        style += 'color: var(--link-text-color);'
        title = model_name
    else:
        style += 'font-style: italic;'
        title = 'Disabled for Arena'

    # Prefer the short display name when the Space is configured.
    model_basename = model_name
    if model_name in HF_SPACES:
        model_basename = HF_SPACES[model_name]['name']

    href = 'https://huggingface.co/spaces/' + model_name

    # Escape every interpolated value so a stray quote or angle bracket in a
    # stored model name cannot break out of the attribute or element.
    # `style` is assembled from constants only and is emitted as-is.
    return (
        f'🤗 <a style="{style}" title="{escape(title)}" '
        f'href="{escape(href)}">{escape(model_basename)}</a>'
    )
|
|
|
844 |
log_text(text)
|
845 |
print("[debug] Using", mdl1, mdl2)
|
846 |
def predict_and_update_result(text, model, result_storage):
|
|
|
847 |
# 3 attempts
|
848 |
attempt_count = 0
|
849 |
while attempt_count < 3:
|
|
|
855 |
hf_clients[model] = Client(model, hf_token=hf_token)
|
856 |
mdl_space = hf_clients[model]
|
857 |
|
858 |
+
# print(f"{model}: Fetching endpoints of HF Space")
|
859 |
# assume the index is one of the first 9 return params
|
860 |
return_audio_index = int(HF_SPACES[model]['return_audio_index'])
|
861 |
endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
|