Pendrokar committed on
Commit
a1d10f4
1 Parent(s): 410e787

country flags; default text in lang

Browse files
Files changed (1) hide show
  1. app.py +76 -32
app.py CHANGED
@@ -28,36 +28,67 @@ voice_models = [
28
  current_voice_model = None
29
 
30
  languages = [
31
- "en",
32
- "de",
33
- "es",
34
- "it",
35
- "fr",
36
- "ru",
37
- "tr",
38
- "la",
39
- "ro",
40
- "da",
41
- "vi",
42
- "ha",
43
- "nl",
44
- "zh",
45
- "ar",
46
- "uk",
47
- "hi",
48
- "ko",
49
- "pl",
50
- "sw",
51
- "fi",
52
- "hu",
53
- "pt",
54
- "yo",
55
- "sv",
56
- "el",
57
- "wo",
58
- "jp"
59
  ]
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  def run_xvaserver():
62
  # start the process without waiting for a response
63
  print('Running xVAServer subprocess...\n')
@@ -154,23 +185,35 @@ def predict(input_text, pacing, voice, lang):
154
 
155
  input_textbox = gr.Textbox(
156
  label="Input Text",
 
157
  lines=1,
158
  max_lines=5,
159
  autofocus=True
160
  )
161
- pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Pacing")
162
  voice_radio = gr.Radio(
163
  voice_models,
164
  value=voice_models[0],
165
  label="Voice",
166
  info="NVIDIA HIFI CC-BY-4.0 xVAPitch/v3 xVASynth model"
167
  )
 
 
 
 
 
 
 
 
 
 
168
  language_radio = gr.Radio(
169
  languages,
170
  value="en",
171
  label="Language",
172
- info="Will have an English accent as the models were English. Tested only by a native Briton."
173
  )
 
174
 
175
  gradio_app = gr.Interface(
176
  predict,
@@ -180,8 +223,9 @@ gradio_app = gr.Interface(
180
  voice_radio,
181
  language_radio
182
  ],
183
- outputs=gr.Audio(label="22kHz audio", type="filepath"),
184
- title="xVASynth (WIP)"
 
185
  # examples=[
186
  # ["Once, I headed in much deeper. But I doubt I'll ever do that again.", 1],
187
  # ["You love hurting me, huh?", 1.5],
 
28
  current_voice_model = None
29
 
30
  languages = [
31
+ ("🇬🇧 EN", "en"),
32
+ ("🇩🇪 DE", "de"),
33
+ ("🇪🇸 ES", "es"),
34
+ ("🇮🇹 IT", "it"),
35
+ ("🇫🇷 FR", "fr"),
36
+ ("🇷🇺 RU", "ru"),
37
+ ("🇹🇷 TR", "tr"),
38
+ ("🇻🇦 LA", "la"),
39
+ ("🇷🇴 RO", "ro"),
40
+ ("🇩🇰 DA", "da"),
41
+ ("🇻🇳 VI", "vi"),
42
+ ("🇳🇬 HA", "ha"),
43
+ ("🇳🇱 NL", "nl"),
44
+ ("🇨🇳 ZH", "zh"),
45
+ ("🇸🇦 AR", "ar"),
46
+ ("🇺🇦 UK", "uk"),
47
+ ("🇮🇳 HI", "hi"),
48
+ ("🇰🇷 KO", "ko"),
49
+ ("🇵🇱 PL", "pl"),
50
+ ("🇸🇪 SW", "sw"),
51
+ ("🇫🇮 FI", "fi"),
52
+ ("🇭🇺 HU", "hu"),
53
+ ("🇵🇹 PT", "pt"),
54
+ ("🇳🇬 YO", "yo"),
55
+ ("🇸🇪 SV", "sv"),
56
+ ("🇬🇷 EL", "el"),
57
+ ("🇸🇳 WO", "wo"),
58
+ ("🇯🇵 JP", "jp"),
59
  ]
60
 
61
+ default_text = {
62
+ "en": "This is what my voice sounds like.",
63
+ "de": "So klingt meine Stimme.",
64
+ "es": "Así suena mi voz.",
65
+ "it": "Così suona la mia voce.",
66
+ "fr": "Voici à quoi ressemble ma voix.",
67
+ "ru": "Вот как звучит мой голос.",
68
+ "tr": "Benim sesimin sesi böyle.",
69
+ "la": "Haec est vox mea sonans.",
70
+ "ro": "Așa sună vocea mea.",
71
+ "da": "Sådan lyder min stemme.",
72
+ "vi": "Đây là giọng nói của tôi.",
73
+ "ha": "Wannan ne muryata ke.",
74
+ "nl": "Dit is hoe mijn stem klinkt.",
75
+ "zh": "这是我的声音。",
76
+ "ar": "هذا هو صوتي.",
77
+ "uk": "Ось як звучить мій голос.",
78
+ "hi": "यह मेरी आवाज़ कैसी लगती है।",
79
+ "ko": "여기 제 목소리가 어떤지 들어보세요.",
80
+ "pl": "Tak brzmi mój głos.",
81
+ "sw": "Sauti yangu inasikika hivi.",
82
+ "fi": "Näin ääneni kuulostaa.",
83
+ "hu": "Így hangzik a hangom.",
84
+ "pt": "É assim que minha voz soa.",
85
+ "yo": "Ìyí ni ohùn mi ńlá.",
86
+ "sv": "Såhär låter min röst.",
87
+ "el": "Έτσι ακούγεται η φωνή μου.",
88
+ "wo": "Ndox li neen xewnaal ma.",
89
+ "jp": "これが私の声です。",
90
+ }
91
+
92
  def run_xvaserver():
93
  # start the process without waiting for a response
94
  print('Running xVAServer subprocess...\n')
 
185
 
186
  input_textbox = gr.Textbox(
187
  label="Input Text",
188
+ value="This is what my voice sounds like.",
189
  lines=1,
190
  max_lines=5,
191
  autofocus=True
192
  )
193
+ pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
194
  voice_radio = gr.Radio(
195
  voice_models,
196
  value=voice_models[0],
197
  label="Voice",
198
  info="NVIDIA HIFI CC-BY-4.0 xVAPitch/v3 xVASynth model"
199
  )
200
+
201
+ def set_default_text(lang):
202
+ input_textbox = gr.Textbox(
203
+ label="Input Text",
204
+ value=default_text[lang],
205
+ lines=1,
206
+ max_lines=5,
207
+ autofocus=True
208
+ )
209
+
210
  language_radio = gr.Radio(
211
  languages,
212
  value="en",
213
  label="Language",
214
+ info="Will be more monotone and have an English accent. Tested mostly by a native Briton."
215
  )
216
+ language_radio.change(set_default_text)
217
 
218
  gradio_app = gr.Interface(
219
  predict,
 
223
  voice_radio,
224
  language_radio
225
  ],
226
+ outputs=gr.Audio(label="22kHz audio output", type="filepath"),
227
+ title="xVASynth (WIP)",
228
+ clear_btn=None
229
  # examples=[
230
  # ["Once, I headed in much deeper. But I doubt I'll ever do that again.", 1],
231
  # ["You love hurting me, huh?", 1.5],