Pendrokar committed on
Commit
83ebf46
•
1 Parent(s): 10cf936

emotional sliders

Browse files
Files changed (1) hide show
  1. app.py +36 -4
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import sys
3
  import time
4
  import requests
 
5
  from subprocess import Popen, PIPE
6
  import threading
7
  from huggingface_hub import hf_hub_download
@@ -143,7 +144,18 @@ def load_model(voice_model_name):
143
 
144
  return
145
 
146
- def predict(input_text, pacing, voice, lang):
 
 
 
 
 
 
 
 
 
 
 
147
  # grab only the first 1000 characters
148
  input_text = input_text[:1000]
149
 
@@ -159,8 +171,16 @@ def predict(input_text, pacing, voice, lang):
159
  use_sr = 0
160
  use_cleanup = 0
161
 
 
 
 
 
 
 
 
 
162
  data = {
163
- 'pluginsContext': '{}',
164
  'modelType': model_type,
165
  # pad with whitespaces as a workaround to avoid cutoffs
166
  'sequence': input_text.center(len(input_text) + 2, ' '),
@@ -192,6 +212,12 @@ input_textbox = gr.Textbox(
192
  autofocus=True
193
  )
194
  pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
 
 
 
 
 
 
195
  voice_radio = gr.Radio(
196
  voice_models,
197
  value=voice_models[0],
@@ -220,9 +246,15 @@ gradio_app = gr.Interface(
220
  predict,
221
  [
222
  input_textbox,
223
- pacing_slider,
224
  voice_radio,
225
- language_radio
 
 
 
 
 
 
 
226
  ],
227
  outputs=gr.Audio(label="22kHz audio output", type="filepath"),
228
  title="xVASynth (WIP)",
 
2
  import sys
3
  import time
4
  import requests
5
+ import json
6
  from subprocess import Popen, PIPE
7
  import threading
8
  from huggingface_hub import hf_hub_download
 
144
 
145
  return
146
 
147
+ def predict(
148
+ input_text,
149
+ voice,
150
+ lang,
151
+ pacing,
152
+ pitch,
153
+ energy,
154
+ anger,
155
+ happy,
156
+ sad,
157
+ surprise
158
+ ):
159
  # grab only the first 1000 characters
160
  input_text = input_text[:1000]
161
 
 
171
  use_sr = 0
172
  use_cleanup = 0
173
 
174
+ pluginsContext = {}
175
+ pluginsContext["mantella_settings"] = {
176
+ "emAngry": anger if anger > 0 else 0,
177
+ "emHappy": happy if happy > 0 else 0,
178
+ "emSad": sad if sad > 0 else 0,
179
+ "emSurprise": surprise if surprise > 0 else 0
180
+ }
181
+
182
  data = {
183
+ 'pluginsContext': json.dumps(pluginsContext),
184
  'modelType': model_type,
185
  # pad with whitespaces as a workaround to avoid cutoffs
186
  'sequence': input_text.center(len(input_text) + 2, ' '),
 
212
  autofocus=True
213
  )
214
  pacing_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Duration")
215
+ pitch_slider = gr.Slider(0, 1.0, value=0.5, step=0.05, label="Pitch", visible=False)
216
+ energy_slider = gr.Slider(0.1, 1.0, value=1.0, step=0.05, label="Energy", visible=False)
217
+ anger_slider = gr.Slider(0, 1.0, value=1.0, step=0.05, label="😠 Anger")
218
+ happy_slider = gr.Slider(0, 1.0, value=1.0, step=0.05, label="😃 Happy")
219
+ sad_slider = gr.Slider(0, 1.0, value=1.0, step=0.05, label="😭 Sad")
220
+ surprise_slider = gr.Slider(0, 1.0, value=1.0, step=0.05, label="😮 Surprise")
221
  voice_radio = gr.Radio(
222
  voice_models,
223
  value=voice_models[0],
 
246
  predict,
247
  [
248
  input_textbox,
 
249
  voice_radio,
250
+ language_radio,
251
+ pacing_slider,
252
+ pitch_slider,
253
+ energy_slider,
254
+ anger_slider,
255
+ happy_slider,
256
+ sad_slider,
257
+ surprise_slider
258
  ],
259
  outputs=gr.Audio(label="22kHz audio output", type="filepath"),
260
  title="xVASynth (WIP)",