ButterCream committed
Commit 9ef0c39
Parent: 359caa8

add examples

Files changed (1)
  1. app.py +42 -12
app.py CHANGED
@@ -51,8 +51,6 @@ INTRO = """
 <hr>
 """
 
-
-
 js_func = """
 function refresh() {
     const url = new URL(window.location);
@@ -64,8 +62,27 @@ function refresh() {
 }
 """
 
+examples = [
+    ["./Examples/David Attenborough.mp3",
+     "An understanding of the natural world is a source of not only great curiosity, but great fulfilment.",
+     1, 0.2, 0.5, 2, 100],
+    ["./Examples/Linus Tech Tips.mp3",
+     "sometimes I get so in the zone while building a computer it's like an out of body experience.",
+     1, 0.3, 0.8, 2, 100],
+    ["./Examples/Melina.mp3",
+     "If you intend to claim the Frenzied Flame, I ask that you cease. It is not to be meddled with. It is chaos, "
+     "devouring life and thought unending. However ruined this world has become, "
+     "however mired in torment and despair, life endures.",
+     1, 0.3, 0.5, 2, 100],
+    ["./Examples/Patrick Bateman.mp3",
+     "My Pain Is Constant And Sharp, And I Do Not Wish For A Better World For Anyone.",
+     1, 0.3, 0.6, 2, 100]
+]
+
 theme = gr.themes.Soft(
-    primary_hue=gr.themes.Color(c100="#ffd7d1", c200="#ff593e", c300="#ff593e", c400="#ff593e", c50="#fff0f0", c500="#ff593e", c600="#ea580c", c700="#c2410c", c800="#9a3412", c900="#7c2d12", c950="#6c2e12"),
+    primary_hue=gr.themes.Color(c100="#ffd7d1", c200="#ff593e", c300="#ff593e", c400="#ff593e", c50="#fff0f0",
+                                c500="#ff593e", c600="#ea580c", c700="#c2410c", c800="#9a3412", c900="#7c2d12",
+                                c950="#6c2e12"),
     secondary_hue="orange",
     radius_size=gr.themes.Size(lg="20px", md="8px", sm="6px", xl="30px", xs="4px", xxl="40px", xxs="2px"),
     font=[gr.themes.GoogleFont('M PLUS Rounded 1c'), 'ui-sans-serif', 'system-ui', 'sans-serif'],
@@ -183,6 +200,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=100):
 
     return 24000, scaled
 
+
 if torch.cuda.is_available():
     other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
 else:
@@ -192,17 +210,29 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
     gr.HTML(INTRO)
     with gr.Row():
         with gr.Column(scale=1):
-            inp = gr.Textbox(label="Text", info="What do you want Vokan to say? -- Longform generation currently produces artifacts in between certain sentences, this will be resolved in the next version", interactive=True)
-            voice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=1000, waveform_options={'waveform_progress_color': '#FF593E'})
-            steps = gr.Slider(minimum=3, maximum=100, value=20, step=1, label="Diffusion Steps", info="Higher produces better results typically", interactive=True)
-            embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale", info="Defaults to 2 | High scales may produce unexpected results but may produce more emotional texts", interactive=True)
-            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3 | Resemblance to speakers voice - lower = more similar", interactive=True)
-            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7 | Resemblance to speakers prosody - lower = more similar - higher = based on sentence", interactive=True)
-            speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech", info="Defaults to 1", interactive=True)
+            inp = gr.Textbox(label="Text", info="What do you want Vokan to say?", interactive=True)
+            voice = gr.Audio(label="Voice", interactive=True, type='filepath', max_length=300,
+                             waveform_options={'waveform_progress_color': '#FF593E'})
+            steps = gr.Slider(minimum=3, maximum=60, value=20, step=1, label="Diffusion Steps",
+                              info="Higher produces better results typically", interactive=True)
+            embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale",
+                                 info="Defaults to 2 | low scales may produce unexpected results", interactive=True)
+            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3",
+                              interactive=True)
+            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7",
+                             interactive=True)
+            speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech",
+                              info="Defaults to 1", interactive=True)
         with gr.Column(scale=1):
            clbtn = gr.Button("Synthesize", variant="primary")
-            claudio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#FF593E'})
-            clbtn.click(generate, inputs=[voice, inp, speed, alpha, beta, embscale, steps], outputs=[claudio], concurrency_limit=4)
+            claudio = gr.Audio(interactive=False, label="Synthesized Audio",
+                               waveform_options={'waveform_progress_color': '#FF593E'})
+            clbtn.click(generate, inputs=[voice, inp, speed, alpha, beta, embscale, steps], outputs=[claudio],
+                        concurrency_limit=4)
+
+    gr.Examples(examples=examples,
+                inputs=[voice, inp, speed, alpha, beta, embscale, steps],
+                outputs=[claudio])
 
 if __name__ == "__main__":
     # demo.queue(api_open=False, max_size=15).launch(show_api=False)
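
For context, the pattern this commit adds is Gradio's gr.Examples component: each row of the examples list fills the listed inputs, in order, when a user clicks it. Below is a minimal, self-contained sketch of that wiring, assuming Gradio 4.x; the synthesize() stub, the ./Examples/voice.mp3 path, and the single example row are placeholders for illustration, not the app's actual generate() function or assets.

import gradio as gr

def synthesize(audio_path, text, speed, alpha, beta, embedding_scale, steps):
    # Placeholder for the app's real TTS call; it would return (sample_rate, waveform).
    return None

# Hypothetical example row: [reference audio, text, speed, alpha, beta, embedding scale, diffusion steps]
examples = [
    ["./Examples/voice.mp3", "Hello there, this is a cloned voice.", 1, 0.3, 0.7, 2, 20],
]

with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            voice = gr.Audio(label="Voice", type="filepath")
            inp = gr.Textbox(label="Text")
            speed = gr.Slider(minimum=0.5, maximum=1.5, value=1, step=0.1, label="Speed of speech")
            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha")
            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta")
            embscale = gr.Slider(minimum=1, maximum=10, value=2, step=0.1, label="Embedding Scale")
            steps = gr.Slider(minimum=3, maximum=60, value=20, step=1, label="Diffusion Steps")
        with gr.Column():
            btn = gr.Button("Synthesize")
            out = gr.Audio(label="Synthesized Audio")

    btn.click(synthesize, inputs=[voice, inp, speed, alpha, beta, embscale, steps], outputs=[out])

    # Clicking an example row populates the listed inputs with the row's values, in the same order.
    gr.Examples(examples=examples,
                inputs=[voice, inp, speed, alpha, beta, embscale, steps])

if __name__ == "__main__":
    demo.launch()

The example rows only prefill the UI; synthesis still runs through the button's click handler unless gr.Examples is also given a fn and caching is enabled.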