BadriNarayanan committed on
Commit
66fc23c
1 Parent(s): 40b77de
Files changed (1) hide show
  1. app.py +1 -161
app.py CHANGED
@@ -303,7 +303,7 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
303
  )
304
 
305
  with gr.Column(scale=1, elem_id="logo-column"):
306
- gr.Image("logo/logo.jpg", label="", show_label=False)
307
 
308
  with gr.Row():
309
  with gr.Column(scale=1):
@@ -382,165 +382,5 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
382
  """
383
  )
384
 
385
- # Text input for the prompt
386
- gen_text_input_emotional = gr.Textbox(label="Text to Generate", lines=10)
387
-
388
- # Model choice
389
- model_choice_emotional = gr.Radio(
390
- choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS"
391
- )
392
-
393
- with gr.Accordion("Advanced Settings", open=False):
394
- remove_silence_emotional = gr.Checkbox(
395
- label="Remove Silences",
396
- value=True,
397
- )
398
-
399
- # Generate button
400
- generate_emotional_btn = gr.Button("Generate Emotional Speech", variant="primary")
401
-
402
- # Output audio
403
- audio_output_emotional = gr.Audio(label="Synthesized Audio")
404
- @gpu_decorator
405
- def generate_emotional_speech(
406
- regular_audio,
407
- regular_ref_text,
408
- gen_text,
409
- *args,
410
- ):
411
- num_additional_speech_types = max_speech_types - 1
412
- speech_type_names_list = args[:num_additional_speech_types]
413
- speech_type_audios_list = args[num_additional_speech_types:2 * num_additional_speech_types]
414
- speech_type_ref_texts_list = args[2 * num_additional_speech_types:3 * num_additional_speech_types]
415
- model_choice = args[3 * num_additional_speech_types]
416
- remove_silence = args[3 * num_additional_speech_types + 1]
417
-
418
- # Collect the speech types and their audios into a dict
419
- speech_types = {'Regular': {'audio': regular_audio, 'ref_text': regular_ref_text}}
420
-
421
- for name_input, audio_input, ref_text_input in zip(speech_type_names_list, speech_type_audios_list, speech_type_ref_texts_list):
422
- if name_input and audio_input:
423
- speech_types[name_input] = {'audio': audio_input, 'ref_text': ref_text_input}
424
-
425
- # Parse the gen_text into segments
426
- segments = parse_speechtypes_text(gen_text)
427
-
428
- # For each segment, generate speech
429
- generated_audio_segments = []
430
- current_emotion = 'Regular'
431
-
432
- for segment in segments:
433
- emotion = segment['emotion']
434
- text = segment['text']
435
-
436
- if emotion in speech_types:
437
- current_emotion = emotion
438
- else:
439
- # If emotion not available, default to Regular
440
- current_emotion = 'Regular'
441
-
442
- ref_audio = speech_types[current_emotion]['audio']
443
- ref_text = speech_types[current_emotion].get('ref_text', '')
444
-
445
- # Generate speech for this segment
446
- audio, _ = infer(ref_audio, ref_text, text, model_choice, remove_silence, 0)
447
- sr, audio_data = audio
448
-
449
- generated_audio_segments.append(audio_data)
450
-
451
- # Concatenate all audio segments
452
- if generated_audio_segments:
453
- final_audio_data = np.concatenate(generated_audio_segments)
454
- return (sr, final_audio_data)
455
- else:
456
- gr.Warning("No audio generated.")
457
- return None
458
-
459
- generate_emotional_btn.click(
460
- generate_emotional_speech,
461
- inputs=[
462
- regular_audio,
463
- regular_ref_text,
464
- gen_text_input_emotional,
465
- ] + speech_type_names + speech_type_audios + speech_type_ref_texts + [
466
- model_choice_emotional,
467
- remove_silence_emotional,
468
- ],
469
- outputs=audio_output_emotional,
470
- )
471
-
472
- # Validation function to disable Generate button if speech types are missing
473
- def validate_speech_types(
474
- gen_text,
475
- regular_name,
476
- *args
477
- ):
478
- num_additional_speech_types = max_speech_types - 1
479
- speech_type_names_list = args[:num_additional_speech_types]
480
-
481
- # Collect the speech types names
482
- speech_types_available = set()
483
- if regular_name:
484
- speech_types_available.add(regular_name)
485
- for name_input in speech_type_names_list:
486
- if name_input:
487
- speech_types_available.add(name_input)
488
-
489
- # Parse the gen_text to get the speech types used
490
- segments = parse_emotional_text(gen_text)
491
- speech_types_in_text = set(segment['emotion'] for segment in segments)
492
-
493
- # Check if all speech types in text are available
494
- missing_speech_types = speech_types_in_text - speech_types_available
495
-
496
- if missing_speech_types:
497
- # Disable the generate button
498
- return gr.update(interactive=False)
499
- else:
500
- # Enable the generate button
501
- return gr.update(interactive=True)
502
-
503
- gen_text_input_emotional.change(
504
- validate_speech_types,
505
- inputs=[gen_text_input_emotional, regular_name] + speech_type_names,
506
- outputs=generate_emotional_btn
507
- )
508
- with gr.Blocks() as app:
509
- gr.Markdown(
510
- """
511
- # Antriksh AI
512
- """
513
- )
514
-
515
- # Add the image here
516
- gr.Image(
517
- value="logo.jpg",
518
- label="AI System Logo",
519
- show_label=False,
520
- width=300,
521
- height=150
522
- )
523
-
524
- gr.TabbedInterface([app_tts, app_podcast, app_emotional, app_credits], ["TTS", "Podcast", "Multi-Style", "Credits"])
525
-
526
-
527
- @click.command()
528
- @click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
529
- @click.option("--host", "-H", default=None, help="Host to run the app on")
530
- @click.option(
531
- "--share",
532
- "-s",
533
- default=False,
534
- is_flag=True,
535
- help="Share the app via Gradio share link",
536
- )
537
- @click.option("--api", "-a", default=True, is_flag=True, help="Allow API access")
538
- def main(port, host, share, api):
539
- global app
540
- print(f"Starting app...")
541
- app.queue(api_open=api).launch(
542
- server_name=host, server_port=port, share=share, show_api=api
543
- )
544
-
545
  if __name__ == "__main__":
546
  app.launch(share=True)
 
303
  )
304
 
305
  with gr.Column(scale=1, elem_id="logo-column"):
306
+ gr.Image("logo/logo-removebg-preview.png", label="", show_label=False)
307
 
308
  with gr.Row():
309
  with gr.Column(scale=1):
 
382
  """
383
  )
384
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
385
  if __name__ == "__main__":
386
  app.launch(share=True)