BadriNarayanan committed
Commit 66fc23c
1 Parent(s): 40b77de
Updated
app.py CHANGED
@@ -303,7 +303,7 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
             )
 
         with gr.Column(scale=1, elem_id="logo-column"):
-            gr.Image("logo/logo.
+            gr.Image("logo/logo-removebg-preview.png", label="", show_label=False)
 
     with gr.Row():
         with gr.Column(scale=1):
@@ -382,165 +382,5 @@ with gr.Blocks(theme=custom_theme, css=custom_css) as app:
     """
     )
 
-    # Text input for the prompt
-    gen_text_input_emotional = gr.Textbox(label="Text to Generate", lines=10)
-
-    # Model choice
-    model_choice_emotional = gr.Radio(
-        choices=["F5-TTS", "E2-TTS"], label="Choose TTS Model", value="F5-TTS"
-    )
-
-    with gr.Accordion("Advanced Settings", open=False):
-        remove_silence_emotional = gr.Checkbox(
-            label="Remove Silences",
-            value=True,
-        )
-
-    # Generate button
-    generate_emotional_btn = gr.Button("Generate Emotional Speech", variant="primary")
-
-    # Output audio
-    audio_output_emotional = gr.Audio(label="Synthesized Audio")
-    @gpu_decorator
-    def generate_emotional_speech(
-        regular_audio,
-        regular_ref_text,
-        gen_text,
-        *args,
-    ):
-        num_additional_speech_types = max_speech_types - 1
-        speech_type_names_list = args[:num_additional_speech_types]
-        speech_type_audios_list = args[num_additional_speech_types:2 * num_additional_speech_types]
-        speech_type_ref_texts_list = args[2 * num_additional_speech_types:3 * num_additional_speech_types]
-        model_choice = args[3 * num_additional_speech_types]
-        remove_silence = args[3 * num_additional_speech_types + 1]
-
-        # Collect the speech types and their audios into a dict
-        speech_types = {'Regular': {'audio': regular_audio, 'ref_text': regular_ref_text}}
-
-        for name_input, audio_input, ref_text_input in zip(speech_type_names_list, speech_type_audios_list, speech_type_ref_texts_list):
-            if name_input and audio_input:
-                speech_types[name_input] = {'audio': audio_input, 'ref_text': ref_text_input}
-
-        # Parse the gen_text into segments
-        segments = parse_speechtypes_text(gen_text)
-
-        # For each segment, generate speech
-        generated_audio_segments = []
-        current_emotion = 'Regular'
-
-        for segment in segments:
-            emotion = segment['emotion']
-            text = segment['text']
-
-            if emotion in speech_types:
-                current_emotion = emotion
-            else:
-                # If emotion not available, default to Regular
-                current_emotion = 'Regular'
-
-            ref_audio = speech_types[current_emotion]['audio']
-            ref_text = speech_types[current_emotion].get('ref_text', '')
-
-            # Generate speech for this segment
-            audio, _ = infer(ref_audio, ref_text, text, model_choice, remove_silence, 0)
-            sr, audio_data = audio
-
-            generated_audio_segments.append(audio_data)
-
-        # Concatenate all audio segments
-        if generated_audio_segments:
-            final_audio_data = np.concatenate(generated_audio_segments)
-            return (sr, final_audio_data)
-        else:
-            gr.Warning("No audio generated.")
-            return None
-
-    generate_emotional_btn.click(
-        generate_emotional_speech,
-        inputs=[
-            regular_audio,
-            regular_ref_text,
-            gen_text_input_emotional,
-        ] + speech_type_names + speech_type_audios + speech_type_ref_texts + [
-            model_choice_emotional,
-            remove_silence_emotional,
-        ],
-        outputs=audio_output_emotional,
-    )
-
-    # Validation function to disable Generate button if speech types are missing
-    def validate_speech_types(
-        gen_text,
-        regular_name,
-        *args
-    ):
-        num_additional_speech_types = max_speech_types - 1
-        speech_type_names_list = args[:num_additional_speech_types]
-
-        # Collect the speech types names
-        speech_types_available = set()
-        if regular_name:
-            speech_types_available.add(regular_name)
-        for name_input in speech_type_names_list:
-            if name_input:
-                speech_types_available.add(name_input)
-
-        # Parse the gen_text to get the speech types used
-        segments = parse_emotional_text(gen_text)
-        speech_types_in_text = set(segment['emotion'] for segment in segments)
-
-        # Check if all speech types in text are available
-        missing_speech_types = speech_types_in_text - speech_types_available
-
-        if missing_speech_types:
-            # Disable the generate button
-            return gr.update(interactive=False)
-        else:
-            # Enable the generate button
-            return gr.update(interactive=True)
-
-    gen_text_input_emotional.change(
-        validate_speech_types,
-        inputs=[gen_text_input_emotional, regular_name] + speech_type_names,
-        outputs=generate_emotional_btn
-    )
-with gr.Blocks() as app:
-    gr.Markdown(
-        """
-        # Antriksh AI
-        """
-    )
-
-    # Add the image here
-    gr.Image(
-        value="logo.jpg",
-        label="AI System Logo",
-        show_label=False,
-        width=300,
-        height=150
-    )
-
-    gr.TabbedInterface([app_tts, app_podcast, app_emotional, app_credits], ["TTS", "Podcast", "Multi-Style", "Credits"])
-
-
-@click.command()
-@click.option("--port", "-p", default=None, type=int, help="Port to run the app on")
-@click.option("--host", "-H", default=None, help="Host to run the app on")
-@click.option(
-    "--share",
-    "-s",
-    default=False,
-    is_flag=True,
-    help="Share the app via Gradio share link",
-)
-@click.option("--api", "-a", default=True, is_flag=True, help="Allow API access")
-def main(port, host, share, api):
-    global app
-    print(f"Starting app...")
-    app.queue(api_open=api).launch(
-        server_name=host, server_port=port, share=share, show_api=api
-    )
-
 if __name__ == "__main__":
     app.launch(share=True)
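
For readers tracing the removed handler: `generate_emotional_speech` receives every dynamic speech-type row through a single flat `*args` tuple and recovers the parallel lists by slicing. Below is a minimal sketch of that convention; `max_speech_types = 10` and the helper name `unpack_speech_type_args` are assumptions for illustration only (the real value and structure live in the deleted portion of app.py).

```python
# Sketch of the flat-*args convention used by the removed handler.
# Gradio passes the values of dynamically created components as one flat
# sequence; the handler slices it back into parallel lists plus two
# trailing scalars. max_speech_types = 10 is an assumed placeholder.
max_speech_types = 10

def unpack_speech_type_args(args):
    n = max_speech_types - 1          # rows beyond the mandatory 'Regular' type
    names = args[:n]                  # speech-type names
    audios = args[n:2 * n]            # reference audio clips
    ref_texts = args[2 * n:3 * n]     # reference transcripts
    model_choice = args[3 * n]        # "F5-TTS" or "E2-TTS"
    remove_silence = args[3 * n + 1]  # bool from the Remove Silences checkbox
    return names, audios, ref_texts, model_choice, remove_silence
```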