anyantudre's picture
Upload 5 files
e41ca58 verified
raw
history blame
1.54 kB
import torch
import scipy
import gradio as gr
from transformers import set_seed, pipeline
from transformers import VitsTokenizer, VitsModel
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset, Audio
import speech_to_text, text_to_speech, translation
# Language codes offered by the "Language" dropdown of the speech-to-text
# and text-to-speech tabs (presumably ISO 639-3: Mooré, French, English --
# confirm against the speech_to_text / text_to_speech modules).
language_list = [
    "mos",
    "fra",
    "eng",
]

# Top-level Blocks container; the tabbed UI is rendered inside it and it is
# the object that gets launched at the end of the script.
demo = gr.Blocks()
# Speech-to-text tab: an audio clip (microphone recording or uploaded file,
# configured with type="filepath") plus a language choice are handed to
# speech_to_text.transcribe, and the result is displayed as text.
_stt_inputs = [
    gr.Audio(type="filepath", sources=["microphone", "upload"]),
    gr.Dropdown(language_list, label="Language"),
]

mms_stt = gr.Interface(
    fn=speech_to_text.transcribe,
    inputs=_stt_inputs,
    outputs="text",
    title="Speech-to-text",
)
# Text-to-speech tab: free text plus a language choice are handed to
# text_to_speech.synthesize_facebook; the result is shown in an audio
# component configured with type="numpy".
_tts_inputs = [
    gr.Text(label="Input text"),
    gr.Dropdown(language_list, label="Language"),
]
_tts_outputs = [
    gr.Audio(type="numpy", label="Generated Audio"),
]

mms_tts = gr.Interface(
    fn=text_to_speech.synthesize_facebook,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    title="Text-to-speech",
)
# Language identifiers offered as both source and target in the translation
# tab. Kept as a tuple and copied into a fresh list per dropdown so each
# component receives its own list, exactly as with two separate literals.
_translation_codes = ("eng_Latn", "fra_Latn", "mos_Latn")

# Translation tab: the text and the chosen source/target identifiers are
# handed to translation.translation, and the result is displayed as text.
_translation_inputs = [
    gr.Textbox(label="Text", placeholder="Yaa sõama"),
    gr.Dropdown(label="Source Language", choices=list(_translation_codes)),
    gr.Dropdown(label="Target Language", choices=list(_translation_codes)),
]

# One pre-filled example row: (text, source language, target language).
_translation_examples = [
    ["Building a translation demo with Gradio is so easy!", "eng_Latn", "mos_Latn"],
]

mms_translate = gr.Interface(
    fn=translation.translation,
    inputs=_translation_inputs,
    outputs=["text"],
    examples=_translation_examples,
    title="Translation Demo",
)
# Tab label paired with the interface shown under it, in display order.
_tabs = [
    ("Translation", mms_translate),
    ("Text-to-speech", mms_tts),
    ("Speech-to-text", mms_stt),
]

# Render the three interfaces as tabs inside the top-level Blocks container.
with demo:
    gr.TabbedInterface(
        [interface for _, interface in _tabs],
        [label for label, _ in _tabs],
    )

# Start the Gradio app.
demo.launch()