#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Gradio front end for a speech-recognition demo.

The recognition callback is currently a stub that returns placeholder text.
"""
import argparse

import gradio as gr

from examples import examples
from models import model_map
from project_settings import project_path


def get_args() -> argparse.Namespace:
    """Parse the command-line options.

    Returns:
        Namespace with ``examples_dir`` and ``trained_model_dir``; both
        default to directories under ``project_path``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--examples_dir",
        default=(project_path / "data/examples").as_posix(),
        type=str
    )
    parser.add_argument(
        "--trained_model_dir",
        default=(project_path / "trained_models").as_posix(),
        type=str
    )
    args = parser.parse_args()
    return args


def update_model_dropdown(language: str):
    """Build a replacement model dropdown for the selected language.

    Args:
        language: Key into ``model_map``.

    Returns:
        A ``gr.Dropdown`` listing the ``repo_id`` of every model registered
        for ``language``, with the first one pre-selected.

    Raises:
        ValueError: If ``language`` has no entry in ``model_map``.
    """
    if language not in model_map:
        raise ValueError(f"Unsupported language: {language}")

    repo_ids = [entry["repo_id"] for entry in model_map[language]]
    return gr.Dropdown(
        choices=repo_ids,
        # An empty model list must not crash the callback with IndexError.
        value=repo_ids[0] if repo_ids else None,
        interactive=True,
    )


def build_html_output(s: str, style: str = "result_item_success"):
    """Wrap ``s`` in the snippet shown in the HTML info panel.

    NOTE(review): ``style`` is accepted but not used by the template below;
    presumably it was meant to select a CSS class -- confirm and restore the
    surrounding markup if it was lost.
    """
    return f"\n{s}\n"


def process_uploaded_file(language: str,
                          repo_id: str,
                          decoding_method: str,
                          num_active_paths: int,
                          add_punctuation: str,
                          in_filename: str,
                          ):
    """Recognition callback for an uploaded file (placeholder).

    The parameter order matches the order of the input components wired to
    this callback in ``main``. All arguments are currently ignored.

    Returns:
        A ``(text, html)`` pair; both currently carry the string "Dummy".
    """
    return "Dummy", build_html_output("Dummy")


def main():
    """Assemble the Gradio UI and launch it with queueing enabled."""
    title = "# Automatic Speech Recognition with Next-gen Kaldi"

    language_choices = ["Chinese"]
    language_to_models = {
        "Chinese": ["None"]
    }
    default_language = language_choices[0]
    default_models = language_to_models[default_language]

    with gr.Blocks() as blocks:
        gr.Markdown(value=title)

        # The input widgets are created inside the Blocks context so that
        # they are part of the layout and can legally take part in events.
        language_radio = gr.Radio(
            label="Language",
            choices=language_choices,
            value=default_language,
        )
        model_dropdown = gr.Dropdown(
            choices=default_models,
            label="Select a model",
            value=default_models[0],
        )
        decoding_method_radio = gr.Radio(
            label="Decoding method",
            choices=["greedy_search", "modified_beam_search"],
            value="greedy_search",
        )
        num_active_paths_slider = gr.Slider(
            minimum=1,
            value=4,
            step=1,
            label="Number of active paths for modified_beam_search",
        )
        punct_radio = gr.Radio(
            label="Whether to add punctuation (Only for Chinese and English)",
            choices=["Yes", "No"],
            value="Yes",
        )

        with gr.Tabs():
            with gr.TabItem("Upload from disk"):
                uploaded_file = gr.Audio(
                    sources=["upload"],
                    type="filepath",
                    label="Upload from disk",
                )
                upload_button = gr.Button("Submit for recognition")
                uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
                uploaded_html_info = gr.HTML(label="Info")

                # Same order as the parameters of process_uploaded_file.
                recognition_inputs = [
                    language_radio,
                    model_dropdown,
                    decoding_method_radio,
                    num_active_paths_slider,
                    punct_radio,
                    uploaded_file,
                ]
                recognition_outputs = [uploaded_output, uploaded_html_info]

                gr.Examples(
                    examples=examples,
                    inputs=recognition_inputs,
                    outputs=recognition_outputs,
                    fn=process_uploaded_file,
                )

                # Without this handler the submit button does nothing.
                upload_button.click(
                    process_uploaded_file,
                    inputs=recognition_inputs,
                    outputs=recognition_outputs,
                )

        # Refresh the model list whenever another language is picked.
        language_radio.change(
            update_model_dropdown,
            inputs=language_radio,
            outputs=model_dropdown,
        )

    blocks.queue().launch()
    return


if __name__ == "__main__":
    main()