import nemo.collections.asr as nemo_asr
import gradio as gr

# Load the pretrained Ukrainian FastConformer hybrid RNNT/CTC model on CPU
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    "theodotus/stt_ua_fastconformer_hybrid_large_pc", map_location="cpu"
)


def process_file(in_filename: str):
    """Transcribe a single audio file and return the recognized text."""
    if in_filename is None or in_filename == "":
        return "Error: No file"
    # transcribe() returns the hypotheses for each input file;
    # take the first (best) hypothesis for the single file passed in
    transcript = asr_model.transcribe(paths2audio_files=[in_filename])[0][0]
    return transcript


demo = gr.Blocks()

with demo:
    with gr.Tabs():
        # Tab 1: transcribe an audio file uploaded from disk
        with gr.TabItem("Upload from disk"):
            uploaded_file = gr.Audio(
                source="upload",  # Choose between "microphone", "upload"
                type="filepath",
                optional=False,
                label="Upload from disk",
            )
            upload_button = gr.Button("Submit for recognition")
            uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")

        # Tab 2: transcribe audio recorded from the microphone
        with gr.TabItem("Record from microphone"):
            microphone = gr.Audio(
                source="microphone",  # Choose between "microphone", "upload"
                type="filepath",
                optional=False,
                label="Record from microphone",
            )
            record_button = gr.Button("Submit for recognition")
            recorded_output = gr.Textbox(label="Recognized speech from recordings")

    # Wire each button to the same transcription function
    upload_button.click(
        process_file,
        inputs=[uploaded_file],
        outputs=[uploaded_output],
    )
    record_button.click(
        process_file,
        inputs=[microphone],
        outputs=[recorded_output],
    )

if __name__ == "__main__":
    demo.launch()