Spaces:
Runtime error
Runtime error
import gradio as gr | |
import nemo.collections.asr as nemo_asr | |
from pydub import AudioSegment | |
import pyaudioconvert as pac | |
# Load the pretrained speech-to-text model once, when the module is imported;
# transcribe() reuses this single instance for every request.
MODEL_NAME = "mbazaNLP/Kinyarwanda_nemo_stt_conformer_model"
hf_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(model_name=MODEL_NAME)
def convert(audio):
    """Prepare an audio file for transcription, rewriting it in place.

    MP3 and OGG inputs are decoded with pydub and exported as WAV over the
    same path; every accepted input is then handed to
    ``pac.convert_wav_to_16bit_mono`` with that path as both source and
    destination. The path itself is never renamed, so it may keep its
    original extension after conversion.

    Args:
        audio: Object exposing the path of the audio file as ``audio.name``,
            or ``None`` when no file was supplied.

    Returns:
        ``True`` once the file has been converted, ``False`` when no usable
        file name is available or the name does not end in mp3, wav or ogg.
    """
    # Both UI inputs are optional, so ``audio`` may legitimately be None;
    # treat that like an unsupported file instead of raising AttributeError.
    file_name = getattr(audio, "name", None)
    if not file_name:
        return False

    # Compare case-insensitively so "CLIP.MP3" is accepted like "clip.mp3".
    lowered = file_name.lower()
    if not lowered.endswith(("mp3", "wav", "ogg")):
        return False

    if lowered.endswith("mp3"):
        AudioSegment.from_mp3(file_name).export(file_name, format="wav")
    elif lowered.endswith("ogg"):
        AudioSegment.from_ogg(file_name).export(file_name, format="wav")

    pac.convert_wav_to_16bit_mono(file_name, file_name)
    return True
def transcribe(audio, audio_microphone):
    """Transcribe an uploaded or recorded clip with the module-level model.

    Args:
        audio: Uploaded file object whose path is read from ``.name``, or
            ``None`` when nothing was uploaded.
        audio_microphone: Microphone recording whose path is read from
            ``.name``, or ``None``. Takes precedence over ``audio`` when set.

    Returns:
        The recognised text, or a user-facing message when no audio was
        provided or the file format is not supported.
    """
    selected = audio_microphone if audio_microphone else audio
    if selected is None:
        # Both inputs are optional in the UI; without this guard a submit
        # with no audio would crash on ``.name``.
        return "Please upload an audio file or record from the microphone."

    if not convert(selected):
        return "The format must be mp3,wav and ogg"

    files = [selected.name]
    print(selected.name)

    result = "The format must be mp3,wav and ogg"
    # ``files`` has exactly one entry, so zip() consumes only the first item
    # of the model output; its element 0 is taken as the transcript.
    # NOTE(review): the shape of the transcribe() output depends on the
    # installed NeMo version - confirm before restructuring this loop.
    for _, transcription in zip(files, hf_model.transcribe(paths2audio_files=files)):
        result = transcription[0]
    return result
# Input widgets, listed in the order transcribe() receives them:
# an uploaded file first, then a microphone recording. Both are optional.
upload_input = gr.inputs.Audio(label="Upload Audio File", type="file", optional=True)
microphone_input = gr.inputs.Audio(
    source="microphone",
    type="file",
    optional=True,
    label="Record from microphone",
)
transcript_output = gr.outputs.Textbox(label="Recognized speech")

gradio_ui = gr.Interface(
    fn=transcribe,
    title="Kinyarwanda Speech Recognition",
    description="Upload an audio clip or record from browser using microphone, and let AI do the hard work of transcribing.",
    inputs=[upload_input, microphone_input],
    outputs=[transcript_output],
)

# Start the web app as soon as the script runs.
gradio_ui.launch()