Spaces:
Runtime error
Runtime error
import gradio as gr | |
import torch | |
from espnet2.bin.asr_inference import Speech2Text | |
from espnet_model_zoo.downloader import ModelDownloader | |
# Fetch the pretrained model files, then build the recogniser from them.
d = ModelDownloader()
_model_files = d.download_and_unpack(
    "espnet/simple_asr_train_asr_transformer_e18_raw_bpe_sp_valid.acc.best"
)
# Inference runs on the CPU; pass device="cuda" instead when a GPU is available.
asr_model = Speech2Text(**_model_files, device="cpu")
def transcribe(audio):
    """Transcribe speech to text using ESPnet.

    Args:
        audio: The value Gradio passes for a ``type="numpy"`` audio input: a
            ``(sample_rate, samples)`` pair, or ``None`` when nothing was
            recorded.

    Returns:
        The text of the first hypothesis returned by the recogniser, or an
        empty string when there is no audio to decode or no hypothesis comes
        back.
    """
    # Gradio hands over None when the user submits without recording anything.
    if audio is None:
        return ""

    # NOTE(review): the sample rate is not used. Presumably the model expects
    # one fixed rate (commonly 16 kHz) -- confirm, and resample here if the
    # microphone rate can differ.
    _sample_rate, samples = audio
    speech = torch.as_tensor(samples)

    # Nothing to decode; avoid feeding a zero-length signal to the model.
    if speech.numel() == 0:
        return ""

    if speech.is_floating_point():
        speech = speech.to(torch.float32)
    else:
        # Integer PCM (Gradio delivers int16) -> floats in [-1, 1).
        # NOTE(review): assumes the model was trained on audio loaded in that
        # float range -- confirm against the model's training recipe.
        full_scale = float(torch.iinfo(speech.dtype).max) + 1.0
        speech = speech.to(torch.float32) / full_scale

    # Multi-channel recordings arrive as (samples, channels); average the
    # channels so the recogniser always receives a 1-D waveform.
    if speech.dim() > 1:
        speech = speech.mean(dim=-1)

    result = asr_model(speech)
    if not result:
        return ""
    text, *_ = result[0]  # First hypothesis: (text, ...)
    return text
# Create a simple Gradio interface


def _microphone_input():
    """Build a microphone ``gr.Audio`` input that works across Gradio versions.

    Gradio 4 replaced the ``source="microphone"`` keyword with
    ``sources=["microphone"]``. The installed signature is inspected so the
    matching keyword is used instead of failing (or being ignored) at startup.
    """
    import inspect  # Local import: only needed for this one-off check.

    accepted = inspect.signature(gr.Audio.__init__).parameters
    if "sources" in accepted:
        return gr.Audio(sources=["microphone"], type="numpy")
    return gr.Audio(source="microphone", type="numpy")


interface = gr.Interface(
    fn=transcribe,  # Function to call
    inputs=_microphone_input(),  # Audio input from microphone
    outputs="text",  # Output type (text transcription)
    title="ESPnet ASR Demo",  # Title of the UI
    description="Simple ESPnet-based speech recognition",  # Description of the app
)
# Launch the app
interface.launch()