Spaces:
Runtime error
Runtime error
File size: 1,130 Bytes
07f4993 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
import gradio as gr
import torch
from espnet2.bin.asr_inference import Speech2Text
from espnet_model_zoo.downloader import ModelDownloader
# Fetch the pretrained model files, then build the recognizer once at start-up
# so every request reuses the same loaded model.
d = ModelDownloader()
model_files = d.download_and_unpack(
    "espnet/simple_asr_train_asr_transformer_e18_raw_bpe_sp_valid.acc.best"
)
# Inference runs on the CPU; pass device="cuda" instead when using a GPU.
asr_model = Speech2Text(device="cpu", **model_files)
def transcribe(audio):
    """Transcribe a Gradio audio recording to text using the ESPnet model.

    Args:
        audio: The value delivered by a ``gr.Audio(type="numpy")`` input: a
            ``(sample_rate, samples)`` pair, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The text of the first hypothesis returned by ``asr_model``, or an
        empty string when there is no audio or no hypothesis.
    """
    # Gradio passes None when nothing was recorded; indexing it would raise.
    if audio is None:
        return ""

    # NOTE(review): the sample rate in audio[0] is not used. The model
    # presumably expects one fixed rate (commonly 16 kHz) -- confirm, and
    # resample here if the microphone delivers a different rate.
    waveform = torch.as_tensor(audio[1])
    if waveform.numel() == 0:
        return ""

    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        # Integer PCM (Gradio delivers int16) is rescaled to floats in
        # [-1, 1), the range produced by soundfile-style loaders.
        full_scale = float(torch.iinfo(waveform.dtype).max) + 1.0
        waveform = waveform.to(torch.float32) / full_scale

    # Multi-channel recordings arrive as (samples, channels); the recognizer
    # is given a single 1-D waveform, so average the channels.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=-1)

    result = asr_model(waveform)
    if not result:
        return ""
    text, *_ = result[0]  # first hypothesis; its first field is the text
    return text
# Build the Gradio UI: microphone audio in, transcription text out.
# Gradio 4 replaced the Audio component's ``source="microphone"`` argument
# with ``sources=["microphone"]`` and rejects the old keyword, so choose the
# keyword from the installed major version to work on both.
_gradio_major = int(gr.__version__.split(".")[0])
if _gradio_major >= 4:
    _mic_kwargs = {"sources": ["microphone"]}
else:
    _mic_kwargs = {"source": "microphone"}

interface = gr.Interface(
    fn=transcribe,  # Function called with the recorded audio
    inputs=gr.Audio(type="numpy", **_mic_kwargs),  # Microphone input as a numpy array
    outputs="text",  # Transcription shown as plain text
    title="ESPnet ASR Demo",  # Title of the UI
    description="Simple ESPnet-based speech recognition",  # Description of the app
)

# Launch the app
interface.launch()
|