Spaces:
Sleeping
Sleeping
File size: 1,537 Bytes
7e54b28 ab0b8b5 6d4cee8 7e54b28 ab0b8b5 7e54b28 ab0b8b5 6d4cee8 ab0b8b5 6d4cee8 ab0b8b5 6d4cee8 ab0b8b5 7e54b28 ab0b8b5 7e54b28 ab0b8b5 7e54b28 ab0b8b5 7e54b28 6d4cee8 ab0b8b5 6d4cee8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import streamlit as st
from transformers import pipeline
import librosa
import soundfile as sf
import numpy as np
import io
# Streamlit re-executes this script from top to bottom on every user
# interaction. Building the pipeline inside a cached loader means the model
# is constructed once per server process instead of on every rerun.
@st.cache_resource
def _load_asr_pipeline():
    """Create the Urdu automatic-speech-recognition pipeline."""
    return pipeline(
        "automatic-speech-recognition",
        model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
    )


# Module-level handle used by the transcription code below.
pipe = _load_asr_pipeline()
def load_audio(audio_file):
    """Decode an uploaded audio file into samples at 16 kHz.

    Args:
        audio_file: Object exposing ``read()`` that returns the complete
            encoded audio as bytes.

    Returns:
        The ``(samples, sampling_rate)`` pair produced by ``librosa.load``
        when called with ``sr=16000``.
    """
    # librosa is handed an in-memory, seekable copy of the uploaded bytes.
    buffer = io.BytesIO(audio_file.read())
    samples, sample_rate = librosa.load(buffer, sr=16000)
    return samples, sample_rate
def transcribe_audio(audio_np, sampling_rate=16000):
    """Transcribe raw audio samples with the module-level ASR pipeline.

    Args:
        audio_np: One-dimensional sequence/array of audio samples.
        sampling_rate: Sampling rate of ``audio_np`` in Hz. Defaults to
            16000, the rate ``load_audio`` requests from librosa.

    Returns:
        The transcribed text, or an empty string when ``audio_np`` holds no
        samples.
    """
    # Explicit length check: the truth value of a NumPy array is ambiguous,
    # and there is nothing to send to the model for empty audio.
    if audio_np is None or len(audio_np) == 0:
        return ""

    # The pipeline accepts raw samples together with their sampling rate.
    # Serialising through soundfile is not needed (and ``sf.write`` returns
    # None, so its result can never be used as pipeline input).
    result = pipe({"raw": audio_np, "sampling_rate": sampling_rate})
    return result["text"]
# ---- Streamlit page ----
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

# The uploader yields None until the user has picked a file.
uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3"])

if uploaded_file is not None:
    try:
        # Decode first, then transcribe; the heading is only rendered once
        # both steps have succeeded.
        samples, _sample_rate = load_audio(uploaded_file)
        transcript = transcribe_audio(samples)

        st.subheader("Transcription Result:")
        st.write(transcript)
    except Exception as exc:
        # Top-level UI boundary: show any failure on the page instead of
        # letting the script crash.
        st.error(f"An error occurred: {exc}")
|