import os
import tempfile

import streamlit as st
import torch
import torchaudio
from transformers import pipeline

# App title
st.title("ASR with Hugging Face Whisper")


# Load the ASR model once and cache it across Streamlit reruns
@st.cache_resource
def load_asr():
    return pipeline(
        task="automatic-speech-recognition",
        model="openai/whisper-large-v2",
        device=0 if torch.cuda.is_available() else -1,
    )


asr = load_asr()


def transcribe(waveform, sample_rate):
    """Downmix a (channels, frames) waveform to mono and run it through the pipeline."""
    mono = waveform.mean(dim=0).numpy()
    # The pipeline expects a dict with the raw array and its sampling rate;
    # chunk_length_s enables long-form audio, return_timestamps yields segments.
    return asr(
        {"raw": mono, "sampling_rate": sample_rate},
        chunk_length_s=30,
        return_timestamps=True,
    )


def show_transcription(result, header):
    """Display each timestamped segment of a pipeline result."""
    st.subheader(header)
    for idx, chunk in enumerate(result["chunks"]):
        st.write(f"Segment {idx + 1}: {chunk['text']}")


# File uploader widget
uploaded_audio = st.file_uploader("Upload an audio file (wav/mp3)", type=["wav", "mp3"])

# Check if an audio file is uploaded
if uploaded_audio:
    # Write the upload to a temporary file so torchaudio can decode it
    suffix = os.path.splitext(uploaded_audio.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
        tmp.write(uploaded_audio.read())
        tmp_path = tmp.name
    try:
        # Read the uploaded audio file
        audio_data, sample_rate = torchaudio.load(tmp_path)
        # Perform ASR on the uploaded audio
        with st.spinner("Performing ASR..."):
            result = transcribe(audio_data, sample_rate)
        # Display the ASR result
        show_transcription(result, "Transcription:")
    finally:
        os.remove(tmp_path)

# Provide instructions
st.write("Instructions:")
st.write("1. Upload an audio file in WAV or MP3 format.")
st.write("2. The transcription appears automatically once the upload finishes.")

# Add a sample audio file for testing (optional)
st.write("Sample Audio for Testing:")
sample_audio = "Wave_files_demos_Welcome.wav"
sample_audio_path = os.path.join(os.getcwd(), sample_audio)
st.audio(sample_audio_path, format="audio/wav")

# Add a button to transcribe the sample audio (optional)
if st.button("Transcribe Sample Audio"):
    # Read the sample audio file
    sample_audio_data, sample_audio_rate = torchaudio.load(sample_audio_path)
    # Perform ASR on the sample audio
    with st.spinner("Performing ASR..."):
        sample_result = transcribe(sample_audio_data, sample_audio_rate)
    # Display the ASR result for the sample audio
    show_transcription(sample_result, "Transcription (Sample Audio):")
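
# --- Usage note (a minimal sketch: the file name "app.py" and the presence of
# the sample WAV in the working directory are assumptions, not part of the
# original script) ---
# Install the dependencies and launch the app with:
#   pip install streamlit transformers torch torchaudio
#   streamlit run app.py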