|
import streamlit as st |
|
from faster_whisper import WhisperModel |
|
import logging |
|
import tempfile |
|
import os |
|
|
|
|
|
# Attach a default handler to the root logger so library log records are emitted.
logging.basicConfig()

# Surface faster-whisper's internal debug/progress messages in the console.
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
|
|
|
def format_timestamp(seconds):
    """Render a duration in seconds as an ``HH:MM:SS.mmm`` timestamp string."""
    total_minutes, secs = divmod(seconds, 60)
    hrs, mins = divmod(int(total_minutes), 60)
    # Width 6 with 3 decimals pads seconds as e.g. "07.250".
    return f"{hrs:02d}:{mins:02d}:{secs:06.3f}"
|
|
|
def transcribe(audio_file, model_size):
    """Transcribe an uploaded audio/video file with faster-whisper.

    Parameters
    ----------
    audio_file : streamlit UploadedFile
        The uploaded media; its bytes are written to a temp file because
        ``WhisperModel.transcribe`` expects a file path.
    model_size : str
        Whisper model identifier, e.g. "base" or "large-v3".

    Returns
    -------
    str
        One line per segment: "[HH:MM:SS.mmm -> HH:MM:SS.mmm] text".
    """
    progress_text = st.empty()
    progress_bar = st.progress(0)

    # CPU with int8 quantization keeps memory usage modest on shared hardware.
    device = "cpu"
    compute_type = "int8"

    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    progress_text.text("Preparing file for transcription...")
    progress_bar.progress(10)

    # Persist the upload to disk, keeping the original extension so the
    # decoder can sniff the container format.
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(audio_file.name)[1]) as tmp:
        tmp.write(audio_file.getvalue())
        tmp_path = tmp.name

    try:
        progress_text.text("Transcribing audio...")
        progress_bar.progress(30)

        segments, _ = model.transcribe(tmp_path)

        progress_text.text("Processing transcription...")
        progress_bar.progress(70)

        # ``model.transcribe`` returns a lazy generator: decoding happens
        # while iterating, so the temp file must still exist during this
        # loop. (Previously the file was deleted before iteration, which
        # could break lazy decoding mid-transcription.)
        transcription_with_timestamps = [
            f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
            for segment in segments
        ]
    finally:
        # Always remove the temp file, even if transcription raises.
        os.remove(tmp_path)

    progress_text.text("Transcription complete.")
    progress_bar.progress(100)
    progress_text.empty()

    return "\n".join(transcription_with_timestamps)
|
|
|
|
|
st.title("Whisper")
st.write("For Remove Timestamps please visit [this Space](https://huggingface.co/spaces/Lenylvt/Whisper_Timestamps_Remover). For API use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper-API)")

# Input widgets: a media upload and a Whisper model-size picker.
uploaded_media = st.file_uploader(
    "π΅ Upload Audio or Video",
    type=["wav", "mp3", "ogg", "mp4", "avi"],
)
chosen_model = st.selectbox(
    "π Model Size",
    ["base", "small", "medium", "large", "large-v2", "large-v3"],
)

# Run transcription only once a file has been uploaded and a model chosen.
if uploaded_media is not None and chosen_model is not None:
    result = transcribe(uploaded_media, chosen_model)
    st.text_area("π Transcription", result, height=300)
|
|