import gradio as gr
from faster_whisper import WhisperModel
import logging
import torch

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
def format_timestamp(seconds):
    """Convert seconds to HH:MM:SS.mmm format."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    seconds_remainder = seconds % 60
    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
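
# Quick sanity check of the formatter (illustrative comment only, not executed by the app):
#   format_timestamp(3661.5) -> "01:01:01.500"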

def transcribe(audio_file, model_size):
    # Initialize the Whisper model based on the selected model size
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"
    model = WhisperModel(model_size, device=device, compute_type=compute_type)

    # Transcribe the audio file
    segments, _ = model.transcribe(audio_file)

    # Format and gather transcription with enhanced timestamps
    transcription_with_timestamps = [
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    ]

    return "\n".join(transcription_with_timestamps)

# Define the Gradio interface with a dropdown for model selection
iface = gr.Interface(fn=transcribe,
                     inputs=[gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
                             gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size")],
                     outputs="text",
                     title="Whisper API",
                     description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)")

# Launch the app
if __name__ == "__main__":
    iface.launch()