Spaces:
Running
Running
File size: 1,766 Bytes
c60e096 7752cd2 0bcb158 c60e096 7752cd2 c60e096 dc2f23e 2e93abd c31e25e 2e93abd dc2f23e 3a81031 7752cd2 dc2f23e 3a81031 dc2f23e 3a81031 7752cd2 c60e096 2e93abd c60e096 2e93abd c60e096 2e93abd c60e096 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import functools
import logging

import gradio as gr
import torch
from faster_whisper import WhisperModel
# Install the default root handler, then raise verbosity for the
# faster_whisper logger only so its debug messages are emitted.
logging.basicConfig()
_whisper_logger = logging.getLogger("faster_whisper")
_whisper_logger.setLevel(logging.DEBUG)
def format_timestamp(seconds):
    """Convert a duration in seconds to ``HH:MM:SS.mmm`` format.

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds. Rounding only at formatting time lets the
    seconds field round up to ``60.000`` (e.g. ``59.9996`` would render as
    ``00:00:60.000`` instead of ``00:01:00.000``).

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            rendered with a leading ``-`` followed by the magnitude.

    Returns:
        The formatted timestamp string; hours grow beyond two digits when
        the duration is 100 hours or more.
    """
    total_ms = int(round(seconds * 1000))
    # Decide the sign after rounding so tiny negatives do not produce "-0".
    sign = "-" if total_ms < 0 else ""
    total_ms = abs(total_ms)

    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{sign}{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"
@functools.lru_cache(maxsize=1)
def _load_model(model_size):
    """Build the WhisperModel for *model_size*, reusing the last one built.

    The cache holds a single entry: consecutive requests for the same size
    reuse the already constructed model, and requesting a different size
    replaces it, so at most one model is retained by the cache.
    """
    # Use the GPU with float16 when CUDA is available, otherwise CPU with int8.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"
    return WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file, model_size):
    """Transcribe an audio file into timestamped lines of text.

    Args:
        audio_file: Audio input handed unchanged to ``WhisperModel.transcribe``
            (the interface in this file supplies a file path).
        model_size: Model identifier handed to ``WhisperModel``.

    Returns:
        One ``[start -> end] text`` line per segment, joined with newlines.

    Raises:
        gr.Error: If ``audio_file`` or ``model_size`` is missing/empty, so the
            UI shows a readable message instead of a library traceback.
    """
    # Guard against a submit with nothing uploaded or nothing selected.
    if not audio_file:
        raise gr.Error("Please upload an audio file before submitting.")
    if not model_size:
        raise gr.Error("Please select a model size before submitting.")

    model = _load_model(model_size)
    segments, _ = model.transcribe(audio_file)

    # Segments are consumed lazily while the output string is assembled.
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )
# Gradio interface: an audio upload and a model-size dropdown feed
# transcribe(), whose result is shown as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            # Explicit initial selection so transcribe() is not handed None
            # when the user submits without touching the dropdown.
            value="base",
            label="Model Size",
        ),
    ],
    outputs="text",
    title="Whisper API",
    description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)",
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()
|