Spaces:

Lenylvt
/

Whisper-API

Running

File size: 1,766 Bytes

c60e096
7752cd2
 
0bcb158
c60e096
7752cd2
 
 
c60e096
dc2f23e
 
 
 
 
 
 
2e93abd
 
c31e25e
 
2e93abd
 
 
dc2f23e
3a81031
7752cd2
dc2f23e
3a81031
dc2f23e
 
3a81031
7752cd2
 
c60e096
2e93abd
c60e096
2e93abd
 
c60e096
2e93abd
 
c60e096

import functools
import logging

import gradio as gr
import torch
from faster_whisper import WhisperModel

# Install the default root logging handler and switch the "faster_whisper"
# logger to DEBUG so its messages are visible while debugging
logging.basicConfig()
_faster_whisper_logger = logging.getLogger("faster_whisper")
_faster_whisper_logger.setLevel(logging.DEBUG)

def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a rounding carry propagates into the minutes and hours
    (59.9996 becomes ``00:01:00.000`` rather than ``00:00:60.000``).

    Args:
        seconds: Duration in seconds (int or float); expected to be
            non-negative.

    Returns:
        The formatted timestamp. Hours are zero-padded to at least two
        digits and grow beyond that when needed.
    """
    # Work in integer milliseconds so no field can round up to its own limit.
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"

@functools.lru_cache(maxsize=1)
def _load_model(model_size, device, compute_type):
    """Build a WhisperModel, keeping only the most recently used one cached."""
    return WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file, model_size):
    """Transcribe an audio file and return one timestamped line per segment.

    Args:
        audio_file: Path of the audio file to transcribe (the Gradio audio
            input is configured with ``type="filepath"``).
        model_size: Whisper model name passed to ``WhisperModel``.

    Returns:
        A string with one line per segment, formatted as
        ``[HH:MM:SS.mmm -> HH:MM:SS.mmm] text``, joined by newlines.

    Raises:
        gr.Error: If no audio file or no model size was provided.
    """
    # Fail fast with a readable message instead of an opaque error raised
    # from inside the model after it has already been loaded.
    if not audio_file:
        raise gr.Error("Please upload an audio file.")
    if not model_size:
        raise gr.Error("Please select a model size.")

    # Use the GPU with half precision when available, otherwise int8 on CPU
    device = "cuda" if torch.cuda.is_available() else "cpu"
    compute_type = "float16" if device == "cuda" else "int8"

    # Reuse the model from the previous request when the settings match,
    # rather than constructing a new WhisperModel on every call.
    model = _load_model(model_size, device, compute_type)

    # Transcribe the audio file; the second return value is not used here
    segments, _ = model.transcribe(audio_file)

    # Format each segment with its start/end timestamps
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )

# Define the Gradio interface: an audio upload plus a dropdown for model selection
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
        gr.Dropdown(
            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
            # Preselect the first listed model so the form can be submitted
            # without touching the dropdown (otherwise the value may be None).
            value="base",
            label="Model Size",
        ),
    ],
    outputs="text",
    title="Whisper API",
    description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)",
)

# Launch the app only when this file is run as a script
if __name__ == "__main__":
    iface.launch()