Spaces:

Lenylvt
/

Whisper-API

Running

App Files Files Community

Whisper-API / app.py

Lenylvt

Update app.py

0bcb158 verified 9 months ago

raw

history blame

1.77 kB

	import gradio as gr
	from faster_whisper import WhisperModel
	import logging
	import torch

	# Configure logging for debugging purposes: install the default root handler,
	# then let the "faster_whisper" logger emit DEBUG-level messages.
	logging.basicConfig()
	logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

	def format_timestamp(seconds):
	    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

	    The value is rounded to the nearest millisecond *before* it is split
	    into fields, so a rounding carry propagates into the minutes and hours
	    (59.9996 becomes ``00:01:00.000`` rather than ``00:00:60.000``).

	    Args:
	        seconds: Duration in seconds (int or float).

	    Returns:
	        The formatted timestamp. Hours are zero-padded to two digits but
	        are not wrapped, so durations of 100 hours or more use more digits.
	    """
	    # Work in integer milliseconds so every field is exact and no field can
	    # be rounded up past its limit when formatted.
	    total_ms = int(round(seconds * 1000))
	    hours, remainder_ms = divmod(total_ms, 3_600_000)
	    minutes, remainder_ms = divmod(remainder_ms, 60_000)
	    whole_seconds, millis = divmod(remainder_ms, 1000)
	    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"

	def transcribe(audio_file, model_size):
	    """Transcribe an audio file and return the text with timestamps.

	    A new ``WhisperModel`` is constructed on every call, using CUDA with
	    ``float16`` when ``torch`` reports a GPU and CPU with ``int8`` otherwise.

	    Args:
	        audio_file: Path of the audio file to transcribe.
	        model_size: Name of the Whisper model to load (for example "base").

	    Returns:
	        One line per segment in the form ``[start -> end] text``, joined
	        with newlines.

	    Raises:
	        gr.Error: If no audio file or no model size was supplied.
	    """
	    # The Gradio inputs can arrive empty (nothing uploaded / nothing
	    # selected); report that to the user instead of failing inside the model.
	    if not audio_file:
	        raise gr.Error("Please upload an audio file before submitting.")
	    if not model_size:
	        raise gr.Error("Please select a model size before submitting.")

	    # Pick the device first, then the compute type that goes with it.
	    device = "cuda" if torch.cuda.is_available() else "cpu"
	    compute_type = "float16" if device == "cuda" else "int8"

	    model = WhisperModel(model_size, device=device, compute_type=compute_type)

	    # The second return value (transcription info) is not used here.
	    segments, _ = model.transcribe(audio_file)

	    # Per the faster-whisper docs, segments are produced lazily, so the
	    # decoding work happens while this join consumes them.
	    return "\n".join(
	        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
	        for segment in segments
	    )

	# Build the Gradio interface: an audio upload plus a model-size dropdown,
	# wired to transcribe() and returning plain text.
	iface = gr.Interface(
	    fn=transcribe,
	    inputs=[
	        gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
	        gr.Dropdown(
	            choices=["base", "small", "medium", "large", "large-v2", "large-v3"],
	            label="Model Size",
	        ),
	    ],
	    outputs="text",
	    title="Whisper API",
	    description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)",
	)

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    iface.launch()