Spaces:

Lenylvt
/

Whisper-API

Running

Lenylvt committed on Feb 17

Commit

2e93abd

•

1 Parent(s): 64736ec

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,13 +6,6 @@ import logging
 logging.basicConfig()
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
-# Initialize the Whisper model with your desired configuration
-model_size = "large-v3"  # Choose the model size
-device = "cpu"  # GPU : cuda  CPU : cpu
-compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
-model = WhisperModel(model_size, device=device, compute_type=compute_type)
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS.mmm format."""
     hours = int(seconds // 3600)
@@ -20,7 +13,13 @@ def format_timestamp(seconds):
     seconds_remainder = seconds % 60
     return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
-def transcribe(audio_file):
     # Transcribe the audio file
     segments, _ = model.transcribe(audio_file)
@@ -32,14 +31,14 @@ def transcribe(audio_file):
     return "\n".join(transcription_with_timestamps)
-# Define the Gradio interface
 iface = gr.Interface(fn=transcribe,
-                     inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
                      outputs="text",
-                     title="Whisper Transcription with Enhanced Timestamps",
-                     description="Upload an audio file to get transcription with enhanced timestamps in HH:MM:SS.mmm format using Faster Whisper.")
 # Launch the app
 if __name__ == "__main__":
     iface.launch()

 logging.basicConfig()
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
 def format_timestamp(seconds):
     """Convert seconds to HH:MM:SS.mmm format."""
     hours = int(seconds // 3600)
     seconds_remainder = seconds % 60
     return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
+def transcribe(audio_file, model_size):
+    # Initialize the Whisper model based on the selected model size
+    device = "cpu"  # GPU : cuda  CPU : cpu
+    compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
+    model = WhisperModel(model_size, device=device, compute_type=compute_type)
     # Transcribe the audio file
     segments, _ = model.transcribe(audio_file)
     return "\n".join(transcription_with_timestamps)
+# Define the Gradio interface with a dropdown for model selection
 iface = gr.Interface(fn=transcribe,
+                     inputs=[gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
+                             gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size")],
                      outputs="text",
+                     title="Whisper API",
+                     description="For web use please visit [this space](https://huggingface.co/spaces/Lenylvt/Whisper)")
 # Launch the app
 if __name__ == "__main__":
     iface.launch()