Spaces:

Lenylvt
/

Whisper-API

Running

Lenylvt committed on Feb 17

Commit

dc2f23e

•

1 Parent(s): e508c61

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,29 +7,37 @@ logging.basicConfig()
 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
 # Initialize the Whisper model with your desired configuration
-model_size = "small"  # Choose the model size
 device = "cpu"  # GPU : cuda  CPU : cpu
 compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
 model = WhisperModel(model_size, device=device, compute_type=compute_type)
 def transcribe(audio_file):
-    # Transcribe the audio file without word-level timestamps
     segments, _ = model.transcribe(audio_file)
-    # Format and gather transcription with segment timestamps
     transcription_with_timestamps = [
-        f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}" for segment in segments
     ]
     return "\n".join(transcription_with_timestamps)
 # Define the Gradio interface
 iface = gr.Interface(fn=transcribe,
-                     inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
                      outputs="text",
-                     title="Whisper Transcription with Line-by-Line Timestamps",
-                     description="Upload an audio file to get transcription with line-by-line timestamps using Faster Whisper.")
 # Launch the app
 if __name__ == "__main__":

 logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
 # Initialize the Whisper model with your desired configuration
+model_size = "large-v3"  # Choose the model size
 device = "cpu"  # GPU : cuda  CPU : cpu
 compute_type = "int8"  # GPU : float16 or int8 - CPU : int8
 model = WhisperModel(model_size, device=device, compute_type=compute_type)
def format_timestamp(seconds):
    """Convert an offset in seconds to an ``HH:MM:SS.mmm`` string.

    The value is rounded to whole milliseconds *before* it is split into
    hours/minutes/seconds, so rounding carries into the larger fields.
    For example 59.9996 renders as ``00:01:00.000`` rather than the
    out-of-range ``00:00:60.000``.

    Args:
        seconds: Offset in seconds (int or float). Intended for
            non-negative values.

    Returns:
        The offset as a zero-padded ``HH:MM:SS.mmm`` string. The hours
        field is not capped; it grows past two digits for offsets of
        100 hours or more.
    """
    # Work in integer milliseconds so no field is rounded independently.
    total_ms = int(round(seconds * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
 def transcribe(audio_file):
+    # Transcribe the audio file
     segments, _ = model.transcribe(audio_file)
+    # Format and gather transcription with enhanced timestamps
     transcription_with_timestamps = [
+        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
+        for segment in segments
     ]
     return "\n".join(transcription_with_timestamps)
 # Define the Gradio interface
 iface = gr.Interface(fn=transcribe,
+                     inputs=gr.inputs.Audio(source="upload", type="file", label="Upload Audio"),
                      outputs="text",
+                     title="Whisper Transcription with Enhanced Timestamps",
+                     description="Upload an audio file to get transcription with enhanced timestamps in HH:MM:SS.mmm format using Faster Whisper.")
 # Launch the app
 if __name__ == "__main__":