Lenylvt committed on
Commit
dc2f23e
1 Parent(s): e508c61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -7,29 +7,37 @@ logging.basicConfig()
7
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
8
 
9
  # Initialize the Whisper model with your desired configuration
10
- model_size = "small" # Choose the model size
11
  device = "cpu" # GPU : cuda CPU : cpu
12
  compute_type = "int8" # GPU : float16 or int8 - CPU : int8
13
 
14
  model = WhisperModel(model_size, device=device, compute_type=compute_type)
15
 
 
 
 
 
 
 
 
16
  def transcribe(audio_file):
17
- # Transcribe the audio file without word-level timestamps
18
  segments, _ = model.transcribe(audio_file)
19
 
20
- # Format and gather transcription with segment timestamps
21
  transcription_with_timestamps = [
22
- f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}" for segment in segments
 
23
  ]
24
 
25
  return "\n".join(transcription_with_timestamps)
26
 
27
  # Define the Gradio interface
28
  iface = gr.Interface(fn=transcribe,
29
- inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
30
  outputs="text",
31
- title="Whisper Transcription with Line-by-Line Timestamps",
32
- description="Upload an audio file to get transcription with line-by-line timestamps using Faster Whisper.")
33
 
34
  # Launch the app
35
  if __name__ == "__main__":
 
7
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
8
 
9
  # Initialize the Whisper model with your desired configuration
10
+ model_size = "large-v3" # Choose the model size
11
  device = "cpu" # GPU : cuda CPU : cpu
12
  compute_type = "int8" # GPU : float16 or int8 - CPU : int8
13
 
14
  model = WhisperModel(model_size, device=device, compute_type=compute_type)
15
 
16
def format_timestamp(seconds):
    """Convert a duration in seconds to an ``HH:MM:SS.mmm`` string.

    The value is rounded to whole milliseconds *before* it is split into
    fields, so a rounding carry propagates into minutes and hours instead
    of producing an invalid seconds field such as ``00:00:60.000``.

    Args:
        seconds: Duration in seconds (int or float). Negative values are
            rendered with a leading ``-``.

    Returns:
        The formatted timestamp; the hours field is at least two digits wide
        and grows as needed.
    """
    total_ms = int(round(seconds * 1000))
    sign = "-" if total_ms < 0 else ""
    total_ms = abs(total_ms)

    hours, remainder_ms = divmod(total_ms, 3600000)
    minutes, remainder_ms = divmod(remainder_ms, 60000)
    whole_seconds, millis = divmod(remainder_ms, 1000)
    return f"{sign}{hours:02d}:{minutes:02d}:{whole_seconds:02d}.{millis:03d}"
22
+
23
def transcribe(audio_file):
    """Transcribe an audio input and return one timestamped line per segment.

    Args:
        audio_file: The audio input, forwarded unchanged to
            ``model.transcribe``; ``None`` when no audio was provided.

    Returns:
        The segments joined by newlines, each rendered as
        ``[<start> -> <end>] <text>`` with both times produced by
        ``format_timestamp``. An empty string is returned when
        ``audio_file`` is ``None``.
    """
    # Submitting the form without an upload yields None; return early rather
    # than handing None to the model.
    if audio_file is None:
        return ""

    # Transcribe the audio file; the second return value is not needed here.
    segments, _ = model.transcribe(audio_file)

    # Format each segment with enhanced timestamps, joining lazily so no
    # intermediate list is built.
    return "\n".join(
        f"[{format_timestamp(segment.start)} -> {format_timestamp(segment.end)}] {segment.text}"
        for segment in segments
    )
34
 
35
  # Define the Gradio interface
36
iface = gr.Interface(
    fn=transcribe,
    # Use the top-level component API: the legacy `gr.inputs` namespace and
    # its `source=` / type="file" arguments are not available in current
    # Gradio releases. type="filepath" passes `transcribe` a path string.
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription with Enhanced Timestamps",
    description="Upload an audio file to get transcription with enhanced timestamps in HH:MM:SS.mmm format using Faster Whisper.",
)
41
 
42
  # Launch the app
43
  if __name__ == "__main__":