KingNish committed on
Commit
1a6b2bc
1 Parent(s): fd5a036

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -4
app.py CHANGED
@@ -42,10 +42,36 @@ def transcribe(inputs, previous_transcription):
42
  try:
43
  filename = f"{uuid.uuid4().hex}.wav"
44
  sample_rate, audio_data = inputs
45
- scipy.io.wavfile.write(filename, sample_rate, audio_data)
 
 
 
 
 
 
 
 
46
 
47
- transcription = pipe(filename)["text"]
48
- previous_transcription += transcription
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  end_time = time.time()
51
  latency = end_time - start_time
@@ -82,7 +108,7 @@ with gr.Blocks() as file:
82
  submit_button = gr.Button("Submit")
83
  clear_button = gr.Button("Clear Output")
84
 
85
- submit_button.click(transcribe, [input_audio_microphone, output], [output, latency_textbox], concurrency_limit=None)
86
  clear_button.click(clear, outputs=[output])
87
 
88
  with gr.Blocks() as demo:
 
42
  try:
43
  filename = f"{uuid.uuid4().hex}.wav"
44
  sample_rate, audio_data = inputs
45
+
46
+ # Check the duration of the audio
47
+ duration = len(audio_data) / sample_rate # Duration in seconds
48
+
49
+ if duration > 5:
50
+ # Split audio into chunks of 5 seconds
51
+ chunk_size = 5 * sample_rate # Number of samples for 5 seconds
52
+ num_chunks = int(np.ceil(len(audio_data) / chunk_size))
53
+ transcriptions = []
54
 
55
+ for i in range(num_chunks):
56
+ start_index = i * chunk_size
57
+ end_index = min(start_index + chunk_size, len(audio_data))
58
+ chunk_data = audio_data[start_index:end_index]
59
+
60
+ # Write chunk to a temporary file
61
+ chunk_filename = f"{uuid.uuid4().hex}_chunk.wav"
62
+ scipy.io.wavfile.write(chunk_filename, sample_rate, chunk_data)
63
+
64
+ # Transcribe the chunk
65
+ transcription = pipe(chunk_filename)["text"]
66
+ transcriptions.append(transcription)
67
+
68
+ # Combine all transcriptions
69
+ previous_transcription += " ".join(transcriptions)
70
+ else:
71
+ # Write the original audio file if it's 5 seconds or less
72
+ scipy.io.wavfile.write(filename, sample_rate, audio_data)
73
+ transcription = pipe(filename)["text"]
74
+ previous_transcription += transcription
75
 
76
  end_time = time.time()
77
  latency = end_time - start_time
 
108
  submit_button = gr.Button("Submit")
109
  clear_button = gr.Button("Clear Output")
110
 
111
+ submit_button.click(transcribe, [input_audio_microphone, output], [output, latency_textbox], concurrency_limit=None)
112
  clear_button.click(clear, outputs=[output])
113
 
114
  with gr.Blocks() as demo: