ashhadahsan committed on
Commit
eeb50b0
1 Parent(s): a526070

update to handle mp3

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -164,12 +164,20 @@ with output:
164
  if audio_uploaded is not None:
165
  if audio_uploaded.name.endswith(".wav"):
166
  temp = AudioSegment.from_wav(audio_uploaded)
167
- temp.export(f"{name}.wav")
 
168
  if audio_uploaded.name.endswith(".mp3"):
 
 
 
 
 
 
 
169
 
170
 
171
- subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
172
- f'{name}.wav'])
173
  # try:
174
 
175
  # temp = AudioSegment.from_file(audio_uploaded, format="mp3")
@@ -182,7 +190,7 @@ with output:
182
  if language == "":
183
  model = whisper.load_model(model_name)
184
  with st.spinner("Detecting language..."):
185
- detection = detect_language(f"{name}.wav", model)
186
  language = detection.get("detected_language")
187
  del model
188
  if len(language) > 2:
@@ -196,7 +204,7 @@ with output:
196
  with st.container():
197
  with st.spinner(f"Running with {model_name} model"):
198
  result = model.transcribe(
199
- f"{name}.wav",
200
  language=language,
201
  patience=patience,
202
  initial_prompt=initial_prompt,
@@ -220,15 +228,15 @@ with output:
220
  result["segments"],
221
  model_a,
222
  metadata,
223
- f"{name}.wav",
224
  device=device,
225
  )
226
  write(
227
- f"{name}.wav",
228
  dtype=transcription,
229
  result_aligned=result_aligned,
230
  )
231
- trans_text = read(f"{name}.wav", transcription)
232
  trans.text_area(
233
  "transcription", trans_text, height=None, max_chars=None, key=None
234
  )
@@ -311,16 +319,16 @@ with output:
311
  cont,
312
  model_a,
313
  metadata,
314
- f"{name}.wav",
315
  device=device,
316
  )
317
  words_segments = result_aligned["word_segments"]
318
  write(
319
- f"{name}.wav",
320
  dtype=transcription,
321
  result_aligned=result_aligned,
322
  )
323
- trans_text = read(f"{name}.wav", transcription)
324
  char_segments = []
325
  word_segments = []
326
 
 
164
  if audio_uploaded is not None:
165
  if audio_uploaded.name.endswith(".wav"):
166
  temp = AudioSegment.from_wav(audio_uploaded)
167
+ input=f"{name}.wav"
168
+ temp.export(input)
169
  if audio_uploaded.name.endswith(".mp3"):
170
+ input=f"{name}.mp3"
171
+
172
+
173
+ with open(input, "wb") as f:
174
+
175
+ f.write(audio_uploaded.getbuffer())
176
+
177
 
178
 
179
+ # subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
180
+ # f'{name}.wav'])
181
  # try:
182
 
183
  # temp = AudioSegment.from_file(audio_uploaded, format="mp3")
 
190
  if language == "":
191
  model = whisper.load_model(model_name)
192
  with st.spinner("Detecting language..."):
193
+ detection = detect_language(input, model)
194
  language = detection.get("detected_language")
195
  del model
196
  if len(language) > 2:
 
204
  with st.container():
205
  with st.spinner(f"Running with {model_name} model"):
206
  result = model.transcribe(
207
+ input,
208
  language=language,
209
  patience=patience,
210
  initial_prompt=initial_prompt,
 
228
  result["segments"],
229
  model_a,
230
  metadata,
231
+ input,
232
  device=device,
233
  )
234
  write(
235
+ input,
236
  dtype=transcription,
237
  result_aligned=result_aligned,
238
  )
239
+ trans_text = read(input, transcription)
240
  trans.text_area(
241
  "transcription", trans_text, height=None, max_chars=None, key=None
242
  )
 
319
  cont,
320
  model_a,
321
  metadata,
322
+ input,
323
  device=device,
324
  )
325
  words_segments = result_aligned["word_segments"]
326
  write(
327
+ input,
328
  dtype=transcription,
329
  result_aligned=result_aligned,
330
  )
331
+ trans_text = read(input, transcription)
332
  char_segments = []
333
  word_segments = []
334