ashhadahsan commited on
Commit
5e3e8ef
1 Parent(s): 2f1bcc3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -5
app.py CHANGED
@@ -6,6 +6,8 @@ from utils import translate_to_english, detect_language, write, read, get_key
6
  import whisperx as whisper
7
  import json
8
  import pandas as pd
 
 
9
 
10
  if "btn1" not in st.session_state:
11
  st.session_state["btn1"] = False
@@ -135,22 +137,37 @@ with input:
135
  )
136
  else:
137
  temperature = [temperature]
 
 
 
 
 
138
  # st.write(temperature)
139
  submit = st.button("Submit", type="primary")
140
  with output:
141
  st.header("Output")
 
 
 
142
  if submit:
143
  if audio_uploaded is None:
144
  # st.audio(audio_bytes, format="audio/wav")
145
  audio_uploaded = audio_file
146
  if audio_uploaded is not None:
 
 
 
 
 
 
 
147
 
148
  # audio_bytes = audio_uploaded.read()
149
  # st.audio(audio_bytes, format="audio/wav")
150
  if language == "":
151
  model = whisper.load_model(model_name)
152
  with st.spinner("Detecting language..."):
153
- detection = detect_language(audio_uploaded.name, model)
154
  language = detection.get("detected_language")
155
  del model
156
  # st.write(language)
@@ -169,7 +186,7 @@ with output:
169
  with st.container():
170
  with st.spinner(f"Running with {model_name} model"):
171
  result = model.transcribe(
172
- audio_uploaded.name,
173
  language=language,
174
  patience=patience,
175
  initial_prompt=initial_prompt,
@@ -193,7 +210,7 @@ with output:
193
  result["segments"],
194
  model_a,
195
  metadata,
196
- audio_uploaded.name,
197
  device=device,
198
  )
199
 
@@ -212,11 +229,11 @@ with output:
212
  if text_json is None:
213
  words_segments = result_aligned["word_segments"]
214
  write(
215
- audio_uploaded.name,
216
  dtype=transcription,
217
  result_aligned=result_aligned,
218
  )
219
- trans_text = read(audio_uploaded.name, transcription)
220
  trans.text_area(
221
  "transcription", trans_text, height=None, max_chars=None, key=None
222
  )
@@ -248,3 +265,4 @@ with output:
248
  lang.text_input(
249
  "detected language", language_dict.get(language), disabled=True
250
  )
 
 
6
  import whisperx as whisper
7
  import json
8
  import pandas as pd
9
+ from pydub import AudioSegment
10
+ import os
11
 
12
  if "btn1" not in st.session_state:
13
  st.session_state["btn1"] = False
 
137
  )
138
  else:
139
  temperature = [temperature]
140
+ try:
141
+ if len(temperature) == 0:
142
+ st.error("Choose correct value for temperature")
143
+ except TypeError:
144
+ pass
145
  # st.write(temperature)
146
  submit = st.button("Submit", type="primary")
147
  with output:
148
  st.header("Output")
149
+ import uuid
150
+
151
+ name = str(uuid.uuid1())
152
  if submit:
153
  if audio_uploaded is None:
154
  # st.audio(audio_bytes, format="audio/wav")
155
  audio_uploaded = audio_file
156
  if audio_uploaded is not None:
157
+ if audio_uploaded.name.endswith(".wav"):
158
+ temp = AudioSegment.from_wav(audio_uploaded)
159
+ temp.export(f"{name}.wav", format="wav")
160
+
161
+ if audio_uploaded.name.endswith(".mp3"):
162
+ temp = AudioSegment.from_mp3(audio_uploaded)
163
+ temp.export(f"{name}.wav", format="wav")
164
 
165
  # audio_bytes = audio_uploaded.read()
166
  # st.audio(audio_bytes, format="audio/wav")
167
  if language == "":
168
  model = whisper.load_model(model_name)
169
  with st.spinner("Detecting language..."):
170
+ detection = detect_language(f"{name}.wav", model)
171
  language = detection.get("detected_language")
172
  del model
173
  # st.write(language)
 
186
  with st.container():
187
  with st.spinner(f"Running with {model_name} model"):
188
  result = model.transcribe(
189
+ f"{name}.wav",
190
  language=language,
191
  patience=patience,
192
  initial_prompt=initial_prompt,
 
210
  result["segments"],
211
  model_a,
212
  metadata,
213
+ f"{name}.wav",
214
  device=device,
215
  )
216
 
 
229
  if text_json is None:
230
  words_segments = result_aligned["word_segments"]
231
  write(
232
+ f"{name}.wav",
233
  dtype=transcription,
234
  result_aligned=result_aligned,
235
  )
236
+ trans_text = read(f"{name}.wav", transcription)
237
  trans.text_area(
238
  "transcription", trans_text, height=None, max_chars=None, key=None
239
  )
 
265
  lang.text_input(
266
  "detected language", language_dict.get(language), disabled=True
267
  )
268
+ if os.path.exists(f"{name}.wav"):
+ os.remove(f"{name}.wav")