Spaces:
Sleeping
Sleeping
Integration of progress bar with translation model compatibility
Browse files
- app.py +27 -12
- src/hooks/progressListener.py +1 -1
- src/hooks/subTaskProgressListener.py +4 -4
- src/nllb/nllbModel.py +3 -0
- src/vad.py +6 -3
- src/vadParallel.py +2 -2
- src/whisper/fasterWhisperContainer.py +1 -1
app.py
CHANGED
@@ -156,13 +156,15 @@ class WhisperTranscriber:
|
|
156 |
word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
|
157 |
progress=progress)
|
158 |
|
159 |
-
def transcribe_webui(self, modelName, languageName, nllbModelName, nllbLangName, urlData, multipleFiles, microphoneData, task,
|
160 |
vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
|
161 |
**decodeOptions: dict):
|
162 |
try:
|
|
|
163 |
sources = self.__get_source(urlData, multipleFiles, microphoneData)
|
164 |
|
165 |
try:
|
|
|
166 |
whisper_lang = get_language_from_name(languageName)
|
167 |
selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
|
168 |
selectedModel = modelName if modelName is not None else "base"
|
@@ -170,13 +172,15 @@ class WhisperTranscriber:
|
|
170 |
model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
|
171 |
model_name=selectedModel, compute_type=self.app_config.compute_type,
|
172 |
cache=self.model_cache, models=self.app_config.models)
|
173 |
-
|
|
|
174 |
nllb_lang = get_nllb_lang_from_name(nllbLangName)
|
175 |
selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
|
176 |
selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
|
177 |
-
|
178 |
nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
|
179 |
-
|
|
|
180 |
# Result
|
181 |
download = []
|
182 |
zip_file_lookup = {}
|
@@ -186,6 +190,7 @@ class WhisperTranscriber:
|
|
186 |
# Write result
|
187 |
downloadDirectory = tempfile.mkdtemp()
|
188 |
source_index = 0
|
|
|
189 |
|
190 |
outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
|
191 |
|
@@ -195,9 +200,10 @@ class WhisperTranscriber:
|
|
195 |
|
196 |
# A listener that will report progress to Gradio
|
197 |
root_progress_listener = self._create_progress_listener(progress)
|
|
|
198 |
|
199 |
# Execute whisper
|
200 |
-
for source in sources:
|
201 |
source_prefix = ""
|
202 |
source_audio_duration = source.get_audio_duration()
|
203 |
|
@@ -208,9 +214,9 @@ class WhisperTranscriber:
|
|
208 |
print("Transcribing ", source.source_path)
|
209 |
|
210 |
scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
|
211 |
-
base_task_total=
|
212 |
-
sub_task_start=
|
213 |
-
sub_task_total=
|
214 |
|
215 |
# Transcribe
|
216 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
@@ -219,7 +225,7 @@ class WhisperTranscriber:
|
|
219 |
# Update progress
|
220 |
current_progress += source_audio_duration
|
221 |
|
222 |
-
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words)
|
223 |
|
224 |
if len(sources) > 1:
|
225 |
# Add new line separators
|
@@ -377,9 +383,9 @@ class WhisperTranscriber:
|
|
377 |
def __init__(self, progress: gr.Progress):
|
378 |
self.progress = progress
|
379 |
|
380 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
381 |
# From 0 to 1
|
382 |
-
self.progress(current / total)
|
383 |
|
384 |
def on_finished(self):
|
385 |
self.progress(1)
|
@@ -435,7 +441,7 @@ class WhisperTranscriber:
|
|
435 |
|
436 |
return config
|
437 |
|
438 |
-
def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False):
|
439 |
if not os.path.exists(output_dir):
|
440 |
os.makedirs(output_dir)
|
441 |
|
@@ -446,6 +452,10 @@ class WhisperTranscriber:
|
|
446 |
|
447 |
if nllb_model.nllb_lang is not None:
|
448 |
try:
|
|
|
|
|
|
|
|
|
449 |
pbar = tqdm.tqdm(total=len(segments))
|
450 |
perf_start_time = time.perf_counter()
|
451 |
nllb_model.load_model()
|
@@ -456,9 +466,14 @@ class WhisperTranscriber:
|
|
456 |
if nllb_model.nllb_lang is not None:
|
457 |
segment["text"] = nllb_model.translation(seg_text)
|
458 |
pbar.update(1)
|
|
|
459 |
|
460 |
nllb_model.release_vram()
|
461 |
perf_end_time = time.perf_counter()
|
|
|
|
|
|
|
|
|
462 |
print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
|
463 |
except Exception as e:
|
464 |
# Ignore error - it's just a cleanup
|
|
|
156 |
word_timestamps=word_timestamps, prepend_punctuations=prepend_punctuations, append_punctuations=append_punctuations, highlight_words=highlight_words,
|
157 |
progress=progress)
|
158 |
|
159 |
+
def transcribe_webui(self, modelName: str, languageName: str, nllbModelName: str, nllbLangName: str, urlData: str, multipleFiles, microphoneData: str, task: str,
|
160 |
vadOptions: VadOptions, progress: gr.Progress = None, highlight_words: bool = False,
|
161 |
**decodeOptions: dict):
|
162 |
try:
|
163 |
+
progress(0, desc="init audio sources")
|
164 |
sources = self.__get_source(urlData, multipleFiles, microphoneData)
|
165 |
|
166 |
try:
|
167 |
+
progress(0, desc="init whisper model")
|
168 |
whisper_lang = get_language_from_name(languageName)
|
169 |
selectedLanguage = languageName.lower() if languageName is not None and len(languageName) > 0 else None
|
170 |
selectedModel = modelName if modelName is not None else "base"
|
|
|
172 |
model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
|
173 |
model_name=selectedModel, compute_type=self.app_config.compute_type,
|
174 |
cache=self.model_cache, models=self.app_config.models)
|
175 |
+
|
176 |
+
progress(0, desc="init translate model")
|
177 |
nllb_lang = get_nllb_lang_from_name(nllbLangName)
|
178 |
selectedNllbModelName = nllbModelName if nllbModelName is not None and len(nllbModelName) > 0 else "nllb-200-distilled-600M/facebook"
|
179 |
selectedNllbModel = next((modelConfig for modelConfig in self.app_config.nllb_models if modelConfig.name == selectedNllbModelName), None)
|
180 |
+
|
181 |
nllb_model = NllbModel(model_config=selectedNllbModel, whisper_lang=whisper_lang, nllb_lang=nllb_lang) # load_model=True
|
182 |
+
|
183 |
+
progress(0, desc="init transcribe")
|
184 |
# Result
|
185 |
download = []
|
186 |
zip_file_lookup = {}
|
|
|
190 |
# Write result
|
191 |
downloadDirectory = tempfile.mkdtemp()
|
192 |
source_index = 0
|
193 |
+
extra_tasks_count = 1 if nllb_lang is not None else 0
|
194 |
|
195 |
outputDirectory = self.output_dir if self.output_dir is not None else downloadDirectory
|
196 |
|
|
|
200 |
|
201 |
# A listener that will report progress to Gradio
|
202 |
root_progress_listener = self._create_progress_listener(progress)
|
203 |
+
sub_task_total = 1/(len(sources)+extra_tasks_count*len(sources))
|
204 |
|
205 |
# Execute whisper
|
206 |
+
for idx, source in enumerate(sources):
|
207 |
source_prefix = ""
|
208 |
source_audio_duration = source.get_audio_duration()
|
209 |
|
|
|
214 |
print("Transcribing ", source.source_path)
|
215 |
|
216 |
scaled_progress_listener = SubTaskProgressListener(root_progress_listener,
|
217 |
+
base_task_total=1,
|
218 |
+
sub_task_start=idx*1/len(sources),
|
219 |
+
sub_task_total=sub_task_total)
|
220 |
|
221 |
# Transcribe
|
222 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
|
|
225 |
# Update progress
|
226 |
current_progress += source_audio_duration
|
227 |
|
228 |
+
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words, scaled_progress_listener)
|
229 |
|
230 |
if len(sources) > 1:
|
231 |
# Add new line separators
|
|
|
383 |
def __init__(self, progress: gr.Progress):
|
384 |
self.progress = progress
|
385 |
|
386 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
387 |
# From 0 to 1
|
388 |
+
self.progress(current / total, desc=desc)
|
389 |
|
390 |
def on_finished(self):
|
391 |
self.progress(1)
|
|
|
441 |
|
442 |
return config
|
443 |
|
444 |
+
def write_result(self, result: dict, nllb_model: NllbModel, source_name: str, output_dir: str, highlight_words: bool = False, progressListener: ProgressListener = None):
|
445 |
if not os.path.exists(output_dir):
|
446 |
os.makedirs(output_dir)
|
447 |
|
|
|
452 |
|
453 |
if nllb_model.nllb_lang is not None:
|
454 |
try:
|
455 |
+
segments_progress_listener = SubTaskProgressListener(progressListener,
|
456 |
+
base_task_total=progressListener.sub_task_total,
|
457 |
+
sub_task_start=1,
|
458 |
+
sub_task_total=1)
|
459 |
pbar = tqdm.tqdm(total=len(segments))
|
460 |
perf_start_time = time.perf_counter()
|
461 |
nllb_model.load_model()
|
|
|
466 |
if nllb_model.nllb_lang is not None:
|
467 |
segment["text"] = nllb_model.translation(seg_text)
|
468 |
pbar.update(1)
|
469 |
+
segments_progress_listener.on_progress(idx+1, len(segments), "Process segments")
|
470 |
|
471 |
nllb_model.release_vram()
|
472 |
perf_end_time = time.perf_counter()
|
473 |
+
# Call the finished callback
|
474 |
+
if segments_progress_listener is not None:
|
475 |
+
segments_progress_listener.on_finished()
|
476 |
+
|
477 |
print("\n\nprocess segments took {} seconds.\n\n".format(perf_end_time - perf_start_time))
|
478 |
except Exception as e:
|
479 |
# Ignore error - it's just a cleanup
|
src/hooks/progressListener.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
from typing import Union
|
2 |
|
3 |
class ProgressListener:
|
4 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
5 |
self.total = total
|
6 |
|
7 |
def on_finished(self):
|
|
|
1 |
from typing import Union
|
2 |
|
3 |
class ProgressListener:
|
4 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
5 |
self.total = total
|
6 |
|
7 |
def on_finished(self):
|
src/hooks/subTaskProgressListener.py
CHANGED
@@ -25,13 +25,13 @@ class SubTaskProgressListener(ProgressListener):
|
|
25 |
):
|
26 |
self.base_task_listener = base_task_listener
|
27 |
self.base_task_total = base_task_total
|
28 |
-
self.sub_task_start = sub_task_start
|
29 |
-
self.sub_task_total = sub_task_total
|
30 |
|
31 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
32 |
sub_task_progress_frac = current / total
|
33 |
sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
|
34 |
-
self.base_task_listener.on_progress(sub_task_progress, self.base_task_total)
|
35 |
|
36 |
def on_finished(self):
|
37 |
self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
|
|
|
25 |
):
|
26 |
self.base_task_listener = base_task_listener
|
27 |
self.base_task_total = base_task_total
|
28 |
+
self.sub_task_start = base_task_total*sub_task_start
|
29 |
+
self.sub_task_total = base_task_total*sub_task_total
|
30 |
|
31 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
32 |
sub_task_progress_frac = current / total
|
33 |
sub_task_progress = self.sub_task_start + self.sub_task_total * sub_task_progress_frac
|
34 |
+
self.base_task_listener.on_progress(sub_task_progress, self.base_task_total, desc=desc)
|
35 |
|
36 |
def on_finished(self):
|
37 |
self.base_task_listener.on_progress(self.sub_task_start + self.sub_task_total, self.base_task_total)
|
src/nllb/nllbModel.py
CHANGED
@@ -54,6 +54,9 @@ class NllbModel:
|
|
54 |
self.nllb_lang = nllb_lang
|
55 |
self.model_config = model_config
|
56 |
|
|
|
|
|
|
|
57 |
if os.path.isdir(model_config.url):
|
58 |
self.model_path = model_config.url
|
59 |
else:
|
|
|
54 |
self.nllb_lang = nllb_lang
|
55 |
self.model_config = model_config
|
56 |
|
57 |
+
if nllb_lang is None:
|
58 |
+
return
|
59 |
+
|
60 |
if os.path.isdir(model_config.url):
|
61 |
self.model_path = model_config.url
|
62 |
else:
|
src/vad.py
CHANGED
@@ -181,9 +181,10 @@ class AbstractTranscription(ABC):
|
|
181 |
# Calculate progress
|
182 |
progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
|
183 |
progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
|
|
|
184 |
|
185 |
# For each time segment, run whisper
|
186 |
-
for segment in merged:
|
187 |
segment_index += 1
|
188 |
segment_start = segment['start']
|
189 |
segment_end = segment['end']
|
@@ -208,8 +209,10 @@ class AbstractTranscription(ABC):
|
|
208 |
|
209 |
perf_start_time = time.perf_counter()
|
210 |
|
211 |
-
scaled_progress_listener = SubTaskProgressListener(progressListener,
|
212 |
-
|
|
|
|
|
213 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
214 |
|
215 |
perf_end_time = time.perf_counter()
|
|
|
181 |
# Calculate progress
|
182 |
progress_start_offset = merged[0]['start'] if len(merged) > 0 else 0
|
183 |
progress_total_duration = sum([segment['end'] - segment['start'] for segment in merged])
|
184 |
+
sub_task_total = 1/len(merged)
|
185 |
|
186 |
# For each time segment, run whisper
|
187 |
+
for idx, segment in enumerate(merged):
|
188 |
segment_index += 1
|
189 |
segment_start = segment['start']
|
190 |
segment_end = segment['end']
|
|
|
209 |
|
210 |
perf_start_time = time.perf_counter()
|
211 |
|
212 |
+
scaled_progress_listener = SubTaskProgressListener(progressListener,
|
213 |
+
base_task_total=progressListener.sub_task_total if isinstance(progressListener, SubTaskProgressListener) else progress_total_duration,
|
214 |
+
sub_task_start=idx*(1/len(merged)),
|
215 |
+
sub_task_total=1/len(merged))
|
216 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
217 |
|
218 |
perf_end_time = time.perf_counter()
|
src/vadParallel.py
CHANGED
@@ -18,7 +18,7 @@ class _ProgressListenerToQueue(ProgressListener):
|
|
18 |
self.progress_total = 0
|
19 |
self.prev_progress = 0
|
20 |
|
21 |
-
def on_progress(self, current: Union[int, float], total: Union[int, float]):
|
22 |
delta = current - self.prev_progress
|
23 |
self.prev_progress = current
|
24 |
self.progress_total = total
|
@@ -178,7 +178,7 @@ class ParallelTranscription(AbstractTranscription):
|
|
178 |
|
179 |
total_progress += delta
|
180 |
if progress_listener is not None:
|
181 |
-
progress_listener.on_progress(total_progress, total_duration)
|
182 |
|
183 |
results = results_async.get()
|
184 |
|
|
|
18 |
self.progress_total = 0
|
19 |
self.prev_progress = 0
|
20 |
|
21 |
+
def on_progress(self, current: Union[int, float], total: Union[int, float], desc: str = None):
|
22 |
delta = current - self.prev_progress
|
23 |
self.prev_progress = current
|
24 |
self.progress_total = total
|
|
|
178 |
|
179 |
total_progress += delta
|
180 |
if progress_listener is not None:
|
181 |
+
progress_listener.on_progress(total_progress, total_duration, desc="Transcribe parallel")
|
182 |
|
183 |
results = results_async.get()
|
184 |
|
src/whisper/fasterWhisperContainer.py
CHANGED
@@ -150,7 +150,7 @@ class FasterWhisperCallback(AbstractWhisperCallback):
|
|
150 |
segments.append(segment)
|
151 |
|
152 |
if progress_listener is not None:
|
153 |
-
progress_listener.on_progress(segment.end, info.duration)
|
154 |
if verbose:
|
155 |
print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
|
156 |
segment.text))
|
|
|
150 |
segments.append(segment)
|
151 |
|
152 |
if progress_listener is not None:
|
153 |
+
progress_listener.on_progress(segment.end, info.duration, "Transcribe")
|
154 |
if verbose:
|
155 |
print("[{}->{}] {}".format(format_timestamp(segment.start, True), format_timestamp(segment.end, True),
|
156 |
segment.text))
|