If the user does not choose a language for Whisper,
the language detected by Whisper will automatically be set for the NLLB model, to avoid abnormal errors when determining the source language in NLLB.
- app.py +16 -12
- src/vad.py +2 -2
- webui.bat +1 -1
app.py
CHANGED
@@ -20,9 +20,7 @@ from src.diarization.diarizationContainer import DiarizationContainer
|
|
20 |
from src.hooks.progressListener import ProgressListener
|
21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
22 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
23 |
-
from src.languages import _TO_LANGUAGE_CODE
|
24 |
-
from src.languages import get_language_names
|
25 |
-
from src.languages import get_language_from_name
|
26 |
from src.modelCache import ModelCache
|
27 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
28 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
@@ -269,6 +267,10 @@ class WhisperTranscriber:
|
|
269 |
|
270 |
# Transcribe
|
271 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
|
|
|
|
|
|
|
|
272 |
short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
|
273 |
filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
|
274 |
|
@@ -700,8 +702,8 @@ def create_ui(app_config: ApplicationConfig):
|
|
700 |
|
701 |
common_output = lambda : [
|
702 |
gr.File(label="Download"),
|
703 |
-
gr.Text(label="Transcription"),
|
704 |
-
gr.Text(label="Segments"),
|
705 |
]
|
706 |
|
707 |
is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
|
@@ -863,13 +865,15 @@ if __name__ == '__main__':
|
|
863 |
|
864 |
updated_config = default_app_config.update(**args)
|
865 |
|
866 |
-
#updated_config.whisper_implementation = "faster-whisper"
|
867 |
-
#updated_config.input_audio_max_duration = -1
|
868 |
-
#updated_config.default_model_name = "large-v2"
|
869 |
-
#updated_config.output_dir = "output"
|
870 |
-
#updated_config.vad_max_merge_size = 90
|
871 |
-
#updated_config.merge_subtitle_with_sources =
|
872 |
-
#updated_config.autolaunch = True
|
|
|
|
|
873 |
|
874 |
if (threads := args.pop("threads")) > 0:
|
875 |
torch.set_num_threads(threads)
|
|
|
20 |
from src.hooks.progressListener import ProgressListener
|
21 |
from src.hooks.subTaskProgressListener import SubTaskProgressListener
|
22 |
from src.hooks.whisperProgressHook import create_progress_listener_handle
|
23 |
+
from src.languages import _TO_LANGUAGE_CODE, get_language_names, get_language_from_name, get_language_from_code
|
|
|
|
|
24 |
from src.modelCache import ModelCache
|
25 |
from src.prompts.jsonPromptStrategy import JsonPromptStrategy
|
26 |
from src.prompts.prependPromptStrategy import PrependPromptStrategy
|
|
|
267 |
|
268 |
# Transcribe
|
269 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
270 |
+
if whisper_lang is None and result["language"] is not None and len(result["language"]) > 0:
|
271 |
+
whisper_lang = get_language_from_code(result["language"])
|
272 |
+
nllb_model.whisper_lang = whisper_lang
|
273 |
+
|
274 |
short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
|
275 |
filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
|
276 |
|
|
|
702 |
|
703 |
common_output = lambda : [
|
704 |
gr.File(label="Download"),
|
705 |
+
gr.Text(label="Transcription", autoscroll=False),
|
706 |
+
gr.Text(label="Segments", autoscroll=False),
|
707 |
]
|
708 |
|
709 |
is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
|
|
|
865 |
|
866 |
updated_config = default_app_config.update(**args)
|
867 |
|
868 |
+
# updated_config.whisper_implementation = "faster-whisper"
|
869 |
+
# updated_config.input_audio_max_duration = -1
|
870 |
+
# updated_config.default_model_name = "large-v2"
|
871 |
+
# updated_config.output_dir = "output"
|
872 |
+
# updated_config.vad_max_merge_size = 90
|
873 |
+
# updated_config.merge_subtitle_with_sources = False
|
874 |
+
# updated_config.autolaunch = True
|
875 |
+
# updated_config.auto_parallel = False
|
876 |
+
# updated_config.save_downloaded_files = True
|
877 |
|
878 |
if (threads := args.pop("threads")) > 0:
|
879 |
torch.set_num_threads(threads)
|
src/vad.py
CHANGED
@@ -205,7 +205,7 @@ class AbstractTranscription(ABC):
|
|
205 |
# Detected language
|
206 |
detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
|
207 |
|
208 |
-
print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
|
209 |
segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
|
210 |
|
211 |
perf_start_time = time.perf_counter()
|
@@ -217,7 +217,7 @@ class AbstractTranscription(ABC):
|
|
217 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
218 |
|
219 |
perf_end_time = time.perf_counter()
|
220 |
-
print("
|
221 |
|
222 |
adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
|
223 |
|
|
|
205 |
# Detected language
|
206 |
detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
|
207 |
|
208 |
+
print(f"Running whisper {idx}: from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ",
|
209 |
segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
|
210 |
|
211 |
perf_start_time = time.perf_counter()
|
|
|
217 |
segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
|
218 |
|
219 |
perf_end_time = time.perf_counter()
|
220 |
+
print("\tWhisper took {} seconds".format(perf_end_time - perf_start_time))
|
221 |
|
222 |
adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
|
223 |
|
webui.bat
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
@echo off
|
2 |
|
3 |
:: The source of the webui.bat file is stable-diffusion-webui
|
4 |
-
set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --
|
5 |
|
6 |
if not defined PYTHON (set PYTHON=python)
|
7 |
if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
|
|
|
1 |
@echo off
|
2 |
|
3 |
:: The source of the webui.bat file is stable-diffusion-webui
|
4 |
+
set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
|
5 |
|
6 |
if not defined PYTHON (set PYTHON=python)
|
7 |
if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
|