Fix WHISPER_IMPLEMENTATION argument
- app.py +24 -19
- cli.py +7 -3
- dockerfile +12 -2
- requirements-fastWhisper.txt → requirements-fasterWhisper.txt +2 -1
- src/whisper/whisperFactory.py +2 -0
app.py
CHANGED
@@ -125,7 +125,7 @@ class WhisperTranscriber:
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
 
-        model = create_whisper_container(whisper_implementation=app_config.whisper_implementation,
+        model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
             model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
 
         # Result
@@ -485,38 +485,43 @@ def create_ui(app_config: ApplicationConfig):
     ui.close()
 
 if __name__ == '__main__':
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
+    default_app_config = ApplicationConfig.create_default()
+    whisper_models = default_app_config.get_model_names()
+
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", default_app_config.whisper_implementation)
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--input_audio_max_duration", type=int, default=app_config.input_audio_max_duration, \
+    parser.add_argument("--input_audio_max_duration", type=int, default=default_app_config.input_audio_max_duration, \
                         help="Maximum audio file length in seconds, or -1 for no limit.") # 600
-    parser.add_argument("--share", type=bool, default=app_config.share, \
+    parser.add_argument("--share", type=bool, default=default_app_config.share, \
                         help="True to share the app on HuggingFace.") # False
-    parser.add_argument("--server_name", type=str, default=app_config.server_name, \
+    parser.add_argument("--server_name", type=str, default=default_app_config.server_name, \
                         help="The host or IP to bind to. If None, bind to localhost.") # None
-    parser.add_argument("--server_port", type=int, default=app_config.server_port, \
+    parser.add_argument("--server_port", type=int, default=default_app_config.server_port, \
                         help="The port to bind to.") # 7860
-    parser.add_argument("--queue_concurrency_count", type=int, default=app_config.queue_concurrency_count, \
+    parser.add_argument("--queue_concurrency_count", type=int, default=default_app_config.queue_concurrency_count, \
                         help="The number of concurrent requests to process.") # 1
-    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=app_config.default_model_name, \
+    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=default_app_config.default_model_name, \
                         help="The default model name.") # medium
-    parser.add_argument("--default_vad", type=str, default=app_config.default_vad, \
+    parser.add_argument("--default_vad", type=str, default=default_app_config.default_vad, \
                         help="The default VAD.") # silero-vad
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
+    parser.add_argument("--vad_parallel_devices", type=str, default=default_app_config.vad_parallel_devices, \
                         help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
+    parser.add_argument("--vad_cpu_cores", type=int, default=default_app_config.vad_cpu_cores, \
                         help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_process_timeout", type=float, default=app_config.vad_process_timeout, \
+    parser.add_argument("--vad_process_timeout", type=float, default=default_app_config.vad_process_timeout, \
                         help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.") # 1800
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
+    parser.add_argument("--auto_parallel", type=bool, default=default_app_config.auto_parallel, \
                         help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--output_dir", "-o", type=str, default=app_config.output_dir, \
+    parser.add_argument("--output_dir", "-o", type=str, default=default_app_config.output_dir, \
                         help="directory to save the outputs")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
                         help="the Whisper implementation to use")
 
     args = parser.parse_args().__dict__
 
-    updated_config = app_config.update(**args)
+    updated_config = default_app_config.update(**args)
+
+    print(f"Using {updated_config.whisper_implementation} for Whisper")
     create_ui(app_config=updated_config)
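Taken together, the web UI now resolves the implementation in three steps: the config default, then the WHISPER_IMPLEMENTATION environment variable, then the --whisper_implementation flag (which wins because the env-resolved value is only fed to argparse as the flag's default). A minimal sketch of that precedence pattern, with a hypothetical CONFIG_DEFAULT standing in for the value ApplicationConfig would supply:

```python
import argparse
import os

# Hypothetical stand-in for ApplicationConfig's stored default.
CONFIG_DEFAULT = "whisper"

# The env var (if set) overrides the config default; the CLI flag overrides both.
default_implementation = os.environ.get("WHISPER_IMPLEMENTATION", CONFIG_DEFAULT)

parser = argparse.ArgumentParser()
parser.add_argument("--whisper_implementation", type=str,
                    default=default_implementation,
                    choices=["whisper", "faster-whisper"])

# Pass e.g. ["--whisper_implementation", "faster-whisper"] to exercise the flag.
args = parser.parse_args([])
print(f"Using {args.whisper_implementation} for Whisper")
```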
cli.py
CHANGED
@@ -20,6 +20,9 @@ def cli():
     # For the CLI, we fall back to saving the output to the current directory
     output_dir = app_config.output_dir if app_config.output_dir is not None else "."
 
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
+
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("audio", nargs="+", type=str, \
                         help="audio file(s) to transcribe")
@@ -32,9 +35,9 @@ def cli():
     parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
                         help="directory to save the outputs")
     parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
                         help="whether to print out the progress and debug messages")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
                         help="the Whisper implementation to use")
 
     parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
                         help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -95,6 +98,7 @@ def cli():
     os.makedirs(output_dir, exist_ok=True)
 
     whisper_implementation = args.pop("whisper_implementation")
+    print(f"Using {whisper_implementation} for Whisper")
 
     if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
         warnings.warn(f"{model_name} is an English-only model but received '{args['language']}'; using English instead.")
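Note that cli.py consumes the flag with args.pop before forwarding the remaining options, so the transcription call never receives a key it does not expect. A small illustrative sketch of that pop-and-forward pattern (the dictionary contents are made up):

```python
# Parsed CLI options; values here are illustrative.
args = {"whisper_implementation": "faster-whisper", "language": "en", "task": "transcribe"}

# Consume the factory-level option so only transcription options remain.
whisper_implementation = args.pop("whisper_implementation")
print(f"Using {whisper_implementation} for Whisper")

assert "whisper_implementation" not in args  # safe to forward the rest
```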
dockerfile
CHANGED
@@ -1,13 +1,23 @@
+# docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=whisper .
+
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
+ARG WHISPER_IMPLEMENTATION=whisper
+ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
+
 ADD . /opt/whisper-webui/
 
 # Latest version of transformers-pytorch-gpu seems to lack tk.
 # Further, pip install fails, so we must upgrade pip first.
 RUN apt-get -y install python3-tk
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install -r /opt/whisper-webui/requirements.txt
+
+RUN if [ "${WHISPER_IMPLEMENTATION}" = "whisper" ]; then \
+    python3 -m pip install -r /opt/whisper-webui/requirements.txt; \
+    else \
+    python3 -m pip install -r /opt/whisper-webui/requirements-fasterWhisper.txt; \
+    fi
 
 # Note: Models will be downloaded on demand to the directory /root/.cache/whisper.
 # You can also bind this directory in the container to somewhere on the host.
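The ARG/ENV pair matters here: ARG values exist only at build time, so the ENV line re-exports the chosen value into the image, where app.py and cli.py pick it up through os.environ.get at runtime. The same build-arg also selects which requirements file is installed. Building the faster-whisper variant would presumably look like this (image tag is illustrative):

    docker build -t whisper-webui-faster --build-arg WHISPER_IMPLEMENTATION=faster-whisper .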
requirements-fastWhisper.txt → requirements-fasterWhisper.txt
RENAMED
@@ -5,4 +5,5 @@ gradio==3.23.0
 yt-dlp
 json5
 torch
 torchaudio
+more_itertools
src/whisper/whisperFactory.py
CHANGED
@@ -6,6 +6,8 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 def create_whisper_container(whisper_implementation: str,
                              model_name: str, device: str = None, download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
+    print("Creating whisper container for " + whisper_implementation)
+
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
         return WhisperContainer(model_name, device, download_root, cache, models)
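The hunk shows only the "whisper" branch of the dispatch. Presumably the function continues with a matching faster-whisper branch; a sketch of the full factory under that assumption (the FasterWhisperContainer import path is inferred from the repo layout, not shown in this diff; the sketch reuses the module's existing imports of List, ModelConfig, modelCache, and AbstractWhisperContainer):

```python
def create_whisper_container(whisper_implementation: str,
                             model_name: str, device: str = None, download_root: str = None,
                             cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
    print("Creating whisper container for " + whisper_implementation)

    if (whisper_implementation == "whisper"):
        from src.whisper.whisperContainer import WhisperContainer
        return WhisperContainer(model_name, device, download_root, cache, models)
    elif (whisper_implementation == "faster-whisper"):
        # Assumed counterpart to WhisperContainer; not part of this diff.
        from src.whisper.fasterWhisperContainer import FasterWhisperContainer
        return FasterWhisperContainer(model_name, device, download_root, cache, models)
    else:
        raise ValueError("Unknown Whisper implementation: " + whisper_implementation)
```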