Fix WHISPER_IMPLEMENTATION argument
- app.py +24 -19
- cli.py +7 -3
- dockerfile +12 -2
- requirements-fastWhisper.txt → requirements-fasterWhisper.txt +2 -1
- src/whisper/whisperFactory.py +2 -0
app.py
CHANGED
@@ -125,7 +125,7 @@ class WhisperTranscriber:
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
 
-        model = create_whisper_container(whisper_implementation=app_config.whisper_implementation,
+        model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
             model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
 
         # Result
@@ -485,38 +485,43 @@ def create_ui(app_config: ApplicationConfig):
     ui.close()
 
 if __name__ == '__main__':
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
+    default_app_config = ApplicationConfig.create_default()
+    whisper_models = default_app_config.get_model_names()
+
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", default_app_config.whisper_implementation)
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--input_audio_max_duration", type=int, default=app_config.input_audio_max_duration, \
+    parser.add_argument("--input_audio_max_duration", type=int, default=default_app_config.input_audio_max_duration, \
                         help="Maximum audio file length in seconds, or -1 for no limit.") # 600
-    parser.add_argument("--share", type=bool, default=app_config.share, \
+    parser.add_argument("--share", type=bool, default=default_app_config.share, \
                         help="True to share the app on HuggingFace.") # False
-    parser.add_argument("--server_name", type=str, default=app_config.server_name, \
+    parser.add_argument("--server_name", type=str, default=default_app_config.server_name, \
                         help="The host or IP to bind to. If None, bind to localhost.") # None
-    parser.add_argument("--server_port", type=int, default=app_config.server_port, \
+    parser.add_argument("--server_port", type=int, default=default_app_config.server_port, \
                         help="The port to bind to.") # 7860
-    parser.add_argument("--queue_concurrency_count", type=int, default=app_config.queue_concurrency_count, \
+    parser.add_argument("--queue_concurrency_count", type=int, default=default_app_config.queue_concurrency_count, \
                         help="The number of concurrent requests to process.") # 1
-    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=app_config.default_model_name, \
+    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=default_app_config.default_model_name, \
                         help="The default model name.") # medium
-    parser.add_argument("--default_vad", type=str, default=app_config.default_vad, \
+    parser.add_argument("--default_vad", type=str, default=default_app_config.default_vad, \
                         help="The default VAD.") # silero-vad
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
+    parser.add_argument("--vad_parallel_devices", type=str, default=default_app_config.vad_parallel_devices, \
                         help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
+    parser.add_argument("--vad_cpu_cores", type=int, default=default_app_config.vad_cpu_cores, \
                         help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_process_timeout", type=float, default=app_config.vad_process_timeout, \
+    parser.add_argument("--vad_process_timeout", type=float, default=default_app_config.vad_process_timeout, \
                         help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.") # 1800
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
+    parser.add_argument("--auto_parallel", type=bool, default=default_app_config.auto_parallel, \
                         help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--output_dir", "-o", type=str, default=app_config.output_dir, \
+    parser.add_argument("--output_dir", "-o", type=str, default=default_app_config.output_dir, \
                         help="directory to save the outputs")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
                         help="the Whisper implementation to use")
 
     args = parser.parse_args().__dict__
 
-    updated_config = app_config.update(**args)
+    updated_config = default_app_config.update(**args)
+
+    print(f"Using {updated_config.whisper_implementation} for Whisper")
     create_ui(app_config=updated_config)
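Taken together, the web UI now resolves the implementation in three steps: the config default, then the WHISPER_IMPLEMENTATION environment variable, then the --whisper_implementation flag (which wins because the env-resolved value is only fed to argparse as the flag's default). A minimal sketch of that precedence pattern, with a hypothetical CONFIG_DEFAULT standing in for the value ApplicationConfig would supply:

```python
import argparse
import os

# Hypothetical stand-in for ApplicationConfig's stored default.
CONFIG_DEFAULT = "whisper"

# The env var (if set) overrides the config default; the CLI flag overrides both.
default_implementation = os.environ.get("WHISPER_IMPLEMENTATION", CONFIG_DEFAULT)

parser = argparse.ArgumentParser()
parser.add_argument("--whisper_implementation", type=str,
                    default=default_implementation,
                    choices=["whisper", "faster-whisper"])

# Pass e.g. ["--whisper_implementation", "faster-whisper"] to exercise the flag.
args = parser.parse_args([])
print(f"Using {args.whisper_implementation} for Whisper")
```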
cli.py
CHANGED
@@ -20,6 +20,9 @@ def cli():
     # For the CLI, we fall back to saving the output to the current directory
     output_dir = app_config.output_dir if app_config.output_dir is not None else "."
 
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
+
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("audio", nargs="+", type=str, \
                         help="audio file(s) to transcribe")
@@ -32,9 +35,9 @@ def cli():
     parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
                         help="directory to save the outputs")
     parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
                         help="whether to print out the progress and debug messages")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
                         help="the Whisper implementation to use")
 
     parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
                         help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -95,6 +98,7 @@ def cli():
     os.makedirs(output_dir, exist_ok=True)
 
     whisper_implementation = args.pop("whisper_implementation")
+    print(f"Using {whisper_implementation} for Whisper")
 
     if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
         warnings.warn(f"{model_name} is an English-only model but received '{args['language']}'; using English instead.")
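Note that cli.py consumes the flag with args.pop before forwarding the remaining options, so the transcription call never receives a key it does not expect. A small illustrative sketch of that pop-and-forward pattern (the dictionary contents are made up):

```python
# Parsed CLI options; values here are illustrative.
args = {"whisper_implementation": "faster-whisper", "language": "en", "task": "transcribe"}

# Consume the factory-level option so only transcription options remain.
whisper_implementation = args.pop("whisper_implementation")
print(f"Using {whisper_implementation} for Whisper")

assert "whisper_implementation" not in args  # safe to forward the rest
```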
dockerfile
CHANGED
@@ -1,13 +1,23 @@
+# docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=whisper .
+
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
+ARG WHISPER_IMPLEMENTATION=whisper
+ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
+
 ADD . /opt/whisper-webui/
 
 # Latest version of transformers-pytorch-gpu seems to lack tk.
 # Further, pip install fails, so we must upgrade pip first.
 RUN apt-get -y install python3-tk
 RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install -r /opt/whisper-webui/requirements.txt
+
+RUN if [ "${WHISPER_IMPLEMENTATION}" = "whisper" ]; then \
+    python3 -m pip install -r /opt/whisper-webui/requirements.txt; \
+    else \
+    python3 -m pip install -r /opt/whisper-webui/requirements-fasterWhisper.txt; \
+    fi
 
 # Note: Models will be downloaded on demand to the directory /root/.cache/whisper.
 # You can also bind this directory in the container to somewhere on the host.
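The ARG/ENV pair matters here: ARG values exist only at build time, so the ENV line re-exports the chosen value into the image, where app.py and cli.py pick it up through os.environ.get at runtime. The same build-arg also selects which requirements file is installed. Building the faster-whisper variant would presumably look like this (image tag is illustrative):

    docker build -t whisper-webui-faster --build-arg WHISPER_IMPLEMENTATION=faster-whisper .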
requirements-fastWhisper.txt → requirements-fasterWhisper.txt
RENAMED
@@ -5,4 +5,5 @@ gradio==3.23.0
 yt-dlp
 json5
 torch
 torchaudio
+more_itertools
src/whisper/whisperFactory.py
CHANGED
@@ -6,6 +6,8 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 def create_whisper_container(whisper_implementation: str,
                              model_name: str, device: str = None, download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
+    print("Creating whisper container for " + whisper_implementation)
+
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
         return WhisperContainer(model_name, device, download_root, cache, models)
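The hunk shows only the "whisper" branch of the dispatch. Presumably the function continues with a matching faster-whisper branch; a sketch of the full factory under that assumption (the FasterWhisperContainer import path is inferred from the repo layout, not shown in this diff; the sketch reuses the module's existing imports of List, ModelConfig, modelCache, and AbstractWhisperContainer):

```python
def create_whisper_container(whisper_implementation: str,
                             model_name: str, device: str = None, download_root: str = None,
                             cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
    print("Creating whisper container for " + whisper_implementation)

    if (whisper_implementation == "whisper"):
        from src.whisper.whisperContainer import WhisperContainer
        return WhisperContainer(model_name, device, download_root, cache, models)
    elif (whisper_implementation == "faster-whisper"):
        # Assumed counterpart to WhisperContainer; not part of this diff.
        from src.whisper.fasterWhisperContainer import FasterWhisperContainer
        return FasterWhisperContainer(model_name, device, download_root, cache, models)
    else:
        raise ValueError("Unknown Whisper implementation: " + whisper_implementation)
```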