Spaces:

lenML
/

ChatTTS-Forge

Running on Zero

App Files Files Community

zhzluke96 committed on Jun 9

Commit

bf13828

•

1 Parent(s): 00c033c

update

Browse files

Files changed (21) hide show

.env.webui +2 -0
language/en.json +118 -0
language/zh-CN.json +116 -0
modules/ChatTTS/ChatTTS/core.py +6 -57
modules/webui/app.py +14 -2
modules/webui/changelog_tab.py +1 -1
modules/webui/css/style.css +418 -0
modules/webui/gradio_extensions.py +59 -0
modules/webui/gradio_hijack.py +14 -0
modules/webui/js/index.js +231 -0
modules/webui/js/localization.js +201 -0
modules/webui/localization.py +74 -0
modules/webui/localization_runtime.py +226 -0
modules/webui/readme_tab.py +1 -1
modules/webui/speaker/speaker_creator.py +3 -12
modules/webui/speaker/speaker_merger.py +3 -14
modules/webui/ssml/spliter_tab.py +1 -2
modules/webui/ssml/ssml_tab.py +3 -8
modules/webui/tts_tab.py +7 -14
modules/webui/webui_config.py +4 -0
webui.py +8 -2

.env.webui CHANGED Viewed

@@ -18,3 +18,5 @@ MAX_BATCH_SIZE=12
 V_GIT_TAG=🤗hf
 V_GIT_COMMIT=main

 V_GIT_TAG=🤗hf
 V_GIT_COMMIT=main
+LANGUAGE=zh-CN

language/en.json ADDED Viewed

	@@ -0,0 +1,118 @@

+{
+  "TTS": "TTS",
+  "🎛️Sampling": "🎛️Sampling Configuration",
+  "Temperature": "Temperature",
+  "Top P": "Top P",
+  "Top K": "Top K",
+  "Batch Size": "Batch Size",
+  "🎭Style": "🎭Style",
+  "🗣️Speaker": "🗣️Voice",
+  "Pick": "Select",
+  "🎲": "🎲",
+  "Upload": "Upload",
+  "Speaker (Upload)": "Voice (Upload)",
+  "📝Speaker info": "📝Speaker Information",
+  "empty": "empty",
+  "💃Inference Seed": "💃Inference Seed",
+  "Inference Seed": "Inference Seed",
+  "Use Decoder": "Use Decoder",
+  "📝Text Input": "📝Text Input",
+  "[laugh]": "[laugh]",
+  "[uv_break]": "[uv_break]",
+  "[v_break]": "[v_break]",
+  "[lbreak]": "[lbreak]",
+  "🎄Examples": "🎄Examples",
+  "🎨Output": "🎨Output",
+  "Generated Audio": "Generated Audio",
+  "🎶Refiner": "🎶Refiner",
+  "✍️Refine Text": "✍️Refine Text",
+  "🔧Prompt engineering": "🔧Prompt Engineering",
+  "prompt_audio": "prompt_audio",
+  "🔊Generate": "🔊Generate",
+  "Disable Normalize": "Disable Normalize",
+  "💪🏼Enhance": "💪🏼Enhance",
+  "Enable Enhance": "Enable Enhance",
+  "Enable De-noise": "Enable De-noise",
+  "🔊Generate Audio": "🔊Generate Audio",
+  "SSML": "SSML",
+  "Editor": "Editor",
+  "📝SSML Input": "📝SSML Input",
+  "🔊Synthesize SSML": "🔊Synthesize SSML",
+  "🎛️Parameters": "🎛️Parameters",
+  "Spilter": "Splitter",
+  "🗣️Seed": "🗣️Seed",
+  "📩Send to SSML": "📩Send to Editor",
+  "📝Long Text Input": "📝Long Text Input",
+  "🔪Split Text": "🔪Split Text",
+  "Podcast": "Podcast",
+  "Add": "Add",
+  "Undo": "Undo",
+  "Clear": "Clear",
+  "📔Script": "📔Script",
+  "Speaker": "Voice",
+  "Creator": "Creator",
+  "ℹ️Speaker info": "ℹ️Speaker Information",
+  "Seed": "Seed",
+  "Random Speaker": "Random Voice",
+  "🔊Generate speaker.pt": "🔊Generate speaker.pt",
+  "Save .pt file": "Save .pt file",
+  "Save to File": "Save to File",
+  "🎤Test voice": "🎤Test Voice",
+  "Test Voice": "Test Voice",
+  "Current Seed": "Current Seed",
+  "Output Audio": "Output Audio",
+  "Merger": "Merger",
+  "🔄": "🔄",
+  "Weight A": "Weight A",
+  "Weight B": "Weight B",
+  "Weight C": "Weight C",
+  "Weight D": "Weight D",
+  "🗃️Save to file": "🗃️Save to File",
+  "Save Speaker": "Save Voice",
+  "Merged Speaker": "Merged Voice",
+  "Inpainting": "Inpainting",
+  "🚧 Under construction": "🚧 Under Construction",
+  "ASR": "ASR",
+  "System": "System",
+  "info": "info",
+  "Enable Experimental Features": "Enable Experimental Features",
+  "README": "README",
+  "readme": "readme",
+  "changelog": "changelog",
+  "TTS_STYLE_GUIDE": [
+    "Suffix _p indicates prompt, which has stronger effect but may impact quality."
+  ],
+  "SSML_SPLITER_GUIDE": [
+    "- Character limit details can be found in README. Excess will be truncated.",
+    "- If the last character is swallowed and not read, try adding `[lbreak]` at the end.",
+    "- If the text is all in English, it is recommended to disable text standardization."
+  ],
+  "SPEAKER_CREATOR_GUIDE": [
+    "### Speaker Creator",
+    "Use this panel to quickly try random seeds and generate speaker.pt files.",
+    "",
+    "1. Generate Speaker: Enter a seed, name, gender, and description. Click the \"Generate speaker.pt\" button, and the generated speaker configuration will be saved as a .pt file.",
+    "2. Test Speaker Voice: Enter a test text. Click the \"Test Voice\" button, and the generated audio will play in the \"Output Audio\" section.",
+    "3. Randomly Generate Speaker: Click the \"Random Speaker\" button to randomly generate a seed and name, which can then be further edited and tested."
+  ],
+  "SSML_TEXT_GUIDE": [
+    "- Maximum {webui_config.ssml_max:,} characters. Excess will be truncated.",
+    "- For more information about SSML, refer to this [documentation](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
+  ],
+  "TTS_TEXT_GUIDE": [
+    "- There is a character limit; text beyond it will be truncated.",
+    "- If the last character is swallowed and not read, try adding `[lbreak]` at the end.",
+    "- If the input text is all in English, it is recommended to disable text standardization."
+  ],
+  "SPEAKER_MERGER_GUIDE": [
+    "### Speaker Merger",
+    "In this panel, you can select multiple speakers and specify their weights to synthesize a new voice and test it. Below are detailed explanations of each feature:",
+    "",
+    "1. Select Speakers: You can choose up to four speakers (A, B, C, D) from the dropdown menu, each with a corresponding weight slider ranging from 0 to 10. The weight determines the influence of each speaker on the synthesized voice.",
+    "2. Synthesize Voice: After selecting the speakers and setting the weights, you can input a test text in the \"Test Text\" box and click the \"Test Voice\" button to generate and play the synthesized voice.",
+    "3. Save Speaker: You can also fill in a new speaker's name, gender, and description in the \"Speaker Information\" section on the right, and click \"Save Speaker\" to save the synthesized voice. The saved speaker file will be displayed in the \"Merged Speaker\" section for download."
+  ]
+}

language/zh-CN.json ADDED Viewed

	@@ -0,0 +1,116 @@

+{
+  "TTS": "TTS",
+  "🎛️Sampling": "🎛️采样配置",
+  "Temperature": "温度",
+  "Top P": "Top P",
+  "Top K": "Top K",
+  "Batch Size": "批量大小",
+  "🎭Style": "🎭风格",
+  "🗣️Speaker": "🗣️音色",
+  "Pick": "选择",
+  "🎲": "🎲",
+  "Upload": "上传",
+  "Speaker (Upload)": "音色 (上传)",
+  "📝Speaker info": "📝音色信息",
+  "empty": "empty",
+  "💃Inference Seed": "💃推理种子",
+  "Inference Seed": "推理种子",
+  "Use Decoder": "使用解码器",
+  "📝Text Input": "📝文本输入",
+  "[laugh]": "[laugh]",
+  "[uv_break]": "[uv_break]",
+  "[v_break]": "[v_break]",
+  "[lbreak]": "[lbreak]",
+  "🎄Examples": "🎄示例",
+  "🎨Output": "🎨输出",
+  "Generated Audio": "生成的音频",
+  "🎶Refiner": "🎶优化器",
+  "✍️Refine Text": "✍️优化文本",
+  "🔧Prompt engineering": "🔧提示工程",
+  "prompt_audio": "提示音频",
+  "🔊Generate": "🔊生成",
+  "Disable Normalize": "禁用归一化",
+  "💪🏼Enhance": "💪🏼增强",
+  "Enable Enhance": "启用增强",
+  "Enable De-noise": "启用降噪",
+  "🔊Generate Audio": "🔊生成音频",
+  "SSML": "SSML",
+  "Editor": "编辑器",
+  "📝SSML Input": "📝SSML输入",
+  "🔊Synthesize SSML": "🔊合成SSML",
+  "🎛️Parameters": "🎛️参数",
+  "Spilter": "分割器",
+  "🗣️Seed": "🗣️种子",
+  "📩Send to SSML": "📩发送到Editor",
+  "📝Long Text Input": "📝长文本输入",
+  "🔪Split Text": "🔪分割文本",
+  "Podcast": "播客",
+  "Add": "添加",
+  "Undo": "撤销",
+  "Clear": "清除",
+  "📔Script": "📔脚本",
+  "Speaker": "音色",
+  "Creator": "创建者",
+  "ℹ️Speaker info": "ℹ️音色信息",
+  "Seed": "种子",
+  "Random Speaker": "随机音色",
+  "🔊Generate speaker.pt": "🔊生成 speaker.pt",
+  "Save .pt file": "保存.pt文件",
+  "Save to File": "保存到文件",
+  "🎤Test voice": "🎤测试语音",
+  "Test Voice": "测试语音",
+  "Current Seed": "当前种子",
+  "Output Audio": "输出音频",
+  "Merger": "融合",
+  "🔄": "🔄",
+  "Weight A": "权重A",
+  "Weight B": "权重B",
+  "Weight C": "权重C",
+  "Weight D": "权重D",
+  "🗃️Save to file": "🗃️保存到文件",
+  "Save Speaker": "保存音色",
+  "Merged Speaker": "融合的音色",
+  "Inpainting": "修复",
+  "🚧 Under construction": "🚧 施工中",
+  "ASR": "ASR",
+  "System": "系统",
+  "info": "信息",
+  "Enable Experimental Features": "启用实验性功能",
+  "README": "README",
+  "readme": "readme",
+  "changelog": "changelog",
+  "TTS_STYLE_GUIDE": ["后缀为 _p 表示带prompt，效果更强但是影响质量"],
+  "SSML_SPLITER_GUIDE": [
+    "- 字数限制详见README，超过部分将截断",
+    "- 如果尾字吞字不读，可以试试结尾加上 `[lbreak]`",
+    "- 如果文本为全英文，建议关闭文本标准化"
+  ],
+  "SPEAKER_CREATOR_GUIDE": [
+    "### Speaker Creator",
+    "使用本面板快捷抽卡生成 speaker.pt 文件。",
+    "",
+    "1. 生成说话人：输入种子、名字、性别和描述。点击 \"Generate speaker.pt\" 按钮，生成的说话人配置会保存为.pt文件。",
+    "2. 测试说话人声音：输入测试文本。点击 \"Test Voice\" 按钮，生成的音频会在 \"Output Audio\" 中播放。",
+    "3. 随机生成说话人：点击 \"Random Speaker\" 按钮，随机生成一个种子和名字，可以进一步编辑其他信息并测试。"
+  ],
+  "SSML_TEXT_GUIDE": [
+    "- 最长{webui_config.ssml_max:,}字符，超过会被截断",
+    "- 关于SSML可以看这个 [文档](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
+  ],
+  "TTS_TEXT_GUIDE": [
+    "- 字数限制，超过部分将截断",
+    "- 如果尾字吞字不读，可以试试结尾加上 `[lbreak]`",
+    "- 如果输入文本为全英文，建议勾选“禁用归一化”"
+  ],
+  "SPEAKER_MERGER_GUIDE": [
+    "### Speaker Merger",
+    "在本面板中，您可以选择多个说话人并指定他们的权重，合成新的语音并进行测试。以下是各个功能的详细说明：",
+    "",
+    "1. 选择说话人: 您可以从下拉菜单中选择最多四个说话人（A、B、C、D），每个说话人都有一个对应的权重滑块，范围从0到10。权重决定了每个说话人在合成语音中的影响程度。",
+    "2. 合成语音: 在选择好说话人和设置好权重后，您可以在“Test Text”框中输入要测试的文本，然后点击“测试语音”按钮来生成并播放合成的语音。",
+    "3. 保存说话人: 您还可以在右侧的“说话人信息”部分填写新的说话人的名称、性别和描述，并点击“Save Speaker”按钮来保存合成的说话人。保存后的说话人文件将显示在“Merged Speaker”栏中，供下载使用。"
+  ]
+}

modules/ChatTTS/ChatTTS/core.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
 import logging
-from functools import partial
 from omegaconf import OmegaConf
 import torch
 from vocos import Vocos
 from .model.dvae import DVAE
 from .model.gpt import GPT_warpper
-from .utils.gpu_utils import select_device
 from .utils.infer_utils import (
     count_invalid_characters,
     detect_language,
@@ -107,9 +105,7 @@ class Chat:
         dtype_gpt: torch.dtype = None,
         dtype_decoder: torch.dtype = None,
     ):
-        if not device:
-            device = select_device(4096)
-            self.logger.log(logging.INFO, f"use {device}")
         dtype_vocos = dtype_vocos or dtype
         dtype_dvae = dtype_dvae or dtype
@@ -179,8 +175,6 @@ class Chat:
         params_refine_text={},
         params_infer_code={"prompt": "[speed_5]"},
         use_decoder=True,
-        do_text_normalization=True,
-        lang=None,
     ):
         assert self.check_model(use_decoder=use_decoder)
@@ -188,14 +182,6 @@ class Chat:
         if not isinstance(text, list):
             text = [text]
-        if do_text_normalization:
-            for i, t in enumerate(text):
-                _lang = detect_language(t) if lang is None else lang
-                self.init_normalizer(_lang)
-                text[i] = self.normalizer[_lang](t)
-                if _lang == "zh":
-                    text[i] = apply_half2full_map(text[i])
         for i, t in enumerate(text):
             reserved_tokens = self.pretrain_models[
                 "tokenizer"
@@ -251,8 +237,6 @@ class Chat:
         self,
         text,
         params_refine_text={},
-        do_text_normalization=True,
-        lang=None,
     ) -> str:
         # assert self.check_model(use_decoder=False)
@@ -260,14 +244,6 @@ class Chat:
         if not isinstance(text, list):
             text = [text]
-        if do_text_normalization:
-            for i, t in enumerate(text):
-                _lang = detect_language(t) if lang is None else lang
-                self.init_normalizer(_lang)
-                text[i] = self.normalizer[_lang](t)
-                if _lang == "zh":
-                    text[i] = apply_half2full_map(text[i])
         for i, t in enumerate(text):
             reserved_tokens = self.pretrain_models[
                 "tokenizer"
@@ -305,7 +281,10 @@ class Chat:
         prompt = [params_infer_code.get("prompt", "") + i for i in prompt]
         params_infer_code.pop("prompt", "")
         result = infer_code(
-            self.pretrain_models, prompt, **params_infer_code, return_hidden=use_decoder
         )
         if use_decoder:
@@ -326,37 +305,7 @@ class Chat:
     def sample_random_speaker(
         self,
     ) -> torch.Tensor:
         dim = self.pretrain_models["gpt"].gpt.layers[0].mlp.gate_proj.in_features
         std, mean = self.pretrain_models["spk_stat"].chunk(2)
         return torch.randn(dim, device=std.device) * std + mean
-    def init_normalizer(self, lang):
-        if lang not in self.normalizer:
-            if lang == "zh":
-                try:
-                    from tn.chinese.normalizer import Normalizer
-                except:
-                    self.logger.log(
-                        logging.WARNING,
-                        f"Package WeTextProcessing not found! \
-                        Run: conda install -c conda-forge pynini=2.1.5 && pip install WeTextProcessing",
-                    )
-                self.normalizer[lang] = Normalizer().normalize
-            else:
-                try:
-                    from nemo_text_processing.text_normalization.normalize import (
-                        Normalizer,
-                    )
-                except:
-                    self.logger.log(
-                        logging.WARNING,
-                        f"Package nemo_text_processing not found! \
-                        Run: conda install -c conda-forge pynini=2.1.5 && pip install nemo_text_processing",
-                    )
-                self.normalizer[lang] = partial(
-                    Normalizer(input_case="cased", lang=lang).normalize,
-                    verbose=False,
-                    punct_post_process=True,
-                )

 import os
 import logging
 from omegaconf import OmegaConf
 import torch
 from vocos import Vocos
 from .model.dvae import DVAE
 from .model.gpt import GPT_warpper
 from .utils.infer_utils import (
     count_invalid_characters,
     detect_language,
         dtype_gpt: torch.dtype = None,
         dtype_decoder: torch.dtype = None,
     ):
+        assert device is not None, "device should not be None"
         dtype_vocos = dtype_vocos or dtype
         dtype_dvae = dtype_dvae or dtype
         params_refine_text={},
         params_infer_code={"prompt": "[speed_5]"},
         use_decoder=True,
     ):
         assert self.check_model(use_decoder=use_decoder)
         if not isinstance(text, list):
             text = [text]
         for i, t in enumerate(text):
             reserved_tokens = self.pretrain_models[
                 "tokenizer"
         self,
         text,
         params_refine_text={},
     ) -> str:
         # assert self.check_model(use_decoder=False)
         if not isinstance(text, list):
             text = [text]
         for i, t in enumerate(text):
             reserved_tokens = self.pretrain_models[
                 "tokenizer"
         prompt = [params_infer_code.get("prompt", "") + i for i in prompt]
         params_infer_code.pop("prompt", "")
         result = infer_code(
+            self.pretrain_models,
+            prompt,
+            return_hidden=use_decoder,
+            **params_infer_code,
         )
         if use_decoder:
     def sample_random_speaker(
         self,
     ) -> torch.Tensor:
+        assert self.pretrain_models["gpt"] is not None, "gpt model not loaded"
         dim = self.pretrain_models["gpt"].gpt.layers[0].mlp.gate_proj.in_features
         std, mean = self.pretrain_models["spk_stat"].chunk(2)
         return torch.randn(dim, device=std.device) * std + mean

modules/webui/app.py CHANGED Viewed

@@ -5,9 +5,10 @@ import torch
 import gradio as gr
 from modules import config
-from modules.webui import webui_config
 from modules.webui.changelog_tab import create_changelog_tab
 from modules.webui.ssml.podcast_tab import create_ssml_podcast_tab
 from modules.webui.system_tab import create_system_tab
 from modules.webui.tts_tab import create_tts_interface
@@ -27,6 +28,11 @@ def webui_init():
     torch._dynamo.config.suppress_errors = True
     torch.set_float32_matmul_precision("high")
     logger.info("WebUI module initialized")
@@ -44,11 +50,13 @@ def create_app_footer():
         f"""
 🍦 [ChatTTS-Forge](https://github.com/lenML/ChatTTS-Forge)
 version: [{git_tag}](https://github.com/lenML/ChatTTS-Forge/commit/{git_commit}) | branch: `{git_branch}` | python: `{python_version}` | torch: `{torch_version}`
-        """
     )
 def create_interface():
     js_func = """
     function refresh() {
@@ -117,4 +125,8 @@ def create_interface():
                         create_changelog_tab()
         create_app_footer()
     return demo

 import gradio as gr
 from modules import config
+from modules.webui import gradio_extensions, localization, webui_config, gradio_hijack
 from modules.webui.changelog_tab import create_changelog_tab
+from modules.webui.localization_runtime import ENLocalizationVars, ZHLocalizationVars
 from modules.webui.ssml.podcast_tab import create_ssml_podcast_tab
 from modules.webui.system_tab import create_system_tab
 from modules.webui.tts_tab import create_tts_interface
     torch._dynamo.config.suppress_errors = True
     torch.set_float32_matmul_precision("high")
+    if config.runtime_env_vars.language == "en":
+        webui_config.localization = ENLocalizationVars()
+    else:
+        webui_config.localization = ZHLocalizationVars()
     logger.info("WebUI module initialized")
         f"""
 🍦 [ChatTTS-Forge](https://github.com/lenML/ChatTTS-Forge)
 version: [{git_tag}](https://github.com/lenML/ChatTTS-Forge/commit/{git_commit}) | branch: `{git_branch}` | python: `{python_version}` | torch: `{torch_version}`
+        """,
+        elem_classes=["no-translate"],
     )
 def create_interface():
+    gradio_extensions.reload_javascript()
     js_func = """
     function refresh() {
                         create_changelog_tab()
         create_app_footer()
+    # Dump the English config for the localization
+    # ** JUST for developer
+    # localization.dump_english_config(gradio_hijack.all_components)
     return demo

modules/webui/changelog_tab.py CHANGED Viewed

@@ -10,4 +10,4 @@ def read_local_changelog():
 def create_changelog_tab():
     changelog_content = read_local_changelog()
-    gr.Markdown(changelog_content)

 def create_changelog_tab():
     changelog_content = read_local_changelog()
+    gr.Markdown(changelog_content, elem_classes=["no-translate"])

modules/webui/css/style.css ADDED Viewed

	@@ -0,0 +1,418 @@

+/* based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/style.css */
+.loader-container {
+    display: flex; /* Use flex to align items horizontally */
+    align-items: center; /* Center items vertically within the container */
+    white-space: nowrap; /* Prevent line breaks within the container */
+  }
+  .loader {
+    border: 8px solid #f3f3f3; /* Light grey */
+    border-top: 8px solid #3498db; /* Blue */
+    border-radius: 50%;
+    width: 30px;
+    height: 30px;
+    animation: spin 2s linear infinite;
+  }
+  @keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+  }
+  /* Style the progress bar */
+  progress {
+    appearance: none; /* Remove default styling */
+    height: 20px; /* Set the height of the progress bar */
+    border-radius: 5px; /* Round the corners of the progress bar */
+    background-color: #f3f3f3; /* Light grey background */
+    width: 100%;
+    vertical-align: middle !important;
+  }
+  /* Style the progress bar container */
+  .progress-container {
+    margin-left: 20px;
+    margin-right: 20px;
+    flex-grow: 1; /* Allow the progress container to take up remaining space */
+  }
+  /* Set the color of the progress bar fill */
+  progress::-webkit-progress-value {
+    background-color: #3498db; /* Blue color for the fill */
+  }
+  progress::-moz-progress-bar {
+    background-color: #3498db; /* Blue color for the fill in Firefox */
+  }
+  /* Style the text on the progress bar */
+  progress::after {
+    content: attr(value '%'); /* Display the progress value followed by '%' */
+    position: absolute;
+    top: 50%;
+    left: 50%;
+    transform: translate(-50%, -50%);
+    color: white; /* Set text color */
+    font-size: 14px; /* Set font size */
+  }
+  /* Style other texts */
+  .loader-container > span {
+    margin-left: 5px; /* Add spacing between the progress bar and the text */
+  }
+  .progress-bar > .generating {
+    display: none !important;
+  }
+  .progress-bar{
+    height: 30px !important;
+  }
+  .progress-bar span {
+      text-align: right;
+      width: 215px;
+  }
+  div:has(> #positive_prompt) {
+      border: none;
+  }
+  #positive_prompt {
+      padding: 1px;
+      background: var(--background-fill-primary);
+  }
+  .type_row {
+    height: 84px !important;
+  }
+  .type_row_half {
+    height: 34px !important;
+  }
+  .refresh_button {
+    border: none !important;
+    background: none !important;
+    font-size: none !important;
+    box-shadow: none !important;
+  }
+  .advanced_check_row {
+    width: 250px !important;
+  }
+  .min_check {
+    min-width: min(1px, 100%) !important;
+  }
+  .resizable_area {
+    resize: vertical;
+    overflow: auto !important;
+  }
+  .performance_selection label {
+      width: 140px !important;
+  }
+  .aspect_ratios label {
+      flex: calc(50% - 5px) !important;
+  }
+  .aspect_ratios label span {
+      white-space: nowrap !important;
+  }
+  .aspect_ratios label input {
+      margin-left: -5px !important;
+  }
+  .lora_enable label {
+    height: 100%;
+  }
+  .lora_enable label input {
+    margin: auto;
+  }
+  .lora_enable label span {
+    display: none;
+  }
+  @-moz-document url-prefix() {
+    .lora_weight input[type=number] {
+      width: 80px;
+    }
+  }
+  #context-menu{
+      z-index:9999;
+      position:absolute;
+      display:block;
+      padding:0px 0;
+      border:2px solid #a55000;
+      border-radius:8px;
+      box-shadow:1px 1px 2px #CE6400;
+      width: 200px;
+  }
+  .context-menu-items{
+      list-style: none;
+      margin: 0;
+      padding: 0;
+  }
+  .context-menu-items a{
+      display:block;
+      padding:5px;
+      cursor:pointer;
+  }
+  .context-menu-items a:hover{
+      background: #a55000;
+  }
+  .canvas-tooltip-info {
+    position: absolute;
+    top: 28px;
+    left: 2px;
+    cursor: help;
+    background-color: rgba(0, 0, 0, 0.3);
+    width: 20px;
+    height: 20px;
+    border-radius: 50%;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    flex-direction: column;
+    z-index: 100;
+  }
+  .canvas-tooltip-info::after {
+    content: '';
+    display: block;
+    width: 2px;
+    height: 7px;
+    background-color: white;
+    margin-top: 2px;
+  }
+  .canvas-tooltip-info::before {
+    content: '';
+    display: block;
+    width: 2px;
+    height: 2px;
+    background-color: white;
+  }
+  .canvas-tooltip-content {
+    display: none;
+    background-color: #f9f9f9;
+    color: #333;
+    border: 1px solid #ddd;
+    padding: 15px;
+    position: absolute;
+    top: 40px;
+    left: 10px;
+    width: 250px;
+    font-size: 16px;
+    opacity: 0;
+    border-radius: 8px;
+    box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
+    z-index: 100;
+  }
+  .canvas-tooltip:hover .canvas-tooltip-content {
+    display: block;
+    animation: fadeIn 0.5s;
+    opacity: 1;
+  }
+  @keyframes fadeIn {
+    from {opacity: 0;}
+    to {opacity: 1;}
+  }
+  .styler {
+    overflow:inherit !important;
+  }
+  .gradio-container{
+    overflow: visible;
+  }
+  /* fullpage image viewer */
+  #lightboxModal{
+      display: none;
+      position: fixed;
+      z-index: 1001;
+      left: 0;
+      top: 0;
+      width: 100%;
+      height: 100%;
+      overflow: auto;
+      background-color: rgba(20, 20, 20, 0.95);
+      user-select: none;
+      -webkit-user-select: none;
+      flex-direction: column;
+  }
+  .modalControls {
+      display: flex;
+      position: absolute;
+      right: 0px;
+      left: 0px;
+      gap: 1em;
+      padding: 1em;
+      background-color:rgba(0,0,0,0);
+      z-index: 1;
+      transition: 0.2s ease background-color;
+  }
+  .modalControls:hover {
+      background-color:rgba(0,0,0,0.9);
+  }
+  .modalClose {
+      margin-left: auto;
+  }
+  .modalControls span{
+      color: white;
+      text-shadow: 0px 0px 0.25em black;
+      font-size: 35px;
+      font-weight: bold;
+      cursor: pointer;
+      width: 1em;
+  }
+  .modalControls span:hover, .modalControls span:focus{
+      color: #999;
+      text-decoration: none;
+  }
+  #lightboxModal > img {
+      display: block;
+      margin: auto;
+      width: auto;
+  }
+  #lightboxModal > img.modalImageFullscreen{
+      object-fit: contain;
+      height: 100%;
+      width: 100%;
+      min-height: 0;
+  }
+  .modalPrev,
+  .modalNext {
+    cursor: pointer;
+    position: absolute;
+    top: 50%;
+    width: auto;
+    padding: 16px;
+    margin-top: -50px;
+    color: white;
+    font-weight: bold;
+    font-size: 20px;
+    transition: 0.6s ease;
+    border-radius: 0 3px 3px 0;
+    user-select: none;
+    -webkit-user-select: none;
+  }
+  .modalNext {
+    right: 0;
+    border-radius: 3px 0 0 3px;
+  }
+  .modalPrev:hover,
+  .modalNext:hover {
+    background-color: rgba(0, 0, 0, 0.8);
+  }
+  #imageARPreview {
+      position: absolute;
+      top: 0px;
+      left: 0px;
+      border: 2px solid red;
+      background: rgba(255, 0, 0, 0.3);
+      z-index: 900;
+      pointer-events: none;
+      display: none;
+  }
+  #stylePreviewOverlay {
+      opacity: 0;
+      pointer-events: none;
+      width: 128px;
+      height: 128px;
+      position: fixed;
+      top: 0px;
+      left: 0px;
+      border: solid 1px lightgrey;
+      transform: translate(-140px, 20px);
+      background-size: cover;
+      background-position: center;
+      background-color: rgba(0, 0, 0, 0.3);
+      border-radius: 5px;
+      z-index: 100;
+      transition: transform 0.1s ease, opacity 0.3s ease;
+  }
+  #stylePreviewOverlay.lower-half {
+      transform: translate(-140px, -140px);
+  }
+  /* scrollable box for style selections */
+  .contain .tabs {
+    height: 100%;
+  }
+  .contain .tabs .tabitem.style_selections_tab {
+    height: 100%;
+  }
+  .contain .tabs .tabitem.style_selections_tab > div:first-child {
+    height: 100%;
+  }
+  .contain .tabs .tabitem.style_selections_tab .style_selections {
+    min-height: 200px;
+    height: 100%;
+  }
+  .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] {
+    position: absolute; /* remove this to disable scrolling within the checkbox-group */
+    overflow: auto;
+    padding-right: 2px;
+    max-height: 100%;
+  }
+  .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] label {
+    /* max-width: calc(35% - 15px) !important; */ /* add this to enable 3 columns layout */
+    flex: calc(50% - 5px) !important;
+  }
+  .contain .tabs .tabitem.style_selections_tab .style_selections .wrap[data-testid="checkbox-group"] label span {
+    /* white-space:nowrap; */ /* add this to disable text wrapping (better choice for 3 columns layout) */
+    overflow: hidden;
+    text-overflow: ellipsis;
+  }
+  /* styles preview tooltip */
+  .preview-tooltip {
+    background-color: #fff8;
+    font-family: monospace;
+    text-align: center;
+    border-radius: 5px 5px 0px 0px;
+    display: none; /* remove this to enable tooltip in preview image */
+  }
+  #inpaint_canvas .canvas-tooltip-info {
+    top: 2px;
+  }
+  #inpaint_brush_color input[type=color]{
+    background: none;
+  }

modules/webui/gradio_extensions.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/modules/ui_gradio_extensions.py
+import os
+from pathlib import Path
+import gradio as gr
+from modules import config
+from .localization import localization_js
+GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
+WEBUI_DIR_PATH = Path(os.path.dirname(os.path.realpath(__file__)))
+def read_file(fp):
+    with open(WEBUI_DIR_PATH / fp, "r") as f:
+        return f.read()
+def javascript_html():
+    def s(text: str):
+        return f'<script type="text/javascript">{text}</script>\n'
+    def src(src: str):
+        return f"<script src='{src}'></script>\n"
+    def sf(fp: str):
+        return s(read_file(fp))
+    head = ""
+    head += src("https://jsd.onmicrosoft.cn/npm/[email protected]")
+    head += s(localization_js(config.runtime_env_vars.language))
+    head += sf("js/index.js")
+    head += sf("js/localization.js")
+    if config.runtime_env_vars.theme:
+        head += s(f"set_theme('{config.runtime_env_vars.theme}');")
+    return head
+def css_html():
+    head = f'<style>{read_file("css/style.css")}</style>'
+    return head
+def reload_javascript():
+    js = javascript_html()
+    css = css_html()
+    def template_response(*args, **kwargs):
+        res = GradioTemplateResponseOriginal(*args, **kwargs)
+        res.body = res.body.replace(b"</head>", f"{js}</head>".encode("utf8"))
+        res.body = res.body.replace(b"</body>", f"{css}</body>".encode("utf8"))
+        res.init_headers()
+        return res
+    gr.routes.templates.TemplateResponse = template_response

modules/webui/gradio_hijack.py ADDED Viewed

	@@ -0,0 +1,14 @@

+from gradio.components.base import Block
+all_components = []
+if not hasattr(Block, "original__init__"):
+    Block.original_init = Block.__init__
+def blk_ini(self, *args, **kwargs):
+    all_components.append(self)
+    return Block.original_init(self, *args, **kwargs)
+Block.__init__ = blk_ini

modules/webui/js/index.js ADDED Viewed

	@@ -0,0 +1,231 @@

+// based on https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/v1.6.0/script.js
+function gradioApp() {
+  const elems = document.getElementsByTagName("gradio-app");
+  const elem = elems.length == 0 ? document : elems[0];
+  if (elem !== document) {
+    elem.getElementById = function (id) {
+      return document.getElementById(id);
+    };
+  }
+  return elem.shadowRoot ? elem.shadowRoot : elem;
+}
+/**
+ * Get the currently selected top-level UI tab button (e.g. the button that says "Extras").
+ */
+function get_uiCurrentTab() {
+  return gradioApp().querySelector("#tabs > .tab-nav > button.selected");
+}
+/**
+ * Get the first currently visible top-level UI tab content (e.g. the div hosting the "txt2img" UI).
+ */
+function get_uiCurrentTabContent() {
+  return gradioApp().querySelector(
+    '#tabs > .tabitem[id^=tab_]:not([style*="display: none"])'
+  );
+}
+var uiUpdateCallbacks = []; // filled by onUiUpdate
+var uiAfterUpdateCallbacks = []; // filled by onAfterUiUpdate
+var uiLoadedCallbacks = []; // filled by onUiLoaded
+var uiTabChangeCallbacks = []; // filled by onUiTabChange
+var optionsChangedCallbacks = []; // filled by onOptionsChanged
+var uiAfterUpdateTimeout = null; // pending timer id set by scheduleAfterUiUpdateCallbacks
+var uiCurrentTab = null; // last tab button seen by the mutation observer
+/**
+ * Register callback to be called at each UI update.
+ * The callback receives an array of MutationRecords as an argument.
+ *
+ * @param {Function} callback - Appended to `uiUpdateCallbacks`.
+ */
+function onUiUpdate(callback) {
+  uiUpdateCallbacks.push(callback);
+}
+/**
+ * Register callback to be called soon after UI updates.
+ * The callback receives no arguments.
+ *
+ * This is preferred over `onUiUpdate` if you don't need
+ * access to the MutationRecords, as your function will
+ * not be called quite as often.
+ *
+ * @param {Function} callback - Appended to `uiAfterUpdateCallbacks`.
+ */
+function onAfterUiUpdate(callback) {
+  uiAfterUpdateCallbacks.push(callback);
+}
+/**
+ * Register callback to be called when the UI is loaded.
+ * The callback receives no arguments.
+ *
+ * @param {Function} callback - Appended to `uiLoadedCallbacks`.
+ */
+function onUiLoaded(callback) {
+  uiLoadedCallbacks.push(callback);
+}
+/**
+ * Register callback to be called when the UI tab is changed.
+ * The callback receives no arguments.
+ *
+ * @param {Function} callback - Appended to `uiTabChangeCallbacks`.
+ */
+function onUiTabChange(callback) {
+  uiTabChangeCallbacks.push(callback);
+}
+/**
+ * Register callback to be called when the options are changed.
+ * The callback receives no arguments.
+ * @param {Function} callback - Appended to `optionsChangedCallbacks`.
+ */
+function onOptionsChanged(callback) {
+  optionsChangedCallbacks.push(callback);
+}
+function executeCallbacks(queue, arg) {
+  for (const callback of queue) {
+    try {
+      callback(arg);
+    } catch (e) {
+      console.error("error running callback", callback, ":", e);
+    }
+  }
+}
+/**
+ * Schedule the execution of the callbacks registered with onAfterUiUpdate.
+ * The callbacks are executed after a short while, unless another call to this function
+ * is made before that time. IOW, the callbacks are executed only once, even
+ * when there are multiple mutations observed.
+ */
+function scheduleAfterUiUpdateCallbacks() {
+  clearTimeout(uiAfterUpdateTimeout);
+  uiAfterUpdateTimeout = setTimeout(function () {
+    executeCallbacks(uiAfterUpdateCallbacks);
+  }, 200);
+}
+// Guards the one-time "UI loaded" notification below.
+var executedOnLoaded = false;
+// Once the DOM is parsed, watch the Gradio root for child-list changes and
+// fan each batch of mutations out to the registered callback queues.
+document.addEventListener("DOMContentLoaded", function () {
+  var mutationObserver = new MutationObserver(function (m) {
+    // The UI counts as loaded the first time an element with id
+    // "generate_button" can be found under the Gradio root.
+    // NOTE(review): if the page never renders such an element, the
+    // onUiLoaded callbacks never run - confirm the id exists in this UI.
+    if (!executedOnLoaded && gradioApp().querySelector("#generate_button")) {
+      executedOnLoaded = true;
+      executeCallbacks(uiLoadedCallbacks);
+    }
+    // Immediate callbacks get the raw MutationRecords; the "after update"
+    // callbacks are debounced.
+    executeCallbacks(uiUpdateCallbacks, m);
+    scheduleAfterUiUpdateCallbacks();
+    // A tab change is detected by comparing the selected tab button with the
+    // one remembered from the previous mutation batch.
+    const newTab = get_uiCurrentTab();
+    if (newTab && newTab !== uiCurrentTab) {
+      uiCurrentTab = newTab;
+      executeCallbacks(uiTabChangeCallbacks);
+    }
+  });
+  mutationObserver.observe(gradioApp(), { childList: true, subtree: true });
+});
+var onAppend = function (elem, f) {
+  var observer = new MutationObserver(function (mutations) {
+    mutations.forEach(function (m) {
+      if (m.addedNodes.length) {
+        f(m.addedNodes);
+      }
+    });
+  });
+  observer.observe(elem, { childList: true });
+};
+function addObserverIfDesiredNodeAvailable(querySelector, callback) {
+  var elem = document.querySelector(querySelector);
+  if (!elem) {
+    window.setTimeout(
+      () => addObserverIfDesiredNodeAvailable(querySelector, callback),
+      1000
+    );
+    return;
+  }
+  onAppend(elem, callback);
+}
+/**
+ * Show reset button on toast "Connection errored out."
+ */
+addObserverIfDesiredNodeAvailable(".toast-wrap", function (added) {
+  added.forEach(function (element) {
+    if (element.innerText.includes("Connection errored out.")) {
+      window.setTimeout(function () {
+        document.getElementById("reset_button").classList.remove("hidden");
+        document.getElementById("generate_button").classList.add("hidden");
+        document.getElementById("skip_button").classList.add("hidden");
+        document.getElementById("stop_button").classList.add("hidden");
+      });
+    }
+  });
+});
+/**
+ * Add a ctrl+enter as a shortcut to start a generation
+ */
+document.addEventListener("keydown", function (e) {
+  const isModifierKey = e.metaKey || e.ctrlKey || e.altKey;
+  const isEnterKey = e.key == "Enter" || e.keyCode == 13;
+  if (isModifierKey && isEnterKey) {
+    const generateButton = gradioApp().querySelector(
+      "button:not(.hidden)[id=generate_button]"
+    );
+    if (generateButton) {
+      generateButton.click();
+      e.preventDefault();
+      return;
+    }
+    const stopButton = gradioApp().querySelector(
+      "button:not(.hidden)[id=stop_button]"
+    );
+    if (stopButton) {
+      stopButton.click();
+      e.preventDefault();
+      return;
+    }
+  }
+});
+/**
+ * checks that a UI element is not in another hidden element or tab content
+ */
+function uiElementIsVisible(el) {
+  if (el === document) {
+    return true;
+  }
+  const computedStyle = getComputedStyle(el);
+  const isVisible = computedStyle.display !== "none";
+  if (!isVisible) return false;
+  return uiElementIsVisible(el.parentNode);
+}
+function uiElementInSight(el) {
+  const clRect = el.getBoundingClientRect();
+  const windowHeight = window.innerHeight;
+  const isOnScreen = clRect.bottom > 0 && clRect.top < windowHeight;
+  return isOnScreen;
+}
+function playNotification() {
+  gradioApp().querySelector("#audio_notification audio")?.play();
+}
+function set_theme(theme) {
+  var gradioURL = window.location.href;
+  if (!gradioURL.includes("?__theme=")) {
+    window.location.replace(gradioURL + "?__theme=" + theme);
+  }
+}
+function htmlDecode(input) {
+  var doc = new DOMParser().parseFromString(input, "text/html");
+  return doc.documentElement.textContent;
+}

modules/webui/js/localization.js ADDED Viewed

	@@ -0,0 +1,201 @@

+// Matches strings made up solely of digits and dots; such text is never translated.
+var re_num = /^[.\d]+$/;
+// Bookkeeping filled by getTranslation: every looked-up text that is not itself
+// a known translation result is recorded as a key of original_lines, and every
+// translation handed out is recorded as a key of translated_lines.
+var original_lines = {};
+var translated_lines = {};
+function hasLocalization() {
+  return window.localization && Object.keys(window.localization).length > 0;
+}
+function textNodesUnder(el) {
+  var n,
+    a = [],
+    walk = document.createTreeWalker(el, NodeFilter.SHOW_TEXT, null, false);
+  while ((n = walk.nextNode())) a.push(n);
+  return a;
+}
+function canBeTranslated(node, text) {
+  if (!text) return false;
+  if (!node.parentElement) return false;
+  var parentType = node.parentElement.nodeName;
+  if (
+    parentType == "SCRIPT" ||
+    parentType == "STYLE" ||
+    parentType == "TEXTAREA"
+  )
+    return false;
+  if (re_num.test(text)) return false;
+  return true;
+}
+/**
+ * Look up the translation for `text` in the global `localization` table.
+ *
+ * Only the table's own keys are consulted, so UI text such as "constructor"
+ * or "toString" is no longer "translated" into a member inherited from
+ * Object.prototype.
+ *
+ * Side effects: `text` is recorded in original_lines unless it is already a
+ * key of translated_lines, and every translation returned is recorded in
+ * translated_lines.
+ *
+ * @param {string} text
+ * @returns {*} the table entry for `text`, or undefined when `text` is empty
+ *   or has no entry.
+ */
+function getTranslation(text) {
+  if (!text) return undefined;
+  if (translated_lines[text] === undefined) {
+    original_lines[text] = 1;
+  }
+  if (!Object.prototype.hasOwnProperty.call(localization, text)) {
+    return undefined;
+  }
+  const tl = localization[text];
+  if (tl !== undefined) {
+    translated_lines[tl] = 1;
+  }
+  return tl;
+}
+function processTextNode(node) {
+  var text = node.textContent.trim();
+  if (!canBeTranslated(node, text)) return;
+  var tl = getTranslation(text);
+  if (tl !== undefined) {
+    node.textContent = tl;
+    if (text && node.parentElement) {
+      node.parentElement.setAttribute("data-original-text", text);
+    }
+  }
+}
+/**
+ *
+ * @param {HTMLElement} node
+ * @returns
+ */
+function processMDNode(node) {
+  const text = node.children[0].textContent.trim();
+  let tl = getTranslation(text);
+  if (!tl) return;
+  if (Array.isArray(tl)) {
+    tl = tl.join("\n");
+  }
+  const md = marked.marked(tl);
+  node.innerHTML = md;
+  node.setAttribute("data-original-text", text);
+}
+function is_md_child(node) {
+  while (node.parentElement !== document.body) {
+    if (node?.classList?.contains("md")) {
+      return true;
+    }
+    node = node.parentElement;
+    if (!node) break;
+  }
+  return false;
+}
+function processNode(node) {
+  if (node.nodeType == 3) {
+    processTextNode(node);
+    return;
+  }
+  if (node.classList.contains("md")) {
+    processMDNode(node);
+    return;
+  }
+  if (is_md_child(node)) return;
+  if (node.title) {
+    let tl = getTranslation(node.title);
+    if (tl !== undefined) {
+      node.title = tl;
+    }
+  }
+  if (node.placeholder) {
+    let tl = getTranslation(node.placeholder);
+    if (tl !== undefined) {
+      node.placeholder = tl;
+    }
+  }
+  textNodesUnder(node).forEach(function (node) {
+    if (is_md_child(node)) return;
+    processTextNode(node);
+  });
+}
+function refresh_style_localization() {
+  processNode(document.querySelector(".style_selections"));
+}
+function refresh_aspect_ratios_label(value) {
+  label = document.querySelector("#aspect_ratios_accordion div span");
+  translation = getTranslation("Aspect Ratios");
+  if (typeof translation == "undefined") {
+    translation = "Aspect Ratios";
+  }
+  label.textContent = translation + " " + htmlDecode(value);
+}
+function localizeWholePage() {
+  processNode(gradioApp());
+  function elem(comp) {
+    var elem_id = comp.props.elem_id
+      ? comp.props.elem_id
+      : "component-" + comp.id;
+    return gradioApp().getElementById(elem_id);
+  }
+  for (var comp of window.gradio_config.components) {
+    if (comp.props.webui_tooltip) {
+      let e = elem(comp);
+      let tl = e ? getTranslation(e.title) : undefined;
+      if (tl !== undefined) {
+        e.title = tl;
+      }
+    }
+    if (comp.props.placeholder) {
+      let e = elem(comp);
+      let textbox = e ? e.querySelector("[placeholder]") : null;
+      let tl = textbox ? getTranslation(textbox.placeholder) : undefined;
+      if (tl !== undefined) {
+        textbox.placeholder = tl;
+      }
+    }
+  }
+}
+// Entry point of the localization script: runs once the DOM is parsed and
+// does nothing when no (non-empty) window.localization table is present.
+document.addEventListener("DOMContentLoaded", function () {
+  if (!hasLocalization()) {
+    return;
+  }
+  // Translate every node added to the UI later on ...
+  onUiUpdate(function (m) {
+    m.forEach(function (mutation) {
+      mutation.addedNodes.forEach(function (node) {
+        processNode(node);
+      });
+    });
+  });
+  // ... and everything that is already on the page.
+  localizeWholePage();
+  if (localization.rtl) {
+    // if the language is from right to left,
+    new MutationObserver((mutations, observer) => {
+      // wait for the style to load
+      mutations.forEach((mutation) => {
+        mutation.addedNodes.forEach((node) => {
+          if (node.tagName === "STYLE") {
+            // Only the first <style> element added to the root is inspected.
+            observer.disconnect();
+            for (const x of node.sheet.rules) {
+              // find all rtl media rules
+              if (Array.from(x.media || []).includes("rtl")) {
+                x.media.appendMedium("all"); // enable them
+              }
+            }
+          }
+        });
+      });
+    }).observe(gradioApp(), { childList: true });
+  }
+});

modules/webui/localization.py ADDED Viewed

	@@ -0,0 +1,74 @@

+import json
+import os
+import gradio as gr
+# Translation table most recently loaded by localization_js().
+current_translation = {}
+# "language" directory located three directory levels above this file.
+localization_root = os.path.join(
+    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "language"
+)
+def localization_js(filename):
+    global current_translation
+    if isinstance(filename, str):
+        full_name = os.path.abspath(os.path.join(localization_root, filename + ".json"))
+        if os.path.exists(full_name):
+            try:
+                with open(full_name, encoding="utf-8") as f:
+                    current_translation = json.load(f)
+                    assert isinstance(current_translation, dict)
+                    for k, v in current_translation.items():
+                        assert isinstance(k, str), f"Key is not a string, got {k}"
+                        assert isinstance(v, str) or isinstance(
+                            v, list
+                        ), f"Value for key {k} is not a string or list"
+            except Exception as e:
+                print(str(e))
+                print(f"Failed to load localization file {full_name}")
+    # current_translation = {k: 'XXX' for k in current_translation.keys()}  # use this to see if all texts are covered
+    return f"window.localization = {json.dumps(current_translation)}"
+def dump_english_config(components):
+    all_texts = []
+    for c in components:
+        if isinstance(c, gr.Markdown) and "no-translate" in c.elem_classes:
+            continue
+        if isinstance(c, gr.Dropdown):
+            continue
+        if isinstance(c, gr.HTML):
+            continue
+        if isinstance(c, gr.Textbox):
+            continue
+        label = getattr(c, "label", None)
+        value = getattr(c, "value", None)
+        choices = getattr(c, "choices", None)
+        info = getattr(c, "info", None)
+        if isinstance(label, str):
+            all_texts.append(label)
+        if isinstance(value, str):
+            all_texts.append(value)
+        if isinstance(info, str):
+            all_texts.append(info)
+        if isinstance(choices, list):
+            for x in choices:
+                if isinstance(x, str):
+                    all_texts.append(x)
+                if isinstance(x, tuple):
+                    for y in x:
+                        if isinstance(y, str):
+                            all_texts.append(y)
+    config_dict = {k: k for k in all_texts if k != "" and "progress-container" not in k}
+    full_name = os.path.abspath(os.path.join(localization_root, "en.json"))
+    with open(full_name, "w", encoding="utf-8") as json_file:
+        json.dump(config_dict, json_file, indent=4, ensure_ascii=False)
+    return

modules/webui/localization_runtime.py ADDED Viewed

	@@ -0,0 +1,226 @@

+class LocalizationVars:
+    """Language-dependent default texts and examples for the web UI.
+
+    This base class initialises every field to an empty value; the language
+    specific subclasses fill them in.
+    """
+
+    def __init__(self):
+        # Default texts, all empty in the base class.
+        self.DEFAULT_TTS_TEXT = ""
+        self.DEFAULT_SPEAKER_TEST_TEXT = ""
+        # NOTE(review): "MERAGE" looks like a misspelling of "MERGE", but the
+        # attribute name is part of the interface used by other modules.
+        self.DEFAULT_SPEAKER_MERAGE_TEXT = ""
+        self.DEFAULT_SSML_TEXT = ""
+        # Example lists, empty in the base class.
+        self.ssml_examples = []
+        self.tts_examples = []
+class ZHLocalizationVars(LocalizationVars):
+    """Chinese default texts and examples.
+
+    ``ssml_examples`` is a list of SSML document strings and ``tts_examples``
+    is a list of dicts, each with a single ``"text"`` key.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.DEFAULT_TTS_TEXT = "chat T T S 是一款强大的对话式文本转语音模型。它有中英混读和多说话人的能力。"
+        self.DEFAULT_SPEAKER_TEST_TEXT = (
+            "说话人测试 123456789 [uv_break] ok, test done [lbreak]"
+        )
+        self.DEFAULT_SPEAKER_MERAGE_TEXT = (
+            "说话人合并测试 123456789 [uv_break] ok, test done [lbreak]"
+        )
+        self.DEFAULT_SSML_TEXT = """
+<speak version="0.1">
+  <voice spk="Bob" seed="42" style="narration-relaxed">
+    这里是一个简单的 SSML 示例 [lbreak]
+  </voice>
+</speak>
+        """.strip()
+        # Three SSML examples: a multi-speaker audiobook excerpt, <prosody>
+        # usage (rate / pitch / volume) and <break> usage.
+        self.ssml_examples = [
+            """
+<speak version="0.1">
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        下面是一个 ChatTTS 用于合成多角色多情感的有声书示例[lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        黛玉冷笑道：[lbreak]
+    </voice>
+    <voice spk="female2" seed="42" style="angry">
+        我说呢 [uv_break] ，亏了绊住，不然，早就飞起来了[lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        宝玉道：[lbreak]
+    </voice>
+    <voice spk="Alice" seed="42" style="unfriendly">
+        “只许和你玩 [uv_break] ，替你解闷。不过偶然到他那里，就说这些闲话。”[lbreak]
+    </voice>
+    <voice spk="female2" seed="42" style="angry">
+        “好没意思的话！[uv_break] 去不去，关我什么事儿？ 又没叫你替我解闷儿 [uv_break]，还许你不理我呢” [lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        说着，便赌气回房去了 [lbreak]
+    </voice>
+</speak>
+""",
+            """
+<speak version="0.1">
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        使用 prosody 控制生成文本的语速语调和音量，示例如下 [lbreak]
+        <prosody>
+            无任何限制将会继承父级voice配置进行生成 [lbreak]
+        </prosody>
+        <prosody rate="1.5">
+            设置 rate 大于1表示加速，小于1为减速 [lbreak]
+        </prosody>
+        <prosody pitch="6">
+            设置 pitch 调整音调，设置为6表示提高6个半音 [lbreak]
+        </prosody>
+        <prosody volume="2">
+            设置 volume 调整音量，设置为2表示提高2个分贝 [lbreak]
+        </prosody>
+        在 voice 中无prosody包裹的文本即为默认生成状态下的语音 [lbreak]
+    </voice>
+</speak>
+""",
+            """
+<speak version="0.1">
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        使用 break 标签将会简单的 [lbreak]
+        <break time="500" />
+        插入一段空白到生成结果中 [lbreak]
+    </voice>
+</speak>
+""",
+        ]
+        # Plain-text examples; many of them contain numbers, dates, prices,
+        # phone numbers, emoji or markdown.
+        self.tts_examples = [
+            {
+                "text": "大🍌，一条大🍌，嘿，你的感觉真的很奇妙  [lbreak]",
+            },
+            {
+                "text": "Big 🍌, a big 🍌, hey, your feeling is really wonderful [lbreak]"
+            },
+            {
+                "text": """
+# 这是 markdown 标题
+```
+代码块将跳过
+```
+- **文本标准化**:
+  - **Markdown**: 自动检测处理 markdown 格式文本。
+  - **数字转写**: 自动将数字转为模型可识别的文本。
+  - **Emoji 适配**: 自动翻译 emoji 为可读文本。
+  - **基于分词器**: 基于 tokenizer 预处理文本，覆盖模型所有不支持字符范围。
+  - **中英文识别**: 适配英文环境。
+        """
+            },
+            {
+                "text": "天气预报显示，今天会有小雨，请大家出门时记得带伞。降温的天气也提醒我们要适时添衣保暖 [lbreak]",
+            },
+            {
+                "text": "公司的年度总结会议将在下周三举行，请各部门提前准备好相关材料，确保会议顺利进行 [lbreak]",
+            },
+            {
+                "text": "今天的午餐菜单包括烤鸡、沙拉和蔬菜汤，大家可以根据自己的口味选择适合的菜品 [lbreak]",
+            },
+            {
+                "text": "请注意，电梯将在下午两点进行例行维护，预计需要一个小时的时间，请大家在此期间使用楼梯 [lbreak]",
+            },
+            {
+                "text": "图书馆新到了一批书籍，涵盖了文学、科学和历史等多个领域，欢迎大家前来借阅 [lbreak]",
+            },
+            {
+                "text": "电影中梁朝伟扮演的陈永仁的编号27149 [lbreak]",
+            },
+            {
+                "text": "这块黄金重达324.75克 [lbreak]",
+            },
+            {
+                "text": "我们班的最高总分为583分 [lbreak]",
+            },
+            {
+                "text": "12~23 [lbreak]",
+            },
+            {
+                "text": "-1.5~2 [lbreak]",
+            },
+            {
+                "text": "她出生于86年8月18日，她弟弟出生于1995年3月1日 [lbreak]",
+            },
+            {
+                "text": "等会请在12:05请通知我 [lbreak]",
+            },
+            {
+                "text": "今天的最低气温达到-10°C [lbreak]",
+            },
+            {
+                "text": "现场有7/12的观众投出了赞成票 [lbreak]",
+            },
+            {
+                "text": "明天有62％的概率降雨 [lbreak]",
+            },
+            {
+                "text": "随便来几个价格12块5，34.5元，20.1万 [lbreak]",
+            },
+            {
+                "text": "这是固话0421-33441122 [lbreak]",
+            },
+            {
+                "text": "这是手机+86 18544139121 [lbreak]",
+            },
+        ]
+class ENLocalizationVars(LocalizationVars):
+    """English default texts and examples.
+
+    ``ssml_examples`` is a list of SSML document strings and ``tts_examples``
+    is a list of dicts, each with a single ``"text"`` key.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.DEFAULT_TTS_TEXT = "Chat T T S is a powerful conversational text-to-speech model. It has the ability to mix Chinese and English and multiple speakers."
+        self.DEFAULT_SPEAKER_TEST_TEXT = (
+            "Speaker test 123456789 [uv_break] ok, test done [lbreak]"
+        )
+        self.DEFAULT_SPEAKER_MERAGE_TEXT = (
+            "Speaker merge test 123456789 [uv_break] ok, test done [lbreak]"
+        )
+        self.DEFAULT_SSML_TEXT = """
+<speak version="0.1">
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        Here is a simple SSML example [lbreak]
+    </voice>
+</speak>
+        """.strip()
+        # A single SSML example: a multi-speaker audiobook excerpt.
+        self.ssml_examples = [
+            """
+<speak version="0.1">
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        Below is an example of ChatTTS synthesizing an audiobook with multiple roles and emotions [lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        Daiyu sneered: [lbreak]
+    </voice>
+    <voice spk="female2" seed="42" style="angry">
+        I said [uv_break], it's a loss to trip, otherwise, I would have flown up long ago [lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        Bao Yu said: [lbreak]
+    </voice>
+    <voice spk="Alice" seed="42" style="unfriendly">
+        "Only play with you [uv_break], to relieve your boredom. But occasionally go to his place, just say these idle words." [lbreak]
+    </voice>
+    <voice spk="female2" seed="42" style="angry">
+        "What a boring thing! [uv_break] Go or not, it's none of my business? I didn't ask you to relieve my boredom [uv_break], and you don't even care about me." [lbreak]
+    </voice>
+    <voice spk="Bob" seed="42" style="narration-relaxed">
+        Saying that, he went back to the room in anger [lbreak]
+    </voice>
+</speak>""",
+        ]
+        # Plain-text examples.
+        self.tts_examples = [
+            {
+                "text": "I guess it comes down a simple choice. Get busy living or get busy dying.",
+            },
+            {
+                "text": "You got a dream, you gotta protect it. People can't do something themselves, they wanna tell you you can't do it. If you want something, go get it.",
+            },
+            {
+                "text": "Don't ever let somebody tell you you can't do something. Not even me. Alright? You got a dream, you gotta protect it. When people can't do something themselves, they're gonna tell you that you can't do it. You want something, go get it. Period.",
+            },
+        ]

modules/webui/readme_tab.py CHANGED Viewed

@@ -10,4 +10,4 @@ def read_local_readme():
 def create_readme_tab():
     readme_content = read_local_readme()
-    gr.Markdown(readme_content)

 def create_readme_tab():
     readme_content = read_local_readme()
+    gr.Markdown(readme_content, elem_classes=["no-translate"])

modules/webui/speaker/speaker_creator.py CHANGED Viewed

@@ -5,6 +5,7 @@ from modules.utils.SeedContext import SeedContext
 from modules.hf import spaces
 from modules.models import load_chat_tts
 from modules.utils.rng import np_rng
 from modules.webui.webui_utils import get_speakers, tts_generate
 import tempfile
@@ -88,16 +89,6 @@ def random_speaker():
     return seed, name
-creator_ui_desc = """
-## Speaker Creator
-使用本面板快捷抽卡生成 speaker.pt 文件。
-1. **生成说话人**：输入种子、名字、性别和描述。点击 "Generate speaker.pt" 按钮，生成的说话人配置会保存为.pt文件。
-2. **测试说话人声音**：输入测试文本。点击 "Test Voice" 按钮，生成的音频会在 "Output Audio" 中播放。
-3. **随机生成说话人**：点击 "Random Speaker" 按钮，随机生成一个种子和名字，可以进一步编辑其他信息并测试。
-"""
 def speaker_creator_ui():
     def on_generate(seed, name, gender, desc):
         file_path = create_spk_from_seed(seed, name, gender, desc)
@@ -113,7 +104,7 @@ def speaker_creator_ui():
                     test_text = gr.Textbox(
                         label="Test Text",
                         placeholder="Please input test text",
-                        value="说话人测试 123456789 [uv_break] ok, test done [lbreak]",
                     )
                     with gr.Row():
                         current_seed = gr.Label(label="Current Seed", value=-1)
@@ -131,7 +122,7 @@ def speaker_creator_ui():
             outputs=[current_seed],
         )
-    gr.Markdown(creator_ui_desc)
     with gr.Row():
         with gr.Column(scale=2):

 from modules.hf import spaces
 from modules.models import load_chat_tts
 from modules.utils.rng import np_rng
+from modules.webui import webui_config
 from modules.webui.webui_utils import get_speakers, tts_generate
 import tempfile
     return seed, name
 def speaker_creator_ui():
     def on_generate(seed, name, gender, desc):
         file_path = create_spk_from_seed(seed, name, gender, desc)
                     test_text = gr.Textbox(
                         label="Test Text",
                         placeholder="Please input test text",
+                        value=webui_config.localization.DEFAULT_SPEAKER_TEST_TEXT,
                     )
                     with gr.Row():
                         current_seed = gr.Label(label="Current Seed", value=-1)
             outputs=[current_seed],
         )
+    gr.Markdown("SPEAKER_CREATOR_GUIDE")
     with gr.Row():
         with gr.Column(scale=2):

modules/webui/speaker/speaker_merger.py CHANGED Viewed

@@ -3,7 +3,7 @@ import gradio as gr
 import torch
 from modules.hf import spaces
-from modules.webui import webui_utils
 from modules.webui.webui_utils import get_speakers, tts_generate
 from modules.speaker import speaker_mgr, Speaker
@@ -128,17 +128,6 @@ def merge_spk_to_file(
     return tmp_file_path
-merge_desc = """
-## Speaker Merger
-在本面板中，您可以选择多个说话人并指定他们的权重，合成新的语音并进行测试。以下是各个功能的详细说明：
-1. 选择说话人: 您可以从下拉菜单中选择最多四个说话人（A、B、C、D），每个说话人都有一个对应的权重滑块，范围从0到10。权重决定了每个说话人在合成语音中的影响程度。
-2. 合成语音: 在选择好说话人和设置好权重后，您可以在“Test Text”框中输入要测试的文本，然后点击“测试语音”按钮来生成并播放合成的语音。
-3. 保存说话人: 您还可以在右侧的“说话人信息”部分填写新的说话人的名称、性别和描述，并点击“Save Speaker”按钮来保存合成的说话人。保存后的说话人文件将显示在“Merged Speaker”栏中，供下载使用。
-"""
 # 显示 a b c d 四个选择框，选择一个或多个，然后可以试音，并导出
 def create_speaker_merger():
     def get_spk_choices():
@@ -146,7 +135,7 @@ def create_speaker_merger():
         speaker_names = ["None"] + speaker_names
         return speaker_names
-    gr.Markdown(merge_desc)
     def spk_picker(label_tail: str):
         with gr.Row():
@@ -198,7 +187,7 @@ def create_speaker_merger():
                                 test_text = gr.Textbox(
                                     label="Test Text",
                                     placeholder="Please input test text",
-                                    value="说话人合并测试 123456789 [uv_break] ok, test done [lbreak]",
                                 )
                                 output_audio = gr.Audio(

 import torch
 from modules.hf import spaces
+from modules.webui import webui_config, webui_utils
 from modules.webui.webui_utils import get_speakers, tts_generate
 from modules.speaker import speaker_mgr, Speaker
     return tmp_file_path
 # 显示 a b c d 四个选择框，选择一个或多个，然后可以试音，并导出
 def create_speaker_merger():
     def get_spk_choices():
         speaker_names = ["None"] + speaker_names
         return speaker_names
+    gr.Markdown("SPEAKER_MERGER_GUIDE")
     def spk_picker(label_tail: str):
         with gr.Row():
                                 test_text = gr.Textbox(
                                     label="Test Text",
                                     placeholder="Please input test text",
+                                    value=webui_config.localization.DEFAULT_SPEAKER_MERAGE_TEXT,
                                 )
                                 output_audio = gr.Audio(

modules/webui/ssml/spliter_tab.py CHANGED Viewed

@@ -95,8 +95,7 @@ def create_spliter_tab(ssml_input, tabs1, tabs2):
         with gr.Column(scale=3):
             with gr.Group():
                 gr.Markdown("📝Long Text Input")
-                gr.Markdown("- 此页面用于处理超长文本")
-                gr.Markdown("- 切割后，可以选择说话人、风格、seed，然后发送到SSML")
                 long_text_input = gr.Textbox(
                     label="Long Text Input",
                     lines=10,

         with gr.Column(scale=3):
             with gr.Group():
                 gr.Markdown("📝Long Text Input")
+                gr.Markdown("SSML_SPLITER_GUIDE")
                 long_text_input = gr.Textbox(
                     label="Long Text Input",
                     lines=10,

modules/webui/ssml/ssml_tab.py CHANGED Viewed

@@ -3,7 +3,6 @@ from modules.webui.webui_utils import (
     synthesize_ssml,
 )
 from modules.webui import webui_config
-from modules.webui.examples import ssml_examples, default_ssml
 def create_ssml_interface():
@@ -11,15 +10,11 @@ def create_ssml_interface():
         with gr.Column(scale=3):
             with gr.Group():
                 gr.Markdown("📝SSML Input")
-                gr.Markdown(f"- 最长{webui_config.ssml_max:,}字符，超过会被截断")
-                gr.Markdown("- 尽量保证使用相同的 seed")
-                gr.Markdown(
-                    "- 关于SSML可以看这个 [文档](https://github.com/lenML/ChatTTS-Forge/blob/main/docs/SSML.md)"
-                )
                 ssml_input = gr.Textbox(
                     label="SSML Input",
                     lines=10,
-                    value=default_ssml,
                     placeholder="输入 SSML 或选择示例",
                     elem_id="ssml_input",
                     show_label=False,
@@ -46,7 +41,7 @@ def create_ssml_interface():
             with gr.Group():
                 gr.Markdown("🎄Examples")
                 gr.Examples(
-                    examples=ssml_examples,
                     inputs=[ssml_input],
                 )

     synthesize_ssml,
 )
 from modules.webui import webui_config
 def create_ssml_interface():
         with gr.Column(scale=3):
             with gr.Group():
                 gr.Markdown("📝SSML Input")
+                gr.Markdown("SSML_TEXT_GUIDE")
                 ssml_input = gr.Textbox(
                     label="SSML Input",
                     lines=10,
+                    value=webui_config.localization.DEFAULT_SSML_TEXT,
                     placeholder="输入 SSML 或选择示例",
                     elem_id="ssml_input",
                     show_label=False,
             with gr.Group():
                 gr.Markdown("🎄Examples")
                 gr.Examples(
+                    examples=webui_config.localization.ssml_examples,
                     inputs=[ssml_input],
                 )

modules/webui/tts_tab.py CHANGED Viewed

@@ -8,12 +8,6 @@ from modules.webui.webui_utils import (
     tts_generate,
 )
 from modules.webui import webui_config
-from modules.webui.examples import example_texts
-from modules import config
-default_text_content = """
-chat T T S 是一款强大的对话式文本转语音模型。它有中英混读和多说话人的能力。
-""".strip()
 def create_tts_interface():
@@ -53,7 +47,7 @@ def create_tts_interface():
             with gr.Row():
                 with gr.Group():
                     gr.Markdown("🎭Style")
-                    gr.Markdown("- 后缀为 `_p` 表示带prompt，效果更强但是影响质量")
                     style_input_dropdown = gr.Dropdown(
                         choices=styles,
                         # label="Choose Style",
@@ -138,18 +132,14 @@ def create_tts_interface():
                     "📝Text Input",
                     elem_id="input-title",
                 )
-                gr.Markdown(f"- 字数限制{webui_config.tts_max:,}字，超过部分截断")
-                gr.Markdown("- 如果尾字吞字不读，可以试试结尾加上 `[lbreak]`")
-                gr.Markdown(
-                    "- If the input text is all in English, it is recommended to check disable_normalize"
-                )
                 text_input = gr.Textbox(
                     show_label=False,
                     label="Text to Speech",
                     lines=10,
                     placeholder="输入文本或选择示例",
                     elem_id="text-input",
-                    value=default_text_content,
                 )
                 # TODO 字数统计，其实实现很好写，但是就是会触发loading...并且还要和后端交互...
                 # text_input.change(
@@ -184,7 +174,10 @@ def create_tts_interface():
             with gr.Group():
                 gr.Markdown("🎄Examples")
                 sample_dropdown = gr.Dropdown(
-                    choices=[sample["text"] for sample in example_texts],
                     show_label=False,
                     value=None,
                     interactive=True,

     tts_generate,
 )
 from modules.webui import webui_config
 def create_tts_interface():
             with gr.Row():
                 with gr.Group():
                     gr.Markdown("🎭Style")
+                    gr.Markdown("TTS_STYLE_GUIDE")
                     style_input_dropdown = gr.Dropdown(
                         choices=styles,
                         # label="Choose Style",
                     "📝Text Input",
                     elem_id="input-title",
                 )
+                gr.Markdown(f"TTS_TEXT_GUIDE")
                 text_input = gr.Textbox(
                     show_label=False,
                     label="Text to Speech",
                     lines=10,
                     placeholder="输入文本或选择示例",
                     elem_id="text-input",
+                    value=webui_config.localization.DEFAULT_TTS_TEXT,
                 )
                 # TODO 字数统计，其实实现很好写，但是就是会触发loading...并且还要和后端交互...
                 # text_input.change(
             with gr.Group():
                 gr.Markdown("🎄Examples")
                 sample_dropdown = gr.Dropdown(
+                    choices=[
+                        sample["text"]
+                        for sample in webui_config.localization.tts_examples
+                    ],
                     show_label=False,
                     value=None,
                     interactive=True,

modules/webui/webui_config.py CHANGED Viewed

@@ -1,8 +1,12 @@
 from typing import Literal
 tts_max = 1000
 ssml_max = 1000
 spliter_threshold = 100
 max_batch_size = 8
 experimental = False

 from typing import Literal
+from modules.webui.localization_runtime import LocalizationVars
 tts_max = 1000
 ssml_max = 1000
 spliter_threshold = 100
 max_batch_size = 8
 experimental = False
+localization: LocalizationVars = None

webui.py CHANGED Viewed

@@ -81,6 +81,12 @@ if __name__ == "__main__":
         help="Enable webui_experimental features",
     )
     args = parser.parse_args()
     def get_and_update_env(*args):
@@ -100,6 +106,7 @@ if __name__ == "__main__":
     device_id = get_and_update_env(args, "device_id", None, str)
     use_cpu = get_and_update_env(args, "use_cpu", [], list)
     compile = get_and_update_env(args, "compile", False, bool)
     webui_config.experimental = get_and_update_env(
         args, "webui_experimental", False, bool
@@ -108,6 +115,7 @@ if __name__ == "__main__":
     webui_config.ssml_max = get_and_update_env(args, "ssml_max_len", 5000, int)
     webui_config.max_batch_size = get_and_update_env(args, "max_batch_size", 8, int)
     demo = create_interface()
     if auth:
@@ -117,8 +125,6 @@ if __name__ == "__main__":
     devices.reset_device()
     devices.first_time_calculation()
-    webui_init()
     demo.queue().launch(
         server_name=server_name,
         server_port=server_port,

         help="Enable webui_experimental features",
     )
+    parser.add_argument(
+        "--language",
+        type=str,
+        default="zh-CN",
+        help="Set the default language for the webui",
+    )
     args = parser.parse_args()
     def get_and_update_env(*args):
     device_id = get_and_update_env(args, "device_id", None, str)
     use_cpu = get_and_update_env(args, "use_cpu", [], list)
     compile = get_and_update_env(args, "compile", False, bool)
+    language = get_and_update_env(args, "language", False, bool)
     webui_config.experimental = get_and_update_env(
         args, "webui_experimental", False, bool
     webui_config.ssml_max = get_and_update_env(args, "ssml_max_len", 5000, int)
     webui_config.max_batch_size = get_and_update_env(args, "max_batch_size", 8, int)
+    webui_init()
     demo = create_interface()
     if auth:
     devices.reset_device()
     devices.first_time_calculation()
     demo.queue().launch(
         server_name=server_name,
         server_port=server_port,