theodotus
/

tts_uk_fastpitch

Model card Files Files and versions Community

theodotus committed on May 29, 2023

Commit

260c2c0

•

1 Parent(s): 4c2a6ed

Added tokenizer

Files changed (2) hide show

README.md +7 -0
tokenizer.py +45 -0

README.md CHANGED Viewed

@@ -32,6 +32,13 @@ Note: This model generates only spectrograms and a vocoder is needed to convert
 In this example HiFiGAN is used.
 ```python
 # Load FastPitch
 from nemo.collections.tts.models import FastPitchModel
 spec_generator = FastPitchModel.from_pretrained("theodotus/tts_uk_fastpitch")

 In this example HiFiGAN is used.
 ```python
+# Load Tokenizer
+# Download tokenizer.py from the model repository into the current
+# working directory (local_dir="./").
+# NOTE(review): presumably this makes the custom CharsTokenizer importable
+# before the FastPitch checkpoint is restored — confirm against the model config.
+from huggingface_hub import hf_hub_download
+hf_hub_download(
+    repo_id="theodotus/tts_uk_fastpitch",
+    filename="tokenizer.py",
+    local_dir = "./"
+)
 # Load FastPitch
 from nemo.collections.tts.models import FastPitchModel
 spec_generator = FastPitchModel.from_pretrained("theodotus/tts_uk_fastpitch")

tokenizer.py ADDED Viewed

	@@ -0,0 +1,45 @@

+from nemo.collections.tts.torch.tts_tokenizers import BaseCharsTokenizer
+from nemo.collections.common.tokenizers.text_to_speech.tokenizer_utils import any_locale_text_preprocessing
+def lowercase_text_preprocessing(text):
+    """Preprocess text and convert it to lowercase.
+
+    Passes the input through ``any_locale_text_preprocessing`` and then
+    calls ``.lower()`` on the result.
+
+    Args:
+        text: Input text to preprocess.
+
+    Returns:
+        The preprocessed text, lowercased.
+    """
+    text = any_locale_text_preprocessing(text)
+    text = text.lower()
+    return text
+class CharsTokenizer(BaseCharsTokenizer):
+    """Char-based tokenizer with an extended punctuation list.
+
+    Differs from ``BaseCharsTokenizer`` in two ways visible here: ``PUNCT_LIST``
+    gains two extra symbols, and ``text_preprocessing_func`` defaults to
+    ``lowercase_text_preprocessing``. All constructor arguments are forwarded
+    unchanged to the base class.
+    """
+    # Base punctuation plus '+' and '—'.
+    # NOTE(review): '+' is presumably a stress marker in the input text — confirm.
+    PUNCT_LIST = BaseCharsTokenizer.PUNCT_LIST+('+',"—")
+    def __init__(
+        self,
+        chars,
+        punct=True,
+        apostrophe=True,
+        add_blank_at=None,
+        pad_with_space=False,
+        non_default_punct_list=None,
+        text_preprocessing_func=lowercase_text_preprocessing,
+    ):
+        """Char-based tokenizer.
+        Args:
+            chars: string that represents all possible characters.
+            punct: Whether to reserve grapheme for basic punctuation or not.
+            apostrophe: Whether to use apostrophe or not.
+            add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None),
+             if None then no blank in labels.
+            pad_with_space: Whether to pad text with spaces at the beginning and at the end or not.
+            non_default_punct_list: List of punctuation marks which will be used instead of the default.
+            text_preprocessing_func: Text preprocessing function for correct execution of the tokenizer.
+             Defaults to lowercase_text_preprocessing.
+        """
+        # Pure pass-through: every argument goes to the base constructor as-is.
+        super().__init__(
+            chars=chars,
+            punct=punct,
+            apostrophe=apostrophe,
+            add_blank_at=add_blank_at,
+            pad_with_space=pad_with_space,
+            non_default_punct_list=non_default_punct_list,
+            text_preprocessing_func=text_preprocessing_func,
+        )