from nemo.collections.tts.torch.tts_tokenizers import BaseCharsTokenizer
from nemo.collections.common.tokenizers.text_to_speech.tokenizer_utils import any_locale_text_preprocessing


def lowercase_text_preprocessing(text):
    """Apply NeMo's locale-agnostic text preprocessing, then lowercase the result."""
    text = any_locale_text_preprocessing(text)
    text = text.lower()
    return text


class CharsTokenizer(BaseCharsTokenizer):
    PUNCT_LIST = BaseCharsTokenizer.PUNCT_LIST + ('+', '—')

    def __init__(
        self,
        chars,
        punct=True,
        apostrophe=True,
        add_blank_at=None,
        pad_with_space=False,
        non_default_punct_list=None,
        text_preprocessing_func=lowercase_text_preprocessing,
    ):
"""Char-based tokenizer. |
|
Args: |
|
chars: string that represents all possible characters. |
|
punct: Whether to reserve grapheme for basic punctuation or not. |
|
apostrophe: Whether to use apostrophe or not. |
|
add_blank_at: Add blank to labels in the specified order ("last") or after tokens (any non None), |
|
if None then no blank in labels. |
|
pad_with_space: Whether to pad text with spaces at the beginning and at the end or not. |
|
non_default_punct_list: List of punctuation marks which will be used instead default. |
|
text_preprocessing_func: Text preprocessing function for correct execution of the tokenizer. |
|
""" |
|
        super().__init__(
            chars=chars,
            punct=punct,
            apostrophe=apostrophe,
            add_blank_at=add_blank_at,
            pad_with_space=pad_with_space,
            non_default_punct_list=non_default_punct_list,
            text_preprocessing_func=text_preprocessing_func,
        )
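

# A minimal usage sketch (illustrative only): the character set and input text below
# are assumptions, and `encode` is assumed to be inherited from BaseCharsTokenizer;
# adjust both to match your actual dataset configuration.
if __name__ == "__main__":
    tokenizer = CharsTokenizer(
        chars="abcdefghijklmnopqrstuvwxyz",
        punct=True,
        apostrophe=True,
        pad_with_space=True,
    )
    # Input is lowercased by lowercase_text_preprocessing before tokenization,
    # so mixed-case text maps onto the lowercase character set above.
    token_ids = tokenizer.encode("Hello, world!")
    print(token_ids)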