import os
import re
import sys
import json
import codecs
import glob
from unidecode import unidecode
# from g2pc import G2pC
from h2p_parser.h2p import H2p
from num2words import num2words
import pykakasi
import epitran
# https://www.lexilogos.com/keyboard/pinyin_conversion.htm
import nltk
nltk.download('punkt', quiet=True)  # NOTE(review): network side effect at import time — confirm this is intended
from nltk.tokenize import word_tokenize

# I really need to find a better way to do this (handling many different possible entry points)
try:
    sys.path.append(".")
    from resources.app.python.xvapitch.text.ipa_to_xvaarpabet import ESpeak, ipa2xvaarpabet, PUNCTUATION, ALL_SYMBOLS, PIN_YIN_ENDS, pinyin_to_arpabet_mappings, text_pinyin_to_pinyin_symbs, manual_phone_replacements
    from resources.app.python.xvapitch.text.en_numbers import normalize_numbers as en_normalize_numbers
    from resources.app.python.xvapitch.text.ro_numbers import generateWords as ro_generateWords
except ModuleNotFoundError:
    try:
        from python.xvapitch.text.ipa_to_xvaarpabet import ESpeak, ipa2xvaarpabet, PUNCTUATION, ALL_SYMBOLS, PIN_YIN_ENDS, pinyin_to_arpabet_mappings, text_pinyin_to_pinyin_symbs, manual_phone_replacements
        from python.xvapitch.text.en_numbers import normalize_numbers as en_normalize_numbers
        from python.xvapitch.text.ro_numbers import generateWords as ro_generateWords
    except ModuleNotFoundError:
        try:
            from text.ipa_to_xvaarpabet import ESpeak, ipa2xvaarpabet, PUNCTUATION, ALL_SYMBOLS, PIN_YIN_ENDS, pinyin_to_arpabet_mappings, text_pinyin_to_pinyin_symbs, manual_phone_replacements
            from text.en_numbers import normalize_numbers as en_normalize_numbers
            from text.ro_numbers import generateWords as ro_generateWords
        except ModuleNotFoundError:
            from ipa_to_xvaarpabet import ESpeak, ipa2xvaarpabet, PUNCTUATION, ALL_SYMBOLS, PIN_YIN_ENDS, pinyin_to_arpabet_mappings, text_pinyin_to_pinyin_symbs, manual_phone_replacements
            from en_numbers import normalize_numbers as en_normalize_numbers
            from ro_numbers import generateWords as ro_generateWords

# Processing order:
# - text-to-text,
#   clean up numbers
# - text-to-text, clean up abbreviations
# - text->phone, Custom dict replacements
# - text->phone, Heteronyms detection and replacement
# - text->phone, built-in dicts replacements (eg CMUdict)
# - text->text/phone, missed words ngram/POS splitting, and re-trying built-in dicts (eg CMUdict)
# - text->phone, g2p (eg espeak)
# - phone->[integer], convert phonemes to their index numbers, for use by the models

# class EspeakWrapper(object):
#     def __init__(self, base_dir, lang):
#         super(EspeakWrapper, self).__init__()
#         from phonemizer.backend import EspeakBackend
#         from phonemizer.backend.espeak.base import BaseEspeakBackend
#         # from phonemizer.backend.espeak import EspeakBackend
#         from phonemizer.separator import Separator
#         base_dir = f'C:/Program Files/'
#         espeak_dll_path = f'{base_dir}/eSpeak_NG/libespeak-ng.dll'
#         # espeak_dll_path = f'{base_dir}/libespeak-ng.dll'
#         # espeak_dll_path = f'{base_dir}/'
#         print(f'espeak_dll_path, {espeak_dll_path}')
#         BaseEspeakBackend.set_library(espeak_dll_path)
#         # EspeakBackend.set_library(espeak_dll_path)
#         self.backend = EspeakBackend(lang)
#         print(f'self.backend, {self.backend}')
#         self.separator = Separator(phone="|", syllable="", word="")
#         print(f'self.separator, {self.separator}')
#     def phonemize (self, word):
#         return self.backend.phonemize(word, self.separator)


class TextPreprocessor():
    """Base class for per-language text pre-processing.

    Holds pronunciation dictionaries (built-in and custom), language-specific
    cleanup hooks (numbers, abbreviations), and a grapheme-to-phoneme back-end
    (espeak or epitran) selected per language. Subclasses set
    ``self.abbreviations`` and call ``init_post()``.
    """

    def __init__(self, lang_code, lang_code2, base_dir, add_blank=True, logger=None, use_g2p=True, use_epitran=False):
        super(TextPreprocessor, self).__init__()
        self.use_g2p = use_g2p
        self.use_epitran = use_epitran
        self.logger = logger
        self.ALL_SYMBOLS = ALL_SYMBOLS
        self.lang_code = lang_code
        self.lang_code2 = lang_code2  # g2p back-end language code (espeak/epitran naming)
        self.g2p_cache = {}
        self.g2p_cache_path = None
        self.add_blank = add_blank
        self.dicts = []
        self.dict_words = []  # Cache
        self.dict_is_custom = []  # Built-in, or custom; Give custom dict entries priority over other pre-processing steps
        self._punctuation = '!\'(),.:;? '  # Standard english pronunciation symbols
        # NOTE(review): the `*` quantifier makes this pattern match the empty string at
        # every position, and the f-prefix is redundant — confirm intended usage.
        self.punct_to_whitespace_reg = re.compile(f'[\.,!?]*')
        self.espeak = None
        self.epitran = None
        # self.custom_g2p_fn = None
        if lang_code2:
            # if self.use_epitran and self.use_g2p:
            if self.use_epitran:
                self.epitran = epitran.Epitran(self.lang_code2)
            elif self.use_g2p:
                # NOTE(review): this shadows the `base_dir` parameter with the module's own folder
                base_dir = os.path.dirname(os.path.realpath(__file__))
                self.espeak = ESpeak(base_dir, language=self.lang_code2, keep_puncs=True)
        self.h2p = None
        if lang_code=="en":
            self.h2p = H2p(preload=True)  # heteronym handling, English only
        # Regular expression matching text enclosed in curly braces:
        self._curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
        self.num2words_fn = None
        # NOTE(review): "sl" appears twice in this list
        num2words_supported_langs = ["en","ar","cz","de","dk","en_GB","en_IN","es","es_CO","es_VE","eu","fi","fr","fr_CH","fr_BE","fr_DZ","he","id","it","ja","kn","ko","lt","lv","no","pl","pt","pt_BR","sl","sr","ro","ru","sl","tr","th","vi","nl","uk"]
        if lang_code in num2words_supported_langs:
            self.num2words_fn = num2words

    def init_post(self):
        """Compile abbreviation-expansion regexes; call after a subclass sets self.abbreviations."""
        self.re_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in self.abbreviations]

    # Override - language specific
    def clean_numbers(self, text):
        return text

    # Override - language specific
    def clean_am_pm(self, text):
        return text

    def clean_abbreviations(self, text):
        """Expand abbreviations like "Mr." using the compiled (pattern, replacement) pairs."""
        for regex, replacement in self.re_abbreviations:
            text = re.sub(regex, replacement, text)
        return text

    def collapse_whitespace(self, text):
        """Collapse any run of whitespace to a single space."""
        _whitespace_re = re.compile(r'\s+')
        return re.sub(_whitespace_re, ' ', text)

    def load_dict(self, dict_path, isCustom=False):
        """Load a .txt (CMUdict-style) or .json pronunciation dictionary and register it."""
        pron_dict = {}
        if dict_path.endswith(".txt"):
            pron_dict = self.read_txt_dict(dict_path, pron_dict)
        elif dict_path.endswith(".json"):
            pron_dict = self.read_json_dict(dict_path, pron_dict)
        pron_dict = self.post_process_dict(pron_dict)
        self.dict_is_custom.append(isCustom)
        self.dicts.append(pron_dict)
        self.dict_words.append(list(pron_dict.keys()))

    # Override
    def post_process_dict(self, pron_dict):
        return pron_dict

    def read_txt_dict(self, dict_path, pron_dict):
        """Parse "word PHONEMES" lines into pron_dict (word lower-cased, phonemes upper-cased)."""
        with codecs.open(dict_path, encoding="utf-8") as f:
            lines = f.read().split("\n")
            for line in lines:
                if len(line.strip()):
                    # if len(line.strip()) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"):
                    word = line.split(" ")[0].lower()
                    pron = " ".join(line.split(" ")[1:]).strip().upper()
                    # TODO? Check if the phonemes are valid?
                    # TODO? Handle variants(1)
                    pron_dict[word] = pron
        return pron_dict

    def read_json_dict(self, dict_path, pron_dict):
        """Parse an xVASynth dictionary JSON ({"data": {word: {enabled, arpabet}}}) into pron_dict."""
        with codecs.open(dict_path, encoding="utf-8") as f:
            json_data = json.load(f)
            for word in list(json_data["data"].keys()):
                if json_data["data"][word]["enabled"]==True:
                    # TODO? Check if the phonemes are valid?
                    # TODO? Handle variants(1)
                    pron_dict[word.lower()] = json_data["data"][word]["arpabet"].upper()
        return pron_dict

    # FIXME(corruption): the remainder of `dict_replace` — and a large span that followed it,
    # which (judging by references later in this file) contained methods/classes such as
    # text_to_phonemes, text_to_sequence, save_g2p_cache/load_g2p_cache, heteronym handling,
    # EnglishTextPreprocessor, get_text_preprocessor, and the Wolof/Hausa preprocessor class
    # headers — was destroyed in the source file. The surviving text is preserved below;
    # it must be restored from version control before this module can run.
    # def dict_replace (self, text, customDicts):
    #     for di, pron_dict in enumerate(self.dicts):
    #         if (customDicts and self.dict_is_custom[di]) or (not customDicts and not self.dict_is_custom[di]):
    #             dict_words = self.dict_words[di]
    #             text_graphites = re.sub("{([^}]*)}", "", text, flags=re.IGNORECASE)
    #             # Don't run the ARPAbet replacement for every single word, as it would be too slow.
    #             # Instead, do it only for words that are actually present in the prompt
    #             words_in_prompt = (text_graphites+" ").replace("}","").replace("{","").replace(",","").replace("?","").replace("!","").replace(";","").replace(":","").replace("...",".").replace(". "," ").lower().split(" ")
    #             words_in_prompt = [word.strip() for word in words_in_prompt if len(word.strip()) and word.lower() in dict_words]
    #             if len(words_in_prompt):
    #                 # Pad out punctuation, to make sure they don't get used in the word look-ups
    #                 text = " "+text.replace(",", " ,").replace(".", " .").replace("!", " !").replace("?", " ?")+" "
    #                 for di, dict_word in enumerate(words_in_prompt):
    #                     dict_word_with_spaces = "{"+pron_dict[dict_word]+"}"
    #                     dict_word_replace = dict_word.strip().replace(".", "\.").replace("(", "\(").replace(")", "\)")
    #                     # Do it twice, because re will not re-use spaces, so if you have two neighbouring words to be replaced,
    #                     # and they share a space character, one of them won't get changed
    #                     for _ in range(2):
    #                         text = re.sub(r'(?<  [TEXT LOST HERE]


# Custom Wolof text -> IPA converter. There were no g2p libraries supporting Wolof at the
# time of writing. It was this or nothing.
# NOTE(review): takes `self` but never uses it — this was a method of a (presumably Wolof)
# preprocessor class whose header was lost in the corrupted span above; re-indent into that
# class when the file is restored.
def custom_g2p_fn(self, word):
    """Rule-based Wolof grapheme-to-IPA conversion via ordered string rewrites."""
    # print(f'custom_g2p_fn | IN: {word}')
    word = word.lower()
    # lossy
    word = word.replace("à", "a")
    word = word.replace("ó", "o")
    word = word.replace("aa", "aː")
    word = re.sub('a(?!:)', 'ɐ', word)
    word = word.replace("bb", "bː")
    word = word.replace("cc", "cːʰ")
    word = word.replace("dd", "dː")
    word = word.replace("ee", "ɛː")
    word = word.replace("ée", "eː")
    word = word.replace("ëe", "əː")
    word = re.sub('e(?!:)', 'ɛ', word)
    word = re.sub('ë(?!:)', 'ə', word)
    word = word.replace("gg", "gː")
    word = word.replace("ii", "iː")
    word = word.replace("jj", "ɟːʰ")
    word = re.sub('j(?!:)', 'ɟ', word)
    word = word.replace("kk", "kːʰ")
    word = word.replace("ll", "ɫː")
    word = word.replace("mb", "m̩b")
    word = word.replace("mm", "mː")
    word = word.replace("nc", "ɲc")
    word = word.replace("nd", "n̩d")
    word = word.replace("ng", "ŋ̩g")
    word = word.replace("nj", "ɲɟ")
    word = word.replace("nk", "ŋ̩k")
    word = word.replace("nn", "nː")
    word = word.replace("nq", "ɴq")
    word = word.replace("nt", "n̩t")
    word = word.replace("ññ", "ɲː")
    word = word.replace("ŋŋ", "ŋː")
    word = re.sub('ñ(?!:)', 'ɲ', word)
    word = word.replace("oo", "oː")
    # NOTE(review): plain replace — this also rewrites the "o" inside "oː" produced by the
    # "oo" rule above (and the lookaheads above test ASCII ":" while the long-vowel marker
    # is "ː" U+02D0); confirm these are intended.
    word = word.replace("o", "ɔ")
    word = word.replace("pp", "pːʰ")
    word = word.replace("rr", "rː")
    word = word.replace("tt", "tːʰ")
    word = word.replace("uu", "uː")
    word = word.replace("ww", "wː")
    word = word.replace("yy", "jː")
    word = word.replace("y", "j")
    # lossy
    word = word.replace("é", "e")
    word = word.replace("ë", "e")
    word = word.replace("ñ", "n")
    word = word.replace("ŋ", "n")
    # print(f'custom_g2p_fn | OUT: {word}')
    return word

# def save_g2p_cache(self):
#     # TEMPORARY
#     pass


class YorubaTextPreprocessor(TextPreprocessor):
    """Yoruba preprocessor: epitran/espeak code "yor-Latn", numbers via num2words."""
    def __init__(self, base_dir, logger=None, use_g2p=True, use_epitran=False):
        super(YorubaTextPreprocessor, self).__init__("yo", "yor-Latn", base_dir, logger=logger, use_g2p=use_g2p, use_epitran=use_epitran)
        self.lang_name = "Yoruba"
        self.abbreviations = []
        self.normalize_numbers = self.num2words_fn
        self.init_post()

# FIXME(corruption): this chunk of the source ended with the dangling token "class " — the
# header "class ChineseTextPreprocessor(TextPreprocessor):" is split across chunk boundaries.
# NOTE(review): the leading "class " keyword of this header sits at the end of the previous
# corrupted chunk of the file.
class ChineseTextPreprocessor(TextPreprocessor):
    """Chinese preprocessor: uses g2pC to produce pinyin, then maps pinyin to xVAARPAbet."""

    def __init__(self, base_dir, logger=None, use_g2p=True, use_epitran=False):
        super(ChineseTextPreprocessor, self).__init__("zh", None, base_dir, logger=logger, use_g2p=use_g2p, use_epitran=use_epitran)
        self.lang_name = "Chinese"
        self.abbreviations = []
        self.init_post()
        # self.g2p = None
        # if self.use_g2p:
        #     self.g2p = G2pC()
        from g2pc import G2pC
        self.g2p = G2pC()
        self.TEMP_unhandled = []  # accumulates phones that could not be mapped (debug aid)

    def split_pinyin(self, pinyin):
        """Split one pinyin syllable into [INITIAL, FINAL] (or [FINAL]) in upper case.

        The initial is the longest leading consonant from the standard pinyin initials
        list ("zh"/"ch"/"sh" checked before single letters).
        """
        symbs_split = []
        pinyin = pinyin.lower()
        splitting_symbs = ["zh", "ch", "sh", "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "z", "c", "s", "r", "j", "q", "x"]
        for ss in splitting_symbs:
            # if phon.startswith(ss) and not phon.endswith("i"):
            if pinyin.startswith(ss):
                symbs_split.append(ss.upper())
                pinyin = pinyin[len(ss):]
                break
        symbs_split.append(pinyin.upper())
        return symbs_split

    def post_process_pinyin_symbs(self, symbs):
        """Split every space-separated pinyin syllable in `symbs` and re-join the pieces."""
        post_processed = []
        # splitting_symbs = ["zh", "ch", "sh", "b", "p", "m", "f", "d", "t", "n", "l", "g", "k", "h", "z", "c", "s", "r", "j", "q", "x"]
        for symb in symbs.split(" "):
            if len(symb)==0:
                continue
            symbs = self.split_pinyin(symb)
            for symb in symbs:
                post_processed.append(symb)
            # for ss in splitting_symbs:
            #     # if phon.startswith(ss) and not phon.endswith("i"):
            #     if symb.startswith(ss):
            #         post_processed.append(ss.upper())
            #         symb = symb[len(ss):]
            #         break
            # post_processed.append(symb.upper())
        return " ".join(post_processed)

    # FIXME(corruption): `fill_missing_via_g2p_zh` continues into the following chunks of the
    # file and its tail was destroyed there; the head visible in this chunk is preserved below
    # and must be restored from version control.
    # def fill_missing_via_g2p_zh (self, text):
    #     # TODO, switch to from nltk.tokenize import word_tokenize
    #     orig_text = text
    #     # print(f'[g2p] orig_text, |{orig_text}|')
    #     text_parts = text.split("{")
    #     text_parts2 = [(part.split("}")[1] if "}" in part else part) for part in text_parts]
    #     # print(f'[g2p] text_parts, {text_parts}')
    #     # print(f'[g2p] text_parts2, {text_parts2}')
    #     phonemised = []
    #     for part in text_parts2:
    #         # words = part.split(" ")
    #         words = [part]
    #         part_phonemes = []
    #         for word in words:
    #             word = word.strip()
    #             if len(word):
    #                 # print(f'[g2p] word, {word}')
    #                 sub_parts = []
sub_part_phonemes = [] # ====== punctuation stuff start ======== # Get which punctuation symbols are contained in the text fragment puncs_contained = [] for punc in PUNCTUATION: if punc in word: puncs_contained.append(punc) # Split away the punctuation from text sub_parts = [word] # print(f'puncs_contained, {puncs_contained}') if len(puncs_contained): for punc in puncs_contained: # init a new sub part list (list 2) sub_parts2 = [] # for each sub part... for sp in sub_parts: sp = sp.strip() # ...if it not already a punctuation symbol, try splitting it by the current punctuation symbol if sp not in PUNCTUATION: sp_split = sp.split(punc) # if the split list length is 1, add to list 2 if len(sp_split)==1: sub_parts2.append(sp_split[0]) else: # if it's more than 1 # print(f'sp_split, {sp_split}') for spspi, sps_part in enumerate(sp_split): # iterate through each item, and add to list, but also add the punct, apart from the last item sub_parts2.append(sps_part) if spspi<(len(sp_split)-1): sub_parts2.append(punc) else: # otherwise add the punct to list 2 sub_parts2.append(sp) # set the sub parts list to list 2, for the next loop, or ready sub_parts = sub_parts2 else: sub_parts = [word] # ====== punctuation stuff end ======== # print(f'sub_parts, {sub_parts}') for sp in sub_parts: if sp in PUNCTUATION: sub_part_phonemes.append(sp) else: sp = sp.replace("\"", "").replace(")", "").replace("(", "").replace("]", "").replace("[", "").strip() if len(sp): if sp.lower() in self.g2p_cache.keys() and len(self.g2p_cache[sp.lower()].strip()): g2p_out = self.g2p_cache[sp.lower()] g2p_out = self.post_process_pinyin_symbs(g2p_out) sub_part_phonemes.append(g2p_out) else: # print(f'sp, {sp} ({len(self.g2p_cache.keys())})') # g2p_out = self.espeak.phonemize(sp).replace("|", " ") g2p_out = self.g2p(sp) g2p_out = " ".join([out_part[2] for out_part in g2p_out]) self.g2p_cache[sp.lower()] = g2p_out self.save_g2p_cache() # g2p_out = ipa2xvaarpabet(g2p_out) g2p_out = 
self.post_process_pinyin_symbs(g2p_out) # print(f'g2p_out, {g2p_out}') sub_part_phonemes.append(g2p_out) part_phonemes.append(" ".join(sub_part_phonemes)) phonemised.append(" _ ".join(part_phonemes)) # print("--") # print(f'text_parts ({len(text_parts)}), {text_parts}') # print(f'[g2p] phonemised ({len(phonemised)}), {phonemised}') text = [] for ppi, phon_part in enumerate(phonemised): # print(f'phon_part, {phon_part}') prefix = "" if "}" in text_parts[ppi]: if ppi1: # A split happened. Add the first split-pinyin into the list... final_parts.append(split_symbs[0]) # ... then check if the second half of the split starts with one of the "ending" pinyin phonemes second_half = split_symbs[1] for phone in PIN_YIN_ENDS: if second_half.startswith(phone): final_parts.append(phone) second_half = second_half[len(phone):] if len(second_half): do_again = True break # Check to see if the leftover starts with one of the pinyin to arpabet mappings for phone_key in pinyin_to_arpabet_mappings.keys(): if second_half.startswith(phone_key): final_parts.append(pinyin_to_arpabet_mappings[phone_key]) second_half = second_half[len(pinyin_to_arpabet_mappings[phone_key]):] if len(second_half): do_again = True break part = second_half else: # If the part wasn't split up, then check if it starts with a "split" pinyin symbol, but not with the splitting consonants for phone in PIN_YIN_ENDS: if part.startswith(phone): # Starts with an "ending" phoneme, so add to the list and remove from the part final_parts.append(phone) part = part[len(phone):] if len(part): # Repeat the whole thing, if there's still any left-over stuff do_again = True break # Check to see if the leftover starts with one of the pinyin to arpabet mappings for phone_key in pinyin_to_arpabet_mappings.keys(): if part.startswith(phone_key): # Starts with a replacement phone, so add to the list and remove from the part final_parts.append(pinyin_to_arpabet_mappings[phone_key]) part = part[len(pinyin_to_arpabet_mappings[phone_key]):] 
if len(part): # Repeat the whole thing, if there's still any left-over stuff do_again = True break # print(f'part, {part}') if len(part): final_parts.append(part) # print(f'final_parts, {final_parts}') # self.logger.info(f'preprocess_pinyin final_parts: {final_parts}') all_split_are_pinyin = True final_parts_post = [] for split in final_parts: if split in pinyin_to_arpabet_mappings.keys(): # self.logger.info(f'preprocess_pinyin changing split from: {split} to {pinyin_to_arpabet_mappings[split]}') split = pinyin_to_arpabet_mappings[split] # if split=="J": # split = "JH" if split in ALL_SYMBOLS: final_parts_post.append(split) else: if split+"5" in ALL_SYMBOLS: final_parts_post.append(split+"5") else: all_split_are_pinyin = False # self.logger.info(f'preprocess_pinyin final_parts_post: {final_parts_post}') if all_split_are_pinyin: # text_final.append("{"+" ".join(final_parts)+"}") text_final.append("{"+" ".join(final_parts_post)+"}") else: text_final.append(part) # print(f'text_final, {text_final}') final_out.append(" ".join(text_final)) # self.logger.info(f'preprocess_pinyin final_out: {final_out}') text = " ".join(final_out) # self.logger.info(f'preprocess_pinyin return text: {text}') return text def text_to_phonemes (self, text): # print(f'text_to_phonemes, {text}') text = self.collapse_whitespace(text).replace(" }", "}").replace("{ ", "{") text = self.preprocess_pinyin(text) # text = self.clean_numbers(text) # print(f'clean_numbers: |{text}|') # text = self.clean_abbreviations(text) # print(f'clean_abbreviations: |{text}|') # text = self.dict_replace(text, customDicts=True) # print(f'dict_replace(custom): |{text}|') # text = self.detect_and_fill_heteronyms(text) # print(f'detect_and_fill_heteronyms: |{text}|') # text = self.dict_replace(text, customDicts=False) # print(f'dict_replace(built-in):, |{text}|') # text = self.clean_POS_and_subword_misses(text) # self.logger.info(f'clean_POS_and_subword_misses: |{text}|') text = self.fill_missing_via_g2p_zh(text) # 
self.logger.info(f'1 text: {text}') # text = self.en_processor.text_to_phonemes(text) # self.logger.info(f'2 text: {text}') # print(f'fill_missing_via_g2p: |{text}|') return text def text_to_sequence (self, text): orig_text = text text = self.collapse_whitespace(text) # Get rid of duplicate/padding spaces text = text.replace("!", "!").replace("?", "?").replace(",", ",").replace("。", ",").replace("…", "...").replace(")", "").replace("(", "")\ .replace("、", ",").replace("“", ",").replace("”", ",").replace(":", ":") text = self.text_to_phonemes(text) # Get 100% phonemes from the text # if self.logger is not None: # self.logger.info(f'1 text: {text}') # text = self.en_processor.text_to_phonemes(text) # self.logger.info(f'2 text: {text}') phonemes = self.collapse_whitespace(text).strip().split(" ") # self.logger.info(f'1 phonemes: {phonemes}') sequence = [] for pi,phone in enumerate(phonemes): phone = phone.replace(":","").strip() if len(phone): try: sequence.append(ALL_SYMBOLS.index(phone)) except: if phone in pinyin_to_arpabet_mappings.keys(): sequence.append(ALL_SYMBOLS.index(pinyin_to_arpabet_mappings[phone])) else: if phone not in ["5"]: self.TEMP_unhandled.append(f'{orig_text}: {phone}') # with open(f'F:/Speech/xVA-Synth/python/xvapitch/text/DEBUG.txt', "w+") as f: # f.write("\n".join(self.TEMP_unhandled)) # Add a space character between each symbol # if pi is not len(phonemes)-1: # sequence.append(ALL_SYMBOLS.index("_")) # Intersperse blank symbol if required if self.add_blank: sequence_ = [] for si,symb in enumerate(sequence): sequence_.append(symb) if si2: phones = tp.text_to_phonemes(word) print(f'word, {word}') print(f'phones, {phones}') metadata_out.append(f'skyrim|sk_femaleeventoned|This is what '+"{" + phones +"}"+f' sounds like.|./{word}.wav') txt_out.append(f'{word}|{phones}') with open(f'./g2p_batch.csv', "w+") as f: f.write("\n".join(metadata_out)) with open(f'./txt_out.csv', "w+") as f: f.write("\n".join(txt_out)) fddfg() if __name__ == '__main__': 
base_dir = "/".join(os.path.abspath(__file__).split("\\")[:-1]) # tp = get_text_preprocessor("th", base_dir) # tp = get_text_preprocessor("mn", base_dir) tp = get_text_preprocessor("wo", base_dir) # print(tp.text_to_phonemes("นี่คือประโยคภาษาไทยที่พูดโดย xVASynth ประมาณนี้ค่ะ")) # print(tp.text_to_phonemes("Энэ бол {EH1 G S V EY0 EY0 IH0 S IH0 N TH}-ийн ярьдаг монгол хэл дээрх өгүүлбэр юм. ")) print(tp.text_to_phonemes(" Kii est ab baat ci wolof, janga par xvasynth ")) fddfg() if __name__ == '__main__': base_dir = "/".join(os.path.abspath(__file__).split("\\")[:-1]) tp = get_text_preprocessor("ha", base_dir) print(tp.text_to_phonemes("Wannan jimla ce a cikin hausa, xVASynth ta yi magana ")) fddfg() # if __name__ == '__main__': if False: print("Mass pre-caching g2p") def get_datasets (root_f): data_folders = os.listdir(root_f) data_folders = [f'{root_f}/{dataset_folder}' for dataset_folder in sorted(data_folders) if not dataset_folder.startswith("_") and "." not in dataset_folder] return data_folders base_dir = "/".join(os.path.abspath(__file__).split("\\")[:-1]) # all_data_folders = get_datasets(f'D:/xVASpeech/DATASETS')+get_datasets(f'D:/xVASpeech/GAME_DATA') all_data_folders = get_datasets(f'D:/xVASpeech/GAME_DATA') for dfi,dataset_folder in enumerate(all_data_folders): lang = dataset_folder.split("/")[-1].split("_")[0] if "de_f4" in dataset_folder: continue # if lang not in ["zh"]: # continue # if lang in ["am", "sw"]: # continue # Skip currently running training tp = get_text_preprocessor(lang, base_dir) with open(f'{dataset_folder}/metadata.csv') as f: lines = f.read().split("\n") for li,line in enumerate(lines): print(f'\r{dfi+1}/{len(all_data_folders)} | {li+1}/{len(lines)} | {dataset_folder} ', end="", flush=True) if "|" in line: text = line.split("|")[1] if len(text): tp.text_to_phonemes(text) print("") fsdf() # kks = pykakasi.kakasi() # pron_dict = {} # # with open(f'F:/Speech/xva-trainer/python/xvapitch/text_prep/dicts/japanese.txt') as f: # with 
open(f'F:/Speech/xVA-Synth/python/xvapitch/text/dicts/japanese.txt') as f: # lines = f.read().split("\n") # for li,line in enumerate(lines): # print(f'\r{li+1}/{len(lines)}', end="", flush=True) # if len(line.strip()): # word = line.split(" ")[0] # phon = " ".join(line.split(" ")[1:]) # word = kks.convert(word) # word = "".join([part["hira"] for part in word]) # # word = word.replace(" ", "").replace(" ", "") # pron_dict[word] = phon # csv_out = [] # for key in pron_dict.keys(): # csv_out.append(f'{key} {pron_dict[key]}') # with open(f'F:/Speech/xva-trainer/python/xvapitch/text_prep/dicts/japanese_h.txt', "w+") as f: # f.write("\n".join(csv_out)) if False: tp = ChineseTextPreprocessor(base_dir) # tp.load_g2p_cache(f'F:/Speech/xva-trainer/python/xvapitch/text_prep/g2p_cache/g2pc_cache_zh.txt') line = "你好。 这就是 xVASynth 声音的样子。" line = "遛弯儿都得躲远点。" # line = "Nǐ hǎo" # line = "Zhè shì yīgè jiào zhǎng de jùzi. Wǒ xīwàng tā shì zhèngquè de, yīnwèi wǒ zhèngzài shǐyòng gǔgē fānyì tā" # phones = tp.text_to_phonemes(line) # print(f'phones, |{phones}|') phones = tp.text_to_sequence(line) print(f'phones, |{phones[1]}|') print("start setup...") text = [] # text.append("nords") # text.append("I read the book... It was a good book to read?{T EH S T}! Test dovahkiin word") # text.append(" I read the book... It was a good book to read?{T EH S T}! Test dovahkiin word") # text.append("{AY1 } read the book... It was a good book to read?{T EH S T}! Test 1 dovahkiin word") text.append(" {AY1 } read the book... It was a good book to read?{T EH S T}! 
Test 1 dovahkiin word ") # text.append("the scaffold hung with black; and the inhabitants of the neighborhood, having petitioned the sheriffs to remove the scene of execution to the old place,") text.append("oxenfurt") text.append("atomatoys") import os base_dir = "/".join(os.path.abspath(__file__).split("\\")[:-1]) print(f'base_dir, {base_dir}') tp = EnglishTextPreprocessor(base_dir) tp.load_dict(f'F:/Speech/xva-trainer/python/xvapitch/text_prep/dicts/cmudict.txt') tp.load_dict(f'F:/Speech/xVA-Synth/arpabet/xvadict-elder_scrolls.json', isCustom=True) # tp.load_g2p_cache(f'F:/Speech/xva-trainer/python/xvapitch/text_prep/g2p_cache/espeak/espeak_cache_en.txt') print("start inferring...") for line in text: print(f'Line: |{line}|') phonemes = tp.text_to_phonemes(line) print(f'xVAARPAbet: |{phonemes}|') # TODO # - Add the POS, and extra cleaning stuff