Spaces:
Runtime error
Runtime error
# """ from https://github.com/keithito/tacotron """ | |
# from text import cleaners | |
# #from text.symbols import symbols | |
# class TextMapper(object): | |
# def __init__(self, vocab_file): | |
# self.symbols = [x.replace("\n", "") for x in open(vocab_file, encoding="utf-8").readlines()] | |
# self.SPACE_ID = self.symbols.index(" ") | |
# self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)} | |
# self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)} | |
# def text_to_sequence(self, text, cleaner_names): | |
# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. | |
# Args: | |
# text: string to convert to a sequence | |
# cleaner_names: names of the cleaner functions (accepted but not applied here; the text is only stripped) | |
# Returns: | |
# List of integers corresponding to the symbols in the text | |
# ''' | |
# sequence = [] | |
# clean_text = text.strip() | |
# for symbol in clean_text: | |
# symbol_id = self._symbol_to_id[symbol] | |
# sequence += [symbol_id] | |
# return sequence | |
# def uromanize(self, text, uroman_pl): | |
# iso = "xxx" | |
# with tempfile.NamedTemporaryFile() as tf, \ | |
# tempfile.NamedTemporaryFile() as tf2: | |
# with open(tf.name, "w") as f: | |
# f.write("\n".join([text])) | |
# cmd = f"perl " + uroman_pl | |
# cmd += f" -l {iso} " | |
# cmd += f" < {tf.name} > {tf2.name}" | |
# os.system(cmd) | |
# outtexts = [] | |
# with open(tf2.name) as f: | |
# for line in f: | |
# line = re.sub(r"\s+", " ", line).strip() | |
# outtexts.append(line) | |
# outtext = outtexts[0] | |
# return outtext | |
# def get_text(self, text, hps): | |
# text_norm = self.text_to_sequence(text, hps.data.text_cleaners) | |
# if hps.data.add_blank: | |
# text_norm = commons.intersperse(text_norm, 0) | |
# text_norm = torch.LongTensor(text_norm) | |
# return text_norm | |
# def filter_oov(self, text): | |
# val_chars = self._symbol_to_id | |
# txt_filt = "".join(list(filter(lambda x: x in val_chars, text))) | |
# #print(f"text after filtering OOV: {txt_filt}") | |
# return txt_filt | |
# def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None): | |
# txt = preprocess_char(txt, lang=lang) | |
# is_uroman = hps.data.training_files.split('.')[-1] == 'uroman' | |
# if is_uroman: | |
# with tempfile.TemporaryDirectory() as tmp_dir: | |
# if uroman_dir is None: | |
# cmd = f"git clone git@github.com:isi-nlp/uroman.git {tmp_dir}" | |
# print(cmd) | |
# subprocess.check_output(cmd, shell=True) | |
# uroman_dir = tmp_dir | |
# uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl") | |
# print(f"uromanize") | |
# txt = text_mapper.uromanize(txt, uroman_pl) | |
# print(f"uroman text: {txt}") | |
# txt = txt.lower() | |
# txt = text_mapper.filter_oov(txt) | |
# return txt | |
# # Mappings from symbol to numeric ID and vice versa: | |
# mapper = TextMapper("ach/vocab.txt") | |
# _symbol_to_id = mapper._symbol_to_id#{s: i for i, s in enumerate(symbols)} | |
# _id_to_symbol = mapper._id_to_symbol#{i: s for i, s in enumerate(symbols)} | |
# def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None): | |
# txt = preprocess_char(txt, lang=lang) | |
# is_uroman = hps.data.training_files.split('.')[-1] == 'uroman' | |
# if is_uroman: | |
# with tempfile.TemporaryDirectory() as tmp_dir: | |
# if uroman_dir is None: | |
# cmd = f"git clone git@github.com:isi-nlp/uroman.git {tmp_dir}" | |
# print(cmd) | |
# subprocess.check_output(cmd, shell=True) | |
# uroman_dir = tmp_dir | |
# uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl") | |
# print(f"uromanize") | |
# txt = text_mapper.uromanize(txt, uroman_pl) | |
# print(f"uroman text: {txt}") | |
# txt = txt.lower() | |
# txt = text_mapper.filter_oov(txt) | |
# return txt | |
# def text_to_sequence(text, cleaner_names): | |
# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. | |
# Args: | |
# text: string to convert to a sequence | |
# cleaner_names: names of the cleaner functions to run the text through | |
# Returns: | |
# List of integers corresponding to the symbols in the text | |
# ''' | |
# sequence = [] | |
# clean_text = _clean_text(text, cleaner_names) | |
# for symbol in clean_text: | |
# symbol_id = _symbol_to_id[symbol] | |
# sequence += [symbol_id] | |
# return sequence | |
# def cleaned_text_to_sequence(cleaned_text): | |
# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text. | |
# Args: | |
# cleaned_text: already-cleaned string to convert to a sequence | |
# Returns: | |
# List of integers corresponding to the symbols in the text | |
# ''' | |
# sequence = [_symbol_to_id[symbol] for symbol in cleaned_text] | |
# return sequence | |
# def sequence_to_text(sequence): | |
# '''Converts a sequence of IDs back to a string''' | |
# result = '' | |
# for symbol_id in sequence: | |
# s = _id_to_symbol[symbol_id] | |
# result += s | |
# return result | |
# def _clean_text(text, cleaner_names): | |
# for name in cleaner_names: | |
# cleaner = getattr(cleaners, name) | |
# if not cleaner: | |
# raise Exception('Unknown cleaner: %s' % name) | |
# text = cleaner(text) | |
# return text | |