Spaces:

Sunbird
/

luganda2english-stts

Runtime error

App Files Files Community

luganda2english-stts / text /__init__.py

mekaneeky

Go GO

0a06de9 about 1 year ago

raw

history blame

No virus

5.53 kB

	# """ from https://github.com/keithito/tacotron """
	# from text import cleaners
	# #from text.symbols import symbols


	# class TextMapper(object):
	# def __init__(self, vocab_file):
	# self.symbols = [x.replace("\n", "") for x in open(vocab_file, encoding="utf-8").readlines()]
	# self.SPACE_ID = self.symbols.index(" ")
	# self._symbol_to_id = {s: i for i, s in enumerate(self.symbols)}
	# self._id_to_symbol = {i: s for i, s in enumerate(self.symbols)}

	# def text_to_sequence(self, text, cleaner_names):
	# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
	# Args:
	# text: string to convert to a sequence
	# cleaner_names: names of the cleaner functions to run the text through
	# Returns:
	# List of integers corresponding to the symbols in the text
	# '''
	# sequence = []
	# clean_text = text.strip()
	# for symbol in clean_text:
	# symbol_id = self._symbol_to_id[symbol]
	# sequence += [symbol_id]
	# return sequence

	# def uromanize(self, text, uroman_pl):
	# iso = "xxx"
	# with tempfile.NamedTemporaryFile() as tf, \
	# tempfile.NamedTemporaryFile() as tf2:
	# with open(tf.name, "w") as f:
	# f.write("\n".join([text]))
	# cmd = f"perl " + uroman_pl
	# cmd += f" -l {iso} "
	# cmd += f" < {tf.name} > {tf2.name}"
	# os.system(cmd)
	# outtexts = []
	# with open(tf2.name) as f:
	# for line in f:
	# line = re.sub(r"\s+", " ", line).strip()
	# outtexts.append(line)
	# outtext = outtexts[0]
	# return outtext

	# def get_text(self, text, hps):
	# text_norm = self.text_to_sequence(text, hps.data.text_cleaners)
	# if hps.data.add_blank:
	# text_norm = commons.intersperse(text_norm, 0)
	# text_norm = torch.LongTensor(text_norm)
	# return text_norm

	# def filter_oov(self, text):
	# val_chars = self._symbol_to_id
	# txt_filt = "".join(list(filter(lambda x: x in val_chars, text)))
	# #print(f"text after filtering OOV: {txt_filt}")
	# return txt_filt

	# def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):
	# txt = preprocess_char(txt, lang=lang)
	# is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
	# if is_uroman:
	# with tempfile.TemporaryDirectory() as tmp_dir:
	# if uroman_dir is None:
	# cmd = f"git clone [email protected]:isi-nlp/uroman.git {tmp_dir}"
	# print(cmd)
	# subprocess.check_output(cmd, shell=True)
	# uroman_dir = tmp_dir
	# uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl")
	# print(f"uromanize")
	# txt = text_mapper.uromanize(txt, uroman_pl)
	# print(f"uroman text: {txt}")
	# txt = txt.lower()
	# txt = text_mapper.filter_oov(txt)
	# return txt


	# # Mappings from symbol to numeric ID and vice versa:
	# mapper = TextMapper("ach/vocab.txt")
	# _symbol_to_id = mapper._symbol_to_id#{s: i for i, s in enumerate(symbols)}
	# _id_to_symbol = mapper._id_to_symbol#{i: s for i, s in enumerate(symbols)}


	# def preprocess_text(txt, text_mapper, hps, uroman_dir=None, lang=None):
	# txt = preprocess_char(txt, lang=lang)
	# is_uroman = hps.data.training_files.split('.')[-1] == 'uroman'
	# if is_uroman:
	# with tempfile.TemporaryDirectory() as tmp_dir:
	# if uroman_dir is None:
	# cmd = f"git clone [email protected]:isi-nlp/uroman.git {tmp_dir}"
	# print(cmd)
	# subprocess.check_output(cmd, shell=True)
	# uroman_dir = tmp_dir
	# uroman_pl = os.path.join(uroman_dir, "bin", "uroman.pl")
	# print(f"uromanize")
	# txt = text_mapper.uromanize(txt, uroman_pl)
	# print(f"uroman text: {txt}")
	# txt = txt.lower()
	# txt = text_mapper.filter_oov(txt)
	# return txt


	# def text_to_sequence(text, cleaner_names):
	# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
	# Args:
	# text: string to convert to a sequence
	# cleaner_names: names of the cleaner functions to run the text through
	# Returns:
	# List of integers corresponding to the symbols in the text
	# '''
	# sequence = []

	# clean_text = _clean_text(text, cleaner_names)
	# for symbol in clean_text:
	# symbol_id = _symbol_to_id[symbol]
	# sequence += [symbol_id]
	# return sequence


	# def cleaned_text_to_sequence(cleaned_text):
	# '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
	# Args:
	# text: string to convert to a sequence
	# Returns:
	# List of integers corresponding to the symbols in the text
	# '''
	# sequence = [_symbol_to_id[symbol] for symbol in cleaned_text]
	# return sequence


	# def sequence_to_text(sequence):
	# '''Converts a sequence of IDs back to a string'''
	# result = ''
	# for symbol_id in sequence:
	# s = _id_to_symbol[symbol_id]
	# result += s
	# return result


	# def _clean_text(text, cleaner_names):
	# for name in cleaner_names:
	# cleaner = getattr(cleaners, name)
	# if not cleaner:
	# raise Exception('Unknown cleaner: %s' % name)
	# text = cleaner(text)
	# return text