Spaces:

Pendrokar
/

xVASynth-TTS

Running on CPU Upgrade

App Files Files Community

xVASynth-TTS / resources /app /python /xvapitch /text /h2p_parser /utils /parser.py

Pendrokar

ionite34's h2p_parser and dep required for English

2080fde 9 months ago

raw

history blame

4.62 kB

	# Parses annotation files for conversion of sentences to phonemes
	from __future__ import annotations
	from h2p_parser import cmudictext
	from h2p_parser.filter import filter_text
	from h2p_parser.text.numbers import normalize_numbers
	from h2p_parser.symbols import punctuation

	# read_file (below) reads a delimited file and returns the lower-cased second field of each line
	from tqdm import tqdm


	def read_file(file_name, delimiter) -> list:
	    """Read a delimited annotation file and return its text column.

	    Every line is split on ``delimiter`` and its second field is kept,
	    lower-cased, in file order.

	    Args:
	        file_name: Path of the UTF-8 encoded file to read.
	        delimiter: Separator handed to ``str.split`` for each line.

	    Returns:
	        A list holding the lower-cased second field of every line that
	        has one. Lines without a second field (e.g. blank lines) are
	        skipped instead of raising ``IndexError``.
	    """
	    result = []
	    with open(file_name, 'r', encoding="utf-8") as f:
	        for line in f:
	            # Strip the line terminator first, otherwise it ends up in the
	            # text whenever the second field is also the last one.
	            fields = line.rstrip("\n").split(delimiter)
	            # Blank or delimiter-less lines carry no second field
	            if len(fields) < 2:
	                continue
	            # Take the second element
	            result.append(fields[1].lower())
	    return result

	# Method that checks if a single line is resolvable


	# Checks a list of lines for unresolvable words
	# Returns a ParseResult holding every line and word seen, plus the ones that could not be resolved
	def check_lines(lines: list) -> ParseResult:
	    """Classify every word of ``lines`` as resolvable or unresolvable.

	    Each line is recorded, filtered with ``filter_text``, passed through
	    ``normalize_numbers`` and tokenized. Punctuation tokens are ignored.
	    Every remaining word is counted under the first source that resolves
	    it, in this priority: heteronym (H2p), CMU dictionary, feature lookup.
	    A word that none of them resolves is stored as unresolved, with a
	    trailing "'s" removed, together with the line it came from.

	    Args:
	        lines: The text lines to check.

	    Returns:
	        A ParseResult filled with the collected lines, words and counters.
	    """
	    cde = cmudictext.CMUDictExt()
	    result = ParseResult()

	    # Loop with tqdm so progress is shown
	    for line in tqdm(lines, desc='Checking lines'):
	        result.all_lines.append(line)
	        result.lines.add(line)

	        # Remember lines that contain a heteronym
	        if cde.h2p.contains_het(line):
	            result.all_lines_cont_het.append(line)

	        # Filter the line, then convert numbers, then tokenize
	        cleaned = normalize_numbers(filter_text(line))
	        for word in cde.h2p.tokenize(cleaned):
	            if word in punctuation:
	                continue

	            result.all_words.append(word)
	            result.words.add(word)

	            # All three checks run for every word, in this order.
	            # NOTE(review): presumably cde.lookup feeds the stats read from
	            # cde.p.stat_resolves below, so it must not be short-circuited
	            # - confirm.
	            is_het = cde.h2p.contains_het(word)
	            in_cmu = cde.dict.get(word) is not None
	            by_features = cde.lookup(word) is not None

	            if is_het:
	                result.n_words_res += 1
	                result.n_words_het += 1
	                continue
	            if in_cmu:
	                result.n_words_res += 1
	                result.n_words_cmu += 1
	                continue
	            if by_features:
	                result.n_words_res += 1
	                result.n_words_fet += 1
	                continue

	            # Unresolved: store the base word when it ends in "'s".
	            # The line is appended once per unresolved word, so it can
	            # appear several times in unres_all_lines.
	            base_word = word[:-2] if word.endswith("'s") else word
	            result.unres_all_lines.append(line)
	            result.unres_all_words.append(base_word)
	            result.unres_lines.add(line)
	            result.unres_words.add(base_word)

	    # Also pass stats
	    result.ft_stats = cde.p.stat_resolves
	    return result


	# Class to hold the result of a parse
	class ParseResult:
	    """Holds the lines and words collected by a parse and derives statistics.

	    The containers and counters start empty and are filled in by the caller.
	    All percentage methods return a value rounded to two decimals. A ratio
	    whose denominator is zero (nothing collected yet) is treated as 0, so
	    coverage methods return 100.0 and ``percent_*`` methods return 0.0
	    instead of raising ``ZeroDivisionError``.
	    """

	    def __init__(self):
	        # Lines
	        self.all_lines = []           # Every line, duplicates kept
	        self.all_lines_cont_het = []  # Lines containing a heteronym
	        self.unres_all_lines = []     # Lines recorded for unresolved words
	        self.lines = set()            # Unique lines
	        self.unres_lines = set()      # Unique lines with unresolved words
	        # Words
	        self.all_words = []           # Every word, duplicates kept
	        self.unres_all_words = []     # Unresolved words, duplicates kept
	        self.words = set()            # Unique words
	        self.unres_words = set()      # Unique unresolved words
	        # Numerical stats
	        self.n_words_res = 0  # Number of total resolved words
	        self.n_words_cmu = 0  # Resolved words from CMU
	        self.n_words_fet = 0  # Resolved words from Features
	        self.n_words_het = 0  # Resolved words from H2p
	        # Stats from cmudictext
	        self.ft_stats = None

	    @staticmethod
	    def _ratio(part, whole) -> float:
	        """Return ``part / whole``, or 0.0 when ``whole`` is zero."""
	        return part / whole if whole else 0.0

	    def line_unique_coverage(self) -> float:
	        """Return the percentage of unique lines without unresolved words."""
	        dec = 1 - self._ratio(len(self.unres_lines), len(self.lines))
	        return round(dec * 100, 2)

	    def word_unique_coverage(self) -> float:
	        """Return the percentage of unique words that are not unresolved."""
	        dec = 1 - self._ratio(len(self.unres_words), len(self.words))
	        return round(dec * 100, 2)

	    def line_coverage(self) -> float:
	        """Return the percentage of lines covered, counting duplicates.

	        The value is ``1 - len(unres_all_lines) / len(all_lines)``; it drops
	        below zero when ``unres_all_lines`` holds more entries than
	        ``all_lines``.
	        """
	        dec = 1 - self._ratio(len(self.unres_all_lines), len(self.all_lines))
	        return round(dec * 100, 2)

	    def word_coverage(self) -> float:
	        """Return the percentage of words covered, counting duplicates."""
	        dec = 1 - self._ratio(len(self.unres_all_words), len(self.all_words))
	        return round(dec * 100, 2)

	    def percent_line_het(self) -> float:
	        """Return the percentage of lines that contain a heteronym."""
	        dec = self._ratio(len(self.all_lines_cont_het), len(self.all_lines))
	        return round(dec * 100, 2)

	    def percent_word_h2p(self) -> float:
	        """Return the percentage of resolved words that H2p resolved."""
	        dec = self._ratio(self.n_words_het, self.n_words_res)
	        return round(dec * 100, 2)

	    def percent_word_cmu(self) -> float:
	        """Return the percentage of resolved words that CMU resolved."""
	        dec = self._ratio(self.n_words_cmu, self.n_words_res)
	        return round(dec * 100, 2)