Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
# Holds the symbol tables for graphemes, phonemes, and POS tags.
# noinspection SpellCheckingInspection,GrazieInspection | |
""" | |
POS tag list:
CC    coordinating conjunction
CD    cardinal digit
DT    determiner
EX    existential there ("there is" -> "there exists")
FW    foreign word
IN    preposition/subordinating conjunction
JJ    adjective ('big')
JJR   adjective, comparative ('bigger')
JJS   adjective, superlative ('biggest')
LS    list marker ("1)", "2)", "3)")
MD    modal ('could', 'will')
NN    noun, singular
NNS   noun, plural
NNP   proper noun, singular ('Harrison')
NNPS  proper noun, plural ('Americans')
PDT   predeterminer ('all' in 'all the kids')
POS   possessive ending ("parent's")
PRP   personal pronoun ('I', 'he', 'she')
PRP$  possessive pronoun ('my', 'his', 'hers')
RB    adverb ('very', 'silently')
RBR   adverb, comparative ('better')
RBS   adverb, superlative ('best')
RP    particle ('give up')
TO    to ("go 'to' the store.")
UH    interjection ("errrrrrrrm")
VB    verb, base form ('take')
VBD   verb, past tense ('took')
VBG   verb, gerund/present participle ('taking')
VBN   verb, past participle ('taken')
VBP   verb, singular present, non-3rd person ('take')
VBZ   verb, 3rd person singular present ('takes')
WDT   wh-determiner ('which')
WP    wh-pronoun ('who', 'what')
WP$   possessive wh-pronoun ('whose')
WRB   wh-adverb ('where', 'when')
""" | |
from __future__ import annotations | |
# noinspection SpellCheckingInspection,GrazieInspection
# Lowercase Latin letters, one list entry per grapheme.
graphemes = [*"abcdefghijklmnopqrstuvwxyz"]

# Phoneme symbols, one family per row. Vowels carry a stress digit (0/1/2);
# 'UW' is additionally listed without a stress digit.
phonemes = (
    "AA0 AA1 AA2 AE0 AE1 AE2 AH0 AH1 AH2 AO0 AO1 AO2 AW0 AW1 AW2 AY0 AY1 AY2 "
    "B CH D DH "
    "EH0 EH1 EH2 ER0 ER1 ER2 EY0 EY1 EY2 "
    "F G HH "
    "IH0 IH1 IH2 IY0 IY1 IY2 "
    "JH K L M N NG "
    "OW0 OW1 OW2 OY0 OY1 OY2 "
    "P R S SH T TH "
    "UH0 UH1 UH2 UW UW0 UW1 UW2 "
    "V W Y Z ZH"
).split()

# Part-of-speech tags, in the same order as the module docstring.
pos_tags = (
    "CC CD DT EX FW IN JJ JJR JJS LS MD NN NNS "
    "NNP NNPS PDT POS PRP PRP$ RB RBR RBS RP TO UH "
    "VB VBD VBG VBN VBP VBZ WDT WP WP$ WRB"
).split()

# Short type tag -> full type tag. The two list views below are derived from
# this mapping and rely on dict insertion order.
pos_type_form_dict = {
    'V': 'VERB',
    'N': 'NOUN',
    'P': 'PRON',
    'A': 'ADJ',
    'R': 'ADV',
}
pos_type_tags = list(pos_type_form_dict.values())
pos_type_short_tags = list(pos_type_form_dict)

# Set views of the lists above, for membership tests.
graphemes_set = {*graphemes}
phonemes_set = {*phonemes}
pos_tags_set = {*pos_tags}
pos_type_tags_set = {*pos_type_tags}
pos_type_short_tags_set = {*pos_type_short_tags}

# Each character of this string is one punctuation symbol.
punctuation = set(".,:;?!-_'\"`~@#$")

# Consonant phonemes (these never carry a stress digit).
consonants = set(
    "B CH D DH F G HH JH K L M N NG P R S SH T TH V W Y Z ZH".split()
)
def to_full_type_tag(short_type_tag: str) -> str | None:
    """Convert a short POS type tag to its full type tag.

    The lookup is delegated to ``pos_type_form_dict`` so that the
    short-to-full mapping is defined in exactly one place in this module.

    :param short_type_tag: Short type tag: 'V', 'N', 'P', 'A' or 'R'.
    :return: The matching full type tag ('VERB', 'NOUN', 'PRON', 'ADJ' or
        'ADV'), or None when the short tag is not recognized.
    """
    return pos_type_form_dict.get(short_type_tag)