File size: 2,928 Bytes
2080fde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# dictionary.py

# Defines a Dictionary class that loads word entries from a JSON file and looks up their pronunciations
import sys
if sys.version_info < (3, 9):
    # In Python versions below 3.9, this is needed
    import importlib_resources as pkg_resources
else:
    # Since python 3.9+, importlib.resources.files is built-in
    import importlib.resources as pkg_resources
from os.path import exists
import json
import h2p_parser.pos_parser as pos_parser


# Method to get data path
def get_data_path():
    """Return the location of the packaged ``h2p_parser.data`` folder.

    The value is whatever ``pkg_resources.files`` yields for that package,
    so callers can ``joinpath`` a file name onto it.

    Raises:
        FileNotFoundError: If the lookup yields ``None``.
    """
    data_dir = pkg_resources.files('h2p_parser.data')
    # Guard first, so the failure case is the only thing left at the end.
    if data_dir is not None:
        return data_dir
    raise FileNotFoundError("Data folder not found")


# Dictionary class
class Dictionary:
    """Pronunciation dictionary loaded from a JSON file.

    The JSON document must be a non-empty object. Lookups lowercase the
    word first, so keys are expected to be lowercase words; each value is
    used as a mapping from a part-of-speech tag (or ``'DEFAULT'``) to a
    pronunciation.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, file_name=None):
        """Load the dictionary into memory.

        Args:
            file_name: Path to a JSON dictionary file. When ``None``, the
                bundled ``dict.json`` in the package data folder is used.

        Raises:
            FileNotFoundError: If ``file_name`` is given but does not exist.
            ValueError: If the file is not valid JSON, or does not contain
                a non-empty JSON object.
        """
        # True when the bundled dictionary is used instead of a user file.
        self.use_default = file_name is None
        # Name (default) or path (user supplied) of the dictionary file.
        self.file_name = 'dict.json' if self.use_default else file_name
        # Parsed word -> {pos: pronunciation} mapping.
        self.dictionary = self.load_dictionary(file_name)

    # Loads the dictionary from the json file
    def load_dictionary(self, path=None):
        """Read and validate a JSON dictionary file.

        Args:
            path: Path of the file to read. When ``None``, ``self.file_name``
                is resolved inside the package data folder.

        Returns:
            The parsed, non-empty ``dict``.

        Raises:
            FileNotFoundError: If ``path`` is given but does not exist.
            ValueError: If the content is not valid UTF-8 JSON, or is not a
                non-empty JSON object.
        """
        if path is None:
            source = self.file_name
            # Open through the resource object itself rather than str(path),
            # so the read does not depend on it being a plain file path.
            resource = get_data_path().joinpath(source)
            handle = resource.open('r', encoding='utf-8')
        else:
            source = path
            if not exists(path):
                raise FileNotFoundError(f'Dictionary {source} file not found')
            # JSON is UTF-8; do not depend on the platform default encoding.
            handle = open(path, encoding='utf-8')

        with handle as json_file:
            try:
                read_dict = json.load(json_file)
            except (json.JSONDecodeError, UnicodeDecodeError) as err:
                # Both are ValueError subclasses; re-raise with a clearer
                # message while keeping the original cause attached.
                raise ValueError(
                    f'Dictionary {source} file is not valid JSON'
                ) from err

        # Must be a JSON object with at least one entry. Scalars and lists
        # are rejected here instead of failing later during lookups.
        if not isinstance(read_dict, dict) or not read_dict:
            raise ValueError('Dictionary is empty or invalid')
        return read_dict

    # Check if a word is in the dictionary
    def contains(self, word):
        """Return ``True`` if ``word`` (case-insensitive) has an entry."""
        return word.lower() in self.dictionary

    # Get the phonetic pronunciation of a word using Part of Speech tag
    def get_phoneme(self, word, pos):
        """Return the pronunciation of ``word`` for a part-of-speech tag.

        Resolution order: the exact ``pos`` key, then the parent tag given
        by ``pos_parser.get_parent_pos(pos)``, then the ``'DEFAULT'`` key.

        Args:
            word: Word to look up (case-insensitive).
            pos: Part-of-speech tag for the word.

        Returns:
            The stored pronunciation, or ``None`` when the word is unknown
            or none of the candidate keys is present.
        """
        # Unknown words resolve to None like every other "no match" case.
        sub_dict = self.dictionary.get(word.lower())
        if sub_dict is None:
            return None

        # First, check if the exact pos is a key
        if pos in sub_dict:
            return sub_dict[pos]

        # If not, fall back to the parent of the pos tag
        parent_pos = pos_parser.get_parent_pos(pos)
        if parent_pos is not None and parent_pos in sub_dict:
            return sub_dict[parent_pos]

        # Finally, fall back to the DEFAULT entry if there is one
        if 'DEFAULT' in sub_dict:
            return sub_dict['DEFAULT']

        # If no matches, return None
        return None