import subprocess
import sys
from typing import Any, Callable, Optional, Tuple, Union

import pandas as pd
import pyinflect  # noqa: F401  (imported for its side effect: adds `Token._.inflect`)
import spacy


class Parser:
    """Look up the simple-past and past-participle forms of an English verb.

    Inflections come from a CSV overrides table first and fall back to the
    `_.inflect` token extension when the verb is not listed there.
    """

    def __init__(
        self,
        model: str = "en_core_web_sm",
        overrides_path: str = "data/overrides.csv",
    ) -> None:
        """Load the spaCy pipeline and the overrides table.

        Args:
            model: Name of the spaCy model to load (downloaded if missing).
            overrides_path: CSV file holding the manual overrides. It must
                have a `VB` column (used as the index) and one column per
                tag that is looked up (`VBD` and `VBN` in this class).
        """
        self.parser = self.__init_parser(model)
        self.__overrides = self.__load_overrides(overrides_path)

    def __load_overrides(
        self,
        file_path: str
    ) -> Callable[[str, str], Any]:
        """Read the overrides CSV and return a `(verb, tag)` lookup function.

        The returned callable yields the stored cell for `verb` in column
        `tag`, or `False` when the verb is not in the table. An unknown
        `tag` (no such column) raises `KeyError`.
        """
        # to_dict() yields {column: {verb: value}}, i.e. keyed by tag first.
        table = pd.read_csv(file_path).set_index('VB').to_dict()

        def lookup(verb: str, tag: str) -> Any:
            return table[tag].get(verb, False)

        return lookup

    def __init_parser(
        self,
        model: str
    ) -> spacy.language.Language:
        """Return the loaded spaCy pipeline, downloading `model` if needed."""
        try:
            return spacy.load(model)
        except OSError:
            # spaCy raises OSError when the model package is not installed.
            print(f"* Downloading {model} model...")
            # Argument list + sys.executable: no shell interpolation of the
            # model name, and the model is installed for *this* interpreter.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            # If the download failed, this second load raises for the caller.
            return spacy.load(model)

    def __format_error(
        self,
        error: str
    ) -> str:
        """Wrap an error message for display."""
        # The template needs a placeholder, otherwise the message is dropped.
        template = "\n{}\n"
        return template.format(error)

    def __format_output(
        self,
        infinitive: str,
        past: str,
        participle: str,
    ) -> str:
        """Render the three verb forms as a one-row markdown table."""
        template = (
            "\n"
            "|Infinitive| Simple Past | Past Participle |\n"
            "| :----: | :----: | :----: |\n"
            "|{} | {}| {}|\n"
        )
        return template.format(infinitive, past, participle)

    def is_in_overrides(
        self,
        verb: str,
        tense: str
    ) -> Tuple[bool, str]:
        """Check the overrides table for `verb` in the column `tense`.

        Returns:
            `(True, forms)` when the table has a string entry, otherwise
            `(False, "")`.
        """
        res = self.__overrides(verb, tense)
        # Besides the `False` "not listed" marker, an empty CSV cell is read
        # by pandas as NaN; neither is a usable override.
        if not isinstance(res, str):
            return False, ""
        return True, res

    def __get_inflections(
        self,
        infinitive: spacy.tokens.token.Token,
        tense: str
    ) -> Union[str, None]:
        """Return the inflection(s) of a verb token for the tag `tense`.

        Alternative forms are joined with `/`. Returns `None` when no form
        is available.
        """
        # The overrides file takes precedence over the computed inflection.
        found, inflections = self.is_in_overrides(infinitive.text, tense)
        if found:
            return inflections

        candidates = (
            infinitive._.inflect(tense, form_num=num) for num in (0, 1)
        )
        # dict.fromkeys de-duplicates while keeping a stable order, so the
        # joined result does not vary between runs.
        unique_forms = list(
            dict.fromkeys(form for form in candidates if form is not None)
        )
        if not unique_forms:
            return None
        return '/'.join(unique_forms)

    def __getAllTenses(
        self,
        tk_verb: spacy.tokens.token.Token
    ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
        """Return `(infinitive, past, participle)`; any item may be `None`."""
        infinitive = tk_verb._.inflect('VB')
        past = self.__get_inflections(tk_verb, 'VBD')
        participle = self.__get_inflections(tk_verb, 'VBN')
        return infinitive, past, participle

    def __tokenizer(
        self,
        verb: str
    ) -> spacy.tokens.token.Token:
        """Run the pipeline on `verb` and return its first token."""
        return self.parser(verb)[0]

    def get(
        self,
        verb: str
    ) -> Tuple[str, str]:
        """Return `(error, output)` for `verb`.

        Exactly one of the two strings is non-empty: `error` holds a
        formatted message when the input is blank or no inflection was
        found, otherwise `output` holds the formatted table.
        """
        verb = verb.strip().lower()
        error, output = "", ""

        if verb == "":
            error = self.__format_error(
                "Error: The Verb field can not be empty!"
            )
            return error, output

        tk_verb = self.__tokenizer(verb)
        infinitive, past, participle = self.__getAllTenses(tk_verb)

        if infinitive is None or past is None or participle is None:
            error = self.__format_error(
                f"Error: The verb '{verb}' has not been found or not spelled correctly!"
            )
            return error, output

        print(f"{verb} -> {infinitive},{past},{participle}")
        output = self.__format_output(infinitive, past, participle)
        return error, output