nanom committed on
Commit
80c77a7
1 Parent(s): 1b87c17

Added tense correction file for errors coming from pyinflect library database

Browse files
Files changed (4) hide show
  1. .gitignore +1 -1
  2. app.py +1 -1
  3. data/overrides.csv +12 -0
  4. modules/m_parser.py +81 -49
.gitignore CHANGED
@@ -1 +1 @@
1
- __pycache__
 
1
+ __pycache__
app.py CHANGED
@@ -23,7 +23,7 @@ with iface:
23
  output = gr.Markdown()
24
 
25
  btn_get.click(
26
- fn = execute.get_verbs,
27
  inputs = input_verb,
28
  outputs = [error, output],
29
  api_name="get"
 
23
  output = gr.Markdown()
24
 
25
  btn_get.click(
26
+ fn = execute.get,
27
  inputs = input_verb,
28
  outputs = [error, output],
29
  api_name="get"
data/overrides.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ VB,VBD,VBN
2
+ work,worked,worked
3
+ can,could,-
4
+ wake,woke,woken
5
+ cost,cost,cost
6
+ drink,drank,drunk
7
+ learn,learnt,learnt
8
+ leave,left,left
9
+ pay,paid,paid
10
+ ring,rang,rung
11
+ speak,spoke,spoken
12
+ write,wrote,written
modules/m_parser.py CHANGED
@@ -1,14 +1,25 @@
1
  import subprocess
2
  import spacy
3
  import pyinflect
4
- from typing import Tuple, Union
 
5
 
6
  class Parser:
7
  def __init__(
8
  self
9
  ) -> None:
10
 
11
- self.parser = self.__init_parser("en_core_web_md")
 
 
 
 
 
 
 
 
 
 
12
 
13
  def __init_parser(
14
  self,
@@ -30,63 +41,86 @@ class Parser:
30
 
31
  return parser
32
 
33
- def __v2participle(
34
  self,
35
- tk_verb: spacy.tokens.token.Token
36
- ) -> Union[str, None]:
37
 
38
- return tk_verb._.inflect('VBN')
39
-
40
- def __v2past(
 
 
 
 
 
 
 
41
  self,
42
- tk_verb: spacy.tokens.token.Token
43
- ) -> Union[str, None]:
 
 
44
 
45
- return tk_verb._.inflect('VBD')
 
 
 
 
 
46
 
47
- def __v2infinitive(
48
  self,
49
- tk_verb: spacy.tokens.token.Token
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ) -> Union[str, None]:
 
 
 
51
 
52
- return tk_verb._.inflect('VB')
 
 
 
53
 
54
- def __tokenizer(
55
- self,
56
- verb: str
57
- ) -> spacy.tokens.token.Token:
58
 
59
- return self.parser(verb)[0]
60
 
61
- def ___format_output(
62
- self,
63
- infiniteve: str,
64
- past: str,
65
- participle: str
66
- ) -> str:
67
- template = """
68
- |infinitive| Simple Past | Past Participle |
69
- | :----: | :----: | :----: |
70
- |{} | {}| {}|
71
- """
72
- return template.format(infiniteve, past, participle)
73
 
 
74
 
75
- def ___format_error(
76
  self,
77
- error: str
78
- ) -> str:
79
 
80
- template = """
81
- <center>
82
- <div class="alert alert-warning" role="alert">
83
- <h6><b>{}</b></h6>
84
- </div>
85
- </center>
86
- """
87
- return template.format(error)
88
 
89
- def get_verbs(
90
  self,
91
  verb: str
92
  ) -> Tuple[str,str]:
@@ -95,22 +129,20 @@ class Parser:
95
  error, output = "", ""
96
 
97
  if verb == "":
98
- error = self.___format_error(
99
  f"Error: The Verb field can not be empty!"
100
  )
101
  return error, output
102
 
103
  tk_verb = self.__tokenizer(verb)
104
- infinitive = self.__v2infinitive(tk_verb)
105
- past = self.__v2past(tk_verb)
106
- participle = self.__v2participle(tk_verb)
107
 
108
  if infinitive is None or past is None or participle is None:
109
- error = self.___format_error(
110
  f"Error: The verb '<b>{verb}</b>' has not been found or not spelled correctly!"
111
  )
112
  return error, output
113
 
114
- output = self.___format_output(infinitive, past, participle)
115
 
116
  return error, output
 
1
  import subprocess
2
  import spacy
3
  import pyinflect
4
+ import pandas as pd
5
+ from typing import Tuple, Union, Any
6
 
7
  class Parser:
8
  def __init__(
9
  self
10
  ) -> None:
11
 
12
+ self.parser = self.__init_parser("en_core_web_sm")
13
+ self.__overrides = self.__load_overrides("data/overrides.csv")
14
+
15
+ def __load_overrides(
16
+ self,
17
+ file_path: str
18
+ ) -> Any:
19
+
20
+ dic = pd.read_csv(file_path).set_index('VB').to_dict()
21
+ fun = lambda verb, tag: dic[tag].get(verb, False)
22
+ return fun
23
 
24
  def __init_parser(
25
  self,
 
41
 
42
  return parser
43
 
44
+ def __format_error(
45
  self,
46
+ error: str
47
+ ) -> str:
48
 
49
+ template = """
50
+ <center>
51
+ <div class="alert alert-warning" role="alert">
52
+ <h6><b>{}</b></h6>
53
+ </div>
54
+ </center>
55
+ """
56
+ return template.format(error)
57
+
58
+ def __format_output(
59
  self,
60
+ infinitive: str,
61
+ past: str,
62
+ participle: str,
63
+ ) -> str:
64
 
65
+ template = """
66
+ |Infinitive| Simple Past | Past Participle |
67
+ | :----: | :----: | :----: |
68
+ |{} | {}| {}|
69
+ """
70
+ return template.format(infinitive, past, participle)
71
 
72
+ def is_in_overrides(
73
  self,
74
+ verb: str,
75
+ tense: str
76
+ ) -> Tuple[str, bool]:
77
+
78
+ res = self.__overrides(verb, tense)
79
+ if isinstance(res, bool):
80
+ return res, ""
81
+
82
+ return True, res
83
+
84
+ def __get_inflections(
85
+ self,
86
+ infinitive: spacy.tokens.token.Token,
87
+ tense: str
88
  ) -> Union[str, None]:
89
+
90
+ # Check if verb is in overrides file
91
+ res, inflections = self.is_in_overrides(infinitive.text, tense)
92
 
93
+ if not res:
94
+ form1 = infinitive._.inflect(tense, form_num=0)
95
+ form2 = infinitive._.inflect(tense, form_num=1)
96
+ inflections = list(set([form1, form2]))
97
 
98
+ if len(inflections) == 1 and inflections[0] == None:
99
+ return None
100
+
101
+ inflections = '/'.join(inflections)
102
 
103
+ return inflections
104
 
105
+ def __getAllTenses(
106
+ self,
107
+ tk_verb: spacy.tokens.token.Token
108
+ ) -> Tuple[str,str,str,str]:
109
+
110
+ infinitive = tk_verb._.inflect('VB')
111
+ past = self.__get_inflections(tk_verb, 'VBD')
112
+ participle = self.__get_inflections(tk_verb, 'VBN')
 
 
 
 
113
 
114
+ return infinitive, past, participle
115
 
116
+ def __tokenizer(
117
  self,
118
+ verb: str
119
+ ) -> spacy.tokens.token.Token:
120
 
121
+ return self.parser(verb)[0]
 
 
 
 
 
 
 
122
 
123
+ def get(
124
  self,
125
  verb: str
126
  ) -> Tuple[str,str]:
 
129
  error, output = "", ""
130
 
131
  if verb == "":
132
+ error = self.__format_error(
133
  f"Error: The Verb field can not be empty!"
134
  )
135
  return error, output
136
 
137
  tk_verb = self.__tokenizer(verb)
138
+ infinitive, past, participle = self.__getAllTenses(tk_verb)
 
 
139
 
140
  if infinitive is None or past is None or participle is None:
141
+ error = self.__format_error(
142
  f"Error: The verb '<b>{verb}</b>' has not been found or not spelled correctly!"
143
  )
144
  return error, output
145
 
146
+ output = self.__format_output(infinitive, past, participle)
147
 
148
  return error, output