hobs committed on
Commit
d625a73
1 Parent(s): 20c813e

test a lot of things like import spacy and preprocessing and text box labels

Browse files
Files changed (1) hide show
  1. app.py +53 -19
app.py CHANGED
@@ -1,18 +1,47 @@
1
- import gradio as gr
2
  import inspect
 
 
3
  from gradio import routes
 
4
  from typing import List, Type
5
- """
6
- >>> !curl -X POST https://Hobson-gradio-rest-api.hf.space/api/predict/ -H 'Content-Type: application/json' -d '{"data": ["one hundred twenty-three"], "fn_index": 0}'
7
- {"data":["123"],"duration":0.00019359588623046875,"average_duration":0.00019359588623046875}
8
- """
9
 
10
 
11
- def normalize_text(text):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  return text.replace('-', ' ')
13
 
14
 
15
- def text2int(text, numwords={}):
16
  """ Convert an English str containing number words into an int
17
 
18
  >>> text2int("nine")
@@ -23,18 +52,13 @@ def text2int(text, numwords={}):
23
  123
24
  """
25
  if not numwords:
26
- units = [
27
- "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
28
- "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
29
- "sixteen", "seventeen", "eighteen", "nineteen",
30
- ]
31
 
32
  tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
33
 
34
  scales = ["hundred", "thousand", "million", "billion", "trillion"]
35
 
36
  numwords["and"] = (1, 0)
37
- for idx, word in enumerate(units):
38
  numwords[word] = (1, idx)
39
  for idx, word in enumerate(tens):
40
  numwords[word] = (1, idx * 10)
@@ -42,8 +66,8 @@ def text2int(text, numwords={}):
42
  numwords[word] = (10 ** (idx * 3 or 2), 0)
43
 
44
  current = result = 0
45
- text = normalize_text(text)
46
- for word in text.split():
47
  if word not in numwords:
48
  raise Exception("Illegal word: " + word)
49
 
@@ -56,6 +80,14 @@ def text2int(text, numwords={}):
56
  return str(result + current)
57
 
58
 
 
 
 
 
 
 
 
 
59
  def get_types(cls_set: List[Type], component: str):
60
  docset = []
61
  types = []
@@ -79,10 +111,12 @@ routes.get_types = get_types
79
 
80
  with gr.Blocks() as html_block:
81
  gr.Markdown("# Gradio Blocks (3.0) with REST API")
82
- textbox = gr.Textbox()
83
- button = gr.Button("text2int")
84
- output = gr.Textbox()
85
- button.click(text2int, inputs=[textbox], outputs=[output])
 
 
86
  gr.Markdown(r"""
87
 
88
  ## API
 
 
1
  import inspect
2
+ import json
3
+ import gradio as gr
4
  from gradio import routes
5
+ import spacy
6
  from typing import List, Type
 
 
 
 
7
 
8
 
9
+ ONES = [
10
+ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
11
+ "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
12
+ "sixteen", "seventeen", "eighteen", "nineteen",
13
+ ]
14
+
15
+ # token_mapping = json.load(open('str_mapping.json'))
16
+ CHAR_MAPPING = {
17
+ "-": " ",
18
+ "_": " ",
19
+ }
20
+
21
+ TOKEN_MAPPING = dict(enumerate(ONES))
22
+
23
+
24
+ def tokenize(text):
25
+ return text.split()
26
+
27
+
28
+ def detokenize(tokens):
29
+ return ' '.join(tokens)
30
+
31
+
32
+ def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
33
+ return [token_mapping.get(tok, tok) for tok in tokens]
34
+
35
+
36
+ def replace_chars(text, char_mapping=CHAR_MAPPING):
37
+ return ''.join((char_mapping.get(c, c) for c in text))
38
+
39
+
40
+ def preprocess(text):
41
  return text.replace('-', ' ')
42
 
43
 
44
+ def tokens2int(tokens, numwords={}):
45
  """ Convert an English str containing number words into an int
46
 
47
  >>> text2int("nine")
 
52
  123
53
  """
54
  if not numwords:
 
 
 
 
 
55
 
56
  tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
57
 
58
  scales = ["hundred", "thousand", "million", "billion", "trillion"]
59
 
60
  numwords["and"] = (1, 0)
61
+ for idx, word in enumerate(ONES):
62
  numwords[word] = (1, idx)
63
  for idx, word in enumerate(tens):
64
  numwords[word] = (1, idx * 10)
 
66
  numwords[word] = (10 ** (idx * 3 or 2), 0)
67
 
68
  current = result = 0
69
+
70
+ for word in tokens:
71
  if word not in numwords:
72
  raise Exception("Illegal word: " + word)
73
 
 
80
  return str(result + current)
81
 
82
 
83
+ def text2int(text):
84
+ return tokens2int(tokenize(replace_chars(text)))
85
+
86
+
87
+ def text2int_preprocessed(text):
88
+ return tokens2int(replace_tokens(tokenize(replace_chars(text))))
89
+
90
+
91
  def get_types(cls_set: List[Type], component: str):
92
  docset = []
93
  types = []
 
111
 
112
  with gr.Blocks() as html_block:
113
  gr.Markdown("# Gradio Blocks (3.0) with REST API")
114
+ textbox = gr.Textbox("Input word number text:")
115
+ button_text2int = gr.Button("text2int")
116
+ button_text2int_preprocessed = gr.Button("text2int with preprocessing")
117
+ output = gr.Textbox("Output integer as text:")
118
+ button_text2int.click(text2int, inputs=[textbox], outputs=[output])
119
+ button_text2int_preprocessed.click(text2int_preprocessed, inputs=[textbox], outputs=[output])
120
  gr.Markdown(r"""
121
 
122
  ## API