mathtext

Runtime error

App Files Files Community

hobs committed on Dec 21, 2022

Commit

d625a73

•

1 Parent(s): 20c813e

test a lot of things like import spacy and preprocessing and text box labels

Browse files

Files changed (1) hide show

app.py +53 -19

app.py CHANGED Viewed

@@ -1,18 +1,47 @@
-import gradio as gr
 import inspect
 from gradio import routes
 from typing import List, Type
-"""
->>> !curl -X POST https://Hobson-gradio-rest-api.hf.space/api/predict/ -H 'Content-Type: application/json' -d '{"data": ["one hundred twenty-three"], "fn_index": 0}'
-{"data":["123"],"duration":0.00019359588623046875,"average_duration":0.00019359588623046875}
-"""
-def normalize_text(text):
     return text.replace('-', ' ')
-def text2int(text, numwords={}):
     """ Convert an English str containing number words into an int
     >>> text2int("nine")
@@ -23,18 +52,13 @@ def text2int(text, numwords={}):
     123
     """
     if not numwords:
-        units = [
-            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
-            "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
-            "sixteen", "seventeen", "eighteen", "nineteen",
-        ]
         tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
         scales = ["hundred", "thousand", "million", "billion", "trillion"]
         numwords["and"] = (1, 0)
-        for idx, word in enumerate(units):
             numwords[word] = (1, idx)
         for idx, word in enumerate(tens):
             numwords[word] = (1, idx * 10)
@@ -42,8 +66,8 @@ def text2int(text, numwords={}):
             numwords[word] = (10 ** (idx * 3 or 2), 0)
     current = result = 0
-    text = normalize_text(text)
-    for word in text.split():
         if word not in numwords:
             raise Exception("Illegal word: " + word)
@@ -56,6 +80,14 @@ def text2int(text, numwords={}):
     return str(result + current)
 def get_types(cls_set: List[Type], component: str):
     docset = []
     types = []
@@ -79,10 +111,12 @@ routes.get_types = get_types
 with gr.Blocks() as html_block:
     gr.Markdown("# Gradio Blocks (3.0) with REST API")
-    textbox = gr.Textbox()
-    button = gr.Button("text2int")
-    output = gr.Textbox()
-    button.click(text2int, inputs=[textbox], outputs=[output])
     gr.Markdown(r"""
 ## API

 import inspect
+import json
+import gradio as gr
 from gradio import routes
+import spacy
 from typing import List, Type
+ONES = [
+    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
+    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+    "sixteen", "seventeen", "eighteen", "nineteen",
+]
+# token_mapping = json.load(open('str_mapping.json'))
+CHAR_MAPPING = {
+    "-": " ",
+    "_": " ",
+}
+TOKEN_MAPPING = dict(enumerate(ONES))
+def tokenize(text):
+    return text.split()
+def detokenize(tokens):
+    return ' '.join(tokens)
+def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
+    return [token_mapping.get(tok, tok) for tok in tokens]
+def replace_chars(text, char_mapping=CHAR_MAPPING):
+    return ''.join((char_mapping.get(c, c) for c in text))
+def preprocess(text):
     return text.replace('-', ' ')
+def tokens2int(tokens, numwords={}):
     """ Convert an English str containing number words into an int
     >>> text2int("nine")
     123
     """
     if not numwords:
         tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
         scales = ["hundred", "thousand", "million", "billion", "trillion"]
         numwords["and"] = (1, 0)
+        for idx, word in enumerate(ONES):
             numwords[word] = (1, idx)
         for idx, word in enumerate(tens):
             numwords[word] = (1, idx * 10)
             numwords[word] = (10 ** (idx * 3 or 2), 0)
     current = result = 0
+    for word in tokens:
         if word not in numwords:
             raise Exception("Illegal word: " + word)
     return str(result + current)
+def text2int(text):
+    return tokens2int(tokenize(replace_chars(text)))
+def text2int_preprocessed(text):
+    return tokens2int(replace_tokens(tokenize(replace_chars(text))))
 def get_types(cls_set: List[Type], component: str):
     docset = []
     types = []
 with gr.Blocks() as html_block:
     gr.Markdown("# Gradio Blocks (3.0) with REST API")
+    textbox = gr.Textbox("Input word number text:")
+    button_text2int = gr.Button("text2int")
+    button_text2int_preprocessed = gr.Button("text2int with preprocessing")
+    output = gr.Textbox("Output integer as text:")
+    button_text2int.click(text2int, inputs=[textbox], outputs=[output])
+    button_text2int_preprocessed.click(text2int_preprocessed, inputs=[textbox], outputs=[output])
     gr.Markdown(r"""
 ## API