mathtext / app.py
hobs
test a lot of things like import spacy and preprocessing and text box labels
d625a73
raw
history blame
3.97 kB
import inspect
import json
import gradio as gr
from gradio import routes
import spacy
from typing import List, Type
# Number words for 0-19; each word's position in the list is its integer value.
ONES = [
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen",
]

# token_mapping = json.load(open('str_mapping.json'))

# Single characters that replace_chars() turns into a space.
CHAR_MAPPING = {
    "-": " ",
    "_": " ",
}

# Token -> number-word lookup used by replace_tokens().
# tokenize() produces *string* tokens, so the digit keys must be strings
# ("0".."19") for a lookup to ever succeed.  The original integer keys
# (0..19) are kept as well so existing code indexing by int keeps working.
TOKEN_MAPPING = dict(enumerate(ONES))
TOKEN_MAPPING.update((str(value), word) for value, word in enumerate(ONES))
def tokenize(text):
    """Split *text* into a list of whitespace-separated tokens."""
    tokens = text.split()
    return tokens
def detokenize(tokens):
    """Join *tokens* back into one string, separated by single spaces."""
    separator = ' '
    return separator.join(tokens)
def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Return a new list with each token swapped for its mapped value.

    Tokens that are not keys of *token_mapping* are passed through unchanged.
    """
    replaced = []
    for token in tokens:
        replaced.append(token_mapping.get(token, token))
    return replaced
def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Return *text* with every character found in *char_mapping* replaced.

    Characters that are not keys of *char_mapping* are copied as-is.
    """
    pieces = []
    for char in text:
        pieces.append(char_mapping.get(char, char))
    return ''.join(pieces)
def preprocess(text):
    """Return *text* with every hyphen turned into a space."""
    # Splitting on '-' and re-joining with ' ' is the same as replacing
    # each '-' by ' '.
    segments = text.split('-')
    return ' '.join(segments)
def tokens2int(tokens, numwords=None):
    """Convert a sequence of English number words into a decimal string.

    Args:
        tokens: iterable of lowercase number words, e.g. ``["forty", "two"]``.
        numwords: optional mapping of word -> ``(scale, increment)``.  When it
            is ``None`` or empty, the standard English table is built (an
            empty dict passed by the caller is filled in place).

    Returns:
        The value as a ``str`` (not an ``int``), e.g. ``'42'``.

    Raises:
        ValueError: if a token is not a known number word.

    >>> tokens2int(["nine"])
    '9'
    >>> tokens2int(["forty", "two"])
    '42'
    >>> tokens2int(["one", "hundred", "and", "five"])
    '105'
    >>> tokens2int(["two", "thousand", "twenty"])
    '2020'
    """
    if numwords is None:
        numwords = {}
    if not numwords:
        # No "" placeholders here: they would register the empty string as a
        # valid number word.  The tens start at twenty, hence start=2.
        tens = ["twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
        scales = ["hundred", "thousand", "million", "billion", "trillion"]
        # "and" is a no-op filler ("one hundred and five").
        numwords["and"] = (1, 0)
        for idx, word in enumerate(ONES):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens, start=2):
            numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # idx 0 -> 10**2 (hundred); idx k>0 -> 10**(3k) (thousand, million, ...)
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0
    for word in tokens:
        if word not in numwords:
            raise ValueError(f"Illegal word: {word}")
        scale, increment = numwords[word]
        current = current * scale + increment
        # Scales above one hundred close the current group: bank it and
        # start accumulating the next group from zero.
        if scale > 100:
            result += current
            current = 0
    return str(result + current)
def text2int(text):
    """Parse number words in *text* and return the result of tokens2int().

    The text is first passed through replace_chars(), then tokenize().
    """
    cleaned = replace_chars(text)
    words = tokenize(cleaned)
    return tokens2int(words)
def text2int_preprocessed(text):
    """Like text2int(), with replace_tokens() applied before tokens2int().

    Pipeline: replace_chars() -> tokenize() -> replace_tokens() -> tokens2int().
    """
    cleaned = replace_chars(text)
    words = tokenize(cleaned)
    normalized = replace_tokens(words)
    return tokens2int(normalized)
def get_types(cls_set: List[Type], component: str):
    """Extract a description and a type name from each class docstring.

    For ``component == "input"`` the second docstring line is parsed; for any
    other value the last docstring line is parsed.  From the chosen line, the
    description is the text after the final ``:`` and the type is the text
    between the last ``(`` and the first ``)``.

    Returns:
        A ``(docset, types)`` pair of lists, one entry per class in *cls_set*.
    """
    line_index = 1 if component == "input" else -1
    docset = []
    types = []
    for cls in cls_set:
        described = inspect.getdoc(cls).split("\n")[line_index]
        docset.append(described.split(":")[-1])
        types.append(described.split(")")[0].split("(")[-1])
    return docset, types
# Monkeypatch: replace Gradio's routes.get_types with the class-docstring
# based version defined in this file.
routes.get_types = get_types

with gr.Blocks() as html_block:
    gr.Markdown("# Gradio Blocks (3.0) with REST API")
    # The first positional argument of gr.Textbox is the initial *value*;
    # captions have to be passed as label= or they show up as pre-filled text.
    textbox = gr.Textbox(label="Input word number text:")
    button_text2int = gr.Button("text2int")
    button_text2int_preprocessed = gr.Button("text2int with preprocessing")
    output = gr.Textbox(label="Output integer as text:")
    # Registration order defines fn_index for the REST API documented below:
    # 0 -> text2int, 1 -> text2int_preprocessed.
    button_text2int.click(text2int, inputs=[textbox], outputs=[output])
    button_text2int_preprocessed.click(text2int_preprocessed, inputs=[textbox], outputs=[output])
    gr.Markdown(r"""
## API
You can select which function to run using the `fn_index` argument:
```python
import requests
requests.post(
url="https://Hobson-gradio-rest-api.hf.space/api/predict/", json={"data": ["one hundred forty-two"], "fn_index": 0}
).json()
```
Or using `curl`:
```bash
curl -X POST https://Hobson-gradio-rest-api.hf.space/api/predict/ -H 'Content-Type: application/json' -d '{"data": ["one hundred forty-two"], "fn_index": 0}'
```
""")

# Build a throwaway Interface over the same components and copy its
# component lists (plus a few attributes) onto the Blocks app.
# NOTE(review): presumably these are attributes the Gradio API routes expect
# to find on the app object -- confirm against the pinned Gradio version.
interface = gr.Interface(lambda: None, inputs=[textbox], outputs=[output])
html_block.input_components = interface.input_components
html_block.output_components = interface.output_components
html_block.examples = None
html_block.predict_durations = []

bapp = html_block.launch()