docquery

Paused

App Files Files Community

Ankur Goyal committed on Aug 28, 2022

Commit

2359223

•

1 Parent(s): d229b67

Switch to Gradio

Browse files

Files changed (5) hide show

README.md +2 -4
app.py +137 -141
contract.jpeg +0 -0
invoice.png +0 -0
statement.png +0 -0

README.md CHANGED Viewed

@@ -3,10 +3,8 @@ title: DocQuery
 emoji: 🦉
 colorFrom: gray
 colorTo: pink
-sdk: streamlit
-sdk_version: 1.10.0
 app_file: app.py
 pinned: true
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 emoji: 🦉
 colorFrom: gray
 colorTo: pink
+sdk: gradio
+sdk_version: 3.1.7
 app_file: app.py
 pinned: true
 ---

app.py CHANGED Viewed

@@ -2,15 +2,13 @@ import os
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
-from PIL import ImageDraw
-import streamlit as st
-from st_clickable_images import clickable_images
-st.set_page_config(layout="wide")
 import torch
 from docquery.pipeline import get_pipeline
-from docquery.document import load_bytes, load_document
 def ensure_list(x):
@@ -25,15 +23,21 @@ CHECKPOINTS = {
     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
 }
-@st.experimental_singleton(show_spinner=False)
 def construct_pipeline(model):
     device = "cuda" if torch.cuda.is_available() else "cpu"
     ret = get_pipeline(checkpoint=CHECKPOINTS[model], device=device)
     return ret
-@st.cache(show_spinner=False)
 def run_pipeline(model, question, document, top_k):
     pipeline = construct_pipeline(model)
     return pipeline(question=question, **document.context, top_k=top_k)
@@ -59,150 +63,142 @@ def normalize_bbox(box, width, height):
     return [pct[0] * width, pct[1] * height, pct[2] * width, pct[3] * height]
-st.markdown("# DocQuery: Query Documents w/ NLP")
-if "document" not in st.session_state:
-    st.session_state["document"] = None
-if "last_clicked" not in st.session_state:
-    st.session_state["last_clicked"] = None
-input_col, model_col = st.columns(2)
-with input_col:
-    input_type = st.radio(
-        "Pick an input type", ["Upload", "URL", "Examples"], horizontal=True
-    )
-with model_col:
-    model_type = st.radio("Pick a model", list(CHECKPOINTS.keys()), horizontal=True)
-def load_file_cb():
-    if st.session_state.file_input is None:
-        return
-    file = st.session_state.file_input
-    with loading_placeholder:
-        with st.spinner("Processing..."):
-            document = load_bytes(file, file.name)
-            _ = document.context
-            st.session_state.document = document
-def load_url_cb():
-    if st.session_state.url_input is None:
-        return
-    url = st.session_state.url_input
-    with loading_placeholder:
-        with st.spinner("Downloading..."):
-            document = load_document(url)
-        with st.spinner("Processing..."):
-            _ = document.context
-        st.session_state.document = document
-examples = [
-    (
-        "https://templates.invoicehome.com/invoice-template-us-neat-750px.png",
-        "What is the invoice number?",
-    ),
-    (
-        "https://miro.medium.com/max/787/1*iECQRIiOGTmEFLdWkVIH2g.jpeg",
-        "What is the purchase amount?",
-    ),
-    (
-        "https://www.accountingcoach.com/wp-content/uploads/2013/10/[email protected]",
-        "What are net sales for 2020?",
-    ),
-]
-imgs_clicked = []
-if input_type == "Upload":
-    file = st.file_uploader(
-        "Upload a PDF or Image document", key="file_input", on_change=load_file_cb
     )
-elif input_type == "URL":
-    url = st.text_input("URL", "", key="url_input", on_change=load_url_cb)
-elif input_type == "Examples":
-    example_cols = st.columns(len(examples))
-    for (i, (path, question)) in enumerate(examples):
-        with example_cols[i]:
-            imgs_clicked.append(
-                clickable_images(
-                    [path],
-                    div_style={
-                        "display": "flex",
-                        "justify-content": "center",
-                        "flex-wrap": "wrap",
-                        "cursor": "pointer",
-                    },
-                    img_style={"margin": "5px", "height": "200px"},
-                )
-            )
-            st.markdown(
-                f"<p style='text-align: center'>{question}</p>",
-                unsafe_allow_html=True,
-            )
-print(imgs_clicked)
-imgs_clicked = [-1] * len(imgs_clicked)
-#    clicked = clickable_images(
-#        [x[0] for x in examples],
-#        titles=[x[1] for x in examples],
-#        div_style={"display": "flex", "justify-content": "center", "flex-wrap": "wrap"},
-#        img_style={"margin": "5px", "height": "200px"},
-#    )
-#
-#    st.markdown(f"Image #{clicked} clicked" if clicked > -1 else "No image clicked")
-question = st.text_input("QUESTION", "", key="question")
-document = st.session_state.document
-loading_placeholder = st.empty()
-if document is not None:
-    col1, col2 = st.columns(2)
-    image = document.preview
-question = st.session_state.question
-colors = ["blue", "red", "green"]
-if document is not None and question is not None and len(question) > 0:
-    col2.header(f"Answers ({model_type})")
-    with col2:
-        answers_placeholder = st.container()
-        answers_loading_placeholder = st.container()
-        with answers_loading_placeholder:
-            # Run this (one-time) expensive operation outside of the processing
-            # question placeholder
-            with st.spinner("Constructing pipeline..."):
-                construct_pipeline(model_type)
-            with st.spinner("Processing question..."):
-                predictions = run_pipeline(
-                    model=model_type, question=question, document=document, top_k=1
-                )
-        with answers_placeholder:
-            image = image.copy()
-            draw = ImageDraw.Draw(image)
-            for i, p in enumerate(ensure_list(predictions)):
-                col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
-                if "start" in p and "end" in p:
-                    x1, y1, x2, y2 = normalize_bbox(
-                        expand_bbox(
-                            lift_word_boxes(document)[p["start"] : p["end"] + 1]
-                        ),
-                        image.width,
-                        image.height,
-                    )
-                    draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i], width=3)
-if document is not None:
-    col1.image(image, use_column_width="auto")
-"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
-"[Github Repo](https://github.com/impira/docquery)"

 os.environ["TOKENIZERS_PARALLELISM"] = "false"
+import functools
+from PIL import Image, ImageDraw
+import gradio as gr
 import torch
 from docquery.pipeline import get_pipeline
+from docquery.document import load_bytes, load_document, ImageDocument
 def ensure_list(x):
     "Donut 🍩": "naver-clova-ix/donut-base-finetuned-docvqa",
 }
+# Module-level cache of already-constructed pipelines, keyed by the same model
+# name that indexes CHECKPOINTS.
+PIPELINES = {}
 def construct_pipeline(model):
+    # Return the pipeline for `model`: reuse the cached instance when one exists,
+    # otherwise build it with get_pipeline() and remember it in PIPELINES.
+    global PIPELINES
+    if model in PIPELINES:
+        return PIPELINES[model]
+    # Run on the GPU when torch reports CUDA as available, else fall back to CPU.
     device = "cuda" if torch.cuda.is_available() else "cpu"
     ret = get_pipeline(checkpoint=CHECKPOINTS[model], device=device)
+    PIPELINES[model] = ret
     return ret
+# Memoize answers for repeated (model, question, document, top_k) calls, keeping
+# at most 1024 entries. functools.lru_cache hashes every argument, so each one
+# must be hashable.
+# NOTE(review): confirm the document objects passed in are hashable.
+@functools.lru_cache(1024)
 def run_pipeline(model, question, document, top_k):
+    # Ask `question` against the document's context using the pipeline for `model`.
     pipeline = construct_pipeline(model)
     return pipeline(question=question, **document.context, top_k=top_k)
     return [pct[0] * width, pct[1] * height, pct[2] * width, pct[3] * height]
+# Rows handed to gr.Examples below. Each row is [image file, question] and is
+# bound, in that order, to the hidden example_image / example_question inputs.
+examples = [
+    [
+        "invoice.png",
+        "What is the invoice number?",
+    ],
+    [
+        "contract.jpeg",
+        "What is the purchase amount?",
+    ],
+    [
+        "statement.png",
+        "What are net sales for 2020?",
+    ],
+]
+def process_path(path):
+    if path:
+        try:
+            document = load_document(path)
+            return document, document.preview, None
+        except Exception:
+            pass
+    return None, None, None
+def process_upload(file):
+    if file:
+        return process_path(file.name)
+    else:
+        return None, None, None
+# Outline colors for answer boxes, indexed by the prediction's position in the
+# result list.
+colors = ["blue", "green", "black"]
+# Answer `question` about `document` with the chosen model.
+#
+# Returns (image, predictions): a copy of the document preview with the top
+# answer outlined, plus the raw pipeline predictions. Returns (None, None) when
+# no document has been loaded. The default `model` is the first key of
+# CHECKPOINTS, evaluated once when the function is defined.
+def process_question(question, document, model=list(CHECKPOINTS.keys())[0]):
+    if document is None:
+        return None, None
+    # Request the top 3 predictions (the last positional argument is top_k).
+    predictions = run_pipeline(model, question, document, 3)
+    # Draw on a copy so the document's own preview image is left untouched.
+    image = document.preview.copy()
+    draw = ImageDraw.Draw(image)
+    for i, p in enumerate(ensure_list(predictions)):
+        if i > 0:
+            # Keep the code around to produce multiple boxes, but only show the top
+            # prediction for now
+            break
+        # Only predictions carrying "start"/"end" indices can be outlined.
+        if "start" in p and "end" in p:
+            # Slice the word boxes from start through end (inclusive), merge them
+            # with expand_bbox(), and scale the result to the image dimensions.
+            x1, y1, x2, y2 = normalize_bbox(
+                expand_bbox(lift_word_boxes(document)[p["start"] : p["end"] + 1]),
+                image.width,
+                image.height,
+            )
+            draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i], width=2)
+    return image, predictions
+# Callback fired when the hidden example image changes: wrap the image in an
+# ImageDocument and answer the example question right away.
+#
+# Returns (document, question, preview, answer), matching the four outputs this
+# callback is wired to.
+# NOTE(review): assumes `img` is an array accepted by Image.fromarray (it is the
+# value of a gr.Image component); a None value would raise here - TODO confirm.
+def load_example_document(img, question, model):
+    document = ImageDocument(Image.fromarray(img))
+    preview, answer = process_question(question, document, model)
+    return document, question, preview, answer
+# Build the Gradio UI: layout first, then the event wiring between components.
+with gr.Blocks() as demo:
+    gr.Markdown("# DocQuery: Query Documents w/ NLP")
+    # Non-visual slot for the currently loaded document; it is written by the
+    # load callbacks and read by process_question.
+    document = gr.Variable()
+    # Hidden components that receive the row selected in gr.Examples; the change
+    # event on example_image (wired below) loads the example.
+    example_question = gr.Textbox(visible=False)
+    example_image = gr.Image(visible=False)
+    gr.Markdown("## 1. Upload a file or select an example")
+    with gr.Row(equal_height=True):
+        with gr.Column():
+            upload = gr.File(label="Upload a file", interactive=True)
+            url = gr.Textbox(label="... or a URL", interactive=True)
+        gr.Examples(
+            examples=examples,
+            inputs=[example_image, example_question],
+        )
+    gr.Markdown("## 2. Ask a question")
+    with gr.Row(equal_height=True):
+        # NOTE: When https://github.com/gradio-app/gradio/issues/2103 is resolved,
+        # we can support enter-key submit
+        question = gr.Textbox(
+            label="Question", placeholder="e.g. What is the invoice number?"
+        )
+        # Model selector; defaults to the first entry of CHECKPOINTS.
+        model = gr.Radio(
+            choices=list(CHECKPOINTS.keys()),
+            value=list(CHECKPOINTS.keys())[0],
+            label="Model",
+        )
+    with gr.Row():
+        clear_button = gr.Button("Clear", variant="secondary")
+        submit_button = gr.Button("Submit", variant="primary", elem_id="submit-button")
+    with gr.Row():
+        image = gr.Image(visible=True)
+        with gr.Column():
+            output = gr.JSON(label="Output")
+    # Clear resets the preview image, stored document, question and JSON output.
+    clear_button.click(
+        lambda _: (None, None, None, None),
+        inputs=clear_button,
+        outputs=[image, document, question, output],
+    )
+    # Both loaders fill the same three outputs: stored document, preview, output.
+    upload.change(fn=process_upload, inputs=[upload], outputs=[document, image, output])
+    url.change(fn=process_path, inputs=[url], outputs=[document, image, output])
+    submit_button.click(
+        process_question,
+        inputs=[question, document, model],
+        outputs=[image, output],
+    )
+    # This is handy but commented out for now because we can't "auto submit" questions either
+    # model.change(
+    #    process_question, inputs=[question, document, model], outputs=[image, output]
+    # )
+    # Selecting an example loads its document and answers its question at once.
+    example_image.change(
+        fn=load_example_document,
+        inputs=[example_image, example_question, model],
+        outputs=[document, question, image, output],
     )
+    gr.Markdown("### More Info")
+    gr.Markdown("DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question"
+    " answering dataset, as well as SQuAD, which boosts its English-language comprehension."
+    " To use it, simply upload an image or PDF, type a question, and click 'submit', or "
+    " click one of the examples to load them.")
+    gr.Markdown("[Github Repo](https://github.com/impira/docquery)")
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch(debug=True)

contract.jpeg ADDED Viewed

invoice.png ADDED Viewed

statement.png ADDED Viewed