Spaces:
Runtime error
Runtime error
update space
Browse files- .gitignore +2 -0
- app.py +37 -15
- requirements.txt +1 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
/.env
|
2 |
+
/dummy.py
|
app.py
CHANGED
@@ -1,14 +1,26 @@
|
|
|
|
1 |
import os
|
2 |
-
os
|
|
|
|
|
3 |
|
4 |
-
# work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
|
5 |
-
os.system("pip uninstall -y gradio")
|
6 |
-
os.system("pip install gradio==3.4.1")
|
7 |
-
os.system("pip install packaging==21.3")
|
8 |
-
os.system(os.environ["DD_ADDONS"])
|
9 |
|
10 |
-
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
import deepdoctection as dd
|
14 |
from deepdoctection.dataflow.serialize import DataFromList
|
@@ -21,7 +33,10 @@ from dd_addons.extern.openai import OpenAiLmmTokenClassifier, is_api_key_valid
|
|
21 |
|
22 |
import gradio as gr
|
23 |
|
24 |
-
|
|
|
|
|
|
|
25 |
|
26 |
demo = gr.Blocks(css="scrollbar.css")
|
27 |
|
@@ -30,6 +45,9 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
|
|
30 |
if not is_api_key_valid(openai_api_key):
|
31 |
return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
|
32 |
categories_list = categories_str.split(",")
|
|
|
|
|
|
|
33 |
register_string_categories_from_list(categories_list, "custom_token_classes")
|
34 |
custom_token_class = dd.object_types_registry.get("custom_token_classes")
|
35 |
print([token_class for token_class in custom_token_class])
|
@@ -61,13 +79,15 @@ def process_analyzer(openai_api_key, categories_str, instruction_str, img, pdf,
|
|
61 |
|
62 |
json_out = {}
|
63 |
dpts = []
|
|
|
64 |
|
65 |
for idx, dp in enumerate(df):
|
66 |
dpts.append(dp)
|
67 |
json_out[f"page_{idx}"] = dp.get_token()
|
|
|
68 |
|
69 |
return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
|
70 |
-
for dp in dpts], json_out, "No error"
|
71 |
|
72 |
|
73 |
with demo:
|
@@ -125,18 +145,20 @@ with demo:
|
|
125 |
with gr.Box():
|
126 |
gr.Markdown("<center><strong>JSON</strong></center>")
|
127 |
json = gr.JSON()
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
with gr.Column():
|
129 |
with gr.Box():
|
130 |
gr.Markdown("<center><strong>Layout detection</strong></center>")
|
131 |
gallery = gr.Gallery(
|
132 |
label="Output images", show_label=False, elem_id="gallery"
|
133 |
).style(grid=2)
|
134 |
-
with gr.Row():
|
135 |
-
with gr.Box():
|
136 |
-
gr.Markdown("<center><strong>Table</strong></center>")
|
137 |
-
html = gr.HTML()
|
138 |
|
139 |
btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
|
140 |
-
outputs=[gallery, json, msg])
|
141 |
|
142 |
demo.launch()
|
|
|
1 |
+
import time
|
2 |
import os
|
3 |
+
from os import getcwd, path
|
4 |
+
import importlib.metadata
|
5 |
+
from dotenv import load_dotenv
|
6 |
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
def check_additional_requirements():
    """Install the runtime dependencies that cannot be pinned in requirements.txt.

    Steps, in order:

    1. Install ``detectron2`` from GitHub if it is not importable.
    2. Make sure ``gradio`` is installed at exactly version 3.4.1, removing
       any other installed version first.
    3. Run the shell command stored in the ``DD_ADDONS`` environment
       variable (presumably the install command for the ``dd_addons``
       package -- confirm against the Space configuration).

    Every command is run through ``os.system``; exit codes are not checked,
    so a failed installation only shows up later as an import error.

    Raises:
        KeyError: if the ``DD_ADDONS`` environment variable is not set.
    """
    # Import the submodules explicitly: `import importlib.metadata` alone does
    # not guarantee that `importlib.util` has been loaded as an attribute.
    import importlib.metadata
    import importlib.util

    required_gradio = "3.4.1"

    if importlib.util.find_spec("detectron2") is None:
        os.system('pip install detectron2@git+https://github.com/facebookresearch/detectron2.git')

    # Ask the distribution metadata directly instead of probing the module
    # first; a missing distribution is treated as "not installed".
    try:
        installed_gradio = importlib.metadata.version("gradio")
    except importlib.metadata.PackageNotFoundError:
        installed_gradio = None

    if installed_gradio != required_gradio:
        if installed_gradio is not None:
            os.system("pip uninstall -y gradio")
        os.system("pip install gradio==3.4.1")

    # The command comes from the Space's environment/secrets, not from user input.
    os.system(os.environ["DD_ADDONS"])
|
19 |
+
|
20 |
+
|
21 |
+
load_dotenv()
|
22 |
+
check_additional_requirements()
|
23 |
+
|
24 |
|
25 |
import deepdoctection as dd
|
26 |
from deepdoctection.dataflow.serialize import DataFromList
|
|
|
33 |
|
34 |
import gradio as gr
|
35 |
|
36 |
+
dd.Page.add_attribute_name("raw_json_output")
|
37 |
+
analyzer = get_loader(reset_config_file=True, config_overwrite=["OCR.USE_TESSERACT=False",
|
38 |
+
"OCR.USE_TEXTRACT=True",
|
39 |
+
"WORD_MATCHING.MAX_PARENT_ONLY=True"])
|
40 |
|
41 |
demo = gr.Blocks(css="scrollbar.css")
|
42 |
|
|
|
45 |
if not is_api_key_valid(openai_api_key):
|
46 |
return [], {}, "You have entered no or an invalid api key. Please enter a valid api key"
|
47 |
categories_list = categories_str.split(",")
|
48 |
+
if not categories_str:
|
49 |
+
return [], {}, "You did not enter any entities. Please enter a at least one category."
|
50 |
+
|
51 |
register_string_categories_from_list(categories_list, "custom_token_classes")
|
52 |
custom_token_class = dd.object_types_registry.get("custom_token_classes")
|
53 |
print([token_class for token_class in custom_token_class])
|
|
|
79 |
|
80 |
json_out = {}
|
81 |
dpts = []
|
82 |
+
json_out_raw = {}
|
83 |
|
84 |
for idx, dp in enumerate(df):
|
85 |
dpts.append(dp)
|
86 |
json_out[f"page_{idx}"] = dp.get_token()
|
87 |
+
json_out_raw[f"page_{idx}"] = dp.raw_json_output
|
88 |
|
89 |
return [dp.viz(show_cells=False, show_layouts=False, show_tables=False, show_words=True, show_token_class=True, ignore_default_token_class=True)
|
90 |
+
for dp in dpts], json_out, json_out_raw, "No error"
|
91 |
|
92 |
|
93 |
with demo:
|
|
|
145 |
with gr.Box():
|
146 |
gr.Markdown("<center><strong>JSON</strong></center>")
|
147 |
json = gr.JSON()
|
148 |
+
with gr.Box():
|
149 |
+
# Explanatory header for the raw model output panel. Adjacent string literals
# are concatenated without a separator, so each fragment that continues a
# sentence must end with an explicit space.
gr.Markdown("<center><strong>ChatGPT output. </strong> <br />"
            "It is possible that ChatGPT answers in an unexpected way, "
            "such that the answer cannot be properly processed. In this case you might get "
            "an empty JSON but you can still see the raw output.</center>")
|
153 |
+
json_raw = gr.JSON()
|
154 |
with gr.Column():
|
155 |
with gr.Box():
|
156 |
gr.Markdown("<center><strong>Layout detection</strong></center>")
|
157 |
gallery = gr.Gallery(
|
158 |
label="Output images", show_label=False, elem_id="gallery"
|
159 |
).style(grid=2)
|
|
|
|
|
|
|
|
|
160 |
|
161 |
btn.click(fn=process_analyzer, inputs=[user_token, categories, instruction, inputs, inputs_pdf, max_imgs],
|
162 |
+
outputs=[gallery, json, json_raw, msg])
|
163 |
|
164 |
demo.launch()
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
Pillow==9.5.0
|
2 |
torch==1.12.0
|
3 |
torchvision==0.13.0
|
|
|
1 |
+
python-dotenv
|
2 |
Pillow==9.5.0
|
3 |
torch==1.12.0
|
4 |
torchvision==0.13.0
|