Spaces:
Running
Running
Add Japanese language support, switch to PP-OCRv4, and fix several padding-related bugs
Browse files- app.py +48 -13
- test_pdf2img.py +16 -0
app.py
CHANGED
@@ -3,6 +3,7 @@ import string
|
|
3 |
import random
|
4 |
from collections import Counter
|
5 |
from itertools import count, tee
|
|
|
6 |
|
7 |
import cv2
|
8 |
import matplotlib.pyplot as plt
|
@@ -14,7 +15,7 @@ from PIL import Image
|
|
14 |
from transformers import DetrImageProcessor, TableTransformerForObjectDetection
|
15 |
from paddleocr import PaddleOCR
|
16 |
|
17 |
-
ocr = PaddleOCR(use_angle_cls=True, lang="en",use_gpu=False)
|
18 |
|
19 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
20 |
st.set_page_config(layout='wide')
|
@@ -28,6 +29,10 @@ table_detection_model = TableTransformerForObjectDetection.from_pretrained(
|
|
28 |
table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
|
29 |
"microsoft/table-transformer-structure-recognition")
|
30 |
|
|
|
|
|
|
|
|
|
31 |
|
32 |
def PIL_to_cv(pil_img):
    """Return ``pil_img`` as an array with its channels reordered RGB -> BGR.

    The image is first materialised with ``np.array`` and then passed through
    ``cv2.cvtColor`` using ``cv2.COLOR_RGB2BGR``.
    NOTE(review): presumably ``pil_img`` is a 3-channel RGB PIL image --
    confirm against the callers.
    """
    rgb_pixels = np.array(pil_img)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
|
@@ -201,6 +206,32 @@ class TableExtractionPipeline():
|
|
201 |
result.paste(pil_img, (left, top))
|
202 |
return result
|
203 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
def plot_results_detection(self, c1, model, pil_img, prob, boxes,
|
205 |
delta_xmin, delta_ymin, delta_xmax, delta_ymax):
|
206 |
'''
|
@@ -213,7 +244,7 @@ class TableExtractionPipeline():
|
|
213 |
|
214 |
for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
|
215 |
cl = p.argmax()
|
216 |
-
xmin, ymin, xmax, ymax = xmin
|
217 |
ax.add_patch(
|
218 |
plt.Rectangle((xmin, ymin),
|
219 |
xmax - xmin,
|
@@ -238,8 +269,7 @@ class TableExtractionPipeline():
|
|
238 |
cropped_img_list = []
|
239 |
|
240 |
for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
|
241 |
-
|
242 |
-
xmin, ymin, xmax, ymax = xmin - delta_xmin, ymin - delta_ymin, xmax + delta_xmax, ymax + delta_ymax
|
243 |
cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
|
244 |
cropped_img_list.append(cropped_img)
|
245 |
|
@@ -412,7 +442,8 @@ class TableExtractionPipeline():
|
|
412 |
|
413 |
@st.cache
|
414 |
def convert_df(self, df):
|
415 |
-
|
|
|
416 |
|
417 |
def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
|
418 |
max_rows: int):
|
@@ -456,15 +487,15 @@ class TableExtractionPipeline():
|
|
456 |
csv = self.convert_df(df)
|
457 |
|
458 |
try:
|
459 |
-
numkey = df.iloc[0, 0]
|
460 |
-
except:
|
461 |
numkey = str(0)
|
462 |
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
|
469 |
return df
|
470 |
|
@@ -548,7 +579,11 @@ class TableExtractionPipeline():
|
|
548 |
|
549 |
if __name__ == "__main__":
|
550 |
|
551 |
-
|
|
|
|
|
|
|
|
|
552 |
st1, st2, st3 = st.columns((1, 1, 1))
|
553 |
TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
|
554 |
TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)
|
|
|
3 |
import random
|
4 |
from collections import Counter
|
5 |
from itertools import count, tee
|
6 |
+
import base64
|
7 |
|
8 |
import cv2
|
9 |
import matplotlib.pyplot as plt
|
|
|
15 |
from transformers import DetrImageProcessor, TableTransformerForObjectDetection
|
16 |
from paddleocr import PaddleOCR
|
17 |
|
18 |
+
ocr = PaddleOCR(use_angle_cls=True, lang="en", use_gpu=False, ocr_version='PP-OCRv4')
|
19 |
|
20 |
st.set_option('deprecation.showPyplotGlobalUse', False)
|
21 |
st.set_page_config(layout='wide')
|
|
|
29 |
table_recognition_model = TableTransformerForObjectDetection.from_pretrained(
|
30 |
"microsoft/table-transformer-structure-recognition")
|
31 |
|
32 |
+
def reload_ocr(vlang):
    """Rebind the module-level ``ocr`` engine to a new ``PaddleOCR`` instance.

    Args:
        vlang: Language code forwarded to ``PaddleOCR`` as ``lang``
            (the UI offers ``'en'`` and ``'japan'``).

    The other settings mirror the module-level default engine: angle
    classification enabled, GPU disabled, ``PP-OCRv4`` models.
    """
    global ocr
    ocr = PaddleOCR(
        lang=vlang,
        use_angle_cls=True,
        use_gpu=False,
        ocr_version='PP-OCRv4',
    )
|
35 |
+
|
36 |
|
37 |
def PIL_to_cv(pil_img):
    """Return ``pil_img`` as an array with its channels reordered RGB -> BGR.

    The image is first materialised with ``np.array`` and then passed through
    ``cv2.cvtColor`` using ``cv2.COLOR_RGB2BGR``.
    NOTE(review): presumably ``pil_img`` is a 3-channel RGB PIL image --
    confirm against the callers.
    """
    rgb_pixels = np.array(pil_img)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
|
|
|
206 |
result.paste(pil_img, (left, top))
|
207 |
return result
|
208 |
|
209 |
+
@staticmethod
def dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img):
    """Pad a bounding box and clamp the result to the image bounds.

    Every side of the box is pushed outwards by its ``delta_*`` value plus
    5% of the box's extent along that axis.

    Args:
        xmin, ymin, xmax, ymax: Box corners in pixel coordinates.
        delta_xmin, delta_ymin, delta_xmax, delta_ymax: Extra padding
            applied to the respective side.
        pil_img: Object exposing ``size`` as ``(width, height)``; used only
            to obtain the clamping limits.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)`` with every coordinate inside
        ``[0, width]`` / ``[0, height]`` and ``xmax >= xmin``,
        ``ymax >= ymin``.
    """
    w_img, h_img = pil_img.size

    # Proportional padding: 5% of the box size on each axis.
    offset_x = (xmax - xmin) * 0.05
    offset_y = (ymax - ymin) * 0.05

    def clamp(value, upper):
        # Clamp on BOTH sides: a box lying (partly) outside the image, or a
        # negative delta, must not produce an out-of-range coordinate.
        return min(max(value, 0), upper)

    doxmin = clamp(xmin - (delta_xmin + offset_x), w_img)
    doymin = clamp(ymin - (delta_ymin + offset_y), h_img)

    # Never let the far edge cross the near edge, so the box handed to
    # ``crop`` / ``Rectangle`` is never inverted.
    doxmax = max(doxmin, clamp(xmax + (delta_xmax + offset_x), w_img))
    doymax = max(doymin, clamp(ymax + (delta_ymax + offset_y), h_img))

    return doxmin, doymin, doxmax, doymax
|
234 |
+
|
235 |
def plot_results_detection(self, c1, model, pil_img, prob, boxes,
|
236 |
delta_xmin, delta_ymin, delta_xmax, delta_ymax):
|
237 |
'''
|
|
|
244 |
|
245 |
for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
|
246 |
cl = p.argmax()
|
247 |
+
xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
|
248 |
ax.add_patch(
|
249 |
plt.Rectangle((xmin, ymin),
|
250 |
xmax - xmin,
|
|
|
269 |
cropped_img_list = []
|
270 |
|
271 |
for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
|
272 |
+
xmin, ymin, xmax, ymax = self.dynamic_delta(xmin, ymin, xmax, ymax, delta_xmin, delta_ymin, delta_xmax, delta_ymax, pil_img)
|
|
|
273 |
cropped_img = pil_img.crop((xmin, ymin, xmax, ymax))
|
274 |
cropped_img_list.append(cropped_img)
|
275 |
|
|
|
442 |
|
443 |
@st.cache
def convert_df(self, df):
    """Serialise ``df`` to CSV bytes that start with a UTF-8 BOM.

    Args:
        df: DataFrame to export; the index is not written.

    Returns:
        ``bytes`` containing the CSV text encoded as ``utf-8-sig``.
    """
    # ``to_csv`` returns a ``str`` when no path is given, so an ``encoding``
    # argument there has no effect on the result. The BOM that lets Excel
    # detect UTF-8 has to be added when the text is encoded to bytes.
    return df.to_csv(index=False).encode('utf-8-sig')
|
447 |
|
448 |
def create_dataframe(self, c3, cell_ocr_res: list, max_cols: int,
|
449 |
max_rows: int):
|
|
|
487 |
csv = self.convert_df(df)
|
488 |
|
489 |
try:
|
490 |
+
numkey = str(df.iloc[0, 0])
|
491 |
+
except IndexError:
|
492 |
numkey = str(0)
|
493 |
|
494 |
+
# Create a download link with filename and extension
|
495 |
+
filename = f"table_{numkey}.csv" # Adjust the filename as needed
|
496 |
+
b64_csv = base64.b64encode(csv).decode() # Encode CSV data to base64
|
497 |
+
href = f'<a href="data:file/csv;base64,{b64_csv}" download="{filename}">Download {filename}</a>'
|
498 |
+
c3.markdown(href, unsafe_allow_html=True)
|
499 |
|
500 |
return df
|
501 |
|
|
|
579 |
|
580 |
if __name__ == "__main__":
|
581 |
|
582 |
+
st_up, st_lang = st.columns((1, 1))
|
583 |
+
img_name = st_up.file_uploader("Upload an image with table(s)")
|
584 |
+
lang = st_lang.selectbox('Language', ('en', 'japan'))
|
585 |
+
reload_ocr(lang)
|
586 |
+
|
587 |
st1, st2, st3 = st.columns((1, 1, 1))
|
588 |
TD_th = st1.slider('Table detection threshold', 0.0, 1.0, 0.8)
|
589 |
TSR_th = st2.slider('Table structure recognition threshold', 0.0, 1.0, 0.7)
|
test_pdf2img.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
from pdf2image import convert_from_path

# Set the PDF file path
pdf_path = 'test.pdf'

# Location of the Poppler binaries. The POPPLER_PATH environment variable
# overrides the original Windows default; if the directory does not exist,
# pass None so pdf2image looks Poppler up on the system PATH instead.
poppler_path = os.environ.get('POPPLER_PATH', r"C:\poppler-23.07.0\Library\bin")
if not os.path.isdir(poppler_path):
    poppler_path = None

# Convert pages ``first`` .. ``last`` of the PDF to images at 300 dpi
first = 14
last = 14
images = convert_from_path(pdf_path, dpi=300, first_page=first, last_page=last, poppler_path=poppler_path)

# Save each page as "<pdf name>p<page number>.jpg" in JPEG format
image_path = os.path.splitext(pdf_path)[0]

for page_number, image in enumerate(images, start=first):
    image.save(f"{image_path}p{page_number}.jpg", 'JPEG')
|