Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files
- datatypes/config.py +13 -0
- datatypes/datatypes.py +53 -0
datatypes/config.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
|
3 |
+
# Configuration values, grouped into three plain dictionaries.
# NOTE(review): units and exact meaning of each entry are inferred from the
# key names only — confirm against the code that reads them.

# Padding and threshold settings for table detection / recognition.
Config = {
    "table_detection_padding_pixel": 10,
    "table_recognition_padding_pixel": 5,
    "table_detection_threshold": 0.7,
    "table_recognition_threshold": 0.8,
    "table_padd": 20,
    "row_padd": 6,
    "cell_padd": 3,
}

# Location of the Tesseract executable.
# NOTE(review): this is an absolute Windows-style path, so it only resolves on
# a machine with Tesseract installed at exactly this location.
tesseract_config = {
    "tesseractpath": 'C://Program Files//Tesseract-OCR//tesseract.exe',
}

# Locations of the detection and recognition models.
# NOTE(review): absolute paths on a local D: drive — confirm how these are
# supposed to resolve in other environments.
model_config = {
    "detection_model_path": 'D:/Table-detection/models/detection-model',
    "recognition_model_path": 'D:/Table-detection/models/recognition-model',
}
|
datatypes/datatypes.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from dataclasses import dataclass, field
|
3 |
+
from enum import Enum
|
4 |
+
from typing import List, Dict
|
5 |
+
|
6 |
+
class DetectionLabels(Enum):
    """Integer ids for the kinds of table structure element.

    NOTE(review): presumably these values mirror the label ids emitted by the
    detection/recognition model — confirm before renumbering.
    """

    table = 0
    table_column = 1
    table_row = 2
    table_column_header = 3
    table_projected_row_header = 4
    table_spanning_cell = 5
|
13 |
+
|
14 |
+
class ExtractionContext(Enum):
    """Scope levels for extraction: document, table or row.

    Values start at 1 (there is no member with value 0).
    """

    document = 1
    table = 2
    row = 3
|
18 |
+
|
19 |
+
@dataclass
class Cell:
    """A single extracted table cell.

    Every field has a default, so ``Cell()`` is a valid empty cell.
    """

    # Index of the cell (defaults to 0).
    cellindex: int = 0
    # Text value of the cell (defaults to the empty string).
    value: str = ""
    # Probability attached to the value (defaults to 0.5).
    prob: float = 0.5
|
24 |
+
|
25 |
+
@dataclass
class Row:
    """A table row holding its index and the cells extracted from it."""

    # Index of the row (defaults to 0).
    rowindex: int = 0
    # Cells belonging to this row; each instance gets its own fresh list.
    extracted_cells: List[Cell] = field(default_factory=list)
|
29 |
+
|
30 |
+
@dataclass
class TableRecognitionData:
    """Three lists describing recognition output: scores, labels and boxes.

    Each instance gets its own fresh, empty lists by default.
    NOTE(review): presumably the three lists are index-aligned (one entry per
    recognized element) — confirm against the code that fills them.
    """

    scores: List = field(default_factory=list)
    labels: List = field(default_factory=list)
    boxes: List = field(default_factory=list)
|
35 |
+
|
36 |
+
@dataclass
class TableRecognitionOrdered:
    """Recognized rows and recognized columns, kept in two separate lists.

    Each instance gets its own fresh, empty lists by default.
    """

    recognized_row: List = field(default_factory=list)
    recognized_column: List = field(default_factory=list)
|
40 |
+
|
41 |
+
@dataclass
class TableDetectionData:
    """Detection result for one table plus the data derived from it.

    All fields have defaults, so ``TableDetectionData()`` is a valid empty
    record. Mutable fields use ``default_factory`` so instances never share
    state.
    """

    # Score of the detection (defaults to 0.0).
    detection_score: float = 0.0
    # Integer label of the detection (defaults to 0).
    detection_label: int = 0
    # Box of the detection; NOTE(review): coordinate layout is not visible
    # here — confirm against the code that fills it.
    detection_box: List = field(default_factory=list)
    # Default to an empty TableRecognitionData rather than a bare list, so the
    # value always matches its annotation and `.scores` / `.labels` / `.boxes`
    # are available even before recognition has been run.
    recognitiondata: TableRecognitionData = field(default_factory=TableRecognitionData)
    # Ordered recognition results, one TableRecognitionOrdered per entry.
    ordered_recognitiondata: List[TableRecognitionOrdered] = field(default_factory=list)
    # Rows extracted from the table.
    extracted_rows: List[Row] = field(default_factory=list)
|
49 |
+
|
50 |
+
|
51 |
+
@dataclass
class ImageData:
    """Container for the tables found in one image.

    ``tables`` has no default and must be supplied by the caller.
    """

    tables: List[TableDetectionData]
|