Spaces:

praysimanjuntak
/

extract-solar-data

Sleeping

App Files Files Community

praysimanjuntak committed on Jul 10

Commit

9d496b5

•

1 Parent(s): 4c44204

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import numpy as np
+import supervision as sv
+from ultralytics import YOLO
+from tqdm import tqdm
+import re
+from collections import defaultdict
+from paddleocr import PaddleOCR
+from pdf2image import convert_from_path
+import json
+import cv2
+import gradio as gr
+# Initialize YOLO model
+model_yolo = YOLO(model="runs/detect/train/weights/best.pt")
+ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)
+def process_pdf(file):
+    images = convert_from_path(file.name)
+    # Function to process each slice of the image
+    def slicer_callback(slice: np.ndarray) -> sv.Detections:
+        result = model_yolo.predict(slice, conf=0.85)[0]
+        detections = sv.Detections.from_ultralytics(result)
+        return detections
+    # Initialize the slicer
+    slicer = sv.InferenceSlicer(
+        callback=slicer_callback,
+        slice_wh=(2000, 800),
+        overlap_ratio_wh=(0.6, 0.6),
+        overlap_filter_strategy=sv.OverlapFilter.NON_MAX_MERGE,
+        iou_threshold=0.05,
+    )
+    results = []
+    for pil_image in images:
+        opencvImage = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
+        opencvImage = cv2.rotate(opencvImage, cv2.ROTATE_90_CLOCKWISE)
+        # Perform inference on the entire image
+        detections = slicer(opencvImage)
+        # Function to run the TrOCR model with detections
+        def run_example(detections):
+            for detection in tqdm(detections):
+                # Extract bounding box coordinates
+                bbox = detection[0]
+                x_min, y_min, x_max, y_max = bbox
+                x_min, y_min, x_max, y_max = int(x_min), int(y_min), int(x_max), int(y_max)
+                # Crop the detected region from the image
+                cropped_image = opencvImage[y_min:y_max, x_min:x_max]
+                result = ocr.ocr(cropped_image, cls=True)[0]
+                if result is not None:
+                    text = ''
+                    if re.match(r"([A-Z])(\d+)-(\d+)", result[0][1][0]):
+                        text = result[0][1][0]
+                    elif re.match(r"([A-Z])(\d+)-(\d+)", ''.join([line[1][0] for line in result])):
+                        text = ''.join([line[1][0] for line in result])
+                    # Print the generated text
+                    results.append(text)
+        # Run example with detections
+        run_example(detections)
+    detected_numbers = defaultdict(list)
+    for result in results:
+        match = re.match(r"([A-Z])(\d+)-(\d+)", result)
+        if match:
+            letter = match.group(1)
+            x = int(match.group(2))
+            y = int(match.group(3))
+            detected_numbers[(letter, x)].append(y)
+    # Generate the desired JSON output
+    output = {}
+    for (letter, x) in sorted(detected_numbers.keys()):
+        key = f"CB-{letter}{x}"
+        value = [f"{letter}{x}-{i}" for i in sorted(detected_numbers[(letter, x)])]
+        output[key] = value
+    return json.dumps(output, indent=4)
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File(label="Upload PDF"),
+    outputs="json",
+    title="Extract Data from PDF",
+    description="Upload a PDF file and get the JSON output of detected numbers."
+)
+# Launch the Gradio app
+iface.launch()