Spaces:

praysimanjuntak
/

extract-solar-data

Sleeping

File size: 3,260 Bytes

9d496b5

import numpy as np
import supervision as sv
from ultralytics import YOLO
from tqdm import tqdm
import re
from collections import defaultdict
from paddleocr import PaddleOCR
from pdf2image import convert_from_path
import json
import cv2
import gradio as gr

# Object detector, loaded once at import time from the weights file below.
model_yolo = YOLO(model="runs/detect/train/weights/best.pt")

# OCR engine, also loaded once: English, angle classifier enabled,
# GPU disabled, library logging silenced.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
    use_gpu=False,
    show_log=False,
)

# Label format recognised in the OCR output, e.g. "A12-3":
# one capital letter, a number, a dash, and a second number.
_LABEL_PATTERN = re.compile(r"([A-Z])(\d+)-(\d+)")


def _crop_detection(image, bbox):
    """Return the region of *image* covered by *bbox*, or None if it is empty.

    *bbox* is ``(x_min, y_min, x_max, y_max)``. Coordinates are truncated to
    ints and clamped to the image bounds: a negative start index would
    otherwise wrap around in numpy slicing and select the wrong region.
    """
    height, width = image.shape[:2]
    x_min, y_min, x_max, y_max = (int(value) for value in bbox)
    x_min, y_min = max(x_min, 0), max(y_min, 0)
    x_max, y_max = min(x_max, width), min(y_max, height)
    if x_max <= x_min or y_max <= y_min:
        return None
    return image[y_min:y_max, x_min:x_max]


def _read_label(crop):
    """Run OCR on one cropped detection and return its label text.

    The first OCR line is preferred; if it does not look like a label, the
    concatenation of all lines is tried instead (a label may be split across
    several OCR lines). Returns an empty string when nothing matches or the
    OCR engine found no text.
    """
    lines = ocr.ocr(crop, cls=True)[0]
    if not lines:  # covers both None and an empty list
        return ""

    # The text of each OCR line is read from line[1][0].
    first_line = lines[0][1][0]
    if _LABEL_PATTERN.match(first_line):
        return first_line

    joined = "".join(line[1][0] for line in lines)
    if _LABEL_PATTERN.match(joined):
        return joined
    return ""


def _group_labels(labels):
    """Group label strings by (letter, first number) into the output mapping.

    ``["A1-2", "A1-1", "B3-7"]`` becomes
    ``{"CB-A1": ["A1-1", "A1-2"], "CB-B3": ["B3-7"]}``. Strings that do not
    start with a label are ignored. Numbers are parsed as ints, so leading
    zeros are dropped in the output.
    """
    grouped = defaultdict(list)
    for label in labels:
        match = _LABEL_PATTERN.match(label)
        if match:
            letter = match.group(1)
            grouped[(letter, int(match.group(2)))].append(int(match.group(3)))

    return {
        f"CB-{letter}{x}": [f"{letter}{x}-{y}" for y in sorted(grouped[(letter, x)])]
        for (letter, x) in sorted(grouped)
    }


def process_pdf(file):
    """Detect and OCR labels on every page of a PDF and return them as JSON.

    Each page is rendered to an image, rotated 90 degrees clockwise, and run
    through the YOLO detector tile by tile. Every detected box is cropped and
    passed to OCR; texts that look like ``<LETTER><x>-<y>`` are collected and
    grouped under ``"CB-<LETTER><x>"`` keys.

    Args:
        file: The uploaded PDF, either as a path string or as an object whose
            ``name`` attribute is the path.

    Returns:
        A JSON string (indent 4) mapping ``"CB-<LETTER><x>"`` to the sorted
        list of ``"<LETTER><x>-<y>"`` labels found.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file.")
    pdf_path = file if isinstance(file, str) else file.name

    def slicer_callback(image_slice: np.ndarray) -> sv.Detections:
        # Run the detector on a single tile of the page.
        prediction = model_yolo.predict(image_slice, conf=0.85)[0]
        return sv.Detections.from_ultralytics(prediction)

    # The slicer splits each page into overlapping tiles, runs the callback
    # on each, and merges overlapping boxes from neighbouring tiles.
    slicer = sv.InferenceSlicer(
        callback=slicer_callback,
        slice_wh=(2000, 800),
        overlap_ratio_wh=(0.6, 0.6),
        overlap_filter_strategy=sv.OverlapFilter.NON_MAX_MERGE,
        iou_threshold=0.05,
    )

    labels = []
    for pil_image in convert_from_path(pdf_path):
        page = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
        page = cv2.rotate(page, cv2.ROTATE_90_CLOCKWISE)

        for detection in tqdm(slicer(page)):
            # The first element of each detection is its bounding box.
            crop = _crop_detection(page, detection[0])
            if crop is None:
                continue
            label = _read_label(crop)
            if label:
                labels.append(label)

    return json.dumps(_group_labels(labels), indent=4)

# Web UI: one PDF upload as input, the JSON produced by process_pdf as output.
pdf_upload = gr.File(label="Upload PDF")

iface = gr.Interface(
    title="Extract Data from PDF",
    description="Upload a PDF file and get the JSON output of detected numbers.",
    fn=process_pdf,
    inputs=pdf_upload,
    outputs="json",
)

# Start serving the app.
iface.launch()