Spaces:

praysimanjuntak
/

extract-solar-data

Sleeping

App Files Files Community

extract-solar-data / app.py

praysimanjuntak

Update app.py

3cb12d8 verified 4 months ago

raw

history blame

3.24 kB

	import numpy as np
	import supervision as sv
	from ultralytics import YOLO
	from tqdm import tqdm
	import re
	from collections import defaultdict
	from paddleocr import PaddleOCR
	from pdf2image import convert_from_path
	import json
	import cv2
	import gradio as gr

	# Initialize YOLO model
	# Loaded once at import time from the weights file "yolov8n-box.pt";
	# process_pdf's slicer callback calls model_yolo.predict on each image slice.
	model_yolo = YOLO(model="yolov8n-box.pt")
	# Shared PaddleOCR instance used by process_pdf to read text from each
	# detected crop: English language, angle classifier enabled, GPU disabled,
	# PaddleOCR's own logging turned off.
	ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)

	# Label format read from the drawings: one capital letter, a number, a dash
	# and a second number (e.g. "A12-3"). Compiled once instead of per crop.
	_LABEL_PATTERN = re.compile(r"([A-Z])(\d+)-(\d+)")


	def _match_label(ocr_lines):
	    """Return the label regex match for one crop's OCR lines, or None.

	    Each entry of ``ocr_lines`` is read as ``line[1][0]`` (the recognized
	    text). The first line alone is tried first; if it does not start with a
	    label, all lines concatenated are tried. ``None`` or an empty list
	    (nothing recognized) yields ``None``.
	    """
	    if not ocr_lines:
	        return None
	    texts = [line[1][0] for line in ocr_lines]
	    return _LABEL_PATTERN.match(texts[0]) or _LABEL_PATTERN.match("".join(texts))


	def _group_labels(matches):
	    """Group label matches by (letter, first number) into the output mapping.

	    Returns a dict whose keys are ``"CB-<letter><x>"`` in sorted
	    (letter, x) order and whose values are ``"<letter><x>-<y>"`` strings
	    sorted by numeric ``y``.
	    """
	    grouped = defaultdict(list)
	    for match in matches:
	        letter = match.group(1)
	        x = int(match.group(2))
	        y = int(match.group(3))
	        grouped[(letter, x)].append(y)

	    return {
	        f"CB-{letter}{x}": [f"{letter}{x}-{y}" for y in sorted(ys)]
	        for (letter, x), ys in sorted(grouped.items())
	    }


	def process_pdf(file):
	    """Detect labelled boxes on every page of a PDF and return them as JSON.

	    Each page is rendered to an image, rotated 90 degrees clockwise, scanned
	    with the sliced YOLO detector, and every detected box is cropped and
	    read with PaddleOCR. Text matching ``<LETTER><x>-<y>`` is collected and
	    grouped by ``<LETTER><x>``.

	    Args:
	        file: The uploaded PDF as delivered by ``gr.File``: either an object
	            exposing the path as ``.name`` or the path string itself.
	            ``None`` (nothing uploaded) produces an empty result.

	    Returns:
	        A JSON string (4-space indent) mapping ``"CB-<LETTER><x>"`` to the
	        sorted list of ``"<LETTER><x>-<y>"`` labels found.
	    """
	    if file is None:
	        return json.dumps({}, indent=4)

	    # Accept both a temp-file wrapper (has .name) and a plain path string.
	    pdf_path = getattr(file, "name", file)
	    pages = convert_from_path(pdf_path)

	    # Function to process each slice of the image
	    def slicer_callback(image_slice: np.ndarray):
	        prediction = model_yolo.predict(image_slice, conf=0.85)[0]
	        return sv.Detections.from_ultralytics(prediction)

	    # Initialize the slicer
	    slicer = sv.InferenceSlicer(
	        callback=slicer_callback,
	        slice_wh=(2000, 800),
	        overlap_ratio_wh=(0.6, 0.6),
	        overlap_filter_strategy=sv.OverlapFilter.NON_MAX_MERGE,
	        iou_threshold=0.05,
	    )

	    matches = []
	    for page in pages:
	        frame = cv2.cvtColor(np.array(page), cv2.COLOR_RGB2BGR)
	        frame = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

	        # Sliced inference covers the whole page
	        detections = slicer(frame)

	        for bbox in tqdm(detections.xyxy):
	            x_min, y_min, x_max, y_max = (int(value) for value in bbox)
	            # A negative start index would wrap around in NumPy slicing.
	            x_min, y_min = max(x_min, 0), max(y_min, 0)

	            # Crop the detected region from the image
	            crop = frame[y_min:y_max, x_min:x_max]
	            if crop.size == 0:
	                continue  # degenerate box: nothing to read

	            match = _match_label(ocr.ocr(crop, cls=True)[0])
	            if match is not None:
	                matches.append(match)

	    # Generate the desired JSON output
	    return json.dumps(_group_labels(matches), indent=4)

	# Create the Gradio interface
	# One file-upload input is passed to process_pdf; the JSON string it returns
	# is shown through the "json" output component.
	iface = gr.Interface(
	fn=process_pdf,
	inputs=gr.File(label="Upload PDF"),
	outputs="json",
	title="Extract Data from PDF",
	description="Upload a PDF file and get the JSON output of detected numbers."
	)

	# Launch the Gradio app
	# NOTE: called unconditionally at module level (no __main__ guard), so
	# importing this file also starts the app.
	iface.launch()