import base64
import os
import re
import shutil

import fitz
import numpy as np
from pdf2image import convert_from_path
from PIL import Image, ImageDraw


def draw_boxes(image_path, boxes):
    """Open an image and outline every box in *boxes* on it in red.

    Args:
        image_path: Path of the image file, handed to ``Image.open``.
        boxes: Iterable of boxes; each one is passed unchanged to
            ``ImageDraw.rectangle`` (presumably ``[x0, y0, x1, y1]`` in
            pixel coordinates -- confirm against the caller).

    Returns:
        The opened PIL image with the rectangles drawn onto it. The file on
        disk is not modified.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for box in boxes:
        draw.rectangle(box, outline="red", width=2)
    return image


def pdf_to_images(pdf_path, output_dir="extract_tables/table_outputs"):
    """Render each page of a PDF to a PNG file and return the file paths.

    Args:
        pdf_path: Path of the PDF, handed to ``pdf2image.convert_from_path``.
        output_dir: Directory that receives the PNG files. It is created if
            it does not exist yet. Defaults to the location this module has
            always written to.

    Returns:
        List of the written file paths in page order. Files are named
        ``pdf-image-<n>.png`` with ``n`` starting at 1.
    """
    images = convert_from_path(pdf_path)
    # Saving into a missing directory raises FileNotFoundError, so make sure
    # the target exists before writing the first page.
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    for page_number, image in enumerate(images, start=1):
        # A literal "/" (rather than os.path.join) keeps the returned strings
        # identical to what callers received before, on every platform.
        image_file_path = f"{output_dir}/pdf-image-{page_number}.png"
        image.save(image_file_path, format="PNG")
        image_paths.append(image_file_path)
    return image_paths


def parse_bboxs_gemini_flash(input_string):
    """Parse one bounding box per line out of a text response.

    Every run of decimal digits on a line becomes one integer of that line's
    box; all other characters are ignored. Blank and whitespace-only lines
    (for example a stray ``\\r`` from CRLF input) are skipped so they do not
    produce empty boxes.

    Args:
        input_string: Text with one box per line.

    Returns:
        List of integer lists, one per non-blank line. A non-blank line that
        contains no digits still yields an empty list.
    """
    return [
        [int(number) for number in re.findall(r"\d+", line)]
        for line in input_string.strip().split("\n")
        if line.strip()
    ]


def convert_pdf_to_images(pdf_path):
    """Rasterise every page of a PDF with PyMuPDF.

    Args:
        pdf_path: Path of the PDF, handed to ``fitz.open``.

    Returns:
        List with one pixmap per page (the result of ``page.get_pixmap()``
        with default settings), in page order.
    """
    with fitz.open(pdf_path) as doc:
        return [page.get_pixmap() for page in doc]


def encode_image_to_base64(image):
    """Return the bytes of *image* as a base64 text string.

    Args:
        image: Any object exposing ``tobytes()``.

    Returns:
        The base64 encoding of ``image.tobytes()`` as a ``str``.
    """
    # NOTE(review): what tobytes() yields depends on the object type -- for a
    # PIL image it is raw pixel data, not an encoded PNG/JPEG file. Confirm
    # that callers pass the type the consumer of this string expects.
    return base64.b64encode(image.tobytes()).decode("utf-8")


def calculate_scaling_factors(groundtruth_boxes, extracted_boxes):
    """Estimate how much larger extracted boxes are than ground-truth boxes.

    Boxes are paired by position. For each pair the width ratio and height
    ratio (extracted / ground truth) are computed, and the ratios are
    averaged over all pairs.

    Args:
        groundtruth_boxes: Sequence of ``(xmin, ymin, xmax, ymax)`` boxes.
        extracted_boxes: Sequence of ``(xmin, ymin, xmax, ymax)`` boxes with
            the same length as *groundtruth_boxes*.

    Returns:
        Tuple ``(x_scale, y_scale)`` of the mean width and height ratios.

    Raises:
        AssertionError: If the two sequences differ in length.
        ZeroDivisionError: If a ground-truth box has zero width or height
            (when coordinates are plain Python numbers).
    """
    # Explicit check instead of an `assert` statement: asserts are stripped
    # under `python -O`, after which zip() would silently drop the unmatched
    # boxes. AssertionError is kept so existing callers see the same type.
    if len(groundtruth_boxes) != len(extracted_boxes):
        raise AssertionError("Mismatch in the number of bounding boxes.")

    x_factors = []
    y_factors = []
    for gt_box, ext_box in zip(groundtruth_boxes, extracted_boxes):
        gt_xmin, gt_ymin, gt_xmax, gt_ymax = gt_box
        ext_xmin, ext_ymin, ext_xmax, ext_ymax = ext_box

        gt_width = gt_xmax - gt_xmin
        gt_height = gt_ymax - gt_ymin
        ext_width = ext_xmax - ext_xmin
        ext_height = ext_ymax - ext_ymin

        x_factors.append(ext_width / gt_width)
        y_factors.append(ext_height / gt_height)

    return np.mean(x_factors), np.mean(y_factors)


def scale_bounding_boxes(extracted_boxes, scaling_factors):
    """Divide box coordinates by per-axis scaling factors.

    Args:
        extracted_boxes: List of pages, each a list of boxes indexed as
            ``[x0, y0, x1, y1]``.
        scaling_factors: Indexable pair ``(x_scale, y_scale)``. Index 0
            divides the x coordinates and index 1 the y coordinates.

    Returns:
        New nested list with the same page/box structure holding the scaled
        coordinates. The input is not modified.
    """
    return [
        [
            [
                box[0] / scaling_factors[0],
                box[1] / scaling_factors[1],
                box[2] / scaling_factors[0],
                box[3] / scaling_factors[1],
            ]
            for box in page_boxes
        ]
        for page_boxes in extracted_boxes
    ]


def clear_directory(directory_path):
    """Delete everything inside a directory but keep the directory itself.

    Files and symlinks are unlinked and sub-directories are removed
    recursively. Deletion is best-effort: a failure on one entry is printed
    and the remaining entries are still processed. Nothing happens if
    *directory_path* does not exist.

    Args:
        directory_path: Directory whose contents should be removed.
    """
    if not os.path.exists(directory_path):
        return

    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            # Filesystem errors (permissions, races, busy files) are reported
            # rather than raised so one bad entry does not stop the cleanup.
            print(f"Failed to delete {file_path}. Reason: {e}")