"""Helpers for rendering PDF pages to images and handling bounding boxes."""

import base64
import os
import re
import shutil

import fitz
import numpy as np
from pdf2image import convert_from_path  # noqa: F401 - unused in this file; kept in case other modules import it from here
from PIL import Image, ImageDraw

# Matches each run of decimal digits. Signs and decimal points are not part
# of a match, so "-12.5" yields the two integers 12 and 5.
_DIGIT_RUN = re.compile(r"\d+")


def draw_boxes(image_path, boxes):
    """Open an image and outline every box on it in red.

    Args:
        image_path: Path of the image file to open.
        boxes: Iterable of boxes; each one is passed unchanged to
            ``ImageDraw.rectangle`` (e.g. ``[x0, y0, x1, y1]``).

    Returns:
        The opened PIL image with the rectangles drawn on it. Nothing is
        written back to disk.
    """
    image = Image.open(image_path)
    draw = ImageDraw.Draw(image)
    for box in boxes:
        draw.rectangle(box, outline="red", width=2)
    return image


def pdf_to_images(
    pdf_path, output_dir="extract_tables/table_outputs", output_format="png"
):
    """Render every page of a PDF at 300 DPI and save one image per page.

    Files are named ``pdf-image-<page number>.<output_format>`` with page
    numbers starting at 1.

    Args:
        pdf_path: Path of the PDF to render.
        output_dir: Directory that receives the images; created if missing.
        output_format: File extension used for the saved images.

    Returns:
        Paths of the images that were saved successfully, in page order.
        A page whose image could not be saved is reported on stdout and
        left out of the list.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    image_paths = []
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as pdf_document:
        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(dpi=300)
            image_file_path = os.path.join(
                output_dir, f"pdf-image-{page_number}.{output_format}"
            )
            try:
                pix.save(image_file_path)
            except Exception as e:
                # Best effort: report the failure and carry on with the
                # remaining pages.
                print(f"Error saving image {image_file_path}: {e}")
            else:
                image_paths.append(image_file_path)
    return image_paths


def parse_bboxs_gemini_flash(input_string):
    """Extract the integers found on each non-blank line of a text block.

    Args:
        input_string: Text with one bounding box per line.

    Returns:
        One list of ints per non-blank line, in input order. Only runs of
        digits are captured, so signs and decimal points are ignored. A
        non-blank line without any digit yields an empty list.
    """
    # Filter on the stripped line so whitespace-only lines (for example a
    # lone "\r" left over from CRLF input) do not turn into empty boxes.
    return [
        [int(number) for number in _DIGIT_RUN.findall(line)]
        for line in input_string.split("\n")
        if line.strip()
    ]


def convert_pdf_to_images(pdf_path):
    """Render every page of a PDF to an in-memory pixmap.

    Args:
        pdf_path: Path of the PDF to render.

    Returns:
        A list with one pixmap per page, in page order, rendered with the
        default ``get_pixmap()`` settings.
    """
    with fitz.open(pdf_path) as doc:
        return [page.get_pixmap() for page in doc]


def encode_image_to_base64(image):
    """Return the Base64 text of ``image.tobytes()``.

    Args:
        image: Any object exposing ``tobytes()``.
            NOTE(review): what the bytes contain (encoded file vs. raw
            pixels) depends on the image type - confirm against callers.

    Returns:
        The Base64-encoded bytes as a ``str``.
    """
    return base64.b64encode(image.tobytes()).decode("utf-8")


def calculate_scaling_factors(groundtruth_boxes, extracted_boxes):
    """Estimate the average x/y size ratio between paired bounding boxes.

    Boxes are paired by position and each one is unpacked as
    ``(xmin, ymin, xmax, ymax)``.

    Args:
        groundtruth_boxes: Reference boxes.
        extracted_boxes: Boxes to compare against the reference, same
            length and order.

    Returns:
        ``(x_scale, y_scale)``: the mean of extracted width / ground-truth
        width and the mean of extracted height / ground-truth height.

    Raises:
        AssertionError: If the two inputs differ in length.
        ZeroDivisionError: If a ground-truth box has zero width or height
            (for plain Python numbers).
    """
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``; without it zip() would silently drop the extras.
    if len(groundtruth_boxes) != len(extracted_boxes):
        raise AssertionError("Mismatch in the number of bounding boxes.")

    x_factors = []
    y_factors = []
    for gt_box, ext_box in zip(groundtruth_boxes, extracted_boxes):
        gt_xmin, gt_ymin, gt_xmax, gt_ymax = gt_box
        ext_xmin, ext_ymin, ext_xmax, ext_ymax = ext_box

        x_factors.append((ext_xmax - ext_xmin) / (gt_xmax - gt_xmin))
        y_factors.append((ext_ymax - ext_ymin) / (gt_ymax - gt_ymin))

    return np.mean(x_factors), np.mean(y_factors)


def scale_bounding_boxes(extracted_boxes, scaling_factors):
    """Divide every box coordinate by the matching scaling factor.

    Args:
        extracted_boxes: Boxes grouped per page: a sequence of pages, each
            a sequence of ``[x0, y0, x1, y1]`` boxes.
        scaling_factors: Indexable pair ``(x_factor, y_factor)``.
            Presumably the result of ``calculate_scaling_factors`` -
            verify against callers.

    Returns:
        A new nested list with the same page/box structure in which x
        coordinates are divided by ``scaling_factors[0]`` and y coordinates
        by ``scaling_factors[1]``. The input is not modified.
    """
    return [
        [
            [
                box[0] / scaling_factors[0],
                box[1] / scaling_factors[1],
                box[2] / scaling_factors[0],
                box[3] / scaling_factors[1],
            ]
            for box in page_boxes
        ]
        for page_boxes in extracted_boxes
    ]


def clear_directory(directory_path):
    """Delete everything inside a directory, keeping the directory itself.

    Files and symlinks are unlinked and sub-directories are removed
    recursively. A failure on one entry is reported on stdout and does not
    stop the remaining entries from being processed.

    Args:
        directory_path: Directory to empty. Nothing happens when the path
            does not exist.
    """
    if not os.path.exists(directory_path):
        return

    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        try:
            if os.path.isfile(file_path) or os.path.islink(file_path):
                os.unlink(file_path)
            elif os.path.isdir(file_path):
                shutil.rmtree(file_path)
        except OSError as e:
            print(f"Failed to delete {file_path}. Reason: {e}")