Spaces:
Sleeping
Sleeping
import gradio as gr | |
import fitz # PyMuPDF | |
from PIL import Image, ImageDraw | |
from io import BytesIO | |
import pandas as pd | |
import os | |
import numpy as np | |
import google.generativeai as genai | |
import openai | |
import base64 | |
import requests | |
import tempfile | |
import ast | |
# Credentials are read from the environment so that no secret is committed
# with the source. Any key that was previously hard-coded here has been
# published and must be revoked and rotated.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
openai.api_key = os.environ.get("OPENAI_API_KEY")
import gradio as gr | |
import fitz # PyMuPDF | |
from PIL import Image | |
from io import BytesIO | |
import pandas as pd | |
import numpy as np | |
import tempfile | |
# Define the model extraction functions | |
def extract_bounding_box_pymupdf(pdf_content):
    """Return the bounding boxes of the images listed on each page of a PDF.

    Args:
        pdf_content: Raw PDF data, handed to ``fitz.open`` as ``stream``.

    Returns:
        A list with one entry per page, in page order. Each entry is a list
        holding one box per item of ``page.get_images(full=True)``; a box is
        ``list(rect)`` of the rectangle returned by ``page.get_image_bbox``.
    """
    bounding_boxes = []
    # The context manager closes the document even when a page raises.
    with fitz.open(stream=pdf_content, filetype="pdf") as pdf_file:
        for page in pdf_file:
            # Entry 7 of an image item is passed as the lookup key, exactly
            # as before (per the PyMuPDF docs it is the reference name).
            page_bbox = [
                list(page.get_image_bbox(img[7]))
                for img in page.get_images(full=True)
            ]
            bounding_boxes.append(page_bbox)
    return bounding_boxes
def extract_bounding_boxes_gemini(api_key, images):
    """Return placeholder bounding boxes, one list per input image.

    This is a stand-in for a Gemini API integration; no request is made.

    Args:
        api_key: Currently unused.
        images: Sized collection of images; only its length is used.

    Returns:
        A list of ``len(images)`` entries, each a separate list containing
        the single dummy box ``(0, 0, 100, 100)``.
    """
    # Build a fresh inner list per image: ``[[box]] * n`` would repeat one
    # shared list object, so mutating one entry would change all of them.
    return [[(0, 0, 100, 100)] for _ in range(len(images))]
def extract_bounding_box_gpt(api_key, pdf_content):
    """Return placeholder bounding boxes, one list per page of the PDF.

    This is a stand-in for a GPT-4 API integration; no request is made.

    Args:
        api_key: Currently unused.
        pdf_content: Raw PDF data, handed to ``fitz.open`` as ``stream``.

    Returns:
        A list with one entry per page, each a separate list containing the
        single dummy box ``(0, 0, 100, 100)``.
    """
    # Open the document only to count its pages, and close it again right
    # away instead of leaving the handle to the garbage collector.
    with fitz.open(stream=pdf_content, filetype="pdf") as pdf_file:
        page_count = len(pdf_file)
    # One fresh inner list per page (``[[box]] * n`` would alias a single list).
    return [[(0, 0, 100, 100)] for _ in range(page_count)]
def _read_pdf_bytes(pdf_file):
    """Return raw PDF bytes for a filesystem path or an in-memory buffer."""
    if isinstance(pdf_file, (str, os.PathLike)):
        # A path, as delivered by the Gradio file input or a local caller.
        with open(pdf_file, "rb") as f:
            return f.read()
    if isinstance(pdf_file, (bytes, bytearray)):
        return bytes(pdf_file)
    raise TypeError("Unsupported input type for pdf_file.")


def _extract_images(pdf_document):
    """Decode every image listed for each page of *pdf_document*."""
    images = []
    for page_index in range(len(pdf_document)):
        for img in pdf_document.get_page_images(page_index):
            xref = img[0]
            base_image = pdf_document.extract_image(xref)
            images.append(Image.open(BytesIO(base_image["image"])))
    return images


def _extract_table_csv(pdf_document):
    """Return comma-separated page text as one CSV string ("" if none found)."""
    rows = []
    for page in pdf_document:
        text = page.get_text("text")
        lines = [line.strip() for line in text.split("\n") if line.strip()]
        # A page contributes rows only if at least one line has a comma;
        # in that case every non-empty line of the page becomes a row.
        if any("," in line for line in lines):
            rows.extend(line.split(",") for line in lines)
    if not rows:
        return ""
    # Pad ragged rows so all rows match the widest one; the first row
    # collected supplies the column headers.
    max_columns = max(len(row) for row in rows)
    padded = [row + [""] * (max_columns - len(row)) for row in rows]
    df = pd.DataFrame(padded[1:], columns=padded[0])
    return df.to_csv(index=False)


def extract_images_and_tables(pdf_file, model_option):
    """Extract images, comma-separated text tables and image boxes from a PDF.

    Args:
        pdf_file: Path to a PDF (``str`` or ``os.PathLike``) or the PDF
            content itself (``bytes`` or ``bytearray``).
        model_option: ``"PyMuPDF"``, ``"Gemini"`` or ``"GPT-4"``; selects
            which bounding-box extractor runs. Any other value yields an
            empty bounding-box list.

    Returns:
        A tuple ``(images, table_content, bounding_boxes)``: the list of PIL
        images, the table data as CSV text (empty string when no
        comma-separated text was found), and the selected extractor's boxes.

    Raises:
        TypeError: If ``pdf_file`` is neither a path nor a bytes-like buffer.
    """
    pdf_bytes = _read_pdf_bytes(pdf_file)

    # The context manager closes the document even if extraction fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        images = _extract_images(pdf_document)
        table_content = _extract_table_csv(pdf_document)

    if model_option == "PyMuPDF":
        bounding_boxes = extract_bounding_box_pymupdf(pdf_bytes)
    elif model_option == "Gemini":
        bounding_boxes = extract_bounding_boxes_gemini(
            "your_gemini_api_key_here", images
        )
    elif model_option == "GPT-4":
        bounding_boxes = extract_bounding_box_gpt("your_gpt4_api_key_here", pdf_bytes)
    else:
        bounding_boxes = []
    return images, table_content, bounding_boxes
def handle_model_selection(pdf_file, model_option):
    """Gradio callback: run the extraction for the uploaded PDF and model.

    Forwards both arguments unchanged to ``extract_images_and_tables`` and
    returns its result as-is.
    """
    extraction_result = extract_images_and_tables(pdf_file, model_option)
    return extraction_result
# Define the Gradio interface | |
# Inputs: the uploaded PDF (delivered to the callback as a file path) and the
# name of the bounding-box extractor to run.
# Outputs, in the order returned by the callback: images, CSV text, boxes.
interface = gr.Interface(
    fn=handle_model_selection,
    inputs=[
        gr.File(type="filepath", label="Upload PDF"),
        gr.Dropdown(
            label="Select Model",
            choices=["PyMuPDF", "Gemini", "GPT-4"],
            value="PyMuPDF",
        ),
    ],
    outputs=[
        gr.Gallery(label="Extracted Images"),
        gr.Textbox(label="Extracted Tables"),
        gr.JSON(label="Extracted Bounding Boxes"),
    ],
    title="PDF Image and Table Extractor",
    description="Upload a PDF to extract images and tables. Choose the model for extraction.",
)

# Launch only when executed as a script, so that importing this module (for
# example from a test) does not start a web server as a side effect.
if __name__ == "__main__":
    interface.launch(share=True)