"""Gradio app: OCR a page image with Google Cloud Vision and return CSV.

Lines of the recognised text that contain a comma are treated as rows of a
comma-separated table; each run of consecutive such lines becomes one table.
"""

import os
from io import BytesIO

import gradio as gr
import pandas as pd
from google.cloud import vision
from PIL import Image

# Image modes that can be written to PNG as-is; anything else is converted
# to RGB before encoding.
_PNG_SAFE_MODES = frozenset({"1", "L", "LA", "I", "I;16", "P", "RGB", "RGBA"})


def _image_to_bytes(image_file) -> bytes:
    """Return encoded image bytes for a PIL image or a raw bytes payload.

    Raises:
        TypeError: if ``image_file`` is neither a PIL image nor bytes-like.
    """
    if isinstance(image_file, Image.Image):
        picture = image_file
        if picture.mode not in _PNG_SAFE_MODES:
            picture = picture.convert("RGB")
        with BytesIO() as buffer:
            picture.save(buffer, format="PNG")
            return buffer.getvalue()
    if isinstance(image_file, (bytes, bytearray, memoryview)):
        # Already-encoded image data: pass it through unchanged.
        return bytes(image_file)
    raise TypeError(
        "image_file must be a PIL image or bytes, "
        f"got {type(image_file).__name__}"
    )


def _group_table_rows(lines) -> list:
    """Split text lines into tables: runs of consecutive comma-bearing lines.

    Returns a list of tables, each a list of rows, each row a list of
    whitespace-stripped cell strings.
    """
    tables = []
    current_rows = []
    for line in lines:
        if "," in line:
            current_rows.append([cell.strip() for cell in line.split(",")])
        elif current_rows:
            tables.append(current_rows)
            current_rows = []
    # A table that runs to the very last line has no terminating
    # non-table line, so it must be flushed here.
    if current_rows:
        tables.append(current_rows)
    return tables


def _rows_to_csv(rows) -> str:
    """Render one table (first row is the header) as CSV text.

    Short rows are padded with empty cells so that ragged OCR output does
    not make the DataFrame constructor fail.
    """
    width = max(len(row) for row in rows)
    header, *body = [row + [""] * (width - len(row)) for row in rows]
    return pd.DataFrame(body, columns=header).to_csv(index=False)


def extract_tables_with_google_vision(image_file) -> str:
    """Extract comma-separated tables from an image via Google Cloud Vision.

    Args:
        image_file: The page image, either a ``PIL.Image.Image`` (what the
            Gradio ``Image(type="pil")`` input delivers) or already-encoded
            image bytes. ``None`` (nothing uploaded) yields an empty result.

    Returns:
        CSV text for every detected table, tables separated by a blank
        line, or ``""`` when no text or no table-like lines were found.

    Raises:
        TypeError: if ``image_file`` has an unsupported type.
        RuntimeError: if the Vision API reports an error for the request.
    """
    if image_file is None:
        return ""

    img_bytes = _image_to_bytes(image_file)

    client = vision.ImageAnnotatorClient()
    response = client.text_detection(image=vision.Image(content=img_bytes))
    if response.error.message:
        raise RuntimeError(
            f"Google Cloud Vision request failed: {response.error.message}"
        )

    annotations = response.text_annotations
    if not annotations:
        return ""

    # The first annotation holds the full detected text; the remaining
    # entries are individual words and are useless for line-based parsing.
    lines = annotations[0].description.splitlines()

    return "\n".join(_rows_to_csv(rows) for rows in _group_table_rows(lines))


interface = gr.Interface(
    fn=extract_tables_with_google_vision,
    inputs=gr.Image(type="pil", label="Upload a PDF page image"),
    outputs=gr.Textbox(label="Extracted Tables"),
    title="PDF Table Extractor with Google Cloud Vision",
    description="Upload an image of a PDF page to extract tables.",
    allow_flagging="never",
)

if __name__ == "__main__":
    interface.launch()