Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
extract_images/input_docs/uploaded_pdf.pdf
CHANGED
Binary files a/extract_images/input_docs/uploaded_pdf.pdf and b/extract_images/input_docs/uploaded_pdf.pdf differ
|
|
extract_images/services.py
CHANGED
@@ -7,6 +7,7 @@ import google.generativeai as genai
|
|
7 |
from PIL import Image, ImageDraw
|
8 |
import openai
|
9 |
import requests
|
|
|
10 |
|
11 |
from constants import GEMINI_API_KEY, OPENAI_API_KEY
|
12 |
from utils import (
|
@@ -41,6 +42,7 @@ def extract_images_pdfplumber(pdf_file):
|
|
41 |
f.write(pdf_file)
|
42 |
|
43 |
images = []
|
|
|
44 |
pdf_obj = pdfplumber.open(pdf_path)
|
45 |
for page_idx, page in enumerate(pdf_obj.pages):
|
46 |
page_bbox = []
|
@@ -55,8 +57,8 @@ def extract_images_pdfplumber(pdf_file):
|
|
55 |
page_bbox.append(image_bbox)
|
56 |
cropped_page = page.crop(image_bbox)
|
57 |
image_obj = cropped_page.to_image(resolution=400)
|
58 |
-
image_path = (
|
59 |
-
f"
|
60 |
)
|
61 |
image_obj.save(image_path)
|
62 |
image = Image.open(image_path)
|
|
|
7 |
from PIL import Image, ImageDraw
|
8 |
import openai
|
9 |
import requests
|
10 |
+
import os
|
11 |
|
12 |
from constants import GEMINI_API_KEY, OPENAI_API_KEY
|
13 |
from utils import (
|
|
|
42 |
f.write(pdf_file)
|
43 |
|
44 |
images = []
|
45 |
+
output_dir = "extract_tables/table_outputs"
|
46 |
pdf_obj = pdfplumber.open(pdf_path)
|
47 |
for page_idx, page in enumerate(pdf_obj.pages):
|
48 |
page_bbox = []
|
|
|
57 |
page_bbox.append(image_bbox)
|
58 |
cropped_page = page.crop(image_bbox)
|
59 |
image_obj = cropped_page.to_image(resolution=400)
|
60 |
+
image_path = os.path.join(
|
61 |
+
output_dir, f"image-{page_idx + 1}-{image_idx}.png"
|
62 |
)
|
63 |
image_obj.save(image_path)
|
64 |
image = Image.open(image_path)
|