shimer56 committed on
Commit
409e708
1 Parent(s): 63f6421

Upload folder using huggingface_hub

Browse files
extract_images/input_docs/uploaded_pdf.pdf CHANGED
Binary files a/extract_images/input_docs/uploaded_pdf.pdf and b/extract_images/input_docs/uploaded_pdf.pdf differ
 
extract_images/services.py CHANGED
@@ -7,6 +7,7 @@ import google.generativeai as genai
7
  from PIL import Image, ImageDraw
8
  import openai
9
  import requests
 
10
 
11
  from constants import GEMINI_API_KEY, OPENAI_API_KEY
12
  from utils import (
@@ -41,6 +42,7 @@ def extract_images_pdfplumber(pdf_file):
41
  f.write(pdf_file)
42
 
43
  images = []
 
44
  pdf_obj = pdfplumber.open(pdf_path)
45
  for page_idx, page in enumerate(pdf_obj.pages):
46
  page_bbox = []
@@ -55,8 +57,8 @@ def extract_images_pdfplumber(pdf_file):
55
  page_bbox.append(image_bbox)
56
  cropped_page = page.crop(image_bbox)
57
  image_obj = cropped_page.to_image(resolution=400)
58
- image_path = (
59
- f"extract_images/image_outputs/image-{page_idx}-{image_idx}.png"
60
  )
61
  image_obj.save(image_path)
62
  image = Image.open(image_path)
 
7
  from PIL import Image, ImageDraw
8
  import openai
9
  import requests
10
+ import os
11
 
12
  from constants import GEMINI_API_KEY, OPENAI_API_KEY
13
  from utils import (
 
42
  f.write(pdf_file)
43
 
44
  images = []
45
+ output_dir = "extract_tables/table_outputs"
46
  pdf_obj = pdfplumber.open(pdf_path)
47
  for page_idx, page in enumerate(pdf_obj.pages):
48
  page_bbox = []
 
57
  page_bbox.append(image_bbox)
58
  cropped_page = page.crop(image_bbox)
59
  image_obj = cropped_page.to_image(resolution=400)
60
+ image_path = os.path.join(
61
+ output_dir, f"image-{page_idx + 1}-{image_idx}.png"
62
  )
63
  image_obj.save(image_path)
64
  image = Image.open(image_path)