Spaces:

not-lain
/

utils

Running

not-lain committed 10 days ago

Commit

c982cf8

•

1 Parent(s): d08c253

add pdf to text

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,10 +1,20 @@
 import gradio as gr
 from pdf2image import convert_from_path
-def process(file) :
-  images = convert_from_path(file)
-  return images
-pdf_to_img = gr.Interface(process,gr.File(),gr.Gallery(),api_name="pdf_to_img")
-demo = gr.TabbedInterface([pdf_to_img],["pdf_to_img"])
 demo.launch(debug=True)

 import gradio as gr
 from pdf2image import convert_from_path
+import pdfplumber
+def convert_pdf_to_image(file):
+    images = convert_from_path(file)
+    return images
+def extract_text_from_pdf(file):
+    text = ""
+    with pdfplumber.open(file) as pdf:
+        for page in pdf.pages:
+            text += page.extract_text()
+    return text
+pdf_to_img = gr.Interface(convert_pdf_to_image, gr.File(), gr.Gallery(), api_name="pdf_to_img")
+pdf_to_text = gr.Interface(extract_text_from_pdf, gr.File(), gr.Textbox(placeholder="Extracted text will appear here"), api_name="pdf_to_text")
+demo = gr.TabbedInterface([pdf_to_img, pdf_to_text], ["PDF to Image", "Extract Text"])
 demo.launch(debug=True)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 pdf2image
-gradio

 pdf2image
+gradio
+pdfplumber