not-lain committed on
Commit
c982cf8
1 Parent(s): d08c253

add pdf to text

Browse files
Files changed (2) hide show
  1. app.py +15 -5
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,10 +1,20 @@
1
  import gradio as gr
2
  from pdf2image import convert_from_path
 
3
 
4
- def process(file) :
5
- images = convert_from_path(file)
6
- return images
7
 
8
- pdf_to_img = gr.Interface(process,gr.File(),gr.Gallery(),api_name="pdf_to_img")
9
- demo = gr.TabbedInterface([pdf_to_img],["pdf_to_img"])
 
 
 
 
 
 
 
 
 
10
  demo.launch(debug=True)
 
1
  import gradio as gr
2
  from pdf2image import convert_from_path
3
+ import pdfplumber
4
 
5
def convert_pdf_to_image(file):
    """Render a PDF into images via ``pdf2image.convert_from_path``.

    Args:
        file: The PDF to convert, handed unchanged to ``convert_from_path``.

    Returns:
        Whatever ``convert_from_path`` produces for the given file.
    """
    return convert_from_path(file)
8
 
9
def extract_text_from_pdf(file):
    """Return the text of every page in a PDF, concatenated in page order.

    Args:
        file: The PDF to read, passed straight to ``pdfplumber.open``.

    Returns:
        A single string made of each page's extracted text joined
        back-to-back with no separator. Pages that yield no text
        contribute nothing to the result.
    """
    with pdfplumber.open(file) as pdf:
        # extract_text() may return None for a page without a text layer
        # (e.g. a scanned image) in some pdfplumber releases; `or ""` keeps
        # such a page from raising TypeError during concatenation.
        return "".join(page.extract_text() or "" for page in pdf.pages)
15
+
16
# One Gradio interface per feature; each takes an uploaded file as input.
pdf_to_img = gr.Interface(
    fn=convert_pdf_to_image,
    inputs=gr.File(),
    outputs=gr.Gallery(),
    api_name="pdf_to_img",
)
pdf_to_text = gr.Interface(
    fn=extract_text_from_pdf,
    inputs=gr.File(),
    outputs=gr.Textbox(placeholder="Extracted text will appear here"),
    api_name="pdf_to_text",
)

# Expose both interfaces as tabs of a single app, then start the server.
demo = gr.TabbedInterface(
    [pdf_to_img, pdf_to_text],
    ["PDF to Image", "Extract Text"],
)
demo.launch(debug=True)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  pdf2image
2
- gradio
 
 
1
  pdf2image
2
+ gradio
3
+ pdfplumber