File size: 1,143 Bytes
e9afe19 06055f2 e9afe19 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import streamlit as st
import subprocess
from subprocess import STDOUT, check_call
import os
import base64
import camelot as cam
@st.cache
def gh():
    """Install Ghostscript through apt-get, on a best-effort basis.

    The command's output is discarded and its exit status is ignored, so a
    failed install (no root, no network, package already present) never
    stops the app from starting. The call blocks until apt-get exits so
    that the install is not still in flight when a PDF is processed.

    The function is wrapped in ``st.cache``, presumably so the install is
    not repeated on every Streamlit script rerun.

    Returns:
        None.
    """
    try:
        # Argument list instead of a shell string: nothing here needs shell
        # parsing. DEVNULL avoids opening (and leaking) an os.devnull handle.
        subprocess.run(
            ["apt-get", "install", "-y", "ghostscript"],
            stdin=None,
            stdout=subprocess.DEVNULL,
            stderr=STDOUT,
            check=False,
        )
    except OSError:
        # apt-get is missing or not executable on this host. The shell-based
        # original silently ignored that case too, so keep it non-fatal.
        pass
# Kick off the Ghostscript install before building the page.
gh()

st.title("Extract Tables from PDFs")
input_pdf = st.file_uploader(label="Upload PDF here", type='pdf')
st.markdown("### Page Number")
page_number = st.text_input("Enter the page # from where you want the table", value=1)

if input_pdf is not None:
    # cam.read_pdf is given a file path below, so persist the upload to disk.
    # getvalue() returns the full upload regardless of the current stream
    # position, which read() would not do if the buffer was already consumed.
    with open("input.pdf", "wb") as f:
        f.write(input_pdf.getvalue())

    table = cam.read_pdf("input.pdf", pages=page_number, flavor='stream')
    st.markdown("## Number of Tables")
    st.write(table)

    if len(table) > 0:
        # Offer 1-based table numbers 1..N. Starting the range at 0 would add
        # an extra entry that maps to index -1, i.e. the last table again.
        option = st.selectbox(
            label="Select the table to be displayed",
            options=range(1, len(table) + 1),
        )
        st.markdown("### Output Table")
        # Convert the 1-based selection back to a 0-based index.
        st.dataframe(table[int(option) - 1].df)
|