akarshrajsingh7 committed on
Commit
f3a9c70
1 Parent(s): 4d081e4

Initial Commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. app.py +87 -0
  3. fast_reader.py +38 -0
  4. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.pyc
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from concurrent.futures import ThreadPoolExecutor
4
+ from base64 import b64encode
5
+ from fpdf import FPDF
6
+ import io, string, re, math
7
+
8
+ #importing the class
9
+ from fast_reader import Fastify_Reader
10
+
11
def pdf_extract_text(pdf_docs):
    '''
    Concatenate the extracted text of every page of every PDF in ``pdf_docs``.

    Each item of ``pdf_docs`` is handed to ``PdfReader``; page texts are joined
    in document order, then page order, with no separator between them.
    Returns an empty string when ``pdf_docs`` is empty.
    '''
    return "".join(
        page.extract_text()
        for document in pdf_docs
        for page in PdfReader(document).pages
    )
21
+
22
def text_to_pdf_fastify(text):
    '''
    Render ``text`` as a one-font PDF with the fastify bolding applied.

    The text is first run through ``Fastify_Reader(text).fastify()``, which
    wraps the leading part of each word in ``**`` markers; ``markdown=True``
    makes FPDF render those markers as bold.

    Returns the generated PDF document as ``bytes``.
    '''
    bold_text = Fastify_Reader(text).fastify()

    # The built-in (core) PDF fonts can only encode Latin-1. Text extracted from
    # real PDFs routinely contains characters outside that range (curly quotes,
    # bullets, ...), which would make FPDF raise instead of producing a file.
    # Replace anything unsupported with "?" so rendering always succeeds.
    printable_text = bold_text.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    # "Arial" is only a deprecated alias that fpdf2 maps to the core Helvetica font.
    pdf.set_font("Helvetica", size=12)
    # Text is passed positionally: the ``txt=`` keyword is deprecated in fpdf2 2.7.6+.
    pdf.multi_cell(0, 10, printable_text, markdown=True)
    return bytes(pdf.output())
30
+
31
def text_to_pdf(text):
    '''
    Render ``text`` unchanged as a one-font PDF.

    Returns the generated PDF document as ``bytes``.
    '''
    # The built-in (core) PDF fonts can only encode Latin-1. Replace any
    # character outside that range with "?" so FPDF does not raise on
    # text containing e.g. curly quotes or bullets.
    printable_text = text.encode("latin-1", errors="replace").decode("latin-1")

    pdf = FPDF()
    pdf.add_page()
    # "Arial" is only a deprecated alias that fpdf2 maps to the core Helvetica font.
    pdf.set_font("Helvetica", size=12)
    # NOTE(review): markdown=True means "**", "__" and "--" already present in
    # the input are interpreted as style markers - confirm this is intended
    # for the "original" rendering.
    # Text is passed positionally: the ``txt=`` keyword is deprecated in fpdf2 2.7.6+.
    pdf.multi_cell(0, 10, printable_text, markdown=True)
    return bytes(pdf.output())
37
+
38
def _pdf_embed_html(pdf_bytes):
    '''Return an HTML ``<embed>`` tag that shows ``pdf_bytes`` inline via a base64 data URI.'''
    encoded_pdf = b64encode(pdf_bytes).decode("utf-8")
    return f'<embed src="data:application/pdf;base64,{encoded_pdf}" width = "100%" height = 600 type="application/pdf">'


def _render_comparison(text):
    '''Show the plain and the fastified PDF of ``text`` side by side, with a download button.'''
    plain_pdf = text_to_pdf(text)
    # Build the fastified PDF once and reuse the same bytes for both the
    # preview and the download instead of generating it twice.
    fastified_pdf = text_to_pdf_fastify(text)

    col1, col2 = st.columns([1, 1], gap="small")
    with col1:
        with st.container(border=True):
            st.markdown(_pdf_embed_html(plain_pdf), unsafe_allow_html=True)
    with col2:
        with st.container(border=True):
            st.markdown(_pdf_embed_html(fastified_pdf), unsafe_allow_html=True)
        st.download_button(label="Download as PDF", data=fastified_pdf, file_name='output.pdf', mime='application/pdf')


st.set_page_config(layout="wide")
st.header("fastify Reader :books:")
tab1, tab2 = st.tabs(["PDF file", "Text File"])

with tab1:
    st.header("PDF File")
    uploaded_file = st.file_uploader("Upload a PDF file", type="pdf", accept_multiple_files=True)

    if st.button("Submit", key="pdf"):
        if not uploaded_file:
            # Nothing uploaded: tell the user instead of rendering a blank PDF.
            st.warning("Please upload at least one PDF file.")
        else:
            with st.spinner("Processing"):
                _render_comparison(pdf_extract_text(uploaded_file))

with tab2:
    st.header("Input Text")
    user_input = st.text_input("Enter some text")

    if st.button("Submit", key="input-text"):
        if not user_input.strip():
            # Blank input: tell the user instead of rendering a blank PDF.
            st.warning("Please enter some text.")
        else:
            with st.spinner("Processing"):
                _render_comparison(user_input)
fast_reader.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import string
2
+
3
class Fastify_Reader:
    '''
    Class to implement the fastify Fast Reader. The intention is to enable the users to
    read text faster by bolding some parts of the words.

    The leading characters of every word are wrapped in ``**`` markers
    (Markdown-style bold); the rest of the word is left as is.
    '''

    # Translation table that deletes ASCII punctuation. Built once for the
    # class instead of once per word.
    _PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

    def __init__(self, data):
        '''Store the text to process and the divisor used to size the bold prefix.'''
        self.text = data
        self.offset_factor = 1.6

    def _get_offset(self, word: str):
        '''
        Return how many leading characters of ``word`` should be bold.

        The count is the punctuation-free length divided by ``offset_factor``,
        truncated to an int, and never less than 1.
        '''
        word_stripped = word.translate(self._PUNCTUATION_TABLE)
        fixation = int(len(word_stripped) / self.offset_factor)
        return fixation if fixation != 0 else 1

    def _bold_prefix(self, part: str):
        '''Wrap the leading ``_get_offset(part)`` characters of ``part`` in ``**`` markers.'''
        offset = self._get_offset(part)
        return f"**{part[:offset]}**{part[offset:]}"

    def fastify_word(self, word: str):
        '''
        Return ``word`` with a bold prefix on each hyphen-separated segment.

        Any number of hyphens is supported: every segment is bolded on its own
        and the segments are re-joined with ``-``. A word without hyphens is
        treated as a single segment.
        '''
        return "-".join(self._bold_prefix(part) for part in word.split('-'))

    def fastify_line(self, line: str):
        '''Fastify every whitespace-separated word of ``line`` and join them with single spaces.'''
        return " ".join(self.fastify_word(word) for word in line.split())

    def fastify(self):
        '''
        Return the whole text with every word fastified.

        Lines are split on ``\\n`` and re-joined with ``\\n``; leading and
        trailing whitespace of the result is stripped.
        '''
        fastify_text = "\n".join(self.fastify_line(line) for line in self.text.split('\n'))
        # An empty segment (e.g. next to a leading or trailing hyphen) produces
        # an empty bold span "****"; drop those.
        return fastify_text.replace("****", "").strip()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ PyPDF2==3.0.1
2
+ fpdf2==2.7.8