Spaces:
Sleeping
Sleeping
akarshrajsingh7
committed on
Commit
•
698579d
1
Parent(s):
4d78b95
Added compare feature + Basic Comments
Browse files
- app.py +98 -47
- fast_reader.py +11 -4
app.py
CHANGED
@@ -21,8 +21,14 @@ def pdf_extract_text(pdf_docs):
|
|
21 |
return text
|
22 |
|
23 |
def text_to_pdf_fastify(text):
|
|
|
|
|
|
|
|
|
24 |
bold_text = Fastify_Reader(text).fastify()
|
25 |
-
bold_text = bold_text.encode('latin-1', 'ignore').decode('latin-1')
|
|
|
|
|
26 |
pdf = FPDF()
|
27 |
pdf.add_page()
|
28 |
pdf.set_font("Arial", size = 12)
|
@@ -30,47 +36,117 @@ def text_to_pdf_fastify(text):
|
|
30 |
return bytes(pdf.output())
|
31 |
|
32 |
def text_to_pdf(text):
|
33 |
-
|
|
|
|
|
|
|
|
|
34 |
pdf = FPDF()
|
35 |
pdf.add_page()
|
36 |
pdf.set_font("Arial", size = 12)
|
37 |
pdf.multi_cell(0, 10, txt = text, markdown=True)
|
38 |
return bytes(pdf.output())
|
39 |
|
|
|
40 |
st.set_page_config(page_title="Fastify Reader",
|
41 |
page_icon=":books:",
|
42 |
layout="wide")
|
43 |
|
|
|
44 |
note_text = """
|
45 |
If the PDF is not being rendered by your browser, <br>
|
46 |
 1. Try this link - (https://akarshrajsingh7-fastify-reader.hf.space/) <br>
|
47 |
 2. Download the PDF and open it in a PDF viewer.
|
48 |
"""
|
49 |
-
|
50 |
with st.sidebar:
|
51 |
st.image("Logo.jpg")
|
52 |
st.markdown("<div style='text-align: center;'>How fast can you read?</div>", unsafe_allow_html=True)
|
53 |
|
54 |
-
|
|
|
55 |
|
|
|
56 |
with tab1:
|
57 |
-
st.
|
58 |
-
|
|
|
|
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
with st.spinner("Processing"):
|
62 |
-
|
63 |
-
|
64 |
-
original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
|
65 |
-
base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
col1, col2, col3 = st.columns(3)
|
71 |
with col3:
|
72 |
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
73 |
-
|
|
|
74 |
col1, col2 = st.columns([1, 1], gap="small")
|
75 |
with col1:
|
76 |
with st.container(border = True):
|
@@ -80,43 +156,18 @@ with tab1:
|
|
80 |
with st.container(border = True):
|
81 |
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
82 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
|
|
83 |
st.markdown(f"""
|
84 |
<div style='background-color: #FFD580; border-radius: 5px;'>
|
85 |
<p style='color: black;'><strong>Note</strong> - {note_text}</p>
|
86 |
</div>
|
87 |
""", unsafe_allow_html=True)
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
if st.button("Submit", key="input-text"):
|
94 |
-
with st.spinner("Processing"):
|
95 |
-
text = user_input
|
96 |
-
|
97 |
-
original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
|
98 |
-
base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
|
99 |
-
|
100 |
-
original_display = f'<embed src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf">'
|
101 |
-
pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf">'
|
102 |
-
|
103 |
-
col1, col2, col3 = st.columns(3)
|
104 |
-
with col3:
|
105 |
-
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
106 |
-
|
107 |
-
col1, col2 = st.columns([1, 1], gap="small")
|
108 |
-
with col1:
|
109 |
-
with st.container(border = True):
|
110 |
-
st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
|
111 |
-
st.markdown(original_display, unsafe_allow_html=True)
|
112 |
-
with col2:
|
113 |
with st.container(border = True):
|
114 |
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
115 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
116 |
-
|
117 |
-
st.markdown(f"""
|
118 |
-
<div style='background-color: #FFD580; border-radius: 5px;'>
|
119 |
-
<p style='color: black;'><strong>Note</strong> - {note_text}</p>
|
120 |
-
</div>
|
121 |
-
""", unsafe_allow_html=True)
|
122 |
-
|
|
|
21 |
return text
|
22 |
|
23 |
def text_to_pdf_fastify(text):
|
24 |
+
'''
|
25 |
+
Basic function to apply fastification on the input text and convert it to bytes for PDF rendering
|
26 |
+
'''
|
27 |
+
# Applying the Fastify Logic
|
28 |
bold_text = Fastify_Reader(text).fastify()
|
29 |
+
bold_text = bold_text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
|
30 |
+
|
31 |
+
# Creating the PDF
|
32 |
pdf = FPDF()
|
33 |
pdf.add_page()
|
34 |
pdf.set_font("Arial", size = 12)
|
|
|
36 |
return bytes(pdf.output())
|
37 |
|
38 |
def text_to_pdf(text):
|
39 |
+
'''
|
40 |
+
Basic function to convert the input text to bytes for PDF rendering
|
41 |
+
'''
|
42 |
+
text = text.encode('latin-1', 'ignore').decode('latin-1') #since fpdf works with latin-1 encoding
|
43 |
+
# Creating the PDF
|
44 |
pdf = FPDF()
|
45 |
pdf.add_page()
|
46 |
pdf.set_font("Arial", size = 12)
|
47 |
pdf.multi_cell(0, 10, txt = text, markdown=True)
|
48 |
return bytes(pdf.output())
|
49 |
|
50 |
+
#Setting the page config
|
51 |
st.set_page_config(page_title="Fastify Reader",
|
52 |
page_icon=":books:",
|
53 |
layout="wide")
|
54 |
|
55 |
+
# Due to browser caching and a Streamlit issue, the PDFs are sometimes not rendered properly.
|
56 |
note_text = """
|
57 |
If the PDF is not being rendered by your browser, <br>
|
58 |
 1. Try this link - (https://akarshrajsingh7-fastify-reader.hf.space/) <br>
|
59 |
 2. Download the PDF and open it in a PDF viewer.
|
60 |
"""
|
61 |
+
# Sidebar
|
62 |
with st.sidebar:
|
63 |
st.image("Logo.jpg")
|
64 |
st.markdown("<div style='text-align: center;'>How fast can you read?</div>", unsafe_allow_html=True)
|
65 |
|
66 |
+
# Main Page
|
67 |
+
tab1, tab2= st.tabs(["Input Text", "PDF file"])
|
68 |
|
69 |
+
# First tab where Text is input
|
70 |
with tab1:
|
71 |
+
user_input = st.text_input("Enter some text")
|
72 |
+
|
73 |
+
# Compare Check Box
|
74 |
+
compare = st.checkbox('Compare with Fastified Text', value=False, key='compare')
|
75 |
|
76 |
+
# Submit Button
|
77 |
+
if st.button("Submit", key="input-text"):
|
78 |
+
#Progress Bar for the processing
|
79 |
+
with st.spinner("Processing"):
|
80 |
+
text = user_input
|
81 |
+
|
82 |
+
# Generating base64 encoded text bytes for PDF rendering
|
83 |
+
original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
|
84 |
+
base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
|
85 |
+
|
86 |
+
# Embedding the PDFs in the HTML
|
87 |
+
original_display = f'<embed src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf" download="original.pdf">'
|
88 |
+
pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf" download="Modified.pdf">'
|
89 |
+
|
90 |
+
# Compare Logic implementation
|
91 |
+
if compare:
|
92 |
+
col1, col2, col3 = st.columns(3)
|
93 |
+
with col3:
|
94 |
+
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
95 |
+
|
96 |
+
# Side by Side comparison
|
97 |
+
col1, col2 = st.columns([1, 1], gap="small")
|
98 |
+
with col1:
|
99 |
+
with st.container(border = True):
|
100 |
+
st.markdown("<div style='text-align: center;'><strong>Original PDF viewer</strong></div>", unsafe_allow_html=True)
|
101 |
+
st.markdown(original_display, unsafe_allow_html=True)
|
102 |
+
with col2:
|
103 |
+
with st.container(border = True):
|
104 |
+
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
105 |
+
st.markdown(pdf_display, unsafe_allow_html=True)
|
106 |
+
|
107 |
+
# Browser Cache Note
|
108 |
+
st.markdown(f"""
|
109 |
+
<div style='background-color: #FFD580; border-radius: 5px;'>
|
110 |
+
<p style='color: black;'><strong>Note</strong> - {note_text}</p>
|
111 |
+
</div>
|
112 |
+
""", unsafe_allow_html=True)
|
113 |
+
else:
|
114 |
+
# No Comparisons
|
115 |
+
col1, col2, col3 = st.columns(3)
|
116 |
+
with col2:
|
117 |
+
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
118 |
+
with st.container(border = True):
|
119 |
+
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
120 |
+
st.markdown(pdf_display, unsafe_allow_html=True)
|
121 |
+
|
122 |
+
# Added support for PDFs having text
|
123 |
+
with tab2:
|
124 |
+
uploaded_file = st.file_uploader("Upload a PDF file", type="pdf", accept_multiple_files=True)
|
125 |
+
|
126 |
+
# Compare Check Box
|
127 |
+
compare = st.checkbox('Compare with Fastified Text', value=False, key='compare_pdf')
|
128 |
+
|
129 |
+
# Submit Button
|
130 |
+
if st.button("Submit", key="pdf"):
|
131 |
+
#Progress Bar for the processing
|
132 |
with st.spinner("Processing"):
|
133 |
+
text = pdf_extract_text(uploaded_file)
|
|
|
|
|
|
|
134 |
|
135 |
+
# Generating base64 encoded text bytes for PDF rendering
|
136 |
+
original_pdf = b64encode(text_to_pdf(text)).decode("utf-8")
|
137 |
+
base64_pdf = b64encode(text_to_pdf_fastify(text)).decode("utf-8")
|
138 |
+
|
139 |
+
# Embedding the PDFs in the HTML
|
140 |
+
original_display = f'<embed src="data:application/pdf;base64,{original_pdf}" width = "100%" height = 600 type="application/pdf">'
|
141 |
+
pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width = "100%" height = 600 type="application/pdf">'
|
142 |
+
|
143 |
+
# Compare Logic implementation
|
144 |
+
if compare:
|
145 |
col1, col2, col3 = st.columns(3)
|
146 |
with col3:
|
147 |
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
148 |
+
|
149 |
+
# Side by Side comparison
|
150 |
col1, col2 = st.columns([1, 1], gap="small")
|
151 |
with col1:
|
152 |
with st.container(border = True):
|
|
|
156 |
with st.container(border = True):
|
157 |
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
158 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
159 |
+
# Browser Cache Note
|
160 |
st.markdown(f"""
|
161 |
<div style='background-color: #FFD580; border-radius: 5px;'>
|
162 |
<p style='color: black;'><strong>Note</strong> - {note_text}</p>
|
163 |
</div>
|
164 |
""", unsafe_allow_html=True)
|
165 |
+
else:
|
166 |
+
# No Comparison
|
167 |
+
col1, col2, col3 = st.columns(3)
|
168 |
+
with col2:
|
169 |
+
st.download_button(label="Download Fastified PDF", data=text_to_pdf_fastify(text), file_name='output.pdf', mime='application/pdf')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
170 |
with st.container(border = True):
|
171 |
st.markdown("<div style='text-align: center;'><strong>Fastified PDF viewer</strong></div>", unsafe_allow_html=True)
|
172 |
st.markdown(pdf_display, unsafe_allow_html=True)
|
173 |
+
|
|
|
|
|
|
|
|
|
|
|
|
fast_reader.py
CHANGED
@@ -5,14 +5,18 @@ class Fastify_Reader:
|
|
5 |
Class to implement the fastify Fast Reader. The intention is to enable the users to
|
6 |
read text faster by bolding some parts of the words.
|
7 |
'''
|
8 |
-
def __init__(self,
|
9 |
-
self.text =
|
10 |
self.offset_factor = 1.6
|
11 |
|
12 |
def _get_offset(self, word: str):
|
|
|
|
|
|
|
13 |
word_stripped = word.translate(str.maketrans('', '', string.punctuation))
|
14 |
-
|
15 |
-
|
|
|
16 |
|
17 |
def fastify_word(self, word: str):
|
18 |
if '-' in word:
|
@@ -31,6 +35,9 @@ class Fastify_Reader:
|
|
31 |
return fastify_line.strip()
|
32 |
|
33 |
def fastify(self):
|
|
|
|
|
|
|
34 |
fastify_text = ""
|
35 |
for line in self.text.split('\n'):
|
36 |
fastify_text += f"{self.fastify_line(line)}\n"
|
|
|
5 |
Class to implement the fastify Fast Reader. The intention is to enable the users to
|
6 |
read text faster by bolding some parts of the words.
|
7 |
'''
|
8 |
+
def __init__(self, text: str):
|
9 |
+
self.text = text
|
10 |
self.offset_factor = 1.6
|
11 |
|
12 |
def _get_offset(self, word: str):
|
13 |
+
'''
|
14 |
+
Getting the offset for the word
|
15 |
+
'''
|
16 |
word_stripped = word.translate(str.maketrans('', '', string.punctuation))
|
17 |
+
offset = int(len(word_stripped) / self.offset_factor)
|
18 |
+
offset_value = offset if offset != 0 else 1
|
19 |
+
return offset_value
|
20 |
|
21 |
def fastify_word(self, word: str):
|
22 |
if '-' in word:
|
|
|
35 |
return fastify_line.strip()
|
36 |
|
37 |
def fastify(self):
|
38 |
+
'''
|
39 |
+
Triggering the Fastify Logic
|
40 |
+
'''
|
41 |
fastify_text = ""
|
42 |
for line in self.text.split('\n'):
|
43 |
fastify_text += f"{self.fastify_line(line)}\n"
|