prashant
committed on
Commit
•
c8b3108
1
Parent(s):
07dfa2c
upload update
Browse files
- appStore/sdg_analysis.py +5 -74
- docStore/sample/files.json +2 -0
- utils/uploadAndExample.py +16 -10
appStore/sdg_analysis.py
CHANGED
@@ -8,10 +8,6 @@ import matplotlib.pyplot as plt
|
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
10 |
import streamlit as st
|
11 |
-
import docx
|
12 |
-
from docx.shared import Inches
|
13 |
-
from docx.shared import Pt
|
14 |
-
from docx.enum.style import WD_STYLE_TYPE
|
15 |
from st_aggrid import AgGrid
|
16 |
from st_aggrid.shared import ColumnsAutoSizeMode
|
17 |
from utils.sdg_classifier import sdg_classification
|
@@ -75,6 +71,7 @@ def app():
|
|
75 |
""")
|
76 |
st.markdown("")
|
77 |
|
|
|
78 |
_lab_dict = {0: 'no_cat',
|
79 |
1:'SDG 1 - No poverty',
|
80 |
2:'SDG 2 - Zero hunger',
|
@@ -94,6 +91,7 @@ def app():
|
|
94 |
16:'SDG 16 - Peace, justice and strong institutions',
|
95 |
17:'SDG 17 - Partnership for the goals',}
|
96 |
|
|
|
97 |
with st.container():
|
98 |
if st.button("RUN SDG Analysis"):
|
99 |
|
@@ -114,12 +112,9 @@ def app():
|
|
114 |
textrankkeywordlist = []
|
115 |
for label in sdg_labels:
|
116 |
sdgdata = " ".join(df[df.SDG == label].text.to_list())
|
117 |
-
# tfidflist_ = keywordExtraction(label,[sdgdata])
|
118 |
textranklist_ = textrank(sdgdata)
|
119 |
if len(textranklist_) > 0:
|
120 |
-
# tfidfkeywordList.append({'SDG':label, 'TFIDF Keywords':tfidflist_})
|
121 |
textrankkeywordlist.append({'SDG':label, 'TextRank Keywords':",".join(textranklist_)})
|
122 |
-
# tfidfkeywordsDf = pd.DataFrame(tfidfkeywordList)
|
123 |
tRkeywordsDf = pd.DataFrame(textrankkeywordlist)
|
124 |
|
125 |
|
@@ -145,19 +140,15 @@ def app():
|
|
145 |
labeldf = x['SDG_name'].values.tolist()
|
146 |
labeldf = "<br>".join(labeldf)
|
147 |
st.markdown(labeldf, unsafe_allow_html=True)
|
148 |
-
|
149 |
st.markdown("###### What keywords are present under SDG classified text? ######")
|
150 |
|
151 |
-
# c1, c2, c3 = st.columns([1, 10, 1])
|
152 |
-
# with c2:
|
153 |
-
# st.table(tRkeywordsDf)
|
154 |
AgGrid(tRkeywordsDf, reload_data = False,
|
155 |
update_mode="value_changed",
|
156 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
157 |
-
|
158 |
st.markdown("###### Top few SDG Classified paragraph/text results ######")
|
159 |
-
|
160 |
-
# with c8:
|
161 |
AgGrid(df, reload_data = False, update_mode="value_changed",
|
162 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
163 |
else:
|
@@ -165,63 +156,3 @@ def app():
|
|
165 |
logging.warning("Terminated as no document provided")
|
166 |
|
167 |
|
168 |
-
|
169 |
-
|
170 |
-
# 1. Keyword heatmap \n
|
171 |
-
# 2. SDG Classification for the paragraphs/texts in the document
|
172 |
-
#
|
173 |
-
|
174 |
-
# with st.container():
|
175 |
-
# if 'docs' in st.session_state:
|
176 |
-
# docs = st.session_state['docs']
|
177 |
-
# docs_processed, df, all_text, par_list = clean.preprocessingForSDG(docs)
|
178 |
-
# # paraList = st.session_state['paraList']
|
179 |
-
# logging.info("keybert")
|
180 |
-
# with st.spinner("Running Key bert"):
|
181 |
-
|
182 |
-
# kw_model = load_keyBert()
|
183 |
-
|
184 |
-
# keywords = kw_model.extract_keywords(
|
185 |
-
# all_text,
|
186 |
-
# keyphrase_ngram_range=(1, 3),
|
187 |
-
# use_mmr=True,
|
188 |
-
# stop_words="english",
|
189 |
-
# top_n=10,
|
190 |
-
# diversity=0.7,
|
191 |
-
# )
|
192 |
-
|
193 |
-
# st.markdown("## 🎈 What is my document about?")
|
194 |
-
|
195 |
-
# df = (
|
196 |
-
# DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
|
197 |
-
# .sort_values(by="Relevancy", ascending=False)
|
198 |
-
# .reset_index(drop=True)
|
199 |
-
# )
|
200 |
-
# df1 = (
|
201 |
-
# DataFrame(keywords, columns=["Keyword/Keyphrase", "Relevancy"])
|
202 |
-
# .sort_values(by="Relevancy", ascending=False)
|
203 |
-
# .reset_index(drop=True)
|
204 |
-
# )
|
205 |
-
# df.index += 1
|
206 |
-
|
207 |
-
# # Add styling
|
208 |
-
# cmGreen = sns.light_palette("green", as_cmap=True)
|
209 |
-
# cmRed = sns.light_palette("red", as_cmap=True)
|
210 |
-
# df = df.style.background_gradient(
|
211 |
-
# cmap=cmGreen,
|
212 |
-
# subset=[
|
213 |
-
# "Relevancy",
|
214 |
-
# ],
|
215 |
-
# )
|
216 |
-
|
217 |
-
# c1, c2, c3 = st.columns([1, 3, 1])
|
218 |
-
|
219 |
-
# format_dictionary = {
|
220 |
-
# "Relevancy": "{:.1%}",
|
221 |
-
# }
|
222 |
-
|
223 |
-
# df = df.format(format_dictionary)
|
224 |
-
|
225 |
-
# with c2:
|
226 |
-
#
|
227 |
-
# st.table(df)
|
|
|
8 |
import numpy as np
|
9 |
import pandas as pd
|
10 |
import streamlit as st
|
|
|
|
|
|
|
|
|
11 |
from st_aggrid import AgGrid
|
12 |
from st_aggrid.shared import ColumnsAutoSizeMode
|
13 |
from utils.sdg_classifier import sdg_classification
|
|
|
71 |
""")
|
72 |
st.markdown("")
|
73 |
|
74 |
+
### Label Dictionary ###
|
75 |
_lab_dict = {0: 'no_cat',
|
76 |
1:'SDG 1 - No poverty',
|
77 |
2:'SDG 2 - Zero hunger',
|
|
|
91 |
16:'SDG 16 - Peace, justice and strong institutions',
|
92 |
17:'SDG 17 - Partnership for the goals',}
|
93 |
|
94 |
+
### Main app code ###
|
95 |
with st.container():
|
96 |
if st.button("RUN SDG Analysis"):
|
97 |
|
|
|
112 |
textrankkeywordlist = []
|
113 |
for label in sdg_labels:
|
114 |
sdgdata = " ".join(df[df.SDG == label].text.to_list())
|
|
|
115 |
textranklist_ = textrank(sdgdata)
|
116 |
if len(textranklist_) > 0:
|
|
|
117 |
textrankkeywordlist.append({'SDG':label, 'TextRank Keywords':",".join(textranklist_)})
|
|
|
118 |
tRkeywordsDf = pd.DataFrame(textrankkeywordlist)
|
119 |
|
120 |
|
|
|
140 |
labeldf = x['SDG_name'].values.tolist()
|
141 |
labeldf = "<br>".join(labeldf)
|
142 |
st.markdown(labeldf, unsafe_allow_html=True)
|
143 |
+
st.write("")
|
144 |
st.markdown("###### What keywords are present under SDG classified text? ######")
|
145 |
|
|
|
|
|
|
|
146 |
AgGrid(tRkeywordsDf, reload_data = False,
|
147 |
update_mode="value_changed",
|
148 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
149 |
+
st.write("")
|
150 |
st.markdown("###### Top few SDG Classified paragraph/text results ######")
|
151 |
+
|
|
|
152 |
AgGrid(df, reload_data = False, update_mode="value_changed",
|
153 |
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
154 |
else:
|
|
|
156 |
logging.warning("Terminated as no document provided")
|
157 |
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
docStore/sample/files.json
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
{"South Africa:Low Emission strategy":"docStore/sample/South Africa_s Low Emission Development Strategy.txt",
|
2 |
+
"Ethiopia: 10 Year Development Plan":"docStore/sample/Ethiopia_s_2021_10 Year Development Plan.txt"}
|
utils/uploadAndExample.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
import tempfile
|
|
|
3 |
|
4 |
def add_upload(choice):
|
5 |
"""
|
@@ -22,14 +23,19 @@ def add_upload(choice):
|
|
22 |
|
23 |
else:
|
24 |
# listing the options
|
|
|
|
|
|
|
25 |
option = st.sidebar.selectbox('Select the example document',
|
26 |
-
(
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import tempfile
|
3 |
+
import json
|
4 |
|
5 |
def add_upload(choice):
|
6 |
"""
|
|
|
23 |
|
24 |
else:
|
25 |
# listing the options
|
26 |
+
with open('docStore/sample/files.json','r') as json_file:
|
27 |
+
files = json.load(json_file)
|
28 |
+
|
29 |
option = st.sidebar.selectbox('Select the example document',
|
30 |
+
list(files.keys()))
|
31 |
+
file_name = file_path = files[option]
|
32 |
+
st.session_state['filename'] = file_name
|
33 |
+
st.session_state['filepath'] = file_path
|
34 |
+
# if option is 'South Africa:Low Emission strategy':
|
35 |
+
# file_name = file_path = 'docStore/sample/South Africa_s Low Emission Development Strategy.txt'
|
36 |
+
# st.session_state['filename'] = file_name
|
37 |
+
# st.session_state['filepath'] = file_path
|
38 |
+
# else:
|
39 |
+
# file_name = file_path = 'docStore/sample/Ethiopia_s_2021_10 Year Development Plan.txt'
|
40 |
+
# st.session_state['filename'] = file_name
|
41 |
+
# st.session_state['filepath'] = file_path
|