prashant
committed on
Commit
•
1b62a9f
1
Parent(s):
fa8823d
coherence results display
Browse files
- appStore/coherence.py +37 -19
- paramconfig.cfg +1 -3
appStore/coherence.py
CHANGED
@@ -7,8 +7,10 @@ import ast
|
|
7 |
import logging
|
8 |
from utils.ndc_explorer import countrySpecificCCA, countrySpecificCCM
|
9 |
from utils.checkconfig import getconfig
|
10 |
-
from utils.semantic_search import runSemanticPreprocessingPipeline
|
11 |
-
|
|
|
|
|
12 |
|
13 |
# Reading data and Declaring necessary variables
|
14 |
with open('docStore/ndcs/countryList.txt') as dfile:
|
@@ -35,9 +37,9 @@ embedding_model = config.get('coherence','RETRIEVER')
|
|
35 |
embedding_model_format = config.get('coherence','RETRIEVER_FORMAT')
|
36 |
embedding_layer = int(config.get('coherence','RETRIEVER_EMB_LAYER'))
|
37 |
embedding_dim = int(config.get('coherence','EMBEDDING_DIM'))
|
|
|
38 |
retriever_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
|
39 |
-
|
40 |
-
reader_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
|
41 |
|
42 |
|
43 |
def app():
|
@@ -57,7 +59,8 @@ def app():
|
|
57 |
coherence between a given policy document and a country’s (Intended)\
|
58 |
Nationally Determined Contribution (INDCs/NDCs) using open-source \
|
59 |
data from the German Institute of Development and Sustainability’s \
|
60 |
-
(IDOS) [NDC Explorer]
|
|
|
61 |
""")
|
62 |
st.write("")
|
63 |
st.write(""" User can select a country context via the drop-down menu \
|
@@ -81,6 +84,10 @@ def app():
|
|
81 |
option = st.selectbox('Select Country', (countrynames))
|
82 |
countryCode = countryList[option]
|
83 |
st.markdown("---")
|
|
|
|
|
|
|
|
|
84 |
|
85 |
with st.container():
|
86 |
if st.button("Check Coherence"):
|
@@ -89,14 +96,14 @@ def app():
|
|
89 |
|
90 |
if 'filepath' in st.session_state:
|
91 |
allDocuments = runSemanticPreprocessingPipeline(
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
|
100 |
if genre == 'Climate Change Adaptation':
|
101 |
sent_dict = sent_cca
|
102 |
else:
|
@@ -105,14 +112,25 @@ def app():
|
|
105 |
for key,sent in sent_dict.items():
|
106 |
sent_labels.append(sent)
|
107 |
if len(allDocuments['documents']) > 100:
|
108 |
-
|
109 |
else:
|
110 |
warning_msg = ""
|
111 |
-
logging.info("starting Coherence analysis,
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
else:
|
117 |
st.info("🤔 No document found, please try to upload it at the sidebar!")
|
118 |
logging.warning("Terminated as no document provided")
|
|
|
7 |
import logging
|
8 |
from utils.ndc_explorer import countrySpecificCCA, countrySpecificCCM
|
9 |
from utils.checkconfig import getconfig
|
10 |
+
from utils.semantic_search import runSemanticPreprocessingPipeline,process_semantic_output
|
11 |
+
from utils.semantic_search import semanticSearchPipeline, runSemanticPipeline
|
12 |
+
from st_aggrid import AgGrid
|
13 |
+
from st_aggrid.shared import ColumnsAutoSizeMode
|
14 |
|
15 |
# Reading data and Declaring necessary variables
|
16 |
with open('docStore/ndcs/countryList.txt') as dfile:
|
|
|
37 |
embedding_model_format = config.get('coherence','RETRIEVER_FORMAT')
|
38 |
embedding_layer = int(config.get('coherence','RETRIEVER_EMB_LAYER'))
|
39 |
embedding_dim = int(config.get('coherence','EMBEDDING_DIM'))
|
40 |
+
max_seq_len = int(config.get('coherence','MAX_SEQ_LENGTH'))
|
41 |
retriever_top_k = int(config.get('coherence','RETRIEVER_TOP_K'))
|
42 |
+
|
|
|
43 |
|
44 |
|
45 |
def app():
|
|
|
59 |
coherence between a given policy document and a country’s (Intended)\
|
60 |
Nationally Determined Contribution (INDCs/NDCs) using open-source \
|
61 |
data from the German Institute of Development and Sustainability’s \
|
62 |
+
(IDOS) [NDC Explorer]
|
63 |
+
(https://klimalog.idos-research.de/ndc/#NDCExplorer/worldMap?NewAndUpdatedNDC??income???catIncome).\
|
64 |
""")
|
65 |
st.write("")
|
66 |
st.write(""" User can select a country context via the drop-down menu \
|
|
|
84 |
option = st.selectbox('Select Country', (countrynames))
|
85 |
countryCode = countryList[option]
|
86 |
st.markdown("---")
|
87 |
+
|
88 |
+
genre = st.radio( "Select Category",('Climate Change Adaptation',
|
89 |
+
'Climate Change Mitigation'))
|
90 |
+
st.markdown("---")
|
91 |
|
92 |
with st.container():
|
93 |
if st.button("Check Coherence"):
|
|
|
96 |
|
97 |
if 'filepath' in st.session_state:
|
98 |
allDocuments = runSemanticPreprocessingPipeline(
|
99 |
+
file_path= st.session_state['filepath'],
|
100 |
+
file_name = st.session_state['filename'],
|
101 |
+
split_by=split_by,
|
102 |
+
split_length= split_length,
|
103 |
+
split_overlap=split_overlap,
|
104 |
+
removePunc= remove_punc,
|
105 |
+
split_respect_sentence_boundary=split_respect_sentence_boundary)
|
106 |
+
# genre = st.radio( "Select Category",('Climate Change Adaptation', 'Climate Change Mitigation'))
|
107 |
if genre == 'Climate Change Adaptation':
|
108 |
sent_dict = sent_cca
|
109 |
else:
|
|
|
112 |
for key,sent in sent_dict.items():
|
113 |
sent_labels.append(sent)
|
114 |
if len(allDocuments['documents']) > 100:
|
115 |
+
warning_msg = ": This might take sometime, please sit back and relax."
|
116 |
else:
|
117 |
warning_msg = ""
|
118 |
+
logging.info("starting Coherence analysis, \
|
119 |
+
country selected {}".format(option))
|
120 |
+
with st.spinner("Performing Coherence Analysis for {} \
|
121 |
+
under {} category{}".format(option,genre,warning_msg)):
|
122 |
+
semanticsearch_pipeline, doc_store = semanticSearchPipeline(documents = allDocuments['documents'],
|
123 |
+
embedding_model= embedding_model,
|
124 |
+
embedding_layer= embedding_layer,
|
125 |
+
embedding_model_format= embedding_model_format,
|
126 |
+
retriever_top_k= retriever_top_k,
|
127 |
+
embedding_dim=embedding_dim,
|
128 |
+
max_seq_len=max_seq_len, useQueryCheck=False)
|
129 |
+
raw_output = runSemanticPipeline(pipeline=semanticsearch_pipeline,queries=sent_labels)
|
130 |
+
results_df = process_semantic_output(raw_output)
|
131 |
+
AgGrid(results_df, reload_data = False, update_mode="value_changed",
|
132 |
+
columns_auto_size_mode = ColumnsAutoSizeMode.FIT_CONTENTS)
|
133 |
+
|
134 |
else:
|
135 |
st.info("🤔 No document found, please try to upload it at the sidebar!")
|
136 |
logging.warning("Terminated as no document provided")
|
paramconfig.cfg
CHANGED
@@ -36,13 +36,11 @@ TOP_N = 20
|
|
36 |
|
37 |
[coherence]
|
38 |
RETRIEVER_TOP_K = 10
|
39 |
-
MAX_SEQ_LENGTH =
|
40 |
RETRIEVER = all-MiniLM-L6-v2
|
41 |
RETRIEVER_FORMAT = sentence_transformers
|
42 |
RETRIEVER_EMB_LAYER = -1
|
43 |
EMBEDDING_DIM = 384
|
44 |
-
READER = deepset/tinyroberta-squad2
|
45 |
-
READER_TOP_K = 10
|
46 |
THRESHOLD = 0.55
|
47 |
SPLIT_BY = sentence
|
48 |
SPLIT_LENGTH = 3
|
|
|
36 |
|
37 |
[coherence]
|
38 |
RETRIEVER_TOP_K = 10
|
39 |
+
MAX_SEQ_LENGTH = 256
|
40 |
RETRIEVER = all-MiniLM-L6-v2
|
41 |
RETRIEVER_FORMAT = sentence_transformers
|
42 |
RETRIEVER_EMB_LAYER = -1
|
43 |
EMBEDDING_DIM = 384
|
|
|
|
|
44 |
THRESHOLD = 0.55
|
45 |
SPLIT_BY = sentence
|
46 |
SPLIT_LENGTH = 3
|