Spaces:

GIZ
/

SDSN-demo

Running on CPU Upgrade

App Files Files Community

prashant committed on Nov 22, 2022

Commit

eaa8795

•

1 Parent(s): dd2ea3c

updating keywordslist and about app

Browse files

Files changed (4) hide show

appStore/keyword_search.py +20 -11
docStore/sample/keywordexample.json +2 -1
paramconfig.cfg +3 -6
utils/semantic_search.py +1 -1

appStore/keyword_search.py CHANGED Viewed

@@ -53,7 +53,8 @@ def app():
         st.write("")
         st.write(""" The application allows its user to perform a keyword search\
              based on two options: a lexical ([TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf))\
-             search and semantic bi-encoder search. The difference between both \
             approaches is quite straightforward; while the lexical search only \
             displays paragraphs in the document with exact matching results, \
             the semantic search shows paragraphs with meaningful connections \
@@ -62,9 +63,12 @@ def app():
             methods employ a probabilistic retrieval framework in its identification\
             of relevant paragraphs. By defualt the search is performed using \
             'Semantic Search', and to find 'Exact/Lexical Matches' please tick the \
-            checkbox provided, which will by-pass semantic search. Furthermore,\
             the application allows the user to search for pre-defined keywords \
             from different thematic buckets present in sidebar.""")
     with st.sidebar:
@@ -82,17 +86,22 @@ def app():
         st.markdown("---")
     with st.container():
-        # if keywordList is not None:
         #     queryList = st.text_input("You selected the {} category we \
-        #                 will look for these keywords in document".format(genre),
         #                             value="{}".format(keywordList))
-        queryList = st.text_input("Please enter here your question and we \
-                                    will look for an answer in the document\
-                                    OR enter the keyword you are looking \
-                                    for and we will we will look for similar\
-                                    context in the document. If dont have anything\
-                                    try the presets of keywords from sidebar. ",
-                                    value = "{}".format(keywordList))
         searchtype = st.checkbox("Show only Exact Matches")
         if st.button("Find them"):

         st.write("")
         st.write(""" The application allows its user to perform a keyword search\
              based on two options: a lexical ([TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf))\
+             search and semantic [bi-encoder](https://www.sbert.net/examples/applications/retrieve_rerank/README.html)\
+            search. The difference between both \
             approaches is quite straightforward; while the lexical search only \
             displays paragraphs in the document with exact matching results, \
             the semantic search shows paragraphs with meaningful connections \
             methods employ a probabilistic retrieval framework in its identification\
             of relevant paragraphs. By defualt the search is performed using \
             'Semantic Search', and to find 'Exact/Lexical Matches' please tick the \
+            checkbox provided which will by-pass semantic search. Furthermore,\
             the application allows the user to search for pre-defined keywords \
             from different thematic buckets present in sidebar.""")
+        st.write("")
+        st.write(""" The Exact Matches gives back top {} findings, and Semantic
+        search provides with top {} answers.""".format(lexical_top_k, retriever_top_k))
     with st.sidebar:
         st.markdown("---")
     with st.container():
+        type_hinting = "Please enter here your question and we \
+                        will look for an answer in the document\
+                        OR enter the keyword you are looking \
+                        for and we will we will look for similar\
+                        context in the document. If dont have anything,\
+                        try the presets of keywords from sidebar. "
+        if keywordList is not None:
         #     queryList = st.text_input("You selected the {} category we \
+        #                 will look for these keywords in document".format(genre)
         #                             value="{}".format(keywordList))
+            queryList = st.text_input(type_hinting,
+                                        value = "{}".format(keywordList))
+        else:
+             queryList = st.text_input(type_hinting,
+                                       placeholder="Enter keyword/query here")
         searchtype = st.checkbox("Show only Exact Matches")
         if st.button("Find them"):

docStore/sample/keywordexample.json CHANGED Viewed

@@ -3,5 +3,6 @@
 "Food":"Food security,Nutrition,Diets,Food loss",
 "Implementation":"Implementation,transformation,reform,integration,strategy,policy",
 "Nature":"Nature,Nature-based solutions,Biodiversity,Degradation",
-"Social":"Indigenous,Local community(ies),Gender,Rural livelihoods,Minority"
 }

 "Food":"Food security,Nutrition,Diets,Food loss",
 "Implementation":"Implementation,transformation,reform,integration,strategy,policy",
 "Nature":"Nature,Nature-based solutions,Biodiversity,Degradation",
+"Social":"Indigenous,Local community(ies),Rural livelihoods,Minority",
+"Gender":"gender, women empowernment, women economic power, gender bias"
 }

paramconfig.cfg CHANGED Viewed

@@ -14,10 +14,9 @@ EMBEDDING_DIM = 768
 RETRIEVER_EMB_LAYER = -1
 READER = deepset/tinyroberta-squad2
 READER_TOP_K = 10
-THRESHOLD = 0.1
-SPLIT_BY = sentence
-SPLIT_LENGTH = 4
-SPLIT_OVERLAP = 1
 RESPECT_SENTENCE_BOUNDARY = 1
 REMOVE_PUNC = 0
@@ -31,8 +30,6 @@ SPLIT_OVERLAP = 10
 RESPECT_SENTENCE_BOUNDARY = 1
 TOP_KEY = 15
-[tfidf]
-TOP_N = 20
 [coherence]
 RETRIEVER_TOP_K = 10

 RETRIEVER_EMB_LAYER = -1
 READER = deepset/tinyroberta-squad2
 READER_TOP_K = 10
+SPLIT_BY = word
+SPLIT_LENGTH = 120
+SPLIT_OVERLAP = 10
 RESPECT_SENTENCE_BOUNDARY = 1
 REMOVE_PUNC = 0
 RESPECT_SENTENCE_BOUNDARY = 1
 TOP_KEY = 15
 [coherence]
 RETRIEVER_TOP_K = 10

utils/semantic_search.py CHANGED Viewed

@@ -450,7 +450,7 @@ def process_semantic_output(results):
     'reader_score','retriever_score','id',]. Distingushes if its single query or
     multi queries by reading the pipeline output dictionary keys.
     Uses the process_query_output to get the dataframe for each query and create
-    one concataneted dataframe. In case f Docs2Answers as final node, deletes
     the answers part. See documentations of process_query_output.
     Params

     'reader_score','retriever_score','id',]. Distingushes if its single query or
     multi queries by reading the pipeline output dictionary keys.
     Uses the process_query_output to get the dataframe for each query and create
+    one concataneted dataframe. In case of Docs2Answers as final node, deletes
     the answers part. See documentations of process_query_output.
     Params