Spaces:
GIZ
/
Running on CPU Upgrade

prashant commited on
Commit
eaa8795
1 Parent(s): dd2ea3c

updating keywordslist and about app

Browse files
appStore/keyword_search.py CHANGED
@@ -53,7 +53,8 @@ def app():
53
  st.write("")
54
  st.write(""" The application allows its user to perform a keyword search\
55
  based on two options: a lexical ([TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf))\
56
- search and semantic bi-encoder search. The difference between both \
 
57
  approaches is quite straightforward; while the lexical search only \
58
  displays paragraphs in the document with exact matching results, \
59
  the semantic search shows paragraphs with meaningful connections \
@@ -62,9 +63,12 @@ def app():
62
  methods employ a probabilistic retrieval framework in its identification\
63
  of relevant paragraphs. By default the search is performed using \
64
  'Semantic Search', and to find 'Exact/Lexical Matches' please tick the \
65
- checkbox provided, which will by-pass semantic search. Furthermore,\
66
  the application allows the user to search for pre-defined keywords \
67
  from different thematic buckets present in sidebar.""")
 
 
 
68
 
69
 
70
  with st.sidebar:
@@ -82,17 +86,22 @@ def app():
82
  st.markdown("---")
83
 
84
  with st.container():
85
- # if keywordList is not None:
 
 
 
 
 
 
86
  # queryList = st.text_input("You selected the {} category we \
87
- # will look for these keywords in document".format(genre),
88
  # value="{}".format(keywordList))
89
- queryList = st.text_input("Please enter here your question and we \
90
- will look for an answer in the document\
91
- OR enter the keyword you are looking \
92
- for and we will we will look for similar\
93
- context in the document. If dont have anything\
94
- try the presets of keywords from sidebar. ",
95
- value = "{}".format(keywordList))
96
  searchtype = st.checkbox("Show only Exact Matches")
97
  if st.button("Find them"):
98
 
 
53
  st.write("")
54
  st.write(""" The application allows its user to perform a keyword search\
55
  based on two options: a lexical ([TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf))\
56
+ search and semantic [bi-encoder](https://www.sbert.net/examples/applications/retrieve_rerank/README.html)\
57
+ search. The difference between both \
58
  approaches is quite straightforward; while the lexical search only \
59
  displays paragraphs in the document with exact matching results, \
60
  the semantic search shows paragraphs with meaningful connections \
 
63
  methods employ a probabilistic retrieval framework in its identification\
64
  of relevant paragraphs. By default the search is performed using \
65
  'Semantic Search', and to find 'Exact/Lexical Matches' please tick the \
66
+ checkbox provided which will by-pass semantic search. Furthermore,\
67
  the application allows the user to search for pre-defined keywords \
68
  from different thematic buckets present in sidebar.""")
69
+ st.write("")
70
+ st.write(""" The Exact Matches gives back top {} findings, and Semantic
71
+ search provides the top {} answers.""".format(lexical_top_k, retriever_top_k))
72
 
73
 
74
  with st.sidebar:
 
86
  st.markdown("---")
87
 
88
  with st.container():
89
+ type_hinting = "Please enter here your question and we \
90
+ will look for an answer in the document\
91
+ OR enter the keyword you are looking \
92
+ for and we will look for similar\
93
+ context in the document. If you don't have anything,\
94
+ try the presets of keywords from sidebar. "
95
+ if keywordList is not None:
96
  # queryList = st.text_input("You selected the {} category we \
97
+ # will look for these keywords in document".format(genre)
98
  # value="{}".format(keywordList))
99
+ queryList = st.text_input(type_hinting,
100
+ value = "{}".format(keywordList))
101
+ else:
102
+ queryList = st.text_input(type_hinting,
103
+ placeholder="Enter keyword/query here")
104
+
 
105
  searchtype = st.checkbox("Show only Exact Matches")
106
  if st.button("Find them"):
107
 
docStore/sample/keywordexample.json CHANGED
@@ -3,5 +3,6 @@
3
  "Food":"Food security,Nutrition,Diets,Food loss",
4
  "Implementation":"Implementation,transformation,reform,integration,strategy,policy",
5
  "Nature":"Nature,Nature-based solutions,Biodiversity,Degradation",
6
- "Social":"Indigenous,Local community(ies),Gender,Rural livelihoods,Minority"
 
7
  }
 
3
  "Food":"Food security,Nutrition,Diets,Food loss",
4
  "Implementation":"Implementation,transformation,reform,integration,strategy,policy",
5
  "Nature":"Nature,Nature-based solutions,Biodiversity,Degradation",
6
+ "Social":"Indigenous,Local community(ies),Rural livelihoods,Minority",
7
+ "Gender":"gender, women empowerment, women economic power, gender bias"
8
  }
paramconfig.cfg CHANGED
@@ -14,10 +14,9 @@ EMBEDDING_DIM = 768
14
  RETRIEVER_EMB_LAYER = -1
15
  READER = deepset/tinyroberta-squad2
16
  READER_TOP_K = 10
17
- THRESHOLD = 0.1
18
- SPLIT_BY = sentence
19
- SPLIT_LENGTH = 4
20
- SPLIT_OVERLAP = 1
21
  RESPECT_SENTENCE_BOUNDARY = 1
22
  REMOVE_PUNC = 0
23
 
@@ -31,8 +30,6 @@ SPLIT_OVERLAP = 10
31
  RESPECT_SENTENCE_BOUNDARY = 1
32
  TOP_KEY = 15
33
 
34
- [tfidf]
35
- TOP_N = 20
36
 
37
  [coherence]
38
  RETRIEVER_TOP_K = 10
 
14
  RETRIEVER_EMB_LAYER = -1
15
  READER = deepset/tinyroberta-squad2
16
  READER_TOP_K = 10
17
+ SPLIT_BY = word
18
+ SPLIT_LENGTH = 120
19
+ SPLIT_OVERLAP = 10
 
20
  RESPECT_SENTENCE_BOUNDARY = 1
21
  REMOVE_PUNC = 0
22
 
 
30
  RESPECT_SENTENCE_BOUNDARY = 1
31
  TOP_KEY = 15
32
 
 
 
33
 
34
  [coherence]
35
  RETRIEVER_TOP_K = 10
utils/semantic_search.py CHANGED
@@ -450,7 +450,7 @@ def process_semantic_output(results):
450
  'reader_score','retriever_score','id',]. Distinguishes if it's a single query or
451
  multi queries by reading the pipeline output dictionary keys.
452
  Uses the process_query_output to get the dataframe for each query and create
453
- one concataneted dataframe. In case f Docs2Answers as final node, deletes
454
  the answers part. See documentations of process_query_output.
455
 
456
  Params
 
450
  'reader_score','retriever_score','id',]. Distinguishes if it's a single query or
451
  multi queries by reading the pipeline output dictionary keys.
452
  Uses the process_query_output to get the dataframe for each query and create
453
+ one concatenated dataframe. In case of Docs2Answers as final node, deletes
454
  the answers part. See documentations of process_query_output.
455
 
456
  Params