Spaces:
GIZ
/
Running on CPU Upgrade

prashant committed on
Commit
2370cfa
1 Parent(s): eaa8795

adding reader top_k per candidate

Browse files
appStore/keyword_search.py CHANGED
@@ -26,6 +26,7 @@ max_seq_len = int(config.get('semantic_search','MAX_SEQ_LENGTH'))
26
  retriever_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
27
  reader_model = config.get('semantic_search','READER')
28
  reader_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
 
29
  lexical_split_by= config.get('lexical_search','SPLIT_BY')
30
  lexical_split_length=int(config.get('lexical_search','SPLIT_LENGTH'))
31
  lexical_split_overlap = int(config.get('lexical_search','SPLIT_OVERLAP'))
@@ -149,7 +150,8 @@ def app():
149
  embedding_model_format=embedding_model_format,
150
  reader_model=reader_model,reader_top_k=reader_top_k,
151
  retriever_top_k=retriever_top_k, embedding_dim=embedding_dim,
152
- max_seq_len=max_seq_len)
 
153
 
154
  else:
155
  st.info("🤔 No document found, please try to upload it at the sidebar!")
 
26
  retriever_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
27
  reader_model = config.get('semantic_search','READER')
28
  reader_top_k = int(config.get('semantic_search','RETRIEVER_TOP_K'))
29
+ top_k_per_candidate = int(config.get('semantic_search','READER_TOP_K_PER_CANDIDATE'))
30
  lexical_split_by= config.get('lexical_search','SPLIT_BY')
31
  lexical_split_length=int(config.get('lexical_search','SPLIT_LENGTH'))
32
  lexical_split_overlap = int(config.get('lexical_search','SPLIT_OVERLAP'))
 
150
  embedding_model_format=embedding_model_format,
151
  reader_model=reader_model,reader_top_k=reader_top_k,
152
  retriever_top_k=retriever_top_k, embedding_dim=embedding_dim,
153
+ max_seq_len=max_seq_len,
154
+ top_k_per_candidate = top_k_per_candidate)
155
 
156
  else:
157
  st.info("🤔 No document found, please try to upload it at the sidebar!")
paramconfig.cfg CHANGED
@@ -14,6 +14,7 @@ EMBEDDING_DIM = 768
14
  RETRIEVER_EMB_LAYER = -1
15
  READER = deepset/tinyroberta-squad2
16
  READER_TOP_K = 10
 
17
  SPLIT_BY = word
18
  SPLIT_LENGTH = 120
19
  SPLIT_OVERLAP = 10
 
14
  RETRIEVER_EMB_LAYER = -1
15
  READER = deepset/tinyroberta-squad2
16
  READER_TOP_K = 10
17
+ READER_TOP_K_PER_CANDIDATE = 1
18
  SPLIT_BY = word
19
  SPLIT_LENGTH = 120
20
  SPLIT_OVERLAP = 10
utils/semantic_search.py CHANGED
@@ -245,7 +245,8 @@ def semanticSearchPipeline(documents:List[Document], embedding_model:Text = Non
245
  embedding_model_format:Text = None,embedding_layer:int = None,
246
  embedding_dim:int = 768,retriever_top_k:int = 10,
247
  reader_model:str = None, reader_top_k:int = 10,
248
- max_seq_len:int =512,useQueryCheck = True, ):
 
249
  """
250
  creates the semantic search pipeline and document Store object from the
251
  list of haystack documents. The top_k for the Reader and Retirever are kept
@@ -290,7 +291,8 @@ def semanticSearchPipeline(documents:List[Document], embedding_model:Text = Non
290
  max_seq_len:everymodel has max seq len it can handle, check in model card.
291
  Needed to hanlde the edge cases
292
  useQueryCheck: Whether to use the querycheck which modifies the query or not.
293
-
 
294
 
295
  Return
296
  ---------
@@ -318,7 +320,8 @@ def semanticSearchPipeline(documents:List[Document], embedding_model:Text = Non
318
  if useQueryCheck and reader_model:
319
  querycheck = QueryCheck()
320
  reader = FARMReader(model_name_or_path=reader_model,
321
- top_k = reader_top_k, use_gpu=True)
 
322
  semantic_search_pipeline.add_node(component = querycheck,
323
  name = "QueryCheck",inputs = ["Query"])
324
  semantic_search_pipeline.add_node(component = retriever,
@@ -328,7 +331,8 @@ def semanticSearchPipeline(documents:List[Document], embedding_model:Text = Non
328
 
329
  elif reader_model :
330
  reader = FARMReader(model_name_or_path=reader_model,
331
- top_k = reader_top_k, use_gpu=True)
 
332
  semantic_search_pipeline.add_node(component = retriever,
333
  name = "EmbeddingRetriever",inputs = ["Query"])
334
  semantic_search_pipeline.add_node(component = reader,
 
245
  embedding_model_format:Text = None,embedding_layer:int = None,
246
  embedding_dim:int = 768,retriever_top_k:int = 10,
247
  reader_model:str = None, reader_top_k:int = 10,
248
+ max_seq_len:int =512,useQueryCheck = True,
249
+ top_k_per_candidate:int = 1):
250
  """
251
  creates the semantic search pipeline and document Store object from the
252
  list of haystack documents. The top_k for the Reader and Retirever are kept
 
291
  max_seq_len:everymodel has max seq len it can handle, check in model card.
292
  Needed to hanlde the edge cases
293
  useQueryCheck: Whether to use the querycheck which modifies the query or not.
294
+ top_k_per_candidate:How many answers to extract for each candidate doc
295
+ that is coming from the retriever
296
 
297
  Return
298
  ---------
 
320
  if useQueryCheck and reader_model:
321
  querycheck = QueryCheck()
322
  reader = FARMReader(model_name_or_path=reader_model,
323
+ top_k = reader_top_k, use_gpu=True,
324
+ top_k_per_candidate = top_k_per_candidate)
325
  semantic_search_pipeline.add_node(component = querycheck,
326
  name = "QueryCheck",inputs = ["Query"])
327
  semantic_search_pipeline.add_node(component = retriever,
 
331
 
332
  elif reader_model :
333
  reader = FARMReader(model_name_or_path=reader_model,
334
+ top_k = reader_top_k, use_gpu=True,
335
+ top_k_per_candidate = top_k_per_candidate)
336
  semantic_search_pipeline.add_node(component = retriever,
337
  name = "EmbeddingRetriever",inputs = ["Query"])
338
  semantic_search_pipeline.add_node(component = reader,