prashant
committed on
Commit
•
2bccbcb
1
Parent(s):
1d3978a
error fix empty result
Browse files- utils/search.py +7 -4
utils/search.py
CHANGED
@@ -139,8 +139,9 @@ def lexical_search(query:Text,documents:List[Document]):
|
|
139 |
top_k= int(config.get('lexical_search','TOP_K')))
|
140 |
query_tokens = tokenize_lexical_query(query)
|
141 |
for count, result in enumerate(results):
|
142 |
-
if result.content != "":
|
143 |
-
|
|
|
144 |
st.write("Result {}".format(count))
|
145 |
searchAnnotator(matches, doc)
|
146 |
|
@@ -192,12 +193,14 @@ def runSemanticPreprocessingPipeline()->List[Document]:
|
|
192 |
sdg_processing_pipeline = processingpipeline()
|
193 |
split_by = config.get('lexical_search','SPLIT_BY')
|
194 |
split_length = int(config.get('lexical_search','SPLIT_LENGTH'))
|
|
|
195 |
|
196 |
output_lexical_pre = sdg_processing_pipeline.run(file_paths = file_path,
|
197 |
params= {"FileConverter": {"file_path": file_path, \
|
198 |
"file_name": file_name},
|
199 |
"UdfPreProcessor": {"removePunc": False, \
|
200 |
"split_by": split_by, \
|
201 |
-
"split_length":split_length
|
|
|
202 |
|
203 |
-
return output_lexical_pre['documents']
|
|
|
139 |
top_k= int(config.get('lexical_search','TOP_K')))
|
140 |
query_tokens = tokenize_lexical_query(query)
|
141 |
for count, result in enumerate(results):
|
142 |
+
# if result.content != "":
|
143 |
+
matches, doc = runSpacyMatcher(query_tokens,result.content)
|
144 |
+
if len(matches) != 0:
|
145 |
st.write("Result {}".format(count))
|
146 |
searchAnnotator(matches, doc)
|
147 |
|
|
|
193 |
sdg_processing_pipeline = processingpipeline()
|
194 |
split_by = config.get('lexical_search','SPLIT_BY')
|
195 |
split_length = int(config.get('lexical_search','SPLIT_LENGTH'))
|
196 |
+
split_overlap = int(config.get('lexical_search','SPLIT_OVERLAP'))
|
197 |
|
198 |
output_lexical_pre = sdg_processing_pipeline.run(file_paths = file_path,
|
199 |
params= {"FileConverter": {"file_path": file_path, \
|
200 |
"file_name": file_name},
|
201 |
"UdfPreProcessor": {"removePunc": False, \
|
202 |
"split_by": split_by, \
|
203 |
+
"split_length":split_length,\
|
204 |
+
"split_overlap": split_overlap}})
|
205 |
|
206 |
+
return output_lexical_pre['documents']
|