Spaces:
Paused
Paused
error handling
Browse files
app.py
CHANGED
@@ -71,6 +71,10 @@ def process_pii(text):
|
|
71 |
return text
|
72 |
|
73 |
|
|
|
|
|
|
|
|
|
74 |
def format_result(result, highlight_terms, exact_search, datasets_filter=None):
|
75 |
text, url, docid = result
|
76 |
if datasets_filter is not None:
|
@@ -114,17 +118,17 @@ def format_result(result, highlight_terms, exact_search, datasets_filter=None):
|
|
114 |
else ""
|
115 |
)
|
116 |
docid_html = get_docid_html(docid)
|
117 |
-
language =
|
118 |
result_html = """{}
|
119 |
-
<span style='font-size:14px; font-family: Arial; color
|
|
|
120 |
<a href="https://forms.gle/AdBLLwRApqcLkHYA8" target="_blank">
|
121 |
<button style="color:#ffcdf8; ">🏴‍☠️ Flag result 🏴‍☠️</button>
|
122 |
</a><br>
|
123 |
-
<!-- <span style='font-size:12px; font-family: Arial; color:MediumAquaMarine'>Language: {}</span><br> -->
|
124 |
<span style='font-family: Arial;'>{}</span><br>
|
125 |
<br>
|
126 |
""".format(
|
127 |
-
url_html,
|
128 |
)
|
129 |
return "<p>" + result_html + "</p>"
|
130 |
|
@@ -144,6 +148,7 @@ def format_result_page(
|
|
144 |
|
145 |
result_page_html = ""
|
146 |
for lang, results_for_lang in results.items():
|
|
|
147 |
if len(results_for_lang) == 0:
|
148 |
if exact_search:
|
149 |
result_page_html += """<div style='font-family: Arial; color:Silver; text-align: left; line-height: 3em'>
|
@@ -177,17 +182,16 @@ def format_result_page(
|
|
177 |
Total number of matches: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
|
178 |
num_results
|
179 |
)
|
180 |
-
|
181 |
return header_html + result_page_html
|
182 |
|
183 |
|
184 |
def extract_results_from_payload(query, language, payload, exact_search):
|
185 |
results = payload["results"]
|
186 |
-
|
187 |
processed_results = dict()
|
188 |
datasets = set()
|
189 |
highlight_terms = None
|
190 |
num_results = None
|
|
|
191 |
if exact_search:
|
192 |
highlight_terms = query
|
193 |
num_results = payload["num_results"]
|
@@ -214,7 +218,14 @@ def extract_results_from_payload(query, language, payload, exact_search):
|
|
214 |
return processed_results, highlight_terms, num_results, list(datasets)
|
215 |
|
216 |
|
217 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
if error_type == "unsupported_lang":
|
219 |
detected_lang = payload["err"]["meta"]["detected_lang"]
|
220 |
return f"""
|
@@ -333,12 +344,12 @@ if __name__ == "__main__":
|
|
333 |
else:
|
334 |
query = " ".join(query.split())
|
335 |
if query == "" or query is None:
|
336 |
-
return
|
337 |
|
338 |
payload = request_payload(query, lang, exact_search, k, received_results)
|
339 |
err = extract_error_from_payload(payload)
|
340 |
if err is not None:
|
341 |
-
return process_error(err)
|
342 |
|
343 |
(
|
344 |
processed_results,
|
@@ -351,7 +362,7 @@ if __name__ == "__main__":
|
|
351 |
payload,
|
352 |
exact_search,
|
353 |
)
|
354 |
-
|
355 |
lang, processed_results, highlight_terms, num_results, exact_search
|
356 |
)
|
357 |
return (
|
@@ -359,20 +370,23 @@ if __name__ == "__main__":
|
|
359 |
highlight_terms,
|
360 |
num_results,
|
361 |
exact_search,
|
362 |
-
|
363 |
ds,
|
364 |
)
|
365 |
|
366 |
def submit(query, lang, k, dropdown_input):
|
367 |
print("submitting", query, lang, k)
|
|
|
|
|
|
|
368 |
(
|
369 |
processed_results,
|
370 |
highlight_terms,
|
371 |
num_results,
|
372 |
exact_search,
|
373 |
-
|
374 |
datasets,
|
375 |
-
) =
|
376 |
has_more_results = exact_search and (num_results > k)
|
377 |
return [
|
378 |
processed_results,
|
@@ -383,7 +397,7 @@ if __name__ == "__main__":
|
|
383 |
gr.Dropdown.update(choices=datasets, value=datasets),
|
384 |
gr.update(visible=has_more_results),
|
385 |
len(next(iter(processed_results.values()))),
|
386 |
-
|
387 |
]
|
388 |
|
389 |
def next_page(
|
@@ -394,14 +408,17 @@ if __name__ == "__main__":
|
|
394 |
received_results,
|
395 |
processed_results,
|
396 |
):
|
|
|
|
|
|
|
397 |
(
|
398 |
processed_results,
|
399 |
highlight_terms,
|
400 |
num_results,
|
401 |
exact_search,
|
402 |
-
|
403 |
datasets,
|
404 |
-
) =
|
405 |
current_results = sum(
|
406 |
len(results) for results in processed_results.values()
|
407 |
)
|
@@ -420,7 +437,7 @@ if __name__ == "__main__":
|
|
420 |
gr.Dropdown.update(choices=datasets, value=datasets),
|
421 |
gr.update(visible=current_results >= k and has_more_results),
|
422 |
received_results + current_results,
|
423 |
-
|
424 |
]
|
425 |
|
426 |
def filter_datasets(
|
|
|
71 |
return text
|
72 |
|
73 |
|
74 |
+
def extract_lang_from_docid(docid):
|
75 |
+
return docid.split("_")[1]
|
76 |
+
|
77 |
+
|
78 |
def format_result(result, highlight_terms, exact_search, datasets_filter=None):
|
79 |
text, url, docid = result
|
80 |
if datasets_filter is not None:
|
|
|
118 |
else ""
|
119 |
)
|
120 |
docid_html = get_docid_html(docid)
|
121 |
+
language = extract_lang_from_docid(docid)
|
122 |
result_html = """{}
|
123 |
+
<span style='font-size:14px; font-family: Arial; color:MediumAquaMarine'>Language: {} | </span>
|
124 |
+
<span style='font-size:14px; font-family: Arial; color:#7978FF; text-align: left;'>Document ID: {} | </span>
|
125 |
<a href="https://forms.gle/AdBLLwRApqcLkHYA8" target="_blank">
|
126 |
<button style="color:#ffcdf8; ">🏴‍☠️ Flag result 🏴‍☠️</button>
|
127 |
</a><br>
|
|
|
128 |
<span style='font-family: Arial;'>{}</span><br>
|
129 |
<br>
|
130 |
""".format(
|
131 |
+
url_html, language, docid_html, tokens_html
|
132 |
)
|
133 |
return "<p>" + result_html + "</p>"
|
134 |
|
|
|
148 |
|
149 |
result_page_html = ""
|
150 |
for lang, results_for_lang in results.items():
|
151 |
+
print("Processing language", lang)
|
152 |
if len(results_for_lang) == 0:
|
153 |
if exact_search:
|
154 |
result_page_html += """<div style='font-family: Arial; color:Silver; text-align: left; line-height: 3em'>
|
|
|
182 |
Total number of matches: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
|
183 |
num_results
|
184 |
)
|
|
|
185 |
return header_html + result_page_html
|
186 |
|
187 |
|
188 |
def extract_results_from_payload(query, language, payload, exact_search):
|
189 |
results = payload["results"]
|
|
|
190 |
processed_results = dict()
|
191 |
datasets = set()
|
192 |
highlight_terms = None
|
193 |
num_results = None
|
194 |
+
|
195 |
if exact_search:
|
196 |
highlight_terms = query
|
197 |
num_results = payload["num_results"]
|
|
|
218 |
return processed_results, highlight_terms, num_results, list(datasets)
|
219 |
|
220 |
|
221 |
+
def no_query_error_message():
|
222 |
+
return f"""
|
223 |
+
<p style='font-size:18px; font-family: Arial; color:MediumVioletRed; text-align: center;'>
|
224 |
+
Please provide a non-empty query.
|
225 |
+
</p><br><hr><br>"""
|
226 |
+
|
227 |
+
|
228 |
+
def process_error(error_type, payload):
|
229 |
if error_type == "unsupported_lang":
|
230 |
detected_lang = payload["err"]["meta"]["detected_lang"]
|
231 |
return f"""
|
|
|
344 |
else:
|
345 |
query = " ".join(query.split())
|
346 |
if query == "" or query is None:
|
347 |
+
return no_query_error_message()
|
348 |
|
349 |
payload = request_payload(query, lang, exact_search, k, received_results)
|
350 |
err = extract_error_from_payload(payload)
|
351 |
if err is not None:
|
352 |
+
return process_error(err, payload)
|
353 |
|
354 |
(
|
355 |
processed_results,
|
|
|
362 |
payload,
|
363 |
exact_search,
|
364 |
)
|
365 |
+
result_page = format_result_page(
|
366 |
lang, processed_results, highlight_terms, num_results, exact_search
|
367 |
)
|
368 |
return (
|
|
|
370 |
highlight_terms,
|
371 |
num_results,
|
372 |
exact_search,
|
373 |
+
result_page,
|
374 |
ds,
|
375 |
)
|
376 |
|
377 |
def submit(query, lang, k, dropdown_input):
|
378 |
print("submitting", query, lang, k)
|
379 |
+
response = run_query(query, lang, k, dropdown_input, 0)
|
380 |
+
if not isinstance(response, tuple):
|
381 |
+
return {result_page_html: response}
|
382 |
(
|
383 |
processed_results,
|
384 |
highlight_terms,
|
385 |
num_results,
|
386 |
exact_search,
|
387 |
+
result_page,
|
388 |
datasets,
|
389 |
+
) = response
|
390 |
has_more_results = exact_search and (num_results > k)
|
391 |
return [
|
392 |
processed_results,
|
|
|
397 |
gr.Dropdown.update(choices=datasets, value=datasets),
|
398 |
gr.update(visible=has_more_results),
|
399 |
len(next(iter(processed_results.values()))),
|
400 |
+
result_page,
|
401 |
]
|
402 |
|
403 |
def next_page(
|
|
|
408 |
received_results,
|
409 |
processed_results,
|
410 |
):
|
411 |
+
response = run_query(query, lang, k, dropdown_input, received_results)
|
412 |
+
if not isinstance(response, tuple):
|
413 |
+
return {result_page_html: response}
|
414 |
(
|
415 |
processed_results,
|
416 |
highlight_terms,
|
417 |
num_results,
|
418 |
exact_search,
|
419 |
+
result_page,
|
420 |
datasets,
|
421 |
+
) = response
|
422 |
current_results = sum(
|
423 |
len(results) for results in processed_results.values()
|
424 |
)
|
|
|
437 |
gr.Dropdown.update(choices=datasets, value=datasets),
|
438 |
gr.update(visible=current_results >= k and has_more_results),
|
439 |
received_results + current_results,
|
440 |
+
result_page,
|
441 |
]
|
442 |
|
443 |
def filter_datasets(
|