ola13 committed on
Commit
be21dae
•
1 Parent(s): 553d6f1

flagging refactor

Browse files
Files changed (1) hide show
  1. app.py +39 -58
app.py CHANGED
@@ -19,31 +19,39 @@ roots_datasets = {
19
  def get_docid_html(docid):
20
  data_org, dataset, docid = docid.split("/")
21
  metadata = roots_datasets[dataset]
 
 
22
  if metadata.private:
23
  docid_html = """
24
  <a title="This dataset is private. See the introductory text for more information"
25
- style="color:#AA4A44; font-weight: bold; text-decoration:none"
26
- onmouseover="style='color:#AA4A44; font-weight: bold; text-decoration:underline'"
27
- onmouseout="style='color:#AA4A44; font-weight: bold; text-decoration:none'"
28
  href="https://huggingface.co/datasets/bigscience-data/{dataset}"
29
  target="_blank">
30
  🔒{dataset}
31
  </a>
32
- <span style="color:#7978FF; ">/{docid}</span>""".format(
33
- dataset=dataset, docid=docid
 
 
 
34
  )
35
  else:
36
  docid_html = """
37
  <a title="This dataset is licensed {metadata}"
38
- style="color:#7978FF; font-weight: bold; text-decoration:none"
39
- onmouseover="style='color:#7978FF; font-weight: bold; text-decoration:underline'"
40
- onmouseout="style='color:#7978FF; font-weight: bold; text-decoration:none'"
41
  href="https://huggingface.co/datasets/bigscience-data/{dataset}"
42
  target="_blank">
43
  {dataset}
44
  </a>
45
- <span style="color:#7978FF; ">/{docid}</span>""".format(
46
- metadata=metadata.tags[0].split(":")[-1], dataset=dataset, docid=docid
 
 
 
47
  )
48
  return docid_html
49
 
@@ -63,30 +71,6 @@ def process_pii(text):
63
  return text
64
 
65
 
66
- def flag(query, language, num_results, issue_description):
67
- try:
68
- post_data = {
69
- "query": query,
70
- "k": num_results,
71
- "flag": True,
72
- "description": issue_description,
73
- }
74
- if language != "detect_language":
75
- post_data["lang"] = language
76
-
77
- output = requests.post(
78
- os.environ.get("address"),
79
- headers={"Content-type": "application/json"},
80
- data=json.dumps(post_data),
81
- timeout=120,
82
- )
83
-
84
- results = json.loads(output.text)
85
- except:
86
- print("Error flagging")
87
- return ""
88
-
89
-
90
  def format_result(result, highlight_terms, exact_search, datasets_filter=None):
91
  text, url, docid = result
92
  if datasets_filter is not None:
@@ -133,8 +117,9 @@ def format_result(result, highlight_terms, exact_search, datasets_filter=None):
133
  language = "FIXME"
134
  result_html = """{}
135
  <span style='font-size:14px; font-family: Arial; color:#7978FF; text-align: left;'>Document ID: {}</span>
136
- <a href="https://forms.gle/AdBLLwRApqcLkHYA8" target="_blank"><button>🏴‍☠️ Flag result 🏴‍☠️</button></a><br>
137
-
 
138
  <!-- <span style='font-size:12px; font-family: Arial; color:MediumAquaMarine'>Language: {}</span><br> -->
139
  <span style='font-family: Arial;'>{}</span><br>
140
  <br>
@@ -273,9 +258,7 @@ information and instructions on how to access the full corpus check [this form](
273
 
274
 
275
  if __name__ == "__main__":
276
- demo = gr.Blocks(
277
- css=".underline-on-hover:hover { text-decoration: underline; } .flagging { font-size:12px; color:Silver; }"
278
- )
279
 
280
  with demo:
281
  processed_results_state = gr.State([])
@@ -283,7 +266,6 @@ if __name__ == "__main__":
283
  num_results_state = gr.State(0)
284
  exact_search_state = gr.State(False)
285
  lang_state = gr.State("")
286
- max_page_size_state = gr.State(100)
287
  received_results_state = gr.State(0)
288
 
289
  with gr.Row():
@@ -319,7 +301,13 @@ if __name__ == "__main__":
319
  value="en",
320
  label="Language",
321
  )
322
- k = gr.Slider(1, 100, value=10, step=1, label="Max Results")
 
 
 
 
 
 
323
  with gr.Row():
324
  submit_btn = gr.Button("Submit")
325
  with gr.Row(visible=False) as datasets_filter:
@@ -336,19 +324,17 @@ if __name__ == "__main__":
336
  with gr.Row(visible=False) as pagination:
337
  next_page_btn = gr.Button("Next Page")
338
 
339
- def run_query(query, lang, k, dropdown_input, max_page_size, received_results):
340
  query = query.strip()
341
  exact_search = False
342
  if query.startswith('"') and query.endswith('"') and len(query) >= 2:
343
  exact_search = True
344
  query = query[1:-1]
345
- k = max_page_size
346
  else:
347
  query = " ".join(query.split())
348
  if query == "" or query is None:
349
  return None
350
 
351
- print("submitting", query, lang, k)
352
  payload = request_payload(query, lang, exact_search, k, received_results)
353
  err = extract_error_from_payload(payload)
354
  if err is not None:
@@ -377,7 +363,8 @@ if __name__ == "__main__":
377
  ds,
378
  )
379
 
380
- def submit(query, lang, k, dropdown_input, max_page_size):
 
381
  (
382
  processed_results,
383
  highlight_terms,
@@ -385,8 +372,8 @@ if __name__ == "__main__":
385
  exact_search,
386
  result_page_html,
387
  datasets,
388
- ) = run_query(query, lang, k, dropdown_input, max_page_size, 0)
389
- has_more_results = exact_search and (num_results > max_page_size)
390
  return [
391
  processed_results,
392
  highlight_terms,
@@ -404,7 +391,6 @@ if __name__ == "__main__":
404
  lang,
405
  k,
406
  dropdown_input,
407
- max_page_size,
408
  received_results,
409
  processed_results,
410
  ):
@@ -415,11 +401,9 @@ if __name__ == "__main__":
415
  exact_search,
416
  result_page_html,
417
  datasets,
418
- ) = run_query(
419
- query, lang, k, dropdown_input, max_page_size, received_results
420
- )
421
  num_processed_results = len(next(iter(processed_results.values())))
422
- has_more_results = exact_search and (num_results > max_page_size)
423
  print("num_processed_results", num_processed_results)
424
  print("has_more_results", has_more_results)
425
  print("received_results", received_results)
@@ -430,9 +414,7 @@ if __name__ == "__main__":
430
  exact_search,
431
  gr.update(visible=True),
432
  gr.Dropdown.update(choices=datasets, value=datasets),
433
- gr.update(
434
- visible=num_processed_results >= max_page_size and has_more_results
435
- ),
436
  received_results + num_processed_results,
437
  result_page_html,
438
  ]
@@ -457,7 +439,7 @@ if __name__ == "__main__":
457
 
458
  query.submit(
459
  fn=submit,
460
- inputs=[query, lang, k, available_datasets, max_page_size_state],
461
  outputs=[
462
  processed_results_state,
463
  highlight_terms_state,
@@ -472,7 +454,7 @@ if __name__ == "__main__":
472
  )
473
  submit_btn.click(
474
  submit,
475
- inputs=[query, lang, k, available_datasets, max_page_size_state],
476
  outputs=[
477
  processed_results_state,
478
  highlight_terms_state,
@@ -493,7 +475,6 @@ if __name__ == "__main__":
493
  lang,
494
  k,
495
  available_datasets,
496
- max_page_size_state,
497
  received_results_state,
498
  processed_results_state,
499
  ],
 
19
  def get_docid_html(docid):
20
  data_org, dataset, docid = docid.split("/")
21
  metadata = roots_datasets[dataset]
22
+ locked_color = "LightGray"
23
+ open_color = "#7978FF"
24
  if metadata.private:
25
  docid_html = """
26
  <a title="This dataset is private. See the introductory text for more information"
27
+ style="color:{locked_color}; font-weight: bold; text-decoration:none"
28
+ onmouseover="style='color:{locked_color}; font-weight: bold; text-decoration:underline'"
29
+ onmouseout="style='color:{locked_color}; font-weight: bold; text-decoration:none'"
30
  href="https://huggingface.co/datasets/bigscience-data/{dataset}"
31
  target="_blank">
32
  🔒{dataset}
33
  </a>
34
+ <span style="color:{open_color}; ">/{docid}</span>""".format(
35
+ dataset=dataset,
36
+ docid=docid,
37
+ locked_color=locked_color,
38
+ open_color=open_color,
39
  )
40
  else:
41
  docid_html = """
42
  <a title="This dataset is licensed {metadata}"
43
+ style="color:{open_color}; font-weight: bold; text-decoration:none"
44
+ onmouseover="style='color:{open_color}; font-weight: bold; text-decoration:underline'"
45
+ onmouseout="style='color:{open_color}; font-weight: bold; text-decoration:none'"
46
  href="https://huggingface.co/datasets/bigscience-data/{dataset}"
47
  target="_blank">
48
  {dataset}
49
  </a>
50
+ <span style="color:{open_color}; ">/{docid}</span>""".format(
51
+ metadata=metadata.tags[0].split(":")[-1],
52
+ dataset=dataset,
53
+ docid=docid,
54
+ open_color=open_color,
55
  )
56
  return docid_html
57
 
 
71
  return text
72
 
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  def format_result(result, highlight_terms, exact_search, datasets_filter=None):
75
  text, url, docid = result
76
  if datasets_filter is not None:
 
117
  language = "FIXME"
118
  result_html = """{}
119
  <span style='font-size:14px; font-family: Arial; color:#7978FF; text-align: left;'>Document ID: {}</span>
120
+ <a href="https://forms.gle/AdBLLwRApqcLkHYA8" target="_blank">
121
+ <button style="color:#ffcdf8; ">🏴‍☠️ Flag result 🏴‍☠️</button>
122
+ </a><br>
123
  <!-- <span style='font-size:12px; font-family: Arial; color:MediumAquaMarine'>Language: {}</span><br> -->
124
  <span style='font-family: Arial;'>{}</span><br>
125
  <br>
 
258
 
259
 
260
  if __name__ == "__main__":
261
+ demo = gr.Blocks(css=".underline-on-hover:hover { text-decoration: underline; }")
 
 
262
 
263
  with demo:
264
  processed_results_state = gr.State([])
 
266
  num_results_state = gr.State(0)
267
  exact_search_state = gr.State(False)
268
  lang_state = gr.State("")
 
269
  received_results_state = gr.State(0)
270
 
271
  with gr.Row():
 
301
  value="en",
302
  label="Language",
303
  )
304
+ k = gr.Slider(
305
+ 1,
306
+ 100,
307
+ value=10,
308
+ step=1,
309
+ label="Max Results in fuzzy search or Max Results per page in exact search",
310
+ )
311
  with gr.Row():
312
  submit_btn = gr.Button("Submit")
313
  with gr.Row(visible=False) as datasets_filter:
 
324
  with gr.Row(visible=False) as pagination:
325
  next_page_btn = gr.Button("Next Page")
326
 
327
+ def run_query(query, lang, k, dropdown_input, received_results):
328
  query = query.strip()
329
  exact_search = False
330
  if query.startswith('"') and query.endswith('"') and len(query) >= 2:
331
  exact_search = True
332
  query = query[1:-1]
 
333
  else:
334
  query = " ".join(query.split())
335
  if query == "" or query is None:
336
  return None
337
 
 
338
  payload = request_payload(query, lang, exact_search, k, received_results)
339
  err = extract_error_from_payload(payload)
340
  if err is not None:
 
363
  ds,
364
  )
365
 
366
+ def submit(query, lang, k, dropdown_input):
367
+ print("submitting", query, lang, k)
368
  (
369
  processed_results,
370
  highlight_terms,
 
372
  exact_search,
373
  result_page_html,
374
  datasets,
375
+ ) = run_query(query, lang, k, dropdown_input, 0)
376
+ has_more_results = exact_search and (num_results > k)
377
  return [
378
  processed_results,
379
  highlight_terms,
 
391
  lang,
392
  k,
393
  dropdown_input,
 
394
  received_results,
395
  processed_results,
396
  ):
 
401
  exact_search,
402
  result_page_html,
403
  datasets,
404
+ ) = run_query(query, lang, k, dropdown_input, received_results)
 
 
405
  num_processed_results = len(next(iter(processed_results.values())))
406
+ has_more_results = exact_search and (num_results > k)
407
  print("num_processed_results", num_processed_results)
408
  print("has_more_results", has_more_results)
409
  print("received_results", received_results)
 
414
  exact_search,
415
  gr.update(visible=True),
416
  gr.Dropdown.update(choices=datasets, value=datasets),
417
+ gr.update(visible=num_processed_results >= k and has_more_results),
 
 
418
  received_results + num_processed_results,
419
  result_page_html,
420
  ]
 
439
 
440
  query.submit(
441
  fn=submit,
442
+ inputs=[query, lang, k, available_datasets],
443
  outputs=[
444
  processed_results_state,
445
  highlight_terms_state,
 
454
  )
455
  submit_btn.click(
456
  submit,
457
+ inputs=[query, lang, k, available_datasets],
458
  outputs=[
459
  processed_results_state,
460
  highlight_terms_state,
 
475
  lang,
476
  k,
477
  available_datasets,
 
478
  received_results_state,
479
  processed_results_state,
480
  ],