Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
f765492
1 Parent(s): 67024a0

feat: implement versioning for long-doc reranker only

Browse files
Files changed (1) hide show
  1. app.py +28 -7
app.py CHANGED
@@ -14,7 +14,7 @@ from src.display.css_html_js import custom_css
14
  from src.envs import (
15
  API,
16
  EVAL_RESULTS_PATH,
17
- REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK
18
  )
19
  from src.loaders import (
20
  load_eval_results
@@ -23,7 +23,7 @@ from src.utils import (
23
  update_metric,
24
  set_listeners,
25
  reset_rank,
26
- remove_html
27
  )
28
  from src.display.gradio_formatting import (
29
  get_version_dropdown,
@@ -38,6 +38,8 @@ from src.display.gradio_formatting import (
38
  get_leaderboard_table
39
  )
40
 
 
 
41
 
42
  def restart_space():
43
  API.restart_space(repo_id=REPO_ID)
@@ -247,7 +249,12 @@ with demo:
247
  queue=True
248
  )
249
  with gr.TabItem("Reranking Only", id=12):
250
- lb_df_reranker = datastore.leaderboard_df_qa[datastore.leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
 
 
 
 
 
251
  lb_df_reranker = reset_rank(lb_df_reranker)
252
  reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
253
  with gr.Row():
@@ -439,10 +446,12 @@ with demo:
439
  lb_table_retriever_long_doc,
440
  queue=True
441
  )
442
- """
443
  with gr.TabItem("Reranking Only", id=22):
444
- lb_df_reranker_ldoc = datastore.leaderboard_df_long_doc[
445
- datastore.leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
 
 
 
446
  ]
447
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
448
  reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
@@ -458,11 +467,24 @@ with demo:
458
  hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
459
  )
460
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  set_listeners(
462
  "long-doc",
463
  lb_table_reranker_ldoc,
464
  hidden_lb_table_reranker_ldoc,
465
  search_bar_reranker_ldoc,
 
466
  selected_domains,
467
  selected_langs,
468
  selected_rerankings_reranker_ldoc,
@@ -551,7 +573,6 @@ with demo:
551
 
552
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
553
  gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
554
- """
555
 
556
  if __name__ == "__main__":
557
  scheduler = BackgroundScheduler()
 
14
  from src.envs import (
15
  API,
16
  EVAL_RESULTS_PATH,
17
+ REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK, BENCHMARK_VERSION_LIST
18
  )
19
  from src.loaders import (
20
  load_eval_results
 
23
  update_metric,
24
  set_listeners,
25
  reset_rank,
26
+ remove_html, upload_file, submit_results
27
  )
28
  from src.display.gradio_formatting import (
29
  get_version_dropdown,
 
38
  get_leaderboard_table
39
  )
40
 
41
+ from src.about import EVALUATION_QUEUE_TEXT, BENCHMARKS_TEXT
42
+
43
 
44
  def restart_space():
45
  API.restart_space(repo_id=REPO_ID)
 
249
  queue=True
250
  )
251
  with gr.TabItem("Reranking Only", id=12):
252
+ lb_df_reranker = \
253
+ datastore.leaderboard_df_qa[
254
+ datastore.leaderboard_df_qa[
255
+ COL_NAME_RETRIEVAL_MODEL
256
+ ] == BM25_LINK
257
+ ]
258
  lb_df_reranker = reset_rank(lb_df_reranker)
259
  reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
260
  with gr.Row():
 
446
  lb_table_retriever_long_doc,
447
  queue=True
448
  )
 
449
  with gr.TabItem("Reranking Only", id=22):
450
+ lb_df_reranker_ldoc = \
451
+ datastore.leaderboard_df_long_doc[
452
+ datastore.leaderboard_df_long_doc[
453
+ COL_NAME_RETRIEVAL_MODEL
454
+ ] == BM25_LINK
455
  ]
456
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
457
  reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
 
467
  hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
468
  )
469
 
470
+ selected_version.change(
471
+ update_datastore_long_doc,
472
+ [selected_version,],
473
+ [
474
+ selected_domains,
475
+ selected_langs,
476
+ selected_rerankings_reranker_ldoc,
477
+ lb_table_reranker_ldoc,
478
+ hidden_lb_table_reranker_ldoc
479
+ ]
480
+ )
481
+
482
  set_listeners(
483
  "long-doc",
484
  lb_table_reranker_ldoc,
485
  hidden_lb_table_reranker_ldoc,
486
  search_bar_reranker_ldoc,
487
+ selected_version,
488
  selected_domains,
489
  selected_langs,
490
  selected_rerankings_reranker_ldoc,
 
573
 
574
  with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
575
  gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
 
576
 
577
  if __name__ == "__main__":
578
  scheduler = BackgroundScheduler()