feat: implement versioning for long-doc reranker only
app.py
CHANGED
@@ -14,7 +14,7 @@ from src.display.css_html_js import custom_css
 from src.envs import (
     API,
     EVAL_RESULTS_PATH,
-    REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK
+    REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, BM25_LINK, BENCHMARK_VERSION_LIST
 )
 from src.loaders import (
     load_eval_results
@@ -23,7 +23,7 @@ from src.utils import (
     update_metric,
     set_listeners,
     reset_rank,
-    remove_html
+    remove_html, upload_file, submit_results
 )
 from src.display.gradio_formatting import (
     get_version_dropdown,
@@ -38,6 +38,8 @@ from src.display.gradio_formatting import (
     get_leaderboard_table
 )
 
+from src.about import EVALUATION_QUEUE_TEXT, BENCHMARKS_TEXT
+
 
 def restart_space():
     API.restart_space(repo_id=REPO_ID)
@@ -247,7 +249,12 @@ with demo:
                         queue=True
                     )
                 with gr.TabItem("Reranking Only", id=12):
-                    lb_df_reranker =
+                    lb_df_reranker = \
+                        datastore.leaderboard_df_qa[
+                            datastore.leaderboard_df_qa[
+                                COL_NAME_RETRIEVAL_MODEL
+                            ] == BM25_LINK
+                        ]
                     lb_df_reranker = reset_rank(lb_df_reranker)
                     reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
                     with gr.Row():
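Note on the hunk above: the QA "Reranking Only" tab now builds its table by keeping only rows whose retrieval model is BM25 before ranks are recomputed (the long-doc tab below applies the same filter). The following is a minimal, self-contained sketch of that pandas pattern, not the app's actual code: the column names, sample data, and the rank-reset step are hypothetical stand-ins for the constants in src.envs and the reset_rank helper in src.utils.

import pandas as pd

# Hypothetical stand-ins for the app's constants (the real ones live in src.envs).
COL_NAME_RETRIEVAL_MODEL = "Retrieval Model"
COL_NAME_RERANKING_MODEL = "Reranking Model"
BM25_LINK = "BM25"  # in the app this is an HTML link, hence the remove_html helper

leaderboard_df_qa = pd.DataFrame({
    COL_NAME_RETRIEVAL_MODEL: ["BM25", "BM25", "bge-m3"],
    COL_NAME_RERANKING_MODEL: ["reranker-a", "NoReranker", "NoReranker"],
    "Average": [55.2, 40.1, 48.7],
})

# Keep only BM25-retrieved rows so the table isolates the reranker's contribution,
# mirroring leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK].
lb_df_reranker = leaderboard_df_qa[
    leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
]

# Rough stand-in for reset_rank (an assumption, not the app's implementation):
# re-sort by score and renumber the rank column after filtering.
lb_df_reranker = lb_df_reranker.sort_values("Average", ascending=False).reset_index(drop=True)
lb_df_reranker.insert(0, "Rank", range(1, len(lb_df_reranker) + 1))
print(lb_df_reranker)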
@@ -439,10 +446,12 @@ with demo:
                         lb_table_retriever_long_doc,
                         queue=True
                     )
-                """
                 with gr.TabItem("Reranking Only", id=22):
-                    lb_df_reranker_ldoc =
-                        datastore.leaderboard_df_long_doc[
+                    lb_df_reranker_ldoc = \
+                        datastore.leaderboard_df_long_doc[
+                            datastore.leaderboard_df_long_doc[
+                                COL_NAME_RETRIEVAL_MODEL
+                            ] == BM25_LINK
                         ]
                     lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
                     reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
@@ -458,11 +467,24 @@ with demo:
                         hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
                     )
 
+                    selected_version.change(
+                        update_datastore_long_doc,
+                        [selected_version,],
+                        [
+                            selected_domains,
+                            selected_langs,
+                            selected_rerankings_reranker_ldoc,
+                            lb_table_reranker_ldoc,
+                            hidden_lb_table_reranker_ldoc
+                        ]
+                    )
+
                     set_listeners(
                         "long-doc",
                         lb_table_reranker_ldoc,
                         hidden_lb_table_reranker_ldoc,
                         search_bar_reranker_ldoc,
+                        selected_version,
                         selected_domains,
                         selected_langs,
                         selected_rerankings_reranker_ldoc,
@@ -551,7 +573,6 @@ with demo:
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
             gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
-    """
 
 if __name__ == "__main__":
     scheduler = BackgroundScheduler()
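Note on the long-doc hunks above: the new version dropdown is wired to the long-doc "Reranking Only" controls, so selected_version.change(...) re-runs update_datastore_long_doc and refreshes the selectors and both tables whenever a different benchmark version is chosen, and selected_version is also passed through set_listeners. Below is a minimal Gradio sketch of the same change-listener shape; the component names, version labels, and update function are hypothetical, not the app's.

import gradio as gr
import pandas as pd

# Hypothetical per-version results standing in for the app's versioned datastores.
RESULTS = {
    "24.04": pd.DataFrame({"Reranking Model": ["model-a", "model-b"], "Average": [55.2, 51.8]}),
    "24.05": pd.DataFrame({"Reranking Model": ["model-a", "model-b"], "Average": [57.0, 52.3]}),
}

def update_table(version: str) -> pd.DataFrame:
    # Analogue of update_datastore_long_doc: swap the backing data for the selected version.
    return RESULTS[version]

with gr.Blocks() as demo:
    selected_version = gr.Dropdown(choices=list(RESULTS), value="24.04", label="Benchmark version")
    results_table = gr.Dataframe(value=RESULTS["24.04"])

    # Same wiring shape as selected_version.change(update_datastore_long_doc, [selected_version,], [...]):
    # the dropdown is the input, and every component that must refresh is listed as an output.
    selected_version.change(update_table, [selected_version], [results_table])

if __name__ == "__main__":
    demo.launch()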