Spaces: Running on CPU Upgrade
refactor: refactor the naming
Browse files
- app.py +36 -36
- src/loaders.py +14 -14
- src/models.py +7 -6
app.py
CHANGED
@@ -114,9 +114,9 @@ def update_datastore(version):
|
|
114 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
115 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
116 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
117 |
-
leaderboard_table = get_leaderboard_table(datastore.
|
118 |
hidden_leaderboard_table = \
|
119 |
-
get_leaderboard_table(datastore.
|
120 |
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
121 |
|
122 |
|
@@ -127,9 +127,9 @@ def update_datastore_long_doc(version):
|
|
127 |
selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
|
128 |
selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
|
129 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
130 |
-
leaderboard_table = get_leaderboard_table(datastore.
|
131 |
hidden_leaderboard_table = \
|
132 |
-
get_leaderboard_table(datastore.
|
133 |
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
134 |
|
135 |
|
@@ -170,9 +170,9 @@ with demo:
|
|
170 |
with gr.Column():
|
171 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
172 |
# shown_table
|
173 |
-
lb_table = get_leaderboard_table(datastore.
|
174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
175 |
-
hidden_lb_table = get_leaderboard_table(datastore.
|
176 |
|
177 |
selected_version.change(
|
178 |
update_datastore,
|
@@ -218,19 +218,19 @@ with demo:
|
|
218 |
with gr.Column(scale=1):
|
219 |
selected_noreranker = get_noreranking_dropdown()
|
220 |
|
221 |
-
lb_df_retriever = datastore.
|
222 |
-
datastore.
|
223 |
]
|
224 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
225 |
-
lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.
|
226 |
|
227 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
228 |
-
hidden_lb_df_retriever = datastore.
|
229 |
-
datastore.
|
230 |
-
|
231 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
232 |
hidden_lb_table_retriever = get_leaderboard_table(
|
233 |
-
hidden_lb_df_retriever, datastore.
|
234 |
)
|
235 |
|
236 |
selected_version.change(
|
@@ -276,8 +276,8 @@ with demo:
|
|
276 |
queue=True,
|
277 |
)
|
278 |
with gr.TabItem("Reranking Only", id=12):
|
279 |
-
lb_df_reranker = datastore.
|
280 |
-
datastore.
|
281 |
]
|
282 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
283 |
reranking_models_reranker = (
|
@@ -288,14 +288,14 @@ with demo:
|
|
288 |
selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
|
289 |
with gr.Column(scale=1):
|
290 |
search_bar_reranker = gr.Textbox(show_label=False, visible=False)
|
291 |
-
lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.
|
292 |
|
293 |
-
hidden_lb_df_reranker = datastore.
|
294 |
-
datastore.
|
295 |
-
|
296 |
hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
|
297 |
hidden_lb_table_reranker = get_leaderboard_table(
|
298 |
-
hidden_lb_df_reranker, datastore.
|
299 |
)
|
300 |
|
301 |
selected_version.change(
|
@@ -365,12 +365,12 @@ with demo:
|
|
365 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
366 |
|
367 |
lb_table_long_doc = get_leaderboard_table(
|
368 |
-
datastore.
|
369 |
)
|
370 |
|
371 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
372 |
hidden_lb_table_long_doc = get_leaderboard_table(
|
373 |
-
datastore.
|
374 |
)
|
375 |
|
376 |
selected_version.change(
|
@@ -421,20 +421,20 @@ with demo:
|
|
421 |
search_bar_retriever = get_search_bar()
|
422 |
with gr.Column(scale=1):
|
423 |
selected_noreranker = get_noreranking_dropdown()
|
424 |
-
lb_df_retriever_long_doc = datastore.
|
425 |
-
datastore.
|
426 |
]
|
427 |
lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
|
428 |
lb_table_retriever_long_doc = get_leaderboard_table(
|
429 |
-
lb_df_retriever_long_doc, datastore.
|
430 |
)
|
431 |
|
432 |
-
hidden_lb_df_retriever_long_doc = datastore.
|
433 |
-
datastore.
|
434 |
-
|
435 |
hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
|
436 |
hidden_lb_table_retriever_long_doc = get_leaderboard_table(
|
437 |
-
hidden_lb_df_retriever_long_doc, datastore.
|
438 |
)
|
439 |
|
440 |
selected_version.change(
|
@@ -479,8 +479,8 @@ with demo:
|
|
479 |
queue=True,
|
480 |
)
|
481 |
with gr.TabItem("Reranking Only", id=22):
|
482 |
-
lb_df_reranker_ldoc = datastore.
|
483 |
-
datastore.
|
484 |
]
|
485 |
lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
|
486 |
reranking_models_reranker_ldoc = (
|
@@ -493,13 +493,13 @@ with demo:
|
|
493 |
)
|
494 |
with gr.Column(scale=1):
|
495 |
search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
|
496 |
-
lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.
|
497 |
-
hidden_lb_df_reranker_ldoc = datastore.
|
498 |
-
datastore.
|
499 |
-
|
500 |
hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
|
501 |
hidden_lb_table_reranker_ldoc = get_leaderboard_table(
|
502 |
-
hidden_lb_df_reranker_ldoc, datastore.
|
503 |
)
|
504 |
|
505 |
selected_version.change(
|
|
|
114 |
selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
|
115 |
selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
|
116 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
117 |
+
leaderboard_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
|
118 |
hidden_leaderboard_table = \
|
119 |
+
get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
120 |
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
121 |
|
122 |
|
|
|
127 |
selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
|
128 |
selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
|
129 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
130 |
+
leaderboard_table = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
|
131 |
hidden_leaderboard_table = \
|
132 |
+
get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
|
133 |
return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
|
134 |
|
135 |
|
|
|
170 |
with gr.Column():
|
171 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
172 |
# shown_table
|
173 |
+
lb_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
|
174 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
175 |
+
hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
|
176 |
|
177 |
selected_version.change(
|
178 |
update_datastore,
|
|
|
218 |
with gr.Column(scale=1):
|
219 |
selected_noreranker = get_noreranking_dropdown()
|
220 |
|
221 |
+
lb_df_retriever = datastore.qa_fmt_df[
|
222 |
+
datastore.qa_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
223 |
]
|
224 |
lb_df_retriever = reset_rank(lb_df_retriever)
|
225 |
+
lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.qa_types)
|
226 |
|
227 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
228 |
+
hidden_lb_df_retriever = datastore.qa_raw_df[
|
229 |
+
datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
230 |
+
]
|
231 |
hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
|
232 |
hidden_lb_table_retriever = get_leaderboard_table(
|
233 |
+
hidden_lb_df_retriever, datastore.qa_types, visible=False
|
234 |
)
|
235 |
|
236 |
selected_version.change(
|
|
|
276 |
queue=True,
|
277 |
)
|
278 |
with gr.TabItem("Reranking Only", id=12):
|
279 |
+
lb_df_reranker = datastore.qa_fmt_df[
|
280 |
+
datastore.qa_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
281 |
]
|
282 |
lb_df_reranker = reset_rank(lb_df_reranker)
|
283 |
reranking_models_reranker = (
|
|
|
288 |
selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
|
289 |
with gr.Column(scale=1):
|
290 |
search_bar_reranker = gr.Textbox(show_label=False, visible=False)
|
291 |
+
lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.qa_types)
|
292 |
|
293 |
+
hidden_lb_df_reranker = datastore.qa_raw_df[
|
294 |
+
datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
295 |
+
]
|
296 |
hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
|
297 |
hidden_lb_table_reranker = get_leaderboard_table(
|
298 |
+
hidden_lb_df_reranker, datastore.qa_types, visible=False
|
299 |
)
|
300 |
|
301 |
selected_version.change(
|
|
|
365 |
selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
|
366 |
|
367 |
lb_table_long_doc = get_leaderboard_table(
|
368 |
+
datastore.doc_fmt_df, datastore.doc_types
|
369 |
)
|
370 |
|
371 |
# Dummy leaderboard for handling the case when the user uses backspace key
|
372 |
hidden_lb_table_long_doc = get_leaderboard_table(
|
373 |
+
datastore.doc_raw_df, datastore.doc_types, visible=False
|
374 |
)
|
375 |
|
376 |
selected_version.change(
|
|
|
421 |
search_bar_retriever = get_search_bar()
|
422 |
with gr.Column(scale=1):
|
423 |
selected_noreranker = get_noreranking_dropdown()
|
424 |
+
lb_df_retriever_long_doc = datastore.doc_fmt_df[
|
425 |
+
datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
426 |
]
|
427 |
lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
|
428 |
lb_table_retriever_long_doc = get_leaderboard_table(
|
429 |
+
lb_df_retriever_long_doc, datastore.doc_types
|
430 |
)
|
431 |
|
432 |
+
hidden_lb_df_retriever_long_doc = datastore.doc_raw_df[
|
433 |
+
datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
|
434 |
+
]
|
435 |
hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
|
436 |
hidden_lb_table_retriever_long_doc = get_leaderboard_table(
|
437 |
+
hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
|
438 |
)
|
439 |
|
440 |
selected_version.change(
|
|
|
479 |
queue=True,
|
480 |
)
|
481 |
with gr.TabItem("Reranking Only", id=22):
|
482 |
+
lb_df_reranker_ldoc = datastore.doc_fmt_df[
|
483 |
+
datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
484 |
]
|
485 |
lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
|
486 |
reranking_models_reranker_ldoc = (
|
|
|
493 |
)
|
494 |
with gr.Column(scale=1):
|
495 |
search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
|
496 |
+
lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.doc_types)
|
497 |
+
hidden_lb_df_reranker_ldoc = datastore.doc_raw_df[
|
498 |
+
datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
|
499 |
+
]
|
500 |
hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
|
501 |
hidden_lb_table_reranker_ldoc = get_leaderboard_table(
|
502 |
+
hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
|
503 |
)
|
504 |
|
505 |
selected_version.change(
|
src/loaders.py
CHANGED
@@ -68,25 +68,25 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
|
|
68 |
lb_data_store.raw_data = load_raw_eval_results(file_path)
|
69 |
print(f"raw data: {len(lb_data_store.raw_data)}")
|
70 |
|
71 |
-
lb_data_store.
|
72 |
-
print(f"QA data loaded: {lb_data_store.
|
73 |
-
lb_data_store.
|
74 |
shown_columns_qa, types_qa = get_default_cols("qa", lb_data_store.slug, add_fix_cols=True)
|
75 |
-
lb_data_store.
|
76 |
-
lb_data_store.
|
77 |
-
~lb_data_store.
|
78 |
][shown_columns_qa]
|
79 |
-
lb_data_store.
|
80 |
|
81 |
-
lb_data_store.
|
82 |
-
print(f"Long-Doc data loaded: {len(lb_data_store.
|
83 |
-
lb_data_store.
|
84 |
shown_columns_long_doc, types_long_doc = get_default_cols("long-doc", lb_data_store.slug, add_fix_cols=True)
|
85 |
-
lb_data_store.
|
86 |
-
lb_data_store.
|
87 |
-
~lb_data_store.
|
88 |
][shown_columns_long_doc]
|
89 |
-
lb_data_store.
|
90 |
|
91 |
lb_data_store.reranking_models = sorted(
|
92 |
list(frozenset([eval_result.reranking_model for eval_result in lb_data_store.raw_data]))
|
|
|
68 |
lb_data_store.raw_data = load_raw_eval_results(file_path)
|
69 |
print(f"raw data: {len(lb_data_store.raw_data)}")
|
70 |
|
71 |
+
lb_data_store.qa_raw_df = get_leaderboard_df(lb_data_store, task="qa", metric=DEFAULT_METRIC_QA)
|
72 |
+
print(f"QA data loaded: {lb_data_store.qa_raw_df.shape}")
|
73 |
+
lb_data_store.qa_fmt_df = lb_data_store.qa_raw_df.copy()
|
74 |
shown_columns_qa, types_qa = get_default_cols("qa", lb_data_store.slug, add_fix_cols=True)
|
75 |
+
lb_data_store.qa_types = types_qa
|
76 |
+
lb_data_store.qa_fmt_df = lb_data_store.qa_fmt_df[
|
77 |
+
~lb_data_store.qa_fmt_df[COL_NAME_IS_ANONYMOUS]
|
78 |
][shown_columns_qa]
|
79 |
+
lb_data_store.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
80 |
|
81 |
+
lb_data_store.doc_raw_df = get_leaderboard_df(lb_data_store, task="long-doc", metric=DEFAULT_METRIC_LONG_DOC)
|
82 |
+
print(f"Long-Doc data loaded: {len(lb_data_store.doc_raw_df)}")
|
83 |
+
lb_data_store.doc_fmt_df = lb_data_store.doc_raw_df.copy()
|
84 |
shown_columns_long_doc, types_long_doc = get_default_cols("long-doc", lb_data_store.slug, add_fix_cols=True)
|
85 |
+
lb_data_store.doc_types = types_long_doc
|
86 |
+
lb_data_store.doc_fmt_df = lb_data_store.doc_fmt_df[
|
87 |
+
~lb_data_store.doc_fmt_df[COL_NAME_IS_ANONYMOUS]
|
88 |
][shown_columns_long_doc]
|
89 |
+
lb_data_store.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
|
90 |
|
91 |
lb_data_store.reranking_models = sorted(
|
92 |
list(frozenset([eval_result.reranking_model for eval_result in lb_data_store.raw_data]))
|
src/models.py
CHANGED
@@ -141,10 +141,11 @@ class LeaderboardDataStore:
|
|
141 |
version: str
|
142 |
slug: str
|
143 |
raw_data: Optional[list]
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
reranking_models: Optional[list]
|
149 |
-
|
150 |
-
|
|
|
|
141 |
version: str
|
142 |
slug: str
|
143 |
raw_data: Optional[list]
|
144 |
+
qa_raw_df: Optional[pd.DataFrame]
|
145 |
+
doc_raw_df: Optional[pd.DataFrame]
|
146 |
+
qa_fmt_df: Optional[pd.DataFrame]
|
147 |
+
doc_fmt_df: Optional[pd.DataFrame]
|
148 |
reranking_models: Optional[list]
|
149 |
+
qa_types: Optional[list]
|
150 |
+
doc_types: Optional[list]
|
151 |
+
# qa_raw_df, doc_raw_df, qa_fmt_df, doc_fmt_df,
|