Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
26e4c47
1 Parent(s): 254881e

refactor: refactor the naming

Browse files
Files changed (3) hide show
  1. app.py +36 -36
  2. src/loaders.py +14 -14
  3. src/models.py +7 -6
app.py CHANGED
@@ -114,9 +114,9 @@ def update_datastore(version):
114
  selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
115
  selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
116
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
117
- leaderboard_table = get_leaderboard_table(datastore.formatted_df_qa, datastore.types_qa)
118
  hidden_leaderboard_table = \
119
- get_leaderboard_table(datastore.raw_df_qa, datastore.types_qa, visible=False)
120
  return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
121
 
122
 
@@ -127,9 +127,9 @@ def update_datastore_long_doc(version):
127
  selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
128
  selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
129
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
130
- leaderboard_table = get_leaderboard_table(datastore.formatted_df_ldoc, datastore.types_long_doc)
131
  hidden_leaderboard_table = \
132
- get_leaderboard_table(datastore.raw_df_long_doc, datastore.types_long_doc, visible=False)
133
  return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
134
 
135
 
@@ -170,9 +170,9 @@ with demo:
170
  with gr.Column():
171
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
172
  # shown_table
173
- lb_table = get_leaderboard_table(datastore.formatted_df_qa, datastore.types_qa)
174
  # Dummy leaderboard for handling the case when the user uses backspace key
175
- hidden_lb_table = get_leaderboard_table(datastore.raw_df_qa, datastore.types_qa, visible=False)
176
 
177
  selected_version.change(
178
  update_datastore,
@@ -218,19 +218,19 @@ with demo:
218
  with gr.Column(scale=1):
219
  selected_noreranker = get_noreranking_dropdown()
220
 
221
- lb_df_retriever = datastore.formatted_df_qa[
222
- datastore.formatted_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"
223
  ]
224
  lb_df_retriever = reset_rank(lb_df_retriever)
225
- lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.types_qa)
226
 
227
  # Dummy leaderboard for handling the case when the user uses backspace key
228
- hidden_lb_df_retriever = datastore.raw_df_qa[
229
- datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"
230
- ]
231
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
232
  hidden_lb_table_retriever = get_leaderboard_table(
233
- hidden_lb_df_retriever, datastore.types_qa, visible=False
234
  )
235
 
236
  selected_version.change(
@@ -276,8 +276,8 @@ with demo:
276
  queue=True,
277
  )
278
  with gr.TabItem("Reranking Only", id=12):
279
- lb_df_reranker = datastore.formatted_df_qa[
280
- datastore.formatted_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
281
  ]
282
  lb_df_reranker = reset_rank(lb_df_reranker)
283
  reranking_models_reranker = (
@@ -288,14 +288,14 @@ with demo:
288
  selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
289
  with gr.Column(scale=1):
290
  search_bar_reranker = gr.Textbox(show_label=False, visible=False)
291
- lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.types_qa)
292
 
293
- hidden_lb_df_reranker = datastore.raw_df_qa[
294
- datastore.raw_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
295
- ]
296
  hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
297
  hidden_lb_table_reranker = get_leaderboard_table(
298
- hidden_lb_df_reranker, datastore.types_qa, visible=False
299
  )
300
 
301
  selected_version.change(
@@ -365,12 +365,12 @@ with demo:
365
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
366
 
367
  lb_table_long_doc = get_leaderboard_table(
368
- datastore.formatted_df_ldoc, datastore.types_long_doc
369
  )
370
 
371
  # Dummy leaderboard for handling the case when the user uses backspace key
372
  hidden_lb_table_long_doc = get_leaderboard_table(
373
- datastore.raw_df_long_doc, datastore.types_long_doc, visible=False
374
  )
375
 
376
  selected_version.change(
@@ -421,20 +421,20 @@ with demo:
421
  search_bar_retriever = get_search_bar()
422
  with gr.Column(scale=1):
423
  selected_noreranker = get_noreranking_dropdown()
424
- lb_df_retriever_long_doc = datastore.formatted_df_ldoc[
425
- datastore.formatted_df_ldoc[COL_NAME_RERANKING_MODEL] == "NoReranker"
426
  ]
427
  lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
428
  lb_table_retriever_long_doc = get_leaderboard_table(
429
- lb_df_retriever_long_doc, datastore.types_long_doc
430
  )
431
 
432
- hidden_lb_df_retriever_long_doc = datastore.raw_df_long_doc[
433
- datastore.raw_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
434
- ]
435
  hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
436
  hidden_lb_table_retriever_long_doc = get_leaderboard_table(
437
- hidden_lb_df_retriever_long_doc, datastore.types_long_doc, visible=False
438
  )
439
 
440
  selected_version.change(
@@ -479,8 +479,8 @@ with demo:
479
  queue=True,
480
  )
481
  with gr.TabItem("Reranking Only", id=22):
482
- lb_df_reranker_ldoc = datastore.formatted_df_ldoc[
483
- datastore.formatted_df_ldoc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
484
  ]
485
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
486
  reranking_models_reranker_ldoc = (
@@ -493,13 +493,13 @@ with demo:
493
  )
494
  with gr.Column(scale=1):
495
  search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
496
- lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.types_long_doc)
497
- hidden_lb_df_reranker_ldoc = datastore.raw_df_long_doc[
498
- datastore.raw_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
499
- ]
500
  hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
501
  hidden_lb_table_reranker_ldoc = get_leaderboard_table(
502
- hidden_lb_df_reranker_ldoc, datastore.types_long_doc, visible=False
503
  )
504
 
505
  selected_version.change(
 
114
  selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
115
  selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
116
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
117
+ leaderboard_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
118
  hidden_leaderboard_table = \
119
+ get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
120
  return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
121
 
122
 
 
127
  selected_domains = get_domain_dropdown(LongDocBenchmarks[datastore.slug])
128
  selected_langs = get_language_dropdown(LongDocBenchmarks[datastore.slug])
129
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
130
+ leaderboard_table = get_leaderboard_table(datastore.doc_fmt_df, datastore.doc_types)
131
  hidden_leaderboard_table = \
132
+ get_leaderboard_table(datastore.doc_raw_df, datastore.doc_types, visible=False)
133
  return selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table
134
 
135
 
 
170
  with gr.Column():
171
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
172
  # shown_table
173
+ lb_table = get_leaderboard_table(datastore.qa_fmt_df, datastore.qa_types)
174
  # Dummy leaderboard for handling the case when the user uses backspace key
175
+ hidden_lb_table = get_leaderboard_table(datastore.qa_raw_df, datastore.qa_types, visible=False)
176
 
177
  selected_version.change(
178
  update_datastore,
 
218
  with gr.Column(scale=1):
219
  selected_noreranker = get_noreranking_dropdown()
220
 
221
+ lb_df_retriever = datastore.qa_fmt_df[
222
+ datastore.qa_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
223
  ]
224
  lb_df_retriever = reset_rank(lb_df_retriever)
225
+ lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.qa_types)
226
 
227
  # Dummy leaderboard for handling the case when the user uses backspace key
228
+ hidden_lb_df_retriever = datastore.qa_raw_df[
229
+ datastore.qa_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
230
+ ]
231
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
232
  hidden_lb_table_retriever = get_leaderboard_table(
233
+ hidden_lb_df_retriever, datastore.qa_types, visible=False
234
  )
235
 
236
  selected_version.change(
 
276
  queue=True,
277
  )
278
  with gr.TabItem("Reranking Only", id=12):
279
+ lb_df_reranker = datastore.qa_fmt_df[
280
+ datastore.qa_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
281
  ]
282
  lb_df_reranker = reset_rank(lb_df_reranker)
283
  reranking_models_reranker = (
 
288
  selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
289
  with gr.Column(scale=1):
290
  search_bar_reranker = gr.Textbox(show_label=False, visible=False)
291
+ lb_table_reranker = get_leaderboard_table(lb_df_reranker, datastore.qa_types)
292
 
293
+ hidden_lb_df_reranker = datastore.qa_raw_df[
294
+ datastore.qa_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
295
+ ]
296
  hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
297
  hidden_lb_table_reranker = get_leaderboard_table(
298
+ hidden_lb_df_reranker, datastore.qa_types, visible=False
299
  )
300
 
301
  selected_version.change(
 
365
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
366
 
367
  lb_table_long_doc = get_leaderboard_table(
368
+ datastore.doc_fmt_df, datastore.doc_types
369
  )
370
 
371
  # Dummy leaderboard for handling the case when the user uses backspace key
372
  hidden_lb_table_long_doc = get_leaderboard_table(
373
+ datastore.doc_raw_df, datastore.doc_types, visible=False
374
  )
375
 
376
  selected_version.change(
 
421
  search_bar_retriever = get_search_bar()
422
  with gr.Column(scale=1):
423
  selected_noreranker = get_noreranking_dropdown()
424
+ lb_df_retriever_long_doc = datastore.doc_fmt_df[
425
+ datastore.doc_fmt_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
426
  ]
427
  lb_df_retriever_long_doc = reset_rank(lb_df_retriever_long_doc)
428
  lb_table_retriever_long_doc = get_leaderboard_table(
429
+ lb_df_retriever_long_doc, datastore.doc_types
430
  )
431
 
432
+ hidden_lb_df_retriever_long_doc = datastore.doc_raw_df[
433
+ datastore.doc_raw_df[COL_NAME_RERANKING_MODEL] == "NoReranker"
434
+ ]
435
  hidden_lb_df_retriever_long_doc = reset_rank(hidden_lb_df_retriever_long_doc)
436
  hidden_lb_table_retriever_long_doc = get_leaderboard_table(
437
+ hidden_lb_df_retriever_long_doc, datastore.doc_types, visible=False
438
  )
439
 
440
  selected_version.change(
 
479
  queue=True,
480
  )
481
  with gr.TabItem("Reranking Only", id=22):
482
+ lb_df_reranker_ldoc = datastore.doc_fmt_df[
483
+ datastore.doc_fmt_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
484
  ]
485
  lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
486
  reranking_models_reranker_ldoc = (
 
493
  )
494
  with gr.Column(scale=1):
495
  search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
496
+ lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, datastore.doc_types)
497
+ hidden_lb_df_reranker_ldoc = datastore.doc_raw_df[
498
+ datastore.doc_raw_df[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK
499
+ ]
500
  hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
501
  hidden_lb_table_reranker_ldoc = get_leaderboard_table(
502
+ hidden_lb_df_reranker_ldoc, datastore.doc_types, visible=False
503
  )
504
 
505
  selected_version.change(
src/loaders.py CHANGED
@@ -68,25 +68,25 @@ def load_leaderboard_datastore(file_path, version) -> LeaderboardDataStore:
68
  lb_data_store.raw_data = load_raw_eval_results(file_path)
69
  print(f"raw data: {len(lb_data_store.raw_data)}")
70
 
71
- lb_data_store.raw_df_qa = get_leaderboard_df(lb_data_store, task="qa", metric=DEFAULT_METRIC_QA)
72
- print(f"QA data loaded: {lb_data_store.raw_df_qa.shape}")
73
- lb_data_store.formatted_df_qa = lb_data_store.raw_df_qa.copy()
74
  shown_columns_qa, types_qa = get_default_cols("qa", lb_data_store.slug, add_fix_cols=True)
75
- lb_data_store.types_qa = types_qa
76
- lb_data_store.formatted_df_qa = lb_data_store.formatted_df_qa[
77
- ~lb_data_store.formatted_df_qa[COL_NAME_IS_ANONYMOUS]
78
  ][shown_columns_qa]
79
- lb_data_store.formatted_df_qa.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
80
 
81
- lb_data_store.raw_df_long_doc = get_leaderboard_df(lb_data_store, task="long-doc", metric=DEFAULT_METRIC_LONG_DOC)
82
- print(f"Long-Doc data loaded: {len(lb_data_store.raw_df_long_doc)}")
83
- lb_data_store.formatted_df_ldoc = lb_data_store.raw_df_long_doc.copy()
84
  shown_columns_long_doc, types_long_doc = get_default_cols("long-doc", lb_data_store.slug, add_fix_cols=True)
85
- lb_data_store.types_long_doc = types_long_doc
86
- lb_data_store.formatted_df_ldoc = lb_data_store.formatted_df_ldoc[
87
- ~lb_data_store.formatted_df_ldoc[COL_NAME_IS_ANONYMOUS]
88
  ][shown_columns_long_doc]
89
- lb_data_store.formatted_df_ldoc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
90
 
91
  lb_data_store.reranking_models = sorted(
92
  list(frozenset([eval_result.reranking_model for eval_result in lb_data_store.raw_data]))
 
68
  lb_data_store.raw_data = load_raw_eval_results(file_path)
69
  print(f"raw data: {len(lb_data_store.raw_data)}")
70
 
71
+ lb_data_store.qa_raw_df = get_leaderboard_df(lb_data_store, task="qa", metric=DEFAULT_METRIC_QA)
72
+ print(f"QA data loaded: {lb_data_store.qa_raw_df.shape}")
73
+ lb_data_store.qa_fmt_df = lb_data_store.qa_raw_df.copy()
74
  shown_columns_qa, types_qa = get_default_cols("qa", lb_data_store.slug, add_fix_cols=True)
75
+ lb_data_store.qa_types = types_qa
76
+ lb_data_store.qa_fmt_df = lb_data_store.qa_fmt_df[
77
+ ~lb_data_store.qa_fmt_df[COL_NAME_IS_ANONYMOUS]
78
  ][shown_columns_qa]
79
+ lb_data_store.qa_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
80
 
81
+ lb_data_store.doc_raw_df = get_leaderboard_df(lb_data_store, task="long-doc", metric=DEFAULT_METRIC_LONG_DOC)
82
+ print(f"Long-Doc data loaded: {len(lb_data_store.doc_raw_df)}")
83
+ lb_data_store.doc_fmt_df = lb_data_store.doc_raw_df.copy()
84
  shown_columns_long_doc, types_long_doc = get_default_cols("long-doc", lb_data_store.slug, add_fix_cols=True)
85
+ lb_data_store.doc_types = types_long_doc
86
+ lb_data_store.doc_fmt_df = lb_data_store.doc_fmt_df[
87
+ ~lb_data_store.doc_fmt_df[COL_NAME_IS_ANONYMOUS]
88
  ][shown_columns_long_doc]
89
+ lb_data_store.doc_fmt_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
90
 
91
  lb_data_store.reranking_models = sorted(
92
  list(frozenset([eval_result.reranking_model for eval_result in lb_data_store.raw_data]))
src/models.py CHANGED
@@ -141,10 +141,11 @@ class LeaderboardDataStore:
141
  version: str
142
  slug: str
143
  raw_data: Optional[list]
144
- raw_df_qa: Optional[pd.DataFrame]
145
- raw_df_long_doc: Optional[pd.DataFrame]
146
- formatted_df_qa: Optional[pd.DataFrame]
147
- formatted_df_ldoc: Optional[pd.DataFrame]
148
  reranking_models: Optional[list]
149
- types_qa: Optional[list]
150
- types_long_doc: Optional[list]
 
 
141
  version: str
142
  slug: str
143
  raw_data: Optional[list]
144
+ qa_raw_df: Optional[pd.DataFrame]
145
+ doc_raw_df: Optional[pd.DataFrame]
146
+ qa_fmt_df: Optional[pd.DataFrame]
147
+ doc_fmt_df: Optional[pd.DataFrame]
148
  reranking_models: Optional[list]
149
+ qa_types: Optional[list]
150
+ doc_types: Optional[list]
151
+ # qa_raw_df, docs_raw_df, qa_fmt_df, docs_fmt_df,