chore: clean up
- .gitignore +1 -0
- app.py +8 -0
- utils.py +7 -11
.gitignore
CHANGED
@@ -15,3 +15,4 @@ logs/
 .idea/
 .venv/
 toys/
+.DS_Store
app.py
CHANGED
@@ -290,6 +290,14 @@ with demo:
         gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
         with gr.Row():
             gr.Markdown("## ✉️Submit your model here!", elem_classes="markdown-text")
+        with gr.Row():
+            with gr.Column():
+                benchmark_version = gr.Dropdown(
+                    ['AIR-Bench_24.04',], value=['AIR-Bench_24.04',], interactive=True, label="AIR-Bench Version")
+            with gr.Column():
+                model_name_textbox = gr.Textbox(label="Model name")
+            with gr.Column():
+                model_url = gr.Textbox(label="Model URL")
         with gr.Row():
             file_output = gr.File()
         with gr.Row():
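For context, a minimal standalone sketch of the submission row this hunk adds, assuming only the gradio package. EVALUATION_QUEUE_TEXT and the rest of the demo layout in app.py are omitted, and value is passed as a scalar rather than the one-element list used in the diff; the launch guard at the end is illustrative only.

import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("## ✉️Submit your model here!")
    with gr.Row():
        with gr.Column():
            # Single-choice dropdown pinned to the only available benchmark version.
            benchmark_version = gr.Dropdown(
                choices=['AIR-Bench_24.04'], value='AIR-Bench_24.04',
                interactive=True, label="AIR-Bench Version")
        with gr.Column():
            model_name_textbox = gr.Textbox(label="Model name")
        with gr.Column():
            model_url = gr.Textbox(label="Model URL")
    with gr.Row():
        # Upload widget for the evaluation results file, as in the hunk above.
        file_output = gr.File()

if __name__ == "__main__":
    demo.launch()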
utils.py
CHANGED
@@ -1,14 +1,10 @@
-
-import os
-
-from src.display.formatting import styled_error, styled_message, styled_warning
+from typing import List
 
-
+import pandas as pd
 
-from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, QA_BENCHMARK_COLS, LONG_DOC_BENCHMARK_COLS
 from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
+from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC
 from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
-from typing import List
 
 
 def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
@@ -41,7 +37,7 @@ def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
     return df[(df[AutoEvalColumnQA.retrieval_model.name].str.contains(query, case=False))]
 
 
-def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str="qa") -> pd.DataFrame:
+def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str = "qa") -> pd.DataFrame:
     if task == "qa":
         always_here_cols = [
             AutoEvalColumnQA.retrieval_model.name,
@@ -111,7 +107,7 @@ def update_metric(
         query: str,
 ) -> pd.DataFrame:
     if task == 'qa':
-        leaderboard_df = get_leaderboard_df(raw_data,
+        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
         return update_table(
             leaderboard_df,
             domains,
@@ -120,7 +116,7 @@ def update_metric(
             query
         )
     elif task == 'long_doc':
-        leaderboard_df = get_leaderboard_df(raw_data,
+        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
        return update_table_long_doc(
             leaderboard_df,
             domains,
@@ -138,4 +134,4 @@ def upload_file(files):
     # print(file_paths)
     # HfApi(token="").upload_file(...)
     # os.remove(fp)
-    return file_paths
+    return file_paths
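The two update_metric hunks route both tasks through the same get_leaderboard_df(raw_data, task=task, metric=metric) call before refreshing the task-specific table. Below is a simplified sketch of that control flow, with the domain/language/query filtering arguments dropped and with hypothetical stand-ins for get_leaderboard_df, update_table, and update_table_long_doc (the real versions live under src/):

from typing import List

import pandas as pd


def get_leaderboard_df(raw_data: List[dict], task: str, metric: str) -> pd.DataFrame:
    # Hypothetical stand-in: build a frame from raw result dicts for one task/metric.
    return pd.DataFrame(raw_data)


def update_table(df: pd.DataFrame) -> pd.DataFrame:
    return df  # hypothetical stand-in for the QA table refresh


def update_table_long_doc(df: pd.DataFrame) -> pd.DataFrame:
    return df  # hypothetical stand-in for the long-doc table refresh


def update_metric(raw_data: List[dict], task: str, metric: str) -> pd.DataFrame:
    # Dispatch mirrors the diff: fetch the leaderboard frame with explicit
    # task/metric keywords, then hand it to the task-specific table update.
    if task == 'qa':
        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
        return update_table(leaderboard_df)
    elif task == 'long_doc':
        leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
        return update_table_long_doc(leaderboard_df)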