Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
feat: improve the submission process
Browse files
app.py
CHANGED
@@ -12,7 +12,7 @@ from src.display.css_html_js import custom_css
|
|
12 |
from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
|
13 |
|
14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
15 |
-
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols
|
16 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, metric_list
|
17 |
from src.display.utils import TYPES_QA, TYPES_LONG_DOC
|
18 |
|
@@ -306,10 +306,14 @@ with demo:
|
|
306 |
model_name = gr.Textbox(label="Model name")
|
307 |
with gr.Column():
|
308 |
model_url = gr.Textbox(label="Model URL")
|
|
|
|
|
309 |
with gr.Row():
|
310 |
file_output = gr.File()
|
311 |
with gr.Row():
|
312 |
-
|
|
|
|
|
313 |
upload_button.upload(
|
314 |
upload_file,
|
315 |
[
|
@@ -319,6 +323,16 @@ with demo:
|
|
319 |
benchmark_version,
|
320 |
],
|
321 |
file_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
324 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
|
|
12 |
from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
|
13 |
|
14 |
from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
|
15 |
+
from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
|
16 |
from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, metric_list
|
17 |
from src.display.utils import TYPES_QA, TYPES_LONG_DOC
|
18 |
|
|
|
306 |
model_name = gr.Textbox(label="Model name")
|
307 |
with gr.Column():
|
308 |
model_url = gr.Textbox(label="Model URL")
|
309 |
+
with gr.Row():
|
310 |
+
upload_button = gr.UploadButton("Upload search results", file_count="single")
|
311 |
with gr.Row():
|
312 |
file_output = gr.File()
|
313 |
with gr.Row():
|
314 |
+
submit_button = gr.Button("Submit")
|
315 |
+
with gr.Row():
|
316 |
+
submission_result = gr.Markdown()
|
317 |
upload_button.upload(
|
318 |
upload_file,
|
319 |
[
|
|
|
323 |
benchmark_version,
|
324 |
],
|
325 |
file_output)
|
326 |
+
submit_button.click(
|
327 |
+
submit_results,
|
328 |
+
[
|
329 |
+
file_output,
|
330 |
+
model_name,
|
331 |
+
model_url
|
332 |
+
],
|
333 |
+
submission_result,
|
334 |
+
show_progress="hidden"
|
335 |
+
)
|
336 |
|
337 |
with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
|
338 |
gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
|
utils.py
CHANGED
@@ -12,6 +12,7 @@ from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, Benchmark
|
|
12 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
13 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
14 |
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
|
|
15 |
|
16 |
|
17 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
@@ -149,8 +150,34 @@ def upload_file(
|
|
149 |
print(f"file uploading aborted. wrong file type: {filepath}")
|
150 |
return filepath
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
# rename the uploaded file
|
153 |
input_fp = Path(filepath)
|
|
|
154 |
timezone = pytz.timezone('UTC')
|
155 |
timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
|
156 |
output_fn = f"{timestamp}-{input_fp.name}"
|
@@ -166,7 +193,9 @@ def upload_file(
|
|
166 |
output_config = {
|
167 |
"model_name": f"{model}",
|
168 |
"model_url": f"{model_url}",
|
169 |
-
"version": f"{version}"
|
|
|
|
|
170 |
}
|
171 |
with open(input_folder_path / output_config_fn, "w") as f:
|
172 |
json.dump(output_config, f, ensure_ascii=False)
|
@@ -176,4 +205,6 @@ def upload_file(
|
|
176 |
repo_id=SEARCH_RESULTS_REPO,
|
177 |
repo_type="dataset",
|
178 |
commit_message=f"feat: submit {model} config")
|
179 |
-
return
|
|
|
|
|
|
12 |
from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
|
13 |
from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
|
14 |
from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
|
15 |
+
from src.display.formatting import styled_message, styled_error
|
16 |
|
17 |
|
18 |
def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
|
|
|
150 |
print(f"file uploading aborted. wrong file type: {filepath}")
|
151 |
return filepath
|
152 |
|
153 |
+
return filepath
|
154 |
+
|
155 |
+
from huggingface_hub import ModelCard
|
156 |
+
from huggingface_hub.utils import EntryNotFoundError
|
157 |
+
|
158 |
+
def submit_results(filepath: str, model: str, model_url: str, version: str="AIR-Bench_24.04"):
|
159 |
+
if not filepath.endswith(".zip"):
|
160 |
+
return styled_error(f"file uploading aborted. wrong file type: {filepath}")
|
161 |
+
|
162 |
+
# validate model
|
163 |
+
if not model:
|
164 |
+
return styled_error("failed to submit. Model name can not be empty.")
|
165 |
+
|
166 |
+
# validate model url
|
167 |
+
if not model_url.startswith("https://huggingface.co/"):
|
168 |
+
return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Illegal model url: {model_url}")
|
169 |
+
|
170 |
+
# validate model card
|
171 |
+
repo_id=model_url.removeprefix("https://huggingface.co/")
|
172 |
+
try:
|
173 |
+
card = ModelCard.load(repo_id)
|
174 |
+
except EntryNotFoundError as e:
|
175 |
+
print(e)
|
176 |
+
return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Could not get model {repo_id}")
|
177 |
+
|
178 |
# rename the uploaded file
|
179 |
input_fp = Path(filepath)
|
180 |
+
revision = input_fp.name.removesuffix(".zip")
|
181 |
timezone = pytz.timezone('UTC')
|
182 |
timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
|
183 |
output_fn = f"{timestamp}-{input_fp.name}"
|
|
|
193 |
output_config = {
|
194 |
"model_name": f"{model}",
|
195 |
"model_url": f"{model_url}",
|
196 |
+
"version": f"{version}",
|
197 |
+
"revision": f"{revision}",
|
198 |
+
"timestamp": f"{timestamp}"
|
199 |
}
|
200 |
with open(input_folder_path / output_config_fn, "w") as f:
|
201 |
json.dump(output_config, f, ensure_ascii=False)
|
|
|
205 |
repo_id=SEARCH_RESULTS_REPO,
|
206 |
repo_type="dataset",
|
207 |
commit_message=f"feat: submit {model} config")
|
208 |
+
return styled_message(
|
209 |
+
f"Thanks for submission!\nSubmission revision: {revision}"
|
210 |
+
)
|