Space: AIR-Bench

Commit 4a6f9cd (1 parent: de3c2ba), committed by nan

feat: improve the submitting process

Files changed (2):
  1. app.py +16 -2
  2. utils.py +33 -2
app.py CHANGED

@@ -12,7 +12,7 @@ from src.display.css_html_js import custom_css
 from src.leaderboard.read_evals import get_raw_eval_results, get_leaderboard_df
 
 from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
-from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols
+from utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
 from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, metric_list
 from src.display.utils import TYPES_QA, TYPES_LONG_DOC
 
@@ -306,10 +306,14 @@ with demo:
                         model_name = gr.Textbox(label="Model name")
                     with gr.Column():
                         model_url = gr.Textbox(label="Model URL")
+                with gr.Row():
+                    upload_button = gr.UploadButton("Upload search results", file_count="single")
                 with gr.Row():
                     file_output = gr.File()
                 with gr.Row():
-                    upload_button = gr.UploadButton("Click to submit evaluation", file_count="single")
+                    submit_button = gr.Button("Submit")
+                with gr.Row():
+                    submission_result = gr.Markdown()
                 upload_button.upload(
                     upload_file,
                     [
@@ -319,6 +323,16 @@ with demo:
                         benchmark_version,
                     ],
                     file_output)
+                submit_button.click(
+                    submit_results,
+                    [
+                        file_output,
+                        model_name,
+                        model_url
+                    ],
+                    submission_result,
+                    show_progress="hidden"
+                )
 
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=3):
             gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text")
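
Taken together, the app.py hunks split submission into two explicit steps: the upload button now only stages the zip into file_output, and a separate Submit button triggers submit_results with the staged file plus the model metadata, writing feedback into a Markdown area. Below is a minimal, self-contained sketch of that wiring pattern, assuming Gradio 4.x; the handlers are simplified stand-ins for the Space's real upload_file and submit_results:

import gradio as gr

def stage_file(filepath: str) -> str:
    # The UploadButton hands the handler a temp file path; returning it
    # populates the gr.File component so the user can see what was staged.
    return filepath

def submit(filepath: str, model: str, model_url: str) -> str:
    # Stand-in for submit_results: just echo what would be submitted.
    if not filepath:
        return "Please upload a search-results file first."
    return f"Would submit {filepath} for model {model} ({model_url})."

with gr.Blocks() as demo:
    model_name = gr.Textbox(label="Model name")
    model_url = gr.Textbox(label="Model URL")
    upload_button = gr.UploadButton("Upload search results", file_count="single")
    file_output = gr.File()
    submit_button = gr.Button("Submit")
    submission_result = gr.Markdown()

    # Step 1: uploading only stages the file into file_output.
    upload_button.upload(stage_file, [upload_button], [file_output])
    # Step 2: nothing is pushed until the user explicitly clicks Submit.
    submit_button.click(
        submit,
        [file_output, model_name, model_url],
        [submission_result],
        show_progress="hidden",
    )

if __name__ == "__main__":
    demo.launch()

The two-step design lets users inspect the staged file and correct the form before anything reaches the results repository.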
utils.py CHANGED

@@ -12,6 +12,7 @@ from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, Benchmark
 from src.display.utils import AutoEvalColumnQA, AutoEvalColumnLongDoc, COLS_QA, COLS_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
 from src.leaderboard.read_evals import FullEvalResult, get_leaderboard_df
 from src.envs import API, SEARCH_RESULTS_REPO, CACHE_PATH
+from src.display.formatting import styled_message, styled_error
 
 
 def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
@@ -149,8 +150,34 @@ def upload_file(
         print(f"file uploading aborted. wrong file type: {filepath}")
         return filepath
 
+    return filepath
+
+from huggingface_hub import ModelCard
+from huggingface_hub.utils import EntryNotFoundError
+
+def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04"):
+    if not filepath.endswith(".zip"):
+        return styled_error(f"file uploading aborted. wrong file type: {filepath}")
+
+    # validate model
+    if not model:
+        return styled_error("failed to submit. Model name cannot be empty.")
+
+    # validate model url
+    if not model_url.startswith("https://huggingface.co/"):
+        return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Illegal model url: {model_url}")
+
+    # validate model card
+    repo_id = model_url.removeprefix("https://huggingface.co/")
+    try:
+        card = ModelCard.load(repo_id)
+    except EntryNotFoundError as e:
+        print(e)
+        return styled_error(f"failed to submit. Model url must be a link to a valid HuggingFace model on HuggingFace space. Could not get model {repo_id}")
+
     # rename the uploaded file
     input_fp = Path(filepath)
+    revision = input_fp.name.removesuffix(".zip")
     timezone = pytz.timezone('UTC')
     timestamp = datetime.now(timezone).strftime('%Y%m%d%H%M%S')
     output_fn = f"{timestamp}-{input_fp.name}"
@@ -166,7 +193,9 @@ def upload_file(
     output_config = {
         "model_name": f"{model}",
         "model_url": f"{model_url}",
-        "version": f"{version}"
+        "version": f"{version}",
+        "revision": f"{revision}",
+        "timestamp": f"{timestamp}"
     }
     with open(input_folder_path / output_config_fn, "w") as f:
         json.dump(output_config, f, ensure_ascii=False)
@@ -176,4 +205,6 @@ def upload_file(
        repo_id=SEARCH_RESULTS_REPO,
        repo_type="dataset",
        commit_message=f"feat: submit {model} config")
-    return filepath
+    return styled_message(
+        f"Thanks for submission!\nSubmission revision: {revision}"
+    )
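
The core of the utils.py change is the new submit_results helper, which validates the submission before reusing the rename-and-upload logic that previously lived in upload_file. Here is a standalone sketch of that validation chain, with local stand-ins for the Space's styled_message/styled_error helpers (the real ones live in src.display.formatting):

from huggingface_hub import ModelCard
from huggingface_hub.utils import EntryNotFoundError

HF_PREFIX = "https://huggingface.co/"

def styled_error(message: str) -> str:
    # Stand-in for src.display.formatting.styled_error.
    return f"ERROR: {message}"

def styled_message(message: str) -> str:
    # Stand-in for src.display.formatting.styled_message.
    return f"OK: {message}"

def validate_submission(filepath: str, model: str, model_url: str) -> str:
    if not filepath.endswith(".zip"):
        return styled_error(f"wrong file type: {filepath}")
    if not model:
        return styled_error("model name cannot be empty")
    if not model_url.startswith(HF_PREFIX):
        return styled_error(f"illegal model url: {model_url}")
    repo_id = model_url.removeprefix(HF_PREFIX)  # e.g. "org/model-name"
    try:
        # Loading the model card proves the repo exists and has a README.
        ModelCard.load(repo_id)
    except EntryNotFoundError as e:
        return styled_error(f"could not get model card for {repo_id}: {e}")
    return styled_message(f"{repo_id} passed all pre-submission checks")

Note that ModelCard.load can raise other errors too, for instance RepositoryNotFoundError when the URL points at no repo at all; the committed code only catches EntryNotFoundError, so those failures would surface as unhandled exceptions rather than styled errors.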
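
After validation, submit_results derives a revision from the zip's base name, prefixes the stored file with a UTC timestamp, and records both in the JSON config committed alongside the results. A sketch of just that bookkeeping step, using a hypothetical build_submission_metadata helper (the real code goes on to push the folder to SEARCH_RESULTS_REPO via the Hub API):

import json
from datetime import datetime
from pathlib import Path

import pytz

def build_submission_metadata(filepath: str, model: str, model_url: str,
                              version: str = "AIR-Bench_24.04") -> dict:
    # Hypothetical helper mirroring the bookkeeping in submit_results.
    input_fp = Path(filepath)
    # The revision is the zip's base name: "my-run.zip" -> "my-run".
    revision = input_fp.name.removesuffix(".zip")
    # The UTC timestamp prefix keeps repeated submissions of the same file distinct.
    timestamp = datetime.now(pytz.timezone("UTC")).strftime("%Y%m%d%H%M%S")
    return {
        "output_fn": f"{timestamp}-{input_fp.name}",
        "config": {
            "model_name": model,
            "model_url": model_url,
            "version": version,
            "revision": revision,
            "timestamp": timestamp,
        },
    }

if __name__ == "__main__":
    meta = build_submission_metadata(
        "/tmp/my-run.zip", "my-model", "https://huggingface.co/org/my-model")
    print(meta["output_fn"])
    print(json.dumps(meta["config"], indent=2))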