Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
missing
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -469,12 +469,18 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
|
|
469 |
|
470 |
print(f"Missing sbatch results:")
|
471 |
for r in for_run:
|
472 |
-
|
|
|
|
|
473 |
if ',chat' in fm:
|
|
|
474 |
fm=fm.replace(',chat','')
|
475 |
-
|
476 |
-
|
477 |
-
|
|
|
|
|
|
|
478 |
|
479 |
# print('missing_results_for_task', missing_results_for_task)
|
480 |
for task, models in missing_results_for_task.items():
|
|
|
469 |
|
470 |
print(f"Missing sbatch results:")
|
471 |
for r in for_run:
|
472 |
+
if r[0]==5 and r[1] in ['polish_eqbench']: continue
|
473 |
+
fm=r[2]
|
474 |
+
script='bash eval_model_task_bs1.sh'
|
475 |
if ',chat' in fm:
|
476 |
+
script='bash eval_model_task_bs1_chat.sh'
|
477 |
fm=fm.replace(',chat','')
|
478 |
+
if ',multiturn' in fm:
|
479 |
+
script='bash eval_model_task_bs1_chat_few.sh'
|
480 |
+
fm=fm.replace(',multiturn','')
|
481 |
+
|
482 |
+
print(f'sbatch start.sh "bash {script} {r[0]} {r[1]} {fm}"')
|
483 |
+
|
484 |
|
485 |
# print('missing_results_for_task', missing_results_for_task)
|
486 |
for task, models in missing_results_for_task.items():
|