djstrong committed on
Commit
45551c3
1 Parent(s): 28627fa
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +10 -4
src/leaderboard/read_evals.py CHANGED
@@ -469,12 +469,18 @@ def get_raw_eval_results(results_path: str, requests_path: str, metadata) -> lis
469
 
470
  print(f"Missing sbatch results:")
471
  for r in for_run:
472
- fm=r[2].replace(',multiturn','')
 
 
473
  if ',chat' in fm:
 
474
  fm=fm.replace(',chat','')
475
- print(f'sbatch start.sh "bash eval_model_task_bs1_chat.sh {r[0]} {r[1]} {fm}"')
476
- else:
477
- print(f'sbatch start.sh "bash eval_model_task_bs1.sh {r[0]} {r[1]} {fm}"')
 
 
 
478
 
479
  # print('missing_results_for_task', missing_results_for_task)
480
  for task, models in missing_results_for_task.items():
 
469
 
470
  print(f"Missing sbatch results:")
471
  for r in for_run:
472
+ if r[0]==5 and r[1] in ['polish_eqbench']: continue
473
+ fm=r[2]
474
+ script='bash eval_model_task_bs1.sh'
475
  if ',chat' in fm:
476
+ script='bash eval_model_task_bs1_chat.sh'
477
  fm=fm.replace(',chat','')
478
+ if ',multiturn' in fm:
479
+ script='bash eval_model_task_bs1_chat_few.sh'
480
+ fm=fm.replace(',multiturn','')
481
+
482
+ print(f'sbatch start.sh "bash {script} {r[0]} {r[1]} {fm}"')
483
+
484
 
485
  # print('missing_results_for_task', missing_results_for_task)
486
  for task, models in missing_results_for_task.items():