djstrong committed
Commit 1f30b67 (1 parent: 476a46c)

add 5-shot

Files changed (1)
  1. src/leaderboard/read_evals.py +14 -13
src/leaderboard/read_evals.py CHANGED
@@ -33,7 +33,7 @@ class EvalResult:
     still_on_hub: bool = False
 
     @classmethod
-    def init_from_json_file(self, json_filepath):
+    def init_from_json_file(self, json_filepath, n_shot_num):
         """Inits the result from the specific model result file"""
         with open(json_filepath) as fp:
             data = json.load(fp)
@@ -74,7 +74,7 @@ class EvalResult:
             task = task.value
 
             # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == NUM_FEWSHOT])
+            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k and n_shot.get(k, -1) == n_shot_num])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -253,17 +253,18 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             model_result_filepaths.append(os.path.join(root, file))
 
     eval_results = {}
-    for model_result_filepath in model_result_filepaths:
-        # Creation of result
-        eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        eval_result.update_with_request_file(requests_path)
-
-        # Store results of same eval together
-        eval_name = eval_result.eval_name
-        if eval_name in eval_results.keys():
-            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
-        else:
-            eval_results[eval_name] = eval_result
+    for n_shot in [0, 5]:
+        for model_result_filepath in model_result_filepaths:
+            # Creation of result
+            eval_result = EvalResult.init_from_json_file(model_result_filepath, n_shot_num=n_shot)
+            eval_result.update_with_request_file(requests_path)
+
+            # Store results of same eval together
+            eval_name = f"{eval_result.eval_name}_{n_shot}-shot"
+            if eval_name in eval_results.keys():
+                eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+            else:
+                eval_results[eval_name] = eval_result
 
     results = []
     for v in eval_results.values():
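
The change threads an n_shot_num argument through init_from_json_file, so the same result file can be read once per shot setting: only entries whose recorded shot count matches the requested one survive the filter. A minimal sketch of that filter with made-up task names and scores (here n_shot stands in for the benchmark-to-shot-count mapping the real method derives from the result file; all names are illustrative, not the leaderboard's actual data):

import numpy as np

# Hypothetical data: shot count and scores per benchmark key.
n_shot = {"task_a": 0, "task_b": 5}
results = {"task_a": {"acc": 0.41}, "task_b": {"acc": 0.72}}

def metric_scores(benchmark, metric, n_shot_num):
    # Keep only entries recorded at the requested shot count,
    # mirroring the filter on data["results"] in the diff above.
    return np.array([
        v.get(metric, None)
        for k, v in results.items()
        if k == benchmark and n_shot.get(k, -1) == n_shot_num
    ])

print(metric_scores("task_b", "acc", 5))  # [0.72]
print(metric_scores("task_b", "acc", 0))  # [] -> the caller continues past this task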
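
In get_raw_eval_results, the new outer for n_shot in [0, 5] loop loads every result file twice and stores each pass under a shot-suffixed key, so 0-shot and 5-shot scores land in separate entries. A minimal sketch of that grouping, with dummy dicts standing in for real EvalResult objects (eval names are hypothetical):

# Hypothetical eval names; dicts stand in for EvalResult instances.
eval_results = {}
for n_shot in [0, 5]:
    for base_name in ["org_model-a", "org_model-b"]:
        eval_name = f"{base_name}_{n_shot}-shot"
        eval_results[eval_name] = {"n_shot": n_shot}

print(list(eval_results))
# ['org_model-a_0-shot', 'org_model-b_0-shot',
#  'org_model-a_5-shot', 'org_model-b_5-shot']

Suffixing the key is what lets both shot settings coexist in one dict; without it, the 5-shot pass would merge into (and partly overwrite) the 0-shot entries via results.update.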