Spaces: Running on CPU Upgrade
fix read_evals
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -100,15 +100,14 @@ class EvalResult:
|
|
100 |
ko_ifeval = data["results"]["ko_ifeval"]
|
101 |
accs = np.mean([ko_ifeval["prompt_level_strict_acc,none"], ko_ifeval["inst_level_strict_acc,none"]])
|
102 |
mean_acc = np.mean(accs) * 100.0
|
103 |
-
results[task.benchmark] = mean_acc
|
104 |
-
|
105 |
if task.benchmark in ["ko_winogrande", "ko_gsm8k", "ko_eqbench", "kornat_common", "kornat_social", "kornat_harmless", "kornat_helpful", "ko_gpqa_diamond_zeroshot"]:
|
106 |
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
|
107 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
108 |
continue
|
109 |
-
|
110 |
if task.benchmark not in ["ko_eqbench"]:
|
111 |
mean_acc = accs[0] * 100.0
|
|
|
|
|
112 |
results[task.benchmark] = mean_acc
|
113 |
|
114 |
return self(
|
|
|
100 |
ko_ifeval = data["results"]["ko_ifeval"]
|
101 |
accs = np.mean([ko_ifeval["prompt_level_strict_acc,none"], ko_ifeval["inst_level_strict_acc,none"]])
|
102 |
mean_acc = np.mean(accs) * 100.0
|
|
|
|
|
103 |
if task.benchmark in ["ko_winogrande", "ko_gsm8k", "ko_eqbench", "kornat_common", "kornat_social", "kornat_harmless", "kornat_helpful", "ko_gpqa_diamond_zeroshot"]:
|
104 |
accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
|
105 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
106 |
continue
|
|
|
107 |
if task.benchmark not in ["ko_eqbench"]:
|
108 |
mean_acc = accs[0] * 100.0
|
109 |
+
else:
|
110 |
+
mean_acc = accs[0]
|
111 |
results[task.benchmark] = mean_acc
|
112 |
|
113 |
return self(
|