Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Sean Cho
committed on
Commit
•
150c99b
1
Parent(s):
f1b022c
update evaluation fields
Browse files
- app.py +2 -0
- src/display_models/read_results.py +6 -10
app.py
CHANGED
@@ -77,6 +77,8 @@ BENCHMARK_COLS = [
|
|
77 |
AutoEvalColumn.hellaswag,
|
78 |
AutoEvalColumn.mmlu,
|
79 |
AutoEvalColumn.truthfulqa,
|
|
|
|
|
80 |
]
|
81 |
]
|
82 |
|
|
|
77 |
AutoEvalColumn.hellaswag,
|
78 |
AutoEvalColumn.mmlu,
|
79 |
AutoEvalColumn.truthfulqa,
|
80 |
+
AutoEvalColumn.commongen,
|
81 |
+
AutoEvalColumn.ethicalverification,
|
82 |
]
|
83 |
]
|
84 |
|
src/display_models/read_results.py
CHANGED
@@ -9,13 +9,13 @@ import numpy as np
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
-
BENCHMARKS = ["
|
13 |
BENCH_TO_NAME = {
|
14 |
-
"
|
15 |
-
"
|
16 |
-
"
|
17 |
-
"
|
18 |
-
"
|
19 |
"ethicalverification": AutoEvalColumn.ethicalverification.name,
|
20 |
}
|
21 |
|
@@ -66,10 +66,6 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
|
|
66 |
with open(json_filepath) as fp:
|
67 |
data = json.load(fp)
|
68 |
|
69 |
-
for mmlu_k in ["harness|hendrycksTest-abstract_algebra|5", "hendrycksTest-abstract_algebra"]:
|
70 |
-
if mmlu_k in data["versions"] and data["versions"][mmlu_k] == 0:
|
71 |
-
return None, [] # we skip models with the wrong version
|
72 |
-
|
73 |
try:
|
74 |
config = data["config"]
|
75 |
except KeyError:
|
|
|
9 |
from src.display_models.utils import AutoEvalColumn, make_clickable_model
|
10 |
|
11 |
METRICS = ["acc_norm", "acc_norm", "acc", "mc2"]
|
12 |
+
BENCHMARKS = ["ko_arc_challenge", "ko_hellaswag", "ko_mmlu", "ko_truthfulqa:mc", "ko_commongen", "ethicalverification"]
|
13 |
BENCH_TO_NAME = {
|
14 |
+
"ko_arc_challenge": AutoEvalColumn.arc.name,
|
15 |
+
"ko_hellaswag": AutoEvalColumn.hellaswag.name,
|
16 |
+
"ko_mmlu": AutoEvalColumn.mmlu.name,
|
17 |
+
"ko_truthfulqa:mc": AutoEvalColumn.truthfulqa.name,
|
18 |
+
"ko_commongen": AutoEvalColumn.commongen.name,
|
19 |
"ethicalverification": AutoEvalColumn.ethicalverification.name,
|
20 |
}
|
21 |
|
|
|
66 |
with open(json_filepath) as fp:
|
67 |
data = json.load(fp)
|
68 |
|
|
|
|
|
|
|
|
|
69 |
try:
|
70 |
config = data["config"]
|
71 |
except KeyError:
|