Spaces:
Running
Running
XufengDuan
committed on
Commit
•
1daafa6
1
Parent(s):
79cf136
update scripts
Browse files
- src/display/about.py +10 -10
- src/leaderboard/read_evals.py +1 -1
src/display/about.py
CHANGED
@@ -13,25 +13,25 @@ class Tasks(Enum):
|
|
13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
15 |
E1 = Task("E1", "E1", "E1 Humanlike %")
|
16 |
-
E1_ci = Task("
|
17 |
E2 = Task("E2", "E2", "E2 Humanlike %")
|
18 |
-
E2_ci = Task("
|
19 |
E3 = Task("E3", "E3", "E3 Humanlike %")
|
20 |
-
E3_ci = Task("
|
21 |
E4 = Task("E4", "E4", "E4 Humanlike %")
|
22 |
-
E4_ci = Task("
|
23 |
E5 = Task("E5", "E5", "E5 Humanlike %")
|
24 |
-
E5_ci = Task("
|
25 |
E6 = Task("E6", "E6", "E6 Humanlike %")
|
26 |
-
E6_ci = Task("
|
27 |
E7 = Task("E7", "E7", "E7 Humanlike %")
|
28 |
-
E7_ci = Task("
|
29 |
E8 = Task("E8", "E8", "E8 Humanlike %")
|
30 |
-
E8_ci = Task("
|
31 |
E9 = Task("E9", "E9", "E9 Humanlike %")
|
32 |
-
E9_ci = Task("
|
33 |
E10 = Task("E10", "E10", "E10 Humanlike %")
|
34 |
-
E10_ci = Task("
|
35 |
|
36 |
# factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
|
37 |
# answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
|
|
|
13 |
Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
|
14 |
Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
|
15 |
E1 = Task("E1", "E1", "E1 Humanlike %")
|
16 |
+
E1_ci = Task("E1_ci", "E1_ci", "E1 CI")
|
17 |
E2 = Task("E2", "E2", "E2 Humanlike %")
|
18 |
+
E2_ci = Task("E2_ci", "E2_ci", "E2 CI")
|
19 |
E3 = Task("E3", "E3", "E3 Humanlike %")
|
20 |
+
E3_ci = Task("E3_ci", "E3_ci", "E3 CI")
|
21 |
E4 = Task("E4", "E4", "E4 Humanlike %")
|
22 |
+
E4_ci = Task("E4_ci", "E4_ci", "E4 CI")
|
23 |
E5 = Task("E5", "E5", "E5 Humanlike %")
|
24 |
+
E5_ci = Task("E5_ci", "E5_ci", "E5 CI")
|
25 |
E6 = Task("E6", "E6", "E6 Humanlike %")
|
26 |
+
E6_ci = Task("E6_ci", "E6_ci", "E6 CI")
|
27 |
E7 = Task("E7", "E7", "E7 Humanlike %")
|
28 |
+
E7_ci = Task("E7_ci", "E7_ci", "E7 CI")
|
29 |
E8 = Task("E8", "E8", "E8 Humanlike %")
|
30 |
+
E8_ci = Task("E8_ci", "E8_ci", "E8 CI")
|
31 |
E9 = Task("E9", "E9", "E9 Humanlike %")
|
32 |
+
E9_ci = Task("E9_ci", "E9_ci", "E9 CI")
|
33 |
E10 = Task("E10", "E10", "E10 Humanlike %")
|
34 |
+
E10_ci = Task("E10_ci", "E10_ci", "E10 CI")
|
35 |
|
36 |
# factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
|
37 |
# answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
|
src/leaderboard/read_evals.py
CHANGED
@@ -77,7 +77,7 @@ class EvalResult:
|
|
77 |
if isinstance(task.metric, str):
|
78 |
# accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
|
79 |
# task.benchmark == k and isinstance(v, dict)])
|
80 |
-
accs = np.array([v for k, v in data["results"].items() if task.benchmark == k])
|
81 |
|
82 |
# 过滤掉 None 值,确保 accs 只包含有效的数值
|
83 |
accs = accs[accs != None]
|
|
|
77 |
if isinstance(task.metric, str):
|
78 |
# accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
|
79 |
# task.benchmark == k and isinstance(v, dict)])
|
80 |
+
accs = np.array([np.around(v, decimals=3) for k, v in data["results"].items() if task.benchmark == k])
|
81 |
|
82 |
# 过滤掉 None 值,确保 accs 只包含有效的数值
|
83 |
accs = accs[accs != None]
|