XufengDuan committed on
Commit
1daafa6
1 Parent(s): 79cf136

update scripts

Browse files
src/display/about.py CHANGED
@@ -13,25 +13,25 @@ class Tasks(Enum):
13
  Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
14
  Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
15
  E1 = Task("E1", "E1", "E1 Humanlike %")
16
- E1_ci = Task("E1", "E1_ci", "E1 CI")
17
  E2 = Task("E2", "E2", "E2 Humanlike %")
18
- E2_ci = Task("E2", "E2_ci", "E2 CI")
19
  E3 = Task("E3", "E3", "E3 Humanlike %")
20
- E3_ci = Task("E3", "E3_ci", "E3 CI")
21
  E4 = Task("E4", "E4", "E4 Humanlike %")
22
- E4_ci = Task("E4", "E4_ci", "E4 CI")
23
  E5 = Task("E5", "E5", "E5 Humanlike %")
24
- E5_ci = Task("E5", "E5_ci", "E5 CI")
25
  E6 = Task("E6", "E6", "E6 Humanlike %")
26
- E6_ci = Task("E6", "E6_ci", "E6 CI")
27
  E7 = Task("E7", "E7", "E7 Humanlike %")
28
- E7_ci = Task("E7", "E7_ci", "E7 CI")
29
  E8 = Task("E8", "E8", "E8 Humanlike %")
30
- E8_ci = Task("E8", "E8_ci", "E8 CI")
31
  E9 = Task("E9", "E9", "E9 Humanlike %")
32
- E9_ci = Task("E9", "E9_ci", "E9 CI")
33
  E10 = Task("E10", "E10", "E10 Humanlike %")
34
- E10_ci = Task("E10", "E10_ci", "E10 CI")
35
 
36
  # factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
37
  # answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
 
13
  Overall = Task("overall_js_divergence", "overall_js_divergence", "Overall Humanlike %")
14
  Overall_ci = Task("overall_confidence_interval", "overall_confidence_interval", "Overall CI")
15
  E1 = Task("E1", "E1", "E1 Humanlike %")
16
+ E1_ci = Task("E1_ci", "E1_ci", "E1 CI")
17
  E2 = Task("E2", "E2", "E2 Humanlike %")
18
+ E2_ci = Task("E2_ci", "E2_ci", "E2 CI")
19
  E3 = Task("E3", "E3", "E3 Humanlike %")
20
+ E3_ci = Task("E3_ci", "E3_ci", "E3 CI")
21
  E4 = Task("E4", "E4", "E4 Humanlike %")
22
+ E4_ci = Task("E4_ci", "E4_ci", "E4 CI")
23
  E5 = Task("E5", "E5", "E5 Humanlike %")
24
+ E5_ci = Task("E5_ci", "E5_ci", "E5 CI")
25
  E6 = Task("E6", "E6", "E6 Humanlike %")
26
+ E6_ci = Task("E6_ci", "E6_ci", "E6 CI")
27
  E7 = Task("E7", "E7", "E7 Humanlike %")
28
+ E7_ci = Task("E7_ci", "E7_ci", "E7 CI")
29
  E8 = Task("E8", "E8", "E8 Humanlike %")
30
+ E8_ci = Task("E8_ci", "E8_ci", "E8 CI")
31
  E9 = Task("E9", "E9", "E9 Humanlike %")
32
+ E9_ci = Task("E9_ci", "E9_ci", "E9 CI")
33
  E10 = Task("E10", "E10", "E10 Humanlike %")
34
+ E10_ci = Task("E10_ci", "E10_ci", "E10 CI")
35
 
36
  # factual_consistency_rate = Task("factual_consistency_rate", "factual_consistency_rate", "Factual Consistency Rate (%)")
37
  # answer_rate = Task("answer_rate", "answer_rate", "Answer Rate (%)")
src/leaderboard/read_evals.py CHANGED
@@ -77,7 +77,7 @@ class EvalResult:
77
  if isinstance(task.metric, str):
78
  # accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
79
  # task.benchmark == k and isinstance(v, dict)])
80
- accs = np.array([v for k, v in data["results"].items() if task.benchmark == k])
81
 
82
  # 过滤掉 None 值,确保 accs 只包含有效的数值
83
  accs = accs[accs != None]
 
77
  if isinstance(task.metric, str):
78
  # accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if
79
  # task.benchmark == k and isinstance(v, dict)])
80
+ accs = np.array([np.around(v, decimals=3) for k, v in data["results"].items() if task.benchmark == k])
81
 
82
  # 过滤掉 None 值,确保 accs 只包含有效的数值
83
  accs = accs[accs != None]