Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
user name
committed on
Commit
•
a16057d
1
Parent(s):
5e86dd4
factuality, faithfulness fix
Browse files - src/populate.py +19 -0
src/populate.py
CHANGED
@@ -12,6 +12,25 @@ from src.leaderboard.read_evals import get_raw_eval_results, EvalResult, update_
|
|
12 |
from src.backend.envs import Tasks as BackendTasks
|
13 |
from src.display.utils import Tasks
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
def get_leaderboard_df(results_path: str,
|
17 |
requests_path: str,
|
|
|
12 |
from src.backend.envs import Tasks as BackendTasks
|
13 |
from src.display.utils import Tasks
|
14 |
|
15 |
+
factuality_tasks = [
|
16 |
+
"NQ Open/EM",
|
17 |
+
"TriviaQA/EM",
|
18 |
+
"PopQA/EM",
|
19 |
+
"FEVER/Acc",
|
20 |
+
"TrueFalse/Acc",
|
21 |
+
"TruthQA MC2/Acc",
|
22 |
+
]
|
23 |
+
faithfulness_tasks = [
|
24 |
+
"MemoTrap/Acc",
|
25 |
+
"IFEval/Acc",
|
26 |
+
"NQ-Swap/EM",
|
27 |
+
"RACE/Acc",
|
28 |
+
"SQuADv2/EM",
|
29 |
+
"CNN-DM/ROUGE",
|
30 |
+
"XSum/ROUGE",
|
31 |
+
"HaluQA/Acc",
|
32 |
+
"FaithDial/Acc",
|
33 |
+
]
|
34 |
|
35 |
def get_leaderboard_df(results_path: str,
|
36 |
requests_path: str,
|