Spaces:
Running
Running
Zhuoyang Song
committed on
Commit
•
e637e0c
1
Parent(s):
868c1b2
FIX: extraction func of C-Eval; logging metrics
Browse files
tasks.py
CHANGED
@@ -149,14 +149,15 @@ class Task:
|
|
149 |
return
|
150 |
self.outputs = outputs
|
151 |
try:
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
155 |
except Exception as e:
|
156 |
-
result = self.metric.compute(
|
157 |
-
responses=outputs, references=self.dataset[self.label_column]
|
158 |
-
)
|
159 |
-
finally:
|
160 |
result = outputs
|
161 |
# if log:
|
162 |
# name = name or pipeline.__name__
|
@@ -188,7 +189,7 @@ class Metrics:
|
|
188 |
mmlu = multichoice
|
189 |
|
190 |
def ceval(responses: list[str], answers: list[str | int]):
|
191 |
-
responses = [
|
192 |
return responses, answers
|
193 |
|
194 |
def winogrande(responses: list[str], answers: list[str | int]):
|
@@ -892,7 +893,7 @@ class CEVAL:
|
|
892 |
prefix = (
|
893 |
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n"
|
894 |
if chat
|
895 |
-
else "
|
896 |
)
|
897 |
|
898 |
prompt = prefix + f'{example["question"]}'
|
@@ -1043,6 +1044,7 @@ class CEVAL:
|
|
1043 |
suite = defaultdict(list)
|
1044 |
cls.categories = defaultdict(list)
|
1045 |
for task, info in cls.ceval_subject_mapping.items():
|
|
|
1046 |
cls.categories[info[2]].append(task)
|
1047 |
cls.categories["all"] = list(cls.ceval_subject_mapping.keys())
|
1048 |
for k, v in cls.categories.items():
|
|
|
149 |
return
|
150 |
self.outputs = outputs
|
151 |
try:
|
152 |
+
try:
|
153 |
+
result = self.metric._compute(
|
154 |
+
responses=outputs, references=self.dataset[self.label_column]
|
155 |
+
)
|
156 |
+
except Exception as e:
|
157 |
+
result = self.metric.compute(
|
158 |
+
responses=outputs, references=self.dataset[self.label_column]
|
159 |
+
)
|
160 |
except Exception as e:
|
|
|
|
|
|
|
|
|
161 |
result = outputs
|
162 |
# if log:
|
163 |
# name = name or pipeline.__name__
|
|
|
189 |
mmlu = multichoice
|
190 |
|
191 |
def ceval(responses: list[str], answers: list[str | int]):
|
192 |
+
responses = [extract_choice_zh(pred) for pred in responses]
|
193 |
return responses, answers
|
194 |
|
195 |
def winogrande(responses: list[str], answers: list[str | int]):
|
|
|
893 |
prefix = (
|
894 |
f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n"
|
895 |
if chat
|
896 |
+
else "问题:"
|
897 |
)
|
898 |
|
899 |
prompt = prefix + f'{example["question"]}'
|
|
|
1044 |
suite = defaultdict(list)
|
1045 |
cls.categories = defaultdict(list)
|
1046 |
for task, info in cls.ceval_subject_mapping.items():
|
1047 |
+
cls.categories[info[0]].append(task)
|
1048 |
cls.categories[info[2]].append(task)
|
1049 |
cls.categories["all"] = list(cls.ceval_subject_mapping.keys())
|
1050 |
for k, v in cls.categories.items():
|