Zhuoyang Song committed on
Commit
e637e0c
1 Parent(s): 868c1b2

FIX: extraction func of C-Eval; logging metrics

Browse files
Files changed (1) hide show
  1. tasks.py +11 -9
tasks.py CHANGED
@@ -149,14 +149,15 @@ class Task:
149
  return
150
  self.outputs = outputs
151
  try:
152
- result = self.metric._compute(
153
- responses=outputs, references=self.dataset[self.label_column]
154
- )
 
 
 
 
 
155
  except Exception as e:
156
- result = self.metric.compute(
157
- responses=outputs, references=self.dataset[self.label_column]
158
- )
159
- finally:
160
  result = outputs
161
  # if log:
162
  # name = name or pipeline.__name__
@@ -188,7 +189,7 @@ class Metrics:
188
  mmlu = multichoice
189
 
190
  def ceval(responses: list[str], answers: list[str | int]):
191
- responses = [first_capital_postprocess(pred) for pred in responses]
192
  return responses, answers
193
 
194
  def winogrande(responses: list[str], answers: list[str | int]):
@@ -892,7 +893,7 @@ class CEVAL:
892
  prefix = (
893
  f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n"
894
  if chat
895
- else "问题"
896
  )
897
 
898
  prompt = prefix + f'{example["question"]}'
@@ -1043,6 +1044,7 @@ class CEVAL:
1043
  suite = defaultdict(list)
1044
  cls.categories = defaultdict(list)
1045
  for task, info in cls.ceval_subject_mapping.items():
 
1046
  cls.categories[info[2]].append(task)
1047
  cls.categories["all"] = list(cls.ceval_subject_mapping.keys())
1048
  for k, v in cls.categories.items():
 
149
  return
150
  self.outputs = outputs
151
  try:
152
+ try:
153
+ result = self.metric._compute(
154
+ responses=outputs, references=self.dataset[self.label_column]
155
+ )
156
+ except Exception as e:
157
+ result = self.metric.compute(
158
+ responses=outputs, references=self.dataset[self.label_column]
159
+ )
160
  except Exception as e:
 
 
 
 
161
  result = outputs
162
  # if log:
163
  # name = name or pipeline.__name__
 
189
  mmlu = multichoice
190
 
191
  def ceval(responses: list[str], answers: list[str | int]):
192
+ responses = [extract_choice_zh(pred) for pred in responses]
193
  return responses, answers
194
 
195
  def winogrande(responses: list[str], answers: list[str | int]):
 
893
  prefix = (
894
  f"以下是中国关于{_ch_name}考试的单项选择题,请选出其中的正确答案。\n"
895
  if chat
896
+ else "问题:"
897
  )
898
 
899
  prompt = prefix + f'{example["question"]}'
 
1044
  suite = defaultdict(list)
1045
  cls.categories = defaultdict(list)
1046
  for task, info in cls.ceval_subject_mapping.items():
1047
+ cls.categories[info[0]].append(task)
1048
  cls.categories[info[2]].append(task)
1049
  cls.categories["all"] = list(cls.ceval_subject_mapping.keys())
1050
  for k, v in cls.categories.items():