|
import ast
import re
from typing import Callable, Optional, Union

from opencompass.registry import TEXT_POSTPROCESSORS
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('general') |
|
def general_postprocess(text: str) -> str:
    """Normalize a free-form answer string.

    The text is cut at the first newline, ASCII period or ASCII comma; every
    character that is neither a word character nor whitespace is dropped;
    the standalone articles "a", "an" and "the" are removed
    (case-insensitively); and whitespace runs are collapsed to one space.

    Args:
        text(str): Raw text to normalize.

    Returns:
        str: The normalized text without leading or trailing whitespace.
    """
    # ``maxsplit`` is given by keyword: passing it to ``re.split``
    # positionally is deprecated as of Python 3.13.
    first_segment = re.split(r'[\n.,]', text, maxsplit=1)[0]

    without_punctuation = re.sub(r'[^\w\s]', '', first_segment)

    without_articles = re.sub(r'\b(a|an|the)\b',
                              '',
                              without_punctuation,
                              flags=re.IGNORECASE)

    # Removing articles can leave doubled or leading blanks behind.
    return re.sub(r'\s+', ' ', without_articles).strip()
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('general_cn') |
|
def general_cn_postprocess(text: str) -> str:
    """Normalize an answer string and segment it into words with jieba.

    The text is cut at the first newline, ASCII period or ASCII comma; every
    character that is neither a word character nor whitespace is dropped;
    the standalone articles "a", "an" and "the" are removed
    (case-insensitively); and whitespace runs are collapsed to one space.
    The cleaned text is then passed to ``jieba.cut`` and the resulting
    tokens are joined with single spaces.

    Note:
        The segmentation runs on the cleaned text. Previously the cleaned
        text was computed and then overwritten by segmenting the raw input,
        which silently discarded the whole cleanup.

    Args:
        text(str): Raw text to normalize and segment.

    Returns:
        str: The space-joined tokens of the cleaned text.
    """
    # ``maxsplit`` is given by keyword: passing it to ``re.split``
    # positionally is deprecated as of Python 3.13.
    truncated_text = re.split(r'[\n.,]', text, maxsplit=1)[0]

    no_punctuation = re.sub(r'[^\w\s]', '', truncated_text)

    no_articles = re.sub(r'\b(a|an|the)\b',
                         '',
                         no_punctuation,
                         flags=re.IGNORECASE)

    cleaned_text = re.sub(r'\s+', ' ', no_articles).strip()

    # Kept as a function-scope import, as in the original, so jieba is only
    # required when this postprocessor is actually called.
    import jieba

    return ' '.join(jieba.cut(cleaned_text))
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('first-capital') |
|
def first_capital_postprocess(text: str) -> str:
    """Return the first uppercase character of ``text``.

    Args:
        text(str): Text scanned from left to right.

    Returns:
        str: The first character for which ``str.isupper`` is true, or an
            empty string when there is no such character.
    """
    return next((char for char in text if char.isupper()), '')
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('last-capital') |
|
def last_capital_postprocess(text: str) -> str:
    """Return the last uppercase character of ``text``.

    Args:
        text(str): Text scanned from right to left.

    Returns:
        str: The last character for which ``str.isupper`` is true, or an
            empty string when there is no such character.
    """
    return next((char for char in reversed(text) if char.isupper()), '')
|
|
|
|
|
def first_option_postprocess(text: str, options: str, cushion: bool = True) -> str:
    """Find first valid option for text.

    A fixed list of regular expressions is tried in order against ``text``.
    For the first pattern that matches, the matched span is checked for the
    characters of ``options`` in the order they are given, and the first one
    present is returned. If the span holds no option character, the search
    continues with the next pattern.

    Args:
        text(str): Text to extract the option from.
        options(str): Candidate option characters, e.g. ``'ABCD'``. The
            string is interpolated verbatim into regex character classes.
        cushion(bool): Whether to also try the loose fallback patterns
            (an option followed by a colon, or any bare option character)
            after all regular patterns. Defaults to True.

    Returns:
        str: The extracted option character, or an empty string when no
            pattern yields one.
    """
    # Raw f-strings keep regex escapes such as ``\s`` from being parsed as
    # (invalid) Python string escapes; ``{options}`` is still interpolated.
    patterns = [
        rf'答案是?\s?([{options}])',
        # Full-width and ASCII colon variants.
        rf'答案是?\s?:([{options}])',
        rf'答案是?\s?:([{options}])',
        rf'答案应该?是\s?([{options}])',
        rf'答案应该?选\s?([{options}])',
        rf'答案为\s?([{options}])',
        rf'答案选\s?([{options}])',
        rf'选择?\s?([{options}])',
        # The comma after this entry was missing, which silently merged it
        # with the following pattern through implicit string concatenation.
        rf'故选?\s?([{options}])',
        rf'只有选?项?\s?([{options}])\s?是?对',
        rf'只有选?项?\s?([{options}])\s?是?错',
        rf'只有选?项?\s?([{options}])\s?不?正确',
        rf'只有选?项?\s?([{options}])\s?错误',
        rf'说法不?对选?项?的?是\s?([{options}])',
        rf'说法不?正确选?项?的?是\s?([{options}])',
        rf'说法错误选?项?的?是\s?([{options}])',
        rf'([{options}])\s?是正确的',
        rf'([{options}])\s?是正确答案',
        rf'选项\s?([{options}])\s?正确',
        rf'所以答\s?([{options}])',
        rf'所以\s?([{options}][.。$]?$)',
        rf'所有\s?([{options}][.。$]?$)',
        rf'[\s,::,]([{options}])[。,,\.]?$',
        rf'[\s,,::][故即]([{options}])[。\.]?$',
        rf'[\s,,::]因此([{options}])[。\.]?$',
        rf'[是为。]\s?([{options}])[。\.]?$',
        rf'因此\s?([{options}])[。\.]?$',
        rf'显然\s?([{options}])[。\.]?$',
        rf'答案是\s?(\S+)(?:。|$)',
        rf'答案应该是\s?(\S+)(?:。|$)',
        rf'答案为\s?(\S+)(?:。|$)',
        rf'[Tt]he answer is ([{options}])',
        rf'[Tt]he answer is option ([{options}])',
        rf'[Tt]he correct answer is ([{options}])',
        rf'[Tt]he correct answer is option ([{options}])',
        rf'[Tt]he answer to the question is ([{options}])',
        rf'^选项\s?([{options}])',
        rf'^([{options}])\s?选?项',
        rf'(\s|^)[{options}][\s。,,::\.$]',
        rf'(\s|^)[{options}](\s|$)',
        rf'1.\s?(.*?)$',
        rf'1.\s?([{options}])[.。$]?$',
    ]
    cushion_patterns = [
        rf'([{options}]):',
        rf'[{options}]',
    ]

    if cushion:
        patterns.extend(cushion_patterns)

    for pattern in patterns:
        match = re.search(pattern, text)
        if not match:
            continue
        # The whole matched span is inspected, not just the capture group;
        # options are tested in the order given by ``options``.
        matched_span = match.group(0)
        for option in options:
            if option in matched_span:
                return option
    return ''
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('first-capital-multi') |
|
def first_capital_postprocess_multi(text: str) -> str:
    """Return the first run of consecutive letters ``A`` to ``D`` in ``text``.

    Args:
        text(str): Text to search.

    Returns:
        str: The first maximal run of characters from ``A`` to ``D``, or an
            empty string when ``text`` contains none of them.
    """
    found = re.search(r'([A-D]+)', text)
    return found.group(1) if found else ''
|
|
|
|
|
def last_option_postprocess(text: str, options: str) -> str:
    """Return the last option character that occurs in ``text``.

    Args:
        text(str): Text to search.
        options(str): Candidate option characters, interpolated verbatim
            into a regex character class.

    Returns:
        str: The last character of ``text`` matched by that character
            class, or an empty string when nothing matches.
    """
    occurrences = re.findall(rf'([{options}])', text)
    return occurrences[-1] if occurrences else ''
|
|
|
|
|
def first_number_postprocess(text: str) -> Optional[float]:
    """Return the first number in a string.

    A number is an optional minus sign followed by digits with at most one
    decimal point, where at least one digit must follow the point (``42``,
    ``-3.5``, ``.25``).

    Args:
        text(str): Text to search.

    Returns:
        Optional[float]: The first number found, converted to ``float``,
            or ``None`` when ``text`` contains no number.
    """
    match = re.search(r'(-?\d*\.?\d+)', text)
    if match is None:
        return None
    return float(match.group(1))
|
|
|
|
|
@TEXT_POSTPROCESSORS.register_module('multiple-select') |
|
def multiple_select_postprocess(text: str) -> str:
    """Collect the distinct uppercase characters of ``text`` in sorted order.

    Args:
        text(str): Text to scan.

    Returns:
        str: Every distinct character of ``text`` for which ``str.isupper``
            is true, sorted and concatenated; empty when there is none.
    """
    uppercase_chars = {char for char in text if char.isupper()}
    return ''.join(sorted(uppercase_chars))
|
|
|
|
|
def general_eval_wrapper_postprocess(text: str,
                                     postprocess: Optional[Union[
                                         str, Callable]] = None,
                                     **kwargs) -> str:
    """Wrapper for eval text repr. Especially for chatglmpro.

    If ``text`` is the repr of a Python literal (for example a quoted
    string), it is replaced by the value of that literal; otherwise it is
    kept unchanged. The result is then handed to ``postprocess`` if one is
    given. A literal that is not a string (e.g. ``'42'``) is passed on as
    the parsed object.

    Args:
        text(str): Text to be postprocessed.
        postprocess(str | Callable, optional): Original post processing
            function, or the name it is registered under in
            ``TEXT_POSTPROCESSORS``. Defaults to None.
        **kwargs: Other necessary kwargs for post processing function.

    Returns:
        The return value of ``postprocess``, or the (possibly unwrapped)
        text when no ``postprocess`` is given.
    """
    try:
        # SECURITY: ``text`` is model output, so it must never reach
        # ``eval``: that would execute arbitrary expressions, and a call
        # like ``exit()`` would even escape ``except Exception``.
        # ``ast.literal_eval`` accepts Python literals only, which is all
        # that unwrapping a repr needs.
        text = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Not a Python literal: deliberately keep the raw text.
        pass

    if not postprocess:
        return text

    if isinstance(postprocess, str):
        postprocess = TEXT_POSTPROCESSORS.get(postprocess)
    return postprocess(text, **kwargs)
|
|