Spaces:
Runtime error
Runtime error
# Copyright (c) Facebook, Inc. and its affiliates. | |
import re | |
from tqdm import tqdm | |
class EvalAIAnswerProcessor:
    """
    Processes an answer similar to Eval AI
    copied from
    https://github.com/facebookresearch/mmf/blob/c46b3b3391275b4181567db80943473a89ab98ab/pythia/tasks/processors.py#L897

    Calling an instance on a raw answer string lower-cases it, strips
    punctuation, maps number words to digits, drops articles and restores
    apostrophes in known contractions, returning the normalised string.
    """

    # Apostrophe-less spelling -> canonical contraction. The table is kept
    # exactly as copied so normalised answers stay unchanged.
    # NOTE(review): lookups happen on lower-cased words, so the keys that
    # contain capitals ("Id've", "I'dve", "Im", "Ive") can never match, and
    # "somebody'd" maps *to* the apostrophe-less form, unlike its neighbours.
    CONTRACTIONS = {
        "aint": "ain't",
        "arent": "aren't",
        "cant": "can't",
        "couldve": "could've",
        "couldnt": "couldn't",
        "couldn'tve": "couldn't've",
        "couldnt've": "couldn't've",
        "didnt": "didn't",
        "doesnt": "doesn't",
        "dont": "don't",
        "hadnt": "hadn't",
        "hadnt've": "hadn't've",
        "hadn'tve": "hadn't've",
        "hasnt": "hasn't",
        "havent": "haven't",
        "hed": "he'd",
        "hed've": "he'd've",
        "he'dve": "he'd've",
        "hes": "he's",
        "howd": "how'd",
        "howll": "how'll",
        "hows": "how's",
        "Id've": "I'd've",
        "I'dve": "I'd've",
        "Im": "I'm",
        "Ive": "I've",
        "isnt": "isn't",
        "itd": "it'd",
        "itd've": "it'd've",
        "it'dve": "it'd've",
        "itll": "it'll",
        "let's": "let's",
        "maam": "ma'am",
        "mightnt": "mightn't",
        "mightnt've": "mightn't've",
        "mightn'tve": "mightn't've",
        "mightve": "might've",
        "mustnt": "mustn't",
        "mustve": "must've",
        "neednt": "needn't",
        "notve": "not've",
        "oclock": "o'clock",
        "oughtnt": "oughtn't",
        "ow's'at": "'ow's'at",
        "'ows'at": "'ow's'at",
        "'ow'sat": "'ow's'at",
        "shant": "shan't",
        "shed've": "she'd've",
        "she'dve": "she'd've",
        "she's": "she's",
        "shouldve": "should've",
        "shouldnt": "shouldn't",
        "shouldnt've": "shouldn't've",
        "shouldn'tve": "shouldn't've",
        "somebody'd": "somebodyd",
        "somebodyd've": "somebody'd've",
        "somebody'dve": "somebody'd've",
        "somebodyll": "somebody'll",
        "somebodys": "somebody's",
        "someoned": "someone'd",
        "someoned've": "someone'd've",
        "someone'dve": "someone'd've",
        "someonell": "someone'll",
        "someones": "someone's",
        "somethingd": "something'd",
        "somethingd've": "something'd've",
        "something'dve": "something'd've",
        "somethingll": "something'll",
        "thats": "that's",
        "thered": "there'd",
        "thered've": "there'd've",
        "there'dve": "there'd've",
        "therere": "there're",
        "theres": "there's",
        "theyd": "they'd",
        "theyd've": "they'd've",
        "they'dve": "they'd've",
        "theyll": "they'll",
        "theyre": "they're",
        "theyve": "they've",
        "twas": "'twas",
        "wasnt": "wasn't",
        "wed've": "we'd've",
        "we'dve": "we'd've",
        "weve": "we've",
        "werent": "weren't",
        "whatll": "what'll",
        "whatre": "what're",
        "whats": "what's",
        "whatve": "what've",
        "whens": "when's",
        "whered": "where'd",
        "wheres": "where's",
        "whereve": "where've",
        "whod": "who'd",
        "whod've": "who'd've",
        "who'dve": "who'd've",
        "wholl": "who'll",
        "whos": "who's",
        "whove": "who've",
        "whyll": "why'll",
        "whyre": "why're",
        "whys": "why's",
        "wont": "won't",
        "wouldve": "would've",
        "wouldnt": "wouldn't",
        "wouldnt've": "wouldn't've",
        "wouldn'tve": "wouldn't've",
        "yall": "y'all",
        "yall'll": "y'all'll",
        "y'allll": "y'all'll",
        "yall'd've": "y'all'd've",
        "y'alld've": "y'all'd've",
        "y'all'dve": "y'all'd've",
        "youd": "you'd",
        "youd've": "you'd've",
        "you'dve": "you'd've",
        "youll": "you'll",
        "youre": "you're",
        "youve": "you've",
    }

    # Number words (and "none") -> digit strings.
    NUMBER_MAP = {
        "none": "0",
        "zero": "0",
        "one": "1",
        "two": "2",
        "three": "3",
        "four": "4",
        "five": "5",
        "six": "6",
        "seven": "7",
        "eight": "8",
        "nine": "9",
        "ten": "10",
    }

    # Words dropped entirely from the normalised answer.
    ARTICLES = ["a", "an", "the"]

    # NOTE(review): "(?!<=\d)" is a negative lookahead for the literal text
    # "<=" plus a digit, so it never blocks a match at a period; the pattern
    # therefore matches every period that is not followed by a digit. It looks
    # like a typo for the lookbehind "(?<!\d)" but is left untouched because
    # changing it would change the normalised answers.
    PERIOD_STRIP = re.compile(r"(?!<=\d)(\.)(?!\d)")

    # One or more commas sitting between two digits (e.g. "1,000").
    COMMA_STRIP = re.compile(r"(?<=\d)(\,)+(?=\d)")

    # Punctuation marks handled by process_punctuation.
    PUNCTUATIONS = [
        ";",
        r"/",
        "[",
        "]",
        '"',
        "{",
        "}",
        "(",
        ")",
        "=",
        "+",
        "\\",
        "_",
        "-",
        ">",
        "<",
        "@",
        "`",
        ",",
        "?",
        "!",
    ]

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; the processor is stateless."""
        pass

    def word_tokenize(self, word: str) -> str:
        """Lower-case *word*, drop "," and "?", and split off possessive 's.

        Returns the result with leading/trailing whitespace removed.
        """
        word = word.lower()
        word = word.replace(",", "").replace("?", "").replace("'s", " 's")
        return word.strip()

    def process_punctuation(self, in_text: str) -> str:
        """Remove or blank out punctuation in *in_text*.

        A mark from ``PUNCTUATIONS`` is deleted when it touches a space in the
        input (or when the input contains a comma between digits); otherwise
        it is replaced by a single space. Finally every period matched by
        ``PERIOD_STRIP`` is removed.
        """
        # The digit-comma test depends only on the input, so evaluate it once
        # instead of once per punctuation mark.
        has_digit_comma = self.COMMA_STRIP.search(in_text) is not None

        out_text = in_text
        for p in self.PUNCTUATIONS:
            touches_space = p + " " in in_text or " " + p in in_text
            if touches_space or has_digit_comma:
                out_text = out_text.replace(p, "")
            else:
                out_text = out_text.replace(p, " ")

        # No third positional argument here: for a compiled pattern it is
        # ``count``, so passing re.UNICODE (== 32) silently limited the
        # substitution to the first 32 periods.
        return self.PERIOD_STRIP.sub("", out_text)

    def process_digit_article(self, in_text: str) -> str:
        """Map number words to digits, drop articles and fix contractions.

        The text is lower-cased and split on whitespace; the surviving words
        are joined back with single spaces.
        """
        out_text = []
        for word in in_text.lower().split():
            # ``get`` rather than ``setdefault``: the latter inserted every
            # unseen word into the shared class-level NUMBER_MAP.
            word = self.NUMBER_MAP.get(word, word)
            if word in self.ARTICLES:
                continue
            out_text.append(self.CONTRACTIONS.get(word, word))
        return " ".join(out_text)

    def __call__(self, item: str) -> str:
        """Return the fully normalised form of the raw answer *item*."""
        item = self.word_tokenize(item)
        item = item.replace("\n", " ").replace("\t", " ").strip()
        item = self.process_punctuation(item)
        item = self.process_digit_article(item)
        return item
class TextVQAAccuracyEvaluator:
    """Soft accuracy over predictions, each scored against 10 human answers."""

    def __init__(self):
        # Normaliser applied to both predictions and ground-truth answers.
        self.answer_processor = EvalAIAnswerProcessor()

    def _compute_answer_scores(self, raw_answers):
        """
        compute the accuracy (soft score) of human answers

        Each answer is normalised first. For every distinct normalised answer,
        each of the 10 annotators is left out in turn and the answer earns
        ``min(1, matches / 3)`` against the remaining ones; the returned dict
        maps the answer to the mean of those leave-one-out values.

        Raises:
            AssertionError: if ``raw_answers`` does not hold exactly 10 items.
        """
        answers = [self.answer_processor(a) for a in raw_answers]
        assert len(answers) == 10

        unique_answer_scores = {}
        for unique_answer in set(answers):
            accs = []
            for held_out in range(len(answers)):
                # Count agreeing annotators other than the held-out one.
                num_matching = sum(
                    1
                    for idx, answer in enumerate(answers)
                    if idx != held_out and answer == unique_answer
                )
                accs.append(min(1, float(num_matching) / 3))
            unique_answer_scores[unique_answer] = sum(accs) / len(accs)
        return unique_answer_scores

    def eval_pred_list(self, pred_list):
        """Return the mean soft accuracy over *pred_list*.

        Each entry is a mapping with a ``"pred_answer"`` string and a
        ``"gt_answers"`` sequence. A prediction that matches none of the
        normalised ground-truth answers scores 0.0. Returns 0.0 when there
        are no entries instead of dividing by zero.
        """
        # An empty list has nothing to score; skip the progress bar.
        if not pred_list:
            return 0.0

        pred_scores = []
        for entry in tqdm(pred_list):
            pred_answer = self.answer_processor(entry["pred_answer"])
            unique_answer_scores = self._compute_answer_scores(entry["gt_answers"])
            pred_scores.append(unique_answer_scores.get(pred_answer, 0.0))

        # Also covers an iterator that turned out to yield nothing.
        if not pred_scores:
            return 0.0
        return sum(pred_scores) / len(pred_scores)
class STVQAAccuracyEvaluator:
    """Exact-match accuracy after normalising predictions and answers."""

    def __init__(self):
        # Normaliser applied to both predictions and ground-truth answers.
        self.answer_processor = EvalAIAnswerProcessor()

    def eval_pred_list(self, pred_list):
        """Return the fraction of entries whose prediction matches a ground truth.

        Each entry is a mapping with a ``"pred_answer"`` string and a
        ``"gt_answers"`` sequence; an entry scores 1.0 when the normalised
        prediction equals any normalised ground-truth answer, else 0.0.
        Returns 0.0 when there are no entries instead of dividing by zero.
        """
        pred_scores = []
        for entry in pred_list:
            pred_answer = self.answer_processor(entry["pred_answer"])
            gts = [self.answer_processor(a) for a in entry["gt_answers"]]
            pred_scores.append(1.0 if pred_answer in gts else 0.0)

        if not pred_scores:
            return 0.0
        return sum(pred_scores) / len(pred_scores)
class STVQAANLSEvaluator:
    """Scores predictions by thresholded, length-normalised edit similarity."""

    def __init__(self):
        import editdistance  # install with `pip install editdistance`

        # Callable taking two strings and returning their edit distance.
        self.get_edit_distance = editdistance.eval

    def get_anls(self, s1, s2):
        """Return the similarity of *s1* and *s2*, zeroed below 0.5.

        Both strings are lower-cased and stripped. The similarity is
        ``1 - edit_distance / max(len(s1), len(s2))``; values under 0.5 are
        returned as 0.0. Two strings that are both empty after stripping are
        treated as identical (1.0) rather than dividing by zero.
        """
        s1 = s1.lower().strip()
        s2 = s2.lower().strip()

        longest = max(len(s1), len(s2))
        if longest == 0:
            return 1.0

        similarity = 1 - self.get_edit_distance(s1, s2) / longest
        return similarity if similarity >= 0.5 else 0.0

    def eval_pred_list(self, pred_list):
        """Return the mean, over entries, of the best score against any answer.

        Each entry is a mapping with a ``"pred_answer"`` string and a
        ``"gt_answers"`` sequence. An entry with no ground-truth answers
        scores 0.0, and 0.0 is returned when there are no entries at all.
        """
        pred_scores = []
        for entry in pred_list:
            best = max(
                (self.get_anls(entry["pred_answer"], gt) for gt in entry["gt_answers"]),
                default=0.0,  # no ground truth: max() would otherwise raise
            )
            pred_scores.append(best)

        if not pred_scores:
            return 0.0
        return sum(pred_scores) / len(pred_scores)
class TextCapsBleu4Evaluator:
    """Computes BLEU-4 for predicted captions using pycocoevalcap."""

    def __init__(self):
        # The following script requires Java 1.8.0 and pycocotools installed.
        # The pycocoevalcap can be installed with pip as
        # pip install git+https://github.com/ronghanghu/coco-caption.git@python23
        # Original pycocoevalcap code is at https://github.com/tylin/coco-caption
        # but has no python3 support yet.
        try:
            from pycocoevalcap.bleu.bleu import Bleu
            from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        except ModuleNotFoundError:
            # Print the install hint, then let the original error propagate.
            print(
                "Please install pycocoevalcap module using "
                "pip install git+https://github.com/ronghanghu/coco-caption.git@python23"  # noqa
            )
            raise

        self.tokenizer = PTBTokenizer()
        self.scorer = Bleu(4)

    def eval_pred_list(self, pred_list):
        """Return the BLEU-4 score of the predictions in *pred_list*.

        Each entry is a mapping with a ``"pred_answer"`` caption and a
        ``"gt_answers"`` sequence of reference captions. An empty list yields
        0.0 without calling the tokenizer or the scorer.
        """
        if not pred_list:
            return 0.0

        # Create reference and hypotheses captions.
        gts = {}
        res = {}
        for idx, entry in enumerate(pred_list):
            gts[idx] = [{"caption": a} for a in entry["gt_answers"]]
            res[idx] = [{"caption": entry["pred_answer"]}]

        gts = self.tokenizer.tokenize(gts)
        res = self.tokenizer.tokenize(res)
        score, _ = self.scorer.compute_score(gts, res)

        bleu4 = score[3]  # score is (Bleu-1, Bleu-2, Bleu-3, Bleu-4)
        return bleu4