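"""Caption evaluation utilities for audio captioning.

Computes FENSE, BLEU, ROUGE-L, CIDEr, METEOR and SPICE scores, plus
SPIDEr (the mean of CIDEr and SPICE), either for human annotations
(leave-one-out) or for system predictions against references.
"""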
import copy
import json

import fire
import numpy as np

def evaluate_annotation(key2refs, scorer):
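    """Score human annotations by leave-one-out cross validation.

    In each round, one caption per audio clip is held out and scored
    against the remaining captions of the same clip; the per-round
    scores are averaged.
    """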
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])  # BLEU-1..4
    else:
        scores = 0
    # Assume every audio clip has the same number of reference captions.
    num_cap_per_audio = len(next(iter(key2refs.values())))

    for i in range(num_cap_per_audio):
        if i > 0:
            # Put the caption held out in the previous round back at the
            # front, so every caption is held out exactly once overall.
            for key in key2refs:
                key2refs[key].insert(0, res[key][0])
        # Hold out the last caption of each clip as the "prediction".
        res = {key: [refs.pop()] for key, refs in key2refs.items()}
        score, _ = scorer.compute_score(key2refs, res)

        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score

    score = scores / num_cap_per_audio
    return score

def evaluate_prediction(key2pred, key2refs, scorer):
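    """Score predictions against references, averaged over rounds.

    In round i the i-th reference of every clip is dropped, mirroring
    the leave-one-out scoring used for the annotations themselves.
    """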
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])  # BLEU-1..4
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))

    for i in range(num_cap_per_audio):
        # Drop the i-th reference of every clip for this round.
        key2refs_i = {}
        for key, refs in key2refs.items():
            key2refs_i[key] = refs[:i] + refs[i + 1:]
        score, _ = scorer.compute_score(key2refs_i, key2pred)

        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score

    score = scores / num_cap_per_audio
    return score

class Evaluator(object):
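    """Expose the evaluation routines as a CLI via python-fire."""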
    def eval_annotation(self, annotation, output):
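        """Evaluate the human annotations in `annotation` (a JSON file
        with an "audios" list of {"audio_id", "captions"} entries) and
        write the scores to `output`.
        """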
        with open(annotation, "r") as f:
            captions = json.load(f)["audios"]
        # Collect the reference captions of each audio clip.
        key2refs = {}
        for audio_idx in range(len(captions)):
            audio_id = captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])
        # Score FENSE on the raw captions, before PTB tokenization.
        from fense.fense import Fense

        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_annotation(
            copy.deepcopy(key2refs), scorer)
        # Wrap the references in the COCO format expected by PTBTokenizer.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })
        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer

        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)
        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.spice.spice import Spice

        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_annotation(
                copy.deepcopy(key2refs), scorer)
        # Write the scores; SPIDEr is the mean of CIDEr and SPICE.
        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))

    def eval_prediction(self, prediction, annotation, output):
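        """Evaluate the system predictions in `prediction` (a JSON file
        with a "predictions" list of {"filename", "tokens"} entries)
        against the references in `annotation`, writing the scores to
        `output`.
        """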
        with open(annotation, "r") as f:
            ref_captions = json.load(f)["audios"]
        # Collect the reference captions of each audio clip.
        key2refs = {}
        for audio_idx in range(len(ref_captions)):
            audio_id = ref_captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in ref_captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        with open(prediction, "r") as f:
            pred_captions = json.load(f)["predictions"]
        # One predicted caption per audio clip, keyed by filename.
        key2pred = {}
        for audio_idx in range(len(pred_captions)):
            item = pred_captions[audio_idx]
            audio_id = item["filename"]
            key2pred[audio_id] = [item["tokens"]]
        # Score FENSE on the raw captions, before PTB tokenization.
        from fense.fense import Fense

        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)
        # Wrap references and predictions in the COCO format expected
        # by PTBTokenizer.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })
        preds4eval = {}
        for key, preds in key2pred.items():
            preds4eval[key] = []
            for idx, pred in enumerate(preds):
                preds4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": pred
                })

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer

        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)
        key2pred = tokenizer.tokenize(preds4eval)
        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.spice.spice import Spice

        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)
        # Write the scores; SPIDEr is the mean of CIDEr and SPICE.
        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))

if __name__ == "__main__":
    fire.Fire(Evaluator)
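
# Example invocation via the fire-generated CLI (script and file names
# below are illustrative, not part of this repository):
#   python evaluate.py eval_prediction \
#       --prediction predictions.json \
#       --annotation annotations.json \
#       --output scores.txt
#   python evaluate.py eval_annotation \
#       --annotation annotations.json --output scores.txt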