# AudioGPT/audio_to_text/captioning/utils/eval_round_robin.py
import copy
import json
import numpy as np
import fire
def evaluate_annotation(key2refs, scorer):
    """Round-robin evaluation of the human annotations themselves: in each
    round, one caption per audio is held out as the candidate and scored
    against the remaining captions; scores are averaged over all rounds."""
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))
    for i in range(num_cap_per_audio):
        # Put the held-out caption of the previous round back into the pool.
        if i > 0:
            for key in key2refs:
                key2refs[key].insert(0, res[key][0])
        # Hold out the last caption of every audio as this round's candidate.
        res = {key: [refs.pop()] for key, refs in key2refs.items()}
        score, _ = scorer.compute_score(key2refs, res)
        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score
    score = scores / num_cap_per_audio
    return score
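
# Illustration of the round robin (hypothetical data): with
# key2refs = {"a.wav": ["c1", "c2", "c3"]}, round 0 scores ["c3"] against
# ["c1", "c2"], round 1 scores ["c2"] against ["c3", "c1"], round 2 scores
# ["c1"] against ["c2", "c3"]; the returned value is the mean over the rounds.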
def evaluate_prediction(key2pred, key2refs, scorer):
    """Score the same predictions against every leave-one-out subset of the
    references and average the results, so that each round uses the same
    number of references as the annotation round-robin above."""
    if scorer.method() == "Bleu":
        scores = np.array([0.0 for n in range(4)])
    else:
        scores = 0
    num_cap_per_audio = len(next(iter(key2refs.values())))
    for i in range(num_cap_per_audio):
        # Drop the i-th reference of every audio.
        key2refs_i = {}
        for key, refs in key2refs.items():
            key2refs_i[key] = refs[:i] + refs[i + 1:]
        score, _ = scorer.compute_score(key2refs_i, key2pred)
        if scorer.method() == "Bleu":
            scores += np.array(score)
        else:
            scores += score
    score = scores / num_cap_per_audio
    return score
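
# A minimal smoke test for the two helpers, using a toy scorer (hypothetical;
# any object exposing method() and compute_score(key2refs, key2pred) fits):
#
#     class ExactMatch:
#         def method(self):
#             return "ExactMatch"
#
#         def compute_score(self, key2refs, key2pred):
#             hits = [float(key2pred[k][0] in refs) for k, refs in key2refs.items()]
#             return sum(hits) / len(hits), None
#
#     refs = {"a.wav": ["c1", "c2", "c3"]}
#     evaluate_annotation(copy.deepcopy(refs), ExactMatch())      # 0.0 (no duplicate captions)
#     evaluate_prediction({"a.wav": ["c1"]}, refs, ExactMatch())  # 2/3 (hit in 2 of 3 rounds)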
class Evaluator(object):

    def eval_annotation(self, annotation, output):
        with open(annotation, "r") as reader:
            captions = json.load(reader)["audios"]
        # Collect all human captions of each audio: audio_id -> [caption, ...]
        key2refs = {}
        for audio_idx in range(len(captions)):
            audio_id = captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])
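        # The annotation file is expected to match the structure parsed above,
        # e.g. (values illustrative):
        # {"audios": [{"audio_id": "a.wav",
        #              "captions": [{"caption": "a dog barks"}, ...]}, ...]}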
        # FENSE is computed on the raw captions, before PTB tokenization.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        # The remaining scorers expect PTB-tokenized captions in COCO format.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice
        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_annotation(copy.deepcopy(key2refs), scorer)

        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            # SPIDEr is the mean of CIDEr and SPICE.
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
    def eval_prediction(self, prediction, annotation, output):
        with open(annotation, "r") as reader:
            ref_captions = json.load(reader)["audios"]
        # Collect all human captions of each audio: audio_id -> [caption, ...]
        key2refs = {}
        for audio_idx in range(len(ref_captions)):
            audio_id = ref_captions[audio_idx]["audio_id"]
            key2refs[audio_id] = []
            for caption in ref_captions[audio_idx]["captions"]:
                key2refs[audio_id].append(caption["caption"])

        with open(prediction, "r") as reader:
            pred_captions = json.load(reader)["predictions"]
        # One predicted caption per audio: filename -> [caption]
        key2pred = {}
        for audio_idx in range(len(pred_captions)):
            item = pred_captions[audio_idx]
            audio_id = item["filename"]
            key2pred[audio_id] = [item["tokens"]]
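        # The prediction file is expected to mirror this structure, e.g.
        # (values illustrative):
        # {"predictions": [{"filename": "a.wav", "tokens": "a dog barks"}, ...]}
        # Each "filename" must match an "audio_id" in the annotation file so
        # the scorers can pair predictions with references.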
        # FENSE is computed on the raw captions, before PTB tokenization.
        from fense.fense import Fense
        scores = {}
        scorer = Fense()
        scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        # The remaining scorers expect PTB-tokenized captions in COCO format.
        refs4eval = {}
        for key, refs in key2refs.items():
            refs4eval[key] = []
            for idx, ref in enumerate(refs):
                refs4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": ref
                })

        preds4eval = {}
        for key, preds in key2pred.items():
            preds4eval[key] = []
            for idx, pred in enumerate(preds):
                preds4eval[key].append({
                    "audio_id": key,
                    "id": idx,
                    "caption": pred
                })

        from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer
        tokenizer = PTBTokenizer()
        key2refs = tokenizer.tokenize(refs4eval)
        key2pred = tokenizer.tokenize(preds4eval)

        from pycocoevalcap.bleu.bleu import Bleu
        from pycocoevalcap.cider.cider import Cider
        from pycocoevalcap.rouge.rouge import Rouge
        from pycocoevalcap.meteor.meteor import Meteor
        from pycocoevalcap.spice.spice import Spice
        scorers = [Bleu(), Rouge(), Cider(), Meteor(), Spice()]
        for scorer in scorers:
            scores[scorer.method()] = evaluate_prediction(key2pred, key2refs, scorer)

        spider = 0
        with open(output, "w") as f:
            for name, score in scores.items():
                if name == "Bleu":
                    for n in range(4):
                        f.write("Bleu-{}: {:6.3f}\n".format(n + 1, score[n]))
                else:
                    f.write("{}: {:6.3f}\n".format(name, score))
                if name in ["CIDEr", "SPICE"]:
                    spider += score
            # SPIDEr is the mean of CIDEr and SPICE.
            f.write("SPIDEr: {:6.3f}\n".format(spider / 2))
if __name__ == "__main__":
    fire.Fire(Evaluator)
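
# Usage sketch (assuming the standard python-fire CLI conventions): the
# Evaluator methods are exposed as subcommands, e.g.
#   python eval_round_robin.py eval_annotation <annotation.json> <output.txt>
#   python eval_round_robin.py eval_prediction <prediction.json> <annotation.json> <output.txt>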