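# NOTE: the six lines below are residue from the web file viewer this source
# was copied from; they are not part of the program.
# lmzjms's picture
# Upload 46 files
# 8121fee
# raw
# history blame
# 3.58 kB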
import os
import sys
import copy
import pickle
import numpy as np
import pandas as pd
import fire
sys.path.append(os.getcwd())
def coco_score(refs, pred, scorer):
    """Return how much ``scorer`` gains from using every reference caption.

    The predictions are first scored in a leave-one-out fashion: on each
    round one caption per key is withheld from the references and ``pred``
    is scored against the remaining ones. The mean of those rounds is then
    subtracted from the score obtained with the complete reference set.

    ``refs`` is never modified, so callers do not need to pass a copy.

    Args:
        refs: Mapping from key to a non-empty list of reference captions.
            The number of rounds equals the caption count of the first key;
            a key holding a different number of captions is cycled through
            (NOTE(review): presumably every key has the same count -
            confirm against the caption file).
        pred: Predictions, passed unchanged as the second argument of
            ``scorer.compute_score``.
        scorer: Object providing ``method()`` (the metric name) and
            ``compute_score(refs, pred)``, whose first return value is the
            score. A scorer whose method is ``"Bleu"`` is expected to
            return four values.

    Returns:
        ``score_with_all_refs - mean(leave_one_out_scores)``: four values
        for BLEU, a single value otherwise.

    Raises:
        ValueError: If ``refs`` is empty or any key has no captions.
    """
    if not refs:
        raise ValueError("refs must contain at least one key")
    if any(len(captions) == 0 for captions in refs.values()):
        raise ValueError("every key in refs needs at least one caption")

    num_cap_per_audio = len(next(iter(refs.values())))
    is_bleu = scorer.method() == "Bleu"
    # BLEU yields one score per n-gram order (1..4); other metrics a scalar.
    scores = np.zeros(4) if is_bleu else 0

    for i in range(num_cap_per_audio):
        remaining = {}
        for key, captions in refs.items():
            captions = list(captions)
            # Withhold captions from the back of the list, one per round,
            # wrapping around when this key has fewer captions than rounds.
            held_out = len(captions) - 1 - (i % len(captions))
            # Captions after the withheld one come first, matching the
            # order a pop-from-the-end / insert-at-the-front rotation gives.
            remaining[key] = captions[held_out + 1:] + captions[:held_out]
        score, _ = scorer.compute_score(remaining, pred)
        scores = scores + (np.array(score) if is_bleu else score)

    score = scores / num_cap_per_audio
    score_allref, _ = scorer.compute_score(refs, pred)
    return score_allref - score
def embedding_score(refs, pred, scorer):
    """Return the all-reference score minus the mean leave-one-out score.

    On round ``i`` the ``i``-th reference of every eligible key is removed
    and ``pred`` is scored against the remaining references; the mean over
    all rounds is subtracted from the score computed with ``refs`` as given.

    Args:
        refs: Mapping from key to a sequence of reference embeddings that
            supports slicing and ``np.concatenate`` (presumably a 2-D
            ``ndarray`` per key - TODO confirm against the embedding file).
            The number of rounds equals the length of the first key's value.
        pred: Predictions, passed unchanged as the second argument of
            ``scorer.compute_score``.
        scorer: Object providing ``compute_score(refs, pred)`` whose first
            return value is the score.

    Returns:
        ``score_with_all_refs - mean(leave_one_out_scores)``.

    Raises:
        ValueError: If ``refs`` is empty or its first key has no references.
    """
    if not refs:
        raise ValueError("refs must contain at least one key")
    num_cap_per_audio = len(next(iter(refs.values())))
    if num_cap_per_audio == 0:
        raise ValueError("the first key in refs has no references")

    # Only keys with the expected number of references take part in the
    # leave-one-out rounds. The selection does not depend on the round, so
    # it is computed once.
    eligible_keys = [
        key for key in refs if len(refs[key]) == num_cap_per_audio
    ]

    scores = 0
    for i in range(num_cap_per_audio):
        refs_i = {
            key: np.concatenate([refs[key][:i], refs[key][i + 1:]])
            for key in eligible_keys
        }
        score, _ = scorer.compute_score(refs_i, pred)
        scores += score
    score = scores / num_cap_per_audio

    # NOTE(review): this call scores against every key in ``refs``, while the
    # rounds above skip keys whose reference count differs - confirm that
    # comparing the two is intended.
    score_allref, _ = scorer.compute_score(refs, pred)
    return score_allref - score
def main(output_file, eval_caption_file, eval_embedding_file, output, zh=False):
    """Write the all-reference vs. leave-one-out score differences to a file.

    Args:
        output_file: Predictions as JSON readable by ``pandas.read_json``,
            with ``filename`` and ``tokens`` columns. Rows are grouped by
            the file's base name without extension.
        eval_caption_file: References as JSON readable by
            ``pandas.read_json``, with a ``key`` column and a ``tokens``
            (``zh=True``) or ``caption`` (``zh=False``) column.
        eval_embedding_file: Not used while the SentenceBert evaluation
            below stays commented out.
        output: Path of the text report to write.
        zh: Forwarded to the Bleu, Cider and Rouge scorers and selects the
            reference column. Meteor and SPICE are only computed and
            reported when it is false.
    """

    def clip_key(path):
        # Base name of the file, extension removed.
        return os.path.splitext(os.path.basename(path))[0]

    hyp_df = pd.read_json(output_file)
    hyp_df["key"] = hyp_df["filename"].apply(clip_key)
    pred = hyp_df.groupby("key")["tokens"].apply(list).to_dict()

    ref_column = "tokens" if zh else "caption"
    gt_df = pd.read_json(eval_caption_file)
    refs = gt_df.groupby("key")[ref_column].apply(list).to_dict()

    def diff_for(metric):
        # Each metric gets its own copy of the references.
        return coco_score(copy.deepcopy(refs), pred, metric)

    # Scorers are imported lazily, right before they are needed.
    from pycocoevalcap.bleu.bleu import Bleu
    from pycocoevalcap.cider.cider import Cider
    from pycocoevalcap.rouge.rouge import Rouge

    bleu_scores = diff_for(Bleu(zh=zh))
    cider_score = diff_for(Cider(zh=zh))
    rouge_score = diff_for(Rouge(zh=zh))

    if not zh:
        from pycocoevalcap.meteor.meteor import Meteor
        meteor_score = diff_for(Meteor())

        from pycocoevalcap.spice.spice import Spice
        spice_score = diff_for(Spice())

    # Disabled SentenceBert evaluation (the only user of eval_embedding_file):
    # from audiocaptioneval.sentbert.sentencebert import SentenceBert
    # scorer = SentenceBert(zh=zh)
    # with open(eval_embedding_file, "rb") as f:
    #     ref_embeddings = pickle.load(f)
    # sent_bert = embedding_score(ref_embeddings, pred, scorer)

    with open(output, "w") as f:
        f.write("Diff:\n")
        for order in range(1, 5):
            f.write("BLEU-{}: {:6.3f}\n".format(order, bleu_scores[order - 1]))

        scalar_rows = [
            ("CIDEr: {:6.3f}\n", cider_score),
            ("ROUGE: {:6.3f}\n", rouge_score),
        ]
        if not zh:
            scalar_rows.append(("Meteor: {:6.3f}\n", meteor_score))
            scalar_rows.append(("SPICE: {:6.3f}\n", spice_score))
        for template, value in scalar_rows:
            f.write(template.format(value))
        # f.write("SentenceBert: {:6.3f}\n".format(sent_bert))
# Script entry point: python-fire builds the command-line interface from
# main()'s signature when this file is executed directly.
if __name__ == "__main__":
    fire.Fire(main)