# TempCompass / src/compute.py
# Hugging Face file-viewer metadata: last commit "update" (5aa60a6) by lyx97; file size 4.32 kB.
import json
import os
import glob
import argparse
import csv
def chatgpt_json(merge_file):
    """Compute the accuracy (in percent) of every dataset in a merged results file.

    Args:
        merge_file: UTF-8 encoded bytes holding a JSON object shaped as
            ``{dataset_name: {item_id: {dimension: [result, ...]}}}``, where
            each ``result`` is an object with a numeric ``"rating"`` entry.

    Returns:
        A dict mapping each dataset name to ``sum(rating) / count * 100``,
        rounded to two decimals. A dataset that contains no result at all
        scores ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises:
        UnicodeDecodeError: if ``merge_file`` is not valid UTF-8.
        json.JSONDecodeError: if the decoded text is not valid JSON.
        KeyError: if a result object has no ``"rating"`` entry.
    """
    # SECURITY: the payload used to be patched with str.replace (": true," ->
    # ': "true",') and then passed to eval(), which executes any expression
    # embedded in the data. The patching shows the payload is JSON, so it is
    # parsed with json.loads instead. This also accepts `null` and literals
    # that are the last member of an object, which the text patching missed.
    merge_data = json.loads(merge_file.decode("utf-8"))

    dataset_scores_dict = {}
    for dataset_name, dataset_results in merge_data.items():
        # Flatten item -> dimension -> results into one list of ratings.
        ratings = [
            result["rating"]
            for item_results in dataset_results.values()
            for dim_results in item_results.values()
            for result in dim_results
        ]
        if ratings:
            score = round(sum(ratings) / len(ratings) * 100, 2)
        else:
            # Nothing to average: report 0 rather than dividing by zero.
            score = 0.0
        dataset_scores_dict[dataset_name] = score
    return dataset_scores_dict
def compute_scores(merge_file):
    """Build the two-row score table (header row + score row) for a results file.

    Args:
        merge_file: UTF-8 encoded bytes holding a JSON object shaped as
            ``{dataset_name: {item_id: {dimension: [result, ...]}}}``, where
            each ``result`` is an object with a numeric ``"rating"`` entry.
            The datasets ``"multi-choice"``, ``"yes_no"``,
            ``"caption_matching"`` and ``"captioning"`` must all be present.

    Returns:
        ``[header, scores]``: two parallel lists of 25 entries. ``header``
        holds the column labels ("Avg. All", "Avg. Multi-Choice", ...) and
        ``scores`` the matching accuracies in percent, rounded to two
        decimals. A dataset/dimension pair without any result scores ``0.0``
        instead of raising ``ZeroDivisionError``.

    Raises:
        UnicodeDecodeError: if ``merge_file`` is not valid UTF-8.
        json.JSONDecodeError: if the decoded text is not valid JSON.
        KeyError: if a result has no ``"rating"``, uses a dimension outside
            the evaluated ones, or one of the four datasets is missing.
    """

    def percent(correct, count):
        # Empty buckets (e.g. a dataset with no "speed" items) score 0.
        return round(correct / count * 100, 2) if count else 0.0

    # SECURITY: the payload used to be patched with str.replace (quoting
    # true/false/null) and then passed to eval(), which executes any
    # expression embedded in the data. The patching shows the payload is
    # JSON, so it is parsed with json.loads instead.
    merge_data = json.loads(merge_file.decode("utf-8"))

    eval_dims = ['action', 'speed', 'direction', 'order', 'attribute_change', 'avg']
    dataset_scores_dict = {}
    total_correct, total_num = 0, 0

    for dataset_name, dataset_results in merge_data.items():
        dataset_correct = dict.fromkeys(eval_dims, 0)
        dataset_num = dict.fromkeys(eval_dims, 0)
        for item_results in dataset_results.values():
            for dim, dim_results in item_results.items():
                for result in dim_results:
                    rating = result['rating']
                    # Every rating counts towards its own dimension and
                    # towards the dataset-wide average.
                    dataset_correct['avg'] += rating
                    dataset_correct[dim] += rating
                    dataset_num['avg'] += 1
                    dataset_num[dim] += 1
        total_correct += dataset_correct['avg']
        total_num += dataset_num['avg']
        for dim in eval_dims:
            dataset_scores_dict[f"{dim}_{dataset_name}"] = percent(
                dataset_correct[dim], dataset_num[dim]
            )
    dataset_scores_dict["avg_all"] = percent(total_correct, total_num)

    # Column layout of the table: (score-key prefix, header label), in
    # display order, crossed with the four tasks below.
    dim_columns = (
        ("avg", "Avg"),
        ("action", "Action"),
        ("direction", "Direction"),
        ("speed", "Speed"),
        ("order", "Event Order"),
        ("attribute_change", "Attribute Change"),
    )
    task_columns = (
        ("multi-choice", "Multi-Choice"),
        ("yes_no", "Yes/No"),
        ("caption_matching", "Caption Matching"),
        ("captioning", "Caption Generation"),
    )

    header = ["Avg. All"]
    scores = [dataset_scores_dict["avg_all"]]
    for dim_key, dim_label in dim_columns:
        for task_key, task_label in task_columns:
            header.append(f"{dim_label}. {task_label}")
            scores.append(dataset_scores_dict[f"{dim_key}_{task_key}"])
    return [header, scores]