import csv
import json


def chatgpt_json(merge_file):
    """Score each dataset by comparing predicted choices against the reference answers."""
    # merge_file arrives as raw bytes (e.g. an uploaded Video-Bench-Input.json); parse it as JSON
    # instead of eval-ing untrusted input.
    merge_data = json.loads(merge_file.decode("utf-8"))

    correct_answer_file = 'file/ANSWER.json'
    with open(correct_answer_file, 'r', encoding='utf-8') as f:
        correct_answer_data = json.load(f)

    dataset_scores_dict = {}
    for dataset_name, item in merge_data.items():
        total_nums = len(item)
        correct = 0
        for question_id, sub_item in item.items():
            if sub_item['output_chatgpt_choice'] == correct_answer_data[dataset_name][question_id]['answer']:
                correct += 1
        # Per-dataset accuracy as a fraction in [0, 1]; the category averages below scale it to a percentage.
        dataset_scores_dict[dataset_name] = round(correct / total_nums, 4)
    return dataset_scores_dict


def compute_scores(merge_file):
    """Aggregate per-dataset accuracies into the three category scores and a final score."""
    dataset_score_dict = chatgpt_json(merge_file)
    # Weights within each ability category; MOT and Driving-exam contribute half weight.
    dataset_weight = {
        1: {
            "ActivityNet": 1,
            "MSVD": 1,
            "MSRVTT": 1,
            "TGIF": 1,
            "Youcook2": 1,
            "Ucfcrime": 1,
            "MOT": 0.5,
        },
        2: {
            "TVQA": 1,
            "MV": 1,
            "NBA": 1,
        },
        3: {
            "Driving-exam": 0.5,
            "Driving-decision-making": 1,
            "SQA3D": 1,
        },
    }

    # Video-exclusive Understanding: weighted average over category-1 datasets, scaled to a percentage.
    exclusive_understanding_weight = dataset_weight[1]
    weights_sum = sum(exclusive_understanding_weight.values())
    exclusive_understanding_score = 0
    for dataset_name, weight in exclusive_understanding_weight.items():
        exclusive_understanding_score += weight * dataset_score_dict[dataset_name] / weights_sum * 100

    # Prior Knowledge-based Question-answering: weighted average over category-2 datasets.
    prior_QA_weight = dataset_weight[2]
    weights_sum = sum(prior_QA_weight.values())
    prior_QA_score = 0
    for dataset_name, weight in prior_QA_weight.items():
        prior_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum * 100

    # Comprehension and Decision-making: weighted average over category-3 datasets.
    com_and_dec_QA_weight = dataset_weight[3]
    weights_sum = sum(com_and_dec_QA_weight.values())
    com_and_dec_QA_score = 0
    for dataset_name, weight in com_and_dec_QA_weight.items():
        com_and_dec_QA_score += weight * dataset_score_dict[dataset_name] / weights_sum * 100

    dataset_score_dict['Exclusive_understanding'] = exclusive_understanding_score
    dataset_score_dict['Prior_Knowledge'] = prior_QA_score
    dataset_score_dict['Comprehension_and_Decision-making'] = com_and_dec_QA_score

    # Final score: unweighted mean of the three category scores.
    final_score = sum([exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score]) / 3
    dataset_score_dict['final_score'] = final_score

    # Assemble a header row and a matching score row in the output table's column order.
    data = [
        ["Avg. All", "Avg. Video-Exclusive", "Avg. Prior-Knowledge QA", "Avg. Decision-Making",
         "ActivityNet", "MSVD", "MSRVTT", "TGIF", "Youcook2", "Ucfcrime",
         "MOT", "TVQA", "MV", "NBA", "Driving-exam", "Driving-decision-making", "SQA3D"],
        [final_score, exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score,
         dataset_score_dict['ActivityNet'],
         dataset_score_dict['MSVD'],
         dataset_score_dict['MSRVTT'],
         dataset_score_dict['TGIF'],
         dataset_score_dict['Youcook2'],
         dataset_score_dict['Ucfcrime'],
         dataset_score_dict['MOT'],
         dataset_score_dict['TVQA'],
         dataset_score_dict['MV'],
         dataset_score_dict['NBA'],
         dataset_score_dict['Driving-exam'],
         dataset_score_dict['Driving-decision-making'],
         dataset_score_dict['SQA3D'],
         ],
    ]
    return data
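

# Illustrative usage sketch (not part of the original module): run this file directly
# against a merged-results JSON and dump the two-row table to CSV. The input path below
# is a placeholder assumption; in the Space, compute_scores() presumably receives the
# uploaded file's bytes directly.
if __name__ == "__main__":
    with open('file/Video-Bench-Input.json', 'rb') as f:  # hypothetical local path
        score_table = compute_scores(f.read())
    with open('scores.csv', 'w', newline='', encoding='utf-8') as f:
        csv.writer(f).writerows(score_table)
    print('Saved scores.csv')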