"""Score merged model outputs against reference answers and build a results table."""

import json
import os
import glob
import argparse
import csv
import ast


# Per-category dataset weights used for the weighted category averages.
#   1: Video-exclusive Understanding
#   2: Prior Knowledge-based Question-answer
#   3: Comprehension and Decision-making
_DATASET_WEIGHTS = {
    1: {
        "ActivityNet": 1,
        "MSVD": 1,
        "MSRVTT": 1,
        "TGIF": 1,
        "Youcook2": 1,
        "Ucfcrime": 1,
        "MOT": 0.5,
    },
    2: {
        "TVQA": 1,
        "MV": 1,
        "NBA": 1,
    },
    3: {
        "Driving-exam": 0.5,
        "Driving-decision-making": 1,
        "SQA3D": 1,
    },
}


def _parse_merged_results(merge_file):
    """Decode *merge_file* and parse it into a dict without executing code."""
    if isinstance(merge_file, (bytes, bytearray)):
        text = merge_file.decode("utf-8")
    else:
        text = merge_file
    # NOTE(security): this used to be ``eval(text)``, which runs arbitrary
    # code contained in the payload. JSON is tried first; a Python dict
    # literal (what ``eval`` accepted) is still supported through
    # ``ast.literal_eval``, which only evaluates literals.
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        return ast.literal_eval(text)


def _weighted_percentage(scores, weights):
    """Return the weighted mean of ``scores[name]`` over *weights*, times 100."""
    weights_sum = sum(weights.values())
    return sum(
        weight * scores[name] / weights_sum * 100
        for name, weight in weights.items()
    )


def chatgpt_json(merge_file):
    """Compute the per-dataset accuracy of the extracted answer choices.

    Args:
        merge_file: UTF-8 encoded bytes (or an already decoded string)
            holding ``{dataset_name: {question_id: {...}}}`` where each
            innermost mapping has an ``'output_chatgpt_choice'`` entry.
            JSON and Python dict-literal syntax are both accepted.

    Returns:
        A dict mapping each dataset name found in *merge_file* to the
        fraction of its entries whose ``'output_chatgpt_choice'`` equals the
        ``'answer'`` stored under the same dataset and question id in
        ``file/ANSWER.json``. Fractions are rounded to 4 decimals; a dataset
        with no entries scores ``0.0``.

    Raises:
        KeyError: If a dataset or question id is absent from the answer
            file, or an entry lacks ``'output_chatgpt_choice'``.
        OSError: If ``file/ANSWER.json`` cannot be opened.
    """
    merge_data = _parse_merged_results(merge_file)

    # The path is relative to the current working directory.
    correct_answer_file = 'file/ANSWER.json'
    with open(correct_answer_file, 'r', encoding='utf-8') as f:
        correct_answer_data = json.load(f)

    dataset_scores_dict = {}
    for dataset_name, item in merge_data.items():
        total_nums = len(item)
        if not total_nums:
            # Nothing was answered; avoid dividing by zero.
            dataset_scores_dict[dataset_name] = 0.0
            continue

        answers = correct_answer_data[dataset_name]
        correct = sum(
            1
            for question_id, sub_item in item.items()
            if sub_item['output_chatgpt_choice'] == answers[question_id]['answer']
        )
        # Stored as a fraction in [0, 1]; compute_scores scales the category
        # averages (not these per-dataset values) to percentages.
        dataset_scores_dict[dataset_name] = round(correct / total_nums, 4)
    return dataset_scores_dict


def compute_scores(merge_file):
    """Build the two-row score table for one merged results payload.

    Args:
        merge_file: Payload accepted by :func:`chatgpt_json`.

    Returns:
        ``[header, row]``. ``header`` is the list of column titles. ``row``
        starts with the overall average, followed by the three category
        averages (each a weighted mean scaled by 100), followed by the
        per-dataset fractions returned by :func:`chatgpt_json` in header
        order. The overall average is the unweighted mean of the three
        category averages.

    Raises:
        KeyError: If the payload lacks any dataset listed in the weights.
    """
    dataset_score_dict = chatgpt_json(merge_file)

    dataset_names = [
        name for group in _DATASET_WEIGHTS.values() for name in group
    ]
    missing = [name for name in dataset_names if name not in dataset_score_dict]
    if missing:
        raise KeyError(f'missing dataset scores: {missing}')

    # Video-exclusive Understanding score
    exclusive_understanding_score = _weighted_percentage(
        dataset_score_dict, _DATASET_WEIGHTS[1]
    )
    # Prior Knowledge-based Question-answer
    prior_QA_score = _weighted_percentage(dataset_score_dict, _DATASET_WEIGHTS[2])
    # Comprehension and Decision-making
    com_and_dec_QA_score = _weighted_percentage(
        dataset_score_dict, _DATASET_WEIGHTS[3]
    )

    # final score
    final_score = sum(
        [exclusive_understanding_score, prior_QA_score, com_and_dec_QA_score]
    ) / 3

    header = [
        "Avg. All",
        "Avg. Video-Exclusive",
        "Avg. Prior-Knowledge QA",
        "Avg. Decision-Making",
    ] + dataset_names
    row = [
        final_score,
        exclusive_understanding_score,
        prior_QA_score,
        com_and_dec_QA_score,
    ] + [dataset_score_dict[name] for name in dataset_names]

    data = [header, row]
    return data