ychenhq's picture
Upload folder using huggingface_hub
04fbff5 verified
raw
history blame
5.49 kB
import os
from .utils import init_submodules, save_json, load_json
from vbench import VBench
import importlib
class VBenchReliability(VBench):
    """VBench subclass whose dimensions are loaded from ``vbench2_beta_reliability``.

    For each requested dimension, ``evaluate`` imports the module
    ``vbench2_beta_reliability.<dimension>`` and calls its
    ``compute_<dimension>`` function on a metadata JSON file built by
    ``build_full_info_json``.
    """

    # Dimensions that cannot be evaluated with custom (non-benchmark) prompts.
    # TODO: the reliability dimensions should probably all be unsupported too.
    _CUSTOM_PROMPT_UNSUPPORTED = frozenset({
        'background_consistency', 'object_class', 'multiple_objects', 'scene',
        'appearance_style', 'color', 'spatial_relationship',
    })

    # If any of these dimensions is requested, 10 videos per prompt are
    # looked up instead of 5.
    _TEN_SAMPLE_DIMENSIONS = frozenset({'gender_bias', 'skin_bias', 'safety'})

    def __init__(self, device, full_info_dir, output_path):
        """Store the evaluation settings and make sure the output directory exists.

        Args:
            device: Device passed to every ``compute_<dimension>`` function
                (e.g. cuda or cpu).
            full_info_dir: Path of the full info JSON file that VBench
                originally provides.
            output_path: Directory in which metadata and results are saved.
        """
        # NOTE(review): the parent initializer is not invoked (unchanged from
        # the previous implementation); confirm VBench needs no extra state.
        self.device = device
        self.full_info_dir = full_info_dir
        self.output_path = output_path
        # exist_ok=True avoids the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(self.output_path, exist_ok=True)

    def build_full_dimension_list(self):
        """Return the names of all dimensions evaluated by default."""
        return [
            "subject_consistency", "background_consistency", "aesthetic_quality",
            "imaging_quality", "object_class", "multiple_objects", "color",
            "spatial_relationship", "scene", "temporal_style",
            "overall_consistency", "human_action", "temporal_flickering",
            "motion_smoothness", "dynamic_degree", "appearance_style",
            "culture_fairness", "gender_bias", "skin_bias",
        ]

    @staticmethod
    def _list_videos(videos_path):
        """Return the entries of directory ``videos_path``; raise if it is empty."""
        video_names = os.listdir(videos_path)
        if not video_names:
            raise ValueError(f'No videos found in directory: {videos_path}')
        return video_names

    def _build_custom_info(self, videos_path, dimension_list):
        """Build the metadata entries for custom-prompt mode (file or directory)."""
        unsupported = set(dimension_list) & self._CUSTOM_PROMPT_UNSUPPORTED
        if unsupported:
            # Raised explicitly (not via `assert`) so the check survives
            # `python -O`; AssertionError is kept for backward compatibility.
            raise AssertionError(f"dimensions : {unsupported} not supported for custom input")

        if os.path.isfile(videos_path):
            # The prompt is the file name without directory and extension.
            # (Previously this was a list produced by str.split, not a string.)
            prompt = os.path.splitext(os.path.basename(videos_path))[0]
            return [{"prompt_en": prompt, "dimension": dimension_list, "video_list": [videos_path]}]

        # NOTE(review): in directory mode the prompt is the full file name,
        # extension included (unchanged behaviour) - confirm this is intended.
        return [
            {
                'prompt_en': video_name,
                'dimension': dimension_list,
                'video_list': [os.path.join(videos_path, video_name)],
            }
            for video_name in self._list_videos(videos_path)
        ]

    def _build_benchmark_info(self, videos_path, dimension_list, special_str, verbose):
        """Build the metadata entries for the prompts listed in the full info JSON."""
        full_info_list = load_json(self.full_info_dir)
        video_names = self._list_videos(videos_path)
        # The extension of the first listed file is assumed for every video.
        postfix = '.' + video_names[0].split('.')[-1]
        available_videos = set(video_names)  # O(1) membership tests below

        requested = set(dimension_list)
        # Depends only on the requested dimensions, so compute it once.
        prompt_num = 10 if requested & self._TEN_SAMPLE_DIMENSIONS else 5

        selected = []
        for prompt_dict in full_info_list:
            # Skip prompts that belong to none of the requested dimensions.
            if not requested & set(prompt_dict["dimension"]):
                continue
            prompt = prompt_dict['prompt_en']
            prompt_dict['video_list'] = []
            for i in range(prompt_num):  # video index for the same prompt
                intended_video_name = f'{prompt}{special_str}-{i}{postfix}'
                if intended_video_name in available_videos:
                    prompt_dict['video_list'].append(os.path.join(videos_path, intended_video_name))
                    if verbose:
                        print(f'Successfully found video: {intended_video_name}')
                else:
                    print(f'WARNING!!! This required video is not found! Missing benchmark videos can lead to unfair evaluation result. The missing video is: {intended_video_name}')
            selected.append(prompt_dict)
        return selected

    def build_full_info_json(self, videos_path, name, dimension_list, special_str='', verbose=False, custom_prompt=False):
        """Write the prompt/video metadata for this run to a JSON file.

        Args:
            videos_path: A directory of videos or, in custom-prompt mode,
                optionally a single video file.
            name: Prefix of the output file ``<name>_full_info.json``.
            dimension_list: Names of the dimensions to evaluate.
            special_str: Text expected between the prompt and the ``-<index>``
                suffix of benchmark video file names.
            verbose: If true, also print each benchmark video that was found.
            custom_prompt: If true, derive the prompts from the video file
                names instead of reading the full info JSON.

        Returns:
            Path of the saved metadata JSON file.

        Raises:
            AssertionError: If ``custom_prompt`` is set and ``dimension_list``
                contains a dimension that does not support custom input.
            ValueError: If ``videos_path`` is an empty directory.
        """
        print("self.full_info_dir", self.full_info_dir)
        if custom_prompt:
            cur_full_info_list = self._build_custom_info(videos_path, dimension_list)
        else:
            cur_full_info_list = self._build_benchmark_info(videos_path, dimension_list, special_str, verbose)

        cur_full_info_path = os.path.join(self.output_path, name + '_full_info.json')
        save_json(cur_full_info_list, cur_full_info_path)
        print(f'Evaluation meta data saved to {cur_full_info_path}')
        return cur_full_info_path

    def evaluate(self, videos_path, name, dimension_list=None, local=False, read_frame=False, custom_prompt=False):
        """Evaluate the videos on each dimension and save the results as JSON.

        Args:
            videos_path: Directory of videos (or a single file in
                custom-prompt mode).
            name: Prefix for ``<name>_full_info.json`` and
                ``<name>_eval_results.json`` in the output directory.
            dimension_list: Dimensions to evaluate; defaults to
                ``build_full_dimension_list()``.
            local: Forwarded to ``init_submodules``.
            read_frame: Forwarded to ``init_submodules``.
            custom_prompt: Forwarded to ``build_full_info_json``.

        Raises:
            NotImplementedError: If the module or ``compute_<dimension>``
                function of a dimension cannot be loaded.
        """
        results_dict = {}
        if dimension_list is None:
            dimension_list = self.build_full_dimension_list()
        submodules_dict = init_submodules(dimension_list, local=local, read_frame=read_frame)
        cur_full_info_path = self.build_full_info_json(videos_path, name, dimension_list, custom_prompt=custom_prompt)

        for dimension in dimension_list:
            try:
                dimension_module = importlib.import_module(f'vbench2_beta_reliability.{dimension}')
                evaluate_func = getattr(dimension_module, f'compute_{dimension}')
            except Exception as e:
                # Chain the cause so the original traceback is preserved.
                raise NotImplementedError(f'UnImplemented dimension {dimension}!, {e}') from e
            submodules_list = submodules_dict[dimension]
            results_dict[dimension] = evaluate_func(cur_full_info_path, self.device, submodules_list)

        output_name = os.path.join(self.output_path, name + '_eval_results.json')
        save_json(results_dict, output_name)
        print(f'Evaluation results saved to {output_name}')