import torch
from mmengine.evaluator import BaseMetric

from opencompass.registry import METRICS

# Mapping from SEED-Bench question-type id to its evaluation dimension name.
EVAL_DIM_MAPPING = {
    1: 'Scene Understanding',
    2: 'Instance Identity',
    3: 'Instance Attributes',
    4: 'Instance Location',
    5: 'Instance Counting',
    6: 'Spatial Relations',
    7: 'Instance Interaction',
    8: 'Visual Reasoning',
    9: 'Text Recognition',
    10: 'Action Recognition',
    11: 'Action Prediction',
    12: 'Procedure Understanding',
}


@METRICS.register_module()
class SEEDBenchAcc(BaseMetric):
    """Compute results for SEED-Bench."""

    def process(self, data_batch, data_samples) -> None:
        for data_sample in data_samples:
            # The answer option with the lowest loss is taken as the prediction.
            losses = data_sample['losses']
            class_ranks = torch.argsort(losses, dim=-1).cpu()
            pred_id = ['A', 'B', 'C', 'D'][class_ranks[0]]
            answer_record = {
                'q_id': data_sample['question_id'],
                'prediction': pred_id,
                'gt': data_sample['answer'],
                'q_type_id': data_sample['question_type_id'],
                'losses': [str(num) for num in list(losses.cpu().numpy())],
            }
            self.results.append(answer_record)

    def compute_metrics(self, results: list) -> dict:
        type_counts = {}
        correct_counts = {}
        out = {}
        out['answer_records'] = results

        # Count total and correctly answered questions per question type.
        for item in results:
            pred, gt = item['prediction'], item['gt']
            data_type = item['q_type_id']
            type_counts[data_type] = type_counts.get(data_type, 0) + 1
            if pred == gt:
                correct_counts[data_type] = correct_counts.get(data_type, 0) + 1

        # Report per-dimension accuracy (in percent) and the overall accuracy.
        total_count = 0
        total_correct = 0
        for data_type in type_counts.keys():
            accuracy = correct_counts.get(data_type, 0) / type_counts[data_type] * 100
            category = EVAL_DIM_MAPPING[data_type]
            out[f'Data type {data_type} - {category}'] = accuracy
            total_count += type_counts[data_type]
            total_correct += correct_counts.get(data_type, 0)

        total_accuracy = total_correct / total_count * 100
        out['Total accuracy'] = total_accuracy
        return out
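

# Minimal usage sketch (illustrative only, not part of the upstream module):
# feeds a couple of synthetic samples with per-option losses through the metric.
# The dict keys mirror those read in `process` above; the sample values and ids
# are hypothetical stand-ins for what the SEED-Bench dataloader would provide.
if __name__ == '__main__':
    metric = SEEDBenchAcc()
    fake_samples = [
        {
            'losses': torch.tensor([0.9, 0.1, 1.2, 1.5]),  # lowest loss -> 'B'
            'question_id': 'demo-0',
            'answer': 'B',
            'question_type_id': 1,
        },
        {
            'losses': torch.tensor([0.2, 0.8, 0.9, 1.1]),  # lowest loss -> 'A'
            'question_id': 'demo-1',
            'answer': 'C',
            'question_type_id': 9,
        },
    ]
    metric.process(data_batch=None, data_samples=fake_samples)
    print(metric.compute_metrics(metric.results))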