import torch
from mmengine.evaluator import BaseMetric

from opencompass.registry import METRICS

EVAL_DIM_MAPPING = {
    1: 'Scene Understanding',
    2: 'Instance Identity',
    3: 'Instance Attributes',
    4: 'Instance Location',
    5: 'Instance Counting',
    6: 'Spatial Relations',
    7: 'Instance Interaction',
    8: 'Visual Reasoning',
    9: 'Text Recognition',
    10: 'Action Recognition',
    11: 'Action Prediction',
    12: 'Procedure Understanding',
}


@METRICS.register_module()
class SEEDBenchAcc(BaseMetric):
    """Compute results for SEED-Bench."""

    def process(self, data_batch, data_samples) -> None:
        for data_sample in data_samples:
            # Rank the per-option losses in ascending order; the option
            # with the smallest loss is taken as the predicted answer.
            losses = data_sample['losses']
            class_ranks = torch.argsort(losses, dim=-1).cpu()
            pred_id = ['A', 'B', 'C', 'D'][class_ranks[0]]
            answer_record = {
                'q_id': data_sample['question_id'],
                'prediction': pred_id,
                'gt': data_sample['answer'],
                'q_type_id': data_sample['question_type_id'],
                # Keep the raw losses (stringified) for later inspection.
                'losses': [str(num) for num in losses.cpu().numpy()],
            }
            self.results.append(answer_record)

    def compute_metrics(self, results: list) -> dict:
        # Tally total and correct predictions for each question type.
        type_counts = {}
        correct_counts = {}
        out = {}
        out['answer_records'] = results
        for item in results:
            pred, gt = item['prediction'], item['gt']
            data_type = item['q_type_id']

            type_counts[data_type] = type_counts.get(data_type, 0) + 1
            if pred == gt:
                correct_counts[data_type] = correct_counts.get(data_type,
                                                               0) + 1

        # Report accuracy (as a percentage) per evaluation dimension,
        # accumulating the overall totals along the way.
        total_count = 0
        total_correct = 0
        for data_type in type_counts:
            accuracy = correct_counts.get(data_type,
                                          0) / type_counts[data_type] * 100
            category = EVAL_DIM_MAPPING[data_type]
            out[f'Data type {data_type} - {category}'] = accuracy

            total_count += type_counts[data_type]
            total_correct += correct_counts.get(data_type, 0)

        # Overall accuracy is micro-averaged over all answered questions.
        total_accuracy = total_correct / total_count * 100
        out['Total accuracy'] = total_accuracy
        return out
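

# --- Minimal usage sketch (illustrative, not part of the metric) -----------
# A hand-rolled demonstration of how the metric consumes samples. The two
# dicts below are hypothetical stand-ins for SEED-Bench dataloader output,
# populating only the keys that `process` reads; the option losses are made
# up, and the option with the smallest loss becomes the prediction.
if __name__ == '__main__':
    metric = SEEDBenchAcc()
    demo_samples = [
        {
            # Lowest loss at index 1 -> prediction 'B', matching the answer.
            'losses': torch.tensor([2.3, 0.7, 1.9, 3.1]),
            'question_id': 'demo-0',
            'answer': 'B',
            'question_type_id': 1,
        },
        {
            # Lowest loss at index 0 -> prediction 'A', but the answer is
            # 'C', so this sample counts as a miss.
            'losses': torch.tensor([0.4, 1.2, 2.8, 1.1]),
            'question_id': 'demo-1',
            'answer': 'C',
            'question_type_id': 1,
        },
    ]
    metric.process(data_batch=None, data_samples=demo_samples)
    # Expected: 'Data type 1 - Scene Understanding': 50.0 and
    # 'Total accuracy': 50.0 (one correct out of two).
    print(metric.compute_metrics(metric.results))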