# Copyright (c) OpenMMLab. All rights reserved.
import json
import math
import os.path as osp
import tempfile

import pytest
import torch

from mmocr.datasets.kie_dataset import KIEDataset


def _create_dummy_ann_file(ann_file):
    """Write a one-sample annotation file, one JSON object per line.

    Args:
        ann_file (str): Path of the annotation file to create.

    Returns:
        dict: The image-level annotation that was written. It describes
            ``sample1.png`` (200 x 200) with five text boxes, each given as
            eight coordinates and all carrying label ``1``.
    """
    ann_info1 = {
        'file_name':
        'sample1.png',
        'height':
        200,
        'width':
        200,
        'annotations': [{
            'text': 'store',
            'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0],
            'label': 1
        }, {
            'text': 'address',
            'box': [23.0, 2.0, 31.0, 1.0, 24.0, 11.0, 16.0, 11.0],
            'label': 1
        }, {
            'text': 'price',
            'box': [33.0, 2.0, 43.0, 2.0, 36.0, 12.0, 25.0, 12.0],
            'label': 1
        }, {
            'text': '1.0',
            'box': [46.0, 2.0, 61.0, 2.0, 53.0, 12.0, 39.0, 12.0],
            'label': 1
        }, {
            'text': 'google',
            'box': [61.0, 2.0, 69.0, 2.0, 63.0, 12.0, 55.0, 12.0],
            'label': 1
        }]
    }
    with open(ann_file, 'w') as fw:
        for ann_info in [ann_info1]:
            fw.write(json.dumps(ann_info) + '\n')

    return ann_info1


def _create_dummy_dict_file(dict_file):
    """Write a character dictionary file with one character per line.

    Args:
        dict_file (str): Path of the dictionary file to create.

    Returns:
        str: The characters that were written (``'0123'``).
    """
    dict_str = '0123'
    with open(dict_file, 'w') as fw:
        for char in dict_str:
            fw.write(char + '\n')

    return dict_str


def _create_dummy_loader():
    """Build the loader config used to construct the dataset under test.

    Returns:
        dict: A ``HardDiskLoader`` config with ``repeat=1`` and a
            ``LineJsonParser`` that extracts ``file_name``, ``height``,
            ``width`` and ``annotations``.
    """
    loader = dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineJsonParser',
            keys=['file_name', 'height', 'width', 'annotations']))
    return loader


def test_kie_dataset():
    """Exercise KIEDataset construction, annotation parsing and evaluation."""
    # The context manager removes the temporary files even when creating
    # them or constructing the dataset raises. Everything after this block
    # runs with the files already deleted, as in the original ordering.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # create dummy data
        ann_file = osp.join(tmp_dir, 'fake_data.txt')
        ann_info1 = _create_dummy_ann_file(ann_file)

        dict_file = osp.join(tmp_dir, 'fake_dict.txt')
        _create_dummy_dict_file(dict_file)

        # test initialization
        loader = _create_dummy_loader()
        dataset = KIEDataset(ann_file, loader, dict_file, pipeline=[])

    dataset.prepare_train_img(0)

    # test pre_pipeline
    img_ann_info = dataset.data_infos[0]
    img_info = {
        'filename': img_ann_info['file_name'],
        'height': img_ann_info['height'],
        'width': img_ann_info['width']
    }
    ann_info = dataset._parse_anno_info(img_ann_info['annotations'])
    results = dict(img_info=img_info, ann_info=ann_info)
    dataset.pre_pipeline(results)
    assert results['img_prefix'] == dataset.img_prefix

    # test _parse_anno_info
    annos = ann_info1['annotations']
    # A single annotation dict instead of a list is expected to be rejected.
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(annos[0])
    # An annotation without 'label' is expected to parse without error.
    tmp_annos = [{
        'text': 'store',
        'box': [11.0, 0.0, 22.0, 0.0, 12.0, 12.0, 0.0, 12.0]
    }]
    dataset._parse_anno_info(tmp_annos)
    # An annotation without 'box' is expected to be rejected.
    tmp_annos = [{'text': 'store'}]
    with pytest.raises(AssertionError):
        dataset._parse_anno_info(tmp_annos)

    return_anno = dataset._parse_anno_info(annos)
    assert 'bboxes' in return_anno
    assert 'relations' in return_anno
    assert 'texts' in return_anno
    assert 'labels' in return_anno

    # test evaluation
    result = {}
    result['nodes'] = torch.full((5, 5), 1, dtype=torch.float)
    # Column 1 holds the largest score in every row.
    result['nodes'][:, 1] = 100.
    # The same result object is deliberately reused for all five entries.
    results = [result for _ in range(5)]

    eval_res = dataset.evaluate(results)
    assert math.isclose(eval_res['macro_f1'], 0.2, abs_tol=1e-4)


# Guarded so that importing the module (e.g. pytest collection) does not run
# the test a second time; direct script execution still runs it.
if __name__ == '__main__':
    test_kie_dataset()