# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
from functools import partial

import mmcv

from mmocr.utils import list_from_file, list_to_file


def convert(closeset_line, merge_bg_others=False, ignore_idx=0, others_idx=25):
    """Convert line-json str of closeset to line-json str of openset.

    Note that this function is designed for closeset-wildreceipt to
    openset-wildreceipt. It may not be suitable to your own dataset.

    Every annotation gets its ``label`` rewritten to an openset node label
    (bg=0, key=1, value=2, others=3) and receives an integer ``edge`` id.
    A key label and its paired value label share one edge id; every
    "ignore" or "others" annotation gets an edge id of its own.

    Args:
        closeset_line (str): The string to be deserialized to
            the closeset dictionary object.
        merge_bg_others (bool): If True, give the same label to "background"
            class and "others" class.
        ignore_idx (int): Index for ``ignore`` class.
        others_idx (int): Index for ``others`` class.

    Returns:
        str: The openset dictionary serialized as a json string
        (non-ASCII characters are kept as-is).

    Raises:
        ValueError: If an annotation label is neither ``ignore_idx``,
            ``others_idx`` nor one of the key/value labels in
            ``range(1, others_idx)``.
    """
    # Value labels are the odd indices and key labels the even indices
    # below ``others_idx``; labels at the same position of the two ranges
    # make up a key-value pair. For example, in wildreceipt, label 2
    # ("Store_name_key") is paired with label 1 ("Store_name_value").
    key_labels = range(2, others_idx, 2)
    value_labels = range(1, others_idx, 2)
    counterpart = {}
    for key_label, value_label in zip(key_labels, value_labels):
        counterpart[key_label] = value_label
        counterpart[value_label] = key_label

    node_labels = {'bg': 0, 'key': 1, 'value': 2, 'others': 3}
    if merge_bg_others:
        node_labels['others'] = node_labels['bg']

    closeset_obj = json.loads(closeset_line)
    annotations = closeset_obj['annotations']

    edge_idx = 1
    label_to_edge = {}
    for anno in annotations:
        label = anno['label']

        # Background and "others" nodes never share an edge with anything.
        if label == ignore_idx:
            anno['label'] = node_labels['bg']
            anno['edge'] = edge_idx
            edge_idx += 1
            continue
        if label == others_idx:
            anno['label'] = node_labels['others']
            anno['edge'] = edge_idx
            edge_idx += 1
            continue

        if label in key_labels:
            role = 'key'
        elif label in value_labels:
            role = 'value'
        else:
            # Fail loudly: without this check an unknown label would either
            # hit an unbound local or silently reuse the counterpart of a
            # previously processed annotation.
            raise ValueError(
                'Unexpected closeset label {!r}: it is not ignore_idx ({}), '
                'others_idx ({}) or a key/value label in [1, {}).'.format(
                    label, ignore_idx, others_idx, others_idx))

        edge = label_to_edge.get(label)
        if edge is None:
            # First time this label shows up: join the edge of its key/value
            # counterpart if that was already seen, else open a new edge.
            # A label without a counterpart (the last value label when
            # ``others_idx`` is even) simply gets its own edge.
            edge = label_to_edge.get(counterpart.get(label))
            if edge is None:
                edge = edge_idx
                edge_idx += 1
            label_to_edge[label] = edge

        anno['label'] = node_labels[role]
        anno['edge'] = edge

    openset_obj = {
        'file_name': closeset_obj['file_name'],
        'height': closeset_obj['height'],
        'width': closeset_obj['width'],
        'annotations': annotations
    }

    return json.dumps(openset_obj, ensure_ascii=False)


def process(closeset_file, openset_file, merge_bg_others=False, n_proc=10):
    """Convert a closeset annotation file to an openset annotation file.

    The entries loaded from ``closeset_file`` with ``list_from_file`` are
    each passed through :func:`convert` and the results are written to
    ``openset_file`` with ``list_to_file``.

    Args:
        closeset_file (str): Annotation file for closeset.
        openset_file (str): Annotation file for openset.
        merge_bg_others (bool): Forwarded to :func:`convert`.
        n_proc (int): Passed as ``nproc`` to
            ``mmcv.track_parallel_progress``.
    """
    closeset_lines = list_from_file(closeset_file)

    convert_func = partial(convert, merge_bg_others=merge_bg_others)

    openset_lines = mmcv.track_parallel_progress(
        convert_func, closeset_lines, nproc=n_proc)

    list_to_file(openset_file, openset_lines)


def parse_args():
    """Parse the command line arguments of this script.

    Returns:
        argparse.Namespace: Namespace with ``in_file``, ``out_file``,
        ``merge`` and ``n_proc`` attributes.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('in_file', help='Annotation file for closeset.')
    parser.add_argument('out_file', help='Annotation file for openset.')
    parser.add_argument(
        '--merge',
        action='store_true',
        help='Merge two classes: "background" and "others" in closeset '
        'to one class in openset.')
    parser.add_argument(
        '--n_proc', type=int, default=10, help='Number of process.')
    args = parser.parse_args()
    return args


def main():
    """Script entry point: parse arguments, convert the file, report."""
    args = parse_args()
    process(args.in_file, args.out_file, args.merge, args.n_proc)

    print('finish')


if __name__ == '__main__':
    main()