diff --git a/Home.py b/Home.py new file mode 100644 index 0000000000000000000000000000000000000000..d1ad314d72581e9bd87370f69889b6779c0d6352 --- /dev/null +++ b/Home.py @@ -0,0 +1,19 @@ +import streamlit as st + +st.set_page_config(page_title='OCR Comparator', layout="wide") +st.image('ocr.png') + +st.write("") + +st.markdown('''#### OCR, or Optical Character Recognition, is a computer vision task \ +that includes the detection of text areas and the recognition of characters.''') +st.write("") +st.write("") + +st.markdown("##### This app allows you to compare, on a given image, the results of different solutions:") +st.markdown("##### *EasyOCR, PaddleOCR, MMOCR, Tesseract*") +st.write("") +st.write("") +st.markdown("👈 Select the **About** page from the sidebar for information on how the app works") + +st.markdown("👈 or directly select the **App** page") \ No newline at end of file diff --git a/README.md b/README.md index e984bf10456fb24b0b9a6ee6d54aab18f4638e5a..2894464cad0f3f4d163e1f8e72750b10259159da 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,11 @@ --- title: Streamlit OCR Comparator -emoji: 🐨 +emoji: 📰🔍🔤 colorFrom: indigo -colorTo: pink +colorTo: gray sdk: streamlit sdk_version: 1.10.0 -app_file: app.py -pinned: false ---- - -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +app_file: Home.py +tags: [streamlit, ocr] +pinned: true +--- \ No newline at end of file diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py new file mode 100644 index 0000000000000000000000000000000000000000..de7f9650ce73ba7ca633652b50df021b67498362 --- /dev/null +++ b/configs/_base_/default_runtime.py @@ -0,0 +1,17 @@ +# yapf:disable +log_config = dict( + interval=5, + hooks=[ + dict(type='TextLoggerHook') + ]) +# yapf:enable +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] + +# disable OpenCV multithreading to avoid overloading the system +opencv_num_threads = 0 +# set the multi-process start method to `fork` to speed up training +mp_start_method = 'fork' diff --git a/configs/_base_/det_datasets/ctw1500.py b/configs/_base_/det_datasets/ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..466ea7e1ea6871917bd6449019b48cd11c516a01 --- /dev/null +++ b/configs/_base_/det_datasets/ctw1500.py @@ -0,0 +1,18 @@ +dataset_type = 'IcdarDataset' +data_root = 'data/ctw1500' + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_training.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +test = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_test.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +train_list = [train] + +test_list = [test] diff --git a/configs/_base_/det_datasets/icdar2015.py b/configs/_base_/det_datasets/icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..f711c06dce76d53b8737288c8de318e6f90ce585 --- /dev/null +++ b/configs/_base_/det_datasets/icdar2015.py @@ -0,0 +1,18 @@ +dataset_type = 'IcdarDataset' +data_root = 'data/icdar2015' + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_training.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +test = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_test.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +train_list = [train] + +test_list = [test] diff --git a/configs/_base_/det_datasets/icdar2017.py b/configs/_base_/det_datasets/icdar2017.py
new file mode 100644 index 0000000000000000000000000000000000000000..446ea7ef13a95be5e427994a7a61ed571d95db15 --- /dev/null +++ b/configs/_base_/det_datasets/icdar2017.py @@ -0,0 +1,18 @@ +dataset_type = 'IcdarDataset' +data_root = 'data/icdar2017' + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_training.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +test = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_val.json', + img_prefix=f'{data_root}/imgs', + pipeline=None) + +train_list = [train] + +test_list = [test] diff --git a/configs/_base_/det_datasets/synthtext.py b/configs/_base_/det_datasets/synthtext.py new file mode 100644 index 0000000000000000000000000000000000000000..fb9a44b3422dae5a9788d39b0901335dfc6076a9 --- /dev/null +++ b/configs/_base_/det_datasets/synthtext.py @@ -0,0 +1,18 @@ +dataset_type = 'TextDetDataset' +data_root = 'data/synthtext' + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/instances_training.lmdb', + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])), + img_prefix=f'{data_root}/imgs', + pipeline=None) + +train_list = [train] +test_list = [train] diff --git a/configs/_base_/det_datasets/toy_data.py b/configs/_base_/det_datasets/toy_data.py new file mode 100644 index 0000000000000000000000000000000000000000..512d1d20372a3fa3f662cc908c8cf4b66b35b797 --- /dev/null +++ b/configs/_base_/det_datasets/toy_data.py @@ -0,0 +1,41 @@ +root = 'tests/data/toy_dataset' + +# dataset with type='TextDetDataset' +train1 = dict( + type='TextDetDataset', + img_prefix=f'{root}/imgs', + ann_file=f'{root}/instances_test.txt', + loader=dict( + type='AnnFileLoader', + repeat=4, + file_format='txt', + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])), + pipeline=None, + test_mode=False) + +# dataset with type='IcdarDataset' +train2 = dict( + type='IcdarDataset', + ann_file=f'{root}/instances_test.json', + img_prefix=f'{root}/imgs', + pipeline=None) + +test = dict( + type='TextDetDataset', + img_prefix=f'{root}/imgs', + ann_file=f'{root}/instances_test.txt', + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])), + pipeline=None, + test_mode=True) + +train_list = [train1, train2] + +test_list = [test] diff --git a/configs/_base_/det_models/dbnet_r18_fpnc.py b/configs/_base_/det_models/dbnet_r18_fpnc.py new file mode 100644 index 0000000000000000000000000000000000000000..7507605d84f602dbfc0ce3b6b0519add917afe5f --- /dev/null +++ b/configs/_base_/det_models/dbnet_r18_fpnc.py @@ -0,0 +1,21 @@ +model = dict( + type='DBNet', + backbone=dict( + type='mmdet.ResNet', + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + norm_eval=False, + style='caffe'), + neck=dict( + type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256), + bbox_head=dict( + type='DBHead', + in_channels=256, + loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True), + postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/dbnet_r50dcnv2_fpnc.py b/configs/_base_/det_models/dbnet_r50dcnv2_fpnc.py new file mode 100644 index 
0000000000000000000000000000000000000000..1cd1f1baf011554c03c16575b69ebd94eae986b0 --- /dev/null +++ b/configs/_base_/det_models/dbnet_r50dcnv2_fpnc.py @@ -0,0 +1,23 @@ +model = dict( + type='DBNet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + stage_with_dcn=(False, True, True, True)), + neck=dict( + type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256), + bbox_head=dict( + type='DBHead', + in_channels=256, + loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True), + postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/dbnetpp_r50dcnv2_fpnc.py b/configs/_base_/det_models/dbnetpp_r50dcnv2_fpnc.py new file mode 100644 index 0000000000000000000000000000000000000000..f8eaf2ffd6efe9fa4ad63f5dc208f9b134a38380 --- /dev/null +++ b/configs/_base_/det_models/dbnetpp_r50dcnv2_fpnc.py @@ -0,0 +1,28 @@ +model = dict( + type='DBNet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=False, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + stage_with_dcn=(False, True, True, True)), + neck=dict( + type='FPNC', + in_channels=[256, 512, 1024, 2048], + lateral_channels=256, + asf_cfg=dict(attention_type='ScaleChannelSpatial')), + bbox_head=dict( + type='DBHead', + in_channels=256, + loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True), + postprocessor=dict( + type='DBPostprocessor', text_repr_type='quad', + epsilon_ratio=0.002)), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/drrg_r50_fpn_unet.py b/configs/_base_/det_models/drrg_r50_fpn_unet.py new file mode 100644 index 0000000000000000000000000000000000000000..78156cca6030bcf7ac12b75287342915882eb0b3 --- /dev/null +++ b/configs/_base_/det_models/drrg_r50_fpn_unet.py @@ -0,0 +1,21 @@ +model = dict( + type='DRRG', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32), + bbox_head=dict( + type='DRRGHead', + in_channels=32, + text_region_thr=0.3, + center_region_thr=0.4, + loss=dict(type='DRRGLoss'), + postprocessor=dict(type='DRRGPostprocessor', link_thr=0.80))) diff --git a/configs/_base_/det_models/fcenet_r50_fpn.py b/configs/_base_/det_models/fcenet_r50_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..3c2bd12b6295858895c53e5e1700df3962a8a7d5 --- /dev/null +++ b/configs/_base_/det_models/fcenet_r50_fpn.py @@ -0,0 +1,33 @@ +model = dict( + type='FCENet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=False, + style='pytorch'), + neck=dict( + type='mmdet.FPN', + 
in_channels=[512, 1024, 2048], + out_channels=256, + add_extra_convs='on_output', + num_outs=3, + relu_before_extra_convs=True, + act_cfg=None), + bbox_head=dict( + type='FCEHead', + in_channels=256, + scales=(8, 16, 32), + fourier_degree=5, + loss=dict(type='FCELoss', num_sample=50), + postprocessor=dict( + type='FCEPostprocessor', + text_repr_type='quad', + num_reconstr_points=50, + alpha=1.2, + beta=1.0, + score_thr=0.3))) diff --git a/configs/_base_/det_models/fcenet_r50dcnv2_fpn.py b/configs/_base_/det_models/fcenet_r50dcnv2_fpn.py new file mode 100644 index 0000000000000000000000000000000000000000..8e76e39a6e8088ac20671f72fc5ed8448b21250b --- /dev/null +++ b/configs/_base_/det_models/fcenet_r50dcnv2_fpn.py @@ -0,0 +1,35 @@ +model = dict( + type='FCENet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + stage_with_dcn=(False, True, True, True)), + neck=dict( + type='mmdet.FPN', + in_channels=[512, 1024, 2048], + out_channels=256, + add_extra_convs='on_output', + num_outs=3, + relu_before_extra_convs=True, + act_cfg=None), + bbox_head=dict( + type='FCEHead', + in_channels=256, + scales=(8, 16, 32), + fourier_degree=5, + loss=dict(type='FCELoss', num_sample=50), + postprocessor=dict( + type='FCEPostprocessor', + text_repr_type='poly', + num_reconstr_points=50, + alpha=1.0, + beta=2.0, + score_thr=0.3))) diff --git a/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py b/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py new file mode 100644 index 0000000000000000000000000000000000000000..843fd36fc60682706503120f16866ba511cf7310 --- /dev/null +++ b/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py @@ -0,0 +1,126 @@ +# model settings +model = dict( + type='OCRMaskRCNN', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=True, + style='pytorch'), + neck=dict( + type='mmdet.FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[4], + ratios=[0.17, 0.44, 1.13, 2.90, 7.46], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=1, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + 
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=1, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=50), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='OHEMSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py b/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py new file mode 100644 index 0000000000000000000000000000000000000000..abbac26851d4eeef04fa904c8e69c50a58c2b54d --- /dev/null +++ b/configs/_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py @@ -0,0 +1,126 @@ +# model settings +model = dict( + type='OCRMaskRCNN', + text_repr_type='poly', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + style='pytorch'), + neck=dict( + type='mmdet.FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='RPNHead', + in_channels=256, + feat_channels=256, + anchor_generator=dict( + type='AnchorGenerator', + scales=[4], + ratios=[0.17, 0.44, 1.13, 2.90, 7.46], + strides=[4, 8, 16, 32, 64]), + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[.0, .0, .0, .0], + target_stds=[1.0, 1.0, 1.0, 1.0]), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + roi_head=dict( + type='StandardRoIHead', + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7, sample_num=0), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=dict( + type='Shared2FCBBoxHead', + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=80, + bbox_coder=dict( + type='DeltaXYWHBBoxCoder', + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2]), + reg_class_agnostic=False, + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), + loss_bbox=dict(type='L1Loss', loss_weight=1.0)), + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=14, sample_num=0), + out_channels=256, + 
featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='FCNMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=80, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))), + # model training and testing settings + train_cfg=dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + match_low_quality=True, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + match_low_quality=True, + ignore_iof_thr=-1, + gpu_assign_thr=50), + sampler=dict( + type='OHEMSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False)), + test_cfg=dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_per_img=1000, + nms=dict(type='nms', iou_threshold=0.7), + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.5), + max_per_img=100, + mask_thr_binary=0.5))) diff --git a/configs/_base_/det_models/panet_r18_fpem_ffm.py b/configs/_base_/det_models/panet_r18_fpem_ffm.py new file mode 100644 index 0000000000000000000000000000000000000000..a69a4d87603275bc1f89b5f58c722d79274e4fd7 --- /dev/null +++ b/configs/_base_/det_models/panet_r18_fpem_ffm.py @@ -0,0 +1,43 @@ +model_poly = dict( + type='PANet', + backbone=dict( + type='mmdet.ResNet', + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + norm_eval=True, + style='caffe'), + neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]), + bbox_head=dict( + type='PANHead', + in_channels=[128, 128, 128, 128], + out_channels=6, + loss=dict(type='PANLoss'), + postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')), + train_cfg=None, + test_cfg=None) + +model_quad = dict( + type='PANet', + backbone=dict( + type='mmdet.ResNet', + depth=18, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'), + norm_eval=True, + style='caffe'), + neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]), + bbox_head=dict( + type='PANHead', + in_channels=[128, 128, 128, 128], + out_channels=6, + loss=dict(type='PANLoss'), + postprocessor=dict(type='PANPostprocessor', text_repr_type='quad')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/panet_r50_fpem_ffm.py b/configs/_base_/det_models/panet_r50_fpem_ffm.py new file mode 100644 index 0000000000000000000000000000000000000000..4d8812532c73f8945097de8262b539d0109055df --- /dev/null +++ b/configs/_base_/det_models/panet_r50_fpem_ffm.py @@ -0,0 +1,21 @@ +model = dict( + type='PANet', + pretrained='torchvision://resnet50', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='caffe'), + neck=dict(type='FPEM_FFM', in_channels=[256, 
512, 1024, 2048]), + bbox_head=dict( + type='PANHead', + in_channels=[128, 128, 128, 128], + out_channels=6, + loss=dict(type='PANLoss', speedup_bbox_thr=32), + postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/psenet_r50_fpnf.py b/configs/_base_/det_models/psenet_r50_fpnf.py new file mode 100644 index 0000000000000000000000000000000000000000..a3aff0d1325d3b9e25b5ed095cea28d313f611a0 --- /dev/null +++ b/configs/_base_/det_models/psenet_r50_fpnf.py @@ -0,0 +1,51 @@ +model_poly = dict( + type='PSENet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPNF', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + fusion_type='concat'), + bbox_head=dict( + type='PSEHead', + in_channels=[256], + out_channels=7, + loss=dict(type='PSELoss'), + postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')), + train_cfg=None, + test_cfg=None) + +model_quad = dict( + type='PSENet', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='SyncBN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPNF', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + fusion_type='concat'), + bbox_head=dict( + type='PSEHead', + in_channels=[256], + out_channels=7, + loss=dict(type='PSELoss'), + postprocessor=dict(type='PSEPostprocessor', text_repr_type='quad')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_models/textsnake_r50_fpn_unet.py b/configs/_base_/det_models/textsnake_r50_fpn_unet.py new file mode 100644 index 0000000000000000000000000000000000000000..7d74f376b8c635451a3036e780ffc88e7640bf2c --- /dev/null +++ b/configs/_base_/det_models/textsnake_r50_fpn_unet.py @@ -0,0 +1,22 @@ +model = dict( + type='TextSnake', + backbone=dict( + type='mmdet.ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=-1, + norm_cfg=dict(type='BN', requires_grad=True), + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), + norm_eval=True, + style='caffe'), + neck=dict( + type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32), + bbox_head=dict( + type='TextSnakeHead', + in_channels=32, + loss=dict(type='TextSnakeLoss'), + postprocessor=dict( + type='TextSnakePostprocessor', text_repr_type='poly')), + train_cfg=None, + test_cfg=None) diff --git a/configs/_base_/det_pipelines/dbnet_pipeline.py b/configs/_base_/det_pipelines/dbnet_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..40eee02db3b68d5682841532d1122c92bdca2a65 --- /dev/null +++ b/configs/_base_/det_pipelines/dbnet_pipeline.py @@ -0,0 +1,88 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline_r18 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ImgAug', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), 
['Resize', [0.5, 3.0]]]), + dict(type='EastRandomCrop', target_size=(640, 640)), + dict(type='DBNetTargets', shrink_ratio=0.4), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'], + visualize=dict(flag=False, boundary_key='gt_shrink')), + dict( + type='Collect', + keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask']) +] + +test_pipeline_1333_736 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 736), # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for dbnet_r50dcnv2_fpnc +img_norm_cfg_r50dcnv2 = dict( + mean=[122.67891434, 116.66876762, 104.00698793], + std=[58.395, 57.12, 57.375], + to_rgb=True) + +train_pipeline_r50dcnv2 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg_r50dcnv2), + dict( + type='ImgAug', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]), + dict(type='EastRandomCrop', target_size=(640, 640)), + dict(type='DBNetTargets', shrink_ratio=0.4), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'], + visualize=dict(flag=False, boundary_key='gt_shrink')), + dict( + type='Collect', + keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask']) +] + +test_pipeline_4068_1024 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=(4068, 1024), # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg_r50dcnv2), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/drrg_pipeline.py b/configs/_base_/det_pipelines/drrg_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..09189b51cda03d4557d58f5193366caeaf71bcc9 --- /dev/null +++ b/configs/_base_/det_pipelines/drrg_pipeline.py @@ -0,0 +1,60 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomScaling', size=800, scale=(0.75, 2.5)), + dict( + type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2), + dict( + type='RandomCropPolyInstances', + instance_key='gt_masks', + crop_ratio=0.8, + min_side_ratio=0.3), + dict( + type='RandomRotatePolyInstances', + rotate_ratio=0.5, + max_angle=60, + pad_with_fixed_color=False), + dict(type='SquareResizePad', target_size=800, pad_ratio=0.6), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='DRRGTargets'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + 
keys=[ + 'gt_text_mask', 'gt_center_region_mask', 'gt_mask', + 'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map', + 'gt_cos_map', 'gt_comp_attribs' + ], + visualize=dict(flag=False, boundary_key='gt_text_mask')), + dict( + type='Collect', + keys=[ + 'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask', + 'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map', + 'gt_cos_map', 'gt_comp_attribs' + ]) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=(1024, 640), # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/fcenet_pipeline.py b/configs/_base_/det_pipelines/fcenet_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..badb4536b10bd74760fdf519fe03f5c8d2bd7767 --- /dev/null +++ b/configs/_base_/det_pipelines/fcenet_pipeline.py @@ -0,0 +1,118 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# for icdar2015 +leval_prop_range_icdar2015 = ((0, 0.4), (0.3, 0.7), (0.6, 1.0)) +train_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='ColorJitter', + brightness=32.0 / 255, + saturation=0.5, + contrast=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)), + dict( + type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2), + dict( + type='RandomCropPolyInstances', + instance_key='gt_masks', + crop_ratio=0.8, + min_side_ratio=0.3), + dict( + type='RandomRotatePolyInstances', + rotate_ratio=0.5, + max_angle=30, + pad_with_fixed_color=False), + dict(type='SquareResizePad', target_size=800, pad_ratio=0.6), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='Pad', size_divisor=32), + dict( + type='FCENetTargets', + fourier_degree=5, + level_proportion_range=leval_prop_range_icdar2015), + dict( + type='CustomFormatBundle', + keys=['p3_maps', 'p4_maps', 'p5_maps'], + visualize=dict(flag=False, boundary_key=None)), + dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps']) +] + +img_scale_icdar2015 = (2260, 2260) +test_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_icdar2015, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for ctw1500 +leval_prop_range_ctw1500 = ((0, 0.25), (0.2, 0.65), (0.55, 1.0)) +train_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict( + type='ColorJitter', + brightness=32.0 / 255, + saturation=0.5, + contrast=0.5), + dict(type='Normalize', **img_norm_cfg), + dict(type='RandomScaling', size=800, scale=(3. / 4, 5. 
/ 2)), + dict( + type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2), + dict( + type='RandomCropPolyInstances', + instance_key='gt_masks', + crop_ratio=0.8, + min_side_ratio=0.3), + dict( + type='RandomRotatePolyInstances', + rotate_ratio=0.5, + max_angle=30, + pad_with_fixed_color=False), + dict(type='SquareResizePad', target_size=800, pad_ratio=0.6), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='Pad', size_divisor=32), + dict( + type='FCENetTargets', + fourier_degree=5, + level_proportion_range=leval_prop_range_ctw1500), + dict( + type='CustomFormatBundle', + keys=['p3_maps', 'p4_maps', 'p5_maps'], + visualize=dict(flag=False, boundary_key=None)), + dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps']) +] + +img_scale_ctw1500 = (1080, 736) +test_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_ctw1500, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/maskrcnn_pipeline.py b/configs/_base_/det_pipelines/maskrcnn_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..fff3e071ea115843752f34de8141fa982b8ad14b --- /dev/null +++ b/configs/_base_/det_pipelines/maskrcnn_pipeline.py @@ -0,0 +1,57 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True), + dict( + type='ScaleAspectJitter', + img_scale=None, + keep_ratio=False, + resize_type='indep_sample_in_range', + scale_range=(640, 2560)), + dict(type='RandomFlip', flip_ratio=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='RandomCropInstances', + target_size=(640, 640), + mask_type='union_all', + instance_key='gt_masks'), + dict(type='Pad', size_divisor=32), + dict(type='DefaultFormatBundle'), + dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), +] + +# for ctw1500 +img_scale_ctw1500 = (1600, 1600) +test_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_ctw1500, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for icdar2015 +img_scale_icdar2015 = (1920, 1920) +test_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_icdar2015, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='RandomFlip'), + dict(type='Normalize', **img_norm_cfg), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/panet_pipeline.py b/configs/_base_/det_pipelines/panet_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..eae50de4fab0536d114509854f9250c0d613cb3c --- /dev/null +++ b/configs/_base_/det_pipelines/panet_pipeline.py @@ -0,0 +1,156 @@ 
+img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +# for ctw1500 +img_scale_train_ctw1500 = [(3000, 640)] +shrink_ratio_train_ctw1500 = (1.0, 0.7) +target_size_train_ctw1500 = (640, 640) +train_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ScaleAspectJitter', + img_scale=img_scale_train_ctw1500, + ratio_range=(0.7, 1.3), + aspect_ratio_range=(0.9, 1.1), + multiscale_mode='value', + keep_ratio=False), + # shrink_ratio is from big to small. The 1st must be 1.0 + dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_ctw1500), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='RandomRotateTextDet'), + dict( + type='RandomCropInstances', + target_size=target_size_train_ctw1500, + instance_key='gt_kernels'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_kernels', 'gt_mask'], + visualize=dict(flag=False, boundary_key='gt_kernels')), + dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask']) +] + +img_scale_test_ctw1500 = (3000, 640) +test_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_test_ctw1500, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for icdar2015 +img_scale_train_icdar2015 = [(3000, 736)] +shrink_ratio_train_icdar2015 = (1.0, 0.5) +target_size_train_icdar2015 = (736, 736) +train_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ScaleAspectJitter', + img_scale=img_scale_train_icdar2015, + ratio_range=(0.7, 1.3), + aspect_ratio_range=(0.9, 1.1), + multiscale_mode='value', + keep_ratio=False), + dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2015), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='RandomRotateTextDet'), + dict( + type='RandomCropInstances', + target_size=target_size_train_icdar2015, + instance_key='gt_kernels'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_kernels', 'gt_mask'], + visualize=dict(flag=False, boundary_key='gt_kernels')), + dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask']) +] + +img_scale_test_icdar2015 = (1333, 736) +test_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_test_icdar2015, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for icdar2017 +img_scale_train_icdar2017 = [(3000, 800)] +shrink_ratio_train_icdar2017 = (1.0, 0.5) +target_size_train_icdar2017 = (800, 800) 
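# ---------------------------------------------------------------------------
# Aside: none of these _base_ fragments is used directly. A full config
# composes a model, datasets, pipelines and the runtime through mmcv's
# `_base_` inheritance and `{{_base_.*}}` references. Below is a minimal
# sketch for PANet-r18 on ICDAR2015, following the usual MMOCR 0.x layout
# (the schedule file path is an assumption; no schedule configs appear in
# this diff):
_base_ = [
    '../../_base_/default_runtime.py',
    '../../_base_/schedules/schedule_adam_600e.py',
    '../../_base_/det_models/panet_r18_fpem_ffm.py',
    '../../_base_/det_datasets/icdar2015.py',
    '../../_base_/det_pipelines/panet_pipeline.py',
]
model = {{_base_.model_quad}}
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=2,
    # UniformConcatDataset applies one pipeline uniformly to every dataset
    # gathered in the train_list/test_list variables defined by the fragments
    train=dict(
        type='UniformConcatDataset',
        datasets=train_list,
        pipeline=train_pipeline_icdar2015),
    val=dict(
        type='UniformConcatDataset',
        datasets=test_list,
        pipeline=test_pipeline_icdar2015),
    test=dict(
        type='UniformConcatDataset',
        datasets=test_list,
        pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')
# ---------------------------------------------------------------------------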
+train_pipeline_icdar2017 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ScaleAspectJitter', + img_scale=img_scale_train_icdar2017, + ratio_range=(0.7, 1.3), + aspect_ratio_range=(0.9, 1.1), + multiscale_mode='value', + keep_ratio=False), + dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2017), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='RandomRotateTextDet'), + dict( + type='RandomCropInstances', + target_size=target_size_train_icdar2017, + instance_key='gt_kernels'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_kernels', 'gt_mask'], + visualize=dict(flag=False, boundary_key='gt_kernels')), + dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask']) +] + +img_scale_test_icdar2017 = (1333, 800) +test_pipeline_icdar2017 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_test_icdar2017, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/psenet_pipeline.py b/configs/_base_/det_pipelines/psenet_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..fd99dc3c2eb14921bbbf64ae861e5e5d6aa55c66 --- /dev/null +++ b/configs/_base_/det_pipelines/psenet_pipeline.py @@ -0,0 +1,70 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ScaleAspectJitter', + img_scale=[(3000, 736)], + ratio_range=(0.5, 3), + aspect_ratio_range=(1, 1), + multiscale_mode='value', + long_size_bound=1280, + short_size_bound=640, + resize_type='long_short_bound', + keep_ratio=False), + dict(type='PSENetTargets'), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='RandomRotateTextDet'), + dict( + type='RandomCropInstances', + target_size=(640, 640), + instance_key='gt_kernels'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_kernels', 'gt_mask'], + visualize=dict(flag=False, boundary_key='gt_kernels')), + dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask']) +] + +# for ctw1500 +img_scale_test_ctw1500 = (1280, 1280) +test_pipeline_ctw1500 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=img_scale_test_ctw1500, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] + +# for icdar2015 +img_scale_test_icdar2015 = (2240, 2240) +test_pipeline_icdar2015 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + 
type='MultiScaleFlipAug', + img_scale=img_scale_test_icdar2015, # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/det_pipelines/textsnake_pipeline.py b/configs/_base_/det_pipelines/textsnake_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..dc4b44819e5c3f3f725df096903fc0a809313913 --- /dev/null +++ b/configs/_base_/det_pipelines/textsnake_pipeline.py @@ -0,0 +1,65 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='RandomCropPolyInstances', + instance_key='gt_masks', + crop_ratio=0.65, + min_side_ratio=0.3), + dict( + type='RandomRotatePolyInstances', + rotate_ratio=0.5, + max_angle=20, + pad_with_fixed_color=False), + dict( + type='ScaleAspectJitter', + img_scale=[(3000, 736)], # unused + ratio_range=(0.7, 1.3), + aspect_ratio_range=(0.9, 1.1), + multiscale_mode='value', + long_size_bound=800, + short_size_bound=480, + resize_type='long_short_bound', + keep_ratio=False), + dict(type='SquareResizePad', target_size=800, pad_ratio=0.6), + dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'), + dict(type='TextSnakeTargets'), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=[ + 'gt_text_mask', 'gt_center_region_mask', 'gt_mask', + 'gt_radius_map', 'gt_sin_map', 'gt_cos_map' + ], + visualize=dict(flag=False, boundary_key='gt_text_mask')), + dict( + type='Collect', + keys=[ + 'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask', + 'gt_radius_map', 'gt_sin_map', 'gt_cos_map' + ]) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='MultiScaleFlipAug', + img_scale=(1333, 736), # used by Resize + flip=False, + transforms=[ + dict(type='Resize', keep_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='ImageToTensor', keys=['img']), + dict(type='Collect', keys=['img']), + ]) +] diff --git a/configs/_base_/recog_datasets/MJ_train.py b/configs/_base_/recog_datasets/MJ_train.py new file mode 100644 index 0000000000000000000000000000000000000000..be42cc47035d02403a036330eb0af7d0058b8675 --- /dev/null +++ b/configs/_base_/recog_datasets/MJ_train.py @@ -0,0 +1,21 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: Syn90k + +train_root = 'data/mixture/Syn90k' + +train_img_prefix = f'{train_root}/mnt/ramdisk/max/90kDICT32px' +train_ann_file = f'{train_root}/label.lmdb' + +train = dict( + type='OCRDataset', + img_prefix=train_img_prefix, + ann_file=train_ann_file, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=False) + +train_list = [train] diff --git a/configs/_base_/recog_datasets/ST_MJ_alphanumeric_train.py b/configs/_base_/recog_datasets/ST_MJ_alphanumeric_train.py new file mode 100644 index 0000000000000000000000000000000000000000..5fc1abac0a48b9deef3ac41353dc24d3748d2426 --- /dev/null +++ 
b/configs/_base_/recog_datasets/ST_MJ_alphanumeric_train.py @@ -0,0 +1,31 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: SynthText, Syn90k +# Both annotations are filtered so that +# only alphanumeric terms are left + +train_root = 'data/mixture' + +train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px' +train_ann_file1 = f'{train_root}/Syn90k/label.lmdb' + +train1 = dict( + type='OCRDataset', + img_prefix=train_img_prefix1, + ann_file=train_ann_file1, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=False) + +train_img_prefix2 = f'{train_root}/SynthText/' + \ + 'synthtext/SynthText_patch_horizontal' +train_ann_file2 = f'{train_root}/SynthText/alphanumeric_label.lmdb' + +train2 = {key: value for key, value in train1.items()} +train2['img_prefix'] = train_img_prefix2 +train2['ann_file'] = train_ann_file2 + +train_list = [train1, train2] diff --git a/configs/_base_/recog_datasets/ST_MJ_train.py b/configs/_base_/recog_datasets/ST_MJ_train.py new file mode 100644 index 0000000000000000000000000000000000000000..aedccc9df54829c2b841ba47882dea0cbcc8b23d --- /dev/null +++ b/configs/_base_/recog_datasets/ST_MJ_train.py @@ -0,0 +1,29 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: SynthText, Syn90k + +train_root = 'data/mixture' + +train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px' +train_ann_file1 = f'{train_root}/Syn90k/label.lmdb' + +train1 = dict( + type='OCRDataset', + img_prefix=train_img_prefix1, + ann_file=train_ann_file1, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=False) + +train_img_prefix2 = f'{train_root}/SynthText/' + \ + 'synthtext/SynthText_patch_horizontal' +train_ann_file2 = f'{train_root}/SynthText/label.lmdb' + +train2 = {key: value for key, value in train1.items()} +train2['img_prefix'] = train_img_prefix2 +train2['ann_file'] = train_ann_file2 + +train_list = [train1, train2] diff --git a/configs/_base_/recog_datasets/ST_SA_MJ_real_train.py b/configs/_base_/recog_datasets/ST_SA_MJ_real_train.py new file mode 100644 index 0000000000000000000000000000000000000000..87dab3352d92c3105684908f50b9b8f6bcc71a16 --- /dev/null +++ b/configs/_base_/recog_datasets/ST_SA_MJ_real_train.py @@ -0,0 +1,81 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: SynthText, SynthAdd, Syn90k +# Real Datasets: IC11, IC13, IC15, COCO-Text, IIIT5K + +train_prefix = 'data/mixture' + +train_img_prefix1 = f'{train_prefix}/icdar_2011' +train_img_prefix2 = f'{train_prefix}/icdar_2013' +train_img_prefix3 = f'{train_prefix}/icdar_2015' +train_img_prefix4 = f'{train_prefix}/coco_text' +train_img_prefix5 = f'{train_prefix}/IIIT5K' +train_img_prefix6 = f'{train_prefix}/SynthText_Add' +train_img_prefix7 = f'{train_prefix}/SynthText' +train_img_prefix8 = f'{train_prefix}/Syn90k' + +# no trailing commas here: they would turn each ann_file into a 1-tuple +# instead of the string path that AnnFileLoader expects +train_ann_file1 = f'{train_prefix}/icdar_2011/train_label.txt' +train_ann_file2 = f'{train_prefix}/icdar_2013/train_label.txt' +train_ann_file3 = f'{train_prefix}/icdar_2015/train_label.txt' +train_ann_file4 = f'{train_prefix}/coco_text/train_label.txt' +train_ann_file5 = f'{train_prefix}/IIIT5K/train_label.txt' +train_ann_file6 = f'{train_prefix}/SynthText_Add/label.txt' +train_ann_file7 = f'{train_prefix}/SynthText/shuffle_labels.txt' +train_ann_file8 =
f'{train_prefix}/Syn90k/shuffle_labels.txt' + +train1 = dict( + type='OCRDataset', + img_prefix=train_img_prefix1, + ann_file=train_ann_file1, + loader=dict( + type='AnnFileLoader', + repeat=20, + file_format='txt', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +train2 = {key: value for key, value in train1.items()} +train2['img_prefix'] = train_img_prefix2 +train2['ann_file'] = train_ann_file2 + +train3 = {key: value for key, value in train1.items()} +train3['img_prefix'] = train_img_prefix3 +train3['ann_file'] = train_ann_file3 + +train4 = {key: value for key, value in train1.items()} +train4['img_prefix'] = train_img_prefix4 +train4['ann_file'] = train_ann_file4 + +train5 = {key: value for key, value in train1.items()} +train5['img_prefix'] = train_img_prefix5 +train5['ann_file'] = train_ann_file5 + +train6 = dict( + type='OCRDataset', + img_prefix=train_img_prefix6, + ann_file=train_ann_file6, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +train7 = {key: value for key, value in train6.items()} +train7['img_prefix'] = train_img_prefix7 +train7['ann_file'] = train_ann_file7 + +train8 = {key: value for key, value in train6.items()} +train8['img_prefix'] = train_img_prefix8 +train8['ann_file'] = train_ann_file8 + +train_list = [train1, train2, train3, train4, train5, train6, train7, train8] diff --git a/configs/_base_/recog_datasets/ST_SA_MJ_train.py b/configs/_base_/recog_datasets/ST_SA_MJ_train.py new file mode 100644 index 0000000000000000000000000000000000000000..bc272bf9fad66ab89de3dd672618a7ae01c142f7 --- /dev/null +++ b/configs/_base_/recog_datasets/ST_SA_MJ_train.py @@ -0,0 +1,48 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: SynthText, Syn90k + +train_root = 'data/mixture' + +train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px' +train_ann_file1 = f'{train_root}/Syn90k/label.lmdb' + +train1 = dict( + type='OCRDataset', + img_prefix=train_img_prefix1, + ann_file=train_ann_file1, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=False) + +train_img_prefix2 = f'{train_root}/SynthText/' + \ + 'synthtext/SynthText_patch_horizontal' +train_ann_file2 = f'{train_root}/SynthText/label.lmdb' + +train_img_prefix3 = f'{train_root}/SynthText_Add' +train_ann_file3 = f'{train_root}/SynthText_Add/label.txt' + +train2 = {key: value for key, value in train1.items()} +train2['img_prefix'] = train_img_prefix2 +train2['ann_file'] = train_ann_file2 + +train3 = dict( + type='OCRDataset', + img_prefix=train_img_prefix3, + ann_file=train_ann_file3, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +train_list = [train1, train2, train3] diff --git a/configs/_base_/recog_datasets/ST_charbox_train.py b/configs/_base_/recog_datasets/ST_charbox_train.py new file mode 100644 index 0000000000000000000000000000000000000000..45d50d0d151fca5c4e9118d1f6b1f094f8a51324 --- /dev/null +++ b/configs/_base_/recog_datasets/ST_charbox_train.py @@ -0,0 +1,23 @@ +# Text Recognition Training set, including: +# Synthetic Datasets: SynthText (with 
character level boxes) + +train_img_root = 'data/mixture' + +train_img_prefix = f'{train_img_root}/SynthText' + +train_ann_file = f'{train_img_root}/SynthText/instances_train.txt' + +train = dict( + type='OCRSegDataset', + img_prefix=train_img_prefix, + ann_file=train_ann_file, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineJsonParser', keys=['file_name', 'annotations', 'text'])), + pipeline=None, + test_mode=False) + +train_list = [train] diff --git a/configs/_base_/recog_datasets/academic_test.py b/configs/_base_/recog_datasets/academic_test.py new file mode 100644 index 0000000000000000000000000000000000000000..888ab3d3be5b40e15596086d4af567bd37f6ec05 --- /dev/null +++ b/configs/_base_/recog_datasets/academic_test.py @@ -0,0 +1,57 @@ +# Text Recognition Testing set, including: +# Regular Datasets: IIIT5K, SVT, IC13 +# Irregular Datasets: IC15, SVTP, CT80 + +test_root = 'data/mixture' + +test_img_prefix1 = f'{test_root}/IIIT5K/' +test_img_prefix2 = f'{test_root}/svt/' +test_img_prefix3 = f'{test_root}/icdar_2013/' +test_img_prefix4 = f'{test_root}/icdar_2015/' +test_img_prefix5 = f'{test_root}/svtp/' +test_img_prefix6 = f'{test_root}/ct80/' + +test_ann_file1 = f'{test_root}/IIIT5K/test_label.txt' +test_ann_file2 = f'{test_root}/svt/test_label.txt' +test_ann_file3 = f'{test_root}/icdar_2013/test_label_1015.txt' +test_ann_file4 = f'{test_root}/icdar_2015/test_label.txt' +test_ann_file5 = f'{test_root}/svtp/test_label.txt' +test_ann_file6 = f'{test_root}/ct80/test_label.txt' + +test1 = dict( + type='OCRDataset', + img_prefix=test_img_prefix1, + ann_file=test_ann_file1, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=True) + +test2 = {key: value for key, value in test1.items()} +test2['img_prefix'] = test_img_prefix2 +test2['ann_file'] = test_ann_file2 + +test3 = {key: value for key, value in test1.items()} +test3['img_prefix'] = test_img_prefix3 +test3['ann_file'] = test_ann_file3 + +test4 = {key: value for key, value in test1.items()} +test4['img_prefix'] = test_img_prefix4 +test4['ann_file'] = test_ann_file4 + +test5 = {key: value for key, value in test1.items()} +test5['img_prefix'] = test_img_prefix5 +test5['ann_file'] = test_ann_file5 + +test6 = {key: value for key, value in test1.items()} +test6['img_prefix'] = test_img_prefix6 +test6['ann_file'] = test_ann_file6 + +test_list = [test1, test2, test3, test4, test5, test6] diff --git a/configs/_base_/recog_datasets/seg_toy_data.py b/configs/_base_/recog_datasets/seg_toy_data.py new file mode 100644 index 0000000000000000000000000000000000000000..7f0b7d8f4c520ec7847d69743d8e430b8795b656 --- /dev/null +++ b/configs/_base_/recog_datasets/seg_toy_data.py @@ -0,0 +1,34 @@ +prefix = 'tests/data/ocr_char_ann_toy_dataset/' + +train = dict( + type='OCRSegDataset', + img_prefix=f'{prefix}/imgs', + ann_file=f'{prefix}/instances_train.txt', + loader=dict( + type='AnnFileLoader', + repeat=100, + file_format='txt', + parser=dict( + type='LineJsonParser', keys=['file_name', 'annotations', 'text'])), + pipeline=None, + test_mode=True) + +test = dict( + type='OCRDataset', + img_prefix=f'{prefix}/imgs', + ann_file=f'{prefix}/instances_test.txt', + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='txt', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + 
test_mode=True) + +train_list = [train] + +test_list = [test] diff --git a/configs/_base_/recog_datasets/toy_data.py b/configs/_base_/recog_datasets/toy_data.py new file mode 100644 index 0000000000000000000000000000000000000000..259f14943c027f2719ebf30858ee9572ff5584ea --- /dev/null +++ b/configs/_base_/recog_datasets/toy_data.py @@ -0,0 +1,54 @@ +dataset_type = 'OCRDataset' + +root = 'tests/data/ocr_toy_dataset' +img_prefix = f'{root}/imgs' +train_anno_file1 = f'{root}/label.txt' + +train1 = dict( + type=dataset_type, + img_prefix=img_prefix, + ann_file=train_anno_file1, + loader=dict( + type='AnnFileLoader', + repeat=100, + file_format='txt', + file_storage_backend='disk', + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +train_anno_file2 = f'{root}/label.lmdb' +train2 = dict( + type=dataset_type, + img_prefix=img_prefix, + ann_file=train_anno_file2, + loader=dict( + type='AnnFileLoader', + repeat=100, + file_format='lmdb', + file_storage_backend='disk', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=False) + +test_anno_file1 = f'{root}/label.lmdb' +test = dict( + type=dataset_type, + img_prefix=img_prefix, + ann_file=test_anno_file1, + loader=dict( + type='AnnFileLoader', + repeat=1, + file_format='lmdb', + file_storage_backend='disk', + parser=dict(type='LineJsonParser', keys=['filename', 'text'])), + pipeline=None, + test_mode=True) + +train_list = [train1, train2] + +test_list = [test] diff --git a/configs/_base_/recog_models/abinet.py b/configs/_base_/recog_models/abinet.py new file mode 100644 index 0000000000000000000000000000000000000000..19c6b66731f0b205741037ece8d6b49f91d0110b --- /dev/null +++ b/configs/_base_/recog_models/abinet.py @@ -0,0 +1,70 @@ +# num_chars depends on the configuration of label_convertor. The actual +# dictionary size is 36 + 1 (<BOS/EOS>).
+# TODO: Automatically update num_chars based on the configuration of +# label_convertor +num_chars = 37 +max_seq_len = 26 + +label_convertor = dict( + type='ABIConvertor', + dict_type='DICT36', + with_unknown=False, + with_padding=False, + lower=True, +) + +model = dict( + type='ABINet', + backbone=dict(type='ResNetABI'), + encoder=dict( + type='ABIVisionModel', + encoder=dict( + type='TransformerEncoder', + n_layers=3, + n_head=8, + d_model=512, + d_inner=2048, + dropout=0.1, + max_len=8 * 32, + ), + decoder=dict( + type='ABIVisionDecoder', + in_channels=512, + num_channels=64, + attn_height=8, + attn_width=32, + attn_mode='nearest', + use_result='feature', + num_chars=num_chars, + max_seq_len=max_seq_len, + init_cfg=dict(type='Xavier', layer='Conv2d')), + ), + decoder=dict( + type='ABILanguageDecoder', + d_model=512, + n_head=8, + d_inner=2048, + n_layers=4, + dropout=0.1, + detach_tokens=True, + use_self_attn=False, + pad_idx=num_chars - 1, + num_chars=num_chars, + max_seq_len=max_seq_len, + init_cfg=None), + fuser=dict( + type='ABIFuser', + d_model=512, + num_chars=num_chars, + init_cfg=None, + max_seq_len=max_seq_len, + ), + loss=dict( + type='ABILoss', + enc_weight=1.0, + dec_weight=1.0, + fusion_weight=1.0, + num_classes=num_chars), + label_convertor=label_convertor, + max_seq_len=max_seq_len, + iter_size=3) diff --git a/configs/_base_/recog_models/crnn.py b/configs/_base_/recog_models/crnn.py new file mode 100644 index 0000000000000000000000000000000000000000..b316c6a8a7f4f79c0cff3062583391b746f3cad8 --- /dev/null +++ b/configs/_base_/recog_models/crnn.py @@ -0,0 +1,12 @@ +label_convertor = dict( + type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True) + +model = dict( + type='CRNNNet', + preprocessor=None, + backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1), + encoder=None, + decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True), + loss=dict(type='CTCLoss'), + label_convertor=label_convertor, + pretrained=None) diff --git a/configs/_base_/recog_models/crnn_tps.py b/configs/_base_/recog_models/crnn_tps.py new file mode 100644 index 0000000000000000000000000000000000000000..9719eb3c521cee55beee1711a73bd29a07d10366 --- /dev/null +++ b/configs/_base_/recog_models/crnn_tps.py @@ -0,0 +1,18 @@ +# model +label_convertor = dict( + type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True) + +model = dict( + type='CRNNNet', + preprocessor=dict( + type='TPSPreprocessor', + num_fiducial=20, + img_size=(32, 100), + rectified_img_size=(32, 100), + num_img_channel=1), + backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1), + encoder=None, + decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True), + loss=dict(type='CTCLoss'), + label_convertor=label_convertor, + pretrained=None) diff --git a/configs/_base_/recog_models/master.py b/configs/_base_/recog_models/master.py new file mode 100644 index 0000000000000000000000000000000000000000..39eaef248e132f7ccd6675b63ba21ef41e350c3b --- /dev/null +++ b/configs/_base_/recog_models/master.py @@ -0,0 +1,61 @@ +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='MASTER', + backbone=dict( + type='ResNet', + in_channels=3, + stem_channels=[64, 128], + block_cfgs=dict( + type='BasicBlock', + plugins=dict( + cfg=dict( + type='GCAModule', + ratio=0.0625, + n_head=1, + pooling_type='att', + is_att_scale=False, + fusion_type='channel_add'), + position='after_conv2')), + arch_layers=[1, 2, 5, 3], + 
arch_channels=[256, 256, 512, 512], + strides=[1, 1, 1, 1], + plugins=[ + dict( + cfg=dict(type='Maxpool2d', kernel_size=2, stride=(2, 2)), + stages=(True, True, False, False), + position='before_stage'), + dict( + cfg=dict(type='Maxpool2d', kernel_size=(2, 1), stride=(2, 1)), + stages=(False, False, True, False), + position='before_stage'), + dict( + cfg=dict( + type='ConvModule', + kernel_size=3, + stride=1, + padding=1, + norm_cfg=dict(type='BN'), + act_cfg=dict(type='ReLU')), + stages=(True, True, True, True), + position='after_stage') + ], + init_cfg=[ + dict(type='Kaiming', layer='Conv2d'), + dict(type='Constant', val=1, layer='BatchNorm2d'), + ]), + encoder=None, + decoder=dict( + type='MasterDecoder', + d_model=512, + n_head=8, + attn_drop=0., + ffn_drop=0., + d_inner=2048, + n_layers=3, + feat_pe_drop=0.2, + feat_size=6 * 40), + loss=dict(type='TFLoss', reduction='mean'), + label_convertor=label_convertor, + max_seq_len=30) diff --git a/configs/_base_/recog_models/nrtr_modality_transform.py b/configs/_base_/recog_models/nrtr_modality_transform.py new file mode 100644 index 0000000000000000000000000000000000000000..3c2e87f4318959d3fb6c1c84c11360ff3dbd4eb1 --- /dev/null +++ b/configs/_base_/recog_models/nrtr_modality_transform.py @@ -0,0 +1,11 @@ +label_convertor = dict( + type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True) + +model = dict( + type='NRTR', + backbone=dict(type='NRTRModalityTransform'), + encoder=dict(type='NRTREncoder', n_layers=12), + decoder=dict(type='NRTRDecoder'), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=40) diff --git a/configs/_base_/recog_models/robust_scanner.py b/configs/_base_/recog_models/robust_scanner.py new file mode 100644 index 0000000000000000000000000000000000000000..4cc2fa108855a102e1f4e48b6f94bac3b7f7d644 --- /dev/null +++ b/configs/_base_/recog_models/robust_scanner.py @@ -0,0 +1,24 @@ +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +hybrid_decoder = dict(type='SequenceAttentionDecoder') + +position_decoder = dict(type='PositionAttentionDecoder') + +model = dict( + type='RobustScanner', + backbone=dict(type='ResNet31OCR'), + encoder=dict( + type='ChannelReductionEncoder', + in_channels=512, + out_channels=128, + ), + decoder=dict( + type='RobustScannerDecoder', + dim_input=512, + dim_model=128, + hybrid_decoder=hybrid_decoder, + position_decoder=position_decoder), + loss=dict(type='SARLoss'), + label_convertor=label_convertor, + max_seq_len=30) diff --git a/configs/_base_/recog_models/sar.py b/configs/_base_/recog_models/sar.py new file mode 100644 index 0000000000000000000000000000000000000000..8438d9b921f5124c52fcd9ff566e28cddeb33041 --- /dev/null +++ b/configs/_base_/recog_models/sar.py @@ -0,0 +1,24 @@ +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='SARNet', + backbone=dict(type='ResNet31OCR'), + encoder=dict( + type='SAREncoder', + enc_bi_rnn=False, + enc_do_rnn=0.1, + enc_gru=False, + ), + decoder=dict( + type='ParallelSARDecoder', + enc_bi_rnn=False, + dec_bi_rnn=False, + dec_do_rnn=0, + dec_gru=False, + pred_dropout=0.1, + d_k=512, + pred_concat=True), + loss=dict(type='SARLoss'), + label_convertor=label_convertor, + max_seq_len=30) diff --git a/configs/_base_/recog_models/satrn.py b/configs/_base_/recog_models/satrn.py new file mode 100644 index 0000000000000000000000000000000000000000..f7a6de8637c77a18a930e032bfb752434b173ba4 --- /dev/null +++ 
b/configs/_base_/recog_models/satrn.py @@ -0,0 +1,11 @@ +label_convertor = dict( + type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True) + +model = dict( + type='SATRN', + backbone=dict(type='ShallowCNN'), + encoder=dict(type='SatrnEncoder'), + decoder=dict(type='TFDecoder'), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=40) diff --git a/configs/_base_/recog_models/seg.py b/configs/_base_/recog_models/seg.py new file mode 100644 index 0000000000000000000000000000000000000000..291e547ff45de81ddd512bf04ce0af7957b89ae7 --- /dev/null +++ b/configs/_base_/recog_models/seg.py @@ -0,0 +1,21 @@ +label_convertor = dict( + type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True) + +model = dict( + type='SegRecognizer', + backbone=dict( + type='ResNet31OCR', + layers=[1, 2, 5, 3], + channels=[32, 64, 128, 256, 512, 512], + out_indices=[0, 1, 2, 3], + stage4_pool_cfg=dict(kernel_size=2, stride=2), + last_stage_pool=True), + neck=dict( + type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256), + head=dict( + type='SegHead', + in_channels=256, + upsample_param=dict(scale_factor=2.0, mode='nearest')), + loss=dict( + type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=True), + label_convertor=label_convertor) diff --git a/configs/_base_/recog_pipelines/abinet_pipeline.py b/configs/_base_/recog_pipelines/abinet_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..3a54dfe6a8c310ab74f9a01b4671d7288436d0a7 --- /dev/null +++ b/configs/_base_/recog_pipelines/abinet_pipeline.py @@ -0,0 +1,96 @@ +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=32, + min_width=128, + max_width=128, + keep_aspect_ratio=False, + width_downsample_ratio=0.25), + dict( + type='RandomWrapper', + p=0.5, + transforms=[ + dict( + type='OneOfWrapper', + transforms=[ + dict( + type='RandomRotateTextDet', + max_angle=15, + ), + dict( + type='TorchVisionWrapper', + op='RandomAffine', + degrees=15, + translate=(0.3, 0.3), + scale=(0.5, 2.), + shear=(-45, 45), + ), + dict( + type='TorchVisionWrapper', + op='RandomPerspective', + distortion_scale=0.5, + p=1, + ), + ]) + ], + ), + dict( + type='RandomWrapper', + p=0.25, + transforms=[ + dict(type='PyramidRescale'), + dict( + type='Albu', + transforms=[ + dict(type='GaussNoise', var_limit=(20, 20), p=0.5), + dict(type='MotionBlur', blur_limit=6, p=0.5), + ]), + ]), + dict( + type='RandomWrapper', + p=0.25, + transforms=[ + dict( + type='TorchVisionWrapper', + op='ColorJitter', + brightness=0.5, + saturation=0.5, + contrast=0.5, + hue=0.1), + ]), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio', + 'resize_shape' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=32, + min_width=128, + max_width=128, + keep_aspect_ratio=False, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'valid_ratio', + 'resize_shape', 'img_norm_cfg', 'ori_filename' + ]), + ]) +] diff --git a/configs/_base_/recog_pipelines/crnn_pipeline.py 
b/configs/_base_/recog_pipelines/crnn_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..3173eac695d40ac95e9929896cf82c753624b073 --- /dev/null +++ b/configs/_base_/recog_pipelines/crnn_pipeline.py @@ -0,0 +1,35 @@ +img_norm_cfg = dict(mean=[127], std=[127]) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='grayscale'), + dict( + type='ResizeOCR', + height=32, + min_width=100, + max_width=100, + keep_aspect_ratio=False), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img'], + meta_keys=['filename', 'resize_shape', 'text', 'valid_ratio']), +] +test_pipeline = [ + dict(type='LoadImageFromFile', color_type='grayscale'), + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=None, + keep_aspect_ratio=True), + dict(type='Normalize', **img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'resize_shape', 'valid_ratio', 'img_norm_cfg', + 'ori_filename', 'img_shape', 'ori_shape' + ]), +] diff --git a/configs/_base_/recog_pipelines/crnn_tps_pipeline.py b/configs/_base_/recog_pipelines/crnn_tps_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..3a2eea55a739206c11ae876ba82e9c2f6ea1ff6d --- /dev/null +++ b/configs/_base_/recog_pipelines/crnn_tps_pipeline.py @@ -0,0 +1,37 @@ +img_norm_cfg = dict(mean=[0.5], std=[0.5]) + +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='grayscale'), + dict( + type='ResizeOCR', + height=32, + min_width=100, + max_width=100, + keep_aspect_ratio=False), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile', color_type='grayscale'), + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=100, + keep_aspect_ratio=False), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'valid_ratio', + 'img_norm_cfg', 'ori_filename', 'img_shape' + ]), +] diff --git a/configs/_base_/recog_pipelines/master_pipeline.py b/configs/_base_/recog_pipelines/master_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..2071df4f665932dacd4a827e418603996fb562c8 --- /dev/null +++ b/configs/_base_/recog_pipelines/master_pipeline.py @@ -0,0 +1,42 @@ +img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=160, + keep_aspect_ratio=True), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio', + 'resize_shape' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=160, + keep_aspect_ratio=True), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'valid_ratio', + 'img_norm_cfg', 'ori_filename', 'resize_shape' + ]), + ]) +] diff --git a/configs/_base_/recog_pipelines/nrtr_pipeline.py 
b/configs/_base_/recog_pipelines/nrtr_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..71a19804309aa6692970b5eef642eddf87770559 --- /dev/null +++ b/configs/_base_/recog_pipelines/nrtr_pipeline.py @@ -0,0 +1,38 @@ +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=160, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio' + ]), +] + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=32, + min_width=32, + max_width=160, + keep_aspect_ratio=True), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'valid_ratio', + 'img_norm_cfg', 'ori_filename', 'img_shape' + ]) +] diff --git a/configs/_base_/recog_pipelines/sar_pipeline.py b/configs/_base_/recog_pipelines/sar_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..f43ded30f5b7fb54c302a442483b07ca8bf8af69 --- /dev/null +++ b/configs/_base_/recog_pipelines/sar_pipeline.py @@ -0,0 +1,43 @@ +img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=160, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=160, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'valid_ratio', + 'img_norm_cfg', 'ori_filename', 'img_shape' + ]), + ]) +] diff --git a/configs/_base_/recog_pipelines/satrn_pipeline.py b/configs/_base_/recog_pipelines/satrn_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..f191c5235a08eeae7d1e61002c00eccbdac39ed4 --- /dev/null +++ b/configs/_base_/recog_pipelines/satrn_pipeline.py @@ -0,0 +1,44 @@ +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=32, + min_width=100, + max_width=100, + keep_aspect_ratio=False, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio', + 'resize_shape' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=32, + min_width=100, + max_width=100, + keep_aspect_ratio=False, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + 
meta_keys=[ + 'filename', 'ori_shape', 'img_shape', 'valid_ratio', + 'resize_shape', 'img_norm_cfg', 'ori_filename' + ]), + ]) +] diff --git a/configs/_base_/recog_pipelines/seg_pipeline.py b/configs/_base_/recog_pipelines/seg_pipeline.py new file mode 100644 index 0000000000000000000000000000000000000000..378474dfb5341ec93e73bb61047c43ba72d5e127 --- /dev/null +++ b/configs/_base_/recog_pipelines/seg_pipeline.py @@ -0,0 +1,66 @@ +img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) + +gt_label_convertor = dict( + type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True) + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='RandomPaddingOCR', + max_ratio=[0.15, 0.2, 0.15, 0.2], + box_type='char_quads'), + dict(type='OpencvToPil'), + dict( + type='RandomRotateImageBox', + min_angle=-17, + max_angle=17, + box_type='char_quads'), + dict(type='PilToOpencv'), + dict( + type='ResizeOCR', + height=64, + min_width=64, + max_width=512, + keep_aspect_ratio=True), + dict( + type='OCRSegTargets', + label_convertor=gt_label_convertor, + box_type='char_quads'), + dict(type='RandomRotateTextDet', rotate_ratio=0.5, max_angle=15), + dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4), + dict(type='ToTensorOCR'), + dict(type='FancyPCA'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='CustomFormatBundle', + keys=['gt_kernels'], + visualize=dict(flag=False, boundary_key=None), + call_super=False), + dict( + type='Collect', + keys=['img', 'gt_kernels'], + meta_keys=['filename', 'ori_shape', 'resize_shape']) +] + +test_img_norm_cfg = dict( + mean=[x * 255 for x in img_norm_cfg['mean']], + std=[x * 255 for x in img_norm_cfg['std']]) + +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=64, + min_width=64, + max_width=None, + keep_aspect_ratio=True), + dict(type='Normalize', **test_img_norm_cfg), + dict(type='DefaultFormatBundle'), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'resize_shape', 'img_norm_cfg', 'ori_filename', + 'img_shape', 'ori_shape' + ]) +] diff --git a/configs/_base_/schedules/schedule_adadelta_18e.py b/configs/_base_/schedules/schedule_adadelta_18e.py new file mode 100644 index 0000000000000000000000000000000000000000..33f7960c51bf7d0f2b5bc03e8707a85a01e000fd --- /dev/null +++ b/configs/_base_/schedules/schedule_adadelta_18e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adadelta', lr=0.5) +optimizer_config = dict(grad_clip=dict(max_norm=0.5)) +# learning policy +lr_config = dict(policy='step', step=[8, 14, 16]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=18) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_adadelta_5e.py b/configs/_base_/schedules/schedule_adadelta_5e.py new file mode 100644 index 0000000000000000000000000000000000000000..ad996d65f8aca131023d34712e2d960bf6928cce --- /dev/null +++ b/configs/_base_/schedules/schedule_adadelta_5e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adadelta', lr=1.0) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=5) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_adam_600e.py b/configs/_base_/schedules/schedule_adam_600e.py new file mode 100644 index 0000000000000000000000000000000000000000..a77dc52004ba597b4ba7f2df13a96e123c4029ab --- /dev/null +++ 
b/configs/_base_/schedules/schedule_adam_600e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adam', lr=1e-3) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='poly', power=0.9) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=600) +checkpoint_config = dict(interval=100) diff --git a/configs/_base_/schedules/schedule_adam_step_12e.py b/configs/_base_/schedules/schedule_adam_step_12e.py new file mode 100644 index 0000000000000000000000000000000000000000..c92289d3b7a69015afc51c9a248744bae5ec9197 --- /dev/null +++ b/configs/_base_/schedules/schedule_adam_step_12e.py @@ -0,0 +1,12 @@ +# optimizer +optimizer = dict(type='Adam', lr=4e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=100, + warmup_ratio=1.0 / 3, + step=[11]) +runner = dict(type='EpochBasedRunner', max_epochs=12) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_adam_step_20e.py b/configs/_base_/schedules/schedule_adam_step_20e.py new file mode 100644 index 0000000000000000000000000000000000000000..81fb92cb4a35491493a4a76e22c86c5b804ec329 --- /dev/null +++ b/configs/_base_/schedules/schedule_adam_step_20e.py @@ -0,0 +1,14 @@ +# optimizer +optimizer = dict(type='Adam', lr=1e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + step=[16, 18], + warmup='linear', + warmup_iters=1, + warmup_ratio=0.001, + warmup_by_epoch=True) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=20) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_adam_step_5e.py b/configs/_base_/schedules/schedule_adam_step_5e.py new file mode 100644 index 0000000000000000000000000000000000000000..371a3781bfe51ab0b9d841a3911bfe00c4e85197 --- /dev/null +++ b/configs/_base_/schedules/schedule_adam_step_5e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adam', lr=1e-3) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=5) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_adam_step_600e.py b/configs/_base_/schedules/schedule_adam_step_600e.py new file mode 100644 index 0000000000000000000000000000000000000000..5daa2d4cf5ee79e48de7d984fcfdbc336f885a96 --- /dev/null +++ b/configs/_base_/schedules/schedule_adam_step_600e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adam', lr=1e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[200, 400]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=600) +checkpoint_config = dict(interval=100) diff --git a/configs/_base_/schedules/schedule_adam_step_6e.py b/configs/_base_/schedules/schedule_adam_step_6e.py new file mode 100644 index 0000000000000000000000000000000000000000..5b33a2f924e502fc3a7f53f080a43fae983bb00c --- /dev/null +++ b/configs/_base_/schedules/schedule_adam_step_6e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='Adam', lr=1e-3) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=6) +checkpoint_config = dict(interval=1) diff --git a/configs/_base_/schedules/schedule_sgd_100k_iters.py b/configs/_base_/schedules/schedule_sgd_100k_iters.py new file mode 100644 index 
0000000000000000000000000000000000000000..df2a3300f057145757b5164ec062b58e9d2f96c6 --- /dev/null +++ b/configs/_base_/schedules/schedule_sgd_100k_iters.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=False) +# running settings +runner = dict(type='IterBasedRunner', max_iters=100000) +checkpoint_config = dict(interval=10000) diff --git a/configs/_base_/schedules/schedule_sgd_1200e.py b/configs/_base_/schedules/schedule_sgd_1200e.py new file mode 100644 index 0000000000000000000000000000000000000000..bc7fbf69b42b11ea9b8ae4d14216d2fcf20e717c --- /dev/null +++ b/configs/_base_/schedules/schedule_sgd_1200e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=1200) +checkpoint_config = dict(interval=100) diff --git a/configs/_base_/schedules/schedule_sgd_1500e.py b/configs/_base_/schedules/schedule_sgd_1500e.py new file mode 100644 index 0000000000000000000000000000000000000000..3368175eceafdd019087461c51643a08e2e06d95 --- /dev/null +++ b/configs/_base_/schedules/schedule_sgd_1500e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=1500) +checkpoint_config = dict(interval=100) diff --git a/configs/_base_/schedules/schedule_sgd_160e.py b/configs/_base_/schedules/schedule_sgd_160e.py new file mode 100644 index 0000000000000000000000000000000000000000..985b8f63b3cb34f04ff55b298b44a53568a50ae8 --- /dev/null +++ b/configs/_base_/schedules/schedule_sgd_160e.py @@ -0,0 +1,13 @@ +# optimizer +optimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=0.001, + step=[80, 128]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=160) +checkpoint_config = dict(interval=10) diff --git a/configs/_base_/schedules/schedule_sgd_600e.py b/configs/_base_/schedules/schedule_sgd_600e.py new file mode 100644 index 0000000000000000000000000000000000000000..ed57b422ded5d302f758ff570187e7b1db809adf --- /dev/null +++ b/configs/_base_/schedules/schedule_sgd_600e.py @@ -0,0 +1,8 @@ +# optimizer +optimizer = dict(type='SGD', lr=1e-3, momentum=0.99, weight_decay=5e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[200, 400]) +# running settings +runner = dict(type='EpochBasedRunner', max_epochs=600) +checkpoint_config = dict(interval=100) diff --git a/configs/kie/sdmgr/README.md b/configs/kie/sdmgr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..645696b75c76e496c394a8f6773a8fa8a0d939da --- /dev/null +++ b/configs/kie/sdmgr/README.md @@ -0,0 +1,52 @@ +# SDMGR + +> [Spatial Dual-Modality Graph Reasoning for Key Information Extraction](https://arxiv.org/abs/2103.14470) + + + +## Abstract + +Key information extraction from document images is of paramount importance in office 
automation. Conventional template matching based approaches fail to generalize well to document images of unseen templates, and are not robust against text recognition errors. In this paper, we propose an end-to-end Spatial Dual-Modality Graph Reasoning method (SDMG-R) to extract key information from unstructured document images. We model document images as dual-modality graphs, nodes of which encode both the visual and textual features of detected text regions, and edges of which represent the spatial relations between neighboring text regions. The key information extraction is solved by iteratively propagating messages along graph edges and reasoning the categories of graph nodes. In order to roundly evaluate our proposed method as well as boost the future research, we release a new dataset named WildReceipt, which is collected and annotated tailored for the evaluation of key information extraction from document images of unseen templates in the wild. It contains 25 key information categories, a total of about 69000 text boxes, and is about 2 times larger than the existing public datasets. Extensive experiments validate that all information including visual features, textual features and spatial relations can benefit key information extraction. It has been shown that SDMG-R can effectively extract key information from document images of unseen templates, and obtain new state-of-the-art results on the recent popular benchmark SROIE and our WildReceipt. Our code and dataset will be publicly released. + +
+ +
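To make the reasoning scheme above concrete, here is a minimal, illustrative message-passing step. It is a sketch of the idea only, not MMOCR's `SDMGRHead`; the tensor shapes and the residual update are our assumptions:

```python
import torch

def graph_reasoning_step(node_feats, edge_scores, adj):
    """One illustrative round of message passing: each node aggregates
    its neighbors' fused visual+textual features, weighted by the
    spatial-relation scores on the connecting edges.

    node_feats:  (N, C) features of N detected text regions
    edge_scores: (N, N) pairwise spatial-relation scores
    adj:         (N, N) 0/1 adjacency between neighboring regions
    """
    # Mask non-neighbors, then normalize edge weights per node.
    weights = torch.softmax(edge_scores.masked_fill(adj == 0, -1e9), dim=1)
    messages = weights @ node_feats  # (N, C) aggregated neighbor info
    return node_feats + messages     # residual node update
```

Iterating a few such rounds and classifying the final node states is what the SDMGR configs below assemble.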
+ +## Results and models + +### WildReceipt + +| Method | Modality | Macro F1-Score | Download | +| :--------------------------------------------------------------------: | :--------------: | :------------: | :--------------------------------------------------------------------------------------------------: | +| [sdmgr_unet16](/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py) | Visual + Textual | 0.888 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210520-7489e6de.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210520_132236.log.json) | +| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py) | Textual | 0.870 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210517-a44850da.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210517_205829.log.json) | + +```{note} +1. For `sdmgr_novisual`, images are not needed for training and testing, so a fake `img_prefix` can be used in the configs; likewise, a fake `file_name` can be used in the annotation files. +``` + +### WildReceiptOpenset + +| Method | Modality | Edge F1-Score | Node Macro F1-Score | Node Micro F1-Score | Download | +| :-------------------------------------------------------------------: | :------: | :-----------: | :-----------------: | :-----------------: | :----------------------------------------------------------------------: | +| [sdmgr_novisual](/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py) | Textual | 0.786 | 0.926 | 0.935 | [model](https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset_20210917-d236b3ea.pth) \| [log](https://download.openmmlab.com/mmocr/kie/sdmgr/20210917_050824.log.json) | + +```{note} +1. In the openset case, the number of node categories is not fixed in advance, and more node categories can be added. +2. To show that our method can handle the openset problem, we modify the ground truth of `WildReceipt` into `WildReceiptOpenset`. The `nodes` are classified into just 4 classes (`background, key, value, others`), and `edge` labels are added for each box. +3. The model is used to predict whether two nodes form a pair connected by a valid edge. +4. You can learn more about the key differences between CloseSet and OpenSet annotations in our [tutorial](tutorials/kie_closeset_openset.md).
+``` + +## Citation + +```bibtex +@misc{sun2021spatial, + title={Spatial Dual-Modality Graph Reasoning for Key Information Extraction}, + author={Hongbin Sun and Zhanghui Kuang and Xiaoyu Yue and Chenhao Lin and Wayne Zhang}, + year={2021}, + eprint={2103.14470}, + archivePrefix={arXiv}, + primaryClass={cs.CV} +} +``` diff --git a/configs/kie/sdmgr/metafile.yml b/configs/kie/sdmgr/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..f1a9695991156ae658e40f1aa2ab1dba06da2e9c --- /dev/null +++ b/configs/kie/sdmgr/metafile.yml @@ -0,0 +1,39 @@ +Collections: +- Name: SDMGR + Metadata: + Training Data: KIEDataset + Training Techniques: + - Adam + Training Resources: 1x GeForce GTX 1080 Ti + Architecture: + - UNet + - SDMGRHead + Paper: + URL: https://arxiv.org/abs/2103.14470.pdf + Title: 'Spatial Dual-Modality Graph Reasoning for Key Information Extraction' + README: configs/kie/sdmgr/README.md + +Models: + - Name: sdmgr_unet16_60e_wildreceipt + In Collection: SDMGR + Config: configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py + Metadata: + Training Data: wildreceipt + Results: + - Task: Key Information Extraction + Dataset: wildreceipt + Metrics: + macro_f1: 0.876 + Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt_20210405-16a47642.pth + + - Name: sdmgr_novisual_60e_wildreceipt + In Collection: SDMGR + Config: configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py + Metadata: + Training Data: wildreceipt + Results: + - Task: Key Information Extraction + Dataset: wildreceipt + Metrics: + macro_f1: 0.864 + Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_20210405-07bc26ad.pth diff --git a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py new file mode 100644 index 0000000000000000000000000000000000000000..220135a0b037909599fbaf77c75b06f48f8b1ba7 --- /dev/null +++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py @@ -0,0 +1,98 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +max_scale, min_scale = 1024, 512 + +train_pipeline = [ + dict(type='LoadAnnotations'), + dict( + type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'], + meta_keys=('filename', 'ori_texts')) +] +test_pipeline = [ + dict(type='LoadAnnotations'), + dict( + type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes'], + meta_keys=('filename', 'ori_texts', 'img_norm_cfg', 'ori_filename', + 'img_shape')) +] + +dataset_type = 'KIEDataset' +data_root = 'data/wildreceipt' + +loader = dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])) + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/train.txt', + pipeline=train_pipeline, + img_prefix=data_root, + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=False) +test = dict( + type=dataset_type, + ann_file=f'{data_root}/test.txt', + pipeline=test_pipeline, + img_prefix=data_root, + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=True) + +data = dict( + samples_per_gpu=4, + workers_per_gpu=1, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=train, + val=test, + 
test=test) + +evaluation = dict( + interval=1, + metric='macro_f1', + metric_options=dict( + macro_f1=dict( + ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]))) + +model = dict( + type='SDMGR', + backbone=dict(type='UNet', base_channels=16), + bbox_head=dict( + type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26), + visual_modality=False, + train_cfg=None, + test_cfg=None, + class_list=f'{data_root}/class_list.txt') + +optimizer = dict(type='Adam', weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1, + warmup_ratio=1, + step=[40, 50]) +total_epochs = 60 + +checkpoint_config = dict(interval=1) +log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] + +find_unused_parameters = True diff --git a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py new file mode 100644 index 0000000000000000000000000000000000000000..b295bd13c3b098d4e8e25b1e7fc42c4bbf5ff5f1 --- /dev/null +++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py @@ -0,0 +1,84 @@ +_base_ = ['../../_base_/default_runtime.py'] + +model = dict( + type='SDMGR', + backbone=dict(type='UNet', base_channels=16), + bbox_head=dict( + type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=4), + visual_modality=False, + train_cfg=None, + test_cfg=None, + class_list=None, + openset=True) + +optimizer = dict(type='Adam', weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1, + warmup_ratio=1, + step=[40, 50]) +total_epochs = 60 + +train_pipeline = [ + dict(type='LoadAnnotations'), + dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'], + meta_keys=('filename', 'ori_filename', 'ori_texts')) +] +test_pipeline = [ + dict(type='LoadAnnotations'), + dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes'], + meta_keys=('filename', 'ori_filename', 'ori_texts', 'ori_bboxes', + 'img_norm_cfg', 'ori_filename', 'img_shape')) +] + +dataset_type = 'OpensetKIEDataset' +data_root = 'data/wildreceipt' + +loader = dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])) + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/openset_train.txt', + pipeline=train_pipeline, + img_prefix=data_root, + link_type='one-to-many', + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=False) +test = dict( + type=dataset_type, + ann_file=f'{data_root}/openset_test.txt', + pipeline=test_pipeline, + img_prefix=data_root, + link_type='one-to-many', + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=True) + +data = dict( + samples_per_gpu=4, + workers_per_gpu=1, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=train, + val=test, + test=test) + +evaluation = dict(interval=1, metric='openset_f1', metric_options=None) + +find_unused_parameters = True diff --git a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py new file mode 
100644 index 0000000000000000000000000000000000000000..f073064affebe05d3830e18d76453c1cceb0f1a1 --- /dev/null +++ b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py @@ -0,0 +1,105 @@ +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +max_scale, min_scale = 1024, 512 + +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels']) +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadAnnotations'), + dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True), + dict(type='RandomFlip', flip_ratio=0.), + dict(type='Normalize', **img_norm_cfg), + dict(type='Pad', size_divisor=32), + dict(type='KIEFormatBundle'), + dict( + type='Collect', + keys=['img', 'relations', 'texts', 'gt_bboxes'], + meta_keys=[ + 'img_norm_cfg', 'img_shape', 'ori_filename', 'filename', + 'ori_texts' + ]) +] + +dataset_type = 'KIEDataset' +data_root = 'data/wildreceipt' + +loader = dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineJsonParser', + keys=['file_name', 'height', 'width', 'annotations'])) + +train = dict( + type=dataset_type, + ann_file=f'{data_root}/train.txt', + pipeline=train_pipeline, + img_prefix=data_root, + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=False) +test = dict( + type=dataset_type, + ann_file=f'{data_root}/test.txt', + pipeline=test_pipeline, + img_prefix=data_root, + loader=loader, + dict_file=f'{data_root}/dict.txt', + test_mode=True) + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=train, + val=test, + test=test) + +evaluation = dict( + interval=1, + metric='macro_f1', + metric_options=dict( + macro_f1=dict( + ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]))) + +model = dict( + type='SDMGR', + backbone=dict(type='UNet', base_channels=16), + bbox_head=dict( + type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26), + visual_modality=True, + train_cfg=None, + test_cfg=None, + class_list=f'{data_root}/class_list.txt') + +optimizer = dict(type='Adam', weight_decay=0.0001) +optimizer_config = dict(grad_clip=None) +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=1, + warmup_ratio=1, + step=[40, 50]) +total_epochs = 60 + +checkpoint_config = dict(interval=1) +log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) +dist_params = dict(backend='nccl') +log_level = 'INFO' +load_from = None +resume_from = None +workflow = [('train', 1)] + +find_unused_parameters = True diff --git a/configs/ner/bert_softmax/README.md b/configs/ner/bert_softmax/README.md new file mode 100644 index 0000000000000000000000000000000000000000..650d18c4d56406e5f064085229f49875f5b4aea5 --- /dev/null +++ b/configs/ner/bert_softmax/README.md @@ -0,0 +1,47 @@ +# Bert + +> [Bert: Pre-training of deep bidirectional transformers for language understanding](https://arxiv.org/abs/1810.04805) + + + +## Abstract + +We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. 
Unlike recent language representation models, BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. +BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5% (7.7% point absolute improvement), MultiNLI accuracy to 86.7% (4.6% absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement). + + + +
+ +
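The abstract's "one additional output layer" recipe is what the NER config below instantiates with `BertEncoder` and `FCDecoder`. A minimal sketch of that pattern, assuming a 768-d hidden size and 21 labels (the 10 entity categories of the config in BIO tagging, plus `O`):

```python
import torch.nn as nn

class BertTokenClassifierSketch(nn.Module):
    """A single linear layer on top of per-token hidden states turns a
    pretrained encoder into a sequence labeller (BIO tags for NER)."""

    def __init__(self, encoder, hidden_size=768, num_labels=21):
        super().__init__()
        self.encoder = encoder  # any module mapping token ids to (B, L, H)
        self.classifier = nn.Linear(hidden_size, num_labels)

    def forward(self, token_ids):
        hidden = self.encoder(token_ids)  # (B, L, H)
        return self.classifier(hidden)    # (B, L, num_labels) BIO logits
```

Only the classifier is new; the encoder weights come from the pretrained checkpoint referenced in the config.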
+ +## Dataset + +### Train Dataset + +| trainset | text_num | entity_num | +| :---------: | :------: | :--------: | +| CLUENER2020 | 10748 | 23338 | + +### Test Dataset + +| testset | text_num | entity_num | +| :---------: | :------: | :--------: | +| CLUENER2020 | 1343 | 2982 | + +## Results and models + +| Method | Pretrain | Precision | Recall | F1-Score | Download | +| :-------------------------------------------------------: | :----------------------------------------------------------: | :-------: | :----: | :------: | :----------------------------------------------------------: | +| [bert_softmax](/configs/ner/bert_softmax/bert_softmax_cluener_18e.py) | [pretrain](https://download.openmmlab.com/mmocr/ner/bert_softmax/bert_pretrain.pth) | 0.7885 | 0.7998 | 0.7941 | [model](https://download.openmmlab.com/mmocr/ner/bert_softmax/bert_softmax_cluener-eea70ea2.pth) \| [log](https://download.openmmlab.com/mmocr/ner/bert_softmax/20210514_172645.log.json) | + +## Citation + +```bibtex +@article{devlin2018bert, + title={Bert: Pre-training of deep bidirectional transformers for language understanding}, + author={Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina}, + journal={arXiv preprint arXiv:1810.04805}, + year={2018} +} +``` diff --git a/configs/ner/bert_softmax/bert_softmax_cluener_18e.py b/configs/ner/bert_softmax/bert_softmax_cluener_18e.py new file mode 100644 index 0000000000000000000000000000000000000000..5fd85d9a858236f4feb8903e3f4bf95f9eccaf94 --- /dev/null +++ b/configs/ner/bert_softmax/bert_softmax_cluener_18e.py @@ -0,0 +1,70 @@ +_base_ = [ + '../../_base_/schedules/schedule_adadelta_18e.py', + '../../_base_/default_runtime.py' +] + +categories = [ + 'address', 'book', 'company', 'game', 'government', 'movie', 'name', + 'organization', 'position', 'scene' +] + +test_ann_file = 'data/cluener2020/dev.json' +train_ann_file = 'data/cluener2020/train.json' +vocab_file = 'data/cluener2020/vocab.txt' + +max_len = 128 +loader = dict( + type='HardDiskLoader', + repeat=1, + parser=dict(type='LineJsonParser', keys=['text', 'label'])) + +ner_convertor = dict( + type='NerConvertor', + annotation_type='bio', + vocab_file=vocab_file, + categories=categories, + max_len=max_len) + +test_pipeline = [ + dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len), + dict(type='ToTensorNER') +] + +train_pipeline = [ + dict(type='NerTransform', label_convertor=ner_convertor, max_len=max_len), + dict(type='ToTensorNER') +] +dataset_type = 'NerDataset' + +train = dict( + type=dataset_type, + ann_file=train_ann_file, + loader=loader, + pipeline=train_pipeline, + test_mode=False) + +test = dict( + type=dataset_type, + ann_file=test_ann_file, + loader=loader, + pipeline=test_pipeline, + test_mode=True) +data = dict( + samples_per_gpu=8, workers_per_gpu=2, train=train, val=test, test=test) + +evaluation = dict(interval=1, metric='f1-score') + +model = dict( + type='NerClassifier', + encoder=dict( + type='BertEncoder', + max_position_embeddings=512, + init_cfg=dict( + type='Pretrained', + checkpoint='https://download.openmmlab.com/mmocr/ner/' + 'bert_softmax/bert_pretrain.pth')), + decoder=dict(type='FCDecoder'), + loss=dict(type='MaskedCrossEntropyLoss'), + label_convertor=ner_convertor) + +test_cfg = None diff --git a/configs/textdet/dbnet/README.md b/configs/textdet/dbnet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..d2007c72ec2b45e70d30c6edea128b7e0be2baca --- /dev/null +++ b/configs/textdet/dbnet/README.md @@ -0,0 +1,33 @@ +# DBNet 
+ +> [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/abs/1911.08947) + + + +## Abstract + +Recently, segmentation-based methods are quite popular in scene text detection, as the segmentation results can more accurately describe scene text of various shapes such as curve text. However, the post-processing of binarization is essential for segmentation-based detection, which converts probability maps produced by a segmentation method into bounding boxes/regions of text. In this paper, we propose a module named Differentiable Binarization (DB), which can perform the binarization process in a segmentation network. Optimized along with a DB module, a segmentation network can adaptively set the thresholds for binarization, which not only simplifies the post-processing but also enhances the performance of text detection. Based on a simple segmentation network, we validate the performance improvements of DB on five benchmark datasets, which consistently achieves state-of-the-art results, in terms of both detection accuracy and speed. In particular, with a light-weight backbone, the performance improvements by DB are significant so that we can look for an ideal tradeoff between detection accuracy and efficiency. Specifically, with a backbone of ResNet-18, our detector achieves an F-measure of 82.8, running at 62 FPS, on the MSRA-TD500 dataset. + +
+ +
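The Differentiable Binarization step itself is a one-liner: the hard threshold is replaced by a steep sigmoid, so the per-pixel threshold map can be trained end to end. A minimal sketch, with the amplifying factor k=50 taken from the paper:

```python
import torch

def differentiable_binarization(prob_map, thresh_map, k=50):
    """Approximate binary map B = 1 / (1 + exp(-k * (P - T))).

    Unlike a hard step function, this surrogate has usable gradients
    everywhere, which is what makes the threshold map T learnable."""
    return torch.sigmoid(k * (prob_map - thresh_map))

# Pixels above their learned threshold saturate towards 1, others to 0.
P = torch.tensor([[0.9, 0.4], [0.2, 0.7]])
T = torch.full_like(P, 0.3)
print(differentiable_binarization(P, T))
```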
+ +## Results and models + +### ICDAR2015 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :---------------------------------------: | :-------------------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------: | +| [DBNet_r18](/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 1200 | 736 | 0.731 | 0.871 | 0.795 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.log.json) | +| [DBNet_r50dcn](/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py) | [Synthtext](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-aa96e477.pth) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.814 | 0.868 | 0.840 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.log.json) | + +## Citation + +```bibtex +@article{Liao_Wan_Yao_Chen_Bai_2020, + title={Real-Time Scene Text Detection with Differentiable Binarization}, + journal={Proceedings of the AAAI Conference on Artificial Intelligence}, + author={Liao, Minghui and Wan, Zhaoyi and Yao, Cong and Chen, Kai and Bai, Xiang}, + year={2020}, + pages={11474-11481}} +``` diff --git a/configs/textdet/dbnet/dbnet_r18_fpnc_100k_iters_synthtext.py b/configs/textdet/dbnet/dbnet_r18_fpnc_100k_iters_synthtext.py new file mode 100644 index 0000000000000000000000000000000000000000..78a2bbbf87405a052690546681db127bd93ff738 --- /dev/null +++ b/configs/textdet/dbnet/dbnet_r18_fpnc_100k_iters_synthtext.py @@ -0,0 +1,59 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_100k_iters.py', + '../../_base_/det_models/dbnet_r18_fpnc.py', + '../../_base_/det_datasets/synthtext.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +train_pipeline_r18 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg), + dict( + type='ImgAug', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]], + clip_invalid_ploys=False), + dict(type='EastRandomCrop', target_size=(640, 640)), + dict(type='DBNetTargets', shrink_ratio=0.4), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'], + visualize=dict(flag=False, boundary_key='gt_shrink')), + dict( + type='Collect', + keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask']) +] +test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}} + +data = dict( + samples_per_gpu=16, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + 
pipeline=train_pipeline_r18), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_1333_736), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_1333_736)) + +evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate diff --git a/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py b/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..467903fef4604dedfd1efbeaa011de0a32dcea74 --- /dev/null +++ b/configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1200e.py', + '../../_base_/det_models/dbnet_r18_fpnc.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_r18 = {{_base_.train_pipeline_r18}} +test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}} + +data = dict( + samples_per_gpu=16, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_r18), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_1333_736), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_1333_736)) + +evaluation = dict(interval=100, metric='hmean-iou') diff --git a/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_100k_iters_synthtext.py b/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_100k_iters_synthtext.py new file mode 100644 index 0000000000000000000000000000000000000000..0ccd22c9b0675062571ed971a16dd75958ac03e0 --- /dev/null +++ b/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_100k_iters_synthtext.py @@ -0,0 +1,61 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_100k_iters.py', + '../../_base_/det_models/dbnet_r50dcnv2_fpnc.py', + '../../_base_/det_datasets/synthtext.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +img_norm_cfg_r50dcnv2 = dict( + mean=[122.67891434, 116.66876762, 104.00698793], + std=[58.395, 57.12, 57.375], + to_rgb=True) +train_pipeline_r50dcnv2 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg_r50dcnv2), + dict( + type='ImgAug', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]], + clip_invalid_ploys=False), + dict(type='EastRandomCrop', target_size=(640, 640)), + dict(type='DBNetTargets', shrink_ratio=0.4), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'], + visualize=dict(flag=False, boundary_key='gt_shrink')), + dict( + type='Collect', + keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask']) +] +test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}} + +data = dict( + samples_per_gpu=16, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_r50dcnv2), + 
val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024)) + +evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate diff --git a/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py b/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..251b7bc2faaaa254766e0902c4238b2917f0d230 --- /dev/null +++ b/configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1200e.py', + '../../_base_/det_models/dbnet_r50dcnv2_fpnc.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}} +test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}} + +load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth' + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_r50dcnv2), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024)) + +evaluation = dict(interval=100, metric='hmean-iou') diff --git a/configs/textdet/dbnet/metafile.yml b/configs/textdet/dbnet/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..c6abdbca61d760a0e6d275e5188312ef86fd055e --- /dev/null +++ b/configs/textdet/dbnet/metafile.yml @@ -0,0 +1,40 @@ +Collections: +- Name: DBNet + Metadata: + Training Data: ICDAR2015 + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 1x GeForce GTX 1080 Ti + Architecture: + - ResNet + - FPNC + Paper: + URL: https://arxiv.org/pdf/1911.08947.pdf + Title: 'Real-time Scene Text Detection with Differentiable Binarization' + README: configs/textdet/dbnet/README.md + +Models: + - Name: dbnet_r18_fpnc_1200e_icdar2015 + In Collection: DBNet + Config: configs/textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.795 + Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r18_fpnc_sbn_1200e_icdar2015_20210329-ba3ab597.pth + + - Name: dbnet_r50dcnv2_fpnc_1200e_icdar2015 + In Collection: DBNet + Config: configs/textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.840 + Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_1200e_icdar2015_20211025-9fe3b590.pth diff --git a/configs/textdet/dbnetpp/README.md b/configs/textdet/dbnetpp/README.md new file mode 100644 index 0000000000000000000000000000000000000000..995254cb89c1b88bb3698d9d550f8e0ac7ba69f6 --- /dev/null +++ b/configs/textdet/dbnetpp/README.md @@ -0,0 +1,33 @@ +# DBNetpp + +> [Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion](https://arxiv.org/abs/2202.10304) + + + +## Abstract + +Recently, segmentation-based scene text detection 
methods have drawn extensive attention in the scene text detection field, because of their superiority in detecting the text instances of arbitrary shapes and extreme aspect ratios, profiting from the pixel-level descriptions. However, the vast majority of the existing segmentation-based approaches are limited by their complex post-processing algorithms and the scale robustness of their segmentation models, where the post-processing algorithms are not only isolated from the model optimization but also time-consuming and the scale robustness is usually strengthened by fusing multi-scale feature maps directly. In this paper, we propose a Differentiable Binarization (DB) module that integrates the binarization process, one of the most important steps in the post-processing procedure, into a segmentation network. Optimized along with the proposed DB module, the segmentation network can produce more accurate results, which enhances the accuracy of text detection with a simple pipeline. Furthermore, an efficient Adaptive Scale Fusion (ASF) module is proposed to improve the scale robustness by fusing features of different scales adaptively. By incorporating the proposed DB and ASF with the segmentation network, our proposed scene text detector consistently achieves state-of-the-art results, in terms of both detection accuracy and speed, on five standard benchmarks. + +
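+At its core, the DB module replaces the hard threshold of standard post-processing with a steep, differentiable surrogate. A minimal sketch of that step, assuming PyTorch (`k = 50` is the amplifying factor used in the paper; tensor shapes are illustrative):
+
+```python
+import torch
+
+def differentiable_binarization(prob_map, thresh_map, k=50.0):
+    # Approximate binarization B = 1 / (1 + exp(-k * (P - T))).
+    # Unlike the hard step function (P > T), this stays differentiable,
+    # so the threshold map T is learned jointly with the probability map P.
+    return torch.sigmoid(k * (prob_map - thresh_map))
+
+# Toy usage: a 1x1x4x4 probability map against a uniform threshold map.
+p = torch.rand(1, 1, 4, 4)
+t = torch.full_like(p, 0.3)
+b_hat = differentiable_binarization(p, t)  # near-binary, gradient-friendly
+```
+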
+ +
+ +## Results and models + +### ICDAR2015 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :---------------------------------------: | :-------------------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------: | +| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py) | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-db297554.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-db297554.log.json)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.822 | 0.901 | 0.860 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.log.json) | + +## Citation + +```bibtex +@article{liao2022real, + title={Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion}, + author={Liao, Minghui and Zou, Zhisheng and Wan, Zhaoyi and Yao, Cong and Bai, Xiang}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + year={2022}, + publisher={IEEE} +} +``` diff --git a/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext.py b/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext.py new file mode 100644 index 0000000000000000000000000000000000000000..5f3835ea998e5195b471671a8685c0032733b0a2 --- /dev/null +++ b/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext.py @@ -0,0 +1,62 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_100k_iters.py', + '../../_base_/det_models/dbnetpp_r50dcnv2_fpnc.py', + '../../_base_/det_datasets/synthtext.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +img_norm_cfg_r50dcnv2 = dict( + mean=[122.67891434, 116.66876762, 104.00698793], + std=[58.395, 57.12, 57.375], + to_rgb=True) +train_pipeline_r50dcnv2 = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadTextAnnotations', + with_bbox=True, + with_mask=True, + poly2mask=False), + dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), + dict(type='Normalize', **img_norm_cfg_r50dcnv2), + dict( + type='ImgAug', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]], + clip_invalid_ploys=False), + dict(type='EastRandomCrop', target_size=(640, 640)), + dict(type='DBNetTargets', shrink_ratio=0.4), + dict(type='Pad', size_divisor=32), + dict( + type='CustomFormatBundle', + keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'], + visualize=dict(flag=False, boundary_key='gt_shrink')), + dict( + type='Collect', + keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask']) +] + +test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}} + +data = dict( + samples_per_gpu=16, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_r50dcnv2), + val=dict( + type='UniformConcatDataset', + 
datasets=test_list, + pipeline=test_pipeline_4068_1024), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024)) + +evaluation = dict(interval=200000, metric='hmean-iou') # do not evaluate diff --git a/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py b/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..bc6ab78cacc3f5b62549dfcf8c93cc0cc5c3a6ac --- /dev/null +++ b/configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1200e.py', + '../../_base_/det_models/dbnetpp_r50dcnv2_fpnc.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/dbnet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}} +test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}} + +load_from = 'checkpoints/textdet/dbnetpp/res50dcnv2_synthtext.pth' + +data = dict( + samples_per_gpu=32, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_r50dcnv2), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_4068_1024)) + +evaluation = dict( + interval=100, + metric='hmean-iou', + save_best='0_hmean-iou:hmean', + rule='greater') diff --git a/configs/textdet/dbnetpp/metafile.yml b/configs/textdet/dbnetpp/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..b40571c11faf09bad058709ec333b9445f48b4f3 --- /dev/null +++ b/configs/textdet/dbnetpp/metafile.yml @@ -0,0 +1,28 @@ +Collections: +- Name: DBNetpp + Metadata: + Training Data: ICDAR2015 + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 1x Nvidia A100 + Architecture: + - ResNet + - FPNC + Paper: + URL: https://arxiv.org/abs/2202.10304 + Title: 'Real-Time Scene Text Detection with Differentiable Binarization and Adaptive Scale Fusion' + README: configs/textdet/dbnetpp/README.md + +Models: + - Name: dbnetpp_r50dcnv2_fpnc_1200e_icdar2015 + In Collection: DBNetpp + Config: configs/textdet/dbnetpp/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.860 + Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnetpp_r50dcnv2_fpnc_1200e_icdar2015-20220502-d7a76fff.pth diff --git a/configs/textdet/drrg/README.md b/configs/textdet/drrg/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2f2beb1b757ccbf2dd2e41a70769d963b098264d --- /dev/null +++ b/configs/textdet/drrg/README.md @@ -0,0 +1,37 @@ +# DRRG + +> [Deep relational reasoning graph network for arbitrary shape text detection](https://arxiv.org/abs/2003.07493) + + + +## Abstract + +Arbitrary shape text detection is a challenging task due to the high variety and complexity of scene texts. In this paper, we propose a novel unified relational reasoning graph network for arbitrary shape text detection.
In our method, an innovative local graph bridges a text proposal model via Convolutional Neural Network (CNN) and a deep relational reasoning network via Graph Convolutional Network (GCN), making our network end-to-end trainable. To be concrete, every text instance will be divided into a series of small rectangular components, and the geometry attributes (e.g., height, width, and orientation) of the small components will be estimated by our text proposal model. Given the geometry attributes, the local graph construction model can roughly establish linkages between different text components. For further reasoning and deducing the likelihood of linkages between the component and its neighbors, we adopt a graph-based network to perform deep relational reasoning on local graphs. Experiments on publicly available datasets demonstrate the state-of-the-art performance of our method. + +
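+As a toy illustration of the local graphs described above, assuming NumPy and reducing each component's geometry to a centre point (the real model also uses height, width, and orientation), a pivot component is linked to its nearest neighbours to form the adjacency a GCN can reason over:
+
+```python
+import numpy as np
+
+def local_graph(centers, pivot, k=4):
+    # Toy local-graph construction: connect a pivot text component to its
+    # k nearest neighbours by centre distance. DRRG then applies a GCN to
+    # such local graphs to score which links join components belonging to
+    # the same text instance.
+    d = np.linalg.norm(centers - centers[pivot], axis=1)
+    neighbours = np.argsort(d)[1:k + 1]  # skip the pivot itself
+    adj = np.zeros((len(centers), len(centers)), dtype=np.float32)
+    adj[pivot, neighbours] = adj[neighbours, pivot] = 1.0
+    return adj
+
+centers = np.array([[0, 0], [1, 0], [2, 0], [10, 10], [11, 10]], dtype=float)
+adj = local_graph(centers, pivot=0, k=2)  # links component 0 to 1 and 2
+```
+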
+ +
+ +## Results and models + +### CTW1500 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :-------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-----------: | :-----------: | :-----------: | :---------------------------------------------------: | +| [DRRG](configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 640 | 0.822 (0.791) | 0.858 (0.862) | 0.840 (0.825) | [model](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500_20211022-fb30b001.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/drrg/20210511_234719.log) | + +```{note} +We've upgraded our IoU backend from `Polygon3` to `shapely`. There are some performance differences for some models due to the backends' different logics to handle invalid polygons (more info [here](https://github.com/open-mmlab/mmocr/issues/465)). **New evaluation result is presented in brackets** and new logs will be uploaded soon. +``` + +## Citation + +```bibtex +@article{zhang2020drrg, + title={Deep relational reasoning graph network for arbitrary shape text detection}, + author={Zhang, Shi-Xue and Zhu, Xiaobin and Hou, Jie-Bo and Liu, Chang and Yang, Chun and Wang, Hongfa and Yin, Xu-Cheng}, + booktitle={CVPR}, + pages={9699-9708}, + year={2020} +} +``` diff --git a/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..7121ef83297d3a1976c9b62d2b47f0b5ba52bd66 --- /dev/null +++ b/configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1200e.py', + '../../_base_/det_models/drrg_r50_fpn_unet.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/drrg_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=20, metric='hmean-iou') diff --git a/configs/textdet/drrg/metafile.yml b/configs/textdet/drrg/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..8e7224eb352d419fc65637d6b0fc17d6cc4230d8 --- /dev/null +++ b/configs/textdet/drrg/metafile.yml @@ -0,0 +1,27 @@ +Collections: +- Name: DRRG + Metadata: + Training Data: SCUT-CTW1500 + Training Techniques: + - SGD with Momentum + Training Resources: 1x GeForce RTX 3090 + Architecture: + - ResNet + - FPN_UNet + Paper: + URL: https://arxiv.org/abs/2003.07493.pdf + Title: 'Deep Relational Reasoning Graph Network for Arbitrary Shape Text Detection' + README: configs/textdet/drrg/README.md + +Models: + - Name: drrg_r50_fpn_unet_1200e_ctw1500 + In Collection: DRRG + Config: configs/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean-iou:
0.840 + Weights: https://download.openmmlab.com/mmocr/textdet/drrg/drrg_r50_fpn_unet_1200e_ctw1500_20211022-fb30b001.pth diff --git a/configs/textdet/fcenet/README.md b/configs/textdet/fcenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f1acd2b1d8daa4557b16c8375b8c1ab4aa36cf6c --- /dev/null +++ b/configs/textdet/fcenet/README.md @@ -0,0 +1,38 @@ +# FCENet + +> [Fourier Contour Embedding for Arbitrary-Shaped Text Detection](https://arxiv.org/abs/2104.10442) + + + +## Abstract + +One of the main challenges for arbitrary-shaped text detection is to design a good text instance representation that allows networks to learn diverse text geometry variances. Most existing methods model text instances in the image spatial domain via masks or contour point sequences in the Cartesian or the polar coordinate system. However, the mask representation might lead to expensive post-processing, while the point sequence one may have limited capability to model texts with highly-curved shapes. To tackle these problems, we model text instances in the Fourier domain and propose a novel Fourier Contour Embedding (FCE) method to represent arbitrary shaped text contours as compact signatures. We further construct FCENet with a backbone, feature pyramid networks (FPN) and a simple post-processing with the Inverse Fourier Transformation (IFT) and Non-Maximum Suppression (NMS). Different from previous methods, FCENet first predicts compact Fourier signatures of text instances, and then reconstructs text contours via IFT and NMS during test. Extensive experiments demonstrate that FCE is accurate and robust to fit contours of scene texts even with highly-curved shapes, and also validate the effectiveness and the good generalization of FCENet for arbitrary-shaped text detection. Furthermore, experimental results show that our FCENet is superior to the state-of-the-art (SOTA) methods on CTW1500 and Total-Text, especially on the challenging highly-curved text subset. + +
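+The reconstruction half of this pipeline is ordinary signal processing. A minimal sketch of recovering a text polygon from a Fourier signature, assuming NumPy (`coeffs`, a dict from frequency k to complex coefficient c_k, is an illustrative stand-in for the predicted signature):
+
+```python
+import numpy as np
+
+def contour_from_fourier(coeffs, n_points=50):
+    # Inverse Fourier Transformation: f(t) = sum_k c_k * exp(2j*pi*k*t),
+    # sampled at n_points values of t in [0, 1). FCENet predicts a small
+    # set of complex coefficients per text instance and recovers the
+    # contour with exactly this kind of reconstruction at test time.
+    t = np.linspace(0, 1, n_points, endpoint=False)
+    f = sum(c * np.exp(2j * np.pi * k * t) for k, c in coeffs.items())
+    return np.stack([f.real, f.imag], axis=1)  # (n_points, 2) x/y polygon
+
+# Toy signature: c_0 places the centre; c_1 and c_-1 trace an ellipse.
+polygon = contour_from_fourier({0: 100 + 50j, 1: 30 + 0j, -1: 10 + 0j})
+```
+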
+ +
+ +## Results and models + +### CTW1500 + +| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :-------------------------------------------------: | :--------------: | :--------------: | :-----------: | :----------: | :-----: | :---------: | :----: | :-------: | :---: | :----------------------------------------------------: | +| [FCENet](/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py) | ResNet50 + DCNv2 | ImageNet | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.828 | 0.875 | 0.851 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/20210511_181328.log.json) | + +### ICDAR2015 + +| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :-------------------------------------------------------: | :------: | :--------------: | :----------: | :-------: | :-----: | :----------: | :----: | :-------: | :---: | :---------------------------------------------------------: | +| [FCENet](/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py) | ResNet50 | ImageNet | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.819 | 0.880 | 0.849 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015_20211022-daefb6ed.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/20210601_222655.log.json) | + +## Citation + +```bibtex +@InProceedings{zhu2021fourier, + title={Fourier Contour Embedding for Arbitrary-Shaped Text Detection}, + author={Yiqin Zhu and Jianyong Chen and Lingyu Liang and Zhanghui Kuang and Lianwen Jin and Wayne Zhang}, + year={2021}, + booktitle = {CVPR} + } +``` diff --git a/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py b/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..d4a9c642307466c86f667d64bbeb4057db571b66 --- /dev/null +++ b/configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1500e.py', + '../../_base_/det_models/fcenet_r50_fpn.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/fcenet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_icdar2015), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py b/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..44bbfcd55a2efc29f441e06fb33079a48de61905 --- /dev/null +++ b/configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + 
'../../_base_/schedules/schedule_sgd_1500e.py', + '../../_base_/det_models/fcenet_r50dcnv2_fpn.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/fcenet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}} +test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}} + +data = dict( + samples_per_gpu=6, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_ctw1500), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/fcenet/metafile.yml b/configs/textdet/fcenet/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..7b60e518e2b28f281ea799179848cfb53e065d1c --- /dev/null +++ b/configs/textdet/fcenet/metafile.yml @@ -0,0 +1,38 @@ +Collections: +- Name: FCENet + Metadata: + Training Data: SCUT-CTW1500 + Training Techniques: + - SGD with Momentum + Training Resources: 1x GeForce RTX 2080 Ti + Architecture: + - ResNet with DCNv2 + - FPN + Paper: + URL: https://arxiv.org/abs/2104.10442 + Title: 'Fourier Contour Embedding for Arbitrary-Shaped Text Detection' + README: configs/textdet/fcenet/README.md + +Models: + - Name: fcenet_r50dcnv2_fpn_1500e_ctw1500 + In Collection: FCENet + Config: configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean-iou: 0.851 + Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth + - Name: fcenet_r50_fpn_1500e_icdar2015 + In Collection: FCENet + Config: configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.849 + Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015_20211022-daefb6ed.pth diff --git a/configs/textdet/maskrcnn/README.md b/configs/textdet/maskrcnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..c6ef17e7659558a4f41834f4614d58caddcbe208 --- /dev/null +++ b/configs/textdet/maskrcnn/README.md @@ -0,0 +1,49 @@ +# Mask R-CNN + +> [Mask R-CNN](https://arxiv.org/abs/1703.06870) + + + +## Abstract + +We present a conceptually simple, flexible, and general framework for object instance segmentation. Our approach efficiently detects objects in an image while simultaneously generating a high-quality segmentation mask for each instance. The method, called Mask R-CNN, extends Faster R-CNN by adding a branch for predicting an object mask in parallel with the existing branch for bounding box recognition. Mask R-CNN is simple to train and adds only a small overhead to Faster R-CNN, running at 5 fps. Moreover, Mask R-CNN is easy to generalize to other tasks, e.g., allowing us to estimate human poses in the same framework. We show top results in all three tracks of the COCO suite of challenges, including instance segmentation, bounding-box object detection, and person keypoint detection.
Without bells and whistles, Mask R-CNN outperforms all existing, single-model entries on every task, including the COCO 2016 challenge winners. We hope our simple and effective approach will serve as a solid baseline and help ease future research in instance-level recognition. + +
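+Concretely, the parallel mask branch amounts to one extra loss term per RoI. A minimal sketch of the multi-task loss L = L_cls + L_box + L_mask, assuming PyTorch; the single-RoI setting and tensor shapes are illustrative:
+
+```python
+import torch
+import torch.nn.functional as F
+
+def mask_rcnn_roi_loss(cls_logits, box_deltas, mask_logits,
+                       gt_label, gt_deltas, gt_mask):
+    # Per-RoI multi-task loss, L = L_cls + L_box + L_mask. The mask branch
+    # contributes one extra per-pixel binary cross-entropy term, computed
+    # on the mask predicted for the RoI's ground-truth class, in parallel
+    # with the usual classification and box-regression losses.
+    l_cls = F.cross_entropy(cls_logits, gt_label)
+    l_box = F.smooth_l1_loss(box_deltas, gt_deltas)
+    l_mask = F.binary_cross_entropy_with_logits(mask_logits, gt_mask)
+    return l_cls + l_box + l_mask
+
+# Toy shapes for a single RoI: 2 classes, a 28x28 mask.
+loss = mask_rcnn_roi_loss(torch.randn(1, 2), torch.randn(1, 4),
+                          torch.randn(1, 28, 28), torch.tensor([1]),
+                          torch.randn(1, 4), torch.rand(1, 28, 28))
+```
+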
+ +
+ +## Results and models + +### CTW1500 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :----------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :-------------------------------------------------------------: | +| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 160 | 1600 | 0.753 | 0.712 | 0.732 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.log.json) | + +### ICDAR2015 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :--------------------------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------------------------: | +| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 160 | 1920 | 0.783 | 0.872 | 0.825 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.log.json) | + +### ICDAR2017 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :---------------------------------------------------------: | :--------------: | :-------------: | :-----------: | :-----: | :-------: | :----: | :-------: | :---: | :-----------------------------------------------------------: | +| [MaskRCNN](/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py) | ImageNet | ICDAR2017 Train | ICDAR2017 Val | 160 | 1600 | 0.754 | 0.827 | 0.789 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.log.json) | + +```{note} +We tuned parameters with the techniques in [Pyramid Mask Text Detector](https://arxiv.org/abs/1903.11800) +``` + +## Citation + +```bibtex +@INPROCEEDINGS{8237584, + author={K. {He} and G. {Gkioxari} and P. {Dollár} and R. 
{Girshick}}, + booktitle={2017 IEEE International Conference on Computer Vision (ICCV)}, + title={Mask R-CNN}, + year={2017}, + pages={2980-2988}, + doi={10.1109/ICCV.2017.322}} +``` diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..beeb1853d9f1067b9f1882d389d3ae83f88e3308 --- /dev/null +++ b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py', + '../../_base_/schedules/schedule_sgd_160e.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/maskrcnn_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..5feb0c61ff2738338527e1aceaa569051a655cf8 --- /dev/null +++ b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py', + '../../_base_/schedules/schedule_sgd_160e.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/maskrcnn_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py new file mode 100644 index 0000000000000000000000000000000000000000..e22571e74511bab4303138f0e4816687fadac69e --- /dev/null +++ b/configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py', + '../../_base_/schedules/schedule_sgd_160e.py', + '../../_base_/det_datasets/icdar2017.py', + '../../_base_/det_pipelines/maskrcnn_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + 
workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/maskrcnn/metafile.yml b/configs/textdet/maskrcnn/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..90a2e3c3d33888beba652bf02c4cc1ae685eb24c --- /dev/null +++ b/configs/textdet/maskrcnn/metafile.yml @@ -0,0 +1,53 @@ +Collections: +- Name: Mask R-CNN + Metadata: + Training Data: ICDAR SCUT-CTW1500 + Training Techniques: + - SGD with Momentum + - Weight Decay + Training Resources: 8x GeForce GTX 1080 Ti + Architecture: + - ResNet + - FPN + - RPN + Paper: + URL: https://arxiv.org/pdf/1703.06870.pdf + Title: 'Mask R-CNN' + README: configs/textdet/maskrcnn/README.md + +Models: + - Name: mask_rcnn_r50_fpn_160e_ctw1500 + In Collection: Mask R-CNN + Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean: 0.732 + Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth + + - Name: mask_rcnn_r50_fpn_160e_icdar2015 + In Collection: Mask R-CNN + Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean: 0.825 + Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth + + - Name: mask_rcnn_r50_fpn_160e_icdar2017 + In Collection: Mask R-CNN + Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py + Metadata: + Training Data: ICDAR2017 + Results: + - Task: Text Detection + Dataset: ICDAR2017 + Metrics: + hmean: 0.789 + Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017_20210218-c6ec3ebb.pth diff --git a/configs/textdet/panet/README.md b/configs/textdet/panet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b7cdf2f061996dbcd4da3a1db582545d6dc2a48f --- /dev/null +++ b/configs/textdet/panet/README.md @@ -0,0 +1,43 @@ +# PANet + +> [Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network](https://arxiv.org/abs/1908.05900) + + + +## Abstract + +Scene text detection, an important step of scene text reading systems, has witnessed rapid development with convolutional neural networks. Nonetheless, two main challenges still exist and hamper its deployment to real-world applications. The first problem is the trade-off between speed and accuracy. The second one is to model the arbitrary-shaped text instance. Recently, some methods have been proposed to tackle arbitrary-shaped text detection, but they rarely take the speed of the entire pipeline into consideration, which may fall short in practical applications. In this paper, we propose an efficient and accurate arbitrary-shaped text detector, termed Pixel Aggregation Network (PAN), which is equipped with a low computational-cost segmentation head and a learnable post-processing.
More specifically, the segmentation head is made up of a Feature Pyramid Enhancement Module (FPEM) and a Feature Fusion Module (FFM). FPEM is a cascadable U-shaped module, which can introduce multi-level information to guide better segmentation. FFM can gather the features given by the FPEMs of different depths into a final feature for segmentation. The learnable post-processing is implemented by Pixel Aggregation (PA), which can precisely aggregate text pixels by predicted similarity vectors. Experiments on several standard benchmarks validate the superiority of the proposed PAN. It is worth noting that our method can achieve a competitive F-measure of 79.9% at 84.2 FPS on CTW1500. + +
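+Pixel Aggregation itself reduces to a clustering rule in the learned embedding space. A deliberately simplified sketch, assuming NumPy, with `margin` standing in for the learned distance threshold (the paper grows instances outwards from their kernels rather than scanning pixels independently):
+
+```python
+import numpy as np
+
+def aggregate_pixels(pixel_vectors, kernel_means, margin=0.8):
+    # Toy Pixel Aggregation: each candidate text pixel joins the kernel
+    # (instance seed) whose mean similarity vector is nearest, provided
+    # the distance stays within a margin. PAN learns the vectors so that
+    # pixels of one instance cluster around their own kernel.
+    labels = np.full(len(pixel_vectors), -1, dtype=int)
+    for i, v in enumerate(pixel_vectors):
+        d = np.linalg.norm(kernel_means - v, axis=1)
+        nearest = int(np.argmin(d))
+        if d[nearest] < margin:
+            labels[i] = nearest
+    return labels
+
+kernels = np.array([[0.0, 0.0], [5.0, 5.0]])         # two instance kernels
+pixels = np.array([[0.1, 0.2], [4.9, 5.1], [2.5, 2.5]])
+print(aggregate_pixels(pixels, kernels))             # -> [0, 1, -1]
+```
+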
+ +
+ +## Results and models + +### CTW1500 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :-------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-----------: | :-----------: | :-----------: | :---------------------------------------------------: | +| [PANet](https://github.com/open-mmlab/mmocr/blob/main/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 600 | 640 | 0.776 (0.717) | 0.838 (0.835) | 0.806 (0.801) | [model](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.log.json) | + +### ICDAR2015 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :------------------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :----------: | :----------: | :-----------: | :--------------------------------------------------: | +| [PANet](https://github.com/open-mmlab/mmocr/blob/main/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 600 | 736 | 0.734 (0.74) | 0.856 (0.86) | 0.791 (0.795) | [model](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.log.json) | + +```{note} +We've upgraded our IoU backend from `Polygon3` to `shapely`. There are some performance differences for some models due to the backends' different logics to handle invalid polygons (more info [here](https://github.com/open-mmlab/mmocr/issues/465)). **New evaluation result is presented in brackets** and new logs will be uploaded soon. 
+``` + +## Citation + +```bibtex +@inproceedings{WangXSZWLYS19, + author={Wenhai Wang and Enze Xie and Xiaoge Song and Yuhang Zang and Wenjia Wang and Tong Lu and Gang Yu and Chunhua Shen}, + title={Efficient and Accurate Arbitrary-Shaped Text Detection With Pixel Aggregation Network}, + booktitle={ICCV}, + pages={8439--8448}, + year={2019} + } +``` diff --git a/configs/textdet/panet/metafile.yml b/configs/textdet/panet/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..468c4126c2571ad9899a2a1ed7a9ef9a37f15533 --- /dev/null +++ b/configs/textdet/panet/metafile.yml @@ -0,0 +1,39 @@ +Collections: +- Name: PANet + Metadata: + Training Data: ICDAR SCUT-CTW1500 + Training Techniques: + - Adam + Training Resources: 8x GeForce GTX 1080 Ti + Architecture: + - ResNet + - FPEM_FFM + Paper: + URL: https://arxiv.org/abs/1908.05900 + Title: 'Efficient and Accurate Arbitrary-Shaped Text Detection with Pixel Aggregation Network' + README: configs/textdet/panet/README.md + +Models: + - Name: panet_r18_fpem_ffm_600e_ctw1500 + In Collection: PANet + Config: configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean-iou: 0.806 + Weights: https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_ctw1500_20210219-3b3a9aa3.pth + + - Name: panet_r18_fpem_ffm_600e_icdar2015 + In Collection: PANet + Config: configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.791 + Weights: https://download.openmmlab.com/mmocr/textdet/panet/panet_r18_fpem_ffm_sbn_600e_icdar2015_20210219-42dbe46a.pth diff --git a/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py b/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..91d23af68417b0c589964f0908d4de60dfcfc4e4 --- /dev/null +++ b/configs/textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_600e.py', + '../../_base_/det_models/panet_r18_fpem_ffm.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/panet_pipeline.py' +] + +model = {{_base_.model_poly}} + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}} +test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}} + +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_ctw1500), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py b/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..1183974024cf33d814f635ddb1454895fbd3c02c --- /dev/null +++ b/configs/textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_600e.py', + '../../_base_/det_models/panet_r18_fpem_ffm.py', +
'../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/panet_pipeline.py' +] + +model = {{_base_.model_quad}} + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_icdar2015), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py b/configs/textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py new file mode 100644 index 0000000000000000000000000000000000000000..0e9768d4742e845a45bd343d70bd06f3cb0e4fcb --- /dev/null +++ b/configs/textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_600e.py', + '../../_base_/det_models/panet_r50_fpem_ffm.py', + '../../_base_/det_datasets/icdar2017.py', + '../../_base_/det_pipelines/panet_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline_icdar2017 = {{_base_.train_pipeline_icdar2017}} +test_pipeline_icdar2017 = {{_base_.test_pipeline_icdar2017}} + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline_icdar2017), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2017), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2017)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/psenet/README.md b/configs/textdet/psenet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..b4293a3ce823c5dd285fda86dbc47b41465129b3 --- /dev/null +++ b/configs/textdet/psenet/README.md @@ -0,0 +1,44 @@ +# PSENet + +> [Shape robust text detection with progressive scale expansion network](https://arxiv.org/abs/1903.12473) + + + +## Abstract + +Scene text detection has witnessed rapid progress especially with the recent development of convolutional neural networks. However, there still exists two challenges which prevent the algorithm into industry applications. On the one hand, most of the state-of-art algorithms require quadrangle bounding box which is in-accurate to locate the texts with arbitrary shape. On the other hand, two text instances which are close to each other may lead to a false detection which covers both instances. Traditionally, the segmentation-based approach can relieve the first problem but usually fail to solve the second challenge. To address these two challenges, in this paper, we propose a novel Progressive Scale Expansion Network (PSENet), which can precisely detect text instances with arbitrary shapes. More specifically, PSENet generates the different scale of kernels for each text instance, and gradually expands the minimal scale kernel to the text instance with the complete shape. 
Due to the fact that there are large geometrical margins among the minimal scale kernels, our method is effective in splitting close text instances, making it easier to use segmentation-based methods to detect arbitrary-shaped text instances. Extensive experiments on CTW1500, Total-Text, ICDAR 2015 and ICDAR 2017 MLT validate the effectiveness of PSENet. Notably, on CTW1500, a dataset full of long curve texts, PSENet achieves an F-measure of 74.3% at 27 FPS, and our best F-measure (82.2%) outperforms state-of-the-art algorithms by 6.6%. The code will be released in the future. + +
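+The expansion itself is a breadth-first flood fill. A minimal sketch of one expansion round, assuming NumPy (the full algorithm repeats this from the smallest kernel map up to the largest):
+
+```python
+import numpy as np
+from collections import deque
+
+def expand_kernels(labels, next_kernel_mask):
+    # One round of progressive scale expansion (simplified): instances
+    # found in the smallest kernel map grow outwards pixel by pixel
+    # (4-connected BFS), but only into pixels of the next larger kernel.
+    # Contested pixels go to whichever instance reaches them first.
+    labels = labels.copy()
+    queue = deque(zip(*np.nonzero(labels)))
+    h, w = labels.shape
+    while queue:
+        y, x = queue.popleft()
+        for dy, dx in ((1, 0), (-1, 0), (0, 1), (0, -1)):
+            ny, nx = y + dy, x + dx
+            if (0 <= ny < h and 0 <= nx < w and labels[ny, nx] == 0
+                    and next_kernel_mask[ny, nx]):
+                labels[ny, nx] = labels[y, x]
+                queue.append((ny, nx))
+    return labels
+
+seeds = np.zeros((5, 8), dtype=int)
+seeds[2, 1], seeds[2, 6] = 1, 2          # two minimal-kernel instances
+mask = np.ones((5, 8), dtype=bool)
+print(expand_kernels(seeds, mask))       # instances meet mid-way, never merge
+```
+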
+ +
+ +## Results and models + +### CTW1500 + +| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :------------------------------------------------: | :------: | :--------: | :-----------: | :----------: | :-----: | :-------: | :-----------: | :-----------: | :-----------: | :--------------------------------------------------: | +| [PSENet-4s](configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 600 | 1280 | 0.728 (0.717) | 0.849 (0.852) | 0.784 (0.779) | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/20210401_215421.log.json) | + +### ICDAR2015 + +| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :----------------------------------: | :------: | :---------------------------------------: | :----------: | :-------: | :-----: | :-------: | :-----------: | :-----------: | :-----------: | :-------------------------------------: | +| [PSENet-4s](configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 600 | 2240 | 0.784 (0.753) | 0.831 (0.867) | 0.807 (0.806) | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015-c6131f0d.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/20210331_214145.log.json) | +| [PSENet-4s](configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py) | ResNet50 | pretrain on IC17 MLT [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2017_as_pretrain-3bd6056c.pth) | IC15 Train | IC15 Test | 600 | 2240 | 0.834 | 0.861 | 0.847 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth) \| [log](<>) | + +```{note} +We've upgraded our IoU backend from `Polygon3` to `shapely`. There are some performance differences for some models due to the backends' different logics to handle invalid polygons (more info [here](https://github.com/open-mmlab/mmocr/issues/465)). **New evaluation result is presented in brackets** and new logs will be uploaded soon. 
+``` + +## Citation + +```bibtex +@inproceedings{wang2019shape, + title={Shape robust text detection with progressive scale expansion network}, + author={Wang, Wenhai and Xie, Enze and Li, Xiang and Hou, Wenbo and Lu, Tong and Yu, Gang and Shao, Shuai}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + pages={9336--9345}, + year={2019} +} +``` diff --git a/configs/textdet/psenet/metafile.yml b/configs/textdet/psenet/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..40ec1dd0de63b8055f3399506355629ee32f2ab9 --- /dev/null +++ b/configs/textdet/psenet/metafile.yml @@ -0,0 +1,51 @@ +Collections: +- Name: PSENet + Metadata: + Training Data: ICDAR SCUT-CTW1500 + Training Techniques: + - Adam + Training Resources: 8x GeForce GTX 1080 Ti + Architecture: + - ResNet + - FPNF + Paper: + URL: https://arxiv.org/abs/1806.02559.pdf + Title: 'Shape Robust Text Detection with Progressive Scale Expansion Network' + README: configs/textdet/psenet/README.md + +Models: + - Name: psenet_r50_fpnf_600e_ctw1500 + In Collection: PSENet + Config: configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean-iou: 0.784 + Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth + + - Name: psenet_r50_fpnf_600e_icdar2015 + In Collection: PSENet + Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py + Metadata: + Training Data: ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2015 + Metrics: + hmean-iou: 0.807 + Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015-c6131f0d.pth + + - Name: psenet_r50_fpnf_600e_icdar2015 + In Collection: PSENet + Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py + Metadata: + Training Data: ICDAR2017 ICDAR2015 + Results: + - Task: Text Detection + Dataset: ICDAR2017 ICDAR2015 + Metrics: + hmean-iou: 0.847 + Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015_pretrain-eefd8fe6.pth diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..483a2b2e1e7e584dfba26c7c5f506ce544953db8 --- /dev/null +++ b/configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_600e.py', + '../../_base_/det_models/psenet_r50_fpnf.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/psenet_pipeline.py' +] + +model = {{_base_.model_poly}} + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}} + +data = dict( + samples_per_gpu=2, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_ctw1500)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py 
b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py new file mode 100644 index 0000000000000000000000000000000000000000..fbaacc19b19f6f8284eb65c7d2d2aa95e8051427 --- /dev/null +++ b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_600e.py', + '../../_base_/det_models/psenet_r50_fpnf.py', + '../../_base_/det_datasets/icdar2015.py', + '../../_base_/det_pipelines/psenet_pipeline.py' +] + +model = {{_base_.model_quad}} + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py new file mode 100644 index 0000000000000000000000000000000000000000..ca3d1105b5e6bdc9e47afa21dd3bc0b7d2ebd8d7 --- /dev/null +++ b/configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_600e.py', + '../../_base_/det_models/psenet_r50_fpnf.py', + '../../_base_/det_datasets/icdar2017.py', + '../../_base_/det_pipelines/psenet_pipeline.py' +] + +model = {{_base_.model_quad}} + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}} + +data = dict( + samples_per_gpu=8, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline_icdar2015)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textdet/textsnake/README.md b/configs/textdet/textsnake/README.md new file mode 100644 index 0000000000000000000000000000000000000000..be7f3fe7bb15f5610669e937179adca7210039b8 --- /dev/null +++ b/configs/textdet/textsnake/README.md @@ -0,0 +1,33 @@ +# Textsnake + +> [TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes](https://arxiv.org/abs/1807.01544) + + + +## Abstract + +Driven by deep neural networks and large scale datasets, scene text detection methods have progressed substantially over the past years, continuously refreshing the performance records on various standard benchmarks. However, limited by the representations (axis-aligned rectangles, rotated rectangles or quadrangles) adopted to describe text, existing methods may fall short when dealing with much more free-form text instances, such as curved text, which are actually very common in real-world scenarios. 
To tackle this problem, we propose a more flexible representation for scene text, termed as TextSnake, which is able to effectively represent text instances in horizontal, oriented and curved forms. In TextSnake, a text instance is described as a sequence of ordered, overlapping disks centered at symmetric axes, each of which is associated with potentially variable radius and orientation. Such geometry attributes are estimated via a Fully Convolutional Network (FCN) model. In experiments, the text detector based on TextSnake achieves state-of-the-art or comparable performance on Total-Text and SCUT-CTW1500, the two newly published benchmarks with special emphasis on curved text in natural images, as well as the widely-used datasets ICDAR 2015 and MSRA-TD500. Specifically, TextSnake outperforms the baseline on Total-Text by more than 40% in F-measure. + +
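+The disk-sequence representation is straightforward to picture in code. A minimal sketch, assuming NumPy and ignoring the per-disk orientation attribute:
+
+```python
+import numpy as np
+
+def disks_to_region(centers, radii, n=32):
+    # A TextSnake instance is a sequence of ordered, overlapping disks
+    # (centre, radius) strung along the text's symmetric axis; the text
+    # region is their union. Here we sample each disk's boundary to
+    # sketch that region.
+    theta = np.linspace(0.0, 2.0 * np.pi, n, endpoint=False)
+    circle = np.stack([np.cos(theta), np.sin(theta)], axis=1)  # unit rim
+    return [np.asarray(c, dtype=float) + r * circle
+            for c, r in zip(centers, radii)]
+
+# A gently bending snake: three overlapping disks with growing radius.
+rims = disks_to_region([(0, 0), (6, 1), (12, 3)], [4.0, 4.5, 5.0])
+```
+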
+ +
+ +## Results and models + +### CTW1500 + +| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Recall | Precision | Hmean | Download | +| :----------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :----: | :-------: | :---: | :-------------------------------------------------------------: | +| [TextSnake](/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 736 | 0.795 | 0.840 | 0.817 | [model](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth) \| [log](<>) | + +## Citation + +```bibtex +@article{long2018textsnake, + title={TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes}, + author={Long, Shangbang and Ruan, Jiaqiang and Zhang, Wenjie and He, Xin and Wu, Wenhao and Yao, Cong}, + booktitle={ECCV}, + pages={20-36}, + year={2018} +} +``` diff --git a/configs/textdet/textsnake/metafile.yml b/configs/textdet/textsnake/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..9be247b84304df68df199c61592972aaf0b30fc9 --- /dev/null +++ b/configs/textdet/textsnake/metafile.yml @@ -0,0 +1,27 @@ +Collections: +- Name: TextSnake + Metadata: + Training Data: SCUT-CTW1500 + Training Techniques: + - SGD with Momentum + Training Resources: 8x GeForce GTX 1080 Ti + Architecture: + - ResNet + - FPN_UNet + Paper: + URL: https://arxiv.org/abs/1807.01544.pdf + Title: 'TextSnake: A Flexible Representation for Detecting Text of Arbitrary Shapes' + README: configs/textdet/textsnake/README.md + +Models: + - Name: textsnake_r50_fpn_unet_1200e_ctw1500 + In Collection: TextSnake + Config: configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py + Metadata: + Training Data: CTW1500 + Results: + - Task: Text Detection + Dataset: CTW1500 + Metrics: + hmean-iou: 0.817 + Weights: https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500-27f65b64.pth diff --git a/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py b/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py new file mode 100644 index 0000000000000000000000000000000000000000..045e89a3bb1fa44ff33da1d2b8b32b42e396c58b --- /dev/null +++ b/configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_sgd_1200e.py', + '../../_base_/det_models/textsnake_r50_fpn_unet.py', + '../../_base_/det_datasets/ctw1500.py', + '../../_base_/det_pipelines/textsnake_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=4, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=10, metric='hmean-iou') diff --git a/configs/textrecog/abinet/README.md b/configs/textrecog/abinet/README.md new file mode 100644 index 0000000000000000000000000000000000000000..40d8fdb7c7b62490de46fc4c411c495b6f1c8588 --- /dev/null +++
b/configs/textrecog/abinet/README.md @@ -0,0 +1,59 @@ +# ABINet + +> [Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition](https://arxiv.org/abs/2103.06495) + + + +## Abstract + +Linguistic knowledge is of great benefit to scene text recognition. However, how to effectively model linguistic rules in end-to-end deep networks remains a research challenge. In this paper, we argue that the limited capacity of language models comes from: 1) implicitly language modeling; 2) unidirectional feature representation; and 3) language model with noise input. Correspondingly, we propose an autonomous, bidirectional and iterative ABINet for scene text recognition. Firstly, the autonomous suggests to block gradient flow between vision and language models to enforce explicitly language modeling. Secondly, a novel bidirectional cloze network (BCN) as the language model is proposed based on bidirectional feature representation. Thirdly, we propose an execution manner of iterative correction for language model which can effectively alleviate the impact of noise input. Additionally, based on the ensemble of iterative predictions, we propose a self-training method which can learn from unlabeled images effectively. Extensive experiments indicate that ABINet has superiority on low-quality images and achieves state-of-the-art results on several mainstream benchmarks. Besides, the ABINet trained with ensemble self-training shows promising improvement in realizing human-level recognition. + +
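+The iterative correction scheme can be sketched as a short feedback loop. Everything below is an illustrative placeholder rather than MMOCR API; `iter_size` corresponds to the knob of the same name in the configs that follow, and `decode` stands for mapping features to character probabilities:
+
+```python
+def iterative_correction(vision_feature, language_model, fuser, decode,
+                         iter_size=3):
+    # Sketch of ABINet's iterative correction loop: the current reading is
+    # re-fed to the language model, whose refined feature is fused with
+    # the vision feature. detach() mirrors the blocked gradient flow
+    # between the vision and language models described above.
+    probs = decode(vision_feature)                 # vision-only first pass
+    for _ in range(iter_size):
+        lang_feature = language_model(probs.detach())
+        probs = decode(fuser(vision_feature, lang_feature))
+    return probs
+```
+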
+ +
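+To make the abstract concrete, here is a minimal PyTorch sketch of the two ideas the paper names: the "autonomous" principle (gradients are blocked between the vision and language models) and iterative correction of the language model's input. This is our illustration with placeholder layer sizes, not MMOCR's implementation; in the configs below, the model's `iter_size` field appears to control the number of correction iterations.
+
+```python
+import torch
+import torch.nn as nn
+
+vocab_size, seq_len, dim = 37, 26, 512
+vision_head = nn.Linear(dim, vocab_size)            # stand-in for the vision model
+language_model = nn.Linear(vocab_size, vocab_size)  # stand-in for the BCN
+fuser = nn.Linear(2 * vocab_size, vocab_size)       # stand-in for the fusion block
+
+visual_feats = torch.randn(2, seq_len, dim)         # fake backbone features
+logits = vision_head(visual_feats)                  # vision-only prediction
+
+for _ in range(3):  # iterative correction over a few rounds
+    # detach() realizes the autonomous principle: the language model reads
+    # probabilities, but no gradient flows back into the vision branch here.
+    lm_logits = language_model(logits.softmax(dim=-1).detach())
+    # Fuse vision and language predictions; the fused output becomes the
+    # language model's input in the next round.
+    logits = fuser(torch.cat([vision_head(visual_feats), lm_logits], dim=-1))
+```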
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | note | +| :-------: | :----------: | :--------: | :----------: | +| Syn90k | 8919273 | 1 | synth | +| SynthText | 7239272 | 1 | alphanumeric | + +### Test Dataset + +| testset | instance_num | note | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and models + +| methods | pretrained | | Regular Text | | | Irregular Text | | download | +| :------------------------------------------------: | :----------------------------------------------------: | :----: | :----------: | :--: | :--: | :------------: | :--: | :--------------------------------------------------- | +| | | IIIT5K | SVT | IC13 | IC15 | SVTP | CT80 | | +| [ABINet-Vision](https://github.com/open-mmlab/mmocr/tree/master/configs/textrecog/abinet/abinet_vision_only_academic.py) | - | 94.7 | 91.7 | 93.6 | 83.0 | 85.1 | 86.5 | [model](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_vision_only_academic-e6b9ea89.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/abinet/20211201_195512.log) | +| [ABINet](https://github.com/open-mmlab/mmocr/tree/master/configs/textrecog/abinet/abinet_academic.py) | [Pretrained](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_pretrain-1bed979b.pth) | 95.7 | 94.6 | 95.7 | 85.1 | 90.4 | 90.3 | [model](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_academic-f718abf6.pth) \| [log1](https://download.openmmlab.com/mmocr/textrecog/abinet/20211210_095832.log) \| [log2](https://download.openmmlab.com/mmocr/textrecog/abinet/20211213_131724.log) | + +```{note} +1. ABINet allows its encoder to run and be trained without decoder and fuser. Its encoder is designed to recognize texts as a stand-alone model and therefore can work as an independent text recognizer. We release it as ABINet-Vision. +2. Facts about the pretrained model: MMOCR does not have a systematic pipeline to pretrain the language model (LM) yet, thus the weights of LM are converted from [the official pretrained model](https://github.com/FangShancheng/ABINet). The weights of ABINet-Vision are directly used as the vision model of ABINet. +3. Due to some technical issues, the training process of ABINet was interrupted at the 13th epoch and we resumed it later. Both logs are released for full reference. +4. The model architecture in the logs looks slightly different from the final released version, since it was refactored afterward. However, both architectures are essentially equivalent. 
+``` + +## Citation + +```bibtex +@inproceedings{fang2021read, + title={Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition}, + author={Fang, Shancheng and Xie, Hongtao and Wang, Yuxin and Mao, Zhendong and Zhang, Yongdong}, + booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, + year={2021} +} +``` diff --git a/configs/textrecog/abinet/abinet_academic.py b/configs/textrecog/abinet/abinet_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..4abb87a6ee576a6c8a299d30baf4fee2ae56a1bf --- /dev/null +++ b/configs/textrecog/abinet/abinet_academic.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_20e.py', + '../../_base_/recog_pipelines/abinet_pipeline.py', + '../../_base_/recog_models/abinet.py', + # '../../_base_/recog_datasets/ST_MJ_alphanumeric_train.py', + '../../_base_/recog_datasets/toy_data.py' + # '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=192, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/abinet/abinet_vision_only_academic.py b/configs/textrecog/abinet/abinet_vision_only_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..318144d2418c7e77568d4915d72f01882835ba94 --- /dev/null +++ b/configs/textrecog/abinet/abinet_vision_only_academic.py @@ -0,0 +1,81 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_20e.py', + '../../_base_/recog_pipelines/abinet_pipeline.py', + '../../_base_/recog_datasets/toy_data.py' + # '../../_base_/recog_datasets/ST_MJ_alphanumeric_train.py', + # '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +# Model +num_chars = 37 +max_seq_len = 26 +label_convertor = dict( + type='ABIConvertor', + dict_type='DICT36', + with_unknown=False, + with_padding=False, + lower=True, +) + +model = dict( + type='ABINet', + backbone=dict(type='ResNetABI'), + encoder=dict( + type='ABIVisionModel', + encoder=dict( + type='TransformerEncoder', + n_layers=3, + n_head=8, + d_model=512, + d_inner=2048, + dropout=0.1, + max_len=8 * 32, + ), + decoder=dict( + type='ABIVisionDecoder', + in_channels=512, + num_channels=64, + attn_height=8, + attn_width=32, + attn_mode='nearest', + use_result='feature', + num_chars=num_chars, + max_seq_len=max_seq_len, + init_cfg=dict(type='Xavier', layer='Conv2d')), + ), + loss=dict( + type='ABILoss', + enc_weight=1.0, + dec_weight=1.0, + fusion_weight=1.0, + num_classes=num_chars), + label_convertor=label_convertor, + max_seq_len=max_seq_len, + iter_size=1) + +data = dict( + samples_per_gpu=192, + workers_per_gpu=8, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset',
+ datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/abinet/metafile.yml b/configs/textrecog/abinet/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..14b5561019191aac73ad3bf63c5dc331f66972fe --- /dev/null +++ b/configs/textrecog/abinet/metafile.yml @@ -0,0 +1,87 @@ +Collections: +- Name: ABINet + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Epochs: 20 + Batch Size: 1536 + Training Resources: 8x Tesla V100 + Architecture: + - ResNetABI + - ABIVisionModel + - ABILanguageDecoder + - ABIFuser + Paper: + URL: https://arxiv.org/pdf/2103.06495.pdf + Title: 'Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition' + README: configs/textrecog/abinet/README.md + +Models: + - Name: abinet_vision_only_academic + In Collection: ABINet + Config: configs/textrecog/abinet/abinet_vision_only_academic.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 94.7 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 91.7 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 93.6 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 83.0 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 85.1 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 86.5 + Weights: https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_vision_only_academic-e6b9ea89.pth + + - Name: abinet_academic + In Collection: ABINet + Config: configs/textrecog/abinet/abinet_academic.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.7 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 94.6 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 95.7 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 85.1 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 90.4 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 90.3 + Weights: https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_academic-f718abf6.pth diff --git a/configs/textrecog/crnn/README.md b/configs/textrecog/crnn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..52232587e512eb53f16e652e3f3afd0a53686faf --- /dev/null +++ b/configs/textrecog/crnn/README.md @@ -0,0 +1,50 @@ +# CRNN + +> [An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition](https://arxiv.org/abs/1507.05717) + + + +## Abstract + +Image-based sequence recognition has been a long-standing research topic in computer vision. In this paper, we investigate the problem of scene text recognition, which is among the most important and challenging tasks in image-based sequence recognition. A novel neural network architecture, which integrates feature extraction, sequence modeling and transcription into a unified framework, is proposed. 
Compared with previous systems for scene text recognition, the proposed architecture possesses four distinctive properties: (1) It is end-to-end trainable, in contrast to most of the existing algorithms whose components are separately trained and tuned. (2) It naturally handles sequences of arbitrary lengths, involving no character segmentation or horizontal scale normalization. (3) It is not confined to any predefined lexicon and achieves remarkable performance in both lexicon-free and lexicon-based scene text recognition tasks. (4) It generates an effective yet much smaller model, which is more practical for real-world application scenarios. The experiments on standard benchmarks, including the IIIT-5K, Street View Text and ICDAR datasets, demonstrate the superiority of the proposed algorithm over prior art. Moreover, the proposed algorithm performs well in the task of image-based music score recognition, which evidently verifies its generality. + +
+ +
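+The unified framework the abstract describes can be sketched in a few lines of PyTorch: convolutional features are collapsed into a width-wise sequence, recurrent layers model the sequence, and CTC handles transcription without character segmentation. This is a toy sketch under our own assumptions (layer sizes, a single conv block), not the released model:
+
+```python
+import torch
+import torch.nn as nn
+
+num_classes = 37  # e.g. 36 characters plus 1 CTC blank
+
+conv = nn.Sequential(  # feature extraction; pools the height down to 1
+    nn.Conv2d(1, 64, 3, padding=1), nn.ReLU(), nn.AdaptiveAvgPool2d((1, None)))
+rnn = nn.LSTM(64, 128, bidirectional=True, batch_first=True)
+head = nn.Linear(256, num_classes)
+ctc = nn.CTCLoss(blank=0)
+
+imgs = torch.randn(4, 1, 32, 100)               # grayscale text crops
+feats = conv(imgs).squeeze(2).permute(0, 2, 1)  # (N, W, C): sequence along width
+logits = head(rnn(feats)[0])                    # per-timestep class scores
+
+log_probs = logits.log_softmax(-1).permute(1, 0, 2)  # (T, N, C) for CTCLoss
+targets = torch.randint(1, num_classes, (4, 5))      # fake labels, no blanks
+loss = ctc(log_probs, targets,
+           torch.full((4,), logits.size(1)), torch.full((4,), 5))
+loss.backward()  # gradients reach both RNN and CNN: end-to-end trainable
+```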
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | note | +| :------: | :----------: | :--------: | :---: | +| Syn90k | 8919273 | 1 | synth | + +### Test Dataset + +| testset | instance_num | note | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and models + +| methods | | Regular Text | | | | Irregular Text | | download | +| :------------------------------------------------------: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :-----------------------------------------------------------------------------------------------: | +| | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [CRNN](/configs/textrecog/crnn/crnn_academic_dataset.py) | 80.5 | 81.5 | 86.5 | | 54.1 | 59.1 | 55.6 | [model](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_academic-a723a1c5.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/crnn/20210326_111035.log.json) | + +## Citation + +```bibtex +@article{shi2016end, + title={An end-to-end trainable neural network for image-based sequence recognition and its application to scene text recognition}, + author={Shi, Baoguang and Bai, Xiang and Yao, Cong}, + journal={IEEE Transactions on Pattern Analysis and Machine Intelligence}, + year={2016} +} +``` diff --git a/configs/textrecog/crnn/crnn_academic_dataset.py b/configs/textrecog/crnn/crnn_academic_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b8288cb5a1cb48ddc6b32e988b45305e01e76df5 --- /dev/null +++ b/configs/textrecog/crnn/crnn_academic_dataset.py @@ -0,0 +1,35 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/crnn.py', + '../../_base_/recog_pipelines/crnn_pipeline.py', + '../../_base_/recog_datasets/MJ_train.py', + '../../_base_/recog_datasets/academic_test.py', + '../../_base_/schedules/schedule_adadelta_5e.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') + +cudnn_benchmark = True diff --git a/configs/textrecog/crnn/crnn_toy_dataset.py b/configs/textrecog/crnn/crnn_toy_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f61c68afe285e4d1943cbcbb8ede1fe965a99a4b --- /dev/null +++ b/configs/textrecog/crnn/crnn_toy_dataset.py @@ -0,0 +1,47 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_pipelines/crnn_pipeline.py', + '../../_base_/recog_datasets/toy_data.py', + '../../_base_/schedules/schedule_adadelta_5e.py' +] + +label_convertor = dict( + type='CTCConvertor', dict_type='DICT36', with_unknown=True, lower=True) + +model = dict( + type='CRNNNet', + preprocessor=None, + backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1), + encoder=None, + decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True), + loss=dict(type='CTCLoss'), + label_convertor=label_convertor, + 
pretrained=None) + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=32, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') + +cudnn_benchmark = True diff --git a/configs/textrecog/crnn/metafile.yml b/configs/textrecog/crnn/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..c7b058c6a27d8a627788d702bc4ee942713ad7db --- /dev/null +++ b/configs/textrecog/crnn/metafile.yml @@ -0,0 +1,37 @@ +Collections: +- Name: CRNN + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adadelta + Epochs: 5 + Batch Size: 256 + Training Resources: 4x GeForce GTX 1080 Ti + Architecture: + - VeryDeepVgg + - CRNNDecoder + Paper: + URL: https://arxiv.org/pdf/1507.05717.pdf + Title: 'An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition' + README: configs/textrecog/crnn/README.md + +Models: + - Name: crnn_academic_dataset + In Collection: CRNN + Config: configs/textrecog/crnn/crnn_academic_dataset.py + Metadata: + Training Data: Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 80.5 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 81.5 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 86.5 + Weights: https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_academic-a723a1c5.pth diff --git a/configs/textrecog/master/README.md b/configs/textrecog/master/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ce89cc2911e26813c9d594b0a8dbab7f88db5d37 --- /dev/null +++ b/configs/textrecog/master/README.md @@ -0,0 +1,52 @@ +# MASTER + +> [MASTER: Multi-aspect non-local network for scene text recognition](https://arxiv.org/abs/1910.02562) + + + +## Abstract + +Attention-based scene text recognizers have gained huge success; they leverage a more compact intermediate representation to learn 1D or 2D attention with an RNN-based encoder-decoder architecture. However, such methods suffer from the attention-drift problem, because high similarity among encoded features leads to attention confusion under the RNN-based local attention mechanism. Moreover, RNN-based methods have low efficiency due to poor parallelization. To overcome these problems, we propose MASTER, a self-attention based scene text recognizer that (1) not only encodes the input-output attention but also learns self-attention, which encodes feature-feature and target-target relationships inside the encoder and decoder, (2) learns an intermediate representation that is more powerful and more robust to spatial distortion, and (3) offers great training efficiency thanks to high training parallelization, and high-speed inference thanks to an efficient memory-cache mechanism. Extensive experiments on various benchmarks demonstrate the superior performance of our MASTER on both regular and irregular scene text. + +
+ +
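+The "memory-cache mechanism" mentioned for fast inference is, as far as we can tell from the abstract, key-value caching during autoregressive decoding. A bare-bones sketch of that idea (our reading, with made-up sizes, not the released code):
+
+```python
+import torch
+
+d, steps = 64, 5
+wq, wk, wv = (torch.nn.Linear(d, d) for _ in range(3))
+
+k_cache, v_cache = [], []
+tok = torch.randn(1, 1, d)         # embedding of the start token
+for _ in range(steps):
+    # Only the newest token is projected; earlier keys/values are reused
+    # from the cache instead of being recomputed at every step.
+    k_cache.append(wk(tok))
+    v_cache.append(wv(tok))
+    k = torch.cat(k_cache, dim=1)  # (1, t, d)
+    v = torch.cat(v_cache, dim=1)
+    attn = torch.softmax(wq(tok) @ k.transpose(1, 2) / d**0.5, dim=-1)
+    tok = attn @ v                 # glimpse that feeds the next step
+```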
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :-------: | :----------: | :--------: | :----: | +| SynthText | 7266686 | 1 | synth | +| SynthAdd | 1216889 | 1 | synth | +| Syn90k | 8919273 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | Backbone | | Regular Text | | | | Irregular Text | | download | +| :------------------------------------------------------------: | :-----------: | :----: | :----------: | :---: | :-: | :---: | :------------: | :---: | :-------------------------------------------------------------------------: | +| | | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [MASTER](/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py) | R31-GCAModule | 95.27 | 89.8 | 95.17 | | 77.03 | 82.95 | 89.93 | [model](https://download.openmmlab.com/mmocr/textrecog/master/master_r31_12e_ST_MJ_SA-787edd36.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/master/master_r31_12e_ST_MJ_SA-787edd36.log.json) | + +## Citation + +```bibtex +@article{Lu2021MASTER, + title={{MASTER}: Multi-Aspect Non-local Network for Scene Text Recognition}, + author={Ning Lu and Wenwen Yu and Xianbiao Qi and Yihao Chen and Ping Gong and Rong Xiao and Xiang Bai}, + journal={Pattern Recognition}, + year={2021} +} +``` diff --git a/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py b/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py new file mode 100644 index 0000000000000000000000000000000000000000..88f4dbeae79584720134969a9ff1179e0352471d --- /dev/null +++ b/configs/textrecog/master/master_r31_12e_ST_MJ_SA.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/master.py', + '../../_base_/schedules/schedule_adam_step_12e.py', + '../../_base_/recog_pipelines/master_pipeline.py', + '../../_base_/recog_datasets/ST_SA_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=512, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=128), + test_dataloader=dict(samples_per_gpu=128), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/master/master_toy_dataset.py b/configs/textrecog/master/master_toy_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..3d0440240a28a2d64b2f0442cae7d628a7542f42 --- /dev/null +++ b/configs/textrecog/master/master_toy_dataset.py @@ -0,0 +1,30 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/master.py', + '../../_base_/schedules/schedule_adam_step_12e.py', + '../../_base_/recog_pipelines/master_pipeline.py', + '../../_base_/recog_datasets/toy_data.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + workers_per_gpu=2, + samples_per_gpu=8, + 
train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/master/metafile.yml b/configs/textrecog/master/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..1eb6b11e2835a2c9ca1f8f5f15a7509351871bcb --- /dev/null +++ b/configs/textrecog/master/metafile.yml @@ -0,0 +1,52 @@ +Collections: + - Name: MASTER + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Epochs: 12 + Batch Size: 512 + Training Resources: 4x Tesla A100 + Architecture: + - ResNet31-GCAModule + - MASTERDecoder + Paper: + URL: https://arxiv.org/abs/1910.02562 + Title: "MASTER: Multi-Aspect Non-local Network for Scene Text Recognition" + README: configs/textrecog/master/README.md + +Models: + - Name: master_academic + In Collection: MASTER + Config: configs/textrecog/master/master_r31_12e_ST_MJ_SA.py + Metadata: + Training Data: + - SynthText + - SynthAdd + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.27 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 89.8 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 95.17 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 77.03 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 82.95 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 89.93 + Weights: https://download.openmmlab.com/mmocr/textrecog/master/master_r31_12e_ST_MJ_SA-787edd36.pth diff --git a/configs/textrecog/nrtr/README.md b/configs/textrecog/nrtr/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f64af8923d9b81493478fc458f93a19786abd0f7 --- /dev/null +++ b/configs/textrecog/nrtr/README.md @@ -0,0 +1,66 @@ +# NRTR + +> [NRTR: A No-Recurrence Sequence-to-Sequence Model For Scene Text Recognition](https://arxiv.org/abs/1806.00926) + + + +## Abstract + +Scene text recognition has attracted a great deal of research due to its importance to various applications. Existing methods mainly adopt recurrence or convolution based networks. Though they have obtained good performance, these methods still suffer from two limitations: slow training speed due to the internal recurrence of RNNs, and high complexity due to stacked convolutional layers for long-term feature extraction. This paper, for the first time, proposes a no-recurrence sequence-to-sequence text recognizer, named NRTR, that dispenses with recurrences and convolutions entirely. NRTR follows the encoder-decoder paradigm, where the encoder uses stacked self-attention to extract image features, and the decoder applies stacked self-attention to recognize texts based on the encoder output. NRTR relies solely on the self-attention mechanism and thus can be trained with more parallelization and less complexity. Considering that scene images have large variation in text and background, we further design a modality-transform block to effectively transform 2D input images to 1D sequences, combined with the encoder to extract more discriminative features. NRTR achieves state-of-the-art or highly competitive performance on both regular and irregular benchmarks, while requiring only a small fraction of the training time of the best model in the literature (at least 8 times faster). + +
+ +
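+Since the whole point of NRTR is dropping recurrence, the architecture reduces to a standard Transformer over a flattened feature map. A minimal sketch with assumed shapes (`torch.nn.Transformer` standing in for the stacked self-attention encoder/decoder; these are not MMOCR's modules):
+
+```python
+import torch
+import torch.nn as nn
+
+d_model, num_classes = 512, 92  # 92 output classes, as in the note below
+
+# Modality-transform stand-in: a CNN map (N, C, H, W) flattened to (N, H*W, C).
+feat_map = torch.randn(2, d_model, 4, 25)
+src = feat_map.flatten(2).permute(0, 2, 1)
+
+transformer = nn.Transformer(d_model=d_model, num_encoder_layers=6,
+                             num_decoder_layers=6, batch_first=True)
+tgt = torch.randn(2, 10, d_model)  # embedded, shifted target characters
+out = transformer(src, tgt)        # (2, 10, d_model): no recurrence anywhere
+logits = nn.Linear(d_model, num_classes)(out)
+```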
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :-------: | :----------: | :--------: | :----: | +| SynthText | 7266686 | 1 | synth | +| Syn90k | 8919273 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | Backbone | | Regular Text | | | | Irregular Text | | download | +| :-------------------------------------------------------------: | :----------: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :----------------------------------------------------------------------------: | +| | | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [NRTR](/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py) | R31-1/16-1/8 | 94.7 | 87.3 | 94.3 | | 73.5 | 78.9 | 85.1 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_1by16_1by8_academic_20211124-f60cebf4.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20211124_002420.log.json) | +| [NRTR](/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py) | R31-1/8-1/4 | 95.2 | 90.0 | 94.0 | | 74.1 | 79.4 | 88.2 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_1by8_1by4_academic_20211123-e1fdb322.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/20211123_232151.log.json) | + +```{note} + +- For backbone `R31-1/16-1/8`: + - The output consists of 92 classes: 26 lowercase letters, 26 uppercase letters, 28 symbols, 10 digits, 1 unknown token and 1 end-of-sequence token. + - The number of encoder blocks is 6. + - `1/16-1/8` means the height of the feature map from the backbone is 1/16 of the input image height, and the width is 1/8 of the input width. +- For backbone `R31-1/8-1/4`: + - The output consists of 92 classes: 26 lowercase letters, 26 uppercase letters, 28 symbols, 10 digits, 1 unknown token and 1 end-of-sequence token. + - The number of encoder blocks is 6. + - `1/8-1/4` means the height of the feature map from the backbone is 1/8 of the input image height, and the width is 1/4 of the input width. 
+``` + +## Citation + +```bibtex +@inproceedings{sheng2019nrtr, + title={NRTR: A no-recurrence sequence-to-sequence model for scene text recognition}, + author={Sheng, Fenfen and Chen, Zhineng and Xu, Bo}, + booktitle={2019 International Conference on Document Analysis and Recognition (ICDAR)}, + pages={781--786}, + year={2019}, + organization={IEEE} +} +``` diff --git a/configs/textrecog/nrtr/metafile.yml b/configs/textrecog/nrtr/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..7d5ca150109386635eba9f3739891d2b58955634 --- /dev/null +++ b/configs/textrecog/nrtr/metafile.yml @@ -0,0 +1,86 @@ +Collections: +- Name: NRTR + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Epochs: 6 + Batch Size: 6144 + Training Resources: 48x GeForce GTX 1080 Ti + Architecture: + - CNN + - NRTREncoder + - NRTRDecoder + Paper: + URL: https://arxiv.org/pdf/1806.00926.pdf + Title: 'NRTR: A No-Recurrence Sequence-to-Sequence Model For Scene Text Recognition' + README: configs/textrecog/nrtr/README.md + +Models: + - Name: nrtr_r31_1by16_1by8_academic + In Collection: NRTR + Config: configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 94.7 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 87.3 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 94.3 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 73.5 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 78.9 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 85.1 + Weights: https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_1by16_1by8_academic_20211124-f60cebf4.pth + + - Name: nrtr_r31_1by8_1by4_academic + In Collection: NRTR + Config: configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.2 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 90.0 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 94.0 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 74.1 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 79.4 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 88.2 + Weights: https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_r31_1by8_1by4_academic_20211123-e1fdb322.pth diff --git a/configs/textrecog/nrtr/nrtr_modality_transform_academic.py b/configs/textrecog/nrtr/nrtr_modality_transform_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..471926ba998640123ff356c146dc8bbdb9b3c261 --- /dev/null +++ b/configs/textrecog/nrtr/nrtr_modality_transform_academic.py @@ -0,0 +1,32 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_models/nrtr_modality_transform.py', + '../../_base_/schedules/schedule_adam_step_6e.py', + '../../_base_/recog_datasets/ST_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py', + '../../_base_/recog_pipelines/nrtr_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=128, + workers_per_gpu=4, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + 
datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/nrtr/nrtr_modality_transform_toy_dataset.py b/configs/textrecog/nrtr/nrtr_modality_transform_toy_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1bb350fc3f49418f2841df2d65f183c34e08db0e --- /dev/null +++ b/configs/textrecog/nrtr/nrtr_modality_transform_toy_dataset.py @@ -0,0 +1,31 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_models/nrtr_modality_transform.py', + '../../_base_/schedules/schedule_adam_step_6e.py', + '../../_base_/recog_datasets/toy_data.py', + '../../_base_/recog_pipelines/nrtr_pipeline.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=16, + workers_per_gpu=2, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..b7adc0d30cda5e5556821ff941d6e00dcd3b4ba7 --- /dev/null +++ b/configs/textrecog/nrtr/nrtr_r31_1by16_1by8_academic.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_6e.py', + '../../_base_/recog_pipelines/nrtr_pipeline.py', + '../../_base_/recog_datasets/ST_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='NRTR', + backbone=dict( + type='ResNet31OCR', + layers=[1, 2, 5, 3], + channels=[32, 64, 128, 256, 512, 512], + stage4_pool_cfg=dict(kernel_size=(2, 1), stride=(2, 1)), + last_stage_pool=True), + encoder=dict(type='NRTREncoder'), + decoder=dict(type='NRTRDecoder'), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=40) + +data = dict( + samples_per_gpu=128, + workers_per_gpu=4, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py b/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..397122b55ea57df647a6bb5097973e0eebf4979d --- /dev/null +++ b/configs/textrecog/nrtr/nrtr_r31_1by8_1by4_academic.py @@ -0,0 +1,48 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_6e.py', + '../../_base_/recog_pipelines/nrtr_pipeline.py', + '../../_base_/recog_datasets/ST_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} 
+test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='NRTR', + backbone=dict( + type='ResNet31OCR', + layers=[1, 2, 5, 3], + channels=[32, 64, 128, 256, 512, 512], + stage4_pool_cfg=dict(kernel_size=(2, 1), stride=(2, 1)), + last_stage_pool=False), + encoder=dict(type='NRTREncoder'), + decoder=dict(type='NRTRDecoder'), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=40) + +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/robust_scanner/README.md b/configs/textrecog/robust_scanner/README.md new file mode 100644 index 0000000000000000000000000000000000000000..165ef248c56640f55772ac5c1d2aae29e69d42e8 --- /dev/null +++ b/configs/textrecog/robust_scanner/README.md @@ -0,0 +1,61 @@ +# RobustScanner + +> [RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition](https://arxiv.org/abs/2007.07542) + + + +## Abstract + +The attention-based encoder-decoder framework has recently achieved impressive results for scene text recognition, and many variants have emerged with improvements in recognition quality. However, it performs poorly on contextless texts (e.g., random character sequences), which is unacceptable in most real application scenarios. In this paper, we first investigate the decoding process in depth. We empirically find that a representative character-level sequence decoder utilizes not only context information but also positional information. Contextual information, which the existing approaches heavily rely on, causes the problem of attention drift. To suppress this side effect, we propose a novel position enhancement branch and dynamically fuse its outputs with those of the decoder attention module for scene text recognition. Specifically, it contains a position-aware module to enable the encoder to output feature vectors encoding their own spatial positions, and an attention module to estimate glimpses using the positional clue (i.e., the current decoding time step) only. The dynamic fusion is conducted via an element-wise gate mechanism to obtain a more robust feature. Theoretically, our proposed method, dubbed RobustScanner, decodes individual characters with a dynamic ratio between contextual and positional clues, relying more on positional ones when decoding sequences with scarce context, and is thus robust and practical. Empirically, it has achieved new state-of-the-art results on popular regular and irregular text recognition benchmarks, without much performance drop on contextless benchmarks, validating its robustness in both contextual and contextless application scenarios. + +
+ +
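+Our reading of the abstract's "element-wise gate mechanism", as a hedged sketch rather than the actual MMOCR module: the glimpse from the context-driven decoder attention and the glimpse from the position enhancement branch are concatenated, and a learned sigmoid gate decides, per element, how much of each to keep.
+
+```python
+import torch
+import torch.nn as nn
+
+d = 512
+gate_proj = nn.Linear(2 * d, d)
+
+context_glimpse = torch.randn(2, d)   # from the attention decoder (context clue)
+position_glimpse = torch.randn(2, d)  # from the position enhancement branch
+
+gate = torch.sigmoid(gate_proj(torch.cat([context_glimpse, position_glimpse], -1)))
+# Contextless inputs can push the gate toward the positional glimpse, and
+# context-rich ones toward the contextual glimpse.
+fused = gate * context_glimpse + (1 - gate) * position_glimpse
+```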
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :--------: | :----------: | :--------: | :------------------------: | +| icdar_2011 | 3567 | 20 | real | +| icdar_2013 | 848 | 20 | real | +| icdar2015 | 4468 | 20 | real | +| coco_text | 42142 | 20 | real | +| IIIT5K | 2000 | 20 | real | +| SynthText | 2400000 | 1 | synth | +| SynthAdd | 1216889 | 1 | synth, 1.6m in [\[1\]](#1) | +| Syn90k | 2400000 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :---------------------------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular, 639 in [\[1\]](#1) | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | GPUs | | Regular Text | | | | Irregular Text | | download | +| :------------------------------------------------------------------------: | :--: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :-------------------------------------------------------------------------: | +| | | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [RobustScanner](/configs/textrecog/robust_scanner/robustscanner_r31_academic.py) | 16 | 95.1 | 89.2 | 93.1 | | 77.8 | 80.3 | 90.3 | [model](https://download.openmmlab.com/mmocr/textrecog/robustscanner/robustscanner_r31_academic-5f05874f.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/robustscanner/20210401_170932.log.json) | + +## References + +\[1\] Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu. Show, attend and read: A simple and strong baseline for irregular text recognition. In AAAI 2019. + +## Citation + +```bibtex +@inproceedings{yue2020robustscanner, + title={RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition}, + author={Yue, Xiaoyu and Kuang, Zhanghui and Lin, Chenhao and Sun, Hongbin and Zhang, Wayne}, + booktitle={European Conference on Computer Vision}, + year={2020} +} +``` diff --git a/configs/textrecog/robust_scanner/metafile.yml b/configs/textrecog/robust_scanner/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..95892543d9bc81bf45b08aecdb4e139c90490100 --- /dev/null +++ b/configs/textrecog/robust_scanner/metafile.yml @@ -0,0 +1,58 @@ +Collections: +- Name: RobustScanner + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Epochs: 5 + Batch Size: 1024 + Training Resources: 16x GeForce GTX 1080 Ti + Architecture: + - ResNet31OCR + - ChannelReductionEncoder + - RobustScannerDecoder + Paper: + URL: https://arxiv.org/pdf/2007.07542.pdf + Title: 'RobustScanner: Dynamically Enhancing Positional Clues for Robust Text Recognition' + README: configs/textrecog/robust_scanner/README.md + +Models: + - Name: robustscanner_r31_academic + In Collection: RobustScanner + Config: configs/textrecog/robust_scanner/robustscanner_r31_academic.py + Metadata: + Training Data: + - ICDAR2011 + - ICDAR2013 + - ICDAR2015 + - COCO text + - IIIT5K + - SynthText + - SynthAdd + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.1 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 89.2 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 93.1 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 77.8 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 80.3 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 90.3 + Weights: 
https://download.openmmlab.com/mmocr/textrecog/robustscanner/robustscanner_r31_academic-5f05874f.pth diff --git a/configs/textrecog/robust_scanner/robustscanner_r31_academic.py b/configs/textrecog/robust_scanner/robustscanner_r31_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..65a980b61684dee9929b7800ee82b4461ed2fc40 --- /dev/null +++ b/configs/textrecog/robust_scanner/robustscanner_r31_academic.py @@ -0,0 +1,34 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_models/robust_scanner.py', + '../../_base_/schedules/schedule_adam_step_5e.py', + '../../_base_/recog_pipelines/sar_pipeline.py', + '../../_base_/recog_datasets/ST_SA_MJ_real_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=64, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/sar/README.md b/configs/textrecog/sar/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f7046aea44e5a6e36267bda38379eedbf6441319 --- /dev/null +++ b/configs/textrecog/sar/README.md @@ -0,0 +1,82 @@ +# SAR + +> [Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/abs/1811.00751) + + + +## Abstract + +Recognizing irregular text in natural scene images is challenging due to the large variance in text appearance, such as curvature, orientation and distortion. Most existing approaches rely heavily on sophisticated model designs and/or extra fine-grained annotations, which, to some extent, increase the difficulty in algorithm implementation and data collection. In this work, we propose an easy-to-implement strong baseline for irregular scene text recognition, using off-the-shelf neural network components and only word-level annotations. It is composed of a 31-layer ResNet, an LSTM-based encoder-decoder framework and a 2-dimensional attention module. Despite its simplicity, the proposed method is robust and achieves state-of-the-art performance on both regular and irregular scene text recognition benchmarks. + +
+ +
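+The 2-dimensional attention module is the part that lets SAR skip rectification: at every decoding step, the LSTM hidden state attends over the whole 2D feature map instead of a 1D sequence. A condensed sketch with assumed shapes (not MMOCR's `ParallelSARDecoder`):
+
+```python
+import torch
+
+N, C, H, W = 2, 512, 6, 40
+feat_map = torch.randn(N, C, H, W)  # e.g. the 31-layer ResNet output
+hidden = torch.randn(N, C)          # decoder LSTM state at one step
+
+# Attention scores over every spatial location of the 2D map.
+scores = torch.einsum('nchw,nc->nhw', feat_map, hidden) / C**0.5
+weights = scores.flatten(1).softmax(-1).view(N, H, W)
+# The glimpse is a weighted sum over the 2D grid, fed to character prediction.
+glimpse = torch.einsum('nchw,nhw->nc', feat_map, weights)
+```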
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :--------: | :----------: | :--------: | :------------------------: | +| icdar_2011 | 3567 | 20 | real | +| icdar_2013 | 848 | 20 | real | +| icdar2015 | 4468 | 20 | real | +| coco_text | 42142 | 20 | real | +| IIIT5K | 2000 | 20 | real | +| SynthText | 2400000 | 1 | synth | +| SynthAdd | 1216889 | 1 | synth, 1.6m in [\[1\]](#1) | +| Syn90k | 2400000 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :---------------------------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular, 639 in [\[1\]](#1) | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | Backbone | Decoder | | Regular Text | | | | Irregular Text | | download | +| :----------------------------------------------------------: | :---------: | :------------------: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :------------------------------------------------------------: | +| | | | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [SAR](/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py) | R31-1/8-1/4 | ParallelSARDecoder | 95.0 | 89.6 | 93.7 | | 79.0 | 82.2 | 88.9 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/20210327_154129.log.json) | +| [SAR](/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py) | R31-1/8-1/4 | SequentialSARDecoder | 95.2 | 88.7 | 92.4 | | 78.2 | 81.9 | 89.6 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_sequential_decoder_academic-d06c9a8e.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/20210330_105728.log.json) | + +## Chinese Dataset + +## Results and Models + +| Methods | Backbone | Decoder | | download | +| :---------------------------------------------------------------: | :---------: | :----------------: | :-: | :-----------------------------------------------------------------------------------------------------: | +| [SAR](/configs/textrecog/sar/sar_r31_parallel_decoder_chinese.py) | R31-1/8-1/4 | ParallelSARDecoder | | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_parallel_decoder_chineseocr_20210507-b4be8214.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/20210506_225557.log.json) \| [dict](https://download.openmmlab.com/mmocr/textrecog/sar/dict_printed_chinese_english_digits.txt) | + +```{note} + +- `R31-1/8-1/4` means the height of the feature map from the backbone is 1/8 of the input image height, and the width is 1/4 of the input width. +- We did not use beam search during decoding. +- We implemented two kinds of decoders, namely `ParallelSARDecoder` and `SequentialSARDecoder`. + - `ParallelSARDecoder`: parallel decoding during training with an `LSTM` layer. It is faster. + - `SequentialSARDecoder`: sequential decoding during training with `LSTMCell`. It is easier to understand. +- For the train dataset: + - We did not construct distinct data groups (20 groups in [[1]](#1)) to train the model group-by-group, since it would render model training too complicated. + - Instead, we randomly selected `2.4m` patches from `Syn90k`, `2.4m` from `SynthText` and `1.2m` from `SynthAdd`, and grouped all data together. See [config](https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_academic.py) for details.
+- We used 48 GPUs with `total_batch_size = 64 * 48` in the experiment above to speed up training, while keeping the `initial lr = 1e-3` unchanged. +``` + +## Citation + +```bibtex +@inproceedings{li2019show, + title={Show, attend and read: A simple and strong baseline for irregular text recognition}, + author={Li, Hui and Wang, Peng and Shen, Chunhua and Zhang, Guyu}, + booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, + volume={33}, + number={01}, + pages={8610--8617}, + year={2019} +} +``` diff --git a/configs/textrecog/sar/metafile.yml b/configs/textrecog/sar/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..9f4115817efefb8b5f9c9bbdcebdaf33411febea --- /dev/null +++ b/configs/textrecog/sar/metafile.yml @@ -0,0 +1,98 @@ +Collections: +- Name: SAR + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Training Resources: 48x GeForce GTX 1080 Ti + Epochs: 5 + Batch Size: 3072 + Architecture: + - ResNet31OCR + - SAREncoder + - ParallelSARDecoder + Paper: + URL: https://arxiv.org/pdf/1811.00751.pdf + Title: 'Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition' + README: configs/textrecog/sar/README.md + +Models: + - Name: sar_r31_parallel_decoder_academic + In Collection: SAR + Config: configs/textrecog/sar/sar_r31_parallel_decoder_academic.py + Metadata: + Training Data: + - ICDAR2011 + - ICDAR2013 + - ICDAR2015 + - COCO text + - IIIT5K + - SynthText + - SynthAdd + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.0 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 89.6 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 93.7 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 79.0 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 82.2 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 88.9 + Weights: https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_parallel_decoder_academic-dba3a4a3.pth + + - Name: sar_r31_sequential_decoder_academic + In Collection: SAR + Config: configs/textrecog/sar/sar_r31_sequential_decoder_academic.py + Metadata: + Training Data: + - ICDAR2011 + - ICDAR2013 + - ICDAR2015 + - COCO text + - IIIT5K + - SynthText + - SynthAdd + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 95.2 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 88.7 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 92.4 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 78.2 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 81.9 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 89.6 + Weights: https://download.openmmlab.com/mmocr/textrecog/sar/sar_r31_sequential_decoder_academic-d06c9a8e.pth diff --git a/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py b/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..983378118b4d589f531a7f401a06d238966a45d4 --- /dev/null +++ b/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/sar.py', + '../../_base_/schedules/schedule_adam_step_5e.py', + '../../_base_/recog_pipelines/sar_pipeline.py', + '../../_base_/recog_datasets/ST_SA_MJ_real_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = 
{{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=64, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/sar/sar_r31_parallel_decoder_chinese.py b/configs/textrecog/sar/sar_r31_parallel_decoder_chinese.py new file mode 100644 index 0000000000000000000000000000000000000000..58856312705bcc757550ca84f97a097f80f9be24 --- /dev/null +++ b/configs/textrecog/sar/sar_r31_parallel_decoder_chinese.py @@ -0,0 +1,128 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_5e.py' +] + +dict_file = 'data/chineseocr/labels/dict_printed_chinese_english_digits.txt' +label_convertor = dict( + type='AttnConvertor', dict_file=dict_file, with_unknown=True) + +model = dict( + type='SARNet', + backbone=dict(type='ResNet31OCR'), + encoder=dict( + type='SAREncoder', + enc_bi_rnn=False, + enc_do_rnn=0.1, + enc_gru=False, + ), + decoder=dict( + type='ParallelSARDecoder', + enc_bi_rnn=False, + dec_bi_rnn=False, + dec_do_rnn=0, + dec_gru=False, + pred_dropout=0.1, + d_k=512, + pred_concat=True), + loss=dict(type='SARLoss'), + label_convertor=label_convertor, + max_seq_len=30) + +img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=256, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio' + ]), +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='MultiRotateAugOCR', + rotate_degrees=[0, 90, 270], + transforms=[ + dict( + type='ResizeOCR', + height=48, + min_width=48, + max_width=256, + keep_aspect_ratio=True, + width_downsample_ratio=0.25), + dict(type='ToTensorOCR'), + dict(type='NormalizeOCR', **img_norm_cfg), + dict( + type='Collect', + keys=['img'], + meta_keys=[ + 'filename', 'ori_shape', 'resize_shape', 'valid_ratio' + ]), + ]) +] + +dataset_type = 'OCRDataset' + +train_prefix = 'data/chinese/' + +train_ann_file = train_prefix + 'labels/train.txt' + +train = dict( + type=dataset_type, + img_prefix=train_prefix, + ann_file=train_ann_file, + loader=dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +test_prefix = 'data/chineseocr/' + +test_ann_file = test_prefix + 'labels/test.txt' + +test = dict( + type=dataset_type, + img_prefix=test_prefix, + ann_file=test_ann_file, + loader=dict( + type='HardDiskLoader', + repeat=1, + parser=dict( + type='LineStrParser', + keys=['filename', 'text'], + keys_idx=[0, 1], + separator=' ')), + pipeline=None, + test_mode=False) + +data = dict( + samples_per_gpu=40, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', datasets=[train], + 
pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', datasets=[test], pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', datasets=[test], pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/sar/sar_r31_parallel_decoder_toy_dataset.py b/configs/textrecog/sar/sar_r31_parallel_decoder_toy_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..40688d1290080c010beccc271214e5b246b45a32 --- /dev/null +++ b/configs/textrecog/sar/sar_r31_parallel_decoder_toy_dataset.py @@ -0,0 +1,30 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/sar.py', + '../../_base_/schedules/schedule_adam_step_5e.py', + '../../_base_/recog_pipelines/sar_pipeline.py', + '../../_base_/recog_datasets/toy_data.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + workers_per_gpu=2, + samples_per_gpu=8, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py b/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..46ca259b3abb8863348f8eef71b0126f77e269eb --- /dev/null +++ b/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py @@ -0,0 +1,58 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/schedules/schedule_adam_step_5e.py', + '../../_base_/recog_pipelines/sar_pipeline.py', + '../../_base_/recog_datasets/ST_SA_MJ_real_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='SARNet', + backbone=dict(type='ResNet31OCR'), + encoder=dict( + type='SAREncoder', + enc_bi_rnn=False, + enc_do_rnn=0.1, + enc_gru=False, + ), + decoder=dict( + type='SequentialSARDecoder', + enc_bi_rnn=False, + dec_bi_rnn=False, + dec_do_rnn=0, + dec_gru=False, + pred_dropout=0.1, + d_k=512, + pred_concat=True), + loss=dict(type='SARLoss'), + label_convertor=label_convertor, + max_seq_len=30) + +data = dict( + samples_per_gpu=64, + workers_per_gpu=2, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/satrn/README.md b/configs/textrecog/satrn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..4bb92f3fc9d37d0c1a9563769b645d20fc598eb2 --- /dev/null +++ b/configs/textrecog/satrn/README.md @@ -0,0 +1,51 @@ +# SATRN + +> [On Recognizing Texts of Arbitrary Shapes with 2D Self-Attention](https://arxiv.org/abs/1910.04396) + + + +## Abstract + +Scene text recognition (STR) is 
the task of recognizing character sequences in natural scenes. While there have been great advances in STR methods, current methods still fail to recognize texts in arbitrary shapes, such as heavily curved or rotated texts, which are abundant in daily life (e.g. restaurant signs, product labels, company logos, etc). This paper introduces a novel architecture to recognizing texts of arbitrary shapes, named Self-Attention Text Recognition Network (SATRN), which is inspired by the Transformer. SATRN utilizes the self-attention mechanism to describe two-dimensional (2D) spatial dependencies of characters in a scene text image. Exploiting the full-graph propagation of self-attention, SATRN can recognize texts with arbitrary arrangements and large inter-character spacing. As a result, SATRN outperforms existing STR models by a large margin of 5.7 pp on average in "irregular text" benchmarks. We provide empirical analyses that illustrate the inner mechanisms and the extent to which the model is applicable (e.g. rotated and multi-line text). We will open-source the code. + +
+ +
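For anyone who wants to try the checkpoints listed below without assembling a training config, MMOCR 0.x ships a high-level `MMOCR` wrapper (the same class this PR's `pages/App.py` imports further down). The snippet below is a minimal sketch, not part of the diff: it assumes an MMOCR 0.x install where `'SATRN'` is a registered recognition model, and `word_crop.jpg` is a placeholder for any cropped word image.

```python
# Minimal sketch (assumes MMOCR 0.x); 'word_crop.jpg' is a placeholder path.
from mmocr.utils.ocr import MMOCR

# Recognition-only pipeline: skip detection, use the SATRN recognizer.
# The matching checkpoint is downloaded automatically on first use.
ocr = MMOCR(det=None, recog='SATRN')

# readtext returns one dict per input image, e.g. [{'text': ..., 'score': ...}]
results = ocr.readtext('word_crop.jpg')
print(results)
```

The `satrn_small` config in the table below is the lighter variant of the same architecture (256-dimensional encoder/decoder instead of 512), trading a few points of accuracy for a smaller model.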
+ +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :-------: | :----------: | :--------: | :----: | +| SynthText | 7266686 | 1 | synth | +| Syn90k | 8919273 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and Models + +| Methods | | Regular Text | | | | Irregular Text | | download | +| :----------------------------------------------------: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :-------------------------------------------------------------------------------------------------: | +| | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [Satrn](/configs/textrecog/satrn/satrn_academic.py) | 96.1 | 93.5 | 95.7 | | 84.1 | 88.5 | 90.3 | [model](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_academic_20211009-cb8b1580.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/satrn/20210809_093244.log.json) | +| [Satrn_small](/configs/textrecog/satrn/satrn_small.py) | 94.7 | 91.3 | 95.4 | | 81.9 | 85.9 | 86.5 | [model](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_small_20211009-2cf13355.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/satrn/20210811_053047.log.json) | + +## Citation + +```bibtex +@article{junyeop2019recognizing, + title={On Recognizing Texts of Arbitrary Shapes with 2D Self-Attention}, + author={Junyeop Lee and Sungrae Park and Jeonghun Baek and Seong Joon Oh and Seonghyeon Kim and Hwalsuk Lee}, + year={2019} +} +``` diff --git a/configs/textrecog/satrn/metafile.yml b/configs/textrecog/satrn/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..5dd03fe550617330589c2880d88734a1fb3a4b3a --- /dev/null +++ b/configs/textrecog/satrn/metafile.yml @@ -0,0 +1,86 @@ +Collections: +- Name: SATRN + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adam + Training Resources: 8x Tesla V100 + Epochs: 6 + Batch Size: 512 + Architecture: + - ShallowCNN + - SatrnEncoder + - TFDecoder + Paper: + URL: https://arxiv.org/pdf/1910.04396.pdf + Title: 'On Recognizing Texts of Arbitrary Shapes with 2D Self-Attention' + README: configs/textrecog/satrn/README.md + +Models: + - Name: satrn_academic + In Collection: SATRN + Config: configs/textrecog/satrn/satrn_academic.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 96.1 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 93.5 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 95.7 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 84.1 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 88.5 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 90.3 + Weights: https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_academic_20211009-cb8b1580.pth + + - Name: satrn_small + In Collection: SATRN + Config: configs/textrecog/satrn/satrn_small.py + Metadata: + Training Data: + - SynthText + - Syn90k + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 94.7 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 91.3 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 95.4 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 81.9 + - Task: Text
Recognition + Dataset: SVTP + Metrics: + word_acc: 85.9 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 86.5 + Weights: https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_small_20211009-2cf13355.pth diff --git a/configs/textrecog/satrn/satrn_academic.py b/configs/textrecog/satrn/satrn_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..00a664e2093f4b4c5cbf77708813c66761428814 --- /dev/null +++ b/configs/textrecog/satrn/satrn_academic.py @@ -0,0 +1,68 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_pipelines/satrn_pipeline.py', + '../../_base_/recog_datasets/ST_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='SATRN', + backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=512), + encoder=dict( + type='SatrnEncoder', + n_layers=12, + n_head=8, + d_k=512 // 8, + d_v=512 // 8, + d_model=512, + n_position=100, + d_inner=512 * 4, + dropout=0.1), + decoder=dict( + type='NRTRDecoder', + n_layers=6, + d_embedding=512, + n_head=8, + d_model=512, + d_inner=512 * 4, + d_k=512 // 8, + d_v=512 // 8), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=25) + +# optimizer +optimizer = dict(type='Adam', lr=3e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +total_epochs = 6 + +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/satrn/satrn_small.py b/configs/textrecog/satrn/satrn_small.py new file mode 100644 index 0000000000000000000000000000000000000000..96f86797f4700fd6ab9590fa983323f3e22d15c2 --- /dev/null +++ b/configs/textrecog/satrn/satrn_small.py @@ -0,0 +1,68 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_pipelines/satrn_pipeline.py', + '../../_base_/recog_datasets/ST_MJ_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +label_convertor = dict( + type='AttnConvertor', dict_type='DICT90', with_unknown=True) + +model = dict( + type='SATRN', + backbone=dict(type='ShallowCNN', input_channels=3, hidden_dim=256), + encoder=dict( + type='SatrnEncoder', + n_layers=6, + n_head=8, + d_k=256 // 8, + d_v=256 // 8, + d_model=256, + n_position=100, + d_inner=256 * 4, + dropout=0.1), + decoder=dict( + type='NRTRDecoder', + n_layers=6, + d_embedding=256, + n_head=8, + d_model=256, + d_inner=256 * 4, + d_k=256 // 8, + d_v=256 // 8), + loss=dict(type='TFLoss'), + label_convertor=label_convertor, + max_seq_len=25) + +# optimizer +optimizer = dict(type='Adam', lr=3e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +total_epochs = 6 + +data = dict( + 
samples_per_gpu=64, + workers_per_gpu=4, + val_dataloader=dict(samples_per_gpu=1), + test_dataloader=dict(samples_per_gpu=1), + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/seg/README.md b/configs/textrecog/seg/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8ab29e61727e3fa648c2aa090fcae8076bbf5e2 --- /dev/null +++ b/configs/textrecog/seg/README.md @@ -0,0 +1,48 @@ +# SegOCR + + + +## Abstract + +Just a simple Seg-based baseline for text recognition tasks. + +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | source | +| :-------: | :----------: | :--------: | :----: | +| SynthText | 7266686 | 1 | synth | + +### Test Dataset + +| testset | instance_num | type | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| CT80 | 288 | irregular | + +## Results and Models + +| Backbone | Neck | Head | | | Regular Text | | | Irregular Text | download | +| :------: | :----: | :--: | :-: | :----: | :----------: | :--: | :-: | :------------: | :------------------------------------------------------------------------------------------------------------------------------------------: | +| | | | | IIIT5K | SVT | IC13 | | CT80 | | +| R31-1/16 | FPNOCR | 1x | | 90.9 | 81.8 | 90.7 | | 80.9 | [model](https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-72235b11.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/seg/20210325_112835.log.json) | + +```{note} + +- `R31-1/16` means the size (both height and width ) of feature from backbone is 1/16 of input image. +- `1x` means the size (both height and width) of feature from head is the same with input image. 
+``` + +## Citation + +```bibtex +@unpublished{key, + title={SegOCR Simple Baseline.}, + author={}, + note={Unpublished Manuscript}, + year={2021} +} +``` diff --git a/configs/textrecog/seg/metafile.yml b/configs/textrecog/seg/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..937747f41dcdce01e297ab44d9a9ee9189073fd9 --- /dev/null +++ b/configs/textrecog/seg/metafile.yml @@ -0,0 +1,39 @@ +Collections: +- Name: SegOCR + Metadata: + Training Data: mixture + Training Techniques: + - Adam + Epochs: 5 + Batch Size: 64 + Training Resources: 4x GeForce GTX 1080 Ti + Architecture: + - ResNet31OCR + - FPNOCR + Paper: + README: configs/textrecog/seg/README.md + +Models: + - Name: seg_r31_1by16_fpnocr_academic + In Collection: SegOCR + Config: configs/textrecog/seg/seg_r31_1by16_fpnocr_academic.py + Metadata: + Training Data: SynthText + Results: + - Task: Text Recognition + Dataset: IIIT5K + Metrics: + word_acc: 90.9 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 81.8 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 90.7 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 80.9 + Weights: https://download.openmmlab.com/mmocr/textrecog/seg/seg_r31_1by16_fpnocr_academic-72235b11.pth diff --git a/configs/textrecog/seg/seg_r31_1by16_fpnocr_academic.py b/configs/textrecog/seg/seg_r31_1by16_fpnocr_academic.py new file mode 100644 index 0000000000000000000000000000000000000000..4e37856c06fb43cb0b67a6a1760bd7ef9eeddb66 --- /dev/null +++ b/configs/textrecog/seg/seg_r31_1by16_fpnocr_academic.py @@ -0,0 +1,40 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_pipelines/seg_pipeline.py', + '../../_base_/recog_models/seg.py', + '../../_base_/recog_datasets/ST_charbox_train.py', + '../../_base_/recog_datasets/academic_test.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +# optimizer +optimizer = dict(type='Adam', lr=1e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +total_epochs = 5 + +find_unused_parameters = True + +data = dict( + samples_per_gpu=16, + workers_per_gpu=2, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') diff --git a/configs/textrecog/seg/seg_r31_1by16_fpnocr_toy_dataset.py b/configs/textrecog/seg/seg_r31_1by16_fpnocr_toy_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..893bebba496c04e9364bdcea3caef651e3d426d0 --- /dev/null +++ b/configs/textrecog/seg/seg_r31_1by16_fpnocr_toy_dataset.py @@ -0,0 +1,39 @@ +_base_ = [ + '../../_base_/default_runtime.py', + '../../_base_/recog_datasets/seg_toy_data.py', + '../../_base_/recog_models/seg.py', + '../../_base_/recog_pipelines/seg_pipeline.py', +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +# optimizer +optimizer = dict(type='Adam', lr=1e-4) +optimizer_config = dict(grad_clip=None) +# learning policy +lr_config = dict(policy='step', step=[3, 4]) +total_epochs = 5 + +data = dict( + samples_per_gpu=8, + workers_per_gpu=1, + train=dict( + 
type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') + +find_unused_parameters = True diff --git a/configs/textrecog/tps/README.md b/configs/textrecog/tps/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0066fb154bf7f2fa26a3ac00acaddb2ed4d30f03 --- /dev/null +++ b/configs/textrecog/tps/README.md @@ -0,0 +1,52 @@ +# CRNN-STN + + + +## Abstract + +Image-based sequence recognition has been a long-standing research topic in computer vision. In this paper, we investigate the problem of scene text recognition, which is among the most important and challenging tasks in image-based sequence recognition. A novel neural network architecture, which integrates feature extraction, sequence modeling and transcription into a unified framework, is proposed. Compared with previous systems for scene text recognition, the proposed architecture possesses four distinctive properties: (1) It is end-to-end trainable, in contrast to most of the existing algorithms whose components are separately trained and tuned. (2) It naturally handles sequences in arbitrary lengths, involving no character segmentation or horizontal scale normalization. (3) It is not confined to any predefined lexicon and achieves remarkable performances in both lexicon-free and lexicon-based scene text recognition tasks. (4) It generates an effective yet much smaller model, which is more practical for real-world application scenarios. The experiments on standard benchmarks, including the IIIT-5K, Street View Text and ICDAR datasets, demonstrate the superiority of the proposed algorithm over the prior arts. Moreover, the proposed algorithm performs well in the task of image-based music score recognition, which evidently verifies the generality of it. + +
+ +
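The config referenced in the results table below, `crnn_tps_academic_dataset.py`, is typical of the files added in this PR: everything substantive is pulled in through the `_base_` list, and the `{{_base_.train_list}}`-style placeholders are substituted when mmcv parses the file. The following is a minimal sketch of that loading step, not part of the diff, assuming an MMOCR 0.x / mmcv environment with this repository layout on disk:

```python
# Minimal sketch (assumes MMOCR 0.x with mmcv): load a config and let mmcv
# resolve the _base_ inheritance and the {{_base_.*}} placeholders.
from mmcv import Config
from mmocr.models import build_detector

cfg = Config.fromfile('configs/textrecog/tps/crnn_tps_academic_dataset.py')

# After parsing, the placeholders are concrete values from the base files:
print(cfg.data.train.type)   # 'UniformConcatDataset'
print(cfg.optimizer)         # Adadelta settings from schedule_adadelta_5e.py

# Build the TPS-preprocessed CRNN defined in _base_/recog_models/crnn_tps.py
model = build_detector(cfg.model)
```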
+ +```{note} +We use STN from this paper as the preprocessor and CRNN as the recognition network. +``` + +## Dataset + +### Train Dataset + +| trainset | instance_num | repeat_num | note | +| :------: | :----------: | :--------: | :---: | +| Syn90k | 8919273 | 1 | synth | + +### Test Dataset + +| testset | instance_num | note | +| :-----: | :----------: | :-------: | +| IIIT5K | 3000 | regular | +| SVT | 647 | regular | +| IC13 | 1015 | regular | +| IC15 | 2077 | irregular | +| SVTP | 645 | irregular | +| CT80 | 288 | irregular | + +## Results and models + +| methods | | Regular Text | | | | Irregular Text | | download | +| :-------------------------------------------------------------: | :----: | :----------: | :--: | :-: | :--: | :------------: | :--: | :----------------------------------------------------------------------------------------: | +| | IIIT5K | SVT | IC13 | | IC15 | SVTP | CT80 | | +| [CRNN-STN](/configs/textrecog/tps/crnn_tps_academic_dataset.py) | 80.8 | 81.3 | 85.0 | | 59.6 | 68.1 | 53.8 | [model](https://download.openmmlab.com/mmocr/textrecog/tps/crnn_tps_academic_dataset_20210510-d221a905.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/tps/20210510_204353.log.json) | + +## Citation + +```bibtex +@article{shi2016robust, + title={Robust Scene Text Recognition with Automatic Rectification}, + author={Shi, Baoguang and Wang, Xinggang and Lyu, Pengyuan and Yao, + Cong and Bai, Xiang}, + year={2016} +} +``` diff --git a/configs/textrecog/tps/crnn_tps_academic_dataset.py b/configs/textrecog/tps/crnn_tps_academic_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..15607538d0c31de2e4baadf0b30d781f534b99bb --- /dev/null +++ b/configs/textrecog/tps/crnn_tps_academic_dataset.py @@ -0,0 +1,33 @@ +_base_ = [ + '../../_base_/default_runtime.py', '../../_base_/recog_models/crnn_tps.py', + '../../_base_/recog_pipelines/crnn_tps_pipeline.py', + '../../_base_/recog_datasets/MJ_train.py', + '../../_base_/recog_datasets/academic_test.py', + '../../_base_/schedules/schedule_adadelta_5e.py' +] + +train_list = {{_base_.train_list}} +test_list = {{_base_.test_list}} + +train_pipeline = {{_base_.train_pipeline}} +test_pipeline = {{_base_.test_pipeline}} + +data = dict( + samples_per_gpu=64, + workers_per_gpu=4, + train=dict( + type='UniformConcatDataset', + datasets=train_list, + pipeline=train_pipeline), + val=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline), + test=dict( + type='UniformConcatDataset', + datasets=test_list, + pipeline=test_pipeline)) + +evaluation = dict(interval=1, metric='acc') + +cudnn_benchmark = True diff --git a/configs/textrecog/tps/metafile.yml b/configs/textrecog/tps/metafile.yml new file mode 100644 index 0000000000000000000000000000000000000000..afd9be9c2789f05547ba31dae165ccedb709e43f --- /dev/null +++ b/configs/textrecog/tps/metafile.yml @@ -0,0 +1,51 @@ +Collections: +- Name: TPS-CRNN + Metadata: + Training Data: OCRDataset + Training Techniques: + - Adadelta + Epochs: 5 + Batch Size: 256 + Training Resources: 4x GeForce GTX 1080 Ti + Architecture: + - TPSPreprocessor + - VeryDeepVgg + - CRNNDecoder + - CTCLoss + Paper: + URL: https://arxiv.org/pdf/1603.03915.pdf + Title: 'Robust Scene Text Recognition with Automatic Rectification' + README: configs/textrecog/tps/README.md + +Models: + - Name: crnn_tps_academic_dataset + In Collection: TPS-CRNN + Config: configs/textrecog/tps/crnn_tps_academic_dataset.py + Metadata: + Training Data: Syn90k + Results: + - Task: Text Recognition + Dataset: 
IIIT5K + Metrics: + word_acc: 80.8 + - Task: Text Recognition + Dataset: SVT + Metrics: + word_acc: 81.3 + - Task: Text Recognition + Dataset: ICDAR2013 + Metrics: + word_acc: 85.0 + - Task: Text Recognition + Dataset: ICDAR2015 + Metrics: + word_acc: 59.6 + - Task: Text Recognition + Dataset: SVTP + Metrics: + word_acc: 68.1 + - Task: Text Recognition + Dataset: CT80 + Metrics: + word_acc: 53.8 + Weights: https://download.openmmlab.com/mmocr/textrecog/tps/crnn_tps_academic_dataset_20210510-d221a905.pth diff --git a/data/chineseocr/labels/dict_printed_chinese_english_digits.txt b/data/chineseocr/labels/dict_printed_chinese_english_digits.txt new file mode 100644 index 0000000000000000000000000000000000000000..2fb5f7326d863a2eefb67777df6becfb15671275 --- /dev/null +++ b/data/chineseocr/labels/dict_printed_chinese_english_digits.txt @@ -0,0 +1,11377 @@ +! +" +# +$ +% +& +' +( +) +* ++ +, +- +. +/ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +: +; +< += +> +? +@ +A +B +C +D +E +F +G +H +I +J +K +L +M +N +O +P +Q +R +S +T +U +V +W +X +Y +Z +[ +\ +] +^ +_ +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +v +w +x +y +z +{ +| +} +~ +® +° +± +³ +´ +· +Â +Ä +Ç +È +É +Ê +Ô +× +Ü +ß +à +ä +è +é +ì +ò +ó +ô +ö +÷ +ü +ō +ɑ +˙ +Ω +β +δ +ο +Д +з +ـ +– +— +― +‖ +‘ +’ +“ +” +• +‥ +… +‰ +′ +※ +€ +℃ +™ +Ⅰ +Ⅱ +Ⅲ +Ⅳ +Ⅴ +Ⅵ +→ +∅ +∈ +− +√ +∞ +∶ +≠ +≤ +① +─ +━ +┌ +■ +□ +▪ +▲ +△ +▶ +▸ +▼ +▽ +◆ +◇ +○ +◎ +◥ +★ +☆ +☑ +♀ +♥ +♪ +♭ +✕ +❤ +、 +。 +々 +〇 +〈 +〉 +《 +》 +「 +」 +『 +』 +【 +】 +〔 +〕 +〖 +〗 +〜 +ぁ +あ +ぃ +い +ぅ +う +ぇ +え +お +か +が +き +ぎ +く +ぐ +け +げ +こ +ご +さ +ざ +し +じ +す +ず +せ +ぜ +そ +ぞ +た +だ +ち +ぢ +っ +つ +づ +て +で +と +ど +な +に +ぬ +ね +の +は +ば +ぱ +ひ +び +ぴ +ふ +ぶ +ぷ +へ +べ +ぺ +ほ +ぼ +ぽ +ま +み +む +め +も +ゃ +や +ゅ +ゆ +ょ +よ +ら +り +る +れ +ろ +わ +ゑ +を +ん +゜ +ァ +ア +ィ +イ +ゥ +ウ +ェ +エ +ォ +オ +カ +ガ +キ +ギ +ク +グ +ケ +ゲ +コ +ゴ +サ +ザ +シ +ジ +ス +ズ +セ +ゼ +ソ +ゾ +タ +ダ +チ +ッ +ツ +ヅ +テ +デ +ト +ド +ナ +ニ +ヌ +ネ +ノ +ハ +バ +パ +ヒ +ビ +ピ +フ +ブ +プ +ヘ +ベ +ペ +ホ +ボ +ポ +マ +ミ +ム +メ +モ +ャ +ヤ +ュ +ユ +ョ +ヨ +ラ +リ +ル +レ +ロ +ワ +ヱ +ヲ +ン +ヴ +ヵ +ヶ +ー +㎝ +㎡ +㓥 +㔉 +㖞 +㧑 +㶉 +㺍 +䁖 +䇲 +䌷 +䌸 +䌹 +䌽 +䍁 +䓕 +䗖 +䜣 +䝙 +䠙 +䯄 +䴕 +䴖 +䴗 +䴙 +一 +丁 +七 +万 +丈 +三 +上 +下 +不 +与 +丐 +丑 +专 +且 +丕 +世 +丘 +丙 +业 +丛 +东 +丝 +丞 +両 +丢 +丣 +两 +严 +丧 +丨 +个 +丫 +丬 +中 +丰 +丱 +串 +临 +丶 +丸 +丹 +为 +主 +丼 +丽 +举 +丿 +乂 +乃 +乄 +久 +么 +义 +之 +乌 +乍 +乎 +乏 +乐 +乒 +乓 +乔 +乖 +乗 +乘 +乙 +乜 +九 +乞 +也 +习 +乡 +书 +乩 +买 +乱 +乳 +乸 +乾 +亀 +亁 +亂 +了 +予 +争 +事 +二 +亍 +于 +亏 +云 +互 +亓 +五 +井 +亘 +亚 +些 +亜 +亟 +亡 +亢 +交 +亥 +亦 +产 +亨 +亩 +享 +京 +亭 +亮 +亲 +亳 +亵 +亶 +亸 +亹 +人 +亻 +亿 +什 +仁 +仂 +仃 +仄 +仅 +仆 +仇 +仉 +今 +介 +仍 +从 +仏 +仑 +仓 +仔 +仕 +他 +仗 +付 +仙 +仝 +仞 +仟 +仠 +仡 +代 +令 +以 +仨 +仪 +仫 +们 +仭 +仮 +仰 +仱 +仲 +仳 +仵 +件 +价 +任 +份 +仿 +企 +伃 +伈 +伉 +伊 +伋 +伍 +伎 +伏 +伐 +休 +伓 +伖 +众 +优 +伙 +会 +伛 +伝 +伞 +伟 +传 +伢 +伣 +伤 +伥 +伦 +伧 +伩 +伪 +伫 +伬 +伯 +估 +伱 +伲 +伴 +伶 +伷 +伸 +伺 +伻 +似 +伽 +伾 +佃 +但 +佈 +佉 +佌 +位 +低 +住 +佐 +佑 +体 +佔 +何 +佗 +佘 +余 +佚 +佛 +作 +佝 +佞 +佟 +你 +佢 +佣 +佤 +佥 +佧 +佩 +佬 +佮 +佯 +佰 +佳 +佴 +併 +佶 +佷 +佸 +佹 +佺 +佻 +佼 +佽 +佾 +使 +侁 +侂 +侃 +侄 +來 +侈 +侉 +例 +侍 +侏 +侑 +侔 +侗 +侘 +供 +侜 +依 +侠 +価 +侣 +侥 +侦 +侧 +侨 +侩 +侪 +侬 +侭 +侮 +侯 +侲 +侵 +侹 +侼 +便 +俀 +係 +促 +俄 +俅 +俆 +俉 +俊 +俎 +俏 +俐 +俑 +俗 +俘 +俙 +俚 +俛 +俜 +保 +俞 +俟 +信 +俣 +俤 +俦 +俧 +俨 +俩 +俪 +俫 +俬 +俭 +修 +俯 +俱 +俳 +俴 +俵 +俶 +俷 +俸 +俺 +俾 +倅 +倉 +個 +倌 +倍 +倏 +倐 +們 +倒 +倓 +倔 +倕 +倖 +倘 +候 +倚 +倛 +倜 +倞 +借 +倡 +倢 +値 +倥 +倦 +倨 +倩 +倪 +倫 +倬 +倭 +倮 +倳 +倴 +倶 +倷 +倸 +倹 +债 +值 +倾 +偀 +偁 +偃 +偄 +假 +偈 +偌 +偎 +偏 +偓 +偕 +偘 +做 +停 +偢 +健 +偪 +偬 +偭 +偰 +偱 +偲 +側 +偶 +偷 +偻 +偾 +偿 +傀 +傃 +傅 +傈 +傍 +傎 +傒 +傕 +備 +傜 +傞 +傣 +傥 +傦 +傧 +储 +傩 +傪 +傫 +催 +傯 +傲 +傳 +債 +傷 +傺 +傻 +傾 +僁 +僄 +僆 +僇 +僊 +働 +僎 +像 +僔 +僖 +僚 +僛 +僜 +僝 +僡 +僤 +僦 +僧 +僪 +僬 +僭 +僮 +僰 +僱 +僳 +僴 +僵 +僶 +僸 +價 +僻 +僽 +僾 +僿 +儀 +儁 +儆 +儇 +儋 +儌 +儏 +儒 +儓 +儗 +儙 +儛 +償 +儡 +儣 +儦 +儲 +儴 +儵 +儽 +儾 +儿 +兀 +允 +元 +兄 +充 +兆 +先 +光 +克 
+兌 +免 +児 +兑 +兒 +兔 +兕 +兖 +党 +兜 +兟 +兢 +入 +內 +全 +兩 +八 +公 +六 +兮 +兰 +共 +关 +兴 +兵 +其 +具 +典 +兹 +养 +兼 +兽 +兿 +冀 +冁 +内 +円 +冇 +冈 +冉 +冊 +册 +再 +冏 +冑 +冒 +冓 +冔 +冕 +冗 +写 +冚 +军 +农 +冞 +冠 +冢 +冤 +冥 +冧 +冨 +冬 +冯 +冰 +冱 +冲 +决 +冴 +况 +冶 +冷 +冻 +冼 +冽 +冿 +净 +凃 +凄 +准 +凇 +凈 +凉 +凊 +凋 +凌 +减 +凑 +凘 +凛 +凝 +几 +凡 +凤 +処 +凧 +凪 +凫 +凭 +凯 +凰 +凱 +凳 +凶 +凸 +凹 +出 +击 +凼 +函 +凿 +刀 +刁 +刃 +分 +切 +刈 +刊 +刌 +刍 +刎 +刑 +划 +刓 +刖 +列 +刘 +则 +刚 +创 +刜 +初 +删 +判 +別 +刨 +利 +别 +刬 +刭 +刮 +到 +刲 +刳 +刵 +制 +刷 +券 +刹 +刺 +刻 +刽 +刿 +剀 +剁 +剂 +剃 +剅 +則 +剉 +削 +剌 +前 +剎 +剐 +剑 +剔 +剖 +剚 +剜 +剞 +剟 +剡 +剣 +剤 +剥 +剧 +剨 +剩 +剪 +副 +剰 +割 +剸 +剺 +剽 +剿 +劂 +劄 +劈 +劓 +劖 +劘 +劙 +劚 +力 +劝 +办 +功 +加 +务 +劢 +劣 +动 +助 +努 +劫 +劬 +劭 +励 +劲 +劳 +労 +劵 +劷 +効 +劻 +劼 +劾 +势 +勃 +勇 +勉 +勋 +勍 +勐 +勑 +勒 +勔 +動 +勖 +勘 +務 +勚 +勝 +募 +勢 +勤 +勧 +勩 +勮 +勰 +勲 +勷 +勸 +勺 +勼 +勾 +勿 +匀 +匁 +匂 +匄 +包 +匆 +匈 +匊 +匌 +匍 +匏 +匐 +匕 +化 +北 +匙 +匜 +匝 +匠 +匡 +匣 +匤 +匦 +匪 +匮 +匯 +匳 +匹 +区 +医 +匼 +匽 +匾 +匿 +區 +十 +千 +卅 +升 +午 +卉 +半 +卍 +华 +协 +卑 +卒 +卓 +協 +单 +卖 +南 +単 +博 +卜 +卞 +卟 +占 +卡 +卢 +卣 +卤 +卦 +卧 +卨 +卫 +卬 +卭 +卮 +卯 +印 +危 +卲 +即 +却 +卵 +卷 +卸 +卺 +卻 +卼 +卿 +厂 +厄 +厅 +历 +厉 +压 +厌 +厍 +厎 +厐 +厓 +厔 +厕 +厖 +厘 +厚 +厝 +原 +厢 +厣 +厥 +厦 +厨 +厩 +厭 +厮 +厲 +厳 +厹 +去 +厾 +县 +叁 +参 +叄 +叆 +叇 +又 +叉 +及 +友 +双 +反 +収 +发 +叔 +取 +受 +变 +叙 +叛 +叞 +叟 +叠 +叡 +口 +古 +句 +另 +叨 +叩 +只 +叫 +召 +叭 +叮 +可 +台 +叱 +史 +右 +叵 +叶 +号 +司 +叹 +叻 +叼 +叽 +吁 +吃 +各 +吆 +合 +吉 +吊 +吋 +同 +名 +后 +吏 +吐 +向 +吒 +吓 +吔 +吕 +吖 +吗 +君 +吝 +吞 +吟 +吠 +吡 +否 +吧 +吨 +吩 +吪 +含 +听 +吭 +吮 +启 +吰 +吱 +吲 +吳 +吴 +吵 +吷 +吸 +吹 +吺 +吻 +吼 +吽 +吾 +吿 +呀 +呃 +呆 +呈 +呉 +告 +呋 +呎 +呐 +呒 +呓 +呔 +呕 +呖 +呗 +员 +呙 +呚 +呛 +呜 +呢 +呣 +呤 +呥 +呦 +周 +呪 +呫 +呰 +呱 +呲 +味 +呴 +呵 +呶 +呷 +呸 +呺 +呻 +呼 +命 +呿 +咀 +咁 +咂 +咄 +咆 +咇 +咈 +咉 +咋 +和 +咍 +咎 +咏 +咐 +咑 +咒 +咔 +咕 +咖 +咗 +咘 +咙 +咚 +咛 +咝 +咢 +咣 +咤 +咥 +咦 +咧 +咨 +咩 +咪 +咫 +咬 +咭 +咮 +咯 +咱 +咲 +咳 +咶 +咷 +咸 +咺 +咻 +咽 +咾 +咿 +哀 +品 +哂 +哃 +哄 +哆 +哇 +哈 +哉 +哋 +哌 +响 +哎 +哏 +哐 +哑 +哒 +哓 +哔 +哕 +哗 +哙 +哚 +哜 +哝 +哞 +哟 +員 +哢 +哣 +哤 +哥 +哦 +哧 +哨 +哩 +哪 +哭 +哮 +哲 +哳 +哺 +哼 +哽 +哿 +唁 +唅 +唆 +唇 +唈 +唉 +唊 +唎 +唏 +唐 +唑 +唔 +唛 +唝 +唞 +唠 +唡 +唢 +唣 +唤 +唥 +唦 +唧 +唪 +唫 +唬 +唭 +售 +唯 +唰 +唱 +唲 +唳 +唵 +唶 +唷 +唸 +唻 +唼 +唽 +唾 +唿 +啀 +啁 +啃 +啄 +啅 +商 +啈 +啉 +啊 +啋 +啍 +問 +啐 +啑 +啒 +啕 +啖 +啗 +啛 +啜 +啝 +啟 +啡 +啤 +啥 +啦 +啧 +啩 +啪 +啫 +啬 +啭 +啮 +啰 +啱 +啲 +啳 +啴 +啵 +啶 +啷 +啸 +啻 +啼 +啾 +喀 +喁 +喂 +喃 +善 +喆 +喇 +喈 +喉 +喊 +喋 +喏 +喐 +喑 +喓 +喔 +喘 +喙 +喜 +喝 +喞 +喟 +喢 +喣 +喤 +喦 +喧 +喨 +喪 +喫 +喬 +喭 +單 +喯 +喰 +喱 +喳 +喵 +営 +喷 +喹 +喺 +喻 +喼 +喽 +喾 +嗄 +嗅 +嗈 +嗉 +嗋 +嗌 +嗍 +嗐 +嗑 +嗒 +嗓 +嗔 +嗖 +嗗 +嗙 +嗛 +嗜 +嗝 +嗞 +嗟 +嗡 +嗢 +嗣 +嗤 +嗥 +嗦 +嗨 +嗪 +嗫 +嗬 +嗮 +嗯 +嗰 +嗱 +嗲 +嗳 +嗵 +嗷 +嗻 +嗽 +嗾 +嗿 +嘀 +嘁 +嘄 +嘅 +嘆 +嘈 +嘉 +嘌 +嘎 +嘏 +嘐 +嘒 +嘗 +嘘 +嘚 +嘛 +嘞 +嘟 +嘠 +嘡 +嘢 +嘣 +嘤 +嘥 +嘧 +嘬 +嘭 +嘱 +嘲 +嘴 +嘶 +嘷 +嘹 +嘻 +嘿 +噀 +噁 +噂 +噃 +噆 +噇 +噉 +噌 +噍 +噎 +噏 +噔 +噗 +噘 +噙 +噛 +噜 +噞 +噢 +噣 +噤 +器 +噩 +噪 +噫 +噬 +噭 +噱 +噳 +噶 +噷 +噻 +噼 +嚄 +嚅 +嚆 +嚋 +嚎 +嚏 +嚒 +嚓 +嚘 +嚚 +嚜 +嚟 +嚡 +嚢 +嚣 +嚤 +嚥 +嚬 +嚭 +嚯 +嚰 +嚱 +嚴 +嚵 +嚷 +嚺 +嚼 +嚿 +囂 +囄 +囊 +囋 +囍 +囏 +囐 +囓 +囔 +囗 +囚 +四 +囝 +回 +囟 +因 +囡 +团 +団 +囤 +囦 +囧 +囨 +囫 +园 +囮 +囯 +困 +囱 +囲 +図 +围 +囵 +囷 +囹 +固 +国 +图 +囿 +圂 +圃 +圄 +圆 +圈 +圉 +圊 +國 +圌 +圍 +圏 +圐 +園 +圖 +團 +圙 +圛 +圜 +圞 +土 +圠 +圣 +圥 +圧 +在 +圩 +圪 +圬 +圭 +圮 +圯 +地 +圳 +圴 +圹 +场 +圻 +圾 +圿 +址 +坂 +均 +坊 +坌 +坍 +坎 +坏 +坐 +坑 +块 +坚 +坛 +坜 +坝 +坞 +坟 +坠 +坡 +坣 +坤 +坦 +坨 +坩 +坪 +坫 +坬 +坭 +坯 +坰 +坱 +坳 +坵 +坷 +坺 +坻 +坼 +坾 +垂 +垃 +垄 +垅 +垆 +垇 +垊 +型 +垌 +垍 +垎 +垐 +垒 +垓 +垔 +垕 +垚 +垛 +垝 +垞 +垟 +垠 +垡 +垢 +垣 +垤 +垦 +垧 +垨 +垩 +垫 +垭 +垮 +垯 +垱 +垲 +垴 +垵 +垸 +垺 +垾 +垿 +埂 +埃 +埆 +埇 +埈 +埋 +埌 +城 +埏 +埒 +埔 +埕 +埗 +埘 +埙 +埚 +埜 +埝 +埞 +域 +埠 +埤 +埥 +埧 +埨 +埩 +埪 +埫 +埭 +埮 +埯 +埳 +埴 +埵 +埶 +執 +埸 +培 +基 +埼 +埽 +埿 +堀 +堂 +堃 +堆 +堇 +堉 +堋 +堌 +堍 +堎 +堑 +堕 +堙 +堛 +堞 +堠 +堡 +堣 +堤 +堦 +堧 +堨 +堪 +堭 +堮 +堰 +報 +堳 +場 +堵 +堶 +堺 +堼 +堽 +塀 +塁 +塄 +塅 +塆 +塇 +塈 +塉 +塌 +塍 +塑 +塔 +塘 +塙 +塚 +塝 +塞 +塠 +塨 +塩 +填 +塬 +塭 +塯 +塱 +塸 +塼 +塽 +塾 +塿 +墀 +墁 +境 +墄 +墅 +墈 +墉 +墊 +墋 +墍 +墐 +墒 +墓 +墕 +増 +墘 +墙 +增 +墟 +墠 +墡 +墦 +墨 +墩 +墹 +墺 +壁 +壅 +壆 +壇 +壈 +壊 +壌 +壑 +壒 +壓 +壕 +壖 +壝 +壞 +壡 +壤 +壩 +士 +壬 +壮 +声 +壱 +売 +壳 +壶 +壸 +壹 
+壺 +壼 +壽 +夀 +处 +夅 +备 +変 +复 +夏 +夐 +夔 +夕 +外 +夙 +多 +夜 +够 +夠 +夡 +夢 +夤 +夥 +大 +天 +太 +夫 +夬 +夭 +央 +夯 +夰 +失 +头 +夷 +夸 +夹 +夺 +夼 +夾 +夿 +奀 +奁 +奂 +奄 +奅 +奇 +奈 +奉 +奋 +奌 +奎 +奏 +契 +奓 +奔 +奕 +奖 +套 +奘 +奚 +奠 +奡 +奢 +奣 +奤 +奥 +奧 +奨 +奫 +奭 +奰 +女 +奴 +奶 +奸 +她 +好 +妁 +如 +妃 +妄 +妆 +妇 +妈 +妉 +妊 +妍 +妒 +妓 +妖 +妗 +妘 +妙 +妞 +妣 +妤 +妥 +妨 +妩 +妪 +妫 +妬 +妮 +妯 +妲 +妳 +妹 +妻 +妼 +妾 +姁 +姅 +姆 +姉 +姊 +始 +姌 +姐 +姑 +姒 +姓 +委 +姗 +姘 +姚 +姜 +姝 +姞 +姣 +姤 +姥 +姨 +姪 +姫 +姬 +姮 +姱 +姵 +姹 +姺 +姻 +姿 +娀 +威 +娃 +娄 +娅 +娆 +娇 +娈 +娉 +娌 +娑 +娓 +娖 +娘 +娙 +娚 +娜 +娝 +娟 +娠 +娡 +娣 +娥 +娩 +娭 +娯 +娱 +娲 +娴 +娵 +娶 +娼 +婀 +婄 +婆 +婉 +婊 +婑 +婕 +婗 +婘 +婚 +婛 +婞 +婢 +婣 +婥 +婦 +婧 +婨 +婪 +婲 +婴 +婵 +婶 +婷 +婺 +婼 +婿 +媉 +媒 +媕 +媖 +媚 +媛 +媞 +媟 +媠 +媢 +媥 +媪 +媭 +媮 +媲 +媳 +媵 +媸 +媻 +媽 +媾 +媿 +嫁 +嫂 +嫄 +嫈 +嫉 +嫋 +嫌 +嫑 +嫒 +嫔 +嫖 +嫘 +嫚 +嫛 +嫜 +嫠 +嫡 +嫣 +嫦 +嫧 +嫩 +嫪 +嫫 +嫭 +嫮 +嫰 +嫱 +嫲 +嫳 +嫶 +嫽 +嬁 +嬃 +嬅 +嬇 +嬉 +嬐 +嬓 +嬖 +嬗 +嬛 +嬢 +嬬 +嬲 +嬴 +嬷 +嬾 +嬿 +孀 +孃 +孅 +子 +孑 +孓 +孔 +孕 +孖 +字 +存 +孙 +孚 +孛 +孜 +孝 +孟 +孢 +季 +孤 +孥 +学 +孩 +孪 +孬 +孭 +孰 +孱 +孲 +孳 +孵 +學 +孺 +孻 +孽 +宀 +宁 +它 +宄 +宅 +宇 +守 +安 +宋 +完 +宍 +宎 +宏 +宓 +宕 +宗 +官 +宙 +定 +宛 +宜 +宝 +实 +実 +宠 +审 +客 +宣 +室 +宥 +宦 +宪 +宫 +宬 +宮 +宰 +害 +宴 +宵 +家 +宸 +容 +宼 +宽 +宾 +宿 +寀 +寂 +寄 +寅 +密 +寇 +富 +寐 +寑 +寒 +寓 +寔 +寕 +寖 +寗 +寘 +寙 +寛 +寝 +寞 +察 +寡 +寢 +寤 +寥 +實 +寧 +寨 +審 +寮 +寯 +寰 +寶 +寸 +对 +寺 +寻 +导 +対 +寿 +封 +専 +尃 +射 +尅 +将 +將 +尉 +尊 +尋 +對 +小 +尐 +少 +尓 +尔 +尕 +尖 +尘 +尙 +尚 +尝 +尟 +尤 +尥 +尧 +尨 +尪 +尫 +尬 +尭 +尰 +就 +尴 +尸 +尹 +尺 +尻 +尼 +尽 +尾 +尿 +局 +屁 +层 +屃 +屄 +居 +屈 +屉 +届 +屋 +屌 +屍 +屎 +屏 +屐 +屑 +展 +屘 +屙 +屝 +属 +屟 +屠 +屡 +屣 +履 +屦 +屧 +屩 +屭 +屮 +屯 +山 +屳 +屴 +屹 +屺 +屼 +屾 +屿 +岁 +岂 +岈 +岊 +岌 +岍 +岏 +岐 +岑 +岔 +岕 +岖 +岗 +岘 +岙 +岚 +岛 +岜 +岝 +岞 +岡 +岢 +岣 +岧 +岨 +岩 +岫 +岬 +岭 +岱 +岳 +岵 +岷 +岸 +岽 +岿 +峁 +峃 +峄 +峇 +峋 +峒 +峘 +峙 +峛 +峞 +峠 +峡 +峣 +峤 +峥 +峦 +峧 +峨 +峩 +峪 +峭 +峯 +峰 +峻 +峿 +崀 +崁 +崂 +崃 +崄 +崆 +崇 +崈 +崋 +崌 +崎 +崐 +崑 +崒 +崔 +崕 +崖 +崙 +崚 +崛 +崞 +崟 +崠 +崣 +崤 +崦 +崧 +崩 +崭 +崮 +崱 +崴 +崶 +崷 +崽 +崾 +崿 +嵁 +嵂 +嵇 +嵊 +嵋 +嵌 +嵎 +嵒 +嵓 +嵔 +嵕 +嵖 +嵘 +嵙 +嵚 +嵛 +嵝 +嵡 +嵥 +嵦 +嵩 +嵫 +嵬 +嵯 +嵰 +嵲 +嵴 +嵷 +嵸 +嵺 +嵽 +嵾 +嶂 +嶅 +嶋 +嶌 +嶒 +嶓 +嶔 +嶘 +嶙 +嶛 +嶝 +嶞 +嶟 +嶨 +嶪 +嶭 +嶮 +嶰 +嶱 +嶲 +嶶 +嶷 +嶽 +嶾 +巁 +巂 +巃 +巅 +巇 +巉 +巌 +巍 +巏 +巑 +巖 +巘 +巛 +川 +州 +巡 +巢 +巣 +工 +左 +巧 +巨 +巩 +巫 +差 +巯 +己 +已 +巳 +巴 +巵 +巷 +巻 +巽 +巾 +巿 +币 +市 +布 +帅 +帆 +帇 +师 +帊 +希 +帏 +帐 +帑 +帔 +帕 +帖 +帘 +帙 +帚 +帛 +帜 +帝 +帟 +帡 +帢 +帣 +带 +帧 +帨 +師 +席 +帮 +帯 +帰 +帱 +帳 +帶 +帷 +常 +帻 +帼 +帽 +帿 +幂 +幄 +幅 +幌 +幓 +幔 +幕 +幙 +幛 +幝 +幞 +幠 +幡 +幢 +幣 +幤 +幥 +幨 +幩 +幪 +幭 +幮 +幰 +干 +平 +年 +幵 +并 +幷 +幸 +幹 +幺 +幻 +幼 +幽 +幾 +广 +庀 +庁 +広 +庄 +庆 +庇 +床 +庋 +序 +庐 +庑 +库 +应 +底 +庖 +店 +庙 +庚 +府 +庞 +废 +庠 +庤 +庥 +度 +座 +庪 +庭 +庳 +庵 +庶 +康 +庸 +庹 +庾 +廃 +廉 +廊 +廋 +廌 +廐 +廓 +廖 +廙 +廛 +廞 +廢 +廣 +廥 +廦 +廧 +廨 +廪 +廭 +延 +廷 +廸 +建 +廻 +廼 +廿 +开 +弁 +异 +弃 +弄 +弇 +弈 +弊 +弋 +弎 +式 +弐 +弑 +弒 +弓 +弔 +引 +弗 +弘 +弛 +弝 +弟 +张 +弢 +弥 +弦 +弧 +弨 +弩 +弪 +弭 +弮 +弯 +弰 +弱 +張 +弶 +強 +弸 +弹 +强 +弼 +弾 +彀 +彁 +彊 +彍 +彐 +归 +当 +彔 +录 +彖 +彗 +彘 +彝 +彟 +彡 +形 +彤 +彦 +彧 +彩 +彪 +彫 +彬 +彭 +彯 +彰 +影 +彳 +彴 +彷 +役 +彻 +彼 +彿 +往 +征 +徂 +径 +待 +徇 +很 +徉 +徊 +律 +後 +徐 +徒 +従 +徕 +得 +徘 +徙 +徚 +徜 +從 +御 +徧 +徨 +循 +徬 +徭 +微 +徯 +徳 +徴 +徵 +徶 +德 +徹 +徼 +徽 +心 +必 +忆 +忉 +忌 +忍 +忏 +忐 +忑 +忒 +忔 +忕 +忖 +志 +忘 +忙 +応 +忝 +忞 +忠 +忡 +忤 +忧 +忪 +快 +忭 +忮 +忱 +忳 +念 +忸 +忺 +忻 +忼 +忽 +忾 +忿 +怀 +态 +怂 +怃 +怄 +怅 +怆 +怊 +怍 +怎 +怏 +怐 +怑 +怒 +怓 +怔 +怕 +怖 +怗 +怙 +怚 +怛 +怜 +思 +怠 +怡 +急 +怦 +性 +怨 +怩 +怪 +怫 +怯 +怱 +怲 +怳 +怵 +总 +怼 +怿 +恁 +恂 +恃 +恄 +恇 +恈 +恋 +恌 +恍 +恐 +恑 +恒 +恓 +恕 +恙 +恚 +恛 +恝 +恟 +恠 +恢 +恣 +恤 +恧 +恨 +恩 +恪 +恫 +恬 +恭 +息 +恰 +恳 +恵 +恶 +恸 +恹 +恺 +恻 +恼 +恽 +恿 +悁 +悃 +悄 +悅 +悆 +悉 +悊 +悌 +悍 +悒 +悔 +悖 +悚 +悛 +悝 +悞 +悟 +悠 +悢 +患 +悦 +您 +悩 +悪 +悫 +悬 +悭 +悮 +悯 +悰 +悱 +悲 +悴 +悸 +悻 +悼 +惃 +惄 +情 +惆 +惇 +惉 +惊 +惋 +惏 +惑 +惓 +惔 +惕 +惘 +惙 +惚 +惛 +惜 +惝 +惟 +惠 +惡 +惦 +惧 +惨 +惩 +惫 +惬 +惭 +惮 +惯 +惰 +想 +惴 +惵 +惶 +惸 +惹 +惺 +愀 +愁 +愆 +愈 +愉 +愊 +愍 +愎 +意 +愒 +愓 +愔 +愕 +愗 +愚 +愛 +感 +愠 +愣 +愤 +愥 +愦 +愧 +愫 +愬 +愵 +愿 +慅 +慆 +慈 +慊 +態 +慌 +慎 +慐 +慑 +慒 +慓 +慕 +慙 +慜 +慝 +慞 +慠 +慢 +慥 +慧 +慨 +慬 +慭 +慰 +慱 +慴 +慵 +慶 +慷 +慽 +慾 +憀 +憁 +憂 +憋 +憍 +憎 +憔 +憕 +憖 +憘 +憚 +憝 +憧 
+憨 +憩 +憬 +憭 +憯 +憰 +憲 +憷 +憸 +憹 +憺 +憾 +懁 +懂 +懃 +懆 +懈 +應 +懊 +懋 +懌 +懐 +懑 +懒 +懔 +懘 +懜 +懟 +懠 +懡 +懦 +懭 +懰 +懲 +懵 +懹 +懻 +懽 +懿 +戀 +戁 +戃 +戄 +戆 +戈 +戊 +戋 +戌 +戍 +戎 +戏 +成 +我 +戒 +戕 +或 +戗 +战 +戙 +戚 +戛 +戟 +戡 +戢 +戥 +戦 +截 +戫 +戬 +戭 +戮 +戲 +戳 +戴 +戶 +户 +戸 +戺 +戻 +戽 +戾 +房 +所 +扁 +扂 +扃 +扅 +扆 +扇 +扈 +扉 +扊 +手 +扌 +才 +扎 +扐 +扑 +扒 +打 +扔 +払 +托 +扙 +扚 +扛 +扜 +扞 +扠 +扡 +扢 +扣 +扤 +扥 +扦 +执 +扩 +扪 +扫 +扬 +扭 +扮 +扯 +扰 +扱 +扳 +扶 +批 +扼 +扽 +找 +承 +技 +抂 +抃 +抄 +抆 +抉 +把 +抌 +抏 +抑 +抒 +抓 +抔 +投 +抖 +抗 +折 +抚 +抛 +抜 +択 +抟 +抠 +抡 +抢 +护 +报 +抨 +披 +抬 +抱 +抵 +抶 +抹 +抻 +押 +抽 +抿 +拀 +拂 +拃 +拄 +担 +拆 +拇 +拈 +拉 +拊 +拌 +拍 +拎 +拏 +拐 +拑 +拒 +拓 +拔 +拖 +拗 +拘 +拙 +拚 +招 +拜 +拝 +拟 +拠 +拡 +拢 +拣 +拥 +拦 +拧 +拨 +择 +拫 +括 +拭 +拮 +拯 +拱 +拳 +拴 +拶 +拷 +拼 +拽 +拾 +拿 +持 +挂 +挃 +指 +挈 +按 +挎 +挐 +挑 +挒 +挓 +挖 +挙 +挚 +挛 +挜 +挝 +挞 +挟 +挠 +挡 +挢 +挣 +挤 +挥 +挦 +挨 +挪 +挫 +挭 +振 +挱 +挲 +挵 +挹 +挺 +挼 +挽 +挿 +捂 +捃 +捄 +捅 +捆 +捉 +捋 +捌 +捍 +捎 +捏 +捐 +捕 +捗 +捘 +捜 +捞 +损 +捡 +换 +捣 +捧 +捨 +捩 +捭 +据 +捰 +捱 +捲 +捴 +捶 +捷 +捺 +捻 +捽 +掀 +掁 +掂 +掅 +掇 +授 +掉 +掊 +掌 +掎 +掏 +掐 +排 +掕 +掖 +掘 +掞 +掟 +掠 +採 +探 +掣 +掤 +接 +控 +推 +掩 +措 +掬 +掮 +掯 +掰 +掱 +掲 +掳 +掴 +掷 +掸 +掹 +掺 +掻 +掼 +掾 +掿 +揃 +揄 +揆 +揈 +揉 +揌 +揍 +揎 +描 +提 +插 +揕 +揖 +揗 +揜 +揝 +揞 +揟 +揠 +握 +揣 +揥 +揦 +揩 +揪 +揫 +揭 +揰 +揲 +援 +揵 +揶 +揸 +揺 +揼 +揽 +揾 +揿 +搀 +搁 +搂 +搅 +搉 +搊 +搋 +搌 +損 +搎 +搏 +搐 +搒 +搓 +搔 +搕 +搘 +搚 +搜 +搞 +搠 +搡 +搢 +搣 +搤 +搥 +搦 +搧 +搨 +搪 +搬 +搭 +搯 +搰 +搴 +搵 +携 +搽 +搾 +摁 +摂 +摄 +摅 +摆 +摇 +摈 +摊 +摋 +摌 +摍 +摎 +摐 +摒 +摔 +摘 +摛 +摞 +摠 +摧 +摩 +摭 +摴 +摵 +摸 +摹 +摺 +摽 +撂 +撃 +撄 +撅 +撇 +撍 +撑 +撒 +撕 +撖 +撙 +撚 +撝 +撞 +撤 +撥 +撦 +撧 +撩 +撬 +播 +撮 +撰 +撱 +撴 +撵 +撶 +撷 +撸 +撺 +撼 +擀 +擂 +擅 +擉 +操 +擎 +擏 +擐 +擒 +擔 +擖 +擗 +擘 +據 +擞 +擢 +擤 +擦 +擩 +擫 +擷 +擸 +擿 +攀 +攃 +攉 +攋 +攒 +攕 +攘 +攚 +攝 +攞 +攥 +攧 +攩 +攫 +攮 +支 +攰 +攲 +收 +攸 +改 +攻 +攽 +放 +政 +故 +效 +敌 +敏 +救 +敔 +敕 +敖 +教 +敛 +敝 +敞 +敢 +散 +敦 +敧 +敩 +敫 +敬 +数 +敱 +敲 +整 +敷 +數 +敺 +敻 +斁 +斄 +斅 +文 +斉 +斋 +斌 +斎 +斐 +斑 +斒 +斓 +斗 +料 +斛 +斜 +斝 +斟 +斠 +斡 +斤 +斥 +斧 +斨 +斩 +斫 +断 +斮 +斯 +新 +斲 +斴 +斶 +斸 +方 +於 +施 +斿 +旁 +旂 +旃 +旄 +旅 +旆 +旋 +旌 +旍 +旎 +族 +旐 +旒 +旓 +旖 +旗 +旘 +旛 +旜 +旟 +无 +旡 +既 +日 +旦 +旧 +旨 +早 +旬 +旭 +旮 +旯 +旰 +旱 +旳 +旴 +旵 +时 +旷 +旸 +旺 +旻 +旼 +旿 +昀 +昂 +昃 +昄 +昆 +昇 +昈 +昉 +昊 +昌 +明 +昏 +昒 +易 +昔 +昕 +昙 +昚 +昝 +昞 +星 +映 +昣 +昤 +春 +昧 +昨 +昪 +昫 +昬 +昭 +是 +昰 +昱 +昳 +昴 +昵 +昶 +昺 +昼 +昽 +显 +晁 +時 +晃 +晅 +晊 +晋 +晌 +晏 +晒 +晓 +晔 +晕 +晖 +晗 +晙 +晚 +晛 +晞 +晟 +晡 +晢 +晣 +晤 +晥 +晦 +晧 +晨 +晩 +晫 +晬 +普 +景 +晰 +晳 +晴 +晶 +晷 +晹 +智 +晻 +晼 +晾 +暁 +暂 +暄 +暇 +暌 +暍 +暎 +暐 +暑 +暕 +暖 +暗 +暝 +暞 +暠 +暣 +暦 +暧 +暨 +暬 +暮 +暱 +暲 +暴 +暵 +暶 +暹 +暻 +暾 +曀 +曈 +曌 +曒 +曙 +曚 +曛 +曜 +曝 +曢 +曦 +曧 +曨 +曩 +曪 +曭 +曰 +曱 +曲 +曳 +更 +曵 +曷 +書 +曹 +曼 +曽 +曾 +替 +最 +朂 +會 +朅 +朆 +月 +有 +朊 +朋 +服 +朏 +朐 +朒 +朓 +朔 +朕 +朗 +朘 +望 +朝 +期 +朣 +朦 +木 +未 +末 +本 +札 +朮 +术 +朱 +朳 +朴 +朵 +朶 +机 +朽 +朿 +杀 +杂 +权 +杅 +杆 +杈 +杉 +杌 +李 +杏 +材 +村 +杓 +杕 +杖 +杙 +杜 +杞 +束 +杠 +条 +杢 +来 +杧 +杨 +杩 +杪 +杬 +杭 +杯 +杰 +東 +杲 +杳 +杵 +杷 +杻 +杼 +松 +板 +极 +构 +枅 +枇 +枉 +枋 +枌 +枍 +枎 +析 +枑 +枒 +枕 +林 +枘 +枚 +果 +枝 +枞 +枟 +枠 +枡 +枢 +枣 +枥 +枦 +枧 +枨 +枪 +枫 +枬 +枭 +枮 +枯 +枰 +枲 +枳 +枵 +架 +枷 +枸 +枹 +枻 +枿 +柁 +柂 +柃 +柄 +柅 +柈 +柊 +柎 +柏 +某 +柑 +柒 +染 +柔 +柖 +柘 +柙 +柚 +柜 +柝 +柞 +柟 +柠 +柢 +柣 +柤 +查 +柩 +柬 +柮 +柯 +柰 +柱 +柳 +柴 +柷 +柹 +柺 +査 +柽 +柿 +栀 +栂 +栃 +栄 +栅 +栆 +标 +栈 +栉 +栊 +栋 +栌 +栎 +栏 +栐 +树 +栒 +栓 +栖 +栗 +栘 +栜 +栝 +栟 +校 +栢 +栩 +株 +栯 +栱 +栲 +栳 +栴 +栵 +样 +核 +根 +栻 +格 +栽 +栾 +栿 +桀 +桁 +桂 +桃 +桄 +桅 +框 +案 +桉 +桊 +桋 +桌 +桎 +桐 +桑 +桓 +桔 +桕 +桚 +桜 +桝 +桞 +桠 +桡 +桢 +档 +桤 +桥 +桦 +桧 +桨 +桩 +桫 +桮 +桯 +桲 +桴 +桶 +桷 +桹 +桻 +梀 +梁 +梃 +梅 +梆 +梏 +梐 +梓 +梗 +條 +梠 +梡 +梢 +梣 +梦 +梧 +梨 +梩 +梪 +梫 +梬 +梭 +梯 +械 +梱 +梲 +梳 +梴 +梵 +梶 +梼 +梽 +梾 +梿 +检 +棁 +棂 +棃 +棄 +棅 +棆 +棉 +棊 +棋 +棍 +棐 +棑 +棒 +棓 +棕 +棘 +棙 +棚 +棝 +棠 +棡 +棣 +棨 +棪 +棫 +棬 +森 +棯 +棰 +棱 +棲 +棵 +棸 +棹 +棺 +棻 +棼 +棽 +椀 +椁 +椅 +椆 +椇 +椋 +植 +椎 +椐 +椑 +椒 +椓 +椗 +椙 +検 +椟 +椠 +椤 +椩 +椫 +椭 +椮 +椰 +椲 +椴 +椸 +椹 +椽 +椿 +楂 +楅 +楇 +楍 +楎 +楔 +楕 +楗 +楘 +楙 +楚 +楛 +楝 +楞 +楟 +楠 +楢 +楣 +楤 +楥 +楦 +楨 +楩 +楪 +楫 +業 +楮 +楯 +楰 +楱 +極 +楶 +楷 +楸 +楹 +楺 +楼 +楽 +榀 +概 +榃 +榄 +榅 +榆 +榇 +榈 +榉 +榊 +榍 +榔 +榕 +榖 +榘 +榛 +榜 +榞 +榠 +榥 +榧 +榨 +榫 +榭 +榰 
+榱 +榴 +榷 +榺 +榻 +榼 +榾 +槁 +槃 +槅 +槇 +槊 +構 +槌 +槎 +槐 +槑 +槓 +槔 +槖 +様 +槙 +槚 +槛 +槟 +槠 +槢 +槥 +槩 +槬 +槭 +槱 +槲 +槵 +槻 +槽 +槾 +槿 +樀 +樂 +樊 +樋 +樏 +樓 +樕 +樗 +樘 +標 +樛 +樝 +樟 +模 +樣 +樨 +権 +横 +樫 +樭 +樯 +樱 +樵 +樹 +樽 +樾 +橄 +橅 +橇 +橉 +橌 +橎 +橐 +橑 +橘 +橙 +橚 +橛 +橞 +機 +橡 +橦 +橪 +橱 +橹 +橺 +橼 +橿 +檀 +檄 +檇 +檉 +檊 +檋 +檍 +檎 +檐 +檑 +檔 +檖 +檗 +檛 +檝 +檞 +檠 +檥 +檧 +檨 +檩 +檫 +檬 +檰 +檵 +檻 +檿 +櫁 +櫂 +櫆 +櫈 +櫌 +櫐 +櫑 +櫜 +櫞 +櫡 +櫰 +櫻 +櫼 +欃 +欉 +權 +欌 +欎 +欓 +欕 +欝 +欠 +次 +欢 +欣 +欤 +欦 +欧 +欬 +欱 +欲 +欷 +欸 +欹 +欺 +欻 +款 +欿 +歃 +歆 +歇 +歈 +歉 +歊 +歌 +歓 +歔 +歕 +歗 +歘 +歙 +歛 +歜 +歠 +止 +正 +此 +步 +武 +歧 +歩 +歪 +歯 +歳 +歴 +歸 +歹 +死 +歼 +殁 +殂 +殃 +殄 +殆 +殇 +殉 +殊 +残 +殍 +殑 +殒 +殓 +殖 +殘 +殚 +殛 +殡 +殢 +殣 +殪 +殭 +殳 +殴 +段 +殷 +殽 +殿 +毁 +毂 +毅 +毈 +毉 +毋 +母 +毎 +每 +毐 +毒 +毓 +比 +毕 +毖 +毗 +毘 +毙 +毚 +毛 +毡 +毨 +毫 +毬 +毯 +毰 +毳 +毵 +毶 +毸 +毹 +毻 +毽 +氄 +氅 +氆 +氇 +氍 +氎 +氏 +氐 +民 +氓 +气 +氕 +氖 +気 +氘 +氙 +氚 +氛 +氟 +氡 +氢 +氣 +氤 +氦 +氧 +氨 +氩 +氪 +氮 +氯 +氰 +氲 +水 +氵 +氷 +永 +氹 +氽 +氾 +氿 +汀 +汁 +求 +汃 +汆 +汇 +汉 +汊 +汍 +汎 +汏 +汐 +汔 +汕 +汗 +汚 +汛 +汜 +汝 +汞 +江 +池 +污 +汣 +汤 +汧 +汨 +汩 +汪 +汫 +汭 +汯 +汰 +汲 +汴 +汶 +汸 +汹 +決 +汽 +汾 +沁 +沂 +沃 +沄 +沅 +沆 +沇 +沈 +沉 +沋 +沌 +沍 +沏 +沐 +沒 +沓 +沔 +沕 +沙 +沚 +沛 +沜 +沟 +没 +沢 +沣 +沤 +沥 +沦 +沧 +沨 +沩 +沪 +沫 +沬 +沭 +沮 +沱 +沲 +河 +沴 +沵 +沶 +沸 +油 +治 +沼 +沽 +沾 +沿 +泂 +泃 +泄 +泅 +泆 +泇 +泉 +泊 +泌 +泐 +泓 +泔 +法 +泖 +泗 +泘 +泚 +泛 +泜 +泝 +泞 +泟 +泠 +泡 +波 +泣 +泥 +注 +泪 +泫 +泬 +泮 +泯 +泰 +泱 +泲 +泳 +泵 +泷 +泸 +泺 +泻 +泼 +泽 +泾 +洁 +洄 +洇 +洈 +洊 +洋 +洌 +洎 +洏 +洑 +洒 +洗 +洙 +洚 +洛 +洞 +洟 +洣 +洤 +津 +洧 +洨 +洩 +洪 +洫 +洭 +洮 +洱 +洲 +洳 +洴 +洵 +洸 +洹 +洺 +活 +洼 +洽 +派 +洿 +流 +浃 +浄 +浅 +浆 +浇 +浈 +浉 +浊 +测 +浍 +济 +浏 +浐 +浑 +浒 +浓 +浔 +浕 +浘 +浙 +浚 +浛 +浜 +浞 +浠 +浡 +浣 +浤 +浥 +浦 +浩 +浪 +浬 +浮 +浯 +浰 +浱 +浲 +浴 +海 +浸 +浼 +浿 +涂 +涅 +消 +涉 +涌 +涎 +涑 +涒 +涓 +涔 +涕 +涖 +涘 +涙 +涚 +涛 +涜 +涝 +涞 +涟 +涠 +涡 +涢 +涣 +涤 +涥 +润 +涧 +涨 +涩 +涪 +涫 +涬 +涮 +涯 +液 +涴 +涵 +涷 +涸 +涼 +涿 +淀 +淄 +淅 +淆 +淇 +淈 +淋 +淌 +淏 +淐 +淑 +淓 +淕 +淖 +淘 +淙 +淛 +淜 +淝 +淞 +淠 +淡 +淢 +淤 +淦 +淨 +淫 +淬 +淮 +淯 +淰 +深 +淳 +混 +淹 +添 +淼 +渀 +清 +渇 +済 +渉 +渊 +渋 +渌 +渍 +渎 +渏 +渐 +渑 +渓 +渔 +渕 +渖 +渗 +渙 +渚 +減 +渝 +渟 +渠 +渡 +渢 +渣 +渤 +渥 +渧 +温 +渫 +測 +渭 +港 +渰 +渱 +渲 +渴 +渶 +游 +渺 +渻 +渼 +湁 +湃 +湄 +湆 +湉 +湋 +湍 +湎 +湑 +湓 +湔 +湖 +湘 +湛 +湜 +湝 +湟 +湡 +湢 +湣 +湦 +湧 +湩 +湫 +湮 +湲 +湳 +湴 +湼 +湾 +湿 +満 +溁 +溂 +溃 +溅 +溆 +溇 +溉 +溊 +溋 +溍 +溎 +溏 +源 +溓 +溔 +準 +溘 +溜 +溞 +溟 +溠 +溡 +溢 +溥 +溦 +溧 +溪 +溫 +溯 +溱 +溲 +溳 +溴 +溵 +溶 +溷 +溺 +溻 +溽 +滁 +滂 +滃 +滆 +滇 +滈 +滉 +滋 +滍 +滏 +滑 +滓 +滔 +滕 +滗 +滘 +滙 +滚 +滜 +滝 +滞 +滟 +滠 +满 +滢 +滤 +滥 +滦 +滧 +滨 +滩 +滪 +滫 +滮 +滴 +滹 +滺 +滽 +漂 +漆 +漈 +漉 +漋 +漍 +漎 +漏 +漓 +演 +漕 +漖 +漘 +漙 +漠 +漢 +漤 +漦 +漩 +漪 +漫 +漭 +漯 +漰 +漱 +漳 +漴 +漶 +漷 +漹 +漻 +漼 +漾 +潀 +潄 +潆 +潇 +潈 +潋 +潍 +潎 +潏 +潒 +潓 +潕 +潘 +潜 +潝 +潞 +潟 +潠 +潢 +潤 +潦 +潩 +潬 +潭 +潮 +潲 +潴 +潵 +潸 +潺 +潼 +潽 +潾 +澂 +澄 +澈 +澉 +澌 +澍 +澎 +澒 +澔 +澗 +澘 +澙 +澛 +澜 +澡 +澣 +澤 +澥 +澧 +澨 +澪 +澫 +澭 +澳 +澴 +澶 +澹 +澼 +澾 +激 +濂 +濅 +濆 +濈 +濉 +濊 +濋 +濎 +濑 +濒 +濙 +濛 +濞 +濟 +濠 +濡 +濦 +濩 +濫 +濬 +濮 +濯 +濸 +瀁 +瀄 +瀌 +瀍 +瀎 +瀑 +瀔 +瀖 +瀚 +瀛 +瀜 +瀞 +瀡 +瀣 +瀩 +瀬 +瀰 +瀱 +瀴 +瀵 +瀹 +瀺 +瀼 +瀽 +灂 +灈 +灉 +灊 +灌 +灏 +灑 +灒 +灞 +灩 +火 +灬 +灭 +灯 +灰 +灵 +灶 +灸 +灺 +灼 +灾 +灿 +炀 +炅 +炆 +炉 +炊 +炌 +炎 +炒 +炓 +炔 +炕 +炖 +炘 +炙 +炜 +炝 +炟 +炡 +炤 +炧 +炫 +炬 +炭 +炮 +炯 +炰 +炱 +炳 +炵 +炷 +炸 +点 +為 +炼 +炽 +炿 +烀 +烁 +烂 +烃 +烈 +烊 +烋 +烓 +烔 +烘 +烙 +烚 +烛 +烜 +烝 +烟 +烤 +烦 +烧 +烨 +烩 +烫 +烬 +热 +烯 +烷 +烹 +烺 +烻 +烽 +焄 +焉 +焊 +焌 +焏 +焐 +焒 +焓 +焕 +焖 +焗 +焘 +焙 +焚 +焜 +焞 +焟 +焠 +無 +焦 +焩 +焫 +焮 +焯 +焰 +焱 +焲 +焴 +然 +焹 +焻 +焼 +煀 +煁 +煃 +煅 +煊 +煌 +煍 +煎 +煐 +煓 +煕 +煚 +煜 +煞 +煟 +煠 +煤 +煦 +照 +煨 +煮 +煲 +煳 +煴 +煵 +煶 +煸 +煹 +煺 +煽 +煿 +熂 +熄 +熇 +熉 +熊 +熏 +熔 +熕 +熘 +熙 +熛 +熜 +熝 +熟 +熠 +熤 +熨 +熬 +熭 +熯 +熱 +熳 +熴 +熵 +熸 +熹 +熿 +燀 +燂 +燃 +燅 +燈 +燊 +燋 +燎 +燏 +燐 +燑 +燔 +燕 +燖 +燚 +燝 +營 +燠 +燢 +燥 +燧 +燬 +燮 +燹 +燿 +爆 +爇 +爊 +爋 +爎 +爔 +爘 +爚 +爝 +爞 +爟 +爢 +爧 +爨 +爪 +爬 +爭 +爰 +爱 +爵 +父 +爷 +爸 +爹 +爻 +爽 +爿 +牁 +牂 +片 +版 +牋 +牌 +牍 +牏 +牒 +牖 +牙 +牛 +牝 +牟 +牡 +牢 +牣 +牤 +牥 +牦 +牧 +物 +牮 +牯 +牲 +牴 +牵 +牷 +牸 +特 +牺 +牻 +牾 +牿 +犀 +犁 +犄 +犇 +犉 +犊 +犋 +犍 +犏 +犒 +犘 +犜 +犟 +犨 +犬 +犭 +犯 +犰 +犴 +状 +犷 +犸 +犹 +犺 +犼 +犽 +狁 +狂 +狃 +狄 +狈 +狌 +狍 +狎 +狐 +狒 +狖 +狗 +狘 +狙 +狛 +狝 +狞 
+狟 +狠 +狡 +狢 +狥 +狧 +狨 +狩 +独 +狭 +狮 +狯 +狰 +狱 +狲 +狳 +狴 +狶 +狷 +狸 +狺 +狻 +狼 +猁 +猃 +猄 +猇 +猊 +猋 +猎 +猕 +猖 +猗 +猘 +猛 +猜 +猝 +猞 +猟 +猡 +猢 +猥 +猧 +猩 +猪 +猫 +猬 +献 +猰 +猱 +猲 +猳 +猴 +猵 +猶 +猷 +猸 +猾 +猿 +獂 +獈 +獍 +獏 +獐 +獒 +獗 +獘 +獚 +獜 +獝 +獠 +獣 +獦 +獨 +獬 +獭 +獮 +獯 +獲 +獴 +獶 +獻 +獾 +玁 +玃 +玄 +率 +玈 +玉 +玊 +王 +玍 +玎 +玑 +玒 +玓 +玕 +玖 +玗 +玘 +玙 +玚 +玛 +玟 +玠 +玡 +玢 +玥 +玦 +玩 +玫 +玭 +玮 +环 +现 +玱 +玲 +玳 +玶 +玷 +玺 +玻 +玼 +珀 +珂 +珅 +珈 +珉 +珊 +珌 +珍 +珎 +珏 +珐 +珑 +珓 +珔 +珖 +珙 +珝 +珞 +珠 +珣 +珥 +珦 +珧 +珩 +珪 +班 +珮 +珰 +珲 +珵 +珸 +珹 +珺 +珽 +現 +球 +琅 +理 +琇 +琉 +琊 +琍 +琎 +琏 +琐 +琖 +琚 +琛 +琠 +琢 +琣 +琤 +琥 +琦 +琨 +琪 +琫 +琬 +琭 +琮 +琯 +琰 +琱 +琲 +琳 +琴 +琵 +琶 +琻 +琼 +瑀 +瑁 +瑂 +瑃 +瑄 +瑅 +瑆 +瑊 +瑌 +瑍 +瑑 +瑔 +瑕 +瑗 +瑙 +瑚 +瑛 +瑜 +瑞 +瑟 +瑠 +瑢 +瑧 +瑨 +瑪 +瑭 +瑮 +瑰 +瑱 +瑳 +瑴 +瑶 +瑷 +瑸 +瑽 +瑾 +瑿 +璀 +璁 +璂 +璃 +璅 +璆 +璇 +璈 +璊 +璋 +璎 +璐 +璕 +璘 +璙 +璚 +璜 +璝 +璞 +璟 +璠 +璤 +璥 +璧 +璨 +璩 +璪 +璲 +璵 +璷 +璸 +璹 +璺 +璿 +瓀 +瓅 +瓈 +瓊 +瓎 +瓒 +瓖 +瓘 +瓚 +瓛 +瓜 +瓞 +瓟 +瓠 +瓢 +瓣 +瓤 +瓦 +瓨 +瓬 +瓮 +瓯 +瓴 +瓶 +瓷 +瓹 +瓻 +瓿 +甀 +甂 +甃 +甄 +甈 +甋 +甍 +甑 +甒 +甓 +甔 +甕 +甖 +甗 +甘 +甙 +甚 +甜 +生 +甡 +產 +甥 +甦 +用 +甩 +甪 +甫 +甬 +甭 +甯 +田 +由 +甲 +申 +甴 +电 +男 +甸 +町 +画 +甽 +甾 +甿 +畀 +畅 +畇 +畈 +畊 +畋 +界 +畎 +畏 +畑 +畓 +畔 +留 +畚 +畛 +畜 +畝 +畟 +畠 +畤 +略 +畦 +畧 +番 +畫 +畬 +畯 +畲 +畳 +畴 +當 +畷 +畸 +畹 +畼 +畽 +畿 +疁 +疃 +疆 +疈 +疋 +疍 +疎 +疏 +疐 +疑 +疔 +疖 +疗 +疙 +疚 +疝 +疟 +疠 +疡 +疢 +疣 +疤 +疥 +疧 +疫 +疬 +疭 +疮 +疯 +疰 +疱 +疲 +疳 +疴 +疵 +疸 +疹 +疻 +疼 +疽 +疾 +痁 +痂 +痃 +痄 +病 +症 +痈 +痉 +痊 +痌 +痍 +痎 +痏 +痒 +痔 +痕 +痖 +痗 +痘 +痛 +痞 +痟 +痠 +痡 +痢 +痣 +痤 +痦 +痧 +痨 +痩 +痪 +痫 +痯 +痰 +痱 +痲 +痴 +痹 +痺 +痻 +痼 +痾 +痿 +瘀 +瘁 +瘅 +瘆 +瘈 +瘉 +瘊 +瘌 +瘏 +瘐 +瘕 +瘖 +瘗 +瘘 +瘙 +瘛 +瘝 +瘟 +瘠 +瘢 +瘣 +瘤 +瘥 +瘦 +瘨 +瘩 +瘪 +瘫 +瘭 +瘰 +瘱 +瘳 +瘴 +瘵 +瘸 +瘼 +瘾 +瘿 +癀 +癃 +癌 +癏 +癒 +癔 +癖 +癙 +癜 +癞 +癢 +癣 +癥 +癦 +癨 +癪 +癫 +癯 +癴 +癶 +癸 +癹 +発 +登 +發 +白 +百 +癿 +皁 +皂 +的 +皆 +皇 +皈 +皋 +皎 +皑 +皓 +皖 +皙 +皛 +皝 +皞 +皤 +皦 +皪 +皮 +皱 +皲 +皴 +皿 +盂 +盅 +盆 +盈 +益 +盍 +盎 +盏 +盐 +监 +盒 +盔 +盖 +盗 +盘 +盛 +盜 +盝 +盟 +盡 +盢 +監 +盥 +盩 +盬 +盭 +目 +盯 +盱 +盲 +直 +盵 +相 +盹 +盻 +盼 +盾 +眀 +省 +眄 +眆 +眇 +眈 +眉 +眊 +看 +県 +眎 +眐 +眙 +眚 +眛 +眞 +真 +眠 +眢 +眦 +眨 +眩 +眬 +眭 +眯 +眴 +眵 +眶 +眷 +眸 +眹 +眺 +眼 +眽 +着 +睁 +睃 +睅 +睆 +睇 +睍 +睎 +睐 +睑 +睒 +睖 +睗 +睚 +睛 +睟 +睠 +睡 +睢 +督 +睥 +睦 +睨 +睩 +睪 +睫 +睬 +睭 +睰 +睳 +睷 +睹 +睺 +睽 +睾 +睿 +瞀 +瞂 +瞄 +瞅 +瞆 +瞋 +瞌 +瞍 +瞎 +瞑 +瞒 +瞓 +瞚 +瞟 +瞠 +瞢 +瞤 +瞥 +瞧 +瞩 +瞪 +瞬 +瞭 +瞯 +瞰 +瞳 +瞵 +瞻 +瞽 +瞾 +瞿 +矂 +矇 +矋 +矍 +矐 +矑 +矖 +矗 +矛 +矜 +矞 +矟 +矢 +矣 +知 +矧 +矨 +矩 +矫 +矬 +短 +矮 +矰 +矱 +矲 +石 +矶 +矸 +矻 +矼 +矽 +矾 +矿 +砀 +码 +砂 +砃 +砅 +砆 +砉 +砌 +砍 +砏 +砑 +砒 +研 +砖 +砗 +砘 +砚 +砜 +砝 +砟 +砠 +砢 +砣 +砥 +砦 +砧 +砬 +砭 +砮 +砯 +砰 +砲 +破 +砷 +砸 +砹 +砺 +砻 +砼 +砾 +础 +硁 +硅 +硇 +硉 +硊 +硌 +硍 +硎 +硏 +硐 +硒 +硕 +硖 +硗 +硙 +硚 +硝 +硡 +硪 +硫 +硬 +确 +硰 +硵 +硷 +硼 +硾 +硿 +碁 +碃 +碆 +碇 +碉 +碌 +碍 +碎 +碏 +碐 +碑 +碓 +碔 +碕 +碗 +碘 +碚 +碛 +碜 +碟 +碡 +碣 +碥 +碧 +碨 +碪 +碫 +碰 +碱 +碲 +碳 +碴 +碶 +確 +碻 +碾 +磁 +磂 +磅 +磈 +磉 +磊 +磋 +磎 +磏 +磐 +磑 +磒 +磔 +磕 +磖 +磙 +磛 +磜 +磝 +磡 +磢 +磨 +磬 +磲 +磳 +磴 +磵 +磷 +磹 +磺 +磻 +磾 +磿 +礁 +礅 +礉 +礌 +礐 +礒 +礓 +礜 +礞 +礡 +礤 +礧 +礨 +礮 +礲 +礴 +礶 +示 +礼 +礽 +社 +礿 +祀 +祁 +祃 +祄 +祅 +祆 +祇 +祈 +祉 +祊 +祋 +祎 +祏 +祐 +祓 +祔 +祕 +祖 +祗 +祚 +祛 +祜 +祝 +神 +祟 +祠 +祢 +祤 +祥 +祧 +票 +祫 +祭 +祯 +祱 +祲 +祴 +祶 +祷 +祸 +祺 +祼 +祾 +祿 +禀 +禁 +禂 +禄 +禅 +禇 +禊 +禋 +禎 +福 +禑 +禔 +禖 +禗 +禘 +禚 +禛 +禜 +禟 +禤 +禥 +禧 +禨 +禩 +禫 +禮 +禳 +禴 +禵 +禷 +禹 +禺 +离 +禽 +禾 +秀 +私 +秃 +秄 +秅 +秆 +秇 +秉 +秋 +种 +秏 +科 +秒 +秔 +秕 +秖 +秘 +秛 +秞 +租 +秠 +秣 +秤 +秦 +秧 +秩 +秪 +秫 +秬 +秭 +积 +称 +秴 +秸 +移 +秽 +秾 +秿 +稀 +稂 +稃 +稅 +稆 +稊 +程 +稌 +稍 +税 +稔 +稖 +稗 +稙 +稚 +稛 +稞 +稠 +稣 +稭 +種 +稱 +稲 +稳 +稷 +稹 +稺 +稻 +稼 +稽 +稾 +稿 +穀 +穂 +穅 +穆 +穇 +穈 +穉 +穊 +穋 +積 +穏 +穑 +穗 +穞 +穟 +穠 +穡 +穧 +穨 +穬 +穰 +穴 +穵 +究 +穷 +穸 +穹 +空 +穼 +穽 +穾 +穿 +窀 +突 +窃 +窄 +窅 +窆 +窈 +窊 +窋 +窌 +窍 +窎 +窑 +窒 +窓 +窕 +窖 +窗 +窘 +窙 +窜 +窝 +窞 +窟 +窠 +窡 +窣 +窥 +窦 +窨 +窩 +窫 +窬 +窭 +窰 +窱 +窳 +窴 +窸 +窹 +窺 +窻 +窽 +窾 +窿 +竂 +竉 +立 +竑 +竖 +竘 +站 +竛 +竜 +竝 +竞 +竟 +章 +竣 +童 +竦 +竭 +竮 +端 +竹 +竺 +竻 +竽 +竿 +笃 +笄 +笆 +笈 +笉 +笊 +笋 +笏 +笐 +笑 +笔 +笕 +笙 +笛 +笞 +笠 +笡 +笤 +笥 +符 +笨 +笪 +笫 +第 +笭 +笮 +笯 +笱 +笲 +笳 +笴 +笵 +笸 +笹 +笺 +笼 +笾 +筇 +筈 +等 +筋 +筌 +筏 +筐 +筑 +筒 +筓 +答 +策 +筚 +筛 +筜 +筝 +筠 +筢 +筤 +筥 +筦 
+筩 +筭 +筮 +筯 +筰 +筱 +筲 +筳 +筴 +筵 +筷 +筹 +筻 +筼 +签 +简 +箄 +箅 +箇 +箊 +箍 +箎 +箐 +箑 +箒 +箓 +箔 +箕 +算 +箘 +箜 +箝 +箠 +管 +箢 +箣 +箦 +箧 +箨 +箩 +箪 +箫 +箬 +箭 +箯 +箱 +箴 +箵 +箸 +箻 +箼 +箾 +篁 +篃 +篆 +篇 +篌 +篑 +篓 +篘 +篙 +篚 +篛 +篝 +篡 +篢 +篥 +篦 +篨 +篪 +篭 +篮 +篯 +篱 +篲 +篷 +篸 +篹 +篻 +篼 +篾 +簁 +簃 +簄 +簇 +簈 +簉 +簋 +簌 +簏 +簕 +簖 +簜 +簟 +簠 +簡 +簦 +簧 +簨 +簪 +簬 +簰 +簳 +簴 +簵 +簸 +簿 +籀 +籁 +籊 +籋 +籌 +籍 +籏 +籐 +籓 +籛 +籝 +籞 +籢 +籣 +籤 +籥 +籧 +籯 +米 +籴 +籹 +籺 +类 +籼 +籽 +籾 +粃 +粆 +粉 +粊 +粋 +粐 +粑 +粒 +粔 +粕 +粗 +粘 +粜 +粝 +粞 +粟 +粢 +粤 +粥 +粧 +粩 +粪 +粮 +粱 +粲 +粳 +粶 +粹 +粺 +粻 +粼 +粽 +精 +粿 +糀 +糁 +糅 +糇 +糈 +糊 +糌 +糍 +糒 +糕 +糖 +糗 +糙 +糜 +糟 +糠 +糦 +糧 +糨 +糯 +糵 +糸 +系 +紀 +紃 +約 +紅 +紊 +納 +紑 +純 +級 +紞 +素 +索 +紧 +紫 +累 +紵 +紽 +紾 +絁 +終 +組 +絆 +絇 +経 +絏 +絓 +絕 +絖 +絙 +絜 +絠 +絣 +給 +絩 +絪 +絫 +絮 +絵 +絷 +絺 +絻 +絿 +綀 +綅 +綈 +綌 +綍 +經 +綖 +継 +続 +綝 +綟 +綦 +綪 +綮 +綯 +綴 +綷 +緃 +緊 +緌 +緎 +総 +緒 +線 +緝 +緣 +緩 +緺 +緼 +縁 +縄 +縆 +縓 +縕 +縠 +縢 +縦 +縩 +縮 +縯 +縺 +縻 +縿 +繁 +繂 +繄 +繇 +繊 +繋 +繍 +繐 +繑 +織 +繖 +繙 +繟 +繣 +繳 +繸 +繻 +纁 +纂 +纆 +纇 +纑 +纔 +纕 +纚 +纛 +纟 +纠 +纡 +红 +纣 +纤 +纥 +约 +级 +纨 +纩 +纪 +纫 +纬 +纭 +纮 +纯 +纰 +纱 +纲 +纳 +纴 +纵 +纶 +纷 +纸 +纹 +纺 +纻 +纼 +纽 +纾 +线 +绀 +绁 +绂 +练 +组 +绅 +细 +织 +终 +绉 +绊 +绋 +绌 +绍 +绎 +经 +绐 +绑 +绒 +结 +绔 +绕 +绖 +绗 +绘 +给 +绚 +绛 +络 +绝 +绞 +统 +绠 +绡 +绢 +绣 +绤 +绥 +绦 +继 +绨 +绩 +绪 +绫 +续 +绮 +绯 +绰 +绱 +绲 +绳 +维 +绵 +绶 +绷 +绸 +绹 +绺 +绻 +综 +绽 +绾 +绿 +缀 +缁 +缃 +缄 +缅 +缆 +缇 +缈 +缉 +缊 +缋 +缌 +缍 +缎 +缏 +缐 +缑 +缒 +缓 +缔 +缕 +编 +缗 +缘 +缙 +缚 +缛 +缜 +缝 +缞 +缟 +缠 +缡 +缢 +缣 +缤 +缥 +缦 +缧 +缨 +缩 +缪 +缫 +缬 +缭 +缮 +缯 +缰 +缱 +缲 +缳 +缴 +缵 +缶 +缸 +缺 +缼 +缾 +罂 +罃 +罄 +罅 +罈 +罉 +罍 +罐 +网 +罔 +罕 +罗 +罘 +罙 +罚 +罛 +罝 +罟 +罠 +罡 +罢 +罣 +罥 +罦 +罨 +罩 +罪 +罭 +置 +罯 +署 +罳 +罴 +罶 +罹 +罻 +罽 +罾 +罿 +羀 +羁 +羂 +羅 +羆 +羇 +羉 +羊 +羌 +美 +羑 +羒 +羔 +羕 +羖 +羗 +羚 +羜 +羝 +羞 +羟 +羠 +羡 +羣 +群 +羧 +羨 +義 +羫 +羭 +羯 +羰 +羱 +羲 +羴 +羵 +羶 +羸 +羹 +羼 +羽 +羾 +羿 +翀 +翁 +翂 +翃 +翅 +翈 +翊 +翌 +翎 +翏 +習 +翔 +翕 +翖 +翘 +翙 +翚 +翛 +翟 +翠 +翡 +翣 +翥 +翦 +翩 +翫 +翬 +翮 +翯 +翰 +翱 +翲 +翳 +翶 +翻 +翼 +翾 +翿 +耀 +老 +考 +耄 +者 +耆 +耇 +耈 +耋 +而 +耍 +耎 +耐 +耒 +耔 +耕 +耖 +耗 +耘 +耙 +耜 +耡 +耢 +耤 +耥 +耦 +耧 +耨 +耩 +耭 +耰 +耱 +耳 +耵 +耶 +耷 +耸 +耻 +耽 +耿 +聂 +聃 +聆 +聊 +聋 +职 +聍 +聒 +联 +聖 +聘 +聚 +聛 +聡 +聩 +聪 +聰 +聱 +聲 +聴 +聻 +聽 +聿 +肃 +肄 +肆 +肇 +肉 +肋 +肌 +肏 +肐 +肓 +肖 +肘 +肚 +肛 +肜 +肝 +肞 +肟 +肠 +股 +肢 +肣 +肤 +肥 +肦 +肩 +肪 +肫 +肬 +肭 +肮 +肯 +肱 +育 +肳 +肴 +肶 +肸 +肺 +肼 +肽 +肾 +肿 +胀 +胁 +胂 +胃 +胄 +胆 +胈 +背 +胍 +胎 +胐 +胔 +胖 +胗 +胘 +胙 +胚 +胛 +胜 +胝 +胞 +胠 +胡 +胤 +胥 +胧 +胨 +胩 +胪 +胫 +胬 +胭 +胮 +胯 +胰 +胱 +胲 +胳 +胴 +胶 +胸 +胹 +胺 +胻 +胼 +能 +胾 +脁 +脂 +脃 +脆 +脇 +脉 +脊 +脍 +脎 +脏 +脐 +脑 +脒 +脓 +脔 +脖 +脘 +脙 +脚 +脝 +脞 +脟 +脡 +脢 +脣 +脤 +脩 +脬 +脯 +脰 +脱 +脲 +脳 +脶 +脷 +脸 +脹 +脽 +脾 +脿 +腄 +腆 +腈 +腊 +腋 +腌 +腍 +腐 +腑 +腒 +腓 +腔 +腕 +腘 +腙 +腚 +腠 +腥 +腧 +腩 +腬 +腭 +腮 +腯 +腰 +腱 +腲 +腴 +腷 +腹 +腺 +腻 +腼 +腽 +腾 +腿 +膀 +膂 +膄 +膇 +膈 +膊 +膋 +膍 +膏 +膑 +膘 +膚 +膛 +膜 +膝 +膣 +膦 +膨 +膪 +膫 +膰 +膳 +膴 +膶 +膺 +膻 +臀 +臁 +臂 +臃 +臄 +臆 +臊 +臋 +臌 +臎 +臑 +臓 +臙 +臛 +臜 +臝 +臞 +臡 +臣 +臧 +臨 +臩 +自 +臬 +臭 +臯 +臱 +臲 +至 +致 +臺 +臻 +臼 +臾 +臿 +舀 +舁 +舂 +舄 +舅 +舆 +與 +興 +舉 +舊 +舋 +舌 +舍 +舎 +舐 +舒 +舔 +舕 +舗 +舘 +舛 +舜 +舝 +舞 +舟 +舠 +舡 +舢 +舣 +舥 +舦 +舨 +航 +舫 +般 +舰 +舱 +舲 +舳 +舴 +舵 +舶 +舷 +舸 +船 +舺 +舻 +舼 +艄 +艅 +艇 +艋 +艎 +艏 +艐 +艑 +艓 +艔 +艘 +艚 +艛 +艟 +艨 +艮 +良 +艰 +色 +艳 +艴 +艹 +艺 +艻 +艽 +艾 +艿 +节 +芃 +芄 +芈 +芊 +芋 +芍 +芎 +芏 +芐 +芑 +芒 +芔 +芗 +芘 +芙 +芛 +芜 +芝 +芟 +芡 +芣 +芤 +芥 +芦 +芧 +芨 +芩 +芪 +芫 +芬 +芭 +芮 +芯 +芰 +花 +芳 +芴 +芶 +芷 +芸 +芹 +芺 +芼 +芽 +芾 +苁 +苄 +苅 +苇 +苈 +苊 +苋 +苌 +苍 +苎 +苏 +苑 +苒 +苓 +苔 +苕 +苖 +苗 +苘 +苙 +苛 +苜 +苝 +苞 +苟 +苠 +苡 +苣 +苤 +若 +苦 +苧 +苨 +苫 +苯 +英 +苳 +苴 +苶 +苷 +苹 +苺 +苻 +苾 +茀 +茁 +茂 +范 +茄 +茅 +茆 +茇 +茈 +茉 +茌 +茍 +茎 +茏 +茐 +茑 +茔 +茕 +茖 +茗 +茙 +茚 +茛 +茜 +茝 +茞 +茧 +茨 +茫 +茬 +茭 +茯 +茱 +茲 +茳 +茴 +茵 +茶 +茷 +茸 +茹 +茺 +茼 +荀 +荂 +荃 +荄 +荅 +荆 +荇 +荈 +草 +荊 +荍 +荎 +荏 +荐 +荑 +荒 +荓 +荔 +荘 +荙 +荚 +荛 +荜 +荞 +荟 +荠 +荡 +荣 +荤 +荥 +荦 +荧 +荨 +荩 +荪 +荫 +荬 +荭 +荮 +药 +荴 +荵 +荷 +荸 +荻 +荼 +荽 +莅 +莆 +莉 +莊 +莋 +莎 +莐 +莒 +莓 +莕 +莘 +莙 +莛 +莜 +莞 +莠 +莨 +莩 +莪 +莫 +莰 +莱 +莲 +莳 +莴 +莶 +获 +莸 +莹 +莺 +莼 +莽 +莿 +菀 +菁 +菂 +菅 +菇 +菈 +菉 +菊 +菌 +菍 +菏 +菑 +菓 +菔 +菖 +菘 +菙 +菜 +菝 +菟 +菠 +菡 +菢 +菥 +菧 +菩 +菪 +菫 +菭 +華 +菰 +菱 
+菲 +菴 +菵 +菶 +菷 +菸 +菹 +菺 +菼 +菽 +菾 +萁 +萃 +萄 +萆 +萋 +萌 +萍 +萎 +萏 +萐 +萑 +萘 +萚 +萜 +萝 +萣 +萤 +营 +萦 +萧 +萨 +萩 +萬 +萮 +萯 +萱 +萶 +萷 +萸 +萹 +萼 +落 +葅 +葆 +葇 +葉 +葊 +葍 +葎 +葐 +葑 +葓 +葖 +著 +葙 +葚 +葛 +葜 +葡 +董 +葨 +葩 +葫 +葬 +葭 +葰 +葱 +葳 +葴 +葵 +葶 +葸 +葹 +葺 +葽 +蒀 +蒂 +蒈 +蒉 +蒊 +蒋 +蒌 +蒍 +蒎 +蒐 +蒔 +蒗 +蒙 +蒜 +蒟 +蒡 +蒢 +蒧 +蒨 +蒮 +蒯 +蒱 +蒲 +蒴 +蒶 +蒸 +蒹 +蒺 +蒻 +蒼 +蒽 +蒾 +蒿 +蓁 +蓂 +蓄 +蓇 +蓉 +蓊 +蓍 +蓏 +蓐 +蓑 +蓓 +蓖 +蓗 +蓝 +蓞 +蓟 +蓠 +蓢 +蓣 +蓥 +蓦 +蓧 +蓪 +蓫 +蓬 +蓮 +蓰 +蓱 +蓳 +蓴 +蓵 +蓷 +蓺 +蓼 +蓿 +蔀 +蔂 +蔃 +蔇 +蔊 +蔌 +蔑 +蔓 +蔕 +蔗 +蔘 +蔚 +蔞 +蔟 +蔡 +蔪 +蔫 +蔬 +蔯 +蔵 +蔷 +蔸 +蔹 +蔺 +蔻 +蔼 +蔽 +蕃 +蕅 +蕈 +蕉 +蕊 +蕑 +蕖 +蕗 +蕙 +蕝 +蕞 +蕟 +蕡 +蕣 +蕤 +蕨 +蕫 +蕰 +蕲 +蕳 +蕴 +蕹 +蕺 +蕻 +蕾 +薁 +薄 +薅 +薆 +薇 +薍 +薏 +薖 +薗 +薙 +薛 +薜 +薝 +薡 +薢 +薤 +薦 +薨 +薪 +薫 +薬 +薮 +薯 +薰 +薶 +薷 +薸 +薹 +薿 +藁 +藂 +藇 +藉 +藊 +藋 +藏 +藐 +藓 +藕 +藖 +藘 +藙 +藚 +藜 +藞 +藟 +藠 +藢 +藤 +藥 +藦 +藨 +藩 +藫 +藭 +藳 +藹 +藻 +藾 +藿 +蘂 +蘅 +蘋 +蘎 +蘑 +蘖 +蘗 +蘘 +蘙 +蘡 +蘤 +蘧 +蘩 +蘭 +蘵 +蘸 +蘼 +蘽 +虀 +虉 +虌 +虎 +虏 +虐 +虑 +虒 +虓 +虔 +處 +虖 +虙 +虚 +虜 +虞 +號 +虡 +虢 +虥 +虧 +虫 +虬 +虮 +虱 +虵 +虹 +虺 +虻 +虼 +虽 +虾 +虿 +蚀 +蚁 +蚂 +蚃 +蚊 +蚋 +蚌 +蚍 +蚏 +蚑 +蚓 +蚕 +蚖 +蚗 +蚘 +蚜 +蚝 +蚡 +蚢 +蚣 +蚤 +蚧 +蚨 +蚩 +蚪 +蚬 +蚯 +蚰 +蚱 +蚳 +蚴 +蚵 +蚶 +蚷 +蚹 +蚺 +蚻 +蚾 +蚿 +蛀 +蛁 +蛃 +蛄 +蛆 +蛇 +蛉 +蛊 +蛋 +蛎 +蛏 +蛐 +蛑 +蛔 +蛕 +蛙 +蛚 +蛛 +蛜 +蛞 +蛟 +蛣 +蛤 +蛦 +蛩 +蛪 +蛫 +蛭 +蛮 +蛰 +蛱 +蛲 +蛳 +蛴 +蛸 +蛹 +蛾 +蜀 +蜂 +蜃 +蜄 +蜇 +蜈 +蜉 +蜊 +蜋 +蜍 +蜎 +蜐 +蜑 +蜒 +蜓 +蜔 +蜕 +蜗 +蜘 +蜚 +蜜 +蜞 +蜡 +蜢 +蜣 +蜤 +蜥 +蜨 +蜩 +蜮 +蜰 +蜱 +蜴 +蜷 +蜺 +蜻 +蜼 +蜾 +蜿 +蝀 +蝃 +蝄 +蝇 +蝈 +蝉 +蝋 +蝌 +蝍 +蝎 +蝓 +蝗 +蝘 +蝙 +蝚 +蝛 +蝝 +蝠 +蝡 +蝣 +蝤 +蝥 +蝮 +蝯 +蝰 +蝱 +蝲 +蝴 +蝶 +蝻 +蝼 +蝽 +蝾 +螀 +螂 +螃 +螅 +螆 +螈 +螉 +螋 +螌 +融 +螓 +螕 +螗 +螘 +螟 +螣 +螨 +螩 +螫 +螬 +螭 +螮 +螯 +螳 +螵 +螹 +螺 +螽 +螾 +螿 +蟀 +蟅 +蟆 +蟉 +蟊 +蟋 +蟏 +蟑 +蟒 +蟚 +蟛 +蟜 +蟝 +蟟 +蟠 +蟢 +蟥 +蟦 +蟧 +蟩 +蟪 +蟫 +蟭 +蟮 +蟳 +蟹 +蟺 +蟾 +蠀 +蠂 +蠃 +蠄 +蠊 +蠋 +蠍 +蠓 +蠔 +蠕 +蠖 +蠗 +蠙 +蠛 +蠡 +蠢 +蠨 +蠪 +蠭 +蠲 +蠹 +蠻 +蠼 +血 +衄 +衅 +衉 +行 +衍 +衎 +衏 +衒 +衔 +衖 +街 +衙 +衠 +衡 +衢 +衣 +衤 +补 +表 +衩 +衫 +衬 +衮 +衰 +衱 +衲 +衵 +衷 +衹 +衽 +衾 +衿 +袁 +袂 +袄 +袅 +袆 +袈 +袋 +袍 +袑 +袒 +袓 +袖 +袗 +袚 +袛 +袜 +袞 +袢 +袤 +袨 +袩 +袪 +被 +袭 +袯 +袱 +袴 +袷 +袸 +袺 +袼 +袿 +裀 +裁 +裂 +裄 +装 +裆 +裇 +裈 +裋 +裌 +裎 +裒 +裓 +裔 +裕 +裖 +裘 +裙 +裛 +補 +裟 +裢 +裤 +裥 +裨 +裪 +裯 +裰 +裱 +裳 +裴 +裸 +裹 +裼 +製 +裾 +褂 +褆 +褊 +褎 +褐 +褒 +褓 +褔 +褕 +褙 +褚 +褛 +褡 +褣 +褥 +褦 +褧 +褪 +褫 +褭 +褯 +褰 +褱 +褴 +褵 +褶 +褷 +褾 +襀 +襁 +襄 +襆 +襋 +襌 +襕 +襚 +襛 +襜 +襞 +襟 +襦 +襪 +襫 +襭 +襮 +襰 +襵 +襶 +襻 +襼 +西 +要 +覃 +覆 +覇 +覈 +見 +覌 +規 +視 +覗 +覙 +覚 +覧 +親 +観 +觀 +见 +观 +规 +觅 +视 +觇 +览 +觉 +觊 +觋 +觌 +觎 +觏 +觐 +觑 +角 +觖 +觘 +觚 +觜 +觞 +解 +觥 +触 +觩 +觫 +觭 +觯 +觱 +觳 +觺 +觼 +觽 +觿 +言 +訂 +訇 +計 +訍 +討 +訏 +託 +記 +訚 +訜 +訦 +訧 +訪 +設 +許 +訳 +訴 +訹 +註 +証 +訾 +訿 +詀 +詈 +詝 +詞 +詟 +詠 +詢 +詧 +詰 +話 +詹 +誉 +誊 +誌 +認 +誓 +誕 +誠 +誨 +読 +誰 +課 +誷 +誾 +調 +諄 +請 +諐 +論 +諟 +諠 +諡 +諮 +諲 +諴 +諵 +諾 +謇 +謏 +謔 +謥 +謦 +謷 +譄 +譆 +證 +譊 +譓 +譖 +識 +譞 +警 +譩 +譬 +議 +譲 +譳 +譶 +譺 +譾 +變 +讎 +讓 +讙 +讟 +讠 +计 +订 +讣 +认 +讥 +讦 +讧 +讨 +让 +讪 +讫 +讬 +训 +议 +讯 +记 +讱 +讲 +讳 +讴 +讵 +讶 +讷 +许 +讹 +论 +讻 +讼 +讽 +设 +访 +诀 +证 +诂 +诃 +评 +诅 +识 +诇 +诈 +诉 +诊 +诋 +诌 +词 +诎 +诏 +诐 +译 +诒 +诓 +诔 +试 +诖 +诗 +诘 +诙 +诚 +诛 +诜 +话 +诞 +诟 +诠 +诡 +询 +诣 +诤 +该 +详 +诧 +诨 +诩 +诪 +诫 +诬 +语 +诮 +误 +诰 +诱 +诲 +诳 +说 +诵 +诶 +请 +诸 +诹 +诺 +读 +诼 +诽 +课 +诿 +谀 +谁 +谂 +调 +谄 +谅 +谆 +谇 +谈 +谊 +谋 +谌 +谍 +谎 +谏 +谐 +谑 +谒 +谓 +谔 +谕 +谖 +谗 +谘 +谙 +谚 +谛 +谜 +谝 +谞 +谟 +谠 +谡 +谢 +谣 +谤 +谥 +谦 +谧 +谨 +谩 +谪 +谫 +谬 +谭 +谮 +谯 +谰 +谱 +谲 +谳 +谴 +谵 +谶 +谷 +谹 +谺 +谼 +谽 +谾 +谿 +豀 +豁 +豄 +豆 +豇 +豉 +豊 +豌 +豏 +豐 +豕 +豗 +豚 +豜 +豝 +象 +豢 +豨 +豩 +豪 +豫 +豭 +豯 +豰 +豳 +豵 +豸 +豹 +豺 +豿 +貂 +貅 +貆 +貉 +貊 +貋 +貌 +貍 +貎 +貐 +貑 +貒 +貔 +貕 +貘 +貝 +貞 +負 +財 +貤 +貨 +責 +貮 +貳 +貴 +買 +貸 +費 +貼 +貿 +賀 +資 +賚 +賛 +賝 +賠 +賣 +賦 +賨 +賸 +購 +贁 +贇 +贈 +贔 +贝 +贞 +负 +贠 +贡 +财 +责 +贤 +败 +账 +货 +质 +贩 +贪 +贫 +贬 +购 +贮 +贯 +贰 +贱 +贲 +贳 +贴 +贵 +贶 +贷 +贸 +费 +贺 +贻 +贼 +贽 +贾 +贿 +赀 +赁 +赂 +赃 +资 +赅 +赆 +赇 +赈 +赉 +赊 +赋 +赌 +赍 +赎 +赏 +赐 +赑 +赒 +赓 +赔 +赖 +赗 +赘 +赙 +赚 +赛 +赜 +赝 +赞 +赟 +赠 +赡 +赢 +赣 +赤 +赥 +赦 +赧 +赩 +赪 +赫 +赭 +走 +赳 +赴 +赵 +赶 +起 +赺 +趁 +趄 +超 +越 +趋 +趍 +趐 +趑 +趔 +趖 +趗 +趟 +趠 +趢 +趣 +趨 +趫 +趮 +趯 +趱 +足 +趴 +趵 +趷 +趸 +趹 +趺 +趼 +趾 +趿 +跂 +跃 +跄 +跅 +跆 +跇 +跋 +跌 +跍 +跎 +跏 +跐 
+跑 +跕 +跖 +跗 +跙 +跚 +跛 +跜 +距 +跟 +跡 +跣 +跤 +跧 +跨 +跩 +跪 +跫 +跬 +路 +跱 +跳 +践 +跶 +跷 +跸 +跹 +跺 +跻 +跼 +跽 +跿 +踄 +踅 +踆 +踇 +踉 +踊 +踌 +踎 +踏 +踔 +踕 +踖 +踘 +踝 +踞 +踟 +踠 +踡 +踢 +踣 +踦 +踧 +踩 +踪 +踬 +踭 +踮 +踯 +踰 +踱 +踵 +踶 +踸 +踹 +踺 +踼 +踽 +蹀 +蹁 +蹂 +蹄 +蹅 +蹇 +蹈 +蹉 +蹊 +蹋 +蹏 +蹐 +蹑 +蹒 +蹓 +蹔 +蹙 +蹚 +蹛 +蹜 +蹟 +蹠 +蹡 +蹢 +蹦 +蹩 +蹬 +蹭 +蹮 +蹯 +蹰 +蹱 +蹲 +蹳 +蹴 +蹶 +蹸 +蹻 +蹼 +蹿 +躁 +躃 +躄 +躅 +躇 +躏 +躐 +躔 +躜 +躝 +躞 +躠 +躧 +躨 +躩 +身 +躬 +躭 +躯 +躲 +躴 +躺 +躿 +軃 +車 +軍 +軏 +軜 +転 +軥 +軧 +軨 +軱 +軵 +軶 +軷 +軽 +軿 +輀 +輈 +輋 +輗 +輠 +輣 +輤 +輧 +輮 +輴 +輵 +輶 +輷 +輸 +輼 +轉 +轑 +轒 +轓 +轗 +轘 +轝 +轣 +轥 +车 +轧 +轨 +轩 +轪 +轫 +转 +轭 +轮 +软 +轰 +轱 +轲 +轳 +轴 +轵 +轶 +轸 +轹 +轺 +轻 +轼 +载 +轾 +轿 +辀 +辁 +辂 +较 +辄 +辅 +辆 +辇 +辈 +辉 +辊 +辋 +辍 +辎 +辏 +辐 +辑 +输 +辔 +辕 +辖 +辗 +辘 +辙 +辚 +辛 +辜 +辞 +辟 +辣 +辦 +辨 +辩 +辫 +辰 +辱 +辴 +辶 +辷 +边 +辺 +辻 +込 +辽 +达 +辿 +迁 +迂 +迄 +迅 +过 +迈 +迋 +迍 +迎 +运 +近 +迒 +迓 +返 +迕 +还 +这 +进 +远 +违 +连 +迟 +迢 +迤 +迥 +迦 +迨 +迩 +迪 +迫 +迭 +迮 +述 +迳 +迴 +迵 +迷 +迸 +迹 +迺 +追 +迾 +退 +送 +适 +逃 +逄 +逅 +逆 +逈 +选 +逊 +逋 +逍 +透 +逐 +逑 +递 +途 +逖 +逗 +這 +通 +逛 +逝 +逞 +速 +造 +逡 +逢 +連 +逤 +逥 +逦 +逭 +逮 +逯 +進 +逴 +逵 +逶 +逷 +逸 +逻 +逼 +逾 +逿 +遁 +遂 +遄 +遅 +遆 +遇 +遍 +遏 +遐 +遑 +遒 +道 +違 +遗 +遘 +遛 +遝 +遞 +遠 +遡 +遢 +遣 +遥 +遨 +適 +遫 +遭 +遮 +遯 +遱 +遴 +遵 +遶 +遹 +遺 +遽 +避 +邀 +邂 +邃 +還 +邅 +邈 +邊 +邋 +邍 +邑 +邓 +邕 +邗 +邘 +邙 +邛 +邝 +邞 +邠 +邡 +邢 +那 +邤 +邥 +邦 +邨 +邪 +邬 +邮 +邯 +邰 +邱 +邲 +邳 +邴 +邵 +邶 +邷 +邸 +邹 +邺 +邻 +邽 +邾 +郁 +郃 +郄 +郅 +郇 +郈 +郉 +郊 +郎 +郏 +郐 +郑 +郓 +郕 +郗 +郚 +郛 +郜 +郝 +郞 +郡 +郢 +郤 +郦 +郧 +部 +郩 +郪 +郫 +郭 +郯 +郴 +郷 +郸 +都 +郾 +郿 +鄀 +鄂 +鄄 +鄋 +鄌 +鄏 +鄑 +鄗 +鄘 +鄙 +鄚 +鄛 +鄜 +鄞 +鄠 +鄡 +鄢 +鄣 +鄤 +鄦 +鄨 +鄩 +鄪 +鄬 +鄮 +鄯 +鄰 +鄱 +鄳 +鄹 +鄼 +鄽 +酂 +酃 +酄 +酅 +酆 +酇 +酉 +酊 +酋 +酌 +配 +酎 +酏 +酐 +酑 +酒 +酔 +酕 +酖 +酗 +酘 +酚 +酝 +酞 +酡 +酢 +酣 +酤 +酥 +酦 +酧 +酩 +酪 +酬 +酭 +酮 +酯 +酰 +酱 +酲 +酴 +酵 +酶 +酷 +酸 +酹 +酺 +酽 +酾 +酿 +醁 +醃 +醄 +醅 +醆 +醇 +醉 +醋 +醌 +醍 +醎 +醐 +醑 +醒 +醓 +醘 +醚 +醛 +醜 +醡 +醢 +醤 +醥 +醦 +醨 +醪 +醫 +醭 +醮 +醯 +醲 +醳 +醴 +醵 +醸 +醹 +醺 +醻 +醽 +醾 +醿 +釂 +釆 +采 +釈 +釉 +释 +里 +重 +野 +量 +釐 +金 +釚 +釜 +釭 +釱 +釴 +鈇 +鈋 +鈌 +鈖 +鈚 +鈜 +鈟 +鈡 +鈣 +鈴 +鈵 +鉁 +鉄 +鉊 +鉌 +鉏 +鉝 +鉞 +鉟 +鉢 +鉤 +鉥 +鉧 +鉨 +鉱 +鉲 +鉴 +鉼 +鉽 +銀 +銁 +銍 +銙 +銛 +銭 +銮 +銶 +銺 +鋂 +鋆 +鋈 +鋋 +鋐 +鋕 +鋗 +鋘 +鋣 +鋪 +鋹 +錏 +錔 +錞 +錢 +錤 +錧 +錫 +錬 +錯 +錱 +錺 +錻 +錽 +錾 +鍑 +鍜 +鍟 +鍧 +鍪 +鍭 +鍮 +鍱 +鍼 +鎈 +鎌 +鎎 +鎏 +鎓 +鎕 +鎗 +鎛 +鎝 +鎞 +鎬 +鎶 +鎷 +鎹 +鏁 +鏊 +鏖 +鏣 +鏦 +鏸 +鐀 +鐈 +鐍 +鐖 +鐘 +鐻 +鑑 +鑙 +鑛 +鑨 +鑫 +鑯 +鑴 +钀 +钅 +钆 +钇 +针 +钉 +钊 +钌 +钍 +钎 +钏 +钐 +钑 +钒 +钓 +钕 +钖 +钗 +钘 +钙 +钚 +钛 +钜 +钝 +钞 +钟 +钠 +钡 +钢 +钣 +钤 +钥 +钦 +钧 +钨 +钩 +钪 +钫 +钬 +钭 +钮 +钯 +钰 +钱 +钲 +钳 +钴 +钵 +钸 +钹 +钺 +钻 +钼 +钽 +钾 +钿 +铀 +铁 +铂 +铃 +铄 +铅 +铆 +铇 +铈 +铉 +铊 +铋 +铌 +铍 +铎 +铏 +铐 +铑 +铒 +铓 +铔 +铕 +铖 +铗 +铘 +铙 +铚 +铛 +铜 +铝 +铟 +铠 +铡 +铢 +铣 +铤 +铥 +铦 +铧 +铨 +铩 +铪 +铫 +铬 +铭 +铮 +铯 +铰 +铱 +铲 +铳 +铴 +铵 +银 +铷 +铸 +铺 +铻 +铼 +铽 +链 +铿 +销 +锁 +锂 +锃 +锄 +锅 +锆 +锇 +锈 +锉 +锋 +锌 +锍 +锏 +锐 +锑 +锒 +锓 +锔 +锕 +锖 +锗 +锘 +错 +锚 +锛 +锜 +锝 +锞 +锟 +锠 +锡 +锢 +锣 +锤 +锥 +锦 +锧 +锨 +锪 +锭 +键 +锯 +锰 +锱 +锲 +锴 +锵 +锶 +锷 +锸 +锹 +锺 +锻 +锼 +锽 +锾 +镀 +镁 +镂 +镃 +镆 +镇 +镈 +镉 +镊 +镋 +镌 +镍 +镎 +镏 +镐 +镑 +镒 +镓 +镔 +镕 +镖 +镗 +镘 +镚 +镛 +镜 +镝 +镞 +镠 +镡 +镢 +镣 +镤 +镥 +镦 +镧 +镨 +镩 +镪 +镫 +镬 +镭 +镮 +镯 +镰 +镱 +镲 +镳 +镴 +镵 +镶 +長 +长 +門 +閁 +閈 +開 +閑 +閒 +間 +閛 +閜 +閞 +閟 +関 +閤 +閦 +閧 +閪 +閴 +闇 +闉 +闍 +闕 +闘 +闙 +闚 +關 +闟 +闢 +门 +闩 +闪 +闫 +闬 +闭 +问 +闯 +闰 +闱 +闲 +闳 +间 +闵 +闷 +闸 +闹 +闺 +闻 +闼 +闽 +闾 +闿 +阀 +阁 +阂 +阃 +阄 +阅 +阆 +阇 +阈 +阉 +阊 +阋 +阌 +阍 +阎 +阏 +阐 +阑 +阒 +阓 +阔 +阕 +阖 +阗 +阘 +阙 +阚 +阛 +阜 +阝 +队 +阠 +阡 +阨 +阪 +阬 +阮 +阯 +阱 +防 +阳 +阴 +阵 +阶 +阻 +阼 +阽 +阿 +陀 +陁 +陂 +附 +际 +陆 +陇 +陈 +陉 +陋 +陌 +降 +限 +陑 +陔 +陕 +陛 +陜 +陟 +陡 +院 +除 +陥 +陧 +陨 +险 +陪 +陬 +陭 +陲 +陴 +陵 +陶 +陷 +険 +陻 +陼 +陽 +陾 +隃 +隅 +隆 +隈 +隊 +隋 +隍 +階 +随 +隐 +隑 +隔 +隕 +隗 +隘 +隙 +際 +障 +隞 +隠 +隣 +隤 +隥 +隦 +隧 +隨 +隩 +險 +隮 +隰 +隳 +隶 +隷 +隹 +隻 +隼 +隽 +难 +雀 +雁 +雄 +雅 +集 +雇 +雉 +雊 +雌 +雍 +雎 +雏 +雑 +雒 +雕 +雘 +雙 +雜 +雝 +雠 +難 +雨 +雩 +雪 +雫 +雯 +雰 +雱 +雲 +雳 +零 +雷 +雹 +電 +雾 +需 +霁 +霂 +霃 +霄 +霅 +霆 +震 +霈 +霉 +霊 +霍 +霎 +霏 +霑 +霓 +霔 +霖 +霙 +霜 +霞 +霠 +霡 +霢 +霣 +霤 +霨 +霪 +霭 +霮 +霰 +露 +霳 +霸 +霹 +霾 +霿 +靂 +靃 +青 +靓 +靖 +靗 +静 +靛 +非 +靠 +靡 +面 +靥 +靦 +靧 +革 +靫 +靮 
+靰 +靳 +靴 +靶 +靷 +靸 +靺 +靼 +靿 +鞀 +鞃 +鞄 +鞅 +鞉 +鞋 +鞍 +鞑 +鞒 +鞓 +鞔 +鞕 +鞗 +鞘 +鞙 +鞚 +鞞 +鞟 +鞠 +鞣 +鞨 +鞫 +鞬 +鞭 +鞮 +鞯 +鞲 +鞳 +鞴 +鞵 +鞶 +鞸 +鞹 +鞺 +鞾 +鞿 +韂 +韅 +韍 +韎 +韐 +韓 +韔 +韘 +韝 +韠 +韡 +韦 +韧 +韨 +韩 +韪 +韫 +韬 +韭 +韮 +音 +韵 +韶 +韹 +韻 +頂 +項 +須 +頉 +頋 +頍 +預 +頔 +頚 +頞 +頠 +頣 +頩 +頫 +頬 +頭 +頯 +頳 +頵 +頼 +顇 +顉 +額 +顏 +顒 +顕 +顗 +顜 +類 +顡 +顦 +页 +顶 +顷 +顸 +项 +顺 +须 +顼 +顽 +顾 +顿 +颀 +颁 +颂 +颃 +预 +颅 +领 +颇 +颈 +颉 +颊 +颋 +颌 +颍 +颎 +颏 +颐 +频 +颒 +颓 +颔 +颕 +颖 +颗 +题 +颙 +颚 +颛 +颜 +额 +颞 +颟 +颠 +颡 +颢 +颣 +颤 +颥 +颦 +颧 +風 +颾 +颿 +飁 +飉 +飋 +飍 +风 +飏 +飐 +飑 +飒 +飓 +飔 +飕 +飖 +飗 +飘 +飙 +飚 +飛 +飞 +食 +飡 +飣 +飧 +飨 +飯 +飰 +飱 +飶 +飽 +餂 +養 +餍 +餐 +餔 +餗 +餘 +餠 +餤 +餧 +館 +餬 +餮 +餲 +餴 +餸 +餽 +饁 +饇 +饉 +饎 +饐 +饑 +饔 +饕 +饗 +饘 +饙 +饛 +饟 +饣 +饤 +饥 +饦 +饧 +饨 +饩 +饪 +饫 +饬 +饭 +饮 +饯 +饰 +饱 +饲 +饴 +饵 +饶 +饷 +饸 +饹 +饺 +饼 +饽 +饾 +饿 +馀 +馁 +馃 +馄 +馅 +馆 +馇 +馈 +馉 +馊 +馋 +馌 +馍 +馎 +馏 +馐 +馑 +馒 +馓 +馔 +馕 +首 +馗 +馘 +香 +馛 +馞 +馠 +馡 +馤 +馥 +馧 +馨 +馬 +馯 +馹 +馺 +馽 +駃 +駄 +駅 +駆 +駉 +駊 +駓 +駖 +駜 +駠 +駪 +駬 +駮 +駰 +駱 +駷 +駸 +駹 +駻 +駼 +駽 +駾 +駿 +騀 +騂 +騃 +騄 +騅 +騊 +騋 +騏 +騑 +騒 +験 +騕 +騞 +騠 +騡 +騢 +騣 +騧 +騪 +騰 +騱 +騳 +騵 +騹 +騺 +驈 +驎 +驒 +驔 +驖 +驚 +驛 +驩 +马 +驭 +驮 +驯 +驰 +驱 +驲 +驳 +驴 +驵 +驶 +驷 +驸 +驹 +驺 +驻 +驼 +驽 +驾 +驿 +骀 +骁 +骂 +骃 +骄 +骅 +骆 +骇 +骈 +骉 +骊 +骋 +验 +骍 +骎 +骏 +骐 +骑 +骒 +骓 +骕 +骖 +骗 +骘 +骙 +骚 +骛 +骜 +骝 +骞 +骟 +骠 +骡 +骢 +骣 +骤 +骥 +骦 +骧 +骨 +骫 +骭 +骰 +骱 +骶 +骷 +骸 +骹 +骺 +骼 +髀 +髁 +髂 +髃 +髅 +髆 +髇 +髋 +髌 +髎 +髐 +髑 +髓 +體 +高 +髙 +髟 +髠 +髡 +髢 +髤 +髦 +髧 +髪 +髫 +髭 +髮 +髯 +髲 +髳 +髵 +髹 +髻 +髽 +髾 +鬃 +鬅 +鬇 +鬈 +鬋 +鬐 +鬑 +鬒 +鬓 +鬖 +鬘 +鬙 +鬝 +鬟 +鬡 +鬣 +鬬 +鬯 +鬱 +鬲 +鬵 +鬷 +鬺 +鬻 +鬼 +鬽 +魀 +魁 +魂 +魃 +魄 +魅 +魆 +魇 +魈 +魉 +魋 +魍 +魏 +魑 +魔 +魖 +魗 +魟 +魠 +魣 +魭 +魮 +魯 +魶 +魼 +魾 +魿 +鮀 +鮄 +鮅 +鮆 +鮇 +鮈 +鮍 +鮖 +鮗 +鮟 +鮠 +鮡 +鮣 +鮧 +鮨 +鮮 +鮰 +鮸 +鮹 +鮻 +鮼 +鯄 +鯈 +鯏 +鯙 +鯥 +鯮 +鯵 +鯶 +鯺 +鯻 +鯾 +鰅 +鰆 +鰋 +鰔 +鰕 +鰞 +鰟 +鰤 +鰧 +鰬 +鰶 +鰽 +鱄 +鱇 +鱊 +鱋 +鱍 +鱎 +鱏 +鱗 +鱚 +鱠 +鱥 +鱮 +鱲 +鱳 +鱵 +鱻 +鱼 +鱾 +鱿 +鲀 +鲁 +鲂 +鲃 +鲅 +鲆 +鲇 +鲈 +鲉 +鲊 +鲋 +鲌 +鲍 +鲎 +鲏 +鲐 +鲑 +鲒 +鲓 +鲔 +鲕 +鲖 +鲘 +鲙 +鲚 +鲛 +鲜 +鲞 +鲟 +鲠 +鲡 +鲢 +鲣 +鲤 +鲥 +鲦 +鲧 +鲨 +鲩 +鲪 +鲫 +鲬 +鲭 +鲮 +鲯 +鲰 +鲱 +鲲 +鲳 +鲴 +鲵 +鲶 +鲷 +鲸 +鲹 +鲺 +鲻 +鲼 +鲽 +鲾 +鲿 +鳀 +鳁 +鳂 +鳃 +鳄 +鳅 +鳆 +鳇 +鳈 +鳉 +鳊 +鳋 +鳌 +鳍 +鳎 +鳏 +鳐 +鳑 +鳒 +鳓 +鳔 +鳕 +鳖 +鳗 +鳙 +鳚 +鳜 +鳝 +鳞 +鳟 +鳠 +鳡 +鳢 +鳣 +鳤 +鳥 +鳦 +鳧 +鳩 +鳲 +鳳 +鳴 +鳶 +鳷 +鳸 +鳺 +鳼 +鳽 +鴂 +鴃 +鴇 +鴈 +鴋 +鴐 +鴒 +鴜 +鴥 +鴳 +鴹 +鴽 +鵉 +鵊 +鵕 +鵙 +鵝 +鵞 +鵟 +鵩 +鵰 +鵱 +鵻 +鶀 +鶂 +鶃 +鶋 +鶏 +鶒 +鶖 +鶗 +鶡 +鶢 +鶤 +鶬 +鶱 +鶵 +鶹 +鶺 +鷃 +鷇 +鷉 +鷊 +鷕 +鷛 +鷞 +鷟 +鷠 +鷢 +鷣 +鷤 +鷩 +鷫 +鷮 +鷾 +鷿 +鸂 +鸃 +鸄 +鸊 +鸐 +鸑 +鸒 +鸓 +鸘 +鸜 +鸝 +鸟 +鸠 +鸡 +鸢 +鸣 +鸤 +鸥 +鸦 +鸧 +鸨 +鸩 +鸪 +鸫 +鸬 +鸭 +鸮 +鸯 +鸰 +鸱 +鸲 +鸳 +鸴 +鸵 +鸶 +鸷 +鸸 +鸹 +鸺 +鸻 +鸽 +鸾 +鸿 +鹀 +鹁 +鹂 +鹃 +鹄 +鹅 +鹆 +鹇 +鹈 +鹉 +鹊 +鹋 +鹌 +鹍 +鹎 +鹏 +鹐 +鹑 +鹒 +鹓 +鹔 +鹕 +鹖 +鹗 +鹘 +鹙 +鹚 +鹛 +鹜 +鹝 +鹞 +鹟 +鹠 +鹡 +鹢 +鹣 +鹤 +鹥 +鹦 +鹧 +鹨 +鹩 +鹪 +鹫 +鹬 +鹭 +鹮 +鹯 +鹰 +鹱 +鹲 +鹳 +鹴 +鹾 +鹿 +麀 +麂 +麃 +麇 +麈 +麋 +麌 +麏 +麐 +麑 +麒 +麓 +麕 +麖 +麗 +麚 +麛 +麝 +麞 +麟 +麤 +麦 +麧 +麨 +麯 +麰 +麴 +麸 +麹 +麺 +麻 +麼 +麽 +麾 +麿 +黁 +黃 +黄 +黈 +黉 +黍 +黎 +黏 +黐 +黑 +黒 +黓 +黔 +黕 +黖 +默 +黙 +黛 +黜 +黝 +點 +黟 +黠 +黡 +黢 +黤 +黥 +黦 +黧 +黨 +黩 +黪 +黫 +黬 +黮 +黯 +黳 +黸 +黻 +黼 +黾 +鼆 +鼋 +鼍 +鼎 +鼐 +鼒 +鼓 +鼗 +鼙 +鼛 +鼟 +鼠 +鼢 +鼩 +鼪 +鼫 +鼬 +鼮 +鼯 +鼷 +鼹 +鼺 +鼻 +鼽 +鼾 +鼿 +齁 +齃 +齄 +齅 +齊 +齎 +齐 +齑 +齚 +齢 +齧 +齨 +齮 +齰 +齾 +齿 +龀 +龁 +龂 +龃 +龄 +龅 +龆 +龇 +龈 +龉 +龊 +龋 +龌 +龍 +龏 +龑 +龙 +龚 +龛 +龜 +龟 +龠 +거 +나 +났 +다 +딜 +또 +리 +맥 +버 +요 +워 +타 + + +凉 +︰ +﹐ +﹒ +﹖ +! +& +( +) +, +- +. +/ +2 +: +; +< +> +? 
@ +_ +` +| +~ +¥ +𡒄 +𨱏 \ No newline at end of file diff --git a/img.jpg b/img.jpg new file mode 100644 index 0000000000000000000000000000000000000000..b2459d9d2bcd9e4558c6dbcdd4a124ddbe7bad50 Binary files /dev/null and b/img.jpg differ diff --git a/img_demo.jpg b/img_demo.jpg new file mode 100644 index 0000000000000000000000000000000000000000..101201db6574aa03b6104e50ab0da1be17f193bb Binary files /dev/null and b/img_demo.jpg differ diff --git a/ocr.png b/ocr.png new file mode 100644 index 0000000000000000000000000000000000000000..a50c5523449c2e38a4fc9488e25de2749b7d7c37 Binary files /dev/null and b/ocr.png differ diff --git a/packages.txt b/packages.txt new file mode 100644 index 0000000000000000000000000000000000000000..b7f051d570651730f6fd8c60cf4beb63ba67d116 --- /dev/null +++ b/packages.txt @@ -0,0 +1,4 @@ +libgl1 +cmake +libssl-dev +tesseract-ocr-all \ No newline at end of file diff --git a/pages/About.py b/pages/About.py new file mode 100644 index 0000000000000000000000000000000000000000..9b447a202d9a570f7b78892e67a9fcf135a56255 --- /dev/null +++ b/pages/About.py @@ -0,0 +1,35 @@ +import streamlit as st + +st.title("OCR solutions comparator") + +st.write("") +st.write("") +st.write("") + +st.markdown("##### This app allows you to compare, from a given image, the results of different solutions:") +st.markdown("##### *EasyOcr, PaddleOCR, MMOCR, Tesseract*") +st.write("") +st.write("") + +st.markdown(''' The 1st step is to choose the language for the text recognition (not all solutions \ +support the same languages), and then upload the image to process. It is possible to use a demo file. \ +It is then possible to change the default values for the text area detection process, \ +before launching the detection task for each solution.''') +st.write("") + +st.markdown(''' The different results are then presented. The 2nd step is to choose one of these \ +detection results, in order to carry out the text recognition process on it. It is also possible to change \ +the default settings for each solution.''') +st.write("") + +st.markdown("###### The recognition results appear in 2 formats:") +st.markdown(''' - a visual format reproduces the initial image, replacing the detected areas with \ +the recognized text. The background is more or less strongly colored green, depending on the \ +confidence level of the recognition. + A slider allows you to change the font size; another \ +lets you modify the confidence threshold above which the text color changes: if it is set to \ +70% for example, all texts with a confidence level greater than or equal to 70% will appear \ +in white, and in black otherwise.''') + +st.markdown(" - a detailed format presents the results in a table, for each text box detected.
+It is possible to download these results as a local CSV file.")
\ No newline at end of file
diff --git a/pages/App.py b/pages/App.py
new file mode 100644
index 0000000000000000000000000000000000000000..641bd8b3a6e2c25b7475ced782176de6a3b87781
--- /dev/null
+++ b/pages/App.py
@@ -0,0 +1,1361 @@
+"""This Streamlit app allows you to compare, from a given image, the results of different solutions:
+   EasyOcr, PaddleOCR, MMOCR, Tesseract
+"""
+import streamlit as st
+import plotly.express as px
+import numpy as np
+import math
+import pandas as pd
+
+import cv2
+from PIL import Image, ImageColor
+import PIL
+import easyocr
+from paddleocr import PaddleOCR
+from mmocr.utils.ocr import MMOCR
+import pytesseract
+from pytesseract import Output
+import os
+from mycolorpy import colorlist as mcp
+
+###################################################################################################
+## FUNCTIONS
+###################################################################################################
+
+@st.cache
+def convert_df(in_df):
+    """Convert data frame function, used by download button
+
+    Args:
+        in_df (data frame): data frame to convert
+
+    Returns:
+        bytes: data frame converted to UTF-8 encoded CSV
+    """
+    # IMPORTANT: Cache the conversion to prevent computation on every rerun
+    return in_df.to_csv().encode('utf-8')
+
+###
+def easyocr_coord_convert(in_list_coord):
+    """Convert EasyOCR coordinates to the standard format used by other functions
+
+    Args:
+        in_list_coord (list of numbers): format [x_min, x_max, y_min, y_max]
+
+    Returns:
+        list of lists: format [ [x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max] ]
+    """
+
+    coord = in_list_coord
+    return [[coord[0], coord[2]], [coord[1], coord[2]], [coord[1], coord[3]], [coord[0], coord[3]]]
+
+@st.cache(show_spinner=False)
+def initializations():
+    """Initializations for the app
+
+    Returns:
+        list of strings : list of OCR solutions names
+                          (['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract'])
+        dict : names and indices of the OCR solutions
+               ({'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3})
+        tuple : color of the detected boxes
+        list of dicts : list of languages supported by each OCR solution
+        list of int : columns for recognition details results
+        dict : confidence color scale
+        plotly figure : confidence color scale figure
+    """
+    # the readers considered
+    out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
+    out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
+
+    # Columns for recognition details results
+    out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
+
+    # Color of the detected boxes
+    out_color = (0, 76, 153)
+
+    # Dicts of languages supported by each reader
+    out_dict_lang_easyocr = {'Abaza': 'abq', 'Adyghe': 'ady', 'Afrikaans': 'af', 'Angika': 'ang', \
+        'Arabic': 'ar', 'Assamese': 'as', 'Avar': 'ava', 'Azerbaijani': 'az', 'Belarusian': 'be', \
+        'Bulgarian': 'bg', 'Bihari': 'bh', 'Bhojpuri': 'bho', 'Bengali': 'bn', 'Bosnian': 'bs', \
+        'Simplified Chinese': 'ch_sim', 'Traditional Chinese': 'ch_tra', 'Chechen': 'che', \
+        'Czech': 'cs', 'Welsh': 'cy', 'Danish': 'da', 'Dargwa': 'dar', 'German': 'de', \
+        'English': 'en', 'Spanish': 'es', 'Estonian': 'et', 'Persian (Farsi)': 'fa', 'French': 'fr', \
+        'Irish': 'ga', 'Goan Konkani': 'gom', 'Hindi': 'hi', 'Croatian': 'hr', 'Hungarian': 'hu', \
+        'Indonesian': 'id', 'Ingush': 'inh', 'Icelandic': 'is', 'Italian': 'it', 'Japanese': 'ja', \
+        'Kabardian': 'kbd', 'Kannada': 'kn', 'Korean': 'ko', 'Kurdish': 'ku',
'Latin': 'la', \ + 'Lak': 'lbe', 'Lezghian': 'lez', 'Lithuanian': 'lt', 'Latvian': 'lv', 'Magahi': 'mah', \ + 'Maithili': 'mai', 'Maori': 'mi', 'Mongolian': 'mn', 'Marathi': 'mr', 'Malay': 'ms', \ + 'Maltese': 'mt', 'Nepali': 'ne', 'Newari': 'new', 'Dutch': 'nl', 'Norwegian': 'no', \ + 'Occitan': 'oc', 'Pali': 'pi', 'Polish': 'pl', 'Portuguese': 'pt', 'Romanian': 'ro', \ + 'Russian': 'ru', 'Serbian (cyrillic)': 'rs_cyrillic', 'Serbian (latin)': 'rs_latin', \ + 'Nagpuri': 'sck', 'Slovak': 'sk', 'Slovenian': 'sl', 'Albanian': 'sq', 'Swedish': 'sv', \ + 'Swahili': 'sw', 'Tamil': 'ta', 'Tabassaran': 'tab', 'Telugu': 'te', 'Thai': 'th', \ + 'Tajik': 'tjk', 'Tagalog': 'tl', 'Turkish': 'tr', 'Uyghur': 'ug', 'Ukranian': 'uk', \ + 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi'} + + out_dict_lang_ppocr = {'Abaza': 'abq', 'Adyghe': 'ady', 'Afrikaans': 'af', 'Albanian': 'sq', \ + 'Angika': 'ang', 'Arabic': 'ar', 'Avar': 'ava', 'Azerbaijani': 'az', 'Belarusian': 'be', \ + 'Bhojpuri': 'bho','Bihari': 'bh','Bosnian': 'bs','Bulgarian': 'bg','Chinese & English': 'ch', \ + 'Chinese Traditional': 'chinese_cht', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', \ + 'Dargwa': 'dar', 'Dutch': 'nl', 'English': 'en', 'Estonian': 'et', 'French': 'fr', \ + 'German': 'german','Goan Konkani': 'gom','Hindi': 'hi','Hungarian': 'hu','Icelandic': 'is', \ + 'Indonesian': 'id', 'Ingush': 'inh', 'Irish': 'ga', 'Italian': 'it', 'Japan': 'japan', \ + 'Kabardian': 'kbd', 'Korean': 'korean', 'Kurdish': 'ku', 'Lak': 'lbe', 'Latvian': 'lv', \ + 'Lezghian': 'lez', 'Lithuanian': 'lt', 'Magahi': 'mah', 'Maithili': 'mai', 'Malay': 'ms', \ + 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Nagpur': 'sck', \ + 'Nepali': 'ne', 'Newari': 'new', 'Norwegian': 'no', 'Occitan': 'oc', 'Persian': 'fa', \ + 'Polish': 'pl', 'Portuguese': 'pt', 'Romanian': 'ro', 'Russia': 'ru', 'Saudi Arabia': 'sa', \ + 'Serbian(cyrillic)': 'rs_cyrillic', 'Serbian(latin)': 'rs_latin', 'Slovak': 'sk', \ + 'Slovenian': 'sl', 'Spanish': 'es', 'Swahili': 'sw', 'Swedish': 'sv', 'Tabassaran': 'tab', \ + 'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \ + 'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'} + + out_dict_lang_mmocr = {'English & Chinese': 'en'} + + out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \ + 'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \ + 'Azerbaijani': 'aze', 'Basque': 'eus','Belarusian': 'bel','Bengali': 'ben','Bosnian': 'bos', \ + 'Breton': 'bre', 'Bulgarian': 'bul','Burmese': 'mya','Catalan; Valencian': 'cat', \ + 'Cebuano': 'ceb', 'Central Khmer': 'khm','Cherokee': 'chr','Chinese - Simplified': 'chi_sim', \ + 'Chinese - Traditional': 'chi_tra','Corsican': 'cos','Croatian': 'hrv','Czech': 'ces', \ + 'Danish':'dan','Dutch; Flemish':'nld','Dzongkha':'dzo','English, Middle (1100-1500)':'enm', \ + 'English': 'eng','Esperanto': 'epo','Estonian': 'est','Faroese': 'fao', \ + 'Filipino (old - Tagalog)': 'fil','Finnish': 'fin','French, Middle (ca.1400-1600)': 'frm', \ + 'French': 'fra','Galician': 'glg','Georgian - Old': 'kat_old','Georgian': 'kat', \ + 'German - Fraktur': 'frk','German': 'deu','Greek, Modern (1453-)': 'ell','Gujarati': 'guj', \ + 'Haitian; Haitian Creole': 'hat','Hebrew': 'heb','Hindi': 'hin','Hungarian': 'hun', \ + 'Icelandic': 'isl','Indonesian': 'ind','Inuktitut': 'iku','Irish': 'gle', \ + 'Italian - Old': 'ita_old','Italian': 'ita','Japanese': 'jpn','Javanese': 'jav', \ 
+ 'Kannada': 'kan','Kazakh': 'kaz','Kirghiz; Kyrgyz': 'kir','Korean (vertical)': 'kor_vert', \ + 'Korean': 'kor','Kurdish (Arabic Script)': 'kur_ara','Lao': 'lao','Latin': 'lat', \ + 'Latvian':'lav','Lithuanian':'lit','Luxembourgish':'ltz','Macedonian':'mkd','Malay':'msa', \ + 'Malayalam': 'mal','Maltese': 'mlt','Maori': 'mri','Marathi': 'mar','Mongolian': 'mon', \ + 'Nepali': 'nep','Norwegian': 'nor','Occitan (post 1500)': 'oci', \ + 'Orientation and script detection module':'osd','Oriya':'ori','Panjabi; Punjabi':'pan', \ + 'Persian':'fas','Polish':'pol','Portuguese':'por','Pushto; Pashto':'pus','Quechua':'que', \ + 'Romanian; Moldavian; Moldovan': 'ron','Russian': 'rus','Sanskrit': 'san', \ + 'Scottish Gaelic': 'gla','Serbian - Latin': 'srp_latn','Serbian': 'srp','Sindhi': 'snd', \ + 'Sinhala; Sinhalese': 'sin','Slovak': 'slk','Slovenian': 'slv', \ + 'Spanish; Castilian - Old': 'spa_old','Spanish; Castilian': 'spa','Sundanese': 'sun', \ + 'Swahili': 'swa','Swedish': 'swe','Syriac': 'syr','Tajik': 'tgk','Tamil': 'tam', \ + 'Tatar':'tat','Telugu':'tel','Thai':'tha','Tibetan':'bod','Tigrinya':'tir','Tonga':'ton', \ + 'Turkish': 'tur','Uighur; Uyghur': 'uig','Ukrainian': 'ukr','Urdu': 'urd', \ + 'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \ + 'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'} + + out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr, out_dict_lang_mmocr, \ + out_dict_lang_tesseract] + + # Initialization of detection form + if 'columns_size' not in st.session_state: + st.session_state.columns_size = [2] + [1 for x in out_reader_type_list[1:]] + if 'column_width' not in st.session_state: + st.session_state.column_width = [500] + [400 for x in out_reader_type_list[1:]] + if 'columns_color' not in st.session_state: + st.session_state.columns_color = ["rgb(228,26,28)"] + \ + ["rgb(0,0,0)" for x in out_reader_type_list[1:]] + + # Confidence color scale + out_list_confid = list(np.arange(0,101,1)) + out_list_grad = mcp.gen_color_normalized(cmap="Greens",data_arr=np.array(out_list_confid)) + out_dict_back_colors = {out_list_confid[i]: out_list_grad[i] \ + for i in range(len(out_list_confid))} + + list_y = [1 for i in out_list_confid] + df_confid = pd.DataFrame({'% confidence scale': out_list_confid, 'y': list_y}) + + out_fig = px.scatter(df_confid, x='% confidence scale', y='y', \ + hover_data={'% confidence scale': True, 'y': False}, + color=out_dict_back_colors.values(), range_y=[0.9,1.1], range_x=[0,100], + color_discrete_map="identity",height=50,symbol='y',symbol_sequence=['square']) + out_fig.update_xaxes(showticklabels=False) + out_fig.update_yaxes(showticklabels=False, range=[0.1, 1.1], visible=False) + out_fig.update_traces(marker_size=50) + out_fig.update_layout(paper_bgcolor="white", margin=dict(b=0,r=0,t=0,l=0), xaxis_side="top", \ + showlegend=False) + + return out_reader_type_list, out_reader_type_dict, out_color, out_list_dict_lang, \ + out_cols_size, out_dict_back_colors, out_fig + +### +@st.experimental_memo(show_spinner=False) +def init_easyocr(in_params): + """Initialization of easyOCR reader + + Args: + in_params (list): list with the language + + Returns: + easyocr reader: the easyocr reader instance + """ + out_ocr = easyocr.Reader(in_params) + return out_ocr + +### +@st.cache(show_spinner=False) +#@st.experimental_memo(show_spinner=False) +def init_ppocr(in_params): + """Initialization of PPOCR reader + + Args: + in_params (dict): dict with parameters + + Returns: + ppocr reader: the ppocr reader instance 
+ """ + out_ocr = PaddleOCR(lang=in_params[0], **in_params[1]) + return out_ocr + +### +@st.experimental_memo(show_spinner=False) +def init_mmocr(in_params): + """Initialization of MMOCR reader + + Args: + in_params (dict): dict with parameters + + Returns: + mmocr reader: the ppocr reader instance + """ + out_ocr = MMOCR(recog=None, **in_params[1]) + return out_ocr + +### +def init_readers(in_list_params): + """Initialization of the readers, and return them as list + + Args: + in_list_params (list): list of dicts of parameters for each reader + + Returns: + list: list of the reader's instances + """ + # Instantiations of the readers : + # - EasyOCR + with st.spinner("EasyOCR reader initialization in progress ..."): + reader_easyocr = init_easyocr([in_list_params[0][0]]) + + # - PPOCR + # Paddleocr + with st.spinner("PPOCR reader initialization in progress ..."): + reader_ppocr = init_ppocr(in_list_params[1]) + + # - MMOCR + with st.spinner("MMOCR reader initialization in progress ..."): + reader_mmocr = init_mmocr(in_list_params[2]) + + out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr] + + return out_list_readers + +### +#@st.cache(show_spinner=False) +@st.experimental_memo(show_spinner=False) +def load_image(in_image_file): + """Load input file and open it + + Args: + in_image_file (string or Streamlit UploadedFile): image to consider + + Returns: + string : locally saved image path + PIL.Image : input file opened with Pillow + matrix : input file opened with Opencv + """ + if isinstance(in_image_file, str): + out_image_path = "img."+in_image_file.split('.')[-1] + else: + out_image_path = "img."+in_image_file.name.split('.')[-1] + img = Image.open(in_image_file) + img_saved = img.save(out_image_path) + + # Read image + out_image_orig = Image.open(out_image_path) + out_image_cv2 = cv2.cvtColor(cv2.imread(out_image_path), cv2.COLOR_BGR2RGB) + + return out_image_path, out_image_orig, out_image_cv2 + +### +#@st.cache(show_spinner=False) +@st.experimental_memo(show_spinner=False) +def easyocr_detect(_in_reader, in_image_path, in_params): + """Detection with EasyOCR + + Args: + _in_reader (EasyOCR reader) : the previously initialized instance + in_image_path (string ) : locally saved image path + in_params (list) : list with the parameters for detection + + Returns: + list : list of the boxes coordinates + exception on error, string 'OK' otherwise + """ + try: + dict_param = in_params[1] + detection_result = _in_reader.detect(in_image_path, + #width_ths=0.7, + #mag_ratio=1.5 + **dict_param + ) + easyocr_coordinates = detection_result[0][0] + + # The format of the coordinate is as follows: [x_min, x_max, y_min, y_max] + # Format boxes coordinates for draw + out_easyocr_boxes_coordinates = list(map(easyocr_coord_convert, easyocr_coordinates)) + out_status = 'OK' + except Exception as e: + out_easyocr_boxes_coordinates = [] + out_status = e + + return out_easyocr_boxes_coordinates, out_status + +### +#@st.cache(show_spinner=False) +@st.experimental_memo(show_spinner=False) +def ppocr_detect(_in_reader, in_image_path): + """Detection with PPOCR + + Args: + _in_reader (PPOCR reader) : the previously initialized instance + in_image_path (string ) : locally saved image path + + Returns: + list : list of the boxes coordinates + exception on error, string 'OK' otherwise + """ + # PPOCR detection method + try: + out_ppocr_boxes_coordinates = _in_reader.ocr(in_image_path, rec=False) + out_status = 'OK' + except Exception as e: + out_ppocr_boxes_coordinates = [] + out_status = e + + return 
+
+###
+#@st.cache(show_spinner=False, hash_funcs={torch.nn.parameter.Parameter: lambda _: None})
+#@st.cache(show_spinner=False)
+@st.experimental_memo(show_spinner=False)
+def mmocr_detect(_in_reader, in_image_path):
+    """Detection with MMOCR
+
+    Args:
+        _in_reader (MMOCR reader) : the previously initialized instance
+        in_image_path (string) : locally saved image path
+
+    Returns:
+        list : list of the boxes coordinates
+        exception on error, string 'OK' otherwise
+    """
+    # MMOCR detection method
+    out_mmocr_boxes_coordinates = []
+    try:
+        det_result = _in_reader.readtext(in_image_path, details=True)
+        bboxes_list = [res['boundary_result'] for res in det_result]
+        for bboxes in bboxes_list:
+            for bbox in bboxes:
+                if len(bbox) > 9:
+                    min_x = min(bbox[0:-1:2])
+                    min_y = min(bbox[1:-1:2])
+                    max_x = max(bbox[0:-1:2])
+                    max_y = max(bbox[1:-1:2])
+                    #box = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
+                else:
+                    min_x = min(bbox[0:-1:2])
+                    min_y = min(bbox[1::2])
+                    max_x = max(bbox[0:-1:2])
+                    max_y = max(bbox[1::2])
+                box4 = [ [min_x, min_y], [max_x, min_y], [max_x, max_y], [min_x, max_y] ]
+                out_mmocr_boxes_coordinates.append(box4)
+        out_status = 'OK'
+    except Exception as e:
+        out_status = e
+
+    return out_mmocr_boxes_coordinates, out_status
+
+###
+def cropped_1box(in_box, in_img):
+    """Construction of a cropped image corresponding to an area of the initial image
+
+    Args:
+        in_box (list) : box with coordinates
+        in_img (matrix) : image
+
+    Returns:
+        matrix : cropped image
+    """
+    box_ar = np.array(in_box).astype(np.int64)
+    x_min = box_ar[:, 0].min()
+    x_max = box_ar[:, 0].max()
+    y_min = box_ar[:, 1].min()
+    y_max = box_ar[:, 1].max()
+    out_cropped = in_img[y_min:y_max, x_min:x_max]
+
+    return out_cropped
+
+###
+@st.experimental_memo(show_spinner=False)
+def tesserocr_detect(_in_img, in_params):
+    """Detection with Tesseract
+
+    Args:
+        _in_img (PIL.Image) : image to consider
+        in_params (list) : list with the parameters for detection
+
+    Returns:
+        list : list of the boxes coordinates
+        exception on error, string 'OK' otherwise
+    """
+    try:
+        dict_param = in_params[1]
+        df_res = pytesseract.image_to_data(_in_img, **dict_param, output_type=Output.DATAFRAME)
+        df_res['box'] = df_res.apply(lambda d: [[d['left'], d['top']], \
+                                                [d['left'] + d['width'], d['top']], \
+                                                [d['left'] + d['width'], d['top'] + d['height']], \
+                                                [d['left'], d['top'] + d['height']], \
+                                               ], axis=1)
+        out_tesserocr_boxes_coordinates = df_res[df_res.word_num > 0]['box'].to_list()
+        out_status = 'OK'
+    except Exception as e:
+        out_tesserocr_boxes_coordinates = []
+        out_status = e
+
+    return out_tesserocr_boxes_coordinates, out_status
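+
+# Example (illustrative): a Tesseract row with left=10, top=20, width=100 and
+# height=30 yields the box [[10, 20], [110, 20], [110, 50], [10, 50]], i.e.
+# the same top-left clockwise format produced by easyocr_coord_convert above.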
+
+###
+@st.experimental_memo(show_spinner=False)
+def process_detect(in_image_path, _in_list_images, _in_list_readers, in_list_params, in_color):
+    """Detection process for each OCR solution
+
+    Args:
+        in_image_path (string) : locally saved image path
+        _in_list_images (list) : list of original image
+        _in_list_readers (list) : list with previously initialized reader's instances
+        in_list_params (list) : list with dict parameters for each OCR solution
+        in_color (tuple) : color for boxes around text
+
+    Returns:
+        list: list of detection results images
+        list: list of boxes coordinates
+    """
+    ## ------- EasyOCR Text detection
+    with st.spinner('EasyOCR Text detection in progress ...'):
+        easyocr_boxes_coordinates, easyocr_status = easyocr_detect(_in_list_readers[0], \
+                                                                   in_image_path, in_list_params[0])
+        # Visualization
+        if easyocr_boxes_coordinates:
+            easyocr_image_detect = draw_detected(_in_list_images[0], easyocr_boxes_coordinates, \
+                                                 in_color, 'None', 7)
+        else:
+            easyocr_image_detect = easyocr_status
+    ##
+
+    ## ------- PPOCR Text detection
+    with st.spinner('PPOCR Text detection in progress ...'):
+        ppocr_boxes_coordinates, ppocr_status = ppocr_detect(_in_list_readers[1], in_image_path)
+        # Visualization
+        if ppocr_boxes_coordinates:
+            ppocr_image_detect = draw_detected(_in_list_images[0], ppocr_boxes_coordinates, \
+                                               in_color, 'None', 7)
+        else:
+            ppocr_image_detect = ppocr_status
+    ##
+
+    ## ------- MMOCR Text detection
+    with st.spinner('MMOCR Text detection in progress ...'):
+        mmocr_boxes_coordinates, mmocr_status = mmocr_detect(_in_list_readers[2], in_image_path)
+        # Visualization
+        if mmocr_boxes_coordinates:
+            mmocr_image_detect = draw_detected(_in_list_images[0], mmocr_boxes_coordinates, \
+                                               in_color, 'None', 7)
+        else:
+            mmocr_image_detect = mmocr_status
+    ##
+
+    ## ------- Tesseract Text detection
+    with st.spinner('Tesseract Text detection in progress ...'):
+        tesserocr_boxes_coordinates, tesserocr_status = tesserocr_detect(_in_list_images[0], \
+                                                                         in_list_params[3])
+        # Visualization
+        if tesserocr_boxes_coordinates:
+            tesserocr_image_detect = draw_detected(_in_list_images[0], tesserocr_boxes_coordinates, \
+                                                   in_color, 'None', 7)
+        else:
+            tesserocr_image_detect = tesserocr_status
+    ##
+    #
+    out_list_images = _in_list_images + [easyocr_image_detect, ppocr_image_detect, \
+                                         mmocr_image_detect, tesserocr_image_detect]
+    out_list_coordinates = [easyocr_boxes_coordinates, ppocr_boxes_coordinates, \
+                            mmocr_boxes_coordinates, tesserocr_boxes_coordinates]
+    #
+
+    return out_list_images, out_list_coordinates
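+
+# Note: with st.experimental_memo, parameters whose names start with an
+# underscore (e.g. _in_list_images, _in_list_readers) are excluded from the
+# cache key, so only changes to in_image_path, in_list_params or in_color
+# trigger a new detection run.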
+
+###
+def draw_detected(in_image, in_boxes_coordinates, in_color, posit=None, in_thickness=4):
+    """Draw boxes around detected text
+
+    Args:
+        in_image (PIL.Image) : original image
+        in_boxes_coordinates (list) : boxes coordinates, from top to bottom and from left to right
+                                      [ [ [x_min, y_min], [x_max, y_min], [x_max, y_max], [x_min, y_max] ],
+                                        [ ... ]
+                                      ]
+        in_color (tuple) : color for boxes around text
+        posit (str, optional) : position for the box number ('top_left' or 'top_right').
+                                Defaults to None (no number drawn).
+        in_thickness (int, optional): thickness of the box. Defaults to 4.
+
+    Returns:
+        PIL.Image : original image with detected areas
+    """
+    work_img = in_image.copy()
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    for ind_box, box in enumerate(in_boxes_coordinates):
+        box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
+        work_img = cv2.polylines(np.array(work_img), [box], True, in_color, in_thickness)
+        if posit in ('top_left', 'top_right'):
+            if posit == 'top_left':
+                pos = tuple(box[0][0])
+            else:
+                pos = tuple(box[1][0])
+            work_img = cv2.putText(work_img, str(ind_box+1), pos, font, 5.5, in_color, \
+                                   in_thickness, cv2.LINE_AA)
+
+    out_image_drawn = Image.fromarray(work_img)
+
+    return out_image_drawn
+
+##
+#@st.cache(show_spinner=False)
+@st.experimental_memo(show_spinner=False)
+def get_cropped(in_boxes_coordinates, in_image_cv):
+    """Construct list of cropped images corresponding to the input boxes coordinates list
+
+    Args:
+        in_boxes_coordinates (list) : list of boxes coordinates
+        in_image_cv (matrix) : original image
+
+    Returns:
+        list : list with cropped images
+    """
+    out_list_images = []
+    for box in in_boxes_coordinates:
+        cropped = cropped_1box(box, in_image_cv)
+        out_list_images.append(cropped)
+    return out_list_images
+
+###
+def process_recog(in_list_readers, in_image_cv, in_boxes_coordinates, in_list_dict_params):
+    """Recognition process for each OCR solution
+
+    Args:
+        in_list_readers (list) : list with previously initialized reader's instances
+        in_image_cv (matrix) : original image
+        in_boxes_coordinates (list) : list of boxes coordinates
+        in_list_dict_params (list) : list with dict parameters for each OCR solution
+
+    Returns:
+        data frame : results for each OCR solution, except Tesseract
+        data frame : results for Tesseract
+        list : status for each recognition (exception or 'OK')
+    """
+    out_df_results = pd.DataFrame([])
+
+    list_text_easyocr = []
+    list_confidence_easyocr = []
+    list_text_ppocr = []
+    list_confidence_ppocr = []
+    list_text_mmocr = []
+    list_confidence_mmocr = []
+
+    # Create cropped images from detection
+    list_cropped_images = get_cropped(in_boxes_coordinates, in_image_cv)
+
+    # Recognize with EasyOCR
+    with st.spinner('EasyOCR Text recognition in progress ...'):
+        list_text_easyocr, list_confidence_easyocr, status_easyocr = \
+            easyocr_recog(list_cropped_images, in_list_readers[0], in_list_dict_params[0])
+    ##
+
+    # Recognize with PPOCR
+    with st.spinner('PPOCR Text recognition in progress ...'):
+        list_text_ppocr, list_confidence_ppocr, status_ppocr = \
+            ppocr_recog(list_cropped_images, in_list_dict_params[1])
+    ##
+
+    # Recognize with MMOCR
+    with st.spinner('MMOCR Text recognition in progress ...'):
+        list_text_mmocr, list_confidence_mmocr, status_mmocr = \
+            mmocr_recog(list_cropped_images, in_list_dict_params[2])
+    ##
+
+    # Recognize with Tesseract
+    with st.spinner('Tesseract Text recognition in progress ...'):
+        out_df_results_tesseract, status_tesseract = \
+            tesserocr_recog(in_image_cv, in_list_dict_params[3], len(list_cropped_images))
+    ##
+
+    # Create results data frame
+    out_df_results = pd.DataFrame({'cropped_image': list_cropped_images,
+                                   'text_easyocr': list_text_easyocr,
+                                   'confidence_easyocr': list_confidence_easyocr,
+                                   'text_ppocr': list_text_ppocr,
+                                   'confidence_ppocr': list_confidence_ppocr,
+                                   'text_mmocr': list_text_mmocr,
+                                   'confidence_mmocr': list_confidence_mmocr
+                                  }
+                                 )
+
+    out_list_reco_status = [status_easyocr, status_ppocr, status_mmocr, status_tesseract]
+
+    return out_df_results, out_df_results_tesseract, out_list_reco_status
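+
+# Example (illustrative, values invented): with two detected boxes,
+# out_df_results holds one row per box, e.g.:
+#   cropped_image   text_easyocr   confidence_easyocr   text_ppocr   ...
+#   <ndarray>       'Hello'        99.1                 'Hello'      ...
+#   <ndarray>       'world'        97.3                 'world'      ...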
+
+###
+@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
+def easyocr_recog(in_list_images, _in_reader_easyocr, in_params):
+    """Recognition with EasyOCR
+
+    Args:
+        in_list_images (list) : list of cropped images
+        _in_reader_easyocr (EasyOCR reader) : the previously initialized instance
+        in_params (dict) : parameters for recognition
+
+    Returns:
+        list : list of recognized text
+        list : list of recognition confidence
+        string/Exception : recognition status
+    """
+    progress_bar = st.progress(0)
+    out_list_text_easyocr = []
+    out_list_confidence_easyocr = []
+    ## ------- EasyOCR Text recognition
+    try:
+        step = 0*len(in_list_images) # first recognition process
+        nb_steps = 4 * len(in_list_images)
+        for ind_img, cropped in enumerate(in_list_images):
+            result = _in_reader_easyocr.recognize(cropped, **in_params)
+            try:
+                out_list_text_easyocr.append(result[0][1])
+                out_list_confidence_easyocr.append(np.round(100*result[0][2], 1))
+            except Exception:
+                out_list_text_easyocr.append('Not recognized')
+                out_list_confidence_easyocr.append(100.)
+            progress_bar.progress((step+ind_img+1)/nb_steps)
+        out_status = 'OK'
+    except Exception as e:
+        out_status = e
+    progress_bar.empty()
+
+    return out_list_text_easyocr, out_list_confidence_easyocr, out_status
+
+##
+@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
+def ppocr_recog(in_list_images, in_params):
+    """Recognition with PPOCR
+
+    Args:
+        in_list_images (list) : list of cropped images
+        in_params (dict) : parameters for recognition
+
+    Returns:
+        list : list of recognized text
+        list : list of recognition confidence
+        string/Exception : recognition status
+    """
+    ## ------- PPOCR Text recognition
+    out_list_text_ppocr = []
+    out_list_confidence_ppocr = []
+    step = 1*len(in_list_images) # second recognition process
+    nb_steps = 4 * len(in_list_images)
+    progress_bar = st.progress(step/nb_steps)
+    try:
+        reader_ppocr = PaddleOCR(**in_params)
+
+        for ind_img, cropped in enumerate(in_list_images):
+            result = reader_ppocr.ocr(cropped, det=False, cls=False)
+            try:
+                out_list_text_ppocr.append(result[0][0])
+                out_list_confidence_ppocr.append(np.round(100*result[0][1], 1))
+            except Exception:
+                out_list_text_ppocr.append('Not recognized')
+                out_list_confidence_ppocr.append(100.)
+            progress_bar.progress((step+ind_img+1)/nb_steps)
+        out_status = 'OK'
+    except Exception as e:
+        out_status = e
+    progress_bar.empty()
+
+    return out_list_text_ppocr, out_list_confidence_ppocr, out_status
+
+##
+@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
+def mmocr_recog(in_list_images, in_params):
+    """Recognition with MMOCR
+
+    Args:
+        in_list_images (list) : list of cropped images
+        in_params (dict) : parameters for recognition
+
+    Returns:
+        list : list of recognized text
+        list : list of recognition confidence
+        string/Exception : recognition status
+    """
+    ## ------- MMOCR Text recognition
+    out_list_text_mmocr = []
+    out_list_confidence_mmocr = []
+    step = 2*len(in_list_images) # third recognition process
+    nb_steps = 4 * len(in_list_images)
+    progress_bar = st.progress(step/nb_steps)
+    try:
+        reader_mmocr = MMOCR(det=None, **in_params)
+
+        for ind_img, cropped in enumerate(in_list_images):
+            result = reader_mmocr.readtext(cropped, details=True)
+            try:
+                out_list_text_mmocr.append(result[0]['text'])
+                out_list_confidence_mmocr.append(np.round(100* \
+                    (np.array(result[0]['score']).mean()), 1))
+            except Exception:
+                out_list_text_mmocr.append('Not recognized')
+                out_list_confidence_mmocr.append(100.)
+            progress_bar.progress((step+ind_img+1)/nb_steps)
+        out_status = 'OK'
+    except Exception as e:
+        out_status = e
+    progress_bar.empty()
+
+    return out_list_text_mmocr, out_list_confidence_mmocr, out_status
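+
+# Note: the four recognizers share a single progress bar split into 4 equal
+# segments: each one starts at step = k * len(images) (k = 0..3), so with 10
+# cropped images the PPOCR pass, for instance, fills the bar from 25% to 50%.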
+
+##
+#@st.cache(show_spinner=False)
+@st.experimental_memo(suppress_st_warning=True, show_spinner=False)
+def tesserocr_recog(in_img, in_params, in_nb_images):
+    """Recognition with Tesseract
+
+    Args:
+        in_img (matrix) : original image
+        in_params (dict) : parameters for recognition
+        in_nb_images (int) : number of cropped images (used for the progress bar)
+
+    Returns:
+        Pandas data frame : recognition results
+        string/Exception : recognition status
+    """
+    ## ------- Tesseract Text recognition
+    step = 3*in_nb_images # fourth recognition process
+    nb_steps = 4 * in_nb_images
+    progress_bar = st.progress(step/nb_steps)
+
+    try:
+        out_df_result = pytesseract.image_to_data(in_img, **in_params, output_type=Output.DATAFRAME)
+
+        out_df_result['box'] = out_df_result.apply(lambda d: [[d['left'], d['top']], \
+                                                   [d['left'] + d['width'], d['top']], \
+                                                   [d['left']+d['width'], d['top']+d['height']], \
+                                                   [d['left'], d['top'] + d['height']], \
+                                                  ], axis=1)
+        out_df_result['cropped'] = out_df_result['box'].apply(lambda b: cropped_1box(b, in_img))
+        out_df_result = out_df_result[(out_df_result.word_num > 0) & (out_df_result.text != ' ')] \
+                            .reset_index(drop=True)
+        out_status = 'OK'
+    except Exception as e:
+        out_df_result = pd.DataFrame([])
+        out_status = e
+
+    progress_bar.progress(1.)
+
+    return out_df_result, out_status
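+
+# Note: pytesseract.image_to_data(..., output_type=Output.DATAFRAME) returns
+# one row per token with, among others, the columns 'left', 'top', 'width',
+# 'height', 'conf', 'text' and 'word_num'; rows with word_num == 0 describe
+# pages, blocks, paragraphs or lines and are filtered out above.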
+
+###
+def draw_reco_images(in_image, in_boxes_coordinates, in_list_texts, in_list_confid, \
+                     in_dict_back_colors, in_df_results_tesseract, in_reader_type_list, \
+                     in_font_scale=3, in_conf_threshold=65):
+    """Draw recognized text on the original image, for each OCR solution used
+
+    Args:
+        in_image (matrix) : original image
+        in_boxes_coordinates (list) : list of boxes coordinates
+        in_list_texts (list): list of recognized text for each recognizer (except Tesseract)
+        in_list_confid (list): list of recognition confidence for each recognizer (except Tesseract)
+        in_dict_back_colors (dict): confidence color scale
+        in_df_results_tesseract (Pandas data frame): Tesseract recognition results
+        in_reader_type_list (list): list of OCR solutions names
+        in_font_scale (int, optional): text font scale. Defaults to 3.
+        in_conf_threshold (int, optional): confidence threshold for text color change. Defaults to 65.
+
+    Returns:
+        None : shows the results in the show_reco container
+    """
+    img = in_image.copy()
+    nb_readers = len(in_reader_type_list)
+    list_reco_images = [img.copy() for i in range(nb_readers)]
+
+    for num, box_ in enumerate(in_boxes_coordinates):
+        box = np.array(box_).astype(np.int64)
+
+        # For each box : draw the results of each recognizer
+        for ind_r in range(nb_readers-1):
+            confid = np.round(in_list_confid[ind_r][num], 0)
+            rgb_color = ImageColor.getcolor(in_dict_back_colors[confid], "RGB")
+            if confid < in_conf_threshold:
+                text_color = (0, 0, 0)
+            else:
+                text_color = (255, 255, 255)
+
+            list_reco_images[ind_r] = cv2.rectangle(list_reco_images[ind_r], \
+                                                    (box[0][0], box[0][1]), \
+                                                    (box[2][0], box[2][1]), rgb_color, -1)
+            list_reco_images[ind_r] = cv2.putText(list_reco_images[ind_r], \
+                                                  in_list_texts[ind_r][num], \
+                                                  (box[0][0], int(np.round((box[0][1]+box[2][1])/2, 0))), \
+                                                  cv2.FONT_HERSHEY_DUPLEX, in_font_scale, text_color, 2)
+
+    # Add Tesseract process
+    if not in_df_results_tesseract.empty:
+        ind_tessocr = nb_readers-1
+        for num, box_ in enumerate(in_df_results_tesseract['box'].to_list()):
+            box = np.array(box_).astype(np.int64)
+            confid = np.round(in_df_results_tesseract.iloc[num]['conf'], 0)
+            rgb_color = ImageColor.getcolor(in_dict_back_colors[confid], "RGB")
+            if confid < in_conf_threshold:
+                text_color = (0, 0, 0)
+            else:
+                text_color = (255, 255, 255)
+
+            list_reco_images[ind_tessocr] = \
+                cv2.rectangle(list_reco_images[ind_tessocr], (box[0][0], box[0][1]), \
+                              (box[2][0], box[2][1]), rgb_color, -1)
+            list_reco_images[ind_tessocr] = \
+                cv2.putText(list_reco_images[ind_tessocr], \
+                            in_df_results_tesseract.iloc[num]['text'], \
+                            (box[0][0], int(np.round((box[0][1]+box[2][1])/2, 0))), \
+                            cv2.FONT_HERSHEY_DUPLEX, in_font_scale, text_color, 4)
+
+    with show_reco.container():
+        # Draw the results, 2 images per line
+        reco_lines = math.ceil(len(in_reader_type_list) / 2)
+        column_width = 500
+        for ind_lig in range(0, reco_lines+1, 2):
+            cols = st.columns(2)
+            for ind_col in range(2):
+                ind = ind_lig + ind_col
+                if ind < len(in_reader_type_list):
+                    if in_reader_type_list[ind] == 'Tesseract':
+                        column_title = '<p>Recognition with ' + in_reader_type_list[ind] + \
+                                       ' (with its own detector)</p>'
+                    else:
+                        column_title = '<p>Recognition with ' + in_reader_type_list[ind] + '</p>'
+                    cols[ind_col].markdown(column_title, unsafe_allow_html=True)
+                    if st.session_state.list_reco_status[ind] == 'OK':
+                        cols[ind_col].image(list_reco_images[ind], \
+                                            width=column_width, use_column_width=True)
+                    else:
+                        cols[ind_col].write(st.session_state.list_reco_status[ind], \
+                                            use_column_width=True)
+
+##
+def highlight():
+    """Highlight the detection results of the reader selected with the radio button:
+       enlarge its column and image, color its title in red, and save this layout
+       in the session state.
+    """
+    show_detect.empty()
+    with show_detect.container():
+        columns_size = [1 for x in reader_type_list]
+        column_width = [400 for x in reader_type_list]
+        columns_color = ["rgb(0,0,0)" for x in reader_type_list]
+        columns_size[reader_type_dict[st.session_state.detect_reader]] = 2
+        column_width[reader_type_dict[st.session_state.detect_reader]] = 500
+        columns_color[reader_type_dict[st.session_state.detect_reader]] = "rgb(228,26,28)"
+        columns = st.columns(columns_size, ) #gap='medium')
+
+        for ind_col, col in enumerate(columns):
+            column_title = '<p style="color:' + columns_color[ind_col] + \
+                           ';">Detection with ' + reader_type_list[ind_col] + '</p>'
+            col.markdown(column_title, unsafe_allow_html=True)
+            if isinstance(list_images[ind_col+2], PIL.Image.Image):
+                col.image(list_images[ind_col+2], width=column_width[ind_col], \
+                          use_column_width=True)
+            else:
+                col.write(list_images[ind_col+2], use_column_width=True)
+    st.session_state.columns_size = columns_size
+    st.session_state.column_width = column_width
+    st.session_state.columns_color = columns_color
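+
+# Note: highlight() is used below as the on_change callback of the detector
+# radio button: it redraws the detection columns so the selected reader's
+# column is enlarged (width 500) and its title colored red (rgb(228,26,28)),
+# then saves the new layout in st.session_state.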
+
+###################################################################################################
+## MAIN
+###################################################################################################
+
+##----------- Initializations ---------------------------------------------------------------------
+#print("PID : ", os.getpid())
+
+st.title("OCR solutions comparator")
+st.markdown("##### *EasyOCR, PPOCR, MMOCR, Tesseract*")
+#st.markdown("#### PID : " + str(os.getpid()))
+
+# Initializations
+with st.spinner("Initializations in progress ..."):
+    reader_type_list, reader_type_dict, color, list_dict_lang, \
+    cols_size, dict_back_colors, fig_colorscale = initializations()
+
+##----------- Choose language & image -------------------------------------------------------------
+st.markdown("#### Choose languages for the text recognition:")
+lang_col = st.columns(4)
+easyocr_key_lang = lang_col[0].selectbox(reader_type_list[0]+" :", list_dict_lang[0].keys(), 26)
+easyocr_lang = list_dict_lang[0][easyocr_key_lang]
+ppocr_key_lang = lang_col[1].selectbox(reader_type_list[1]+" :", list_dict_lang[1].keys(), 22)
+ppocr_lang = list_dict_lang[1][ppocr_key_lang]
+mmocr_key_lang = lang_col[2].selectbox(reader_type_list[2]+" :", list_dict_lang[2].keys(), 0)
+mmocr_lang = list_dict_lang[2][mmocr_key_lang]
+tesserocr_key_lang = lang_col[3].selectbox(reader_type_list[3]+" :", list_dict_lang[3].keys(), 35)
+tesserocr_lang = list_dict_lang[3][tesserocr_key_lang]
+st.markdown("#### Upload image:")
+
+image_file = st.file_uploader("", type=["png","jpg","jpeg"])
+use_demo = st.checkbox('Use a demo file', False)
+
+if use_demo:
+    image_file = "img_demo.jpg"
+
+##----------- Process input image -----------------------------------------------------------------
+if image_file is not None:
+    image_path, image_orig, image_cv2 = load_image(image_file)
+    list_images = [image_orig, image_cv2]
+
+##----------- Form with original image & hyperparameters for detectors ----------------------------
+    with st.form("form1"):
+        col1, col2 = st.columns(2, ) #gap="medium")
+        col1.markdown("##### Original image")
+        col1.image(list_images[0], width=500, use_column_width=True)
+        col2.markdown("##### Hyperparameter values for detection")
+
+        with col2.expander("Choose detection hyperparameters for " + reader_type_list[0], \
+                           expanded=False):
+            t0_min_size = st.slider("min_size", 1, 20, 10, step=1, \
+                help="min_size (int, default = 10) - Filter text box smaller than \
+                      minimum value in pixel")
+            t0_text_threshold = st.slider("text_threshold", 0.1, 1., 0.7, step=0.1, \
+                help="text_threshold (float, default = 0.7) - Text confidence threshold")
+            t0_low_text = st.slider("low_text", 0.1, 1., 0.4, step=0.1, \
+                help="low_text (float, default = 0.4) - Text low-bound score")
+            t0_link_threshold = st.slider("link_threshold", 0.1, 1., 0.4, step=0.1, \
+                help="link_threshold (float, default = 0.4) - Link confidence threshold")
+            t0_canvas_size = st.slider("canvas_size", 2000, 5000, 2560, step=10, \
+                help='''canvas_size (int, default = 2560) \n
+Maximum image size. Image bigger than this value will be resized down''')
+            t0_mag_ratio = st.slider("mag_ratio", 0.1, 5., 1., step=0.1, \
+                help="mag_ratio (float, default = 1) - Image magnification ratio")
+            t0_slope_ths = st.slider("slope_ths", 0.01, 1., 0.1, step=0.01, \
+                help='''slope_ths (float, default = 0.1) - Maximum slope \
+                        (delta y/delta x) to consider merging. \n
+Low value means tiled boxes will not be merged.''')
+            t0_ycenter_ths = st.slider("ycenter_ths", 0.1, 1., 0.5, step=0.1, \
+                help='''ycenter_ths (float, default = 0.5) - Maximum shift in y direction. \n
+Boxes with different level should not be merged.''')
+            t0_height_ths = st.slider("height_ths", 0.1, 1., 0.5, step=0.1, \
+                help='''height_ths (float, default = 0.5) - Maximum difference in box height. \n
+Boxes with very different text size should not be merged.''')
+            t0_width_ths = st.slider("width_ths", 0.1, 1., 0.5, step=0.1, \
+                help="width_ths (float, default = 0.5) - Maximum horizontal \
+                      distance to merge boxes.")
+            t0_add_margin = st.slider("add_margin", 0.1, 1., 0.1, step=0.1, \
+                help='''add_margin (float, default = 0.1) - \
+                        Extend bounding boxes in all directions by certain value. \n
+This is important for languages with complex scripts (e.g. Thai).''')
+            t0_optimal_num_chars = st.slider("optimal_num_chars", None, 100, None, step=10, \
+                help="optimal_num_chars (int, default = None) - If specified, bounding boxes \
+                      with estimated number of characters near this value are returned first.")
+
+        with col2.expander("Choose detection hyperparameters for " + reader_type_list[1], \
+                           expanded=False):
+            t1_det_algorithm = st.selectbox('det_algorithm', ['DB'], \
+                help='Type of detection algorithm selected. (default = DB)')
+            t1_det_max_side_len = st.slider('det_max_side_len', 500, 2000, 960, step=10, \
+                help='''The maximum size of the long side of the image. (default = 960)\n
+Limit the maximum image height and width.\n
+When the long side exceeds this value, the long side will be resized to this size, and the short side \
+will be scaled proportionally.''')
+            t1_det_db_thresh = st.slider('det_db_thresh', 0.1, 1., 0.3, step=0.1, \
+                help='''Binarization threshold value of DB output map. (default = 0.3) \n
+Used to filter the binarized image of DB prediction; setting it to 0.2-0.3 has no obvious effect on the result.''')
+            t1_det_db_box_thresh = st.slider('det_db_box_thresh', 0.1, 1., 0.6, step=0.1, \
+                help='''The threshold value of the DB output box. (default = 0.6) \n
+DB post-processing filter box threshold; if a detected box is missing, it can be reduced as appropriate. \n
+Boxes scored lower than this value will be discarded.''')
+            t1_det_db_unclip_ratio = st.slider('det_db_unclip_ratio', 1., 3.0, 1.6, step=0.1, \
+                help='''The expanded ratio of DB output box. (default = 1.6) \n
+Indicates the compactness of the text box: the smaller the value, the closer the text box is to the text.''')
+            t1_det_east_score_thresh = st.slider('det_east_score_thresh', 0.1, 1., 0.8, step=0.1, \
+                help="Binarization threshold value of EAST output map. (default = 0.8)")
+            t1_det_east_cover_thresh = st.slider('det_east_cover_thresh', 0.1, 1., 0.1, step=0.1, \
+                help='''The threshold value of the EAST output box. (default = 0.1) \n
+Boxes scored lower than this value will be discarded.''')
+            t1_det_east_nms_thresh = st.slider('det_east_nms_thresh', 0.1, 1., 0.2, step=0.1, \
+                help="The NMS threshold value of EAST model output box. (default = 0.2)")
(default = 0.2)") + t1_det_db_score_mode = st.selectbox('det_db_score_mode', ['fast', 'slow'], \ + help='''slow: use polygon box to calculate bbox score, fast: use rectangle box \ + to calculate. (default = fast) \n +Use rectlar box to calculate faster, and polygonal box more accurate for curved text area.''') + + with col2.expander("Choose detection hyperparameters for " + reader_type_list[2], \ + expanded=False): + t2_det = st.selectbox('det', ['DB_r18','DB_r50','DBPP_r50','DRRG','FCE_IC15', \ + 'FCE_CTW_DCNv2','MaskRCNN_CTW','MaskRCNN_IC15', \ + 'MaskRCNN_IC17', 'PANet_CTW','PANet_IC15','PS_CTW',\ + 'PS_IC15','Tesseract','TextSnake'], 10, \ + help='Text detection algorithm. (default = PANet_IC15)') + st.write("###### *More about text detection models* 👉 \ + [here](https://mmocr.readthedocs.io/en/latest/textdet_models.html)") + t2_merge_xdist = st.slider('merge_xdist', 1, 50, 20, step=1, \ + help='The maximum x-axis distance to merge boxes. (defaut=20)') + + with col2.expander("Choose detection hyperparameters for " + reader_type_list[3], \ + expanded=False): + t3_psm = st.selectbox('Page segmentation mode (psm)', \ + [' - Default', \ + ' 4 Assume a single column of text of variable sizes', \ + ' 5 Assume a single uniform block of vertically aligned text', \ + ' 6 Assume a single uniform block of text', \ + ' 7 Treat the image as a single text line', \ + ' 8 Treat the image as a single word', \ + ' 9 Treat the image as a single word in a circle', \ + '10 Treat the image as a single character', \ + '11 Sparse text. Find as much text as possible in no \ + particular order', \ + '13 Raw line. Treat the image as a single text line, \ + bypassing hacks that are Tesseract-specific']) + t3_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \ + '1 Neural nets LSTM engine only', \ + '2 Legacy + LSTM engines', \ + '3 Default, based on what is available'], 3) + t3_whitelist = st.text_input('Limit tesseract to recognize only this characters :', \ + placeholder='Limit tesseract to recognize only this characters', \ + help='Example for numbers only : 0123456789') + + + submit_detect = st.form_submit_button("Launch detection") + +##----------- Process text detection -------------------------------------------------------------- + if submit_detect: + # Process text detection + + if t0_optimal_num_chars == 0: + t0_optimal_num_chars = None + + # Construct the config Tesseract parameter + t3_config = '' + psm = t3_psm[:2] + if psm != ' -': + t3_config += '--psm ' + psm.strip() + oem = t3_oem[:1] + if oem != '3': + t3_config += ' --oem ' + oem + if t3_whitelist != '': + t3_config += ' -c tessedit_char_whitelist=' + t3_whitelist + + list_params_det = \ + [[easyocr_lang, \ + {'min_size': t0_min_size, 'text_threshold': t0_text_threshold, \ + 'low_text': t0_low_text, 'link_threshold': t0_link_threshold, \ + 'canvas_size': t0_canvas_size, 'mag_ratio': t0_mag_ratio, \ + 'slope_ths': t0_slope_ths, 'ycenter_ths': t0_ycenter_ths, \ + 'height_ths': t0_height_ths, 'width_ths': t0_width_ths, \ + 'add_margin': t0_add_margin, 'optimal_num_chars': t0_optimal_num_chars \ + }], \ + [ppocr_lang, \ + {'det_algorithm': t1_det_algorithm, 'det_max_side_len': t1_det_max_side_len, \ + 'det_db_thresh': t1_det_db_thresh, 'det_db_box_thresh': t1_det_db_box_thresh, \ + 'det_db_unclip_ratio': t1_det_db_unclip_ratio, \ + 'det_east_score_thresh': t1_det_east_score_thresh, \ + 'det_east_cover_thresh': t1_det_east_cover_thresh, \ + 'det_east_nms_thresh': t1_det_east_nms_thresh, \ + 'det_db_score_mode': t1_det_db_score_mode}], + 
+             [mmocr_lang, {'det': t2_det, 'merge_xdist': t2_merge_xdist}],
+             [tesserocr_lang, {'lang': tesserocr_lang, 'config': t3_config}]
+            ]
+
+        show_info1 = st.empty()
+        show_info1.info("Readers initializations in progress (it may take a while) ...")
+        list_readers = init_readers(list_params_det)
+
+        show_info1.info("Text detection in progress ...")
+        list_images, list_coordinates = process_detect(image_path, list_images, list_readers, \
+                                                       list_params_det, color)
+        show_info1.empty()
+
+        if 'list_readers' not in st.session_state:
+            st.session_state.list_readers = list_readers
+        if 'list_coordinates' not in st.session_state:
+            st.session_state.list_coordinates = list_coordinates
+        if 'list_images' not in st.session_state:
+            st.session_state.list_images = list_images
+        if 'list_params_det' not in st.session_state:
+            st.session_state.list_params_det = list_params_det
+
+        if 'columns_size' not in st.session_state:
+            st.session_state.columns_size = [2] + [1 for x in reader_type_list[1:]]
+        if 'column_width' not in st.session_state:
+            st.session_state.column_width = [500] + [400 for x in reader_type_list[1:]]
+        if 'columns_color' not in st.session_state:
+            st.session_state.columns_color = ["rgb(228,26,28)"] + \
+                                             ["rgb(0,0,0)" for x in reader_type_list[1:]]
+
+    if 'list_coordinates' in st.session_state:
+        list_coordinates = st.session_state.list_coordinates
+        list_images = st.session_state.list_images
+        list_readers = st.session_state.list_readers
+        list_params_det = st.session_state.list_params_det
+
+##----------- Text detection results --------------------------------------------------------------
+        st.subheader("Text detection")
+        show_detect = st.empty()
+        list_ok_detect = []
+        with show_detect.container():
+            columns = st.columns(st.session_state.columns_size, ) #gap='medium')
+            for no_col, col in enumerate(columns):
+                column_title = '<p style="color:' + st.session_state.columns_color[no_col] + \
+                               ';">Detection with ' + reader_type_list[no_col] + '</p>'
+                col.markdown(column_title, unsafe_allow_html=True)
+                if isinstance(list_images[no_col+2], PIL.Image.Image):
+                    col.image(list_images[no_col+2], width=st.session_state.column_width[no_col], \
+                              use_column_width=True)
+                    list_ok_detect.append(reader_type_list[no_col])
+                else:
+                    col.write(list_images[no_col+2], use_column_width=True)
+
+        st.subheader("Text recognition")
+
+        st.markdown("##### Using detection performed above by:")
+        st.radio('Choose the detector:', list_ok_detect, key='detect_reader', \
+                 horizontal=True, on_change=highlight)
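+
+# Note: list_ok_detect, built above, contains only the readers whose detection
+# step produced an image (i.e. did not raise an exception), so the radio
+# button only offers detectors that actually succeeded.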
\ + (default=25)") + t1_use_space_char = st.radio('use_space_char', [True, False], 0, horizontal=True, \ + help="Whether to recognize spaces. (default=TRUE)") + t1_drop_score = st.slider('drop_score', 0., 1., 0.25, step=.05, \ + help="Filter the output by score (from the recognition model), and those \ + below this score will not be returned. (default=0.5)") + + with st.expander("Choose recognition hyperparameters for " + reader_type_list[2], \ + expanded=False): + t2_recog = st.selectbox('recog', ['ABINet','CRNN','CRNN_TPS','MASTER', \ + 'NRTR_1/16-1/8','NRTR_1/8-1/4','RobustScanner','SAR','SAR_CN', \ + 'SATRN','SATRN_sm','SEG','Tesseract'], 7, \ + help='Text recognition algorithm. (default = SAR)') + st.write("###### *More about text recognition models* 👉 \ + [here](https://mmocr.readthedocs.io/en/latest/textrecog_models.html)") + + with st.expander("Choose recognition hyperparameters for " + reader_type_list[3], \ + expanded=False): + t3r_psm = st.selectbox('Page segmentation mode (psm)', \ + [' - Default', \ + ' 4 Assume a single column of text of variable sizes', \ + ' 5 Assume a single uniform block of vertically aligned \ + text', \ + ' 6 Assume a single uniform block of text', \ + ' 7 Treat the image as a single text line', \ + ' 8 Treat the image as a single word', \ + ' 9 Treat the image as a single word in a circle', \ + '10 Treat the image as a single character', \ + '11 Sparse text. Find as much text as possible in no \ + particular order', \ + '13 Raw line. Treat the image as a single text line, \ + bypassing hacks that are Tesseract-specific']) + t3r_oem = st.selectbox('OCR engine mode', ['0 Legacy engine only', \ + '1 Neural nets LSTM engine only', \ + '2 Legacy + LSTM engines', \ + '3 Default, based on what is available'], 3) + t3r_whitelist = st.text_input('Limit tesseract to recognize only this \ + characters :', \ + placeholder='Limit tesseract to recognize only this characters', \ + help='Example for numbers only : 0123456789') + + submit_reco = st.form_submit_button("Launch recognition") + + if submit_reco: + process_detect.clear() +##----------- Process recognition ------------------------------------------ + reader_ind = reader_type_dict[st.session_state.detect_reader] + list_boxes = list_coordinates[reader_ind] + + # Construct the config Tesseract parameter + t3r_config = '' + psm = t3r_psm[:2] + if psm != ' -': + t3r_config += '--psm ' + psm.strip() + oem = t3r_oem[:1] + if oem != '3': + t3r_config += ' --oem ' + oem + if t3r_whitelist != '': + t3r_config += ' -c tessedit_char_whitelist=' + t3r_whitelist + + list_params_rec = \ + [{'decoder': t0_decoder, 'beamWidth': t0_beamWidth, \ + 'batch_size': t0_batch_size, 'workers': t0_workers, \ + 'allowlist': t0_allowlist, 'blocklist': t0_blocklist, \ + 'detail': t0_detail, 'paragraph': t0_paragraph, \ + 'contrast_ths': t0_contrast_ths, 'adjust_contrast': t0_adjust_contrast + }, + { **list_params_det[1][1], **{'rec_algorithm': t1_rec_algorithm, \ + 'rec_batch_num': t1_rec_batch_num, 'max_text_length': t1_max_text_length, \ + 'use_space_char': t1_use_space_char, 'drop_score': t1_drop_score}, \ + **{'lang': list_params_det[1][0]} + }, + {'recog': t2_recog}, + {'lang': tesserocr_lang, 'config': t3r_config} + ] + + show_info2 = st.empty() + + with show_info2.container(): + st.info("Text recognition in progress ...") + df_results, df_results_tesseract, list_reco_status = \ + process_recog(list_readers, list_images[1], list_boxes, list_params_rec) + show_info2.empty() + + st.session_state.df_results = df_results + 
+
+        if 'df_results' in st.session_state:
+##----------- Show recognition results ------------------------------------------------------------
+            results_cols = st.session_state.df_results.columns
+            list_col_text = np.arange(1, len(cols_size), 2)
+            list_col_confid = np.arange(2, len(cols_size), 2)
+
+            dict_draw_reco = {'in_image': st.session_state.list_images[1], \
+                              'in_boxes_coordinates': st.session_state.list_boxes, \
+                              'in_list_texts': [st.session_state.df_results[x].to_list() \
+                                                for x in results_cols[list_col_text]], \
+                              'in_list_confid': [st.session_state.df_results[x].to_list() \
+                                                 for x in results_cols[list_col_confid]], \
+                              'in_dict_back_colors': dict_back_colors, \
+                              'in_df_results_tesseract': st.session_state.df_results_tesseract, \
+                              'in_reader_type_list': reader_type_list
+                             }
+            show_reco = st.empty()
+
+            with st.form("form3"):
+                st.plotly_chart(fig_colorscale, use_container_width=True)
+
+                col_font, col_threshold = st.columns(2)
+
+                col_font.slider('Font scale', 1, 7, 4, step=1, key="font_scale_sld")
+                col_threshold.slider('% confidence threshold for text color change', 40, 100, 64, \
+                                     step=1, key="conf_threshold_sld")
+                col_threshold.write("(text color is black below this % confidence threshold, \
+                                     and white above)")
+
+                draw_reco_images(**dict_draw_reco)
+
+                submit_resize = st.form_submit_button("Refresh")
+
+            if submit_resize:
+                draw_reco_images(**dict_draw_reco, \
+                                 in_font_scale=st.session_state.font_scale_sld, \
+                                 in_conf_threshold=st.session_state.conf_threshold_sld)
+
+            st.subheader("Recognition details")
+            with st.expander("Detailed areas for EasyOCR, PPOCR, MMOCR", expanded=False):
+                cols = st.columns(cols_size)
+                cols[0].markdown('#### Detected area')
+                for i in range(1, (len(reader_type_list)-1)*2, 2):
+                    cols[i].markdown('#### with ' + reader_type_list[i//2])
+
+                for row in st.session_state.df_results.itertuples():
+                    #cols = st.columns(1 + len(reader_type_list)*2)
+                    cols = st.columns(cols_size)
+                    cols[0].image(row.cropped_image, width=150)
+                    for ind_col in range(1, len(cols), 2):
+                        cols[ind_col].write(getattr(row, results_cols[ind_col]))
+                        cols[ind_col+1].write("(" + str( \
+                            getattr(row, results_cols[ind_col+1])) + "%)")
+
+                st.download_button(
+                    label="Download results as CSV file",
+                    data=convert_df(st.session_state.df_results),
+                    file_name='OCR_comparator_results.csv',
+                    mime='text/csv',
+                )
+
+            if not st.session_state.df_results_tesseract.empty:
+                with st.expander("Detailed areas for Tesseract", expanded=False):
+                    cols = st.columns([2,2,1])
+                    cols[0].markdown('#### Detected area')
+                    cols[1].markdown('#### with Tesseract')
+
+                    for row in st.session_state.df_results_tesseract.itertuples():
+                        cols = st.columns([2,2,1])
+                        cols[0].image(row.cropped, width=150)
+                        cols[1].write(getattr(row, 'text'))
+                        cols[2].write("(" + str(getattr(row, 'conf')) + "%)")
+
+                    st.download_button(
+                        label="Download Tesseract results as CSV file",
+                        data=convert_df(st.session_state.df_results_tesseract),
+                        file_name='OCR_comparator_Tesseract_results.csv',
+                        mime='text/csv',
+                    )
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..7beea4d114d0d6805f7594ae973687a17a193ae6
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,14 @@
+# https://github.com/streamlit/streamlit/issues/5315
+easyocr
+streamlit
+opencv-python-headless==4.5.5.64
+Pillow
+mmdet==2.25.1
+mmocr==0.6.1
+paddleocr==2.6
+paddlepaddle==2.3.2 +mycolorpy==1.5.1 +plotly==5.10.0 +plotly-express==0.4.1 +mmcv-full==1.6.1 --no-binary mmcv-full==1.6.1 +pytesseract==0.3.10 \ No newline at end of file